From 368af15fd8368cf6694200f1715a0468083740d0 Mon Sep 17 00:00:00 2001 From: joethei Date: Thu, 25 Apr 2019 13:02:59 +0200 Subject: [PATCH 1/4] =?UTF-8?q?Cluster=20fast=20fertig,=20ein=20bischen=20?= =?UTF-8?q?aufr=C3=A4umen=20noch=20n=C3=B6tig?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #SPM-13: add work development 2h 20m Cluster fertig --- .../de/hsel/spm/baudas/analysis/Cluster.java | 88 +++++++++++++++++++ .../hsel/spm/baudas/analysis/ClusterTest.java | 29 ++++++ 2 files changed, 117 insertions(+) create mode 100644 src/main/java/de/hsel/spm/baudas/analysis/Cluster.java create mode 100644 src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java diff --git a/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java new file mode 100644 index 0000000..d340747 --- /dev/null +++ b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java @@ -0,0 +1,88 @@ +package de.hsel.spm.baudas.analysis; + +import weka.clusterers.SimpleKMeans; +import weka.core.Instances; +import weka.filters.Filter; +import weka.filters.unsupervised.attribute.Remove; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +/** + * Cluster Analysis + * + * @author Johannes Theiner + * @version 0.1 + */ +public class Cluster implements Analysis>> { + + private Instances instances; + private Map> result; + + public Cluster(File file) { + instances = load(file); + } + + @Override + public Map> getResult() { + if(result == null) { + result = new HashMap<>(); + //TODO: anpassen wenn #SPM-17 gemerged ist. + int[] keepIndexes = new int[]{0, 1, 3, 5, 6, 7}; + Remove remove = new Remove(); + + try { + remove.setAttributeIndicesArray(keepIndexes); + remove.setInvertSelection(true); + remove.setInputFormat(instances); + instances = Filter.useFilter(instances, remove); + } catch (Exception e) { + e.printStackTrace(); + } + + SimpleKMeans fullMeans = new SimpleKMeans(); + Instances fullCentroids = null; + try { + fullMeans.setNumClusters(1); + fullMeans.setPreserveInstancesOrder(true); + fullMeans.buildClusterer(instances); + fullCentroids = fullMeans.getClusterCentroids(); + + } catch (Exception ex) { + ex.printStackTrace(); + } + assert fullCentroids != null; + + SimpleKMeans kMeans = new SimpleKMeans(); + try { + kMeans.setNumClusters(5); + kMeans.setPreserveInstancesOrder(true); + kMeans.buildClusterer(instances); + + int count = 0; + count = putIntoMap(fullCentroids, count); + + Instances centroids = kMeans.getClusterCentroids(); + putIntoMap(centroids, count); + + } catch (Exception ex) { + ex.printStackTrace(); + } + } + + return result; + } + + private int putIntoMap(Instances centroids, int count) { + for (int i = 0; i < centroids.numInstances(); i++) { + Map map = new HashMap<>(); + for (int j = 0; j < centroids.numAttributes(); j++) { + map.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j)); + } + result.put(count, map); + count++; + } + return count; + } +} \ No newline at end of file diff --git a/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java new file mode 100644 index 0000000..2711fa6 --- /dev/null +++ b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java @@ -0,0 +1,29 @@ +package de.hsel.spm.baudas.analysis; + +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.net.URL; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTimeout; + +public class ClusterTest { + + @Test + public void test100() { + URL url = getClass().getClassLoader().getResource("kd100.csv"); + assert url != null; + + Cluster cluster = new Cluster(new File(url.getFile())); + + AtomicReference>> results = new AtomicReference<>(); + + assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult())); + + assertEquals("m", results.get().get(0).get("Geschlecht")); + } +} \ No newline at end of file From d9e041c1ae8029dd2f8723b794c1ae3d707518ae Mon Sep 17 00:00:00 2001 From: joethei Date: Thu, 25 Apr 2019 16:19:23 +0200 Subject: [PATCH 2/4] + Dokumentation #SPM-13: add work documentation 20m Cluster fertig --- .../de/hsel/spm/baudas/analysis/Cluster.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java index d340747..10e25b1 100644 --- a/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java +++ b/src/main/java/de/hsel/spm/baudas/analysis/Cluster.java @@ -10,7 +10,7 @@ import java.util.HashMap; import java.util.Map; /** - * Cluster Analysis + * Cluster Analysis. * * @author Johannes Theiner * @version 0.1 @@ -24,6 +24,11 @@ public class Cluster implements Analysis>> { instances = load(file); } + /** + * get result of cluster analysis. + * + * @return Result + */ @Override public Map> getResult() { if(result == null) { @@ -41,6 +46,7 @@ public class Cluster implements Analysis>> { e.printStackTrace(); } + //creating a single cluster to get average, weka has no way to get that from a bigger one. SimpleKMeans fullMeans = new SimpleKMeans(); Instances fullCentroids = null; try { @@ -54,6 +60,7 @@ public class Cluster implements Analysis>> { } assert fullCentroids != null; + //creating real cluster SimpleKMeans kMeans = new SimpleKMeans(); try { kMeans.setNumClusters(5); @@ -70,10 +77,16 @@ public class Cluster implements Analysis>> { ex.printStackTrace(); } } - return result; } + /** + * puts data into map. + * + * @param centroids cluster analysis result + * @param count current insert count + * @return count increment + */ private int putIntoMap(Instances centroids, int count) { for (int i = 0; i < centroids.numInstances(); i++) { Map map = new HashMap<>(); From 0f05fd04b1bbdcf5b2c22adbe83bbb1a4ba2d2a2 Mon Sep 17 00:00:00 2001 From: Julian Hinxlage Date: Thu, 2 May 2019 14:54:08 +0200 Subject: [PATCH 3/4] added target to .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f85356d..21399b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.iml .idea -.project \ No newline at end of file +.project +target \ No newline at end of file From ea7598aa645e8fdba120a66b56bb6664b435dd52 Mon Sep 17 00:00:00 2001 From: Julian Hinxlage Date: Fri, 3 May 2019 11:13:58 +0200 Subject: [PATCH 4/4] +Test for Cluster Analysis --- .../hsel/spm/baudas/analysis/ClusterTest.java | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java index 2711fa6..8eddd85 100644 --- a/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java +++ b/src/test/java/de/hsel/spm/baudas/analysis/ClusterTest.java @@ -11,10 +11,16 @@ import java.util.concurrent.atomic.AtomicReference; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTimeout; -public class ClusterTest { +/** + * test for Cluster Analysis. + * + * @author Julian Hinxlage + * @version 0.1 + */ +class ClusterTest { @Test - public void test100() { + void test100() { URL url = getClass().getClassLoader().getResource("kd100.csv"); assert url != null; @@ -22,6 +28,35 @@ public class ClusterTest { AtomicReference>> results = new AtomicReference<>(); + + assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult())); + + assertEquals("m", results.get().get(0).get("Geschlecht")); + } + + @Test + void test1000() { + URL url = getClass().getClassLoader().getResource("kd1000.csv"); + assert url != null; + + Cluster cluster = new Cluster(new File(url.getFile())); + + AtomicReference>> results = new AtomicReference<>(); + + assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult())); + + assertEquals("m", results.get().get(0).get("Geschlecht")); + } + + @Test + void test10000() { + URL url = getClass().getClassLoader().getResource("kd10000.csv"); + assert url != null; + + Cluster cluster = new Cluster(new File(url.getFile())); + + AtomicReference>> results = new AtomicReference<>(); + assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult())); assertEquals("m", results.get().get(0).get("Geschlecht"));