Cluster fast fertig, ein bisschen Aufräumen noch nötig

#SPM-13: add work development 2h 20m Cluster fertig
This commit is contained in:
Johannes Theiner 2019-04-25 13:02:59 +02:00
parent 691481dbc5
commit 368af15fd8
2 changed files with 117 additions and 0 deletions

View File

@ -0,0 +1,88 @@
package de.hsel.spm.baudas.analysis;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
/**
 * Cluster analysis built on Weka's {@link SimpleKMeans}.
 *
 * <p>The result maps a running index to one cluster centroid; every centroid is represented as a
 * map from attribute name to the centroid's value for that attribute. Index 0 holds the centroid
 * of a single cluster spanning all instances, the following indexes hold the centroids of a
 * k-means run with {@code CLUSTER_COUNT} clusters.
 *
 * @author Johannes Theiner
 * @version 0.1
 */
public class Cluster implements Analysis<Map<Integer, Map<String, String>>> {

    /**
     * Positions of the attributes that take part in the clustering; all others are removed.
     * TODO: adjust once #SPM-17 has been merged.
     */
    private static final int[] KEPT_ATTRIBUTE_INDEXES = {0, 1, 3, 5, 6, 7};

    /** Number of clusters of the detailed k-means run. */
    private static final int CLUSTER_COUNT = 5;

    /** Data set as returned by {@code load}; never modified by this class. */
    private final Instances instances;

    /** Lazily computed result, {@code null} until {@link #getResult()} ran once. */
    private Map<Integer, Map<String, String>> result;

    /**
     * Creates the analysis for the given data file.
     *
     * @param file file handed to {@code load} to obtain the instances
     */
    public Cluster(File file) {
        instances = load(file);
    }

    /**
     * Returns the centroids, computing them on the first call and reusing them afterwards.
     *
     * @return centroids keyed by running index; empty if any step of the analysis failed
     */
    @Override
    public Map<Integer, Map<String, String>> getResult() {
        if (result == null) {
            result = computeCentroids();
        }
        return result;
    }

    /**
     * Runs the whole pipeline: attribute filtering, the one-cluster run and the
     * {@code CLUSTER_COUNT}-cluster run.
     *
     * <p>The first failing step aborts the pipeline, so clustering never runs on unfiltered data
     * and a half-filled map is never handed out.
     *
     * @return the complete centroid map, or an empty map if a step failed
     */
    private Map<Integer, Map<String, String>> computeCentroids() {
        Map<Integer, Map<String, String>> centroidsByIndex = new HashMap<>();
        try {
            Instances reduced = keepRelevantAttributes(instances);
            Instances overallCentroid = clusterCentroids(reduced, 1);
            Instances detailedCentroids = clusterCentroids(reduced, CLUSTER_COUNT);
            appendCentroids(overallCentroid, centroidsByIndex);
            appendCentroids(detailedCentroids, centroidsByIndex);
        } catch (Exception ex) {
            // The Weka calls used here declare a plain Exception, so nothing narrower can be
            // caught. The failure is reported the same way the class always did and the caller
            // receives an empty result instead of a partial one.
            ex.printStackTrace();
            return new HashMap<>();
        }
        return centroidsByIndex;
    }

    /**
     * Produces a copy of the data that contains only {@code KEPT_ATTRIBUTE_INDEXES}.
     *
     * @param data instances to filter; left unchanged
     * @return filtered instances
     * @throws Exception if the filter cannot be configured or applied
     */
    private static Instances keepRelevantAttributes(Instances data) throws Exception {
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(KEPT_ATTRIBUTE_INDEXES);
        // Inverting the selection turns "remove these" into "keep only these".
        remove.setInvertSelection(true);
        remove.setInputFormat(data);
        return Filter.useFilter(data, remove);
    }

    /**
     * Builds a k-means clusterer and returns its centroids.
     *
     * @param data instances to cluster
     * @param clusterCount number of clusters to build
     * @return centroids of the built clusterer
     * @throws Exception if the clusterer cannot be configured or built
     */
    private static Instances clusterCentroids(Instances data, int clusterCount) throws Exception {
        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(clusterCount);
        kMeans.setPreserveInstancesOrder(true);
        kMeans.buildClusterer(data);
        return kMeans.getClusterCentroids();
    }

    /**
     * Appends every centroid to {@code target}, continuing the running index at the current
     * size of {@code target}.
     *
     * @param centroids centroids to convert
     * @param target map receiving one attribute-name-to-value map per centroid
     */
    private static void appendCentroids(Instances centroids,
                                        Map<Integer, Map<String, String>> target) {
        for (int i = 0; i < centroids.numInstances(); i++) {
            Map<String, String> values = new HashMap<>();
            for (int j = 0; j < centroids.numAttributes(); j++) {
                // NOTE(review): stringValue presumably only works for non-numeric attributes;
                // confirm that all kept attributes are nominal/string.
                values.put(centroids.attribute(j).name(), centroids.instance(i).stringValue(j));
            }
            target.put(target.size(), values);
        }
    }
}

View File

@ -0,0 +1,29 @@
package de.hsel.spm.baudas.analysis;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTimeout;
/**
 * Tests for {@link Cluster}.
 */
public class ClusterTest {

    /**
     * Clusters the {@code kd100.csv} test resource and checks that the result arrives within two
     * seconds and that the entry at index 0 reports {@code "m"} for {@code "Geschlecht"}.
     *
     * @throws URISyntaxException if the resource URL cannot be converted to a URI
     */
    @Test
    public void test100() throws URISyntaxException {
        URL url = getClass().getClassLoader().getResource("kd100.csv");
        // A real assertion instead of the assert keyword: it does not depend on the JVM running
        // with -ea and reports which resource is missing.
        assertNotNull(url, "test resource kd100.csv not found on the classpath");

        // URL#getFile() returns the percent-encoded path (e.g. "%20" for a space), which is not a
        // valid file name; converting via the URI decodes it.
        Cluster cluster = new Cluster(new File(url.toURI()));

        AtomicReference<Map<Integer, Map<String, String>>> results = new AtomicReference<>();
        assertTimeout(Duration.ofMillis(2000), () -> results.set(cluster.getResult()));

        Map<String, String> firstCentroid = results.get().get(0);
        assertNotNull(firstCentroid, "result contains no entry for index 0");
        assertEquals("m", firstCentroid.get("Geschlecht"));
    }
}