From c96863ee9e69fa6133aa30990f949c7f2c40a190 Mon Sep 17 00:00:00 2001 From: AriezLabs Date: Tue, 8 Oct 2019 19:00:27 +0200 Subject: [PATCH] some polish --- README.md | 11 +++++ src/tasks/ConcurrencyTest.java | 23 --------- src/tasks/MergeOverlappingCommunities.java | 57 ++++++++++++++-------- src/tasks/tests.java | 18 ------- test/graph/InducedSubgraphTest.java | 27 +++++++++- test/io/GraphWriterTest.java | 50 ++++++++++++++++++- 6 files changed, 121 insertions(+), 65 deletions(-) delete mode 100644 src/tasks/ConcurrencyTest.java delete mode 100644 src/tasks/tests.java diff --git a/README.md b/README.md index 2cd1509..6262c7d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,15 @@ # HiDALGO-gpt Graph processing tool suite for the [HiDALGO project][1] supporting basic operations for reading, writing and working with graphs. +## Usage + +HiDALGO-gpt can perform various tasks. Each one has a class in src/tasks: + +* `FilterStarlike.java`: Due to the nature of our data and the chosen clustering algorithm, the dataset might be polluted with starlike graphs. The clustering algorithm seems to have a preference for throwing poorly connected nodes together, which results in a "community" where all nodes are connected to a central node, but there are barely any edges beyond that. This task uses heuristics to discard graphs that qualify as "starlike". +* `FindSpecificEVs.java`: Another task used for inspection of the dataset, this class simply looks for communities that have a specific eigenvalue (minus some delta). +* `MergeOverlappingCommunities.java`: This is the biggest task. The idea is to merge similar communities, as lots of these might emerge since we are clustering each node's neighborhood. There are various parameters to play around with, documented in the class. +* `PrecalculateAllEVs.java`: Calculates the eigenvalues of a list of communities so that they don't have to be computed on the fly. + +To run a task, compile the project and call `java tasks/[task]`. 
+ [1]: https://hidalgo-project.eu diff --git a/src/tasks/ConcurrencyTest.java b/src/tasks/ConcurrencyTest.java deleted file mode 100644 index e95f0b2..0000000 --- a/src/tasks/ConcurrencyTest.java +++ /dev/null @@ -1,23 +0,0 @@ -package tasks; - -public class ConcurrencyTest { - public static long test = 0; - public static void main(String[] args) throws InterruptedException { - Thread[] threads = new Thread[4]; - for (int i = 0; i < threads.length; i++) { - threads[i] = new Thread(() -> { - for (long j = 0; j < 1000000000l; j++) { - test += j; - } - System.out.println(test); - }); - } - for (int i = 0; i < threads.length; i++) { - System.out.println("started " + i); - threads[i].start(); - } - for (int i = 0; i < threads.length; i++) { - threads[i].join(); - } - } -} diff --git a/src/tasks/MergeOverlappingCommunities.java b/src/tasks/MergeOverlappingCommunities.java index 1bd1a0c..be92976 100644 --- a/src/tasks/MergeOverlappingCommunities.java +++ b/src/tasks/MergeOverlappingCommunities.java @@ -2,8 +2,8 @@ import graph.Graph; import graph.InducedSubgraph; -import index.MergeCandidate; import index.InverseIndex; +import index.MergeCandidate; import io.GraphReader; import io.GraphWriter; @@ -20,6 +20,9 @@ public class MergeOverlappingCommunities { private static double nodeOverlapThreshold; private static final double evDeltaThreshold = 0.01; + private static String pathToGraph; + private static String pathToCommunities; + private static int numThreads; private static int walltime; private static boolean walltimeExceeded = false; @@ -27,24 +30,32 @@ public class MergeOverlappingCommunities { private static ArrayList subgs; public static int evCompareStrategy; - public static double findBestMatchAmong; + public static double candidatesToCheckPerc; // number of communities to skip for each one read, for testing purposes (subtract 1) private static final int skip = 1; public static void main(String[] args) throws IOException, InterruptedException { - if (args.length != 
6) { - System.out.println("usage: MergeOverlappingCommunities numThreads walltimeSeconds edgeOverlapThreshold nodeOverlapThreshold evCompareStrategy findBestMatchAmong"); - System.out.println("evCompareStrategy: 0 (measure vs. larger) | 1 (measure vs. average)"); - System.out.println("findBestMatchAmong: [0, 1] (% of possible merging candidates to check before picking the best, but at least 1)"); + if (args.length != 8) { + System.out.println("usage: MergeOverlappingCommunities graph communities numThreads walltimeSeconds edgeOverlapThreshold nodeOverlapThreshold evCompareStrategy candidatesToCheckPerc"); /* positional arguments, listed in the order they are read from args[0..7] */ + System.out.println("\tgraph: path to Metis graph"); + System.out.println("\tcommunities: path to list of communities"); + System.out.println("\tnumThreads: number of threads searching for mergeable communities to run in parallel"); + System.out.println("\twalltimeSeconds: number of seconds after which to halt above threads"); + System.out.println("\tedgeOverlapThreshold: A float. Parameter for a heuristic used to determine whether two communities should be merged."); + System.out.println("\tnodeOverlapThreshold: Also a float and a parameter for a heuristic."); + System.out.println("\tevCompareStrategy: 0 (ev delta is measured against the larger community) | 1 (measured vs. 
average of both)"); + System.out.println("\tcandidatesToCheckPerc: in [0, 1] (% of possible merging candidates to check before picking the best, but at least 1)"); System.exit(1); } else { - numThreads = Integer.parseInt(args[0]); - walltime = Integer.parseInt(args[1]); - edgeOverlapThreshold = Double.parseDouble(args[2]); - nodeOverlapThreshold = Double.parseDouble(args[3]); - evCompareStrategy = Integer.parseInt(args[4]); - findBestMatchAmong = Integer.parseInt(args[5]); + pathToGraph = args[0]; + pathToCommunities = args[1]; + numThreads = Integer.parseInt(args[2]); + walltime = Integer.parseInt(args[3]); + edgeOverlapThreshold = Double.parseDouble(args[4]); + nodeOverlapThreshold = Double.parseDouble(args[5]); + evCompareStrategy = Integer.parseInt(args[6]); + candidatesToCheckPerc = Double.parseDouble(args[7]); /* a fraction in [0, 1] stored in a double field: Integer.parseInt would reject inputs such as "0.5" */ } Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook())); @@ -53,13 +64,13 @@ public static void main(String[] args) throws IOException, InterruptedException gr.setInputFormat(new GraphReader.Metis()); gr.setReturnFormat(new GraphReader.List()); - Graph pokec = gr.fromFile("resources/pokec.metis"); + Graph pokec = gr.fromFile(pathToGraph); gr.setReturnFormat(new GraphReader.Subgraph()); gr.setInputFormat(new GraphReader.NodeListWithEvs(pokec)); subgs = new ArrayList<>(); - try (BufferedReader br = new BufferedReader(new FileReader(new File("resources/communitiesWithEvs.txt")))) { + try (BufferedReader br = new BufferedReader(new FileReader(new File(pathToCommunities)))) { String line; if(skip != 1) System.err.println("WARNING: SKIPPING " + (skip-1) + " COMMUNITIES FOR EACH ONE READ"); @@ -87,12 +98,11 @@ public static void main(String[] args) throws IOException, InterruptedException && candidate.edgesOverlapping(edgeOverlapThreshold) && candidate.getDelta() >= evDeltaThreshold) { index.update(candidate); - evImprovement += candidate.getDelta(); - numMerged++; - printMerge(candidate); + updateEvDeltaCounter(candidate.getDelta()); break; } } + 
candidate.unlock(); } }); @@ -105,7 +115,7 @@ public static void main(String[] args) throws IOException, InterruptedException } Thread.sleep(1000*walltime); - System.out.println("exceeded walltime, stopping..."); + System.out.println("exceeded walltime, stopping threads..."); walltimeExceeded = true; for (int i = 0; i < threads.length; i++) @@ -126,10 +136,10 @@ public void run() { System.out.println("#merges: " + numMerged); System.out.println("merges/s: " + (numMerged / (((System.currentTimeMillis() - stime)) / 1000d))); System.out.println("avg ev improvement: " + (evImprovement / (2 * numMerged))); - System.out.println("remaining items: " + subgs.size()); + System.out.println("remaining communities: " + subgs.size()); System.out.println("writing new evs..."); - try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./mergedEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { for(InducedSubgraph subg : subgs) bw.write(subg.getEigenvalue() +"\n"); } catch (IOException e) { @@ -137,7 +147,7 @@ public void run() { } System.out.println("writing new nodelists..."); - try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./mergedCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { for(InducedSubgraph subg : subgs) { for (int i : subg.toNodeList()) bw.write(i +" "); @@ -160,4 +170,9 @@ private synchronized static void printMerge(MergeCandidate pair) { System.out.println(gw.toGraphViz(pair)); System.out.println(); } + + private synchronized static void updateEvDeltaCounter(double delta) { + evImprovement += delta; + 
numMerged++; + } } diff --git a/src/tasks/tests.java b/src/tasks/tests.java deleted file mode 100644 index 3b4cf45..0000000 --- a/src/tasks/tests.java +++ /dev/null @@ -1,18 +0,0 @@ -package tasks; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; - -public class tests { - public static void main(String[] args) { - HashMap asd = new HashMap<>(); - String s = new String("asd"); - asd.put(s, 21); - asd.put(s, 22); - Set nodes = new HashSet<>(); - nodes.add(6); - nodes.add(6); - System.out.println(nodes); - } -} diff --git a/test/graph/InducedSubgraphTest.java b/test/graph/InducedSubgraphTest.java index 691dd99..68331cb 100644 --- a/test/graph/InducedSubgraphTest.java +++ b/test/graph/InducedSubgraphTest.java @@ -8,7 +8,8 @@ import java.io.IOException; import java.util.ArrayList; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; class InducedSubgraphTest { InducedSubgraph g; @@ -65,4 +66,28 @@ void getEdgeOverlapPercent() throws IOException { System.out.println(sub1.getEdgeOverlapPercent(sub2)); } + + @Test + void testMerge() throws IOException { + GraphReader gr = new GraphReader(); + gr.setInputFormat(new GraphReader.Metis()); + gr.setReturnFormat(new GraphReader.Matrix()); + Graph main = gr.fromFile(new File("testResources/medium.metis")); + + gr.setInputFormat(new GraphReader.NodeList(main)); + gr.setReturnFormat(new GraphReader.Subgraph()); + InducedSubgraph sg1 = (InducedSubgraph) gr.fromFile("testResources/medium.nl"); + InducedSubgraph sg2 = (InducedSubgraph) gr.fromFile("testResources/medium.nl.2"); + + InducedSubgraph merged = sg1.merge(sg2); + int[] nodes = {0, 1, 2, 3, 10, 13, 16}; + ArrayList whyisthissocomplicated = new ArrayList<>(); + for(int i : nodes) { + assertTrue(merged.toNodeList().contains(i)); + whyisthissocomplicated.add(i); + } + + InducedSubgraph test = new InducedSubgraph(main, 
whyisthissocomplicated); + assertEquals(test.getEigenvalue(), merged.getEigenvalue(), 0.0000001); + } } \ No newline at end of file diff --git a/test/io/GraphWriterTest.java b/test/io/GraphWriterTest.java index 62da04f..8775bac 100644 --- a/test/io/GraphWriterTest.java +++ b/test/io/GraphWriterTest.java @@ -1,11 +1,12 @@ package io; import graph.Graph; +import graph.InducedSubgraph; +import index.MergeCandidate; import org.junit.jupiter.api.Test; import java.io.IOException; - -import static org.junit.jupiter.api.Assertions.*; +import java.util.ArrayList; class GraphWriterTest { @@ -18,4 +19,49 @@ void testString() throws IOException { gr.setInputFormat(new GraphReader.NodeList(g)); System.out.println(gr.fromFile("testResources/medium.nl")); } + + @Test + void testGraphViz() throws IOException { + GraphWriter gw = new GraphWriter(); + gw.setFormat(new GraphWriter.Metis()); + + GraphReader gr = new GraphReader(); + gr.setInputFormat(new GraphReader.Metis()); + gr.setReturnFormat(new GraphReader.List()); + Graph g = gr.fromFile("testResources/medium.metis"); + + gr.setInputFormat(new GraphReader.NodeList(g)); + gr.setReturnFormat(new GraphReader.Subgraph()); + InducedSubgraph a = (InducedSubgraph) gr.fromFile("testResources/medium.nl"); + InducedSubgraph b = (InducedSubgraph) gr.fromFile("testResources/medium.nl.2"); + + ArrayList subs = new ArrayList<>(); + subs.add(a); + subs.add(b); + + MergeCandidate mc = new MergeCandidate(a,subs); + mc.next(); + System.out.println(gw.toGraphViz(mc)); + + // pt 2 + + gr.setInputFormat(new GraphReader.Metis()); + gr.setReturnFormat(new GraphReader.List()); + g = gr.fromFile("testResources/small.metis"); + + gr.setInputFormat(new GraphReader.NodeList(g)); + gr.setReturnFormat(new GraphReader.Subgraph()); + a = (InducedSubgraph) gr.fromFile("testResources/small1.nl"); + b = (InducedSubgraph) gr.fromFile("testResources/small2.nl"); + + subs = new ArrayList<>(); + subs.add(a); + subs.add(b); + + mc = new MergeCandidate(a,subs); + 
mc.next(); + gw = new GraphWriter(); + System.out.println(g.hasEdge(5, 3)); + System.out.println(gw.toGraphViz(mc)); + } } \ No newline at end of file