Skip to content

Commit

Permalink
some polish
Browse files Browse the repository at this point in the history
  • Loading branch information
ariez-xyz committed Oct 8, 2019
1 parent 9d7674c commit c96863e
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 65 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
# HiDALGO-gpt
Graph processing tool suite for the [HiDALGO project][1] supporting basic operations for reading, writing and working with graphs.

## Usage

HiDALGO-gpt can perform various tasks. Each one has a class in src/tasks:

* `FilterStarlike.java`: Due to the nature of our data and the chosen clustering algorithm, the dataset might be polluted with starlike graphs. The clustering algorithm seems to have a preference for throwing poorly connected nodes together, which results in a "community" where all nodes are connected to a central node, but there are barely any edges beyond that. This task uses heuristics to discard graphs that qualify as "starlike".
* `FindSpecificEVs.java`: Another task used for inspecting the dataset. This class simply looks for communities whose eigenvalue matches a specific value (within some delta).
* `MergeOverlappingCommunities.java`: This is the biggest task. The idea is to merge similar communities, as lots of these might emerge since we are clustering each node's neighborhood. There are various parameters to play around with, documented in the class.
* `PrecalculateAllEVs.java`: Calculates the eigenvalues of a list of communities so that they don't have to be computed on the fly.

To run a task, compile the project and call `java tasks/[task]`.

[1]: https://hidalgo-project.eu
23 changes: 0 additions & 23 deletions src/tasks/ConcurrencyTest.java

This file was deleted.

57 changes: 36 additions & 21 deletions src/tasks/MergeOverlappingCommunities.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import graph.Graph;
import graph.InducedSubgraph;
import index.MergeCandidate;
import index.InverseIndex;
import index.MergeCandidate;
import io.GraphReader;
import io.GraphWriter;

Expand All @@ -20,31 +20,42 @@ public class MergeOverlappingCommunities {
private static double nodeOverlapThreshold;
private static final double evDeltaThreshold = 0.01;

private static String pathToGraph;
private static String pathToCommunities;

private static int numThreads;
private static int walltime;
private static boolean walltimeExceeded = false;
private static InverseIndex index;
private static ArrayList<InducedSubgraph> subgs;

public static int evCompareStrategy;
public static double findBestMatchAmong;
public static double candidatesToCheckPerc;

// number of communities to skip for each one read, for testing purposes (subtract 1)
private static final int skip = 1;

public static void main(String[] args) throws IOException, InterruptedException {
if (args.length != 6) {
System.out.println("usage: MergeOverlappingCommunities numThreads walltimeSeconds edgeOverlapThreshold nodeOverlapThreshold evCompareStrategy findBestMatchAmong");
System.out.println("evCompareStrategy: 0 (measure vs. larger) | 1 (measure vs. average)");
System.out.println("findBestMatchAmong: [0, 1] (% of possible merging candidates to check before picking the best, but at least 1)");
if (args.length != 8) {
System.out.println("usage: MergeOverlappingCommunities <graph> <communities> <numThreads> <walltimeSeconds> <edgeOverlapThreshold> <nodeOverlapThreshold> <evCompareStrategy> <candidatesToCheckPerc>");
System.out.println("\tgraph: path to Metis graph");
System.out.println("\tcommunities: path to list of communities");
System.out.println("\tnumThreads: number of threads searching for mergeable communities to run in parallel");
System.out.println("\twalltimeSeconds: number of seconds after which to halt above threads");
System.out.println("\tedgeOverlapThreshold: A float. Parameter for a heuristic used to determine whether two communities should be merged.");
System.out.println("\tnodeOverlapThreshold: Also a float and a parameter for a heuristic.");
System.out.println("\tevCompareStrategy: 0 (ev delta is measured against the larger community) | 1 (measured vs. average of both)");
System.out.println("\tcandidatesToCheckPerc: in [0, 1] (% of possible merging candidates to check before picking the best, but at least 1)");
System.exit(1);
} else {
numThreads = Integer.parseInt(args[0]);
walltime = Integer.parseInt(args[1]);
edgeOverlapThreshold = Double.parseDouble(args[2]);
nodeOverlapThreshold = Double.parseDouble(args[3]);
evCompareStrategy = Integer.parseInt(args[4]);
findBestMatchAmong = Integer.parseInt(args[5]);
// Positional arguments, in the order printed by the usage message above.
pathToGraph = args[0];
pathToCommunities = args[1];
numThreads = Integer.parseInt(args[2]);
walltime = Integer.parseInt(args[3]);
edgeOverlapThreshold = Double.parseDouble(args[4]);
nodeOverlapThreshold = Double.parseDouble(args[5]);
evCompareStrategy = Integer.parseInt(args[6]);
// candidatesToCheckPerc is a double fraction in [0, 1]; Integer.parseInt would
// throw NumberFormatException for fractional input such as "0.5".
candidatesToCheckPerc = Double.parseDouble(args[7]);
}

Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook()));
Expand All @@ -53,13 +64,13 @@ public static void main(String[] args) throws IOException, InterruptedException
gr.setInputFormat(new GraphReader.Metis());
gr.setReturnFormat(new GraphReader.List());

Graph pokec = gr.fromFile("resources/pokec.metis");
Graph pokec = gr.fromFile(pathToGraph);

gr.setReturnFormat(new GraphReader.Subgraph());
gr.setInputFormat(new GraphReader.NodeListWithEvs(pokec));

subgs = new ArrayList<>();
try (BufferedReader br = new BufferedReader(new FileReader(new File("resources/communitiesWithEvs.txt")))) {
try (BufferedReader br = new BufferedReader(new FileReader(new File(pathToCommunities)))) {
String line;
if(skip != 1)
System.err.println("WARNING: SKIPPING " + (skip-1) + " COMMUNITIES FOR EACH ONE READ");
Expand Down Expand Up @@ -87,12 +98,11 @@ public static void main(String[] args) throws IOException, InterruptedException
&& candidate.edgesOverlapping(edgeOverlapThreshold)
&& candidate.getDelta() >= evDeltaThreshold) {
index.update(candidate);
evImprovement += candidate.getDelta();
numMerged++;
printMerge(candidate);
updateEvDeltaCounter(candidate.getDelta());
break;
}
}

candidate.unlock();
}
});
Expand All @@ -105,7 +115,7 @@ public static void main(String[] args) throws IOException, InterruptedException
}

Thread.sleep(1000*walltime);
System.out.println("exceeded walltime, stopping...");
System.out.println("exceeded walltime, stopping threads...");
walltimeExceeded = true;

for (int i = 0; i < threads.length; i++)
Expand All @@ -126,18 +136,18 @@ public void run() {
System.out.println("#merges: " + numMerged);
System.out.println("merges/s: " + (numMerged / (((System.currentTimeMillis() - stime)) / 1000d)));
System.out.println("avg ev improvement: " + (evImprovement / (2 * numMerged)));
System.out.println("remaining items: " + subgs.size());
System.out.println("remaining communities: " + subgs.size());

System.out.println("writing new evs...");
try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) {
try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./mergedEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) {
for(InducedSubgraph subg : subgs)
bw.write(subg.getEigenvalue() +"\n");
} catch (IOException e) {
e.printStackTrace();
}

System.out.println("writing new nodelists...");
try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) {
try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./mergedCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) {
for(InducedSubgraph subg : subgs) {
for (int i : subg.toNodeList())
bw.write(i +" ");
Expand All @@ -160,4 +170,9 @@ private synchronized static void printMerge(MergeCandidate pair) {
System.out.println(gw.toGraphViz(pair));
System.out.println();
}

/**
 * Records one merge in the class-level statistics: adds {@code delta} to
 * {@code evImprovement} and bumps {@code numMerged} by one.
 * The method is {@code static synchronized}, so both updates happen while
 * holding the class lock.
 *
 * @param delta amount to add to {@code evImprovement}
 */
private synchronized static void updateEvDeltaCounter(double delta) {
    evImprovement += delta;
    numMerged += 1;
}
}
18 changes: 0 additions & 18 deletions src/tasks/tests.java

This file was deleted.

27 changes: 26 additions & 1 deletion test/graph/InducedSubgraphTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import java.io.IOException;
import java.util.ArrayList;

import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

class InducedSubgraphTest {
InducedSubgraph g;
Expand Down Expand Up @@ -65,4 +66,28 @@ void getEdgeOverlapPercent() throws IOException {

System.out.println(sub1.getEdgeOverlapPercent(sub2));
}

/**
 * Merges the subgraphs read from medium.nl and medium.nl.2, then checks that
 * the merged subgraph contains every expected node and that its eigenvalue
 * equals (within 1e-7) that of a subgraph built directly from those nodes.
 */
@Test
void testMerge() throws IOException {
    GraphReader reader = new GraphReader();
    reader.setInputFormat(new GraphReader.Metis());
    reader.setReturnFormat(new GraphReader.Matrix());
    Graph parent = reader.fromFile(new File("testResources/medium.metis"));

    reader.setInputFormat(new GraphReader.NodeList(parent));
    reader.setReturnFormat(new GraphReader.Subgraph());
    InducedSubgraph first = (InducedSubgraph) reader.fromFile("testResources/medium.nl");
    InducedSubgraph second = (InducedSubgraph) reader.fromFile("testResources/medium.nl.2");
    InducedSubgraph merged = first.merge(second);

    // The InducedSubgraph constructor is given a list, so box the expected ids first.
    ArrayList<Integer> expectedNodes = new ArrayList<>();
    for (int node : new int[] {0, 1, 2, 3, 10, 13, 16}) {
        expectedNodes.add(node);
    }

    // Every expected node must be present in the merged subgraph.
    for (int node : expectedNodes) {
        assertTrue(merged.toNodeList().contains(node));
    }

    InducedSubgraph reference = new InducedSubgraph(parent, expectedNodes);
    assertEquals(reference.getEigenvalue(), merged.getEigenvalue(), 0.0000001);
}
}
50 changes: 48 additions & 2 deletions test/io/GraphWriterTest.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package io;

import graph.Graph;
import graph.InducedSubgraph;
import index.MergeCandidate;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.*;
import java.util.ArrayList;

class GraphWriterTest {

Expand All @@ -18,4 +19,49 @@ void testString() throws IOException {
gr.setInputFormat(new GraphReader.NodeList(g));
System.out.println(gr.fromFile("testResources/medium.nl"));
}

/**
 * Prints the GraphViz output for a merge candidate built from two fixture sets:
 * first the medium graph (writer configured with the Metis format), then the
 * small graph (freshly constructed writer). The test makes no assertions; it
 * only prints.
 */
@Test
void testGraphViz() throws IOException {
    GraphWriter writer = new GraphWriter();
    writer.setFormat(new GraphWriter.Metis());

    GraphReader reader = new GraphReader();
    reader.setInputFormat(new GraphReader.Metis());
    reader.setReturnFormat(new GraphReader.List());
    Graph graph = reader.fromFile("testResources/medium.metis");

    MergeCandidate candidate = advancedCandidate(
            reader, graph, "testResources/medium.nl", "testResources/medium.nl.2");
    System.out.println(writer.toGraphViz(candidate));

    // pt 2

    reader.setInputFormat(new GraphReader.Metis());
    reader.setReturnFormat(new GraphReader.List());
    graph = reader.fromFile("testResources/small.metis");

    candidate = advancedCandidate(
            reader, graph, "testResources/small1.nl", "testResources/small2.nl");
    writer = new GraphWriter();
    System.out.println(graph.hasEdge(5, 3));
    System.out.println(writer.toGraphViz(candidate));
}

/**
 * Reads two node-list subgraphs of {@code parent}, wraps them in a
 * {@link MergeCandidate} based on the first one, and calls {@code next()} once.
 * Leaves {@code reader} configured for NodeList input and Subgraph output.
 */
private static MergeCandidate advancedCandidate(
        GraphReader reader, Graph parent, String firstPath, String secondPath) throws IOException {
    reader.setInputFormat(new GraphReader.NodeList(parent));
    reader.setReturnFormat(new GraphReader.Subgraph());
    InducedSubgraph first = (InducedSubgraph) reader.fromFile(firstPath);
    InducedSubgraph second = (InducedSubgraph) reader.fromFile(secondPath);

    ArrayList<InducedSubgraph> pool = new ArrayList<>();
    pool.add(first);
    pool.add(second);

    MergeCandidate candidate = new MergeCandidate(first, pool);
    candidate.next();
    return candidate;
}
}

0 comments on commit c96863e

Please sign in to comment.