diff --git a/src/graph/Graph.java b/src/graph/Graph.java index bfd52a7..38c5c53 100644 --- a/src/graph/Graph.java +++ b/src/graph/Graph.java @@ -14,7 +14,7 @@ * abstract base class, doesn't support adding nodes, adding edges is only supported for reading from files etc * pseudo-immutable */ -public abstract class Graph { +public abstract class Graph implements Comparable { int n; int e; double ev = -1; @@ -273,4 +273,8 @@ public double getNodeOverlapPercent(Graph other) { return (double) getOverlappingNodes(other).size() / n; } + @Override + public int compareTo(Graph other) { + return this.n - other.n; + } } diff --git a/src/graph/InducedSubgraph.java b/src/graph/InducedSubgraph.java index 3a73e40..f97662d 100644 --- a/src/graph/InducedSubgraph.java +++ b/src/graph/InducedSubgraph.java @@ -159,7 +159,7 @@ public boolean hasOriginalNode(int originalNodeId) { } public InducedSubgraph merge(InducedSubgraph other) { - assert other.g == this.g : "cannot merge induced subgraphs stemming from different main graphs"; + assert other.g == this.g : "cannot merge induced subgraphs from different main graphs"; ArrayList nodesOfThis = toNodeList(); ArrayList nodesOfOther = other.toNodeList(); @@ -177,4 +177,16 @@ public InducedSubgraph merge(InducedSubgraph other) { public ReentrantLock getLock() { return lock; } + + /** + * @return size of this induced subgraph... DEBUG + */ + @Override + public String toString() { + return n + ""; + } + + public Graph getOriginalGraph() { + return g; + } } diff --git a/src/index/InverseIndex.java b/src/index/InverseIndex.java index 7b77ecd..c79115e 100644 --- a/src/index/InverseIndex.java +++ b/src/index/InverseIndex.java @@ -4,6 +4,7 @@ import graph.InducedSubgraph; import java.util.ArrayList; +import java.util.Collections; public class InverseIndex { private ArrayList[] index; @@ -22,55 +23,59 @@ public InverseIndex(Graph main, ArrayList subgraphs) { for (InducedSubgraph s : subgraphs) for (int node : s.toNodeList()) index[node].add(s); - } - public ArrayList getGraphsHaving(int node) { - return index[node]; + for (ArrayList al : index) + Collections.sort(al); } /** * find two unlocked overlapping subgraphs, lock them, - * @return new OverlappingPair with two random overlapping subgraphs + * @return new MergeCandidate with two random overlapping subgraphs */ - public OverlappingPair getRandomPair() { + public MergeCandidate getCandidate() { synchronized (this) { while (true) { ArrayList overlappingSubgraphs = index[(int) Math.floor(Math.random() * index.length)]; - int randIndex1 = (int) Math.floor(Math.random() * overlappingSubgraphs.size()); - int randIndex2 = (int) Math.floor(Math.random() * overlappingSubgraphs.size()); - - if (randIndex1 == randIndex2) + if(overlappingSubgraphs.size() == 0) continue; + int randIndex1 = (int) Math.floor(Math.random() * overlappingSubgraphs.size()); + InducedSubgraph a = overlappingSubgraphs.get(randIndex1); - InducedSubgraph b = overlappingSubgraphs.get(randIndex2); if (!a.getLock().tryLock()) { continue; } - if (!b.getLock().tryLock()) { - a.getLock().unlock(); - continue; - } - return new OverlappingPair(a, b); + return new MergeCandidate(a, overlappingSubgraphs); } } } /** - * remove pair from index, add merged graph in their places + * remove pair from index, add merged graph in their places, sustain sorting * @param pair */ - public void update(OverlappingPair pair) { + public void update(MergeCandidate pair) { synchronized (this) { for (int i = 0; i < pair.a.getNodeCount(); i++) - index[pair.a.getOriginalNodeID(i)].remove(pair.a); + synchronized (index[pair.a.getOriginalNodeID(i)]) { + index[pair.a.getOriginalNodeID(i)].remove(pair.a); + } for (int i = 0; i < pair.b.getNodeCount(); i++) - index[pair.b.getOriginalNodeID(i)].remove(pair.b); - for (int i = 0; i < pair.merged.getNodeCount(); i++) - index[pair.merged.getOriginalNodeID(i)].add(pair.merged); + synchronized (index[pair.b.getOriginalNodeID(i)]) { + index[pair.b.getOriginalNodeID(i)].remove(pair.b); + } + for (int i = 0; i < pair.merged.getNodeCount(); i++) { + int originalId = pair.merged.getOriginalNodeID(i); + synchronized (index[originalId]) { + int insertionIndex = 0; + while (insertionIndex < index[originalId].size() && pair.merged.compareTo(index[originalId].get(insertionIndex)) >= 0) + insertionIndex++; + index[originalId].add(insertionIndex, pair.merged); + } + } source.remove(pair.a); source.remove(pair.b); diff --git a/src/index/MergeCandidate.java b/src/index/MergeCandidate.java new file mode 100644 index 0000000..e21cba2 --- /dev/null +++ b/src/index/MergeCandidate.java @@ -0,0 +1,79 @@ +package index; + +import graph.InducedSubgraph; +import tasks.MergeOverlappingCommunities; + +import java.util.ArrayList; + +public class MergeCandidate { + public InducedSubgraph a; + public InducedSubgraph b; + public InducedSubgraph merged; + + private final ArrayList source; + private int currentIndex; + + // a is already locked + public MergeCandidate(InducedSubgraph a, ArrayList source) { + currentIndex = 0; + this.a = a; + this.source = source; + } + + public boolean nodesOverlapping(double moreThan) { + return a.getNodeOverlapPercent(b) >= moreThan; + } + + public boolean edgesOverlapping(double moreThan) { + return a.getEdgeOverlapPercent(b) >= moreThan; + } + + public void createMerged() { + merged = a.merge(b); + } + + /** + * find next suitable graph, that is, not a and not locked; locks graph, unlocks previous graph + * @return true if suitable next was found, false if end of list has been reached + */ + public boolean next() { + synchronized (source) { + while (++currentIndex < source.size()) + if (source.get(currentIndex) != a && source.get(currentIndex).getLock().tryLock()) { + if (b != null) + b.getLock().unlock(); + b = source.get(currentIndex); + merged = null; + return true; + } + return false; + } + } + + public double getMergedEv() { + if (merged == null) + createMerged(); + return merged.getEigenvalue(); + } + + public double getDelta() { + switch (MergeOverlappingCommunities.evCompareStrategy) { + case 1: // delta := 2 * merged - a - b + return 2 * getMergedEv() - a.getEigenvalue() - b.getEigenvalue(); + default: // delta := merged - larger of both + return getMergedEv() - (a.getNodeCount() > b.getNodeCount() ? a.getEigenvalue() : b.getEigenvalue()); + + } + } + + public void unlock() { + assert a.getLock().isLocked() : "attempt to unlock unlocked lock"; + a.getLock().unlock(); + + if (b != null) { + assert b.getLock().isLocked() : "attempt to unlock unlocked lock"; + b.getLock().unlock(); + } + } + +} diff --git a/src/index/OverlappingPair.java b/src/index/OverlappingPair.java deleted file mode 100644 index 697fce8..0000000 --- a/src/index/OverlappingPair.java +++ /dev/null @@ -1,42 +0,0 @@ -package index; - -import graph.InducedSubgraph; - -public class OverlappingPair { - InducedSubgraph a; - InducedSubgraph b; - InducedSubgraph merged; - - public OverlappingPair(InducedSubgraph a, InducedSubgraph b) { - this.a = a; - this.b = b; - } - - public boolean nodesOverlapping(double moreThan) { - return a.getNodeOverlapPercent(b) >= moreThan; - } - - public boolean edgesOverlapping(double moreThan) { - return a.getEdgeOverlapPercent(b) >= moreThan; - } - - public void createMerged() { - merged = a.merge(b); - } - - public double getMergedEv() { - if (merged == null) - createMerged(); - return merged.getEigenvalue(); - } - - public double getDelta() { - return (getMergedEv() - a.getEigenvalue()) + (getMergedEv() - b.getEigenvalue()) / 2; - } - - public void unlock() { - assert a.getLock().isLocked() && b.getLock().isLocked() : "attempt to unlock unlocked lock"; - a.getLock().unlock(); - b.getLock().unlock(); - } -} diff --git a/src/io/GraphWriter.java b/src/io/GraphWriter.java index bac432d..a0d3af4 100644 --- a/src/io/GraphWriter.java +++ b/src/io/GraphWriter.java @@ -1,11 +1,13 @@ package io; import graph.Graph; +import index.MergeCandidate; import java.io.*; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.HashSet; public class GraphWriter { OutputFormat format; @@ -48,6 +50,67 @@ public void checkFormatSet() { throw new IllegalStateException("cannot read graph: input format is unset"); } + // this method is an abomination + public String toGraphViz(MergeCandidate candidate) { + final String colorA = "cyan3"; + final String colorB = "crimson"; + final String colorC = "darkgoldenrod"; + final String colorEdges = "darkolivegreen"; + final String overlap = "prism"; + final int penwidth = 3; + + HashSet a = new HashSet<>(candidate.a.toNodeList()); + HashSet b = new HashSet<>(candidate.b.toNodeList()); + HashSet c = new HashSet<>(candidate.b.toNodeList()); + a.removeAll(b); + c.removeAll(candidate.a.toNodeList()); + b.removeAll(c); + Graph og = candidate.a.getOriginalGraph(); + + StringBuilder sb = new StringBuilder(String.format("strict graph {\n\tedge [penwidth=%d]\n\tnode [style=filled]\n\toverlap=\"%s\"\n\toutputorder=\"edgesfirst\"\n", penwidth, overlap)); + + for (int node : b) { + sb.append(String.format("\t%d [color=%s]\n", node, colorB)); + for (int neighbor : a) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorA)); + for (int neighbor : b) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorB)); + for (int neighbor : c) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorC)); + } + + for (int node : a) { + sb.append(String.format("\t%d [color=%s]\n", node, colorA)); + for (int neighbor : a) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorA)); + for (int neighbor : b) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorA)); + for (int neighbor : c) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorEdges)); + } + + for (int node : c) { + sb.append(String.format("\t%d [color=%s]\n", node, colorC)); + for (int neighbor : a) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorEdges)); + for (int neighbor : b) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorC)); + for (int neighbor : c) + if (og.hasEdge(node, neighbor)) + sb.append(String.format("\t%d -- %d [color=%s]\n", node, neighbor, colorC)); + } + + return sb.append("}").toString(); + } + private interface OutputFormat { /** * write graph g with specific output format to w diff --git a/src/tasks/MergeOverlappingCommunities.java b/src/tasks/MergeOverlappingCommunities.java index 614d3a2..1bd1a0c 100644 --- a/src/tasks/MergeOverlappingCommunities.java +++ b/src/tasks/MergeOverlappingCommunities.java @@ -2,18 +2,19 @@ import graph.Graph; import graph.InducedSubgraph; +import index.MergeCandidate; import index.InverseIndex; -import index.OverlappingPair; import io.GraphReader; +import io.GraphWriter; import java.io.*; import java.util.ArrayList; public class MergeOverlappingCommunities { private static long stime; - private static int numMerged = 0; + private static long numMerged = 0; private static double evImprovement = 0; - private static int numPairs = 0; + private static long numPairs = 0; private static double edgeOverlapThreshold; private static double nodeOverlapThreshold; @@ -25,49 +26,28 @@ public class MergeOverlappingCommunities { private static InverseIndex index; private static ArrayList subgs; + public static int evCompareStrategy; + public static double findBestMatchAmong; + + // number of communities to skip for each one read, for testing purposes (subtract 1) + private static final int skip = 1; + public static void main(String[] args) throws IOException, InterruptedException { - if (args.length != 4) { - System.out.println("usage: MergeOverlappingCommunities numThreads walltimeSeconds edgeOverlapThreshold nodeOverlapThreshold"); + if (args.length != 6) { + System.out.println("usage: MergeOverlappingCommunities numThreads walltimeSeconds edgeOverlapThreshold nodeOverlapThreshold evCompareStrategy findBestMatchAmong"); + System.out.println("evCompareStrategy: 0 (measure vs. larger) | 1 (measure vs. average)"); + System.out.println("findBestMatchAmong: [0, 1] (% of possible merging candidates to check before picking the best, but at least 1)"); System.exit(1); } else { numThreads = Integer.parseInt(args[0]); walltime = Integer.parseInt(args[1]); edgeOverlapThreshold = Double.parseDouble(args[2]); nodeOverlapThreshold = Double.parseDouble(args[3]); + evCompareStrategy = Integer.parseInt(args[4]); + findBestMatchAmong = Integer.parseInt(args[5]); } - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - float runtime = (System.currentTimeMillis() - stime) / 1000f; - System.out.println("exiting..."); - System.out.println("time: " + runtime); - System.out.println("#pairs: " + numPairs); - System.out.println("pairs/s: " + (numPairs / (((System.currentTimeMillis() - stime)) / 1000d))); - System.out.println("#merges: " + numMerged); - System.out.println("merges/s: " + (numMerged / (((System.currentTimeMillis() - stime)) / 1000d))); - System.out.println("avg ev improvement: " + (evImprovement / (2 * numMerged))); - System.out.println("remaining items: " + subgs.size()); - - System.out.println("writing new evs..."); - try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { - for(InducedSubgraph subg : subgs) - bw.write(subg.getEigenvalue() +"\n"); - } catch (IOException e) { - e.printStackTrace(); - } - - System.out.println("writing new nodelists..."); - try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { - for(InducedSubgraph subg : subgs) { - for (int i : subg.toNodeList()) - bw.write(i +" "); - bw.write("\n"); - } - } catch (IOException e) { - e.printStackTrace(); - } - - System.out.println("done."); - })); + Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook())); GraphReader gr = new GraphReader(); gr.setInputFormat(new GraphReader.Metis()); @@ -81,7 +61,12 @@ public static void main(String[] args) throws IOException, InterruptedException subgs = new ArrayList<>(); try (BufferedReader br = new BufferedReader(new FileReader(new File("resources/communitiesWithEvs.txt")))) { String line; + if(skip != 1) + System.err.println("WARNING: SKIPPING " + (skip-1) + " COMMUNITIES FOR EACH ONE READ"); + int i = 0; while ((line = br.readLine()) != null) { + if (i++ % skip != 0) + continue; InducedSubgraph g = (InducedSubgraph) gr.fromString(line); subgs.add(g); } @@ -94,19 +79,21 @@ public static void main(String[] args) throws IOException, InterruptedException for (int i = 0; i < threads.length; i++) { threads[i] = new Thread(() -> { while (!walltimeExceeded) { - OverlappingPair pair = index.getRandomPair(); + MergeCandidate candidate = index.getCandidate(); numPairs++; - if (pair.nodesOverlapping(nodeOverlapThreshold) - && pair.edgesOverlapping(edgeOverlapThreshold) - && pair.getDelta() >= evDeltaThreshold) { - index.update(pair); - evImprovement += pair.getDelta(); - numMerged++; - - } else { - pair.unlock(); + while(candidate.next()) { // TODO IMPLEMENT LOOKING FOR BEST MATCH AMONG N (LOCKING PROBLEM!) + if (candidate.nodesOverlapping(nodeOverlapThreshold) + && candidate.edgesOverlapping(edgeOverlapThreshold) + && candidate.getDelta() >= evDeltaThreshold) { + index.update(candidate); + evImprovement += candidate.getDelta(); + numMerged++; + printMerge(candidate); + break; + } } + candidate.unlock(); } }); } @@ -118,12 +105,59 @@ public static void main(String[] args) throws IOException, InterruptedException } Thread.sleep(1000*walltime); - //while((System.currentTimeMillis() - stime) < walltime * 1000) - // continue; System.out.println("exceeded walltime, stopping..."); walltimeExceeded = true; for (int i = 0; i < threads.length; i++) threads[i].join(); } + + /** + * prints stats and saves state of communities, eigenvalues + */ + private static class ShutdownHook implements Runnable { + @Override + public void run() { + float runtime = (System.currentTimeMillis() - stime) / 1000f; + System.out.println("exiting..."); + System.out.println("time: " + runtime); + System.out.println("#pairs: " + numPairs); + System.out.println("pairs/s: " + (numPairs / (((System.currentTimeMillis() - stime)) / 1000d))); + System.out.println("#merges: " + numMerged); + System.out.println("merges/s: " + (numMerged / (((System.currentTimeMillis() - stime)) / 1000d))); + System.out.println("avg ev improvement: " + (evImprovement / (2 * numMerged))); + System.out.println("remaining items: " + subgs.size()); + + System.out.println("writing new evs..."); + try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newEigenvalues-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { + for(InducedSubgraph subg : subgs) + bw.write(subg.getEigenvalue() +"\n"); + } catch (IOException e) { + e.printStackTrace(); + } + + System.out.println("writing new nodelists..."); + try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(String.format("./newCommunities-edge%f-node%f.txt", edgeOverlapThreshold, nodeOverlapThreshold))))) { + for(InducedSubgraph subg : subgs) { + for (int i : subg.toNodeList()) + bw.write(i +" "); + bw.write("\n"); + } + } catch (IOException e) { + e.printStackTrace(); + } + + System.out.println("done."); + } + } + + private synchronized static void printMerge(MergeCandidate pair) { + System.out.println("MERGING:"); + System.out.println("\t(" + pair.a.getEigenvalue() + ") " + pair.a.toNodeList()); + System.out.println("+\t(" + pair.b.getEigenvalue() + ") " + pair.b.toNodeList()); + System.out.println("=>\t(" + pair.merged.getEigenvalue() + ") " + pair.merged.toNodeList()); + GraphWriter gw = new GraphWriter(); + System.out.println(gw.toGraphViz(pair)); + System.out.println(); + } } diff --git a/src/tasks/PrecalculateAllEVs.java b/src/tasks/PrecalculateAllEVs.java index 2bffd8b..a935498 100644 --- a/src/tasks/PrecalculateAllEVs.java +++ b/src/tasks/PrecalculateAllEVs.java @@ -8,27 +8,31 @@ public class PrecalculateAllEVs { public static void main(String[] args) throws IOException { - try(BufferedReader br = new BufferedReader(new FileReader(new File("resources/communities.txt"))); - BufferedWriter bw = new BufferedWriter(new FileWriter(new File("resources/communitiesWithEvs.txt")))) { - GraphReader gr = new GraphReader(); - gr.setInputFormat(new GraphReader.Metis()); - gr.setReturnFormat(new GraphReader.List()); - Graph pokec = gr.fromFile("resources/pokec.metis"); + // look for these files in resources folder, give filenames without .txt ending! + String[] files = {"NCe1n0", "NCe2n0"}; - gr.setReturnFormat(new GraphReader.Subgraph()); - gr.setInputFormat(new GraphReader.NodeList(pokec)); - InducedSubgraph subg; - double ev; + for (String file : files) + try(BufferedReader br = new BufferedReader(new FileReader(new File("resources/" + file + ".txt"))); + BufferedWriter bw = new BufferedWriter(new FileWriter(new File("resources/" + file + "WithEvs.txt")))) { + GraphReader gr = new GraphReader(); + gr.setInputFormat(new GraphReader.Metis()); + gr.setReturnFormat(new GraphReader.List()); + Graph pokec = gr.fromFile("resources/pokec.metis"); - String line; - while ((line = br.readLine()) != null) { - subg = (InducedSubgraph) gr.fromString(line); - ev = subg.getEigenvalue(); - bw.write(ev + ""); - for(int i : subg.toNodeList()) - bw.write(" " + i); - bw.write("\n"); + gr.setReturnFormat(new GraphReader.Subgraph()); + gr.setInputFormat(new GraphReader.NodeList(pokec)); + InducedSubgraph subg; + double ev; + + String line; + while ((line = br.readLine()) != null) { + subg = (InducedSubgraph) gr.fromString(line); + ev = subg.getEigenvalue(); + bw.write(ev + ""); + for(int i : subg.toNodeList()) + bw.write(" " + i); + bw.write("\n"); + } } - } } } diff --git a/src/tasks/tests.java b/src/tasks/tests.java new file mode 100644 index 0000000..3b4cf45 --- /dev/null +++ b/src/tasks/tests.java @@ -0,0 +1,18 @@ +package tasks; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +public class tests { + public static void main(String[] args) { + HashMap asd = new HashMap<>(); + String s = new String("asd"); + asd.put(s, 21); + asd.put(s, 22); + Set nodes = new HashSet<>(); + nodes.add(6); + nodes.add(6); + System.out.println(nodes); + } +}