Parallel execution of algorithms in NOESIS
Parallelism @ NOESIS
Parallelism @ NOESIS
Simple performance test using a Wikipedia GML network (available at http://spark-public.s3.amazonaws.com/sna/other/wikipedia.gml):
NETWORK STATISTICS
- Nodes: 27475
- Links: 85729
- 3 node attributes: id wikiid label
- 0 link attributes:
Degree distributions
- Out-degrees: [n=27475 min=0.0 max=565.0 avg=3.1202547770700635 dev=9.038219683086334]
- In-degrees: [n=27475 min=0.0 max=367.0 avg=3.1202547770700635 dev=8.99990229087909]
Node of maximum out-degree:
- out-degree: 565 out-links
- in-degree: 0 in-links
- id: 8436
- wikiid: 1807178
- label: List of mathematics articles (S)
Node of maximum in-degree:
- out-degree: 44 out-links
- in-degree: 367 in-links
- id: 10807
- wikiid: 7250299
- label: Geometry
Betweenness
[n=27475 min=2.0 max=2.1696583120297994E7 avg=79690.76047315753 dev=404883.4016065179]
Node of maximum betweenness:
- out-degree: 29 out-links
- in-degree: 82 in-links
- id: 12533
- wikiid: 5176
- label: Calculus
Time: 27442 ms using a conventional Core i5 laptop (vs. 56 seconds using a sequential implementation).
- Same experiment running igraph... 103 seconds
- Same experiment running NetworkX... 40 minutes !!!
UPDATE
UPDATE
Additional tests on a 2.67GHz Intel Core i7 920 desktop PC using different CPU schedulers:
- 71.6-71.9s @ SequentialScheduler
- 19.6-26.7s @ FutureScheduler(8)
- 16.7-20.1s @ WorkStealingScheduler(8)
- 16.7-19.4s @ FutureScheduler(32)
- 16.6-19.3s @ FutureScheduler(16)
- 16.5-17.8s @ ThreadPoolScheduler
- 16.4-17.1s @ WorkStealingScheduler(64)
- 16.4-17.0s @ WorkStealingScheduler(32)
- 16.4-16.6s @ WorkStealingScheduler(16)
igraph source code
igraph source code
from igraph import *
from plfit import *
import sys
# Read in the graph in GML format
g = read("wikipedia.gml",format="gml")
# Summary information about the graph
summary(g)
# Undirected degree distribution (the GML file itself is directed)
degrees = g.degree(mode="all")
print 'Degree distribution: [' + str(min(degrees)) + ',' + str(max(degrees)) + ']'
# In and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html
# In-degree
# MAX: Geometry
indegrees = g.indegree() # g.degree(mode="IN")
print 'Max in-degree: ', max(indegrees)
print '- Node: ', g.vs.select(_indegree = max(indegrees))["label"]
sys.stdout.flush()
# Out-degree
# MAX: List of mathematics articles
outdegrees = g.outdegree() # g.degree(mode="out")
print 'Max in-degree: ', max(outdegrees)
print '- Node: ', g.vs.select(_outdegree = max(outdegrees))["label"]
sys.stdout.flush()
# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider betweenness.estimate instead
bb = g.betweenness(directed=True) # -> Calculus (1m 43s)
maxindex, maxvalue = max(enumerate(bb), key=operator.itemgetter(1))
print 'Max betweenness: ', maxvalue
print '- Node: ', g.vs[maxindex]["label"]
sys.stdout.flush()
NetworkX source code
NetworkX source code
# see http://networkx.lanl.gov/ for a tutorial on NetworkX
import networkx as nx
import sys
from plfit import *
# Installation: pyparsing @ http://sourceforge.net/projects/pyparsing/files/pyparsing/pyparsing-1.5.6/pyparsing-1.5.6.win32-py2.7.exe/download
# your GML file
filename = 'wikipedia.gml'
# read in the graph using networkX
G = nx.read_gml(filename,relabel=True)
# Graph size
num_nodes = G.number_of_nodes()
print 'number of nodes: ' + str(num_nodes)
num_edges = G.number_of_edges()
print 'number of edges: ' + str(num_edges)
sys.stdout.flush()
# Degree distribution
degrees = list(G.degree().values())
dmax = max(degrees)
dmin = min(degrees)
print 'Degree distribution: [' + str(dmin) + ',' + str(dmax) + ']'
sys.stdout.flush()
# In-degree
# MAX: Geometry
indegree = nx.in_degree_centrality(G)
maxkey,maxvalue= max(indegree.iteritems(), key=lambda x:x[1])
print 'Max in-degree: ' + str(maxkey) + ' ' + str(maxvalue)
sys.stdout.flush()
# Out-degree
# MAX: List of mathematics articles
outdegree = nx.out_degree_centrality(G)
inverse = [(value, key) for key, value in outdegree.items()]
print 'Max out-degree: ', max(inverse)[1], max(inverse)[0]
sys.stdout.flush()
# Betweenness centrality
# MAX = "Calculus"
# WARNING! Slow... 40 minutes for a 27475-node network with 85729 links !!!
between = nx.betweenness_centrality(G)
maxkey,maxvalue= max(between.iteritems(), key=lambda x:x[1])
print 'Max betweenness: ' + str(maxkey) + ' ' + str(maxvalue)
print ' - In-degree: ', indegree[maxkey]
print ' - Out-degree: ', outdegree[maxkey]
sys.stdout.flush()
NOESIS source code
NOESIS source code
package noesis.ui.console;
import java.io.*;
import ikor.parallel.*;
import ikor.parallel.scheduler.*;
import ikor.util.Benchmark;
import noesis.Network;
import noesis.algorithms.traversal.ConnectedComponents;
import noesis.Attribute;
import noesis.AttributeNetwork;
import noesis.analysis.NodeScore;
import noesis.analysis.structure.Betweenness;
import noesis.analysis.structure.InDegree;
import noesis.analysis.structure.OutDegree;
/**
* Network statistics.
*
* @author Fernando Berzal
*/
public class NetworkStats
{
public static void main(String[] args)
throws IOException
{
Scheduler.set ( new WorkStealingScheduler(16) ); // 8 (i5) vs. 16 (i7)
// Scheduler.set ( new FutureScheduler(16) );
// Scheduler.set ( new ThreadPoolScheduler() );
// Scheduler.set ( new SequentialScheduler() );
if (args.length==0) {
System.err.println("NOESIS Network Statistics:");
System.err.println();
System.err.println(" java noesis.ui.console.NetworkStats <file>");
} else {
Benchmark crono = new Benchmark();
crono.start();
Network net = NetworkUtilities.read(args[0]);
NetworkUtilities.printNetworkInformation(net);
// Degree distributions
OutDegree outDegrees = new OutDegree(net);
InDegree inDegrees = new InDegree(net);
outDegrees.compute();
inDegrees.compute();
System.out.println("Degree distributions");
System.out.println("- Out-degrees: "+outDegrees.getResult());
System.out.println("- In-degrees: "+inDegrees.getResult());
if (net instanceof AttributeNetwork) {
System.out.println("Node of maximum out-degree:");
printNode ( (AttributeNetwork) net, outDegrees.getResult().maxIndex());
System.out.println("Node of maximum in-degree:");
printNode ( (AttributeNetwork) net, inDegrees.getResult().maxIndex());
}
// Betweenness
Betweenness betweenness = new Betweenness(net);
betweenness.compute();
System.out.println("Betweenness");
System.out.println(betweenness.getResult());
if (net instanceof AttributeNetwork) {
System.out.println("Node of maximum betweenness:");
printNode ( (AttributeNetwork) net, betweenness.getResult().maxIndex());
}
crono.stop();
System.out.println();
System.out.println("Time: "+crono);
}
}
public static void printNode (AttributeNetwork net, int index)
{
if (index<net.size()) {
System.out.println("- out-degree: "+net.outDegree(index)+" out-links");
System.out.println("- in-degree: "+net.inDegree(index)+" in-links");
for (int i=0; i<net.getNodeAttributeCount(); i++) {
Attribute attribute = net.getNodeAttribute(i);
System.out.println("- "+attribute.getID()+": "+attribute.get(index));
}
}
}
}