Skip to content

Commit

Permalink
Removed some critical synchronization points (issue #56)
Browse files Browse the repository at this point in the history
  • Loading branch information
aecio committed Aug 27, 2016
1 parent f47f2dd commit 4201a6d
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 76 deletions.
24 changes: 13 additions & 11 deletions src/main/java/focusedCrawler/link/BipartiteGraphManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,27 @@ public class BipartiteGraphManager {

private int count = 0;

//Data structure for stop conditions //////////////////////////
private int maxPages = 100; //Maximum number of pages per each domain
private HashMap<String, Integer> domainCounter;//Count number of pages for each domain
///////////////////////////////////////////////////////////////
// Data structure for stop conditions //////////////////////////
private int maxPages = 100; // Maximum number of pages per each domain
private HashMap<String, Integer> domainCounter;// Count number of pages for each domain
///////////////////////////////////////////////////////////////

private final int pagesToCommit = 100;

public BipartiteGraphManager(FrontierManager frontierManager, BipartiteGraphRepository rep, LinkClassifier outlinkClassifier, LinkClassifier backlinkClassifier) {
public BipartiteGraphManager(FrontierManager frontierManager,
BipartiteGraphRepository rep,
LinkClassifier outlinkClassifier,
LinkClassifier backlinkClassifier) {
this.frontierManager = frontierManager;
this.outlinkClassifier = outlinkClassifier;
this.backlinkClassifier = backlinkClassifier;
this.rep = rep;
this.domainCounter = new HashMap<String, Integer>();
}

public void setMaxPages(int max){
this.maxPages = max;
}
public void setMaxPages(int max) {
this.maxPages = max;
}

public void setBacklinkSurfer(BacklinkSurfer surfer){
this.surfer = surfer;
Expand All @@ -93,12 +96,11 @@ public void setOutlinkClassifier(LinkClassifier classifier){
this.outlinkClassifier = classifier;
}


/**
* Returns the bipartite graph repository held by this manager.
*
* @return the repository instance stored in {@code rep} (assigned from the
*         {@code rep} constructor argument)
*/
public BipartiteGraphRepository getRepository(){
return this.rep;
}

public void insertOutlinks(Page page) throws IOException, FrontierPersistentException, LinkClassifierException {
public synchronized void insertOutlinks(Page page) throws IOException, FrontierPersistentException, LinkClassifierException {

PaginaURL parsedPage = page.getPageURL();
parsedPage.setRelevance(page.getRelevance());
Expand Down Expand Up @@ -152,7 +154,7 @@ public void insertOutlinks(Page page) throws IOException, FrontierPersistentExce
count++;
}

public void insertBacklinks(Page page) throws IOException, FrontierPersistentException, LinkClassifierException{
public synchronized void insertBacklinks(Page page) throws IOException, FrontierPersistentException, LinkClassifierException{
URL url = page.getURL();
BackLinkNeighborhood[] links = rep.getBacklinks(url);
if(links == null || (links != null && links.length < 10)){
Expand Down
71 changes: 30 additions & 41 deletions src/main/java/focusedCrawler/link/LinkStorage.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.atomic.AtomicInteger;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -67,40 +68,40 @@
* @author Luciano Barbosa
* @version 1.0
*/

public class LinkStorage extends StorageDefault{

public static final Logger logger = LoggerFactory.getLogger(LinkStorage.class);


private FrontierManager frontierManager;

private BipartiteGraphManager graphManager;
public class LinkStorage extends StorageDefault {

private OnlineLearning onlineLearning;
public static final Logger logger = LoggerFactory.getLogger(LinkStorage.class);

private int numberOfPages = 0;
private final boolean getBacklinks;
private final boolean getOutlinks;
private final int learnLimit;

private int numberOfBacklink = 0;
private final FrontierManager frontierManager;
private final BipartiteGraphManager graphManager;
private final OnlineLearning onlineLearning;

private long totalTime = 0;

private boolean getBacklinks = false;

private boolean getOutlinks = false;

private int learnLimit = 10;
private AtomicInteger numberOfPages = new AtomicInteger(0);
private AtomicInteger numberOfBacklink = new AtomicInteger(0);

/**
* Creates a link storage without an online-learning component.
* Delegates to the four-argument constructor, passing {@code null} as the
* {@code OnlineLearning}; the {@code onlineLearning != null} check in
* {@code insert(Page)} is therefore never satisfied for instances built here.
*
* @param config          configuration forwarded to the four-argument constructor
* @param manager         graph manager forwarded to the four-argument constructor
* @param frontierManager frontier manager forwarded to the four-argument constructor
* @throws IOException declared by the delegated constructor's signature
*/
public LinkStorage(LinkStorageConfig config,
BipartiteGraphManager manager,
FrontierManager frontierManager)
throws IOException {
this(config, manager, frontierManager, null);
}

/**
* Creates a link storage, taking its collaborators from the arguments and
* its flags and learning limit from {@code config}.
*
* @param config          source of {@code getBacklinks()}, {@code getOutlinks()}
*                        and {@code getLearningLimit()}
* @param manager         graph manager stored in {@code graphManager}
* @param frontierManager frontier manager stored in {@code frontierManager}
* @param onlineLearning  online-learning component; may be {@code null}
*                        (the three-argument constructor passes {@code null})
* @throws IOException declared in the signature; nothing in this body throws it
*/
public LinkStorage(LinkStorageConfig config,
BipartiteGraphManager manager,
FrontierManager frontierManager,
OnlineLearning onlineLearning) throws IOException {
this.frontierManager = frontierManager;
this.graphManager = manager;
this.getBacklinks = config.getBacklinks();
this.getOutlinks = config.getOutlinks();
this.onlineLearning = onlineLearning;
// NOTE(review): learnLimit is used as a modulus (numberOfPages % learnLimit)
// in insert(Page); a configured value of 0 combined with a non-null
// onlineLearning would raise ArithmeticException there - confirm that the
// configuration guarantees a positive value.
this.learnLimit = config.getLearningLimit();
}

public void close(){
logger.info("Shutting down GraphManager...");
graphManager.getRepository().close();
Expand All @@ -109,19 +110,14 @@ public void close(){
logger.info("done.");
}

/**
* Sets the online-learning component together with its learning limit.
*
* @param onlineLearning component stored in {@code onlineLearning}
* @param learnLimit     value stored in {@code learnLimit}; insert(Page) tests
*                        the page counter modulo this value
*/
public void setOnlineLearning(OnlineLearning onlineLearning, int learnLimit) {
this.onlineLearning = onlineLearning;
this.learnLimit = learnLimit;
}

/**
* This method inserts links from a given page into the frontier
*
* @param obj
* Object - page containing links
* @return Object
*/
public synchronized Object insert(Object obj) throws StorageException {
public Object insert(Object obj) throws StorageException {
if(obj instanceof Page) {
return insert((Page) obj);
}
Expand Down Expand Up @@ -167,16 +163,15 @@ public void insert(SitemapXmlHandler.SitemapData sitemapData) {

public Object insert(Page page) throws StorageException {

long initialTime = System.currentTimeMillis();
numberOfPages++;
int numberOfPages = this.numberOfPages.incrementAndGet();

try {

if (getBacklinks && page.isAuth()) {
logger.info(">>>>>GETTING BACKLINKS:" + page.getURL().toString());
graphManager.insertBacklinks(page);
numberOfBacklink++;
logger.info("TOTAL BACKLINKS:" + numberOfBacklink);
numberOfBacklink.incrementAndGet();
logger.info("TOTAL BACKLINKS:" + numberOfBacklink.get());
}

if (onlineLearning != null && numberOfPages % learnLimit == 0) {
Expand Down Expand Up @@ -209,11 +204,6 @@ public Object insert(Page page) throws StorageException {
throw new StorageException(ex.getMessage(), ex);
}

long finalTime = System.currentTimeMillis();
totalTime = totalTime + (finalTime - initialTime);
double average = totalTime / numberOfPages;
logger.info("\n> TOTAL PAGES:" + numberOfPages + "\n> TOTAL TIME:" + (finalTime - initialTime) + "\n> AVERAGE:" + average);

return null;
}

Expand Down Expand Up @@ -270,16 +260,15 @@ public static Storage createLinkStorage(String configPath, String seedFile,

BipartiteGraphManager manager = createBipartiteGraphManager(config, linkClassifier, frontierManager, graphRep);

LinkStorage linkStorage = new LinkStorage(config, manager, frontierManager);

LinkStorage linkStorage;
if (config.isUseOnlineLearning()) {
LinkNeighborhoodWrapper wrapper = new LinkNeighborhoodWrapper(stoplist);

LinkClassifierBuilder cb = new LinkClassifierBuilder(dataPath, graphRep, stoplist, wrapper, frontierManager.getFrontier());

logger.info("ONLINE LEARNING:" + config.getOnlineMethod());
OnlineLearning onlineLearning = new OnlineLearning(frontierManager.getFrontier(), manager, cb, config.getOnlineMethod(), dataPath + "/" + config.getTargetStorageDirectory());
linkStorage.setOnlineLearning(onlineLearning, config.getLearningLimit());
logger.info("ONLINE LEARNING:" + config.getOnlineMethod());
linkStorage = new LinkStorage(config, manager, frontierManager, onlineLearning);
} else {
linkStorage = new LinkStorage(config, manager, frontierManager);
}

return linkStorage;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/focusedCrawler/target/TargetStorage.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public TargetStorage(TargetClassifier targetClassifier,
/**
* Inserts a page into the repository.
*/
public synchronized Object insert(Object obj) throws StorageException {
public Object insert(Object obj) throws StorageException {
Page page = (Page) obj;

if (config.isEnglishLanguageDetectionEnabled()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ private PrintWriter createBufferedWriter(String file) throws FileNotFoundExcepti
return new PrintWriter(bos, autoFlush);
}

public void countPage(Page page, boolean isRelevant, double prob) {
public synchronized void countPage(Page page, boolean isRelevant, double prob) {
long currentTime = System.currentTimeMillis();
totalOfPages++;
fCrawledPages.printf("%s\t%d\n", page.getIdentifier(), (currentTime));
Expand Down
44 changes: 22 additions & 22 deletions src/main/java/focusedCrawler/util/string/AbstractStopList.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,45 +82,45 @@ public AbstractStopList( String excecoes[],String irrelevantes[],String compleme
if( realizarQuickSort ) realizarQuickSort();
}

public synchronized String[] getExcecoes() {
protected String[] getExcecoes() {
return excecoes;
}

public synchronized void setExcecoes( String array[] ) {
protected void setExcecoes( String array[] ) {
excecoes = array;
if( excecoes != null )
quickSortString( excecoes );
}

public synchronized String[] getIrrelevantes() {
protected String[] getIrrelevantes() {
return irrelevantes;
}

public synchronized void setIrrelevantes( String array[] ) {
protected void setIrrelevantes( String array[] ) {
irrelevantes = array;
if( irrelevantes != null )
quickSortString( irrelevantes );
}

public synchronized void setComplementares( String array[] ) {
protected void setComplementares( String array[] ) {
complementares = array;
if( complementares != null )
quickSortString( complementares );
}

public synchronized void setPrefixos( String array[] ) {
protected void setPrefixos( String array[] ) {
prefixos = array;
if( prefixos != null )
quickSortString( prefixos );
}

public synchronized void setSufixos( String array[] ) {
protected void setSufixos( String array[] ) {
sufixos = array;
if( sufixos != null )
quickSortString( sufixos );
}

protected synchronized void realizarQuickSort() {
protected void realizarQuickSort() {
if( excecoes != null )
quickSortString( excecoes );
if( irrelevantes != null )
Expand All @@ -133,11 +133,11 @@ protected synchronized void realizarQuickSort() {
quickSortString( sufixos );
}

protected synchronized void quickSortString( String str[] ) {
protected void quickSortString( String str[] ) {
quicksort_str( str,0,str.length-1 );
}

protected synchronized void quicksort_str( String str[],int left,int right ) {
protected void quicksort_str( String str[],int left,int right ) {
String pivot;
int l = left;
int r = right;
Expand All @@ -164,7 +164,7 @@ protected synchronized void quicksort_str( String str[],int left,int right ) {

// Funcao auxiliar do quicksort

protected synchronized void troque( String str[],int l,int r ) {
protected void troque( String str[],int l,int r ) {
String temp;
temp = str[l];
str[l] = str[r];
Expand All @@ -188,12 +188,12 @@ public boolean isStopWord(String word){
return isStop;
}

public synchronized boolean isIrrelevant( String palavra) {
public boolean isIrrelevant( String palavra) {
if( palavra == null )
return true;
palavra = palavra.trim();
int size = palavra.length();
if( pertenceAoArray( palavra,excecoes ))
if( pertenceAoArray( palavra, excecoes ))
return false;
if( size < MIN_LENGTH || size > MAX_LENGTH )
return true;
Expand Down Expand Up @@ -238,7 +238,7 @@ public synchronized boolean isIrrelevant( String palavra) {
/**
* Indica se uma string e formada apenas por numeros.
*/
public synchronized boolean apenasNumero( String palavra ) {
protected boolean apenasNumero( String palavra ) {
boolean numero = true;
int i = 0; char c;
int size = palavra.length();
Expand All @@ -253,7 +253,7 @@ public synchronized boolean apenasNumero( String palavra ) {
/**
* Indica se uma string e formada apenas por hifens.
*/
public synchronized boolean apenasHifen( String palavra ) {
protected boolean apenasHifen( String palavra ) {

boolean hifen = true;
int i = 0; char c;
Expand All @@ -270,7 +270,7 @@ public synchronized boolean apenasHifen( String palavra ) {
* Tries to identify a set of characters, in this case the digits.
* >= 48 and <= 57 for 0..9
*/
public synchronized boolean eNumero( char c ) {
protected boolean eNumero( char c ) {
int value = (int) c;
return ( 48 <= value && value <= 57 );
}
Expand All @@ -281,7 +281,7 @@ public synchronized boolean eNumero( char c ) {
* >= 97 and <= 122 for a..z
*/

public synchronized boolean eLetra( char c ) {
protected boolean eLetra( char c ) {
int value = (int)(Acentos.retirarAcentosANSI(""+c).charAt(0));
return ( 65 <= value && value <= 90 ) || ( 97 <= value && value <= 122 );
}
Expand All @@ -290,7 +290,7 @@ public synchronized boolean eLetra( char c ) {
* Indica se a palavra e formada apenas por numeros e hifens, sendo assim capaz
* de identificar os numero de telefone, CPF, CEP, etc.
*/
public synchronized boolean apenasNumeroEHifen( String palavra ) {
protected boolean apenasNumeroEHifen( String palavra ) {

boolean numero = false,hifen = false,outro = false;
int i = 0; char c;
Expand Down Expand Up @@ -329,7 +329,7 @@ else if( c == '-' )
*/

public synchronized boolean possuiCaracteresIrrelevantes(String palavra) {
private boolean possuiCaracteresIrrelevantes(String palavra) {

int codigoANSI;
int size = palavra.length(); // este size evita que o metodo length() seja chamado a cada loop.
Expand All @@ -347,7 +347,7 @@ public synchronized boolean possuiCaracteresIrrelevantes(String palavra) {
}

/** Busca binária em um array de Strings */
protected synchronized boolean pertenceAoArray(String palavra, String array[]) {
protected boolean pertenceAoArray(String palavra, String array[]) {
boolean achou = false;
if (array != null) {
int pos;
Expand All @@ -366,7 +366,7 @@ else if (array[pos].compareTo(palavra) < 0)
}

/** Verifica os prefixos. */
protected synchronized boolean possuiPrefixos(String palavra, String term[]) {
protected boolean possuiPrefixos(String palavra, String term[]) {
boolean achou = false;
if (term != null) {
for (int i = 0; i < term.length && !achou; i++)
Expand All @@ -378,7 +378,7 @@ protected synchronized boolean possuiPrefixos(String palavra, String term[]) {

/** Verifica os sufixos. */

protected synchronized boolean possuiSufixos( String palavra, String term[] ) {
protected boolean possuiSufixos( String palavra, String term[] ) {

boolean achou = false;
if (term != null) {
Expand Down

0 comments on commit 4201a6d

Please sign in to comment.