【发布时间】:2014-03-10 05:35:37
【问题描述】:
状态:在方法中添加静态,变量队列和同步(crawler.class)解决了这个问题。谢谢大家!!
http://pastie.org/8724549#41-42,46,49,100-101,188-189,191
突出显示的方法/块是synchronized。
那个块/方法应该be accessed by one method at a particular time.。
应该是这样的 = 第一个线程进入方法,更新大小,所有其他人看到那个大小。更新了一个。更新应该只由第一个线程进行。不是别人
- 为什么它甚至被运行。它由所有 11 个线程运行。
- 它正在运行而不等待前一个线程完成。 “
queue loaded, new size ------------”它的创建/添加元素
package crawler;
import crawler.Main;
import static crawler.Main.basicDAO;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
*
* @author syncsys
*/
public class Crawler implements Runnable, InterfaceForConstants {
public static final String patternString = "[_A-Za-z0-9-]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})";
public ConcurrentLinkedQueue<Link> queue = new ConcurrentLinkedQueue<Link>();
private volatile String url;
private void crawl(String url) {
synchronized (Crawler.class){
System.out.println("queue size "+queue.size());
if(queue.size() < (totalSizeOfEmailBucket / 3)){
updateQueue();
}
System.out.println("This is inside of sync block. ----------- queue size "+queue.size());
}
System.out.println("This is at the end of sync block. ----------- queue size "+queue.size());
BufferedReader bf = null;
try {
url = queue.poll().getLink();
URL target = new URL(url);
bf = new BufferedReader(
new InputStreamReader(target.openStream())
);
StringBuilder html = new StringBuilder();
String inputLine;
while ((inputLine = bf.readLine()) != null) {
html.append(inputLine);
}
List emailList = new ArrayList( getEmailList(html.toString()) );
// List linkList = new ArrayList( getLinkList(html.toString(), url) );
System.out.println("Just worked on --------- "+ url);
Main.processedLinksCount++;
if(emailList.size()>0){
putEmailsInDB(emailList);
}
// putLinksInDB(linkList);
} catch (IOException ex) {
Logging.logError(ex.toString());
basicDAO.deleteLink(url);
} catch (Exception ex) {
Logging.logError(ex.toString());
basicDAO.deleteLink(url);
}finally{
if(bf !=null){
try {
bf.close();
} catch (IOException ex) {
Logging.logError(ex.toString());
}
}
crawl(null);
}
}
public synchronized void updateQueue() {
Queue<Link> tempQueue = new PriorityQueue<Link>();
tempQueue = getNonProcessedLinkFromDB() ;
queue.addAll(tempQueue);
BasicDAO.markLinkAsProcesed(tempQueue);
System.out.println("queue loaded, new size ------------------------------------ "+queue.size());
}
private List getLinkList(String html, String url) {
Document doc = Jsoup.parse(html);
Elements bodies = doc.select("body");
List linkList = new ArrayList();
for(Element body : bodies ){
Elements aTags = body.getElementsByTag("a");
for (Element a: aTags){
String link = a.attr("href");
if ( !(link.startsWith("#"))
&&
!(link.contains("()"))
&&
!(link.endsWith(".jpg"))
&&
!(link.endsWith(".jpeg"))
&&
!(link.endsWith(".png"))
&&
!(link.endsWith(".gif")) ){
if( link.startsWith("/") ){
link = url+link;
}
linkList.add(link);
//put link in db
}
}
}
return linkList;
}
private List getEmailList(String html) {
Pattern p = Pattern.compile(patternString);
Matcher m = p.matcher(html);
List emailList = new ArrayList();
while(m.find()){
emailList.add(m.group());
Main.nonUniqueEmailsCount++;
}
return emailList;
}
private Queue<Link> getNonProcessedLinkFromDB() {
return ( basicDAO.getNonProcessedLink() );
}
private void putEmailsInDB(List emailList) {
basicDAO.insertEmail(emailList);
}
private void putLinksInDB(List linkList) {
basicDAO.insertLinks(linkList);
}
@Override
public void run() {
if(url != null){
crawl(url);
}else{
// crawl();
}
}
public Crawler(String url){
this.url = url;
}
public Crawler(){
this.url = null;
}
}
启动线程的方式:非乐观。我知道。未使用执行器服务或池,但以下是有效代码:
for (int i = 0; i < 11; i++) {
try {
Thread thread = new Thread(new Crawler("https://www.google.com.pk/?gws_rd=cr&ei=-q8vUqqNDIny4QTLlYCwAQ#q=pakistan"/*new BasicDAO().getNonProcessedLink()*/));
System.out.println("resume with saved link true");
thread.start();
System.out.println("thread stared");
threadList.add(thread);
System.out.println("thread added to arraylist");
} catch (Exception ex) {
new Logging().logError(ex.toString());
}
}
调试:
for 11 threads , its says in logs:
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
queue size 0
This is at the end of sync block. ----------- queue size 1000
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
queue size 0
queue loaded, new size ------------------------------------ 1000
This is inside of sync block. ----------- queue size 1000
This is at the end of sync block. ----------- queue size 1000
Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/Advice/Freezers/Top-Tens/Top-Five-Freezers/flavel
queue size 999
Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/l/fridges-width_less_than_50_cm/1-26/29-30//zanussi
queue loaded, new size ------------------------------------ 1999
This is inside of sync block. ----------- queue size 1999
This is at the end of sync block. ----------- queue size 1999
queue size 999
queue loaded, new size ------------------------------------ 1999
This is inside of sync block. ----------- queue size 1999
This is at the end of sync block. ----------- queue size 1999
Just worked on --------- http://ao.com/Advice/Washing-Machines/Top-Tens/Top-Five-Washing-Machines/Advice/Refrigerators/Top-Tens/Top-Five-Fridges/l/small_appliances-bodum/1-6/55/
queue size 999
queue loaded, new size ------------------------------------ 1999
This is inside of sync block. ----------- queue size 1999
This is at the end of sync block. ----------- queue size 1999
8692 characters / 254 lines
Advertising from Carbon:
Advertisement Braintree: 2.9% and 30¢ per transaction. No minimums, no monthly fees.
【问题讨论】:
-
让我们也看看你在哪里创建和启动你的线程。
-
@SotiriosDelimanolis 您刚刚回答并删除了您的回答?为什么?
-
因为我不确定你不理解的行为是什么。你能澄清一下你的期望吗?
标签: java multithreading thread-safety synchronized java.util.concurrent