public abstract class AbstractCrawler extends java.lang.Object implements CrawlerAccess, Crawler
Modifier and Type | Field and Description |
---|---|
private CrawlerAccess |
callback |
protected boolean |
forceExit |
protected int |
handleCount |
private java.util.HashMap<java.lang.String,java.util.ArrayList<Handler>> |
handlers |
private java.util.HashMap |
interruptHandlers |
private URLMask |
mask |
private int |
maxLeft |
private java.util.HashMap |
properties |
private boolean |
verbose |
Constructor and Description |
---|
AbstractCrawler()
Creates a new instance of AbstractCrawler
|
Modifier and Type | Method and Description |
---|---|
void |
addHandler(Handler h)
Adds a ContentHandler to the used content handlers.
|
void |
addInterruptHandler(InterruptHandler h)
Adds an InterruptHandler to the used interrupt handlers.
|
static java.lang.Object |
createObject(org.w3c.dom.Element e)
Deprecated.
|
void |
forceExit() |
CrawlerAccess |
getCallBack() |
int |
getCrawlCounter() |
int |
getHandledDocumentCount() |
java.util.Collection<Handler> |
getHandler(java.lang.String contentType) |
InterruptHandler |
getInterruptHandler(int interruptType) |
URLMask |
getMask() |
java.lang.Object |
getProperty(java.lang.String key) |
boolean |
isVerbose() |
void |
loadSettings(org.w3c.dom.Element rootElement) |
void |
loadSettings(java.io.InputStream in) |
void |
loadSettings(java.lang.String file) |
void |
modifyCrawlCounter(int delta) |
void |
setCallBack(CrawlerAccess cb)
Sets the callback object.
|
void |
setCrawlCounter(int n)
Sets the maximum number of pages the crawler will process.
|
void |
setMask(URLMask m)
Sets the used URLMask.
|
void |
setProperty(java.lang.String key,
java.lang.Object value) |
void |
setVerbose(boolean v) |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
add, addObject
private java.util.HashMap properties
private java.util.HashMap<java.lang.String,java.util.ArrayList<Handler>> handlers
private java.util.HashMap interruptHandlers
private URLMask mask
private boolean verbose
private CrawlerAccess callback
private int maxLeft
protected boolean forceExit
protected int handleCount
public AbstractCrawler()
public void forceExit()
forceExit
in interface CrawlerAccess
public void setProperty(java.lang.String key, java.lang.Object value)
setProperty
in interface CrawlerAccess
public java.lang.Object getProperty(java.lang.String key)
public void loadSettings(java.lang.String file) throws java.lang.Exception
java.lang.Exception
public void loadSettings(java.io.InputStream in) throws java.lang.Exception
java.lang.Exception
public void loadSettings(org.w3c.dom.Element rootElement) throws java.lang.Exception
java.lang.Exception
public void setMask(URLMask m)
public URLMask getMask()
public void addHandler(Handler h)
public java.util.Collection<Handler> getHandler(java.lang.String contentType)
public void addInterruptHandler(InterruptHandler h)
public InterruptHandler getInterruptHandler(int interruptType)
public void setCallBack(CrawlerAccess cb)
public CrawlerAccess getCallBack()
public void setCrawlCounter(int n)
public int getCrawlCounter()
public void modifyCrawlCounter(int delta)
public int getHandledDocumentCount()
public boolean isVerbose()
public void setVerbose(boolean v)
public static java.lang.Object createObject(org.w3c.dom.Element e) throws java.lang.Exception
If this method is called with the 'o' element in the example, an instance of com.foo.MyObject is created using
a constructor that takes an Object array as a parameter. The array will have two objects. The object with
index 0 is an instance of com.foo.MyParam and the object with index 1 is an instance of org.w3c.dom.Element,
the xml element <param number="3"/>. The com.foo.MyParam is instantiated with one object in the Objects array,
the xml 'param' element containing two 'a' elements. Calling with the 'o2' element will also produce an instance
of com.foo.MyObject, but this object is instantiated with the constructor taking no parameters.
java.lang.Exception
Copyright 2004-2015 Wandora Team