public abstract class AbstractExtractor extends AbstractWandoraTool implements WandoraTool, java.lang.Runnable, Handler, InterruptHandler, DropExtractor, BrowserPluginExtractor
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
| Constructor and Description |
|---|
AbstractExtractor()
Creates a new instance of AbstractExtractor
|
| Modifier and Type | Method and Description |
|---|---|
abstract boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
abstract boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
abstract boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
boolean |
acceptBrowserExtractRequest(BrowserExtractRequest request,
Wandora wandora) |
void |
addCrawlerUrl(java.net.URL url,
int depth) |
boolean |
browserExtractorConsumesPlainText() |
Locator |
buildSI(java.lang.String siend) |
Locator |
buildSL(java.io.File file) |
void |
clearMasterSubject() |
Association |
createAssociation(TopicMap topicMap,
Topic aType,
Topic[] players) |
Association |
createAssociation(TopicMap topicMap,
Topic aType,
Topic[] players,
Topic[] roles) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String baseString) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String siString,
java.lang.String baseString) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String siString,
java.lang.String baseNameString,
java.lang.String baseString) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String siString,
java.lang.String baseNameString,
java.lang.String baseString,
Topic type) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String siString,
java.lang.String baseNameString,
java.lang.String baseString,
Topic[] types) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String siString,
java.lang.String baseString,
Topic type) |
Topic |
createTopic(TopicMap topicMap,
java.lang.String baseString,
Topic type) |
protected java.lang.String |
croppedFilename(java.io.File file) |
protected java.lang.String |
croppedFilename(java.lang.String filename) |
protected java.lang.String |
croppedUrlString(java.lang.String urlString) |
protected java.lang.String |
croppedUrlString(java.net.URL url) |
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
void |
dropExtract(java.io.File[] files) |
void |
dropExtract(java.lang.String content) |
void |
dropExtract(java.lang.String[] urls) |
void |
execute(Wandora admin,
Context context)
Runs the tool.
|
void |
extractTopicsFrom(java.io.File file,
TopicMap topicMap) |
int |
extractTopicsFrom(java.lang.String fileName,
java.util.Collection visited,
java.util.regex.Pattern fileMask,
int depth,
int space) |
int |
extractTopicsFrom(java.lang.String fileName,
java.util.regex.Pattern fileMask,
int depth,
int space) |
void |
extractTopicsFrom(java.net.URL url,
TopicMap topicMap) |
void |
extractTopicsFromText(java.lang.String content,
TopicMap topicMap) |
java.lang.String |
getBrowserExtractorName() |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
int |
getCrawlerMode() |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
int |
getExtractorType() |
java.lang.Object |
getForceContent() |
java.io.File[] |
getForceFiles() |
java.lang.String[] |
getForceUrls() |
java.lang.String |
getGUIText(int textType) |
java.lang.String |
getGUIText(int textType,
java.lang.Object[] params) |
javax.swing.Icon |
getIcon()
All tools may have identifying graphic icon used within tool GUI elements.
|
int[] |
getInterruptsHandled() |
java.lang.String |
getMasterSubject() |
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
WandoraToolType |
getType()
Tool type is used to categorize tools.
|
Wandora |
getWandora() |
void |
handle(CrawlerAccess crawler,
java.io.InputStream in,
int depth,
java.net.URL url)
Processes the given page.
|
void |
handleContent(java.lang.Object content,
TopicMap tm) |
void |
handleCustomType() |
void |
handleFiles(java.io.File[] files,
TopicMap tm) |
boolean |
handleForcedContent() |
void |
handleInterrupt(CrawlerAccess crawler,
int interrupt,
java.net.URL url) |
void |
handleStringContent(java.lang.String stringContent,
TopicMap tm) |
void |
handleUrls(java.lang.String[] urls,
TopicMap tm) |
void |
initializeCustomType() |
boolean |
instantDropHandle() |
void |
makeSubclassOfWandoraClass(Topic t,
TopicMap tm) |
boolean |
runInOwnThread()
Whether or not this tool should fork own thread.
|
void |
setData(Topic t,
Topic type,
java.lang.String lang,
java.lang.String text) |
void |
setDisplayName(Topic t,
java.lang.String lang,
java.lang.String name) |
void |
setForceContent(java.lang.Object fcontent) |
void |
setForceFiles(java.io.File[] ffiles) |
void |
setForceUrls(java.lang.String[] furls) |
void |
setMasterSubject(java.lang.String subject) |
void |
setMasterSubject(Topic t) |
void |
setTopicMap(TopicMap tm) |
void |
setupCrawler(java.lang.String startUrl) |
private void |
setupCrawler(java.lang.String[] startUrls) |
void |
setWandora(Wandora app) |
protected void |
takeNap(long napTime) |
protected static java.lang.String |
urlEncode(java.lang.String str) |
boolean |
useTempTopicMap() |
boolean |
useURLCrawler() |
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
public static final java.lang.String STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE
public static final int CUSTOM_EXTRACTOR
public static final int RAW_EXTRACTOR
public static final int FILE_EXTRACTOR
public static final int URL_EXTRACTOR
public static final int EXACTLY_GIVEN_URLS
public static final int GIVEN_URLS_AND_LINKED_DOCUMENTS
public static final int GIVEN_URLS_AND_URL_BELOW
public static final int GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN
public static final int GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS
public static final int SELECT_DIALOG_TITLE
public static final int POINT_START_URL_TEXT
public static final int INFO_WAIT_WHILE_WORKING
public static final int FILE_PATTERN
public static final int DONE_FAILED
public static final int DONE_ONE
public static final int DONE_MANY
public static final int LOG_TITLE
private Wandora wandora
private TopicMap topicMap
private java.lang.String[] forceUrls
private java.io.File[] forceFiles
private java.lang.Object forceContent
private WebCrawler crawler
private int maximumCrawlCounter
private int extractionCounter
private int foundCounter
private int browseCounter
private long errorNapTime
private AbstractExtractorDialog extractorSourceDialog
private java.lang.String masterSubject
private final java.lang.String[] contentTypes
public AbstractExtractor()
public WandoraToolType getType()
AbstractWandoraTool
Tool type is used to categorize tools. Tool type has no real effect today; it is merely an informative property of a tool.
getType in interface WandoraTool
getType in class AbstractWandoraTool
public java.lang.String getName()
AbstractWandoraTool
getName in interface WandoraTool
getName in class AbstractWandoraTool
public java.lang.String getDescription()
AbstractWandoraTool
getDescription in interface WandoraTool
getDescription in class AbstractWandoraTool
public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon should return Icon object of the tool.
getIcon in interface WandoraTool
getIcon in class AbstractWandoraTool
public boolean runInOwnThread()
AbstractWandoraTool
runInOwnThread in class AbstractWandoraTool
public boolean useTempTopicMap()
public boolean useURLCrawler()
public boolean instantDropHandle()
public void dropExtract(java.io.File[] files)
throws TopicMapException
dropExtract in interface DropExtractor
TopicMapException
public void dropExtract(java.lang.String[] urls)
throws TopicMapException
dropExtract in interface DropExtractor
TopicMapException
public void dropExtract(java.lang.String content)
throws TopicMapException
dropExtract in interface DropExtractor
TopicMapException
public java.lang.String getGUIText(int textType,
java.lang.Object[] params)
public java.lang.String getGUIText(int textType)
public void setForceFiles(java.io.File[] ffiles)
public void setForceUrls(java.lang.String[] furls)
public void setForceContent(java.lang.Object fcontent)
public java.io.File[] getForceFiles()
public java.lang.String[] getForceUrls()
public java.lang.Object getForceContent()
public int getExtractorType()
public void initializeCustomType()
public void handleCustomType()
public void execute(Wandora admin, Context context)
WandoraTool
execute in interface WandoraTool
public boolean handleForcedContent()
public void handleFiles(java.io.File[] files,
TopicMap tm)
public void handleUrls(java.lang.String[] urls,
TopicMap tm)
public void handleContent(java.lang.Object content,
TopicMap tm)
public void handleStringContent(java.lang.String stringContent,
TopicMap tm)
public void setTopicMap(TopicMap tm)
private void setupCrawler(java.lang.String[] startUrls)
public void setupCrawler(java.lang.String startUrl)
public void addCrawlerUrl(java.net.URL url,
int depth)
public int getCrawlerMode()
protected void takeNap(long napTime)
public int extractTopicsFrom(java.lang.String fileName,
java.util.regex.Pattern fileMask,
int depth,
int space)
public int extractTopicsFrom(java.lang.String fileName,
java.util.Collection visited,
java.util.regex.Pattern fileMask,
int depth,
int space)
public void extractTopicsFrom(java.net.URL url,
TopicMap topicMap)
throws java.lang.Exception
java.lang.Exception
public void extractTopicsFromText(java.lang.String content,
TopicMap topicMap)
throws java.lang.Exception
java.lang.Exception
public void extractTopicsFrom(java.io.File file,
TopicMap topicMap)
throws java.lang.Exception
java.lang.Exception
public Locator buildSL(java.io.File file)
public abstract boolean _extractTopicsFrom(java.io.File f,
TopicMap t)
throws java.lang.Exception
java.lang.Exception
public abstract boolean _extractTopicsFrom(java.net.URL u,
TopicMap t)
throws java.lang.Exception
java.lang.Exception
public abstract boolean _extractTopicsFrom(java.lang.String str,
TopicMap t)
throws java.lang.Exception
java.lang.Exception
protected static java.lang.String urlEncode(java.lang.String str)
public Locator buildSI(java.lang.String siend)
public Topic createTopic(TopicMap topicMap, java.lang.String baseString) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String siString, java.lang.String baseString) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String siString, java.lang.String baseString, Topic type) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String baseString, Topic type) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String siString, java.lang.String baseNameString, java.lang.String baseString) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String siString, java.lang.String baseNameString, java.lang.String baseString, Topic type) throws TopicMapException
TopicMapException
public Topic createTopic(TopicMap topicMap, java.lang.String siString, java.lang.String baseNameString, java.lang.String baseString, Topic[] types) throws TopicMapException
TopicMapException
public Association createAssociation(TopicMap topicMap, Topic aType, Topic[] players) throws TopicMapException
TopicMapException
public Association createAssociation(TopicMap topicMap, Topic aType, Topic[] players, Topic[] roles) throws TopicMapException
TopicMapException
public void setDisplayName(Topic t, java.lang.String lang, java.lang.String name) throws TopicMapException
TopicMapException
public void setData(Topic t, Topic type, java.lang.String lang, java.lang.String text) throws TopicMapException
TopicMapException
public void makeSubclassOfWandoraClass(Topic t, TopicMap tm) throws TopicMapException
TopicMapException
protected java.lang.String croppedFilename(java.lang.String filename)
protected java.lang.String croppedFilename(java.io.File file)
protected java.lang.String croppedUrlString(java.net.URL url)
protected java.lang.String croppedUrlString(java.lang.String urlString)
public void handle(CrawlerAccess crawler, java.io.InputStream in, int depth, java.net.URL url)
Handler
InputStream contains the data of an object that is
of the content-type this content handler accepts. May use the given
CrawlerAccess object to add further pages to the queue of the
WebCrawler that asked to process the page.
handle in interface Handler
crawler - The callback object for the handler. Any objects built from
the content of the page can be sent to this.
in - The InputStream of the page.
depth - The remaining depth. When reporting another page to
the queue, the depth of that page should be set to this depth-1.
url - The URL of the page.
public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this ContentHandler can process.
getContentTypes in interface Handler
public void handleInterrupt(CrawlerAccess crawler, int interrupt, java.net.URL url)
handleInterrupt in interface InterruptHandler
public int[] getInterruptsHandled()
getInterruptsHandled in interface InterruptHandler
public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
doBrowserExtract in interface BrowserPluginExtractor
TopicMapException
public boolean acceptBrowserExtractRequest(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
acceptBrowserExtractRequest in interface BrowserPluginExtractor
TopicMapException
public java.lang.String getBrowserExtractorName()
getBrowserExtractorName in interface BrowserPluginExtractor
public boolean browserExtractorConsumesPlainText()
public java.lang.String getMasterSubject()
public void setMasterSubject(Topic t)
public void setMasterSubject(java.lang.String subject)
public void clearMasterSubject()
public void setWandora(Wandora app)
public Wandora getWandora()
Copyright 2004-2015 Wandora Team