public abstract class AbstractJsoupExtractor extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
private java.lang.String[] |
contentTypes |
private static java.lang.String |
LANG_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
AbstractJsoupExtractor() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
abstract boolean |
extractTopicsFrom(org.jsoup.nodes.Document d,
java.lang.String u,
TopicMap t) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
javax.swing.Icon |
getIcon()
All tools may have an identifying graphic icon used within tool GUI elements.
|
protected static Topic |
getLangTopic(TopicMap tm) |
protected static Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected static Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
protected static Topic |
getWandoraClassTopic(TopicMap tm) |
protected static void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getName, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawler
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
private static final java.lang.String LANG_SI
private final java.lang.String[] contentTypes
public javax.swing.Icon getIcon()
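Description copied from class: AbstractWandoraTool
All tools may have an identifying graphic icon used within tool GUI elements. getIcon should return the Icon object of the tool.
Specified by:
getIcon in interface WandoraTool
Overrides:
getIcon in class AbstractExtractor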
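public java.lang.String[] getContentTypes()
Description copied from interface: Handler
Returns an array of String containing the content-types this ContentHandler can process.
Specified by:
getContentTypes in interface Handler
Overrides:
getContentTypes in class AbstractExtractor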
public boolean _extractTopicsFrom(java.io.File f, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.net.URL u, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
protected static Topic getWandoraClassTopic(TopicMap tm) throws TopicMapException
TopicMapException
protected static Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException
protected static Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException
protected static void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
protected static Topic getLangTopic(TopicMap tm) throws TopicMapException
TopicMapException
public abstract boolean extractTopicsFrom(org.jsoup.nodes.Document d, java.lang.String u, TopicMap t) throws java.lang.Exception
java.lang.Exception
Copyright 2004-2015 Wandora Team