abstract class AbstractWordExtractor extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
private Topic |
baseTopic |
private WordConfiguration |
config |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
AbstractWordExtractor() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
private void |
associateWord(Topic w,
Topic t,
Topic s,
TopicMap tm)
Associate the Topic word with a Topic t
|
private Topic |
createScoreTopic(java.lang.Float get,
TopicMap tm) |
private Topic |
createWordTopic(java.lang.String word,
TopicMap tm)
Create a Topic representing word
|
protected abstract java.lang.Object |
formNeedle(java.lang.String s) |
protected abstract java.lang.String |
getBNSuffix() |
(package private) abstract WordConfiguration |
getConfig() |
protected abstract java.lang.String |
getSIBase() |
protected boolean |
handleWordList(java.util.List<java.lang.String> words,
TopicMap tm)
Associates each topic in current context with a word if the word is found
in the topic instance data.
|
protected abstract float |
isMatch(java.lang.Object needle,
java.lang.String haystack) |
private java.util.HashMap<Topic,java.lang.Float> |
solveTopics(java.lang.String word,
TopicMap tm)
Find topics with content matching 'word' according to given configuration
|
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getContentTypes, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getIcon, getInterruptsHandled, getMasterSubject, getName, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawler
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
private Topic baseTopic
private WordConfiguration config
public boolean _extractTopicsFrom(java.io.File f, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.net.URL u, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
abstract WordConfiguration getConfig()
protected boolean handleWordList(java.util.List<java.lang.String> words, TopicMap tm) throws TopicMapException
words
- a list of words to look for in instance data
tm
-
TopicMapException
private java.util.HashMap<Topic,java.lang.Float> solveTopics(java.lang.String word, TopicMap tm)
word
-
tm
-
private Topic createWordTopic(java.lang.String word, TopicMap tm) throws TopicMapException
word
-
tm
-
TopicMapException
- if topic creation fails
private Topic createScoreTopic(java.lang.Float get, TopicMap tm) throws TopicMapException
TopicMapException
private void associateWord(Topic w, Topic t, Topic s, TopicMap tm) throws TopicMapException
w
-
t
-
tm
-
TopicMapException
- if creating the association fails
protected abstract java.lang.Object formNeedle(java.lang.String s)
protected abstract java.lang.String getBNSuffix()
protected abstract java.lang.String getSIBase()
protected abstract float isMatch(java.lang.Object needle, java.lang.String haystack)
Copyright 2004-2015 Wandora Team