public class OCRExtractor extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
private java.lang.String[] |
contentTypes |
protected java.lang.String |
DATE_EXTRACTED_SI |
protected java.lang.String |
DATE_MODIFIED_SI |
protected java.text.SimpleDateFormat |
dateFormatter |
protected java.lang.String |
DOCUMENT_SI |
protected java.lang.String |
FILE_SIZE_SI |
protected java.lang.String |
SOURCE_SI |
protected java.lang.String |
TEMP_PATH |
protected java.lang.String |
TEXT_CONTENT_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
OCRExtractor() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
boolean |
acceptBrowserExtractRequest(BrowserExtractRequest request,
Wandora wandora) |
Topic |
createDocumentTypeTopic(TopicMap tm) |
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
java.lang.String |
getBrowserExtractorName() |
Topic |
getContentType(TopicMap tm) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
Topic |
getDateModifiedType(TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
int |
getExtractorType() |
javax.swing.Icon |
getIcon()
All tools may have an identifying graphic icon used within tool GUI elements.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has explicitly
been given another GUI name.
|
protected Topic |
getOrCreateLangTopic(TopicMap tm,
java.lang.String lng6392) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
Topic |
getSizeType(TopicMap tm) |
Topic |
getSourceType(TopicMap tm) |
Topic |
getTimeExtractedType(TopicMap tm) |
Topic |
getWandoraClass(TopicMap tm) |
boolean |
isConfigurable()
Whether this tool is configurable.
|
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
private boolean |
processFile(java.io.File f,
TopicMap tm,
Topic documentTopic) |
boolean |
useURLCrawler() |
addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getCrawlerMode, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
protected java.lang.String SOURCE_SI
protected java.lang.String DOCUMENT_SI
protected java.lang.String TEXT_CONTENT_SI
protected java.lang.String DATE_EXTRACTED_SI
protected java.lang.String DATE_MODIFIED_SI
protected java.lang.String FILE_SIZE_SI
protected java.lang.String TEMP_PATH
protected java.text.SimpleDateFormat dateFormatter
private final java.lang.String[] contentTypes
public java.lang.String getName()
AbstractWandoraTool
getName
in interface WandoraTool
getName
in class AbstractExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription
in interface WandoraTool
getDescription
in class AbstractExtractor
public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon should return the Icon object of the tool.
getIcon
in interface WandoraTool
getIcon
in class AbstractExtractor
public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this ContentHandler can process.
getContentTypes
in interface Handler
getContentTypes
in class AbstractExtractor
public boolean useURLCrawler()
useURLCrawler
in class AbstractExtractor
public int getExtractorType()
getExtractorType
in class AbstractExtractor
public boolean isConfigurable()
AbstractWandoraTool
isConfigurable
in interface WandoraTool
isConfigurable
in class AbstractWandoraTool
public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
doBrowserExtract
in interface BrowserPluginExtractor
doBrowserExtract
in class AbstractExtractor
TopicMapException
public boolean acceptBrowserExtractRequest(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
acceptBrowserExtractRequest
in interface BrowserPluginExtractor
acceptBrowserExtractRequest
in class AbstractExtractor
TopicMapException
public java.lang.String getBrowserExtractorName()
getBrowserExtractorName
in interface BrowserPluginExtractor
getBrowserExtractorName
in class AbstractExtractor
public boolean _extractTopicsFrom(java.io.File f, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.net.URL u, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap t) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
private boolean processFile(java.io.File f, TopicMap tm, Topic documentTopic) throws TopicMapException
TopicMapException
public Topic getContentType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDateModifiedType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getTimeExtractedType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSizeType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSourceType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic createDocumentTypeTopic(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException
protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
protected Topic getOrCreateLangTopic(TopicMap tm, java.lang.String lng6392) throws TopicMapException
TopicMapException
Copyright 2004-2015 Wandora Team