public class SimplePDFExtractor extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
static java.lang.String[] |
contentTypes |
protected static java.lang.String |
DEFAULT_DATE_FORMAT |
private java.lang.String |
defaultLang |
boolean |
makePageTopics |
boolean |
makeVariantFromTitle |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
SimplePDFExtractor() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File file,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.net.URL url,
TopicMap topicMap) |
void |
_extractTopicsFromStream(java.lang.String locator,
java.io.InputStream inputStream,
TopicMap topicMap,
Topic pdfTopic) |
Locator |
buildSI(java.lang.String siend) |
Topic |
createPDFTypeTopic(TopicMap tm) |
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
int |
getExtractorType() |
java.lang.String |
getGUIText(int textType) |
javax.swing.Icon |
getIcon()
All tools may have an identifying graphic icon used within tool GUI elements.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
Topic |
getWandoraClass(TopicMap tm) |
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getForceContent, getForceFiles, getForceUrls, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawler
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
protected static java.lang.String DEFAULT_DATE_FORMAT
public boolean makePageTopics
public boolean makeVariantFromTitle
private java.lang.String defaultLang
public static final java.lang.String[] contentTypes
public java.lang.String getName()
AbstractWandoraTool
getName
in interface WandoraTool
getName
in class AbstractExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription
in interface WandoraTool
getDescription
in class AbstractExtractor
public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon
should return Icon object of the tool.
getIcon
in interface WandoraTool
getIcon
in class AbstractExtractor
public int getExtractorType()
getExtractorType
in class AbstractExtractor
public java.lang.String getGUIText(int textType)
getGUIText
in class AbstractExtractor
public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
doBrowserExtract
in interface BrowserPluginExtractor
doBrowserExtract
in class AbstractExtractor
TopicMapException
public boolean _extractTopicsFrom(java.net.URL url, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.io.File file, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public void _extractTopicsFromStream(java.lang.String locator, java.io.InputStream inputStream, TopicMap topicMap, Topic pdfTopic)
public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this
ContentHandler can process.
getContentTypes
in interface Handler
getContentTypes
in class AbstractExtractor
public Locator buildSI(java.lang.String siend)
buildSI
in class AbstractExtractor
public Topic createPDFTypeTopic(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException
protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
Copyright 2004-2015 Wandora Team