public abstract class AbstractZemantaExtractor extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
private static java.lang.String |
apikey |
private java.lang.String[] |
contentTypes |
protected java.lang.String |
defaultEncoding |
static java.lang.String |
DOCUMENT_SI |
protected boolean |
EXTRACT_RELEVANCE |
static java.lang.String |
LANG |
static java.lang.String |
SOURCE_SI |
static java.lang.String |
TOPIC_SI |
static java.lang.String |
ZEMANTA_ARTICLE_SI |
static java.lang.String |
ZEMANTA_ARTICLE_TITLE_SI |
static java.lang.String |
ZEMANTA_CATEGORIZATION_SI |
static java.lang.String |
ZEMANTA_CATEGORY_SI |
static java.lang.String |
ZEMANTA_CONFIDENCE_SI |
static java.lang.String |
ZEMANTA_DATE_SI |
static java.lang.String |
ZEMANTA_IMAGE_ATTRIBUTION_SI |
static java.lang.String |
ZEMANTA_IMAGE_DESCRIPTION_SI |
static java.lang.String |
ZEMANTA_IMAGE_LICENSE_SI |
static java.lang.String |
ZEMANTA_IMAGE_SI |
static java.lang.String |
ZEMANTA_KEYWORD_SI |
static java.lang.String |
ZEMANTA_LARGE_IMAGE_SI |
static java.lang.String |
ZEMANTA_LINK_ANCHOR_SI |
static java.lang.String |
ZEMANTA_LINK_SI |
static java.lang.String |
ZEMANTA_LINK_TITLE_SI |
static java.lang.String |
ZEMANTA_LINK_TYPE_SI |
static java.lang.String |
ZEMANTA_LINK_URL_SI |
static java.lang.String |
ZEMANTA_MEDIUM_IMAGE_SI |
static java.lang.String |
ZEMANTA_PUBLISHED_DATE_SI |
static java.lang.String |
ZEMANTA_RELEVANCE_SI |
static java.lang.String |
ZEMANTA_SCHEMA_SI |
static java.lang.String |
ZEMANTA_SI |
static java.lang.String |
ZEMANTA_SMALL_IMAGE_SI |
static java.lang.String |
ZEMANTA_URL |
static java.lang.String |
ZEMANTA_ZEMIFIED_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
AbstractZemantaExtractor() |
Modifier and Type | Method and Description |
---|---|
abstract boolean |
_extractTopicsFrom(java.io.InputStream in,
TopicMap topicMap) |
void |
configure(Wandora admin,
Options options,
java.lang.String prefix)
If the tool is configurable, shows a user interface to configure the tool.
|
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
void |
fillDocumentTopic(Topic textTopic,
TopicMap topicMap,
java.lang.String content) |
void |
forgetAuthorization() |
Topic |
getArticleTitleTopic(java.lang.String str,
TopicMap tm) |
Topic |
getArticleTitleType(TopicMap tm) |
Topic |
getArticleTopic(java.lang.String url,
TopicMap tm) |
Topic |
getArticleType(TopicMap tm) |
private Topic |
getATopic(java.lang.String str,
java.lang.String si,
Topic type,
TopicMap tm) |
Topic |
getCategorizationTopic(java.lang.String str,
TopicMap tm) |
Topic |
getCategorizationType(TopicMap tm) |
Topic |
getCategoryTopic(java.lang.String str,
TopicMap tm) |
Topic |
getCategoryType(TopicMap tm) |
Topic |
getConfidenceTopic(java.lang.String str,
TopicMap tm) |
Topic |
getConfidenceType(TopicMap tm) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
Topic |
getDateTopic(java.lang.String str,
TopicMap tm) |
Topic |
getDateType(TopicMap tm) |
Topic |
getDefaultLangType(TopicMap tm) |
Topic |
getDocumentType(TopicMap tm) |
protected java.lang.String |
getFileContents(java.io.File file) |
javax.swing.Icon |
getIcon()
All tools may have an identifying graphic icon used within tool GUI elements.
|
Topic |
getImageAttributionType(TopicMap tm) |
Topic |
getImageDescriptionType(TopicMap tm) |
Topic |
getImageLicenseType(TopicMap tm) |
Topic |
getImageTopic(java.lang.String url,
TopicMap tm) |
Topic |
getImageType(TopicMap tm) |
Topic |
getKeywordTopic(java.lang.String str,
TopicMap tm) |
Topic |
getKeywordType(TopicMap tm) |
Topic |
getLargeImageType(TopicMap tm) |
Topic |
getLinkAnchorTopic(java.lang.String str,
TopicMap tm) |
Topic |
getLinkAnchorType(TopicMap tm) |
Topic |
getLinkTitleTopic(java.lang.String str,
TopicMap tm) |
Topic |
getLinkTitleType(TopicMap tm) |
Topic |
getLinkType(TopicMap tm) |
Topic |
getLinkTypeTopic(java.lang.String str,
TopicMap tm) |
Topic |
getLinkTypeType(TopicMap tm) |
Topic |
getLinkUrlTopic(java.lang.String url,
TopicMap tm) |
Topic |
getLinkUrlType(TopicMap tm) |
Topic |
getMediumImageType(TopicMap tm) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn,
Topic type) |
Topic |
getPublishedDateType(TopicMap tm) |
Topic |
getRelevanceTopic(java.lang.String str,
TopicMap tm) |
Topic |
getRelevanceType(TopicMap tm) |
Topic |
getSchemaTopic(java.lang.String str,
TopicMap tm) |
Topic |
getSchemaType(TopicMap tm) |
Topic |
getSmallImageType(TopicMap tm) |
Topic |
getSourceType(TopicMap tm) |
protected java.lang.String |
getStringFromDocument(org.w3c.dom.Document doc) |
Topic |
getTopicType(TopicMap tm) |
WandoraToolType |
getType()
Tool type is used to categorize tools.
|
private Topic |
getUTopic(java.lang.String si,
Topic type,
TopicMap tm) |
Topic |
getWandoraClass(TopicMap tm) |
Topic |
getZemantaType(TopicMap tm) |
Topic |
getZemifiedTopic(java.lang.String str,
TopicMap tm) |
Topic |
getZemifiedType(TopicMap tm) |
boolean |
isConfigurable()
Whether this tool is configurable.
|
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
static java.lang.String |
sendRequest(java.net.URL url,
java.lang.String data,
java.lang.String ctype,
java.lang.String method) |
java.lang.String |
solveAPIKey() |
java.lang.String |
solveAPIKey(Wandora wandora) |
java.lang.String |
solveTitle(java.lang.String content) |
boolean |
useURLCrawler() |
void |
writeOptions(Wandora admin,
Options options,
java.lang.String prefix)
If the tool is configurable, saves all current tool options.
|
_extractTopicsFrom, _extractTopicsFrom, _extractTopicsFrom, acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getName, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
execute, execute, getContext, getToolMenuItem, hlog, initialize, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
protected boolean EXTRACT_RELEVANCE
protected java.lang.String defaultEncoding
public static java.lang.String LANG
public static final java.lang.String ZEMANTA_URL
public static final java.lang.String SOURCE_SI
public static final java.lang.String DOCUMENT_SI
public static final java.lang.String TOPIC_SI
public static final java.lang.String ZEMANTA_SI
public static final java.lang.String ZEMANTA_IMAGE_SI
public static final java.lang.String ZEMANTA_LARGE_IMAGE_SI
public static final java.lang.String ZEMANTA_MEDIUM_IMAGE_SI
public static final java.lang.String ZEMANTA_SMALL_IMAGE_SI
public static final java.lang.String ZEMANTA_IMAGE_DESCRIPTION_SI
public static final java.lang.String ZEMANTA_IMAGE_ATTRIBUTION_SI
public static final java.lang.String ZEMANTA_IMAGE_LICENSE_SI
public static final java.lang.String ZEMANTA_KEYWORD_SI
public static final java.lang.String ZEMANTA_CATEGORY_SI
public static final java.lang.String ZEMANTA_CATEGORIZATION_SI
public static final java.lang.String ZEMANTA_CONFIDENCE_SI
public static final java.lang.String ZEMANTA_RELEVANCE_SI
public static final java.lang.String ZEMANTA_SCHEMA_SI
public static final java.lang.String ZEMANTA_LINK_SI
public static final java.lang.String ZEMANTA_LINK_ANCHOR_SI
public static final java.lang.String ZEMANTA_LINK_URL_SI
public static final java.lang.String ZEMANTA_LINK_TYPE_SI
public static final java.lang.String ZEMANTA_LINK_TITLE_SI
public static final java.lang.String ZEMANTA_ARTICLE_SI
public static final java.lang.String ZEMANTA_ARTICLE_TITLE_SI
public static final java.lang.String ZEMANTA_DATE_SI
public static final java.lang.String ZEMANTA_PUBLISHED_DATE_SI
public static final java.lang.String ZEMANTA_ZEMIFIED_SI
private final java.lang.String[] contentTypes
private static java.lang.String apikey
public javax.swing.Icon getIcon()
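Description copied from class: AbstractWandoraTool
All tools may have an identifying graphic icon used within tool GUI elements. getIcon should return the Icon object of the tool.
Specified by: getIcon in interface WandoraTool
Overrides: getIcon in class AbstractExtractor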
public WandoraToolType getType()
AbstractWandoraTool
Tool type is used to categorize tools. Tool type has no real effect today; it is merely an informative property of a tool.
getType
in interface WandoraTool
getType
in class AbstractExtractor
public java.lang.String[] getContentTypes()
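Description copied from interface: Handler
Returns an array of String containing the content-types this ContentHandler can process.
Specified by: getContentTypes in interface Handler
Overrides: getContentTypes in class AbstractExtractor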
public boolean useURLCrawler()
useURLCrawler
in class AbstractExtractor
public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
doBrowserExtract
in interface BrowserPluginExtractor
doBrowserExtract
in class AbstractExtractor
TopicMapException
public static java.lang.String sendRequest(java.net.URL url, java.lang.String data, java.lang.String ctype, java.lang.String method) throws java.io.IOException
java.io.IOException
public boolean isConfigurable()
AbstractWandoraTool
isConfigurable
in interface WandoraTool
isConfigurable
in class AbstractWandoraTool
public void configure(Wandora admin, Options options, java.lang.String prefix) throws TopicMapException
AbstractWandoraTool
configure
in interface WandoraTool
configure
in class AbstractWandoraTool
TopicMapException
public void writeOptions(Wandora admin, Options options, java.lang.String prefix)
AbstractWandoraTool
writeOptions
in interface WandoraTool
writeOptions
in class AbstractWandoraTool
public abstract boolean _extractTopicsFrom(java.io.InputStream in, TopicMap topicMap) throws java.lang.Exception
java.lang.Exception
public java.lang.String solveTitle(java.lang.String content)
public void fillDocumentTopic(Topic textTopic, TopicMap topicMap, java.lang.String content)
public Topic getKeywordTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getCategoryTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getConfidenceTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getRelevanceTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSchemaTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getCategorizationTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkUrlTopic(java.lang.String url, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkAnchorTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkTypeTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkTitleTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getZemifiedTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDateTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getArticleTopic(java.lang.String url, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getArticleTitleTopic(java.lang.String str, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getImageTopic(java.lang.String url, TopicMap tm) throws TopicMapException
TopicMapException
private Topic getATopic(java.lang.String str, java.lang.String si, Topic type, TopicMap tm) throws TopicMapException
TopicMapException
private Topic getUTopic(java.lang.String si, Topic type, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getImageType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLargeImageType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getMediumImageType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSmallImageType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDefaultLangType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getImageDescriptionType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getImageLicenseType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getImageAttributionType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getArticleType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getArticleTitleType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDateType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getPublishedDateType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getZemifiedType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkTitleType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkTypeType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkAnchorType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getLinkUrlType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getRelevanceType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getConfidenceType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSchemaType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getCategorizationType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getKeywordType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getCategoryType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getTopicType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSourceType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getZemantaType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDocumentType(TopicMap tm) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn, Topic type) throws TopicMapException
TopicMapException
protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
public java.lang.String solveAPIKey(Wandora wandora)
public java.lang.String solveAPIKey()
public void forgetAuthorization()
protected java.lang.String getStringFromDocument(org.w3c.dom.Document doc)
protected java.lang.String getFileContents(java.io.File file) throws java.io.IOException, java.io.FileNotFoundException
java.io.IOException
java.io.FileNotFoundException
Copyright 2004-2015 Wandora Team