public class StanfordNERClassifier extends AbstractExtractor
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
classifierPath |
private java.lang.String[] |
contentTypes |
java.lang.String |
defaultClassifier |
private java.lang.String |
defaultEncoding |
static java.lang.String |
DOCUMENT_SI |
static java.lang.String |
ENTITY_SI |
static java.lang.String |
ENTITY_TYPE_SI |
java.lang.String |
optionsPath |
private java.lang.String |
selectedClassifier |
static java.lang.String |
SOURCE_SI |
static java.lang.String |
STANFORD_NER_SI |
static java.lang.String |
TOPIC_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
StanfordNERClassifier() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File file,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.io.InputStream in,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.lang.String data,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.net.URL url,
TopicMap topicMap) |
void |
configure(Wandora admin,
Options options,
java.lang.String prefix)
If the tool is configurable, shows a user interface to configure the tool.
|
void |
fillDocumentTopic(Topic textTopic,
TopicMap topicMap,
java.lang.String content) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
Topic |
getDocumentType(TopicMap tm) |
Topic |
getEntityTopic(java.lang.String entity,
java.lang.String type,
TopicMap tm) |
Topic |
getEntityType(java.lang.String type,
TopicMap tm) |
Topic |
getEntityTypeType(TopicMap tm) |
javax.swing.Icon |
getIcon()
All tools may have an identifying graphic icon used within tool GUI elements.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has
explicitly been given another GUI name.
|
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
Topic |
getSourceType(TopicMap tm) |
Topic |
getStanfordNERClass(TopicMap tm) |
Topic |
getTopicType(TopicMap tm) |
WandoraToolType |
getType()
Tool type is used to categorize tools.
|
Topic |
getWandoraClass(TopicMap tm) |
boolean |
isConfigurable()
Whether this tool is configurable.
|
void |
log(java.lang.String msg)
Shortcut to access the tool's logger.
|
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
void |
processNER(java.lang.String word,
java.lang.String type,
Topic masterTopic,
TopicMap tm) |
java.lang.String |
solveTitle(java.lang.String content) |
boolean |
useURLCrawler() |
void |
writeOptions(Wandora admin,
Options options,
java.lang.String prefix)
If the tool is configurable, saves all current tool options.
|
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isRunning, isRunning, lockLog, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
execute, execute, getContext, getToolMenuItem, hlog, initialize, isRunning, log, log, log, requiresRefresh, setContext, setToolLogger
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
public static final java.lang.String SOURCE_SI
public static final java.lang.String DOCUMENT_SI
public static final java.lang.String TOPIC_SI
public static final java.lang.String ENTITY_SI
public static final java.lang.String ENTITY_TYPE_SI
public static final java.lang.String STANFORD_NER_SI
public static final java.lang.String classifierPath
public java.lang.String defaultClassifier
private java.lang.String selectedClassifier
private java.lang.String defaultEncoding
public java.lang.String optionsPath
private final java.lang.String[] contentTypes
public java.lang.String getName()
AbstractWandoraTool
getName
in interface WandoraTool
getName
in class AbstractExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription
in interface WandoraTool
getDescription
in class AbstractExtractor
public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon should return the Icon object of the tool.
getIcon
in interface WandoraTool
getIcon
in class AbstractExtractor
public WandoraToolType getType()
AbstractWandoraTool
Tool type is used to categorize tools. Tool type has no real effect today; it is merely an informative property of a tool.
getType
in interface WandoraTool
getType
in class AbstractExtractor
public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this ContentHandler can process.
getContentTypes
in interface Handler
getContentTypes
in class AbstractExtractor
public boolean useURLCrawler()
useURLCrawler
in class AbstractExtractor
public boolean isConfigurable()
AbstractWandoraTool
isConfigurable
in interface WandoraTool
isConfigurable
in class AbstractWandoraTool
public void configure(Wandora admin, Options options, java.lang.String prefix) throws TopicMapException
AbstractWandoraTool
configure
in interface WandoraTool
configure
in class AbstractWandoraTool
TopicMapException
public void writeOptions(Wandora admin, Options options, java.lang.String prefix)
AbstractWandoraTool
writeOptions
in interface WandoraTool
writeOptions
in class AbstractWandoraTool
public boolean _extractTopicsFrom(java.net.URL url, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.io.File file, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.io.InputStream in, TopicMap topicMap) throws java.lang.Exception
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String data, TopicMap topicMap) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public void processNER(java.lang.String word, java.lang.String type, Topic masterTopic, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getEntityTypeType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getEntityType(java.lang.String type, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getEntityTopic(java.lang.String entity, java.lang.String type, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getStanfordNERClass(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getTopicType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSourceType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDocumentType(TopicMap tm) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException
protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
public java.lang.String solveTitle(java.lang.String content)
public void fillDocumentTopic(Topic textTopic, TopicMap topicMap, java.lang.String content)
public void log(java.lang.String msg)
WandoraTool
log
in interface WandoraTool
log
in interface WandoraToolLogger
log
in interface TopicMapLogger
log
in class AbstractWandoraTool
msg - to be logged.
Copyright 2004-2015 Wandora Team