public class MillionFirstStepsBookTSVExtractor extends AbstractMillionFirstStepsExtractor
ARKID_SI, AUTHOR_SI, BASE_SI, BL_DLS_SI, BOOK_SI, BRITISH_LIBRARY_SI, CORPORATE_SI, DATE_SI, DATEFIELD_SI, defaultEncoding, defaultLang, EDITION_SI, IMAGE_SI, IMAGEIDX_SI, ISSUANCE_SI, LANG_SI, ORDER_SI, PAGE_SI, PDF_SI, PLACE_SI, PUBLISHER_SI, ROLE_SI, SHELFMARK_SI, TITLE_SI, VOLUME_SI
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
MillionFirstStepsBookTSVExtractor() |
Modifier and Type | Method and Description |
---|---|
void |
_extractTopicsFrom(java.io.File[] f) |
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager displays tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
private java.lang.String |
getIndex(java.lang.String indexName,
java.lang.String[] array,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes) |
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
void |
handleFiles(java.io.File[] files,
TopicMap tm) |
private boolean |
isValid(java.lang.String data) |
void |
parse(java.lang.String str,
TopicMap tm) |
void |
parseColumnNames(java.lang.String columns,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes) |
void |
parseLine(java.lang.String str,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes,
TopicMap tm) |
doUrl, getArkIdTypeTopic, getATopic, getATypeTopic, getAuthorTopic, getAuthorTypeTopic, getBLDLSIdTypeTopic, getBookTopic, getBookTypeTopic, getBritishLibraryTypeTopic, getCorporateTopic, getCorporateTypeTopic, getDatefieldTopic, getDatefieldTypeTopic, getDateTopic, getDateTypeTopic, getEditionTopic, getEditionTypeTopic, getIcon, getImageIdxTypeTopic, getImageTopic, getImageTopic, getImageTypeTopic, getIssuanceTopic, getIssuanceTypeTopic, getLangTopic, getOrCreateTopic, getOrCreateTopic, getOrderTopic, getOrderTypeTopic, getPageTopic, getPageTypeTopic, getPDFTypeTopic, getPlaceTopic, getPlaceTypeTopic, getPublisherTopic, getPublisherTypeTopic, getRoleTopic, getRoleTypeTopic, getShelfmarkTopic, getShelfmarkTypeTopic, getTitleTopic, getTitleTypeTopic, getVolumeTopic, getVolumeTypeTopic, getWandoraClassTopic, makeSubclassOf, runInOwnThread, useTempTopicMap, useURLCrawler
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getContentTypes, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
public java.lang.String getName()
AbstractWandoraTool
getName
in interface WandoraTool
getName
in class AbstractMillionFirstStepsExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription
in interface WandoraTool
getDescription
in class AbstractMillionFirstStepsExtractor
public boolean _extractTopicsFrom(java.io.File f, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public void _extractTopicsFrom(java.io.File[] f) throws java.lang.Exception
java.lang.Exception
public void handleFiles(java.io.File[] files, TopicMap tm)
handleFiles
in class AbstractExtractor
public boolean _extractTopicsFrom(java.net.URL u, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public void parse(java.lang.String str, TopicMap tm)
public void parseColumnNames(java.lang.String columns, java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes)
public void parseLine(java.lang.String str, java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes, TopicMap tm)
private boolean isValid(java.lang.String data)
private java.lang.String getIndex(java.lang.String indexName, java.lang.String[] array, java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes)
Copyright 2004-2015 Wandora Team