public class NYTArticleSearchExtractor extends AbstractNYTExtractor
Modifier and Type | Field and Description |
---|---|
private static java.lang.String |
currentURL |
private static java.lang.String |
defaultLang |
private java.lang.String |
defaultPagingOption |
private boolean |
shouldHandlePagination |
abbrToDayOfWeek, ABSTRACT_SI, ARTICLE_SI, BODY_SI, BYLINE_SI, CLASSIFIER_FACET_SI, COLUMN_FACET_SI, DATE_SI, DBPEDIA_RESOURCE_SI, DES_FACET_SI, END_DATE_SI, EVENT_CATEGORY_SI, EVENT_DATE_SI, EVENT_DESCRIPTION_SI, EVENT_DETAIL_URL_SI, EVENT_LATITUDE_SI, EVENT_LONGITUDE_SI, EVENT_NAME_SI, EVENT_SI, EVENT_VENUE_SI, FACET_SI, GEO_FACET_SI, KEYWORD_SI, LANG_SI, LEAD_PARAGRAPH_SI, MATERIAL_TYPE_FACET_SI, NYT_SI, ORG_FACET_SI, PER_FACET_SI, RECURRING_DAY_SI, SOURCE_FACET_SI, START_DATE_SI, TEXT_SI, WEEKDAY_SI
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
NYTArticleSearchExtractor() |
Modifier and Type | Method and Description |
---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager shows tool descriptions while the user browses the available
tools and builds user-customizable GUI elements such as the Tools menu.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has explicitly
been given another GUI name.
|
private void |
handlePagination(org.json.JSONObject json,
TopicMap tm) |
private void |
nap() |
void |
parse(org.json.JSONObject json,
TopicMap tm) |
private void |
parseKeyword(java.lang.String name,
java.lang.String value,
Topic articleTopic,
TopicMap tm) |
void |
parseResult(org.json.JSONObject result,
TopicMap tm) |
getAbstractTypeTopic, getArticleTypeTopic, getBodyTypeTopic, getBylineTopic, getBylineTypeTopic, getCategoryTopic, getCategoryTypeTopic, getContentTypes, getDateTypeTopic, getDayOfWeekTypeTopic, getDBpediaResourceTopic, getDBpediaResourceTypeTopic, getDescriptionTypeTopic, getEndDateTypeTopic, getEventDateTypeTopic, getEventTypeTopic, getIcon, getKeywordNameTopic, getKeywordTopic, getKeywordTypeTopic, getLangTopic, getLatitudeTypeTopic, getLeadParagraphTypeTopic, getLongitudeTypeTopic, getNYTTypeTopic, getOrCreateTopic, getOrCreateTopic, getRecurringDayTopic, getRecurringDayTypeTopic, getStartDateTypeTopic, getTextTypeTopic, getVenueTopic, getVenueTypeTopic, getWandoraClassTopic, makeSubclassOf, runInOwnThread, useURLCrawler
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
private static java.lang.String defaultLang
private static java.lang.String currentURL
private boolean shouldHandlePagination
private java.lang.String defaultPagingOption
public java.lang.String getName()
AbstractWandoraTool
getName
in interface WandoraTool
getName
in class AbstractNYTExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription
in interface WandoraTool
getDescription
in class AbstractNYTExtractor
public boolean _extractTopicsFrom(java.io.File f, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.net.URL u, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str, TopicMap tm) throws java.lang.Exception
_extractTopicsFrom
in class AbstractExtractor
java.lang.Exception
public void parse(org.json.JSONObject json, TopicMap tm) throws TopicMapException
TopicMapException
private void handlePagination(org.json.JSONObject json, TopicMap tm)
private void nap()
public void parseResult(org.json.JSONObject result, TopicMap tm) throws org.json.JSONException, TopicMapException
org.json.JSONException
TopicMapException
private void parseKeyword(java.lang.String name, java.lang.String value, Topic articleTopic, TopicMap tm) throws TopicMapException
TopicMapException
Copyright 2004-2015 Wandora Team