private static class MediaWikiExtractor.WikiParser
extends java.lang.Object
implements org.xml.sax.ContentHandler, org.xml.sax.ErrorHandler
Modifier and Type | Field and Description |
---|---|
static java.lang.String | CONTRIBUTOR_SI |
private java.lang.String | data_base |
private java.lang.String | data_contributorid |
private java.util.HashSet<java.lang.String> | data_contributors |
private int | data_latestrevision |
private java.lang.String | data_latesttext |
private java.lang.String | data_latesttimestamp |
private java.lang.String | data_namespace |
private java.util.Vector<java.lang.String> | data_namespaces |
private java.lang.String | data_pageid |
private java.lang.String | data_restrictions |
private java.lang.String | data_revisionid |
private java.lang.String | data_sitename |
private java.lang.String | data_text |
private java.lang.String | data_timestamp |
private java.lang.String | data_title |
private java.lang.String | data_username |
static java.lang.String | PAGE_SI |
private MediaWikiExtractor | parent |
static java.lang.String | REDIRECT_FROM_SI |
static java.lang.String | REDIRECT_SI |
static java.lang.String | REDIRECT_TO_SI |
static java.util.regex.Pattern | redirectPattern |
static java.lang.String | SIPREFIX |
private int | state |
private static int | STATE_BASE |
private static int | STATE_COMMENT |
private static int | STATE_CONTRIBUTOR |
private static int | STATE_CONTRIBUTORID |
private static int | STATE_MEDIAWIKI |
private static int | STATE_NAMESPACE |
private static int | STATE_NAMESPACES |
private static int | STATE_PAGE |
private static int | STATE_PAGEID |
private static int | STATE_RESTRICTIONS |
private static int | STATE_REVISION |
private static int | STATE_REVISIONID |
private static int | STATE_SITEINFO |
private static int | STATE_SITENAME |
private static int | STATE_START |
private static int | STATE_TEXT |
private static int | STATE_TIMESTAMP |
private static int | STATE_TITLE |
private static int | STATE_USERNAME |
static java.lang.String | TAG_BASE |
static java.lang.String | TAG_CASE |
static java.lang.String | TAG_COMMENT |
static java.lang.String | TAG_CONTRIBUTOR |
static java.lang.String | TAG_CONTRIBUTORID |
static java.lang.String | TAG_GENERATOR |
static java.lang.String | TAG_MEDIAWIKI |
static java.lang.String | TAG_NAMESPACE |
static java.lang.String | TAG_NAMESPACES |
static java.lang.String | TAG_PAGE |
static java.lang.String | TAG_PAGEID |
static java.lang.String | TAG_RESTRICTIONS |
static java.lang.String | TAG_REVISION |
static java.lang.String | TAG_REVISIONID |
static java.lang.String | TAG_SITEINFO |
static java.lang.String | TAG_SITENAME |
static java.lang.String | TAG_TEXT |
static java.lang.String | TAG_TIMESTAMP |
static java.lang.String | TAG_TITLE |
static java.lang.String | TAG_USERNAME |
static java.lang.String | TEXT_SI |
static java.lang.String | TIMESTAMP_SI |
private TopicMap | tm |
private java.lang.String | url |
static java.lang.String | WIKI_SI |
Constructor and Description |
---|
WikiParser(java.lang.String wikiUrl, TopicMap tm, MediaWikiExtractor parent) |
Modifier and Type | Method and Description |
---|---|
void | characters(char[] ch, int start, int length) |
void | endDocument() |
void | endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) |
void | endPrefixMapping(java.lang.String prefix) |
void | error(org.xml.sax.SAXParseException exception) |
void | fatalError(org.xml.sax.SAXParseException exception) |
private Topic | getOrCreateTopic(java.lang.String si) |
private Topic | getOrCreateTopic(java.lang.String si, java.lang.String bn) |
void | ignorableWhitespace(char[] ch, int start, int length) |
void | processingInstruction(java.lang.String target, java.lang.String data) |
void | setDocumentLocator(org.xml.sax.Locator locator) |
void | skippedEntity(java.lang.String name) |
void | startDocument() |
void | startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) |
void | startPrefixMapping(java.lang.String prefix, java.lang.String uri) |
void | warning(org.xml.sax.SAXParseException exception) |
private java.lang.String url
private TopicMap tm
private MediaWikiExtractor parent
public static final java.util.regex.Pattern redirectPattern
public static final java.lang.String TAG_MEDIAWIKI
public static final java.lang.String TAG_SITEINFO
public static final java.lang.String TAG_SITENAME
public static final java.lang.String TAG_BASE
public static final java.lang.String TAG_GENERATOR
public static final java.lang.String TAG_CASE
public static final java.lang.String TAG_NAMESPACES
public static final java.lang.String TAG_NAMESPACE
public static final java.lang.String TAG_PAGE
public static final java.lang.String TAG_TITLE
public static final java.lang.String TAG_PAGEID
public static final java.lang.String TAG_RESTRICTIONS
public static final java.lang.String TAG_REVISION
public static final java.lang.String TAG_REVISIONID
public static final java.lang.String TAG_TIMESTAMP
public static final java.lang.String TAG_CONTRIBUTOR
public static final java.lang.String TAG_COMMENT
public static final java.lang.String TAG_USERNAME
public static final java.lang.String TAG_CONTRIBUTORID
public static final java.lang.String TAG_TEXT
private static final int STATE_START
private static final int STATE_MEDIAWIKI
private static final int STATE_SITEINFO
private static final int STATE_SITENAME
private static final int STATE_BASE
private static final int STATE_NAMESPACES
private static final int STATE_NAMESPACE
private static final int STATE_PAGE
private static final int STATE_TITLE
private static final int STATE_PAGEID
private static final int STATE_RESTRICTIONS
private static final int STATE_REVISION
private static final int STATE_REVISIONID
private static final int STATE_TIMESTAMP
private static final int STATE_CONTRIBUTOR
private static final int STATE_COMMENT
private static final int STATE_USERNAME
private static final int STATE_CONTRIBUTORID
private static final int STATE_TEXT
private int state
public static java.lang.String SIPREFIX
public static java.lang.String CONTRIBUTOR_SI
public static java.lang.String PAGE_SI
public static java.lang.String TIMESTAMP_SI
public static java.lang.String TEXT_SI
public static java.lang.String WIKI_SI
public static java.lang.String REDIRECT_SI
public static java.lang.String REDIRECT_FROM_SI
public static java.lang.String REDIRECT_TO_SI
private java.lang.String data_sitename
private java.lang.String data_base
private java.util.Vector<java.lang.String> data_namespaces
private java.lang.String data_namespace
private java.lang.String data_title
private java.lang.String data_pageid
private java.lang.String data_restrictions
private java.lang.String data_revisionid
private java.lang.String data_timestamp
private java.lang.String data_username
private java.lang.String data_contributorid
private java.util.HashSet<java.lang.String> data_contributors
private java.lang.String data_text
private int data_latestrevision
private java.lang.String data_latesttext
private java.lang.String data_latesttimestamp
public WikiParser(java.lang.String wikiUrl, TopicMap tm, MediaWikiExtractor parent)
private Topic getOrCreateTopic(java.lang.String si) throws TopicMapException
Throws: TopicMapException
private Topic getOrCreateTopic(java.lang.String si, java.lang.String bn) throws TopicMapException
Throws: TopicMapException
public void startDocument() throws org.xml.sax.SAXException
Specified by: startDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void endDocument() throws org.xml.sax.SAXException
Specified by: endDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void characters(char[] ch, int start, int length) throws org.xml.sax.SAXException
Specified by: characters in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void warning(org.xml.sax.SAXParseException exception) throws org.xml.sax.SAXException
Specified by: warning in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException
public void error(org.xml.sax.SAXParseException exception) throws org.xml.sax.SAXException
Specified by: error in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException
public void fatalError(org.xml.sax.SAXParseException exception) throws org.xml.sax.SAXException
Specified by: fatalError in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException
public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException
Specified by: ignorableWhitespace in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void processingInstruction(java.lang.String target, java.lang.String data) throws org.xml.sax.SAXException
Specified by: processingInstruction in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws org.xml.sax.SAXException
Specified by: startPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void endPrefixMapping(java.lang.String prefix) throws org.xml.sax.SAXException
Specified by: endPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void setDocumentLocator(org.xml.sax.Locator locator)
Specified by: setDocumentLocator in interface org.xml.sax.ContentHandler
public void skippedEntity(java.lang.String name) throws org.xml.sax.SAXException
Specified by: skippedEntity in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
Copyright 2004-2015 Wandora Team