public class RuntimeConfiguration
extends java.lang.Object
All BUbiNG components must share a certain number of global variables, such as filters and pools of objects. A single instance of this class is created at agent construction time: it is used to pass around a single reference to global data.
All fields in this class are either final or
volatile, depending on whether they can be modified at runtime
(usually by means of JMX methods in Agent).
| Modifier and Type | Field | Description |
|---|---|---|
boolean |
acceptAllCertificates |
|
IntOpenHashSet |
blackListedHostHashes |
The set of hashes of hosts that should be blacklisted.
|
java.util.concurrent.locks.ReadWriteLock |
blackListedHostHashesLock |
A lock used to access
blackListedHostHashes. |
IntOpenHashSet |
blackListedIPv4Addresses |
|
java.util.concurrent.locks.ReadWriteLock |
blackListedIPv4Lock |
A lock used to access
blackListedIPv4Addresses. |
double |
bloomFilterPrecision |
|
int |
connectionTimeout |
|
int |
cookieMaxByteSize |
|
java.lang.String |
cookiePolicy |
|
boolean |
crawlIsNew |
|
java.lang.String |
digestAlgorithm |
|
int |
dnsCacheMaxSize |
|
long |
dnsNegativeTtl |
|
long |
dnsPositiveTtl |
|
org.apache.http.conn.DnsResolver |
dnsResolver |
The DNS resolver used throughout the crawler.
|
int |
dnsThreads |
|
static java.util.regex.Pattern |
DOTTED_ADDRESS |
A pattern used to identify hosts specified directly via their address in dotted notation.
|
static boolean |
FETCH_ROBOTS |
Whether to fetch and use
robots.txt. |
int |
fetchDataBufferByteSize |
|
Filter<java.net.URI> |
fetchFilter |
|
int |
fetchingThreads |
|
Filter<URIResponse> |
followFilter |
|
java.io.File |
frontierDir |
|
java.lang.String |
group |
|
long |
ipDelay |
|
double |
ipDelayFactor |
|
long |
keepAliveTime |
|
long |
maxUrls |
|
int |
maxUrlsPerSchemeAuthority |
|
java.lang.String |
name |
|
Filter<URIResponse> |
parseFilter |
|
java.util.ArrayList<Parser<?>> |
parsers |
The parser, instantiated.
|
int |
parsingThreads |
|
boolean |
paused |
Whether the crawler is currently paused.
|
java.lang.String |
proxyHost |
|
int |
proxyPort |
|
int |
responseBodyMaxByteSize |
|
java.io.File |
responseCacheDir |
|
long |
robotsExpiration |
|
java.io.File |
rootDir |
|
Filter<Link> |
scheduleFilter |
|
long |
schemeAuthorityDelay |
|
java.util.Iterator<java.net.URI> |
seed |
An iterator returning URIs that are then used as a seed; this iterator may return
null (when
invalid or relative URLs are specified). |
int |
sieveAuxFileIOBufferByteSize |
|
java.io.File |
sieveDir |
|
int |
sieveSize |
|
int |
sieveStoreIOBufferByteSize |
|
int |
socketTimeout |
|
int |
spamDetectionPeriodicity |
|
int |
spamDetectionThreshold |
|
SpamDetector<?> |
spamDetector |
|
boolean |
startPaused |
|
boolean |
stopping |
Whether the crawler is currently being stopped.
|
java.lang.Class<? extends Store> |
storeClass |
|
java.io.File |
storeDir |
|
Filter<URIResponse> |
storeFilter |
|
long |
urlCacheMaxByteSize |
|
java.lang.String |
userAgent |
|
java.lang.String |
userAgentFrom |
|
long |
virtualizerMaxByteSize |
|
int |
weight |
|
long |
workbenchMaxByteSize |
| Constructor | Description |
|---|---|
RuntimeConfiguration(StartupConfiguration startupConfiguration) |
| Modifier and Type | Method | Description |
|---|---|---|
void |
addBlackListedHost(java.lang.String spec) |
Adds a new host (or a set of hosts) to the black list; the host can be specified directly or it can be a file (prefixed by
file:). |
void |
addBlackListedIPv4(java.lang.String spec) |
Adds a new IPv4 address (or a set of addresses) to the black list; the address can be specified directly or it can be a file (prefixed by
file:). |
void |
ensureNotPaused() |
|
static java.util.ArrayList<Parser<?>> |
parsersFromSpecs(java.lang.String[] specs) |
Given an array of parser specifications, it returns the corresponding list of parsers (only
the correct specifications are put in the list).
|
java.lang.String |
toString() |
public static final boolean FETCH_ROBOTS
Whether to fetch and use robots.txt. This value cannot be configured and it
requires recompilation from the sources.
You had better know what you are doing if you change this to false.
public final java.lang.String name
See Also: StartupConfiguration.name
public final java.lang.String group
See Also: StartupConfiguration.group
public final int weight
See Also: StartupConfiguration.weight
public final int maxUrlsPerSchemeAuthority
public volatile int fetchingThreads
See Also: StartupConfiguration.fetchingThreads
public volatile int parsingThreads
See Also: StartupConfiguration.parsingThreads
public volatile int dnsThreads
See Also: StartupConfiguration.dnsThreads
public volatile Filter<java.net.URI> fetchFilter
See Also: StartupConfiguration.fetchFilter
public volatile Filter<Link> scheduleFilter
See Also: StartupConfiguration.scheduleFilter
public volatile Filter<URIResponse> parseFilter
See Also: StartupConfiguration.parseFilter
public volatile Filter<URIResponse> followFilter
See Also: StartupConfiguration.followFilter
public volatile Filter<URIResponse> storeFilter
See Also: StartupConfiguration.storeFilter
public volatile long keepAliveTime
See Also: StartupConfiguration.keepAliveTime
public volatile long schemeAuthorityDelay
public volatile long ipDelay
See Also: StartupConfiguration.ipDelay
public volatile double ipDelayFactor
See Also: StartupConfiguration.ipDelayFactor
public volatile long maxUrls
See Also: StartupConfiguration.maxUrls
public final double bloomFilterPrecision
public final java.util.Iterator<java.net.URI> seed
An iterator returning URIs that are then used as a seed; this iterator may return null (when
invalid or relative URLs are specified).
See Also: StartupConfiguration.seed
public final IntOpenHashSet blackListedIPv4Addresses
See Also: StartupConfiguration.seed
public final java.util.concurrent.locks.ReadWriteLock blackListedIPv4Lock
A lock used to access blackListedIPv4Addresses.
public final IntOpenHashSet blackListedHostHashes
The set of hashes of hosts that should be blacklisted.
See Also: StartupConfiguration.blackListedHosts
public final java.util.concurrent.locks.ReadWriteLock blackListedHostHashesLock
A lock used to access blackListedHostHashes.
public volatile int socketTimeout
See Also: StartupConfiguration.socketTimeout
public volatile int connectionTimeout
See Also: StartupConfiguration.connectionTimeout
public final int fetchDataBufferByteSize
public final java.lang.String proxyHost
See Also: StartupConfiguration.proxyHost
public final int proxyPort
See Also: StartupConfiguration.proxyPort
public final java.lang.String cookiePolicy
See Also: StartupConfiguration.cookiePolicy
public final int cookieMaxByteSize
See Also: StartupConfiguration.cookieMaxByteSize
public final java.lang.String userAgent
See Also: StartupConfiguration.userAgent
public final java.lang.String userAgentFrom
See Also: StartupConfiguration.userAgentFrom
public volatile long robotsExpiration
See Also: StartupConfiguration.robotsExpiration
public volatile boolean acceptAllCertificates
public final java.io.File rootDir
See Also: StartupConfiguration.rootDir
public final java.io.File storeDir
See Also: StartupConfiguration.storeDir
public final java.io.File responseCacheDir
See Also: StartupConfiguration.responseCacheDir
public final java.io.File sieveDir
See Also: StartupConfiguration.sieveDir
public final java.io.File frontierDir
See Also: StartupConfiguration.frontierDir
public volatile int responseBodyMaxByteSize
public final java.lang.String digestAlgorithm
See Also: StartupConfiguration.digestAlgorithm
public final boolean startPaused
See Also: StartupConfiguration.startPaused
public final java.lang.Class<? extends Store> storeClass
See Also: StartupConfiguration.storeClass
public volatile long workbenchMaxByteSize
public final long virtualizerMaxByteSize
public volatile long urlCacheMaxByteSize
See Also: StartupConfiguration.urlCacheMaxByteSize
public final int sieveSize
See Also: StartupConfiguration.sieveSize
public final int sieveStoreIOBufferByteSize
public final int sieveAuxFileIOBufferByteSize
public final int dnsCacheMaxSize
See Also: StartupConfiguration.dnsCacheMaxSize
public final long dnsPositiveTtl
See Also: StartupConfiguration.dnsPositiveTtl
public final long dnsNegativeTtl
See Also: StartupConfiguration.dnsNegativeTtl
public final boolean crawlIsNew
See Also: StartupConfiguration.crawlIsNew
public final SpamDetector<?> spamDetector
See Also: StartupConfiguration.spamDetectorUri
public final int spamDetectionThreshold
public final int spamDetectionPeriodicity
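public final java.util.ArrayList<Parser<?>> parsers
The parsers, instantiated. Parsers used by ParsingThread instances are obtained by copying these parsers.
public volatile boolean paused
Whether the crawler is currently paused. […] notifyAll() is issued on this runtime configuration.
public volatile boolean stopping
Whether the crawler is currently being stopped.
public final org.apache.http.conn.DnsResolver dnsResolver
The DNS resolver used throughout the crawler.
See Also: StartupConfiguration.dnsResolverClass
public static final java.util.regex.Pattern DOTTED_ADDRESS
A pattern used to identify hosts specified directly via their address in dotted notation.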
public RuntimeConfiguration(StartupConfiguration startupConfiguration) throws ConfigurationException, java.io.IOException
Throws:
ConfigurationException
java.io.IOException
public void addBlackListedIPv4(java.lang.String spec)
throws ConfigurationException,
java.io.FileNotFoundException
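Adds a new IPv4 address (or a set of addresses) to the black list; the address can be specified directly or it can be a file (prefixed by file:).
Parameters:
spec - the specification (an IP address, or a file prefixed by file).
Throws:
ConfigurationException
java.io.FileNotFoundException
public void addBlackListedHost(java.lang.String spec)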
throws ConfigurationException,
java.io.FileNotFoundException
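Adds a new host (or a set of hosts) to the black list; the host can be specified directly or it can be a file (prefixed by file:).
Parameters:
spec - the specification (a host, or a file prefixed by file).
Throws:
ConfigurationException
java.io.FileNotFoundException
public void ensureNotPaused()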
throws java.lang.InterruptedException
Throws:
java.lang.InterruptedException
public java.lang.String toString()
Overrides:
toString in class java.lang.Object
public static java.util.ArrayList<Parser<?>> parsersFromSpecs(java.lang.String[] specs) throws java.lang.IllegalArgumentException, java.lang.ClassNotFoundException, java.lang.IllegalAccessException, java.lang.reflect.InvocationTargetException, java.lang.InstantiationException, java.lang.NoSuchMethodException, java.io.IOException
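Given an array of parser specifications, it returns the corresponding list of parsers (only the correct specifications are put in the list).
Parameters:
specs - the parser specifications (they will be parsed using ObjectParser).
Throws:
java.lang.IllegalArgumentException
java.lang.ClassNotFoundException
java.lang.IllegalAccessException
java.lang.reflect.InvocationTargetException
java.lang.InstantiationException
java.lang.NoSuchMethodException
java.io.IOException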