public class ProxyServerPool
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
(package private) class |
ProxyServerPool.ProxyServerValidator
Used to validate a
ProxyServer in an asynchronous fashion. |
Modifier and Type | Field and Description |
---|---|
(package private) static java.util.concurrent.atomic.AtomicBoolean |
HAS_LOADED_AUTOMATIC_ANONYMIZATION_PROXIES |
static org.apache.log4j.Logger |
log
Used for logging.
|
Constructor and Description |
---|
ProxyServerPool()
Generates a new pool.
|
Modifier and Type | Method and Description |
---|---|
void |
addProxyServer(ProxyServer proxyServer)
Adds a proxy to the pool.
|
void |
currentProxyServerIsBad()
Indicates that the current proxy server is bad.
|
void |
filter()
Causes the current pool proxy server to filter its list of proxy servers.
|
void |
filter(int connectionTimeout)
Causes the proxy server to filter its current pool.
|
void |
filter(int connectionTimeout,
boolean validateHost)
Causes the proxy server to filter its current pool.
|
boolean |
getAutomaticProxyCycling()
Returns the current state of automatic proxy cycling.
|
int |
getConnectionTimeout()
Gets the connection timeout to use when validating proxies.
|
ProxyServer |
getCurrentProxyServer()
Gets the current proxy.
|
ProxyServer |
getNextProxyServer()
Gets the next proxy to be cycled through.
|
int |
getNumProxiesToValidateConcurrently()
Gets the number of proxies we can test concurrently.
|
int |
getNumProxyServers()
Gets the number of proxy servers.
|
java.io.File |
getPoolFile()
Gets the file used to populate the pool.
|
int |
getRepopulateThreshold()
Gets the threshold that should be used when determining whether or not to repopulate.
|
ScrapingSessionNotifiable |
getScrapingSessionNotifiable()
Gets the
ScrapingSessionNotifiable to be used. |
void |
outputProxyServersToLog()
Outputs the current list of proxies to the log.
|
void |
populateFromFile(java.io.File file)
Gets and loads up the proxy servers, if necessary.
|
void |
populateFromFile(java.lang.String filePath)
Gets and loads up the proxy servers, if necessary.
|
boolean |
populateFromServer()
Populates the pool from the remote server.
|
void |
prepareAnonymousProxies(int numRequiredProxies,
java.util.List<ProxyServer> proxyServerList)
Populates the proxy list from the server and if needed spawns more proxy servers.
|
void |
removeAllProxyServers()
Removes all proxy servers from the pool.
|
void |
repopulateIfThresholdMet()
Repopulates the pool if the repopulate threshold has been met.
|
void |
setAutomaticProxyCycling(boolean shouldCycle)
Enables or disables automatic proxy cycling.
|
void |
setConnectionTimeout(int connectionTimeout)
Sets the connection timeout to use when validating proxies.
|
void |
setNumProxiesToValidateConcurrently(int numProxiesToValidateConcurrently)
Sets the number of proxies we can test concurrently.
|
void |
setNumRequestsPerProxy(int numRequestsPerProxy)
Sets the number of subsequent requests that should be made for each proxy server.
|
void |
setPoolFile(java.io.File poolFile)
Sets the file used to populate the pool.
|
void |
setPoolFile(java.lang.String filePath)
Sets the path to the file used to populate the pool.
|
void |
setProxyServers(java.util.Collection<ProxyServer> proxies)
Sets the proxies as a
Collection . |
void |
setRepopulateThreshold(int repopulateThreshold)
Sets the threshold that should be used when determining whether or not to repopulate.
|
void |
setScrapingSessionNotifiable(ScrapingSessionNotifiable scrapingSessionNotifiable)
Sets the
ScrapingSessionNotifiable to be used. |
java.lang.String |
toString() |
void |
waitTillCurrentProxyServerIsAlive()
Keeps trying till a proxy server is available.
|
void |
writeProxyPoolToFile(java.lang.String filePath)
Writes the current proxy list to a file.
|
public static org.apache.log4j.Logger log
static final java.util.concurrent.atomic.AtomicBoolean HAS_LOADED_AUTOMATIC_ANONYMIZATION_PROXIES
@InternalOnly public void prepareAnonymousProxies(int numRequiredProxies, @NotNull java.util.List<ProxyServer> proxyServerList)
numRequiredProxies
- The number of proxy servers needed.
proxyServerList
- The central List<ProxyServer>
that holds all of the automatic proxy servers.
AnonymousProxyException
- On error preparing the proxies.
public void currentProxyServerIsBad()
@InternalOnly public void repopulateIfThresholdMet()
public void removeAllProxyServers()
@Nullable @InternalOnly public ProxyServer getCurrentProxyServer()
ProxyServer
.
public void setAutomaticProxyCycling(boolean shouldCycle)
shouldCycle
- True if proxies should be allowed to rotate automatically during the scrape.
public boolean getAutomaticProxyCycling()
@Nullable @InternalOnly public ProxyServer getNextProxyServer()
ProxyServer
.
@InternalOnly public void waitTillCurrentProxyServerIsAlive()
@InternalOnly public boolean populateFromServer()
public void populateFromFile(@NotNull java.io.File file)
file
- The File
that should be used in populating the list.
@Nullable @InternalOnly public java.io.File getPoolFile()
File
.
@InternalOnly public void setPoolFile(java.io.File poolFile)
poolFile
- A File
.
@InternalOnly public void setPoolFile(@NotNull java.lang.String filePath)
filePath
- A String
representing the path.
public void populateFromFile(@NotNull java.lang.String filePath)
filePath
- The path to the file that should be used in populating the list.
@InternalOnly public void setProxyServers(@NotNull java.util.Collection<ProxyServer> proxies)
Collection
.
proxies
- A Collection
containing ProxyServer
objects.
@InternalOnly public void addProxyServer(@Nullable ProxyServer proxyServer)
proxyServer
- The ProxyServer
to add.
public void writeProxyPoolToFile(@NotNull java.lang.String filePath)
filePath
- The path to the file that should be used.
public void outputProxyServersToLog()
public int getNumProxyServers()
public void setNumRequestsPerProxy(int numRequestsPerProxy)
numRequestsPerProxy
- The number of requests.
@Nullable @InternalOnly public ScrapingSessionNotifiable getScrapingSessionNotifiable()
ScrapingSessionNotifiable
to be used.
ScrapingSessionNotifiable
.
@InternalOnly public void setScrapingSessionNotifiable(@Nullable ScrapingSessionNotifiable scrapingSessionNotifiable)
ScrapingSessionNotifiable
to be used.
scrapingSessionNotifiable
- A ScrapingSessionNotifiable
.
public int getConnectionTimeout()
public void setConnectionTimeout(int connectionTimeout)
connectionTimeout
- The timeout.
public int getNumProxiesToValidateConcurrently()
public void setNumProxiesToValidateConcurrently(int numProxiesToValidateConcurrently)
numProxiesToValidateConcurrently
- The number of proxies.
public int getRepopulateThreshold()
public void setRepopulateThreshold(int repopulateThreshold)
repopulateThreshold
- The threshold.
public void filter(int connectionTimeout)
connectionTimeout
- The maximum number of seconds to wait when testing each proxy.
public void filter(int connectionTimeout, boolean validateHost)
connectionTimeout
- The maximum number of seconds to wait when testing each proxy.
validateHost
- Indicates whether or not the host should be validated when filtering proxies.
public void filter()
@NotNull public java.lang.String toString()
toString
in class java.lang.Object