public abstract class HttpBase extends java.lang.Object implements Protocol
| Modifier and Type | Field and Description |
|---|---|
protected java.lang.String |
accept
The "Accept" request header value.
|
protected java.lang.String |
acceptCharset
The "Accept-Charset" request header value.
|
protected java.lang.String |
acceptLanguage
The "Accept-Language" request header value.
|
static int |
BUFFER_SIZE |
protected int |
maxContent
The length limit for downloaded content, in bytes.
|
protected java.lang.String |
proxyHost
The proxy hostname.
|
protected int |
proxyPort
The proxy port.
|
protected boolean |
responseTime
Response Time
|
protected int |
timeout
The network timeout in milliseconds.
|
protected java.util.Set<java.lang.String> |
tlsPreferredCipherSuites
Which TLS/SSL cipher suites to support
|
protected java.util.Set<java.lang.String> |
tlsPreferredProtocols
Which TLS/SSL protocols to support
|
protected boolean |
useHttp11
Do we use HTTP/1.1?
|
protected boolean |
useProxy
Indicates if a proxy is used
|
protected java.lang.String |
userAgent
The Nutch 'User-Agent' request header
|
CHECK_BLOCKING, CHECK_ROBOTS, X_POINT_ID
| Constructor and Description |
|---|
HttpBase()
Creates a new instance of HttpBase
|
HttpBase(org.slf4j.Logger logger)
Creates a new instance of HttpBase
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
getAccept() |
java.lang.String |
getAcceptCharset() |
java.lang.String |
getAcceptLanguage()
Value of "Accept-Language" request header sent by Nutch.
|
Configuration |
getConf() |
int |
getMaxContent() |
ProtocolOutput |
getProtocolOutput(java.lang.String url,
WebPage page) |
java.lang.String |
getProxyHost() |
int |
getProxyPort() |
protected abstract Response |
getResponse(java.net.URL url,
WebPage page,
boolean followRedirects) |
crawlercommons.robots.BaseRobotRules |
getRobotRules(java.lang.String url,
WebPage page)
Retrieve robot rules applicable for this url.
|
int |
getTimeout() |
java.util.Set<java.lang.String> |
getTlsPreferredCipherSuites() |
java.util.Set<java.lang.String> |
getTlsPreferredProtocols() |
boolean |
getUseHttp11() |
java.lang.String |
getUserAgent() |
protected void |
logConf() |
protected static void |
main(HttpBase http,
java.lang.String[] args) |
byte[] |
processDeflateEncoded(byte[] compressed,
java.net.URL url) |
byte[] |
processGzipEncoded(byte[] compressed,
java.net.URL url) |
void |
setConf(Configuration conf) |
boolean |
useProxy() |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getFields
public static final int BUFFER_SIZE
protected java.lang.String proxyHost
protected int proxyPort
protected boolean useProxy
protected int timeout
protected int maxContent
protected java.lang.String userAgent
protected java.lang.String acceptLanguage
protected java.lang.String acceptCharset
protected java.lang.String accept
protected boolean useHttp11
protected boolean responseTime
protected java.util.Set<java.lang.String> tlsPreferredProtocols
protected java.util.Set<java.lang.String> tlsPreferredCipherSuites
public HttpBase()
public HttpBase(org.slf4j.Logger logger)
public void setConf(Configuration conf)
setConf in interface Configurable
public Configuration getConf()
getConf in interface Configurable
public ProtocolOutput getProtocolOutput(java.lang.String url, WebPage page)
getProtocolOutput in interface Protocol
public java.lang.String getProxyHost()
public int getProxyPort()
public boolean useProxy()
public int getTimeout()
public int getMaxContent()
public java.lang.String getUserAgent()
public java.lang.String getAcceptLanguage()
public java.lang.String getAcceptCharset()
public java.lang.String getAccept()
public boolean getUseHttp11()
public java.util.Set<java.lang.String> getTlsPreferredCipherSuites()
public java.util.Set<java.lang.String> getTlsPreferredProtocols()
protected void logConf()
public byte[] processGzipEncoded(byte[] compressed,
java.net.URL url)
throws java.io.IOException
java.io.IOException
public byte[] processDeflateEncoded(byte[] compressed,
java.net.URL url)
throws java.io.IOException
java.io.IOException
protected static void main(HttpBase http, java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
protected abstract Response getResponse(java.net.URL url, WebPage page, boolean followRedirects) throws ProtocolException, java.io.IOException
ProtocolException
java.io.IOException
public crawlercommons.robots.BaseRobotRules getRobotRules(java.lang.String url,
WebPage page)
Protocol
getRobotRules in interface Protocol
url - url to check
Copyright © 2019 The Apache Software Foundation