WarcRecord
, org.apache.http.HttpMessage
HttpRequestWarcRecord
, HttpResponseWarcRecord
, InfoWarcRecord
public abstract class AbstractWarcRecord extends org.apache.http.message.AbstractHttpMessage implements WarcRecord
WarcRecord
.WarcRecord.Type
Modifier and Type | Field | Description |
---|---|---|
static java.lang.String |
USE_BURL_PROPERTY |
|
protected org.apache.http.message.HeaderGroup |
warcHeaders |
PROTOCOL_VERSION
Constructor | Description |
---|---|
AbstractWarcRecord(java.net.URI targetURI,
org.apache.http.message.HeaderGroup warcHeaders) |
Builds a record, optionally given the target URI and the warcHeaders.
|
AbstractWarcRecord(org.apache.http.message.HeaderGroup warcHeaders) |
Builds a record, optionally given the warcHeaders.
|
Modifier and Type | Method | Description |
---|---|---|
static WarcRecord |
fromPayload(org.apache.http.message.HeaderGroup warcHeaders,
BoundSessionInputBuffer payloadBuffer) |
|
org.apache.http.ProtocolVersion |
getProtocolVersion() |
|
long |
getWarcContentLength() |
Returns the WARC
Content-Length header. |
java.util.Date |
getWarcDate() |
Returns the
WARC-Date header. |
org.apache.http.Header |
getWarcHeader(WarcHeader.Name header) |
Returns the specified WARC header.
|
org.apache.http.message.HeaderGroup |
getWarcHeaders() |
Returns the WARC headers.
|
java.util.UUID |
getWarcRecordId() |
Returns the
WARC-Record-ID header. |
java.net.URI |
getWarcTargetURI() |
Returns the
WARC-Target-URI header as a URI. |
WarcRecord.Type |
getWarcType() |
Returns the
WARC-Type header. |
void |
write(java.io.OutputStream output,
ByteArraySessionOutputBuffer buffer) |
Writes the WARC record.
|
protected static void |
writeHeaders(org.apache.http.message.HeaderGroup headers,
java.io.OutputStream output) |
|
protected abstract java.io.InputStream |
writePayload(ByteArraySessionOutputBuffer buffer) |
public static final java.lang.String USE_BURL_PROPERTY
protected final org.apache.http.message.HeaderGroup warcHeaders
public AbstractWarcRecord(org.apache.http.message.HeaderGroup warcHeaders)
warcHeaders
- the WARC headers, may be null.
AbstractWarcRecord(URI,HeaderGroup)
public AbstractWarcRecord(java.net.URI targetURI, org.apache.http.message.HeaderGroup warcHeaders)
If the headers are null
or the WarcHeader.Name.WARC_RECORD_ID
header is absent, it will be generated at random;
similarly, if the headers are null
or the WarcHeader.Name.WARC_DATE
header is absent, it will be set to the current time.
If the target URI is not null
and the WarcHeader.Name.WARC_TARGET_URI
header is not set, it will be set to the given value.
targetURI
- the target URI, may be null.
warcHeaders
- the WARC headers, may be null.
public org.apache.http.ProtocolVersion getProtocolVersion()
getProtocolVersion
in interface org.apache.http.HttpMessage
public org.apache.http.message.HeaderGroup getWarcHeaders()
WarcRecord
getWarcHeaders
in interface WarcRecord
public org.apache.http.Header getWarcHeader(WarcHeader.Name header)
WarcRecord
getWarcHeader
in interface WarcRecord
header
- the name of the header to return.
null
if the header is not present.
public java.util.UUID getWarcRecordId()
WarcRecord
Returns the WARC-Record-ID
header.
getWarcRecordId
in interface WarcRecord
public WarcRecord.Type getWarcType()
WarcRecord
Returns the WARC-Type
header.
getWarcType
in interface WarcRecord
public java.util.Date getWarcDate()
WarcRecord
Returns the WARC-Date
header.
getWarcDate
in interface WarcRecord
public long getWarcContentLength()
WarcRecord
Returns the WARC Content-Length
header.
getWarcContentLength
in interface WarcRecord
public java.net.URI getWarcTargetURI()
Returns the WARC-Target-URI
header as a URI
.
Parsing is performed by URI.create(String)
, unless the system property "it.unimi.di.law.warc.records.useburl"
has been set to true, in which case BURL.parse(String)
will be used (if BURL.parse(String)
returns null
, we throw an IllegalArgumentException
as per the specification
in WarcRecord.getWarcTargetURI()
).
getWarcTargetURI
in interface WarcRecord
java.lang.IllegalStateException
- if the header is not present.
java.lang.IllegalArgumentException
- if the header value cannot be parsed into a URI.
getWarcTargetURI()
protected abstract java.io.InputStream writePayload(ByteArraySessionOutputBuffer buffer) throws java.io.IOException
java.io.IOException
protected static void writeHeaders(org.apache.http.message.HeaderGroup headers, java.io.OutputStream output) throws java.io.IOException
java.io.IOException
public void write(java.io.OutputStream output, ByteArraySessionOutputBuffer buffer) throws java.io.IOException
WarcRecord
write
in interface WarcRecord
output
- the stream to which the record is written.
buffer
- a buffer that will be optionally used by the writer.
java.io.IOException
public static WarcRecord fromPayload(org.apache.http.message.HeaderGroup warcHeaders, BoundSessionInputBuffer payloadBuffer) throws java.io.IOException, WarcFormatException
java.io.IOException
WarcFormatException