[squid-dev] [PATCH] Parser-NG: Transfer-Encoding:chunked Parser
Amos Jeffries
squid3 at treenet.co.nz
Mon Jun 1 23:21:19 UTC 2015
Remove several performance regressions introduced by earlier Parser-NG
updates by refactoring the ChunkedCodingParser class into a new class
Http1::TeChunkedParser, which parses an SBuf I/O buffer for chunked
encoding data and (for now) copies the chunk payloads into a MemBuf buffer.

The new class inherits from Http1::Parser and presents the same API.
Chunk trailers are now available via the Parser API mimeHeader() method,
although nothing else in Squid makes use of that data yet. Parsing is
implemented with a ::Parser::Tokenizer for (nearly) compliant protocol
tokenization, using enumerated states instead of a dynamic
function-pointer chain.
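For context, here is a minimal hypothetical caller sketch (not part of the
patch) showing how the new API is driven by the hunks below; the function
name and buffer names are illustrative only, and the MemBuf is assumed to
have been init()ed by the caller:

// hypothetical helper, mirroring the setPayloadBuffer/parse/remaining
// pattern used in ModXact.cc, client_side.cc and http.cc below
#include "http/one/TeChunkedParser.h"
#include "MemBuf.h"
#include "SBuf.h"

static bool
decodeSomeChunks(Http1::TeChunkedParser &parser, SBuf &ioBuf, MemBuf &decoded)
{
    parser.setPayloadBuffer(&decoded);      // dechunked payload bytes land here
    const bool done = parser.parse(ioBuf);  // throws on malformed chunked coding
    ioBuf = parser.remaining();             // sync buffers: keep only unparsed input
    // once done, any chunk trailers are reachable via the mimeHeader() API
    return done;                            // true after the last-chunk (and trailers)
}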
Measurements:
Co-Advisor shows no compliance change.
Polygraph shows approx 1% speed improvement over trunk.
PS. The earlier bugs being investigated turned out to be inherited from
trunk and have already been fixed there.
Amos
-------------- next part --------------
=== modified file 'src/Makefile.am'
--- src/Makefile.am 2015-05-26 09:18:13 +0000
+++ src/Makefile.am 2015-06-01 06:21:35 +0000
@@ -274,42 +274,40 @@
AccessLogEntry.h \
AsyncEngine.cc \
AsyncEngine.h \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
CacheDigest.h \
CacheDigest.cc \
cache_manager.cc \
NeighborTypeDomainList.h \
CachePeer.cc \
CachePeer.h \
CacheManager.h \
carp.h \
carp.cc \
cbdata.cc \
cbdata.h \
- ChunkedCodingParser.cc \
- ChunkedCodingParser.h \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side.h \
client_side_reply.cc \
client_side_reply.h \
client_side_request.cc \
client_side_request.h \
ClientInfo.h \
BodyPipe.cc \
BodyPipe.h \
ClientInfo.h \
ClientRequestContext.h \
clientStream.cc \
clientStream.h \
clientStreamForward.h \
CollapsedForwarding.cc \
CollapsedForwarding.h \
CompletionDispatcher.cc \
@@ -1385,41 +1383,40 @@
tests/stub_main_cc.cc \
tests/stub_ipc_Forwarder.cc \
tests/stub_store_stats.cc \
tests/stub_EventLoop.cc \
time.cc \
BodyPipe.cc \
cache_manager.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
CachePeer.cc \
CachePeer.h \
CacheDigest.h \
tests/stub_CacheDigest.cc \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
$(DELAY_POOL_SOURCE) \
$(DISKIO_SOURCE) \
disk.h \
disk.cc \
dlink.h \
@@ -1815,41 +1812,40 @@
$(SQUID_CPPUNIT_LIBS)
tests_testDns_LDFLAGS= $(LIBADD_DL)
tests_testEvent_SOURCES = \
AccessLogEntry.cc \
BodyPipe.cc \
CacheDigest.h \
tests/stub_CacheDigest.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
CachePeer.cc \
CachePeer.h \
cache_manager.cc \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
debug.cc \
$(DELAY_POOL_SOURCE) \
$(DISKIO_SOURCE) \
disk.h \
disk.cc \
@@ -2061,41 +2057,40 @@
$(SQUID_CPPUNIT_LA)
## Tests of the EventLoop module.
tests_testEventLoop_SOURCES = \
AccessLogEntry.cc \
BodyPipe.cc \
CacheDigest.h \
tests/stub_CacheDigest.cc \
cache_manager.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
CachePeer.cc \
CachePeer.h \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
debug.cc \
$(DELAY_POOL_SOURCE) \
$(DISKIO_SOURCE) \
disk.h \
disk.cc \
@@ -2305,41 +2300,40 @@
$(REPL_OBJS) \
$(SQUID_CPPUNIT_LA)
tests_test_http_range_SOURCES = \
AccessLogEntry.cc \
BodyPipe.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
CachePeer.cc \
CachePeer.h \
cache_manager.cc \
CacheDigest.h \
tests/stub_CacheDigest.cc \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
debug.cc \
$(DELAY_POOL_SOURCE) \
$(DISKIO_SOURCE) \
disk.h \
disk.cc \
@@ -2611,41 +2605,40 @@
tests/stub_libeui.cc \
tests/stub_store_stats.cc \
tests/stub_EventLoop.cc \
time.cc \
BodyPipe.cc \
cache_manager.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
debug.cc \
CacheDigest.h \
tests/stub_CacheDigest.cc \
CachePeer.cc \
CachePeer.h \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
$(DELAY_POOL_SOURCE) \
disk.h \
disk.cc \
dlink.h \
dlink.cc \
@@ -3418,41 +3411,40 @@
## Tests of the URL module.
## TODO: Trim this down once the insanity is over.
tests_testURL_SOURCES = \
AccessLogEntry.cc \
BodyPipe.cc \
cache_cf.h \
AuthReg.h \
YesNoNone.h \
YesNoNone.cc \
RefreshPattern.h \
cache_cf.cc \
tests/stub_cache_manager.cc \
CacheDigest.h \
tests/stub_CacheDigest.cc \
CachePeer.cc \
CachePeer.h \
carp.h \
tests/stub_carp.cc \
cbdata.cc \
- ChunkedCodingParser.cc \
client_db.h \
client_db.cc \
client_side.h \
client_side.cc \
client_side_reply.cc \
client_side_request.cc \
ClientInfo.h \
clientStream.cc \
tests/stub_CollapsedForwarding.cc \
ConfigOption.cc \
ConfigParser.cc \
CpuAffinityMap.cc \
CpuAffinityMap.h \
CpuAffinitySet.cc \
CpuAffinitySet.h \
$(DELAY_POOL_SOURCE) \
disk.h \
disk.cc \
DiskIO/ReadRequest.cc \
DiskIO/WriteRequest.cc \
=== modified file 'src/adaptation/icap/ModXact.cc'
--- src/adaptation/icap/ModXact.cc 2015-04-27 09:52:02 +0000
+++ src/adaptation/icap/ModXact.cc 2015-06-01 19:48:35 +0000
@@ -5,44 +5,44 @@
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
/* DEBUG: section 93 ICAP (RFC 3507) Client */
#include "squid.h"
#include "AccessLogEntry.h"
#include "adaptation/Answer.h"
#include "adaptation/History.h"
#include "adaptation/icap/Client.h"
#include "adaptation/icap/Config.h"
#include "adaptation/icap/History.h"
#include "adaptation/icap/Launcher.h"
#include "adaptation/icap/ModXact.h"
#include "adaptation/icap/ServiceRep.h"
#include "adaptation/Initiator.h"
#include "auth/UserRequest.h"
#include "base/TextException.h"
#include "base64.h"
-#include "ChunkedCodingParser.h"
#include "comm.h"
#include "comm/Connection.h"
#include "err_detail_type.h"
+#include "http/one/TeChunkedParser.h"
#include "HttpHeaderTools.h"
#include "HttpMsg.h"
#include "HttpReply.h"
#include "HttpRequest.h"
#include "SquidTime.h"
#include "URL.h"
// flow and terminology:
// HTTP| --> receive --> encode --> write --> |network
// end | <-- send <-- parse <-- read <-- |end
// TODO: replace gotEncapsulated() with something faster; we call it often
CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXact);
CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXactLauncher);
static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
Adaptation::Icap::ModXact::State::State()
{
@@ -1087,68 +1087,62 @@
if (!parsed) { // need more data
debugs(93, 5, HERE << "parse failed, need more data, return false");
head->reset();
return false;
}
if (HttpRequest *r = dynamic_cast<HttpRequest*>(head))
urlCanonical(r); // parse does not set HttpRequest::canonical
debugs(93, 5, HERE << "parse success, consume " << head->hdr_sz << " bytes, return true");
readBuf.consume(head->hdr_sz);
return true;
}
void Adaptation::Icap::ModXact::decideOnParsingBody()
{
if (gotEncapsulated("res-body") || gotEncapsulated("req-body")) {
debugs(93, 5, HERE << "expecting a body");
state.parsing = State::psBody;
replyHttpBodySize = 0;
- bodyParser = new ChunkedCodingParser;
+ bodyParser = new Http1::TeChunkedParser;
makeAdaptedBodyPipe("adapted response from the ICAP server");
Must(state.sending == State::sendingAdapted);
} else {
debugs(93, 5, HERE << "not expecting a body");
stopParsing();
stopSending(true);
}
}
void Adaptation::Icap::ModXact::parseBody()
{
Must(state.parsing == State::psBody);
Must(bodyParser);
debugs(93, 5, "have " << readBuf.length() << " body bytes to parse");
// the parser will throw on errors
BodyPipeCheckout bpc(*adapted.body_pipe);
- // XXX: performance regression. SBuf-convert (or Parser-convert?) the chunked decoder.
- MemBuf encodedData;
- encodedData.init();
- // NP: we must do this instead of pointing encodedData at the SBuf::rawContent
- // because chunked decoder uses MemBuf::consume, which shuffles buffer bytes around.
- encodedData.append(readBuf.rawContent(), readBuf.length());
- const bool parsed = bodyParser->parse(&encodedData, &bpc.buf);
- // XXX: httpChunkDecoder has consumed from MemBuf.
- readBuf.consume(readBuf.length() - encodedData.contentSize());
+ bodyParser->setPayloadBuffer(&bpc.buf);
+ const bool parsed = bodyParser->parse(readBuf);
+ readBuf = bodyParser->remaining(); // sync buffers after parse
bpc.checkIn();
debugs(93, 5, "have " << readBuf.length() << " body bytes after parsed all: " << parsed);
replyHttpBodySize += adapted.body_pipe->buf().contentSize();
// TODO: expose BodyPipe::putSize() to make this check simpler and clearer
// TODO: do we really need this if we disable when sending headers?
if (adapted.body_pipe->buf().contentSize() > 0) { // parsed something sometime
disableRepeats("sent adapted content");
disableBypass("sent adapted content", true);
}
if (parsed) {
if (state.readyForUob && bodyParser->useOriginBody >= 0) {
prepPartialBodyEchoing(
static_cast<uint64_t>(bodyParser->useOriginBody));
stopParsing();
return;
}
=== modified file 'src/adaptation/icap/ModXact.h'
--- src/adaptation/icap/ModXact.h 2015-01-13 07:25:36 +0000
+++ src/adaptation/icap/ModXact.h 2015-06-01 19:48:29 +0000
@@ -1,49 +1,48 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef SQUID_ICAPMODXACT_H
#define SQUID_ICAPMODXACT_H
#include "AccessLogEntry.h"
#include "adaptation/icap/InOut.h"
#include "adaptation/icap/Launcher.h"
#include "adaptation/icap/Xaction.h"
#include "BodyPipe.h"
+#include "http/one/forward.h"
/*
* ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
* ICAPXaction as the base. The ICAPModXact receives a virgin HTTP message
* from an ICAP vecoring point, (a.k.a., initiator), communicates with the
* ICAP server, and sends the adapted HTTP message headers back.
* Virgin/adapted HTTP message body is reveived/sent using BodyPipe
* interface. The initiator (or its associate) is expected to send and/or
* receive the HTTP body.
*/
-class ChunkedCodingParser;
-
namespace Adaptation
{
namespace Icap
{
// estimated future presence and size of something (e.g., HTTP body)
class SizedEstimate
{
public:
SizedEstimate(); // not expected by default
void expect(int64_t aSize); // expect with any, even unknown size
bool expected() const;
/* other members can be accessed iff expected() */
bool knownSize() const;
uint64_t size() const; // can be accessed iff knownSize()
@@ -233,41 +232,41 @@
void stopBackup();
virtual void fillPendingStatus(MemBuf &buf) const;
virtual void fillDoneStatus(MemBuf &buf) const;
virtual bool fillVirginHttpHeader(MemBuf&) const;
private:
void packHead(MemBuf &httpBuf, const HttpMsg *head);
void encapsulateHead(MemBuf &icapBuf, const char *section, MemBuf &httpBuf, const HttpMsg *head);
bool gotEncapsulated(const char *section) const;
void checkConsuming();
virtual void finalizeLogInfo();
SizedEstimate virginBody;
VirginBodyAct virginBodyWriting; // virgin body writing state
VirginBodyAct virginBodySending; // virgin body sending state
uint64_t virginConsumed; // virgin data consumed so far
Preview preview; // use for creating (writing) the preview
- ChunkedCodingParser *bodyParser; // ICAP response body parser
+ Http1::TeChunkedParser *bodyParser; // ICAP response body parser
bool canStartBypass; // enables bypass of transaction failures
bool protectGroupBypass; // protects ServiceGroup-wide bypass of failures
/**
* size of HTTP header in ICAP reply or -1 if there is not any encapsulated
* message data
*/
int64_t replyHttpHeaderSize;
/**
* size of dechunked HTTP body in ICAP reply or -1 if there is not any
* encapsulated message data
*/
int64_t replyHttpBodySize;
int adaptHistoryId; ///< adaptation history slot reservation
class State
{
=== modified file 'src/client_side.cc'
--- src/client_side.cc 2015-05-26 17:25:04 +0000
+++ src/client_side.cc 2015-06-01 19:48:10 +0000
@@ -46,64 +46,64 @@
* The request is *immediately* kicked off, and data flows through
* to clientSocketRecipient.
*
\par
* If the data that arrives at clientSocketRecipient is not for the current
* request, clientSocketRecipient simply returns, without requesting more
* data, or sending it.
*
\par
* ClientKeepAliveNextRequest will then detect the presence of data in
* the next ClientHttpRequest, and will send it, restablishing the
* data flow.
*/
#include "squid.h"
#include "acl/FilledChecklist.h"
#include "anyp/PortCfg.h"
#include "base/Subscription.h"
#include "base/TextException.h"
#include "CachePeer.h"
-#include "ChunkedCodingParser.h"
#include "client_db.h"
#include "client_side.h"
#include "client_side_reply.h"
#include "client_side_request.h"
#include "ClientRequestContext.h"
#include "clientStream.h"
#include "comm.h"
#include "comm/Connection.h"
#include "comm/Loops.h"
#include "comm/Read.h"
#include "comm/TcpAcceptor.h"
#include "comm/Write.h"
#include "CommCalls.h"
#include "errorpage.h"
#include "fd.h"
#include "fde.h"
#include "fqdncache.h"
#include "FwdState.h"
#include "globals.h"
#include "helper.h"
#include "helper/Reply.h"
#include "http.h"
#include "http/one/RequestParser.h"
+#include "http/one/TeChunkedParser.h"
#include "HttpHdrContRange.h"
#include "HttpHeaderTools.h"
#include "HttpReply.h"
#include "HttpRequest.h"
#include "ident/Config.h"
#include "ident/Ident.h"
#include "internal.h"
#include "ipc/FdNotes.h"
#include "ipc/StartListening.h"
#include "log/access_log.h"
#include "MemBuf.h"
#include "MemObject.h"
#include "mime_header.h"
#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "rfc1738.h"
#include "servers/forward.h"
#include "SquidConfig.h"
#include "SquidTime.h"
#include "StatCounters.h"
@@ -3188,96 +3188,89 @@
ConnStateData::handleReadData()
{
// if we are reading a body, stuff data into the body pipe
if (bodyPipe != NULL)
return handleRequestBodyData();
return true;
}
/**
* called when new request body data has been buffered in in.buf
* may close the connection if we were closing and piped everything out
*
* \retval false called comm_close or setReplyToError (the caller should bail)
* \retval true we did not call comm_close or setReplyToError
*/
bool
ConnStateData::handleRequestBodyData()
{
assert(bodyPipe != NULL);
- size_t putSize = 0;
-
if (in.bodyParser) { // chunked encoding
- if (const err_type error = handleChunkedRequestBody(putSize)) {
+ if (const err_type error = handleChunkedRequestBody()) {
abortChunkedRequestBody(error);
return false;
}
} else { // identity encoding
debugs(33,5, HERE << "handling plain request body for " << clientConnection);
- putSize = bodyPipe->putMoreData(in.buf.c_str(), in.buf.length());
+ const size_t putSize = bodyPipe->putMoreData(in.buf.c_str(), in.buf.length());
+ if (putSize > 0)
+ consumeInput(putSize);
+
if (!bodyPipe->mayNeedMoreData()) {
// BodyPipe will clear us automagically when we produced everything
bodyPipe = NULL;
}
}
- if (putSize > 0)
- consumeInput(putSize);
-
if (!bodyPipe) {
debugs(33,5, HERE << "produced entire request body for " << clientConnection);
if (const char *reason = stoppedSending()) {
/* we've finished reading like good clients,
* now do the close that initiateClose initiated.
*/
debugs(33, 3, HERE << "closing for earlier sending error: " << reason);
clientConnection->close();
return false;
}
}
return true;
}
/// parses available chunked encoded body bytes, checks size, returns errors
err_type
-ConnStateData::handleChunkedRequestBody(size_t &putSize)
+ConnStateData::handleChunkedRequestBody()
{
debugs(33, 7, "chunked from " << clientConnection << ": " << in.buf.length());
try { // the parser will throw on errors
if (in.buf.isEmpty()) // nothing to do
return ERR_NONE;
- MemBuf raw; // ChunkedCodingParser only works with MemBufs
- // add one because MemBuf will assert if it cannot 0-terminate
- raw.init(in.buf.length(), in.buf.length()+1);
- raw.append(in.buf.c_str(), in.buf.length());
-
- const mb_size_t wasContentSize = raw.contentSize();
BodyPipeCheckout bpc(*bodyPipe);
- const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+ in.bodyParser->setPayloadBuffer(&bpc.buf);
+ const bool parsed = in.bodyParser->parse(in.buf);
+ in.buf = in.bodyParser->remaining(); // sync buffers
bpc.checkIn();
- putSize = wasContentSize - raw.contentSize();
// dechunk then check: the size limit applies to _dechunked_ content
if (clientIsRequestBodyTooLargeForPolicy(bodyPipe->producedSize()))
return ERR_TOO_BIG;
if (parsed) {
finishDechunkingRequest(true);
Must(!bodyPipe);
return ERR_NONE; // nil bodyPipe implies body end for the caller
}
// if chunk parser needs data, then the body pipe must need it too
Must(!in.bodyParser->needsMoreData() || bodyPipe->mayNeedMoreData());
// if parser needs more space and we can consume nothing, we will stall
Must(!in.bodyParser->needsMoreSpace() || bodyPipe->buf().hasContent());
} catch (...) { // TODO: be more specific
debugs(33, 3, HERE << "malformed chunks" << bodyPipe->status());
return ERR_INVALID_REQ;
}
@@ -4692,41 +4685,41 @@
clientConnection->close();
}
}
void
ConnStateData::expectNoForwarding()
{
if (bodyPipe != NULL) {
debugs(33, 4, HERE << "no consumer for virgin body " << bodyPipe->status());
bodyPipe->expectNoConsumption();
}
}
/// initialize dechunking state
void
ConnStateData::startDechunkingRequest()
{
Must(bodyPipe != NULL);
debugs(33, 5, HERE << "start dechunking" << bodyPipe->status());
assert(!in.bodyParser);
- in.bodyParser = new ChunkedCodingParser;
+ in.bodyParser = new Http1::TeChunkedParser;
}
/// put parsed content into input buffer and clean up
void
ConnStateData::finishDechunkingRequest(bool withSuccess)
{
debugs(33, 5, HERE << "finish dechunking: " << withSuccess);
if (bodyPipe != NULL) {
debugs(33, 7, HERE << "dechunked tail: " << bodyPipe->status());
BodyPipe::Pointer myPipe = bodyPipe;
stopProducingFor(bodyPipe, withSuccess); // sets bodyPipe->bodySize()
Must(!bodyPipe); // we rely on it being nil after we are done with body
if (withSuccess) {
Must(myPipe->bodySizeKnown());
ClientSocketContext::Pointer context = getCurrentContext();
if (context != NULL && context->http && context->http->request)
context->http->request->setContentLength(myPipe->bodySize());
}
}
=== modified file 'src/client_side.h'
--- src/client_side.h 2015-04-10 08:54:13 +0000
+++ src/client_side.h 2015-06-01 19:48:21 +0000
@@ -11,41 +11,40 @@
#ifndef SQUID_CLIENTSIDE_H
#define SQUID_CLIENTSIDE_H
#include "clientStreamForward.h"
#include "comm.h"
#include "helper/forward.h"
#include "http/forward.h"
#include "HttpControlMsg.h"
#include "ipc/FdNotes.h"
#include "SBuf.h"
#if USE_AUTH
#include "auth/UserRequest.h"
#endif
#if USE_OPENSSL
#include "ssl/support.h"
#endif
class ConnStateData;
class ClientHttpRequest;
class clientStreamNode;
-class ChunkedCodingParser;
namespace AnyP
{
class PortCfg;
} // namespace Anyp
/**
* Badly named.
* This is in fact the processing context for a single HTTP request.
*
* Managing what has been done, and what happens next to the data buffer
* holding what we hope is an HTTP request.
*
* Parsing is still a mess of global functions done in conjunction with the
* real socket controller which generated ClientHttpRequest.
* It also generates one of us and passes us control from there based on
* the results of the parse.
*
* After that all the request interpretation and adaptation is in our scope.
* Then finally the reply fetcher is created by this and we get the result
* back. Which we then have to manage writing of it to the ConnStateData.
@@ -191,41 +190,41 @@
void receivedFirstByte();
// HttpControlMsgSink API
virtual void sendControlMsg(HttpControlMsg msg);
// Client TCP connection details from comm layer.
Comm::ConnectionPointer clientConnection;
/**
* The transfer protocol currently being spoken on this connection.
* HTTP/1 CONNECT and HTTP/2 SETTINGS offers the ability to change
* protocols on the fly.
*/
AnyP::ProtocolVersion transferProtocol;
struct In {
In();
~In();
bool maybeMakeSpaceAvailable();
- ChunkedCodingParser *bodyParser; ///< parses chunked request body
+ Http1::TeChunkedParser *bodyParser; ///< parses chunked request body
SBuf buf;
} in;
/** number of body bytes we need to comm_read for the "current" request
*
* \retval 0 We do not need to read any [more] body bytes
* \retval negative May need more but do not know how many; could be zero!
* \retval positive Need to read exactly that many more body bytes
*/
int64_t mayNeedToReadMoreBody() const;
#if USE_AUTH
/**
* Fetch the user details for connection based authentication
* NOTE: this is ONLY connection based because NTLM and Negotiate is against HTTP spec.
*/
const Auth::UserRequest::Pointer &getAuth() const { return auth_; }
/**
* Set the user details for connection-based authentication to use from now until connection closure.
@@ -406,41 +405,41 @@
/// ClientStream calls this to supply response header (once) and data
/// for the current ClientSocketContext.
virtual void handleReply(HttpReply *header, StoreIOBuffer receivedData) = 0;
/// remove no longer needed leading bytes from the input buffer
void consumeInput(const size_t byteCount);
/* TODO: Make the methods below (at least) non-public when possible. */
/// stop parsing the request and create context for relaying error info
ClientSocketContext *abortRequestParsing(const char *const errUri);
/// client data which may need to forward as-is to server after an
/// on_unsupported_protocol tunnel decision.
SBuf preservedClientData;
protected:
void startDechunkingRequest();
void finishDechunkingRequest(bool withSuccess);
void abortChunkedRequestBody(const err_type error);
- err_type handleChunkedRequestBody(size_t &putSize);
+ err_type handleChunkedRequestBody();
void startPinnedConnectionMonitoring();
void clientPinnedConnectionRead(const CommIoCbParams &io);
/// parse input buffer prefix into a single transfer protocol request
/// return NULL to request more header bytes (after checking any limits)
/// use abortRequestParsing() to handle parsing errors w/o creating request
virtual ClientSocketContext *parseOneRequest() = 0;
/// start processing a freshly parsed request
virtual void processParsedRequest(ClientSocketContext *context) = 0;
/// returning N allows a pipeline of 1+N requests (see pipeline_prefetch)
virtual int pipelinePrefetchMax() const;
/// timeout to use when waiting for the next request
virtual time_t idleTimeout() const = 0;
BodyPipe::Pointer bodyPipe; ///< set when we are reading request body
=== modified file 'src/http.cc'
--- src/http.cc 2015-04-27 09:52:02 +0000
+++ src/http.cc 2015-06-01 19:47:47 +0000
@@ -2,53 +2,53 @@
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
/* DEBUG: section 11 Hypertext Transfer Protocol (HTTP) */
/*
* Anonymizing patch by lutz at as-node.jena.thur.de
* have a look into http-anon.c to get more informations.
*/
#include "squid.h"
#include "acl/FilledChecklist.h"
#include "base/AsyncJobCalls.h"
#include "base/TextException.h"
#include "base64.h"
#include "CachePeer.h"
-#include "ChunkedCodingParser.h"
#include "client_side.h"
#include "comm/Connection.h"
#include "comm/Read.h"
#include "comm/Write.h"
#include "CommRead.h"
#include "err_detail_type.h"
#include "errorpage.h"
#include "fd.h"
#include "fde.h"
#include "globals.h"
#include "http.h"
#include "http/one/ResponseParser.h"
+#include "http/one/TeChunkedParser.h"
#include "HttpControlMsg.h"
#include "HttpHdrCc.h"
#include "HttpHdrContRange.h"
#include "HttpHdrSc.h"
#include "HttpHdrScTarget.h"
#include "HttpHeaderTools.h"
#include "HttpReply.h"
#include "HttpRequest.h"
#include "HttpStateFlags.h"
#include "log/access_log.h"
#include "MemBuf.h"
#include "MemObject.h"
#include "neighbors.h"
#include "peer_proxy_negotiate_auth.h"
#include "profiler/Profiler.h"
#include "refresh.h"
#include "RefreshPattern.h"
#include "rfc1738.h"
#include "SquidConfig.h"
#include "SquidTime.h"
@@ -771,41 +771,41 @@
newrep->sline.version.protocol = hp->messageProtocol().protocol;
newrep->sline.version.major = hp->messageProtocol().major;
newrep->sline.version.minor = hp->messageProtocol().minor;
debugs(11, 2, "error parsing response headers mime block");
}
// done with Parser, now process using the HttpReply
hp = NULL;
newrep->removeStaleWarnings();
if (newrep->sline.protocol == AnyP::PROTO_HTTP && newrep->sline.status() >= 100 && newrep->sline.status() < 200) {
handle1xx(newrep);
ctx_exit(ctx);
return;
}
flags.chunked = false;
if (newrep->sline.protocol == AnyP::PROTO_HTTP && newrep->header.chunked()) {
flags.chunked = true;
- httpChunkDecoder = new ChunkedCodingParser;
+ httpChunkDecoder = new Http1::TeChunkedParser;
}
if (!peerSupportsConnectionPinning())
request->flags.connectionAuthDisabled = true;
HttpReply *vrep = setVirginReply(newrep);
flags.headers_parsed = true;
keepaliveAccounting(vrep);
checkDateSkew(vrep);
processSurrogateControl (vrep);
request->hier.peer_reply_status = newrep->sline.status();
ctx_exit(ctx);
}
/// ignore or start forwarding the 1xx response (a.k.a., control message)
@@ -1376,49 +1376,43 @@
HttpStateData::writeReplyBody()
{
truncateVirginBody(); // if needed
const char *data = inBuf.rawContent();
int len = inBuf.length();
addVirginReplyBody(data, len);
inBuf.consume(len);
}
bool
HttpStateData::decodeAndWriteReplyBody()
{
const char *data = NULL;
int len;
bool wasThereAnException = false;
assert(flags.chunked);
assert(httpChunkDecoder);
SQUID_ENTER_THROWING_CODE();
MemBuf decodedData;
decodedData.init();
- // XXX: performance regression. SBuf-convert (or Parser-convert?) the chunked decoder.
- MemBuf encodedData;
- encodedData.init();
- // NP: we must do this instead of pointing encodedData at the SBuf::rawContent
- // because chunked decoder uses MemBuf::consume, which shuffles buffer bytes around.
- encodedData.append(inBuf.rawContent(), inBuf.length());
- const bool doneParsing = httpChunkDecoder->parse(&encodedData,&decodedData);
- // XXX: httpChunkDecoder has consumed from MemBuf.
- inBuf.consume(inBuf.length() - encodedData.contentSize());
+ httpChunkDecoder->setPayloadBuffer(&decodedData);
+ const bool doneParsing = httpChunkDecoder->parse(inBuf);
+ inBuf = httpChunkDecoder->remaining(); // sync buffers after parse
len = decodedData.contentSize();
data=decodedData.content();
addVirginReplyBody(data, len);
if (doneParsing) {
lastChunk = 1;
flags.do_next_read = false;
}
SQUID_EXIT_THROWING_CODE(wasThereAnException);
return wasThereAnException;
}
/**
* processReplyBody has two purposes:
* 1 - take the reply body data, if any, and put it into either
* the StoreEntry, or give it over to ICAP.
* 2 - see if we made it to the end of the response (persistent
* connections and such)
*/
void
HttpStateData::processReplyBody()
=== modified file 'src/http.h'
--- src/http.h 2015-03-17 02:53:05 +0000
+++ src/http.h 2015-06-01 19:47:33 +0000
@@ -1,36 +1,36 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef SQUID_HTTP_H
#define SQUID_HTTP_H
#include "clients/Client.h"
#include "comm.h"
+#include "http/forward.h"
#include "HttpStateFlags.h"
-class ChunkedCodingParser;
class FwdState;
class HttpHeader;
class HttpStateData : public Client
{
CBDATA_CLASS(HttpStateData);
public:
HttpStateData(FwdState *);
~HttpStateData();
static void httpBuildRequestHeader(HttpRequest * request,
StoreEntry * entry,
const AccessLogEntryPointer &al,
HttpHeader * hdr_out,
const HttpStateFlags &flags);
virtual const Comm::ConnectionPointer & dataConnection() const;
/* should be private */
bool sendRequest();
@@ -103,34 +103,34 @@
virtual void handleRequestBodyProducerAborted();
void writeReplyBody();
bool decodeAndWriteReplyBody();
bool finishingBrokenPost();
bool finishingChunkedRequest();
void doneSendingRequestBody();
void requestBodyHandler(MemBuf &);
virtual void sentRequestBody(const CommIoCbParams &io);
void wroteLast(const CommIoCbParams &io);
void sendComplete();
void httpStateConnClosed(const CommCloseCbParams &params);
void httpTimeout(const CommTimeoutCbParams &params);
mb_size_t buildRequestPrefix(MemBuf * mb);
static bool decideIfWeDoRanges (HttpRequest * orig_request);
bool peerSupportsConnectionPinning() const;
/// Parser being used at present to parse the HTTP/ICY server response.
Http1::ResponseParserPointer hp;
- ChunkedCodingParser *httpChunkDecoder;
+ Http1::TeChunkedParser *httpChunkDecoder;
/// amount of message payload/body received so far.
int64_t payloadSeen;
/// positive when we read more than we wanted
int64_t payloadTruncated;
};
int httpCachable(const HttpRequestMethod&);
void httpStart(FwdState *);
const char *httpMakeVaryMark(HttpRequest * request, HttpReply const * reply);
#endif /* SQUID_HTTP_H */
=== modified file 'src/http/one/Makefile.am'
--- src/http/one/Makefile.am 2015-01-20 12:33:23 +0000
+++ src/http/one/Makefile.am 2015-06-01 19:43:57 +0000
@@ -1,20 +1,22 @@
## Copyright (C) 1996-2015 The Squid Software Foundation and contributors
##
## Squid software is distributed under GPLv2+ license and includes
## contributions from numerous individuals and organizations.
## Please see the COPYING and CONTRIBUTORS files for details.
##
include $(top_srcdir)/src/Common.am
include $(top_srcdir)/src/TestHeaders.am
noinst_LTLIBRARIES = libhttp1.la
libhttp1_la_SOURCES = \
forward.h \
Parser.cc \
Parser.h \
RequestParser.cc \
RequestParser.h \
ResponseParser.cc \
- ResponseParser.h
+ ResponseParser.h \
+ TeChunkedParser.cc \
+ TeChunkedParser.h
=== modified file 'src/http/one/Parser.h'
--- src/http/one/Parser.h 2015-02-20 03:25:12 +0000
+++ src/http/one/Parser.h 2015-04-12 11:05:50 +0000
@@ -6,44 +6,47 @@
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
#define _SQUID_SRC_HTTP_ONE_PARSER_H
#include "anyp/ProtocolVersion.h"
#include "http/one/forward.h"
#include "http/StatusCode.h"
#include "SBuf.h"
namespace Parser {
class Tokenizer;
}
namespace Http {
namespace One {
// Parser states
enum ParseState {
- HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet
- HTTP_PARSE_FIRST, ///< HTTP/1 message first-line
- HTTP_PARSE_MIME, ///< HTTP/1 mime-header block
- HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error
+ HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet
+ HTTP_PARSE_FIRST, ///< HTTP/1 message first-line
+ HTTP_PARSE_CHUNK_SZ, ///< HTTP/1.1 chunked encoding chunk-size
+ HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext
+ HTTP_PARSE_CHUNK, ///< HTTP/1.1 chunked encoding chunk-data
+ HTTP_PARSE_MIME, ///< HTTP/1 mime-header block
+ HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error
};
/** HTTP/1.x protocol parser
*
* Works on a raw character I/O buffer and tokenizes the content into
* the major CRLF delimited segments of an HTTP/1 procotol message:
*
* \item first-line (request-line / simple-request / status-line)
* \item mime-header 0*( header-name ':' SP field-value CRLF)
*/
class Parser : public RefCountable
{
public:
typedef SBuf::size_type size_type;
Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE) {}
virtual ~Parser() {}
/// Set this parser back to a default state.
/// Will DROP any reference to a buffer (does not free).
=== renamed file 'src/ChunkedCodingParser.cc' => 'src/http/one/TeChunkedParser.cc'
--- src/ChunkedCodingParser.cc 2015-01-13 07:25:36 +0000
+++ src/http/one/TeChunkedParser.cc 2015-06-01 19:45:00 +0000
@@ -1,311 +1,214 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#include "squid.h"
#include "base/TextException.h"
-#include "ChunkedCodingParser.h"
#include "Debug.h"
+#include "http/one/TeChunkedParser.h"
+#include "http/ProtocolVersion.h"
#include "MemBuf.h"
+#include "parser/Tokenizer.h"
#include "Parsing.h"
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
-ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
-ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
-ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
-ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
-
-ChunkedCodingParser::ChunkedCodingParser()
+Http::One::TeChunkedParser::TeChunkedParser()
{
- reset();
+ // chunked encoding only exists in HTTP/1.1
+ Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
+
+ clear();
}
-void ChunkedCodingParser::reset()
+void
+Http::One::TeChunkedParser::clear()
{
- theStep = psChunkSize;
+ parsingStage_ = Http1::HTTP_PARSE_NONE;
+ buf_.clear();
theChunkSize = theLeftBodySize = 0;
- doNeedMoreData = false;
- theIn = theOut = NULL;
+ theOut = NULL;
useOriginBody = -1;
- inQuoted = inSlashed = false;
}
-bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
+bool
+Http::One::TeChunkedParser::parse(const SBuf &aBuf)
{
- Must(rawData && parsedContent);
- theIn = rawData;
- theOut = parsedContent;
-
- // we must reset this all the time so that mayContinue() lets us
- // output more content if we stopped due to needsMoreSpace() before
- doNeedMoreData = !theIn->hasContent();
+ buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
- while (mayContinue()) {
- (this->*theStep)();
- }
+ if (buf_.isEmpty()) // nothing to do (yet)
+ return false;
- return theStep == psMessageEnd;
-}
+ debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
-bool ChunkedCodingParser::needsMoreData() const
-{
- return doNeedMoreData;
-}
+ Must(!buf_.isEmpty() && theOut);
-bool ChunkedCodingParser::needsMoreSpace() const
-{
- assert(theOut);
- return theStep == psChunkBody && !theOut->hasPotentialSpace();
+ if (parsingStage_ == Http1::HTTP_PARSE_NONE)
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+
+ ::Parser::Tokenizer tok(buf_);
+
+ // loop for as many chunks as we can
+ // use do-while instead of while so that we can incrementally
+ // restart in the middle of a chunk/frame
+ do {
+
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
+ return false;
+
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
+ return false;
+
+ if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
+ return false;
+
+ // loop for as many chunks as we can
+ } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
+
+ return !needsMoreData() && !needsMoreSpace();
}
-bool ChunkedCodingParser::mayContinue() const
+bool
+Http::One::TeChunkedParser::needsMoreSpace() const
{
- return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
+ assert(theOut);
+ return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
}
-void ChunkedCodingParser::parseChunkSize()
+/// RFC 7230 section 4.1 chunk-size
+bool
+Http::One::TeChunkedParser::parseChunkSize(::Parser::Tokenizer &tok)
{
Must(theChunkSize <= 0); // Should(), really
- const char *p = theIn->content();
- while (p < theIn->space() && xisxdigit(*p)) ++p;
- if (p >= theIn->space()) {
- doNeedMoreData = true;
- return;
- }
-
int64_t size = -1;
- if (StringToInt64(theIn->content(), size, &p, 16)) {
+ if (tok.int64(size, 16, false) && !tok.atEnd()) {
if (size < 0)
throw TexcHere("negative chunk size");
theChunkSize = theLeftBodySize = size;
debugs(94,7, "found chunk: " << theChunkSize);
- // parse chunk extensions only in the last-chunk
- if (theChunkSize)
- theStep = psUnusedChunkExtension;
- else {
- theIn->consume(p - theIn->content());
- theStep = psLastChunkExtension;
- }
- } else
- throw TexcHere("corrupted chunk size");
-}
+ buf_ = tok.remaining(); // parse checkpoint
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
+ return true;
-void ChunkedCodingParser::parseUnusedChunkExtension()
-{
- size_t crlfBeg = 0;
- size_t crlfEnd = 0;
- if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
- inQuoted = inSlashed = false;
- theIn->consume(crlfEnd);
- theStep = theChunkSize ? psChunkBody : psTrailer;
- } else {
- theIn->consume(theIn->contentSize());
- doNeedMoreData = true;
+ } else if (tok.atEnd()) {
+ return false; // need more data
}
+
+ // else error
+ throw TexcHere("corrupted chunk size");
+ return false; // should not be reachable
}
-void ChunkedCodingParser::parseChunkBody()
+/**
+ * Parses a set of RFC 7230 section 4.1.1 chunk-ext
+ * http://tools.ietf.org/html/rfc7230#section-4.1.1
+ *
+ * chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+ * chunk-ext-name = token
+ * chunk-ext-val = token / quoted-string
+ *
+ * ICAP 'use-original-body=N' extension is supported.
+ */
+bool
+Http::One::TeChunkedParser::parseChunkExtension(::Parser::Tokenizer &tok, bool skipKnown)
{
- Must(theLeftBodySize > 0); // Should, really
+ // TODO implement a proper quoted-string Tokenizer method
+ static const CharacterSet qString = CharacterSet("qString","\"\r\n").add('\0').complement();
- const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
- const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
+ SBuf ext;
+ while (tok.skip(';') && tok.prefix(ext, CharacterSet::TCHAR)) {
- doNeedMoreData = availSize < theLeftBodySize;
- // and we may also need more space
+ // whole value part is optional. if no '=' expect next chunk-ext
+ if (tok.skip('=')) {
+
+ if (!skipKnown) {
+ if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
+ debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
+ buf_ = tok.remaining(); // parse checkpoint
+ continue;
+ }
+ }
- theOut->append(theIn->content(), safeSize);
- theIn->consume(safeSize);
- theLeftBodySize -= safeSize;
+ debugs(94, 5, "skipping unknown chunk extension " << ext);
- if (theLeftBodySize == 0)
- theStep = psChunkEnd;
- else
- Must(needsMoreData() || needsMoreSpace());
-}
-
-void ChunkedCodingParser::parseChunkEnd()
-{
- Must(theLeftBodySize == 0); // Should(), really
+ // unknown might have a value token ...
+ if (tok.skipAll(CharacterSet::TCHAR) && !tok.atEnd()) {
+ buf_ = tok.remaining(); // parse checkpoint
+ continue;
+ }
- size_t crlfBeg = 0;
- size_t crlfEnd = 0;
+ // ... or a quoted-string
+ if (tok.skipOne(CharacterSet::DQUOTE) && tok.skipAll(qString) && tok.skipOne(CharacterSet::DQUOTE)) {
+ buf_ = tok.remaining(); // parse checkpoint
+ continue;
+ }
- if (findCrlf(crlfBeg, crlfEnd)) {
- if (crlfBeg != 0) {
- throw TexcHere("found data between chunk end and CRLF");
- return;
+ // otherwise need more data OR corrupt syntax
+ break;
}
- theIn->consume(crlfEnd);
- theChunkSize = 0; // done with the current chunk
- theStep = psChunkSize;
- return;
+ if (!tok.atEnd())
+ buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
}
- doNeedMoreData = true;
-}
-
-void ChunkedCodingParser::parseTrailer()
-{
- Must(theChunkSize == 0); // Should(), really
-
- while (mayContinue())
- parseTrailerHeader();
-}
-
-void ChunkedCodingParser::parseTrailerHeader()
-{
- size_t crlfBeg = 0;
- size_t crlfEnd = 0;
-
- if (findCrlf(crlfBeg, crlfEnd)) {
+ if (tok.atEnd())
+ return false;
-#if TRAILERS_ARE_SUPPORTED
- if (crlfBeg > 0)
- theTrailer.append(theIn->content(), crlfEnd);
-#endif
-
- theIn->consume(crlfEnd);
-
- if (crlfBeg == 0)
- theStep = psMessageEnd;
-
- return;
+ if (skipLineTerminator(tok)) {
+ buf_ = tok.remaining(); // checkpoint
+ // non-0 chunk means data, 0-size means optional Trailer follows
+ parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
+ return true;
}
- doNeedMoreData = true;
-}
-
-void ChunkedCodingParser::parseMessageEnd()
-{
- // termination step, should not be called
- Must(false); // Should(), really
-}
-
-/// Finds next CRLF. Does not store parsing state.
-bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
-{
- bool quoted = false;
- bool slashed = false;
- return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
+ throw TexcHere("corrupted chunk extension value");
+ return false;
}
-/// Finds next CRLF. Parsing state stored in quoted and slashed
-/// parameters. Incremental: can resume when more data is available.
-bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
+bool
+Http::One::TeChunkedParser::parseChunkBody(::Parser::Tokenizer &tok)
{
- // XXX: This code was copied, with permission, from another software.
- // There is a similar and probably better code inside httpHeaderParse
- // but it seems difficult to isolate due to parsing-unrelated bloat.
- // Such isolation should probably be done before this class is used
- // for handling of traffic "more external" than ICAP.
-
- const char *buf = theIn->content();
- size_t size = theIn->contentSize();
-
- ssize_t crOff = -1;
-
- for (size_t i = 0; i < size; ++i) {
- if (slashed) {
- slashed = false;
- continue;
- }
+ Must(theLeftBodySize > 0); // Should, really
- const char c = buf[i];
+ buf_ = tok.remaining(); // sync buffers before buf_ use
- // handle quoted strings
- if (quoted) {
- if (c == '\\')
- slashed = true;
- else if (c == '"')
- quoted = false;
-
- continue;
- } else if (c == '"') {
- quoted = true;
- crOff = -1;
- continue;
- }
-
- if (crOff < 0) { // looking for the first CR or LF
+ // TODO fix type mismatches and casting for these
+ const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
+ const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
- if (c == '\n') {
- crlfBeg = i;
- crlfEnd = ++i;
- return true;
- }
+ theOut->append(buf_.rawContent(), safeSize);
+ buf_.consume(safeSize);
+ theLeftBodySize -= safeSize;
- if (c == '\r')
- crOff = i;
- } else { // skipping CRs, looking for the first LF
-
- if (c == '\n') {
- crlfBeg = crOff;
- crlfEnd = ++i;
- return true;
- }
+ tok.reset(buf_); // sync buffers after consume()
- if (c != '\r')
- crOff = -1;
- }
- }
+ if (theLeftBodySize == 0)
+ return parseChunkEnd(tok);
+ else
+ Must(needsMoreData() || needsMoreSpace());
- return false;
+ return true;
}
-// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
-void ChunkedCodingParser::parseLastChunkExtension()
+bool
+Http::One::TeChunkedParser::parseChunkEnd(::Parser::Tokenizer &tok)
{
- size_t crlfBeg = 0;
- size_t crlfEnd = 0;
-
- if (!findCrlf(crlfBeg, crlfEnd)) {
- doNeedMoreData = true;
- return;
- }
-
- const char *const startExt = theIn->content();
- const char *const endExt = theIn->content() + crlfBeg;
-
- // chunk-extension starts at startExt and ends with LF at endEx
- for (const char *p = startExt; p < endExt;) {
-
- while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'
-
- if (*p++ != ';') // each ext name=value pair is preceded with ';'
- break;
-
- while (*p == ' ' || *p == '\t') ++p; // skip spaces before name
+ Must(theLeftBodySize == 0); // Should(), really
- if (p >= endExt)
- break; // malformed extension: ';' without ext name=value pair
+ if (skipLineTerminator(tok)) {
+ buf_ = tok.remaining(); // parse checkpoint
+ theChunkSize = 0; // done with the current chunk
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+ return true;
- const int extSize = endExt - p;
- // TODO: we need debugData() stream manipulator to dump data
- debugs(94,7, "Found chunk extension; size=" << extSize);
-
- // TODO: support implied *LWS around '='
- if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
- (void)StringToInt64(p+18, useOriginBody, &p, 10);
- debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
- break; // remove to support more than just use-original-body
- } else {
- debugs(94, 5, HERE << "skipping unknown chunk extension");
- // TODO: support quoted-string chunk-ext-val
- while (p < endExt && *p != ';') ++p; // skip until the next ';'
- }
+ } else if (!tok.atEnd()) {
+ throw TexcHere("found data between chunk end and CRLF");
}
- theIn->consume(crlfEnd);
- theStep = theChunkSize ? psChunkBody : psTrailer;
+ return false;
}
=== renamed file 'src/ChunkedCodingParser.h' => 'src/http/one/TeChunkedParser.h'
--- src/ChunkedCodingParser.h 2015-01-13 07:25:36 +0000
+++ src/http/one/TeChunkedParser.h 2015-06-01 19:46:19 +0000
@@ -1,84 +1,65 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
-#ifndef SQUID_CHUNKEDCODINGPARSER_H
-#define SQUID_CHUNKEDCODINGPARSER_H
+#ifndef SQUID_SRC_HTTP_ONE_TeChunkedParser_H
+#define SQUID_SRC_HTTP_ONE_TeChunkedParser_H
+
+#include "http/one/Parser.h"
class MemBuf;
+namespace Http
+{
+namespace One
+{
+
/**
- \ingroup ChunkEncodingAPI Chunked Encoding API
- \par
- * ChunkedCodingParser is an incremental parser for chunked transfer coding
- * used by HTTP and ICAP. The parser shovels content bytes from the raw
+ * An incremental parser for chunked transfer coding
+ * defined in RFC 7230 section 4.1.
+ * http://tools.ietf.org/html/rfc7230#section-4.1
+ *
+ * The parser shovels content bytes from the raw
* input buffer into the content output buffer, both caller-supplied.
* Ignores chunk extensions except for ICAP's ieof.
- * Has a trailer-handling placeholder.
+ * Trailers are available via mimeHeader() if wanted.
*/
-class ChunkedCodingParser
+class TeChunkedParser : public Http1::Parser
{
-
public:
- ChunkedCodingParser();
-
- void reset();
+ TeChunkedParser();
+ virtual ~TeChunkedParser() {theOut=NULL;/* we don't own this object */}
- /**
- \retval true complete success
- \retval false needs more data
- \throws ?? error.
- */
- bool parse(MemBuf *rawData, MemBuf *parsedContent);
+ /// set the buffer to be used to store decoded chunk data
+ void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
- bool needsMoreData() const;
bool needsMoreSpace() const;
-private:
- typedef void (ChunkedCodingParser::*Step)();
+ /* Http1::Parser API */
+ virtual void clear();
+ virtual bool parse(const SBuf &);
+ virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
private:
- bool mayContinue() const;
-
- void parseChunkSize();
- void parseUnusedChunkExtension();
- void parseLastChunkExtension();
- void parseChunkBeg();
- void parseChunkBody();
- void parseChunkEnd();
- void parseTrailer();
- void parseTrailerHeader();
- void parseMessageEnd();
+ bool parseChunkSize(::Parser::Tokenizer &tok);
+ bool parseChunkExtension(::Parser::Tokenizer &tok, bool skipKnown);
+ bool parseChunkBody(::Parser::Tokenizer &tok);
+ bool parseChunkEnd(::Parser::Tokenizer &tok);
- bool findCrlf(size_t &crlfBeg, size_t &crlfEnd);
- bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed);
-
-private:
- static Step psChunkSize;
- static Step psUnusedChunkExtension;
- static Step psLastChunkExtension;
- static Step psChunkBody;
- static Step psChunkEnd;
- static Step psTrailer;
- static Step psMessageEnd;
-
- MemBuf *theIn;
MemBuf *theOut;
-
- Step theStep;
uint64_t theChunkSize;
uint64_t theLeftBodySize;
- bool doNeedMoreData;
- bool inQuoted; ///< stores parsing state for incremental findCrlf
- bool inSlashed; ///< stores parsing state for incremental findCrlf
public:
int64_t useOriginBody;
};
-#endif /* SQUID_CHUNKEDCODINGPARSER_H */
+} // namespace One
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_ONE_TeChunkedParser_H */
=== modified file 'src/http/one/forward.h'
--- src/http/one/forward.h 2015-01-20 12:33:23 +0000
+++ src/http/one/forward.h 2015-06-01 19:44:26 +0000
@@ -1,32 +1,34 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef SQUID_SRC_HTTP_ONE_FORWARD_H
#define SQUID_SRC_HTTP_ONE_FORWARD_H
#include "base/RefCount.h"
namespace Http {
namespace One {
class Parser;
typedef RefCount<Http::One::Parser> ParserPointer;
+class TeChunkedParser;
+
class RequestParser;
typedef RefCount<Http::One::RequestParser> RequestParserPointer;
class ResponseParser;
typedef RefCount<Http::One::ResponseParser> ResponseParserPointer;
} // namespace One
} // namespace Http
namespace Http1 = Http::One;
#endif /* SQUID_SRC_HTTP_ONE_FORWARD_H */