[squid-dev] [PATCH] Parser-NG: Transfer-Encoding:chunked Parser

Amos Jeffries squid3 at treenet.co.nz
Mon Jun 1 23:21:19 UTC 2015


Remove several performance regressions incurred in earlier Parser-NG
updates by refactoring the class ChunkedCodingParser to a class
Http1::TeChunkedParser which parses an SBuf I/O buffer for chunked
encoding data and (for now) copies the chunk payloads into a MemBuf buffer.

The new class is inherited from Http1::Parser and presents the same API.
Chunk Trailers are now available via the Parser API mimeHeader() method
- although none of the rest of Squid makes use of that data yet. It
implements parsing using a ::Parser::Tokenizer for (nearly) compliant
protocol tokenization. With enumerated states instead of a dynamic
function-pointer chain.


Measurements:
 Co-Advisor shows no compliance change.
 Polygraph shows approx 1% speed improvement over trunk.

PS. earlier bugs being investigated turned out to be inherited from
trunk and are now fixed there already.

Amos
-------------- next part --------------
=== modified file 'src/Makefile.am'
--- src/Makefile.am	2015-05-26 09:18:13 +0000
+++ src/Makefile.am	2015-06-01 06:21:35 +0000
@@ -274,42 +274,40 @@
 	AccessLogEntry.h \
 	AsyncEngine.cc \
 	AsyncEngine.h \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	CacheDigest.h \
 	CacheDigest.cc \
 	cache_manager.cc \
 	NeighborTypeDomainList.h \
 	CachePeer.cc \
 	CachePeer.h \
 	CacheManager.h \
 	carp.h \
 	carp.cc \
 	cbdata.cc \
 	cbdata.h \
-	ChunkedCodingParser.cc \
-	ChunkedCodingParser.h \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side.h \
 	client_side_reply.cc \
 	client_side_reply.h \
 	client_side_request.cc \
 	client_side_request.h \
 	ClientInfo.h \
 	BodyPipe.cc \
 	BodyPipe.h \
 	ClientInfo.h \
 	ClientRequestContext.h \
 	clientStream.cc \
 	clientStream.h \
 	clientStreamForward.h \
 	CollapsedForwarding.cc \
 	CollapsedForwarding.h \
 	CompletionDispatcher.cc \
@@ -1385,41 +1383,40 @@
 	tests/stub_main_cc.cc \
 	tests/stub_ipc_Forwarder.cc \
 	tests/stub_store_stats.cc \
 	tests/stub_EventLoop.cc \
 	time.cc \
 	BodyPipe.cc \
 	cache_manager.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	$(DELAY_POOL_SOURCE) \
 	$(DISKIO_SOURCE) \
 	disk.h \
 	disk.cc \
 	dlink.h \
@@ -1815,41 +1812,40 @@
 	$(SQUID_CPPUNIT_LIBS)
 tests_testDns_LDFLAGS= $(LIBADD_DL)
 
 tests_testEvent_SOURCES = \
 	AccessLogEntry.cc \
 	BodyPipe.cc \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	cache_manager.cc \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	debug.cc \
 	$(DELAY_POOL_SOURCE) \
 	$(DISKIO_SOURCE) \
 	disk.h \
 	disk.cc \
@@ -2061,41 +2057,40 @@
 	$(SQUID_CPPUNIT_LA)
 
 ## Tests of the EventLoop module.
 tests_testEventLoop_SOURCES = \
 	AccessLogEntry.cc \
 	BodyPipe.cc \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	cache_manager.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	debug.cc \
 	$(DELAY_POOL_SOURCE) \
 	$(DISKIO_SOURCE) \
 	disk.h \
 	disk.cc \
@@ -2305,41 +2300,40 @@
 	$(REPL_OBJS) \
 	$(SQUID_CPPUNIT_LA)
 
 tests_test_http_range_SOURCES = \
 	AccessLogEntry.cc \
 	BodyPipe.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	cache_manager.cc \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	debug.cc \
 	$(DELAY_POOL_SOURCE) \
 	$(DISKIO_SOURCE) \
 	disk.h \
 	disk.cc \
@@ -2611,41 +2605,40 @@
 	tests/stub_libeui.cc \
 	tests/stub_store_stats.cc \
 	tests/stub_EventLoop.cc \
 	time.cc \
 	BodyPipe.cc \
 	cache_manager.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	debug.cc \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	$(DELAY_POOL_SOURCE) \
 	disk.h \
 	disk.cc \
 	dlink.h \
 	dlink.cc \
@@ -3418,41 +3411,40 @@
 
 ## Tests of the URL module.
 ## TODO: Trim this down once the insanity is over.
 tests_testURL_SOURCES = \
 	AccessLogEntry.cc \
 	BodyPipe.cc \
 	cache_cf.h \
 	AuthReg.h \
 	YesNoNone.h \
 	YesNoNone.cc \
 	RefreshPattern.h \
 	cache_cf.cc \
 	tests/stub_cache_manager.cc \
 	CacheDigest.h \
 	tests/stub_CacheDigest.cc \
 	CachePeer.cc \
 	CachePeer.h \
 	carp.h \
 	tests/stub_carp.cc \
 	cbdata.cc \
-	ChunkedCodingParser.cc \
 	client_db.h \
 	client_db.cc \
 	client_side.h \
 	client_side.cc \
 	client_side_reply.cc \
 	client_side_request.cc \
 	ClientInfo.h \
 	clientStream.cc \
 	tests/stub_CollapsedForwarding.cc \
 	ConfigOption.cc \
 	ConfigParser.cc \
 	CpuAffinityMap.cc \
 	CpuAffinityMap.h \
 	CpuAffinitySet.cc \
 	CpuAffinitySet.h \
 	$(DELAY_POOL_SOURCE) \
 	disk.h \
 	disk.cc \
 	DiskIO/ReadRequest.cc \
 	DiskIO/WriteRequest.cc \

=== modified file 'src/adaptation/icap/ModXact.cc'
--- src/adaptation/icap/ModXact.cc	2015-04-27 09:52:02 +0000
+++ src/adaptation/icap/ModXact.cc	2015-06-01 19:48:35 +0000
@@ -5,44 +5,44 @@
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 /* DEBUG: section 93    ICAP (RFC 3507) Client */
 
 #include "squid.h"
 #include "AccessLogEntry.h"
 #include "adaptation/Answer.h"
 #include "adaptation/History.h"
 #include "adaptation/icap/Client.h"
 #include "adaptation/icap/Config.h"
 #include "adaptation/icap/History.h"
 #include "adaptation/icap/Launcher.h"
 #include "adaptation/icap/ModXact.h"
 #include "adaptation/icap/ServiceRep.h"
 #include "adaptation/Initiator.h"
 #include "auth/UserRequest.h"
 #include "base/TextException.h"
 #include "base64.h"
-#include "ChunkedCodingParser.h"
 #include "comm.h"
 #include "comm/Connection.h"
 #include "err_detail_type.h"
+#include "http/one/TeChunkedParser.h"
 #include "HttpHeaderTools.h"
 #include "HttpMsg.h"
 #include "HttpReply.h"
 #include "HttpRequest.h"
 #include "SquidTime.h"
 #include "URL.h"
 
 // flow and terminology:
 //     HTTP| --> receive --> encode --> write --> |network
 //     end | <-- send    <-- parse  <-- read  <-- |end
 
 // TODO: replace gotEncapsulated() with something faster; we call it often
 
 CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXact);
 CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXactLauncher);
 
 static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
 
 Adaptation::Icap::ModXact::State::State()
 {
@@ -1087,68 +1087,62 @@
     if (!parsed) { // need more data
         debugs(93, 5, HERE << "parse failed, need more data, return false");
         head->reset();
         return false;
     }
 
     if (HttpRequest *r = dynamic_cast<HttpRequest*>(head))
         urlCanonical(r); // parse does not set HttpRequest::canonical
 
     debugs(93, 5, HERE << "parse success, consume " << head->hdr_sz << " bytes, return true");
     readBuf.consume(head->hdr_sz);
     return true;
 }
 
 void Adaptation::Icap::ModXact::decideOnParsingBody()
 {
     if (gotEncapsulated("res-body") || gotEncapsulated("req-body")) {
         debugs(93, 5, HERE << "expecting a body");
         state.parsing = State::psBody;
         replyHttpBodySize = 0;
-        bodyParser = new ChunkedCodingParser;
+        bodyParser = new Http1::TeChunkedParser;
         makeAdaptedBodyPipe("adapted response from the ICAP server");
         Must(state.sending == State::sendingAdapted);
     } else {
         debugs(93, 5, HERE << "not expecting a body");
         stopParsing();
         stopSending(true);
     }
 }
 
 void Adaptation::Icap::ModXact::parseBody()
 {
     Must(state.parsing == State::psBody);
     Must(bodyParser);
 
     debugs(93, 5, "have " << readBuf.length() << " body bytes to parse");
 
     // the parser will throw on errors
     BodyPipeCheckout bpc(*adapted.body_pipe);
-    // XXX: performance regression. SBuf-convert (or Parser-convert?) the chunked decoder.
-    MemBuf encodedData;
-    encodedData.init();
-    // NP: we must do this instead of pointing encodedData at the SBuf::rawContent
-    // because chunked decoder uses MemBuf::consume, which shuffles buffer bytes around.
-    encodedData.append(readBuf.rawContent(), readBuf.length());
-    const bool parsed = bodyParser->parse(&encodedData, &bpc.buf);
-    // XXX: httpChunkDecoder has consumed from MemBuf.
-    readBuf.consume(readBuf.length() - encodedData.contentSize());
+    bodyParser->setPayloadBuffer(&bpc.buf);
+    const bool parsed = bodyParser->parse(readBuf);
+    readBuf = bodyParser->remaining(); // sync buffers after parse
     bpc.checkIn();
 
     debugs(93, 5, "have " << readBuf.length() << " body bytes after parsed all: " << parsed);
     replyHttpBodySize += adapted.body_pipe->buf().contentSize();
 
     // TODO: expose BodyPipe::putSize() to make this check simpler and clearer
     // TODO: do we really need this if we disable when sending headers?
     if (adapted.body_pipe->buf().contentSize() > 0) { // parsed something sometime
         disableRepeats("sent adapted content");
         disableBypass("sent adapted content", true);
     }
 
     if (parsed) {
         if (state.readyForUob && bodyParser->useOriginBody >= 0) {
             prepPartialBodyEchoing(
                 static_cast<uint64_t>(bodyParser->useOriginBody));
             stopParsing();
             return;
         }
 

=== modified file 'src/adaptation/icap/ModXact.h'
--- src/adaptation/icap/ModXact.h	2015-01-13 07:25:36 +0000
+++ src/adaptation/icap/ModXact.h	2015-06-01 19:48:29 +0000
@@ -1,49 +1,48 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #ifndef SQUID_ICAPMODXACT_H
 #define SQUID_ICAPMODXACT_H
 
 #include "AccessLogEntry.h"
 #include "adaptation/icap/InOut.h"
 #include "adaptation/icap/Launcher.h"
 #include "adaptation/icap/Xaction.h"
 #include "BodyPipe.h"
+#include "http/one/forward.h"
 
 /*
  * ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
  * ICAPXaction as the base. The ICAPModXact receives a virgin HTTP message
  * from an ICAP vecoring point, (a.k.a., initiator), communicates with the
  * ICAP server, and sends the adapted HTTP message headers back.
  * Virgin/adapted HTTP message body is reveived/sent using BodyPipe
  * interface. The initiator (or its associate) is expected to send and/or
  * receive the HTTP body.
  */
 
-class ChunkedCodingParser;
-
 namespace Adaptation
 {
 namespace Icap
 {
 
 // estimated future presence and size of something (e.g., HTTP body)
 
 class SizedEstimate
 {
 
 public:
     SizedEstimate(); // not expected by default
     void expect(int64_t aSize); // expect with any, even unknown size
     bool expected() const;
 
     /* other members can be accessed iff expected() */
 
     bool knownSize() const;
     uint64_t size() const; // can be accessed iff knownSize()
 
@@ -233,41 +232,41 @@
     void stopBackup();
 
     virtual void fillPendingStatus(MemBuf &buf) const;
     virtual void fillDoneStatus(MemBuf &buf) const;
     virtual bool fillVirginHttpHeader(MemBuf&) const;
 
 private:
     void packHead(MemBuf &httpBuf, const HttpMsg *head);
     void encapsulateHead(MemBuf &icapBuf, const char *section, MemBuf &httpBuf, const HttpMsg *head);
     bool gotEncapsulated(const char *section) const;
     void checkConsuming();
 
     virtual void finalizeLogInfo();
 
     SizedEstimate virginBody;
     VirginBodyAct virginBodyWriting; // virgin body writing state
     VirginBodyAct virginBodySending;  // virgin body sending state
     uint64_t virginConsumed;        // virgin data consumed so far
     Preview preview; // use for creating (writing) the preview
 
-    ChunkedCodingParser *bodyParser; // ICAP response body parser
+    Http1::TeChunkedParser *bodyParser; // ICAP response body parser
 
     bool canStartBypass; // enables bypass of transaction failures
     bool protectGroupBypass; // protects ServiceGroup-wide bypass of failures
 
     /**
      * size of HTTP header in ICAP reply or -1 if there is not any encapsulated
      * message data
      */
     int64_t replyHttpHeaderSize;
     /**
      * size of dechunked HTTP body in ICAP reply or -1 if there is not any
      * encapsulated message data
      */
     int64_t replyHttpBodySize;
 
     int adaptHistoryId; ///< adaptation history slot reservation
 
     class State
     {
 

=== modified file 'src/client_side.cc'
--- src/client_side.cc	2015-05-26 17:25:04 +0000
+++ src/client_side.cc	2015-06-01 19:48:10 +0000
@@ -46,64 +46,64 @@
  * The request is *immediately* kicked off, and data flows through
  * to clientSocketRecipient.
  *
  \par
  * If the data that arrives at clientSocketRecipient is not for the current
  * request, clientSocketRecipient simply returns, without requesting more
  * data, or sending it.
  *
  \par
  * ClientKeepAliveNextRequest will then detect the presence of data in
  * the next ClientHttpRequest, and will send it, restablishing the
  * data flow.
  */
 
 #include "squid.h"
 #include "acl/FilledChecklist.h"
 #include "anyp/PortCfg.h"
 #include "base/Subscription.h"
 #include "base/TextException.h"
 #include "CachePeer.h"
-#include "ChunkedCodingParser.h"
 #include "client_db.h"
 #include "client_side.h"
 #include "client_side_reply.h"
 #include "client_side_request.h"
 #include "ClientRequestContext.h"
 #include "clientStream.h"
 #include "comm.h"
 #include "comm/Connection.h"
 #include "comm/Loops.h"
 #include "comm/Read.h"
 #include "comm/TcpAcceptor.h"
 #include "comm/Write.h"
 #include "CommCalls.h"
 #include "errorpage.h"
 #include "fd.h"
 #include "fde.h"
 #include "fqdncache.h"
 #include "FwdState.h"
 #include "globals.h"
 #include "helper.h"
 #include "helper/Reply.h"
 #include "http.h"
 #include "http/one/RequestParser.h"
+#include "http/one/TeChunkedParser.h"
 #include "HttpHdrContRange.h"
 #include "HttpHeaderTools.h"
 #include "HttpReply.h"
 #include "HttpRequest.h"
 #include "ident/Config.h"
 #include "ident/Ident.h"
 #include "internal.h"
 #include "ipc/FdNotes.h"
 #include "ipc/StartListening.h"
 #include "log/access_log.h"
 #include "MemBuf.h"
 #include "MemObject.h"
 #include "mime_header.h"
 #include "parser/Tokenizer.h"
 #include "profiler/Profiler.h"
 #include "rfc1738.h"
 #include "servers/forward.h"
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "StatCounters.h"
@@ -3188,96 +3188,89 @@
 ConnStateData::handleReadData()
 {
     // if we are reading a body, stuff data into the body pipe
     if (bodyPipe != NULL)
         return handleRequestBodyData();
     return true;
 }
 
 /**
  * called when new request body data has been buffered in in.buf
  * may close the connection if we were closing and piped everything out
  *
  * \retval false called comm_close or setReplyToError (the caller should bail)
  * \retval true  we did not call comm_close or setReplyToError
  */
 bool
 ConnStateData::handleRequestBodyData()
 {
     assert(bodyPipe != NULL);
 
-    size_t putSize = 0;
-
     if (in.bodyParser) { // chunked encoding
-        if (const err_type error = handleChunkedRequestBody(putSize)) {
+        if (const err_type error = handleChunkedRequestBody()) {
             abortChunkedRequestBody(error);
             return false;
         }
     } else { // identity encoding
         debugs(33,5, HERE << "handling plain request body for " << clientConnection);
-        putSize = bodyPipe->putMoreData(in.buf.c_str(), in.buf.length());
+        const size_t putSize = bodyPipe->putMoreData(in.buf.c_str(), in.buf.length());
+        if (putSize > 0)
+            consumeInput(putSize);
+
         if (!bodyPipe->mayNeedMoreData()) {
             // BodyPipe will clear us automagically when we produced everything
             bodyPipe = NULL;
         }
     }
 
-    if (putSize > 0)
-        consumeInput(putSize);
-
     if (!bodyPipe) {
         debugs(33,5, HERE << "produced entire request body for " << clientConnection);
 
         if (const char *reason = stoppedSending()) {
             /* we've finished reading like good clients,
              * now do the close that initiateClose initiated.
              */
             debugs(33, 3, HERE << "closing for earlier sending error: " << reason);
             clientConnection->close();
             return false;
         }
     }
 
     return true;
 }
 
 /// parses available chunked encoded body bytes, checks size, returns errors
 err_type
-ConnStateData::handleChunkedRequestBody(size_t &putSize)
+ConnStateData::handleChunkedRequestBody()
 {
     debugs(33, 7, "chunked from " << clientConnection << ": " << in.buf.length());
 
     try { // the parser will throw on errors
 
         if (in.buf.isEmpty()) // nothing to do
             return ERR_NONE;
 
-        MemBuf raw; // ChunkedCodingParser only works with MemBufs
-        // add one because MemBuf will assert if it cannot 0-terminate
-        raw.init(in.buf.length(), in.buf.length()+1);
-        raw.append(in.buf.c_str(), in.buf.length());
-
-        const mb_size_t wasContentSize = raw.contentSize();
         BodyPipeCheckout bpc(*bodyPipe);
-        const bool parsed = in.bodyParser->parse(&raw, &bpc.buf);
+        in.bodyParser->setPayloadBuffer(&bpc.buf);
+        const bool parsed = in.bodyParser->parse(in.buf);
+        in.buf = in.bodyParser->remaining(); // sync buffers
         bpc.checkIn();
-        putSize = wasContentSize - raw.contentSize();
 
         // dechunk then check: the size limit applies to _dechunked_ content
         if (clientIsRequestBodyTooLargeForPolicy(bodyPipe->producedSize()))
             return ERR_TOO_BIG;
 
         if (parsed) {
             finishDechunkingRequest(true);
             Must(!bodyPipe);
             return ERR_NONE; // nil bodyPipe implies body end for the caller
         }
 
         // if chunk parser needs data, then the body pipe must need it too
         Must(!in.bodyParser->needsMoreData() || bodyPipe->mayNeedMoreData());
 
         // if parser needs more space and we can consume nothing, we will stall
         Must(!in.bodyParser->needsMoreSpace() || bodyPipe->buf().hasContent());
     } catch (...) { // TODO: be more specific
         debugs(33, 3, HERE << "malformed chunks" << bodyPipe->status());
         return ERR_INVALID_REQ;
     }
@@ -4692,41 +4685,41 @@
         clientConnection->close();
     }
 }
 
 void
 ConnStateData::expectNoForwarding()
 {
     if (bodyPipe != NULL) {
         debugs(33, 4, HERE << "no consumer for virgin body " << bodyPipe->status());
         bodyPipe->expectNoConsumption();
     }
 }
 
 /// initialize dechunking state
 void
 ConnStateData::startDechunkingRequest()
 {
     Must(bodyPipe != NULL);
     debugs(33, 5, HERE << "start dechunking" << bodyPipe->status());
     assert(!in.bodyParser);
-    in.bodyParser = new ChunkedCodingParser;
+    in.bodyParser = new Http1::TeChunkedParser;
 }
 
 /// put parsed content into input buffer and clean up
 void
 ConnStateData::finishDechunkingRequest(bool withSuccess)
 {
     debugs(33, 5, HERE << "finish dechunking: " << withSuccess);
 
     if (bodyPipe != NULL) {
         debugs(33, 7, HERE << "dechunked tail: " << bodyPipe->status());
         BodyPipe::Pointer myPipe = bodyPipe;
         stopProducingFor(bodyPipe, withSuccess); // sets bodyPipe->bodySize()
         Must(!bodyPipe); // we rely on it being nil after we are done with body
         if (withSuccess) {
             Must(myPipe->bodySizeKnown());
             ClientSocketContext::Pointer context = getCurrentContext();
             if (context != NULL && context->http && context->http->request)
                 context->http->request->setContentLength(myPipe->bodySize());
         }
     }

=== modified file 'src/client_side.h'
--- src/client_side.h	2015-04-10 08:54:13 +0000
+++ src/client_side.h	2015-06-01 19:48:21 +0000
@@ -11,41 +11,40 @@
 #ifndef SQUID_CLIENTSIDE_H
 #define SQUID_CLIENTSIDE_H
 
 #include "clientStreamForward.h"
 #include "comm.h"
 #include "helper/forward.h"
 #include "http/forward.h"
 #include "HttpControlMsg.h"
 #include "ipc/FdNotes.h"
 #include "SBuf.h"
 #if USE_AUTH
 #include "auth/UserRequest.h"
 #endif
 #if USE_OPENSSL
 #include "ssl/support.h"
 #endif
 
 class ConnStateData;
 class ClientHttpRequest;
 class clientStreamNode;
-class ChunkedCodingParser;
 namespace AnyP
 {
 class PortCfg;
 } // namespace Anyp
 
 /**
  * Badly named.
  * This is in fact the processing context for a single HTTP request.
  *
  * Managing what has been done, and what happens next to the data buffer
  * holding what we hope is an HTTP request.
  *
  * Parsing is still a mess of global functions done in conjunction with the
  * real socket controller which generated ClientHttpRequest.
  * It also generates one of us and passes us control from there based on
  * the results of the parse.
  *
  * After that all the request interpretation and adaptation is in our scope.
  * Then finally the reply fetcher is created by this and we get the result
  * back. Which we then have to manage writing of it to the ConnStateData.
@@ -191,41 +190,41 @@
     void receivedFirstByte();
 
     // HttpControlMsgSink API
     virtual void sendControlMsg(HttpControlMsg msg);
 
     // Client TCP connection details from comm layer.
     Comm::ConnectionPointer clientConnection;
 
     /**
      * The transfer protocol currently being spoken on this connection.
      * HTTP/1 CONNECT and HTTP/2 SETTINGS offers the ability to change
      * protocols on the fly.
      */
     AnyP::ProtocolVersion transferProtocol;
 
     struct In {
         In();
         ~In();
         bool maybeMakeSpaceAvailable();
 
-        ChunkedCodingParser *bodyParser; ///< parses chunked request body
+        Http1::TeChunkedParser *bodyParser; ///< parses chunked request body
         SBuf buf;
     } in;
 
     /** number of body bytes we need to comm_read for the "current" request
      *
      * \retval 0         We do not need to read any [more] body bytes
      * \retval negative  May need more but do not know how many; could be zero!
      * \retval positive  Need to read exactly that many more body bytes
      */
     int64_t mayNeedToReadMoreBody() const;
 
 #if USE_AUTH
     /**
      * Fetch the user details for connection based authentication
      * NOTE: this is ONLY connection based because NTLM and Negotiate is against HTTP spec.
      */
     const Auth::UserRequest::Pointer &getAuth() const { return auth_; }
 
     /**
      * Set the user details for connection-based authentication to use from now until connection closure.
@@ -406,41 +405,41 @@
 
     /// ClientStream calls this to supply response header (once) and data
     /// for the current ClientSocketContext.
     virtual void handleReply(HttpReply *header, StoreIOBuffer receivedData) = 0;
 
     /// remove no longer needed leading bytes from the input buffer
     void consumeInput(const size_t byteCount);
 
     /* TODO: Make the methods below (at least) non-public when possible. */
 
     /// stop parsing the request and create context for relaying error info
     ClientSocketContext *abortRequestParsing(const char *const errUri);
 
     /// client data which may need to forward as-is to server after an
     /// on_unsupported_protocol tunnel decision.
     SBuf preservedClientData;
 protected:
     void startDechunkingRequest();
     void finishDechunkingRequest(bool withSuccess);
     void abortChunkedRequestBody(const err_type error);
-    err_type handleChunkedRequestBody(size_t &putSize);
+    err_type handleChunkedRequestBody();
 
     void startPinnedConnectionMonitoring();
     void clientPinnedConnectionRead(const CommIoCbParams &io);
 
     /// parse input buffer prefix into a single transfer protocol request
     /// return NULL to request more header bytes (after checking any limits)
     /// use abortRequestParsing() to handle parsing errors w/o creating request
     virtual ClientSocketContext *parseOneRequest() = 0;
 
     /// start processing a freshly parsed request
     virtual void processParsedRequest(ClientSocketContext *context) = 0;
 
     /// returning N allows a pipeline of 1+N requests (see pipeline_prefetch)
     virtual int pipelinePrefetchMax() const;
 
     /// timeout to use when waiting for the next request
     virtual time_t idleTimeout() const = 0;
 
     BodyPipe::Pointer bodyPipe; ///< set when we are reading request body
 

=== modified file 'src/http.cc'
--- src/http.cc	2015-04-27 09:52:02 +0000
+++ src/http.cc	2015-06-01 19:47:47 +0000
@@ -2,53 +2,53 @@
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 /* DEBUG: section 11    Hypertext Transfer Protocol (HTTP) */
 
 /*
  * Anonymizing patch by lutz at as-node.jena.thur.de
  * have a look into http-anon.c to get more informations.
  */
 
 #include "squid.h"
 #include "acl/FilledChecklist.h"
 #include "base/AsyncJobCalls.h"
 #include "base/TextException.h"
 #include "base64.h"
 #include "CachePeer.h"
-#include "ChunkedCodingParser.h"
 #include "client_side.h"
 #include "comm/Connection.h"
 #include "comm/Read.h"
 #include "comm/Write.h"
 #include "CommRead.h"
 #include "err_detail_type.h"
 #include "errorpage.h"
 #include "fd.h"
 #include "fde.h"
 #include "globals.h"
 #include "http.h"
 #include "http/one/ResponseParser.h"
+#include "http/one/TeChunkedParser.h"
 #include "HttpControlMsg.h"
 #include "HttpHdrCc.h"
 #include "HttpHdrContRange.h"
 #include "HttpHdrSc.h"
 #include "HttpHdrScTarget.h"
 #include "HttpHeaderTools.h"
 #include "HttpReply.h"
 #include "HttpRequest.h"
 #include "HttpStateFlags.h"
 #include "log/access_log.h"
 #include "MemBuf.h"
 #include "MemObject.h"
 #include "neighbors.h"
 #include "peer_proxy_negotiate_auth.h"
 #include "profiler/Profiler.h"
 #include "refresh.h"
 #include "RefreshPattern.h"
 #include "rfc1738.h"
 #include "SquidConfig.h"
 #include "SquidTime.h"
@@ -771,41 +771,41 @@
         newrep->sline.version.protocol = hp->messageProtocol().protocol;
         newrep->sline.version.major = hp->messageProtocol().major;
         newrep->sline.version.minor = hp->messageProtocol().minor;
         debugs(11, 2, "error parsing response headers mime block");
     }
 
     // done with Parser, now process using the HttpReply
     hp = NULL;
 
     newrep->removeStaleWarnings();
 
     if (newrep->sline.protocol == AnyP::PROTO_HTTP && newrep->sline.status() >= 100 && newrep->sline.status() < 200) {
         handle1xx(newrep);
         ctx_exit(ctx);
         return;
     }
 
     flags.chunked = false;
     if (newrep->sline.protocol == AnyP::PROTO_HTTP && newrep->header.chunked()) {
         flags.chunked = true;
-        httpChunkDecoder = new ChunkedCodingParser;
+        httpChunkDecoder = new Http1::TeChunkedParser;
     }
 
     if (!peerSupportsConnectionPinning())
         request->flags.connectionAuthDisabled = true;
 
     HttpReply *vrep = setVirginReply(newrep);
     flags.headers_parsed = true;
 
     keepaliveAccounting(vrep);
 
     checkDateSkew(vrep);
 
     processSurrogateControl (vrep);
 
     request->hier.peer_reply_status = newrep->sline.status();
 
     ctx_exit(ctx);
 }
 
 /// ignore or start forwarding the 1xx response (a.k.a., control message)
@@ -1376,49 +1376,43 @@
 HttpStateData::writeReplyBody()
 {
     truncateVirginBody(); // if needed
     const char *data = inBuf.rawContent();
     int len = inBuf.length();
     addVirginReplyBody(data, len);
     inBuf.consume(len);
 }
 
 bool
 HttpStateData::decodeAndWriteReplyBody()
 {
     const char *data = NULL;
     int len;
     bool wasThereAnException = false;
     assert(flags.chunked);
     assert(httpChunkDecoder);
     SQUID_ENTER_THROWING_CODE();
     MemBuf decodedData;
     decodedData.init();
-    // XXX: performance regression. SBuf-convert (or Parser-convert?) the chunked decoder.
-    MemBuf encodedData;
-    encodedData.init();
-    // NP: we must do this instead of pointing encodedData at the SBuf::rawContent
-    // because chunked decoder uses MemBuf::consume, which shuffles buffer bytes around.
-    encodedData.append(inBuf.rawContent(), inBuf.length());
-    const bool doneParsing = httpChunkDecoder->parse(&encodedData,&decodedData);
-    // XXX: httpChunkDecoder has consumed from MemBuf.
-    inBuf.consume(inBuf.length() - encodedData.contentSize());
+    httpChunkDecoder->setPayloadBuffer(&decodedData);
+    const bool doneParsing = httpChunkDecoder->parse(inBuf);
+    inBuf = httpChunkDecoder->remaining(); // sync buffers after parse
     len = decodedData.contentSize();
     data=decodedData.content();
     addVirginReplyBody(data, len);
     if (doneParsing) {
         lastChunk = 1;
         flags.do_next_read = false;
     }
     SQUID_EXIT_THROWING_CODE(wasThereAnException);
     return wasThereAnException;
 }
 
 /**
  * processReplyBody has two purposes:
  *  1 - take the reply body data, if any, and put it into either
  *      the StoreEntry, or give it over to ICAP.
  *  2 - see if we made it to the end of the response (persistent
  *      connections and such)
  */
 void
 HttpStateData::processReplyBody()

=== modified file 'src/http.h'
--- src/http.h	2015-03-17 02:53:05 +0000
+++ src/http.h	2015-06-01 19:47:33 +0000
@@ -1,36 +1,36 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #ifndef SQUID_HTTP_H
 #define SQUID_HTTP_H
 
 #include "clients/Client.h"
 #include "comm.h"
+#include "http/forward.h"
 #include "HttpStateFlags.h"
 
-class ChunkedCodingParser;
 class FwdState;
 class HttpHeader;
 
 class HttpStateData : public Client
 {
     CBDATA_CLASS(HttpStateData);
 
 public:
     HttpStateData(FwdState *);
     ~HttpStateData();
 
     static void httpBuildRequestHeader(HttpRequest * request,
                                        StoreEntry * entry,
                                        const AccessLogEntryPointer &al,
                                        HttpHeader * hdr_out,
                                        const HttpStateFlags &flags);
 
     virtual const Comm::ConnectionPointer & dataConnection() const;
     /* should be private */
     bool sendRequest();
@@ -103,34 +103,34 @@
     virtual void handleRequestBodyProducerAborted();
 
     void writeReplyBody();
     bool decodeAndWriteReplyBody();
     bool finishingBrokenPost();
     bool finishingChunkedRequest();
     void doneSendingRequestBody();
     void requestBodyHandler(MemBuf &);
     virtual void sentRequestBody(const CommIoCbParams &io);
     void wroteLast(const CommIoCbParams &io);
     void sendComplete();
     void httpStateConnClosed(const CommCloseCbParams &params);
     void httpTimeout(const CommTimeoutCbParams &params);
 
     mb_size_t buildRequestPrefix(MemBuf * mb);
     static bool decideIfWeDoRanges (HttpRequest * orig_request);
     bool peerSupportsConnectionPinning() const;
 
     /// Parser being used at present to parse the HTTP/ICY server response.
     Http1::ResponseParserPointer hp;
-    ChunkedCodingParser *httpChunkDecoder;
+    Http1::TeChunkedParser *httpChunkDecoder;
 
     /// amount of message payload/body received so far.
     int64_t payloadSeen;
     /// positive when we read more than we wanted
     int64_t payloadTruncated;
 };
 
 int httpCachable(const HttpRequestMethod&);
 void httpStart(FwdState *);
 const char *httpMakeVaryMark(HttpRequest * request, HttpReply const * reply);
 
 #endif /* SQUID_HTTP_H */
 

=== modified file 'src/http/one/Makefile.am'
--- src/http/one/Makefile.am	2015-01-20 12:33:23 +0000
+++ src/http/one/Makefile.am	2015-06-01 19:43:57 +0000
@@ -1,20 +1,22 @@
 ## Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 ##
 ## Squid software is distributed under GPLv2+ license and includes
 ## contributions from numerous individuals and organizations.
 ## Please see the COPYING and CONTRIBUTORS files for details.
 ##
 
 include $(top_srcdir)/src/Common.am
 include $(top_srcdir)/src/TestHeaders.am
 
 noinst_LTLIBRARIES = libhttp1.la
 
 libhttp1_la_SOURCES = \
 	forward.h \
 	Parser.cc \
 	Parser.h \
 	RequestParser.cc \
 	RequestParser.h \
 	ResponseParser.cc \
-	ResponseParser.h
+	ResponseParser.h \
+	TeChunkedParser.cc \
+	TeChunkedParser.h

=== modified file 'src/http/one/Parser.h'
--- src/http/one/Parser.h	2015-02-20 03:25:12 +0000
+++ src/http/one/Parser.h	2015-04-12 11:05:50 +0000
@@ -6,44 +6,47 @@
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
 #define _SQUID_SRC_HTTP_ONE_PARSER_H
 
 #include "anyp/ProtocolVersion.h"
 #include "http/one/forward.h"
 #include "http/StatusCode.h"
 #include "SBuf.h"
 
 namespace Parser {
 class Tokenizer;
 }
 
 namespace Http {
 namespace One {
 
 // Parser states
 enum ParseState {
-    HTTP_PARSE_NONE,     ///< initialized, but nothing usefully parsed yet
-    HTTP_PARSE_FIRST,    ///< HTTP/1 message first-line
-    HTTP_PARSE_MIME,     ///< HTTP/1 mime-header block
-    HTTP_PARSE_DONE      ///< parsed a message header, or reached a terminal syntax error
+    HTTP_PARSE_NONE,      ///< initialized, but nothing usefully parsed yet
+    HTTP_PARSE_FIRST,     ///< HTTP/1 message first-line
+    HTTP_PARSE_CHUNK_SZ,  ///< HTTP/1.1 chunked encoding chunk-size
+    HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext
+    HTTP_PARSE_CHUNK,     ///< HTTP/1.1 chunked encoding chunk-data
+    HTTP_PARSE_MIME,      ///< HTTP/1 mime-header block
+    HTTP_PARSE_DONE       ///< parsed a message header, or reached a terminal syntax error
 };
 
 /** HTTP/1.x protocol parser
  *
  * Works on a raw character I/O buffer and tokenizes the content into
  * the major CRLF delimited segments of an HTTP/1 procotol message:
  *
  * \item first-line (request-line / simple-request / status-line)
  * \item mime-header 0*( header-name ':' SP field-value CRLF)
  */
 class Parser : public RefCountable
 {
 public:
     typedef SBuf::size_type size_type;
 
     Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE) {}
     virtual ~Parser() {}
 
     /// Set this parser back to a default state.
     /// Will DROP any reference to a buffer (does not free).

=== renamed file 'src/ChunkedCodingParser.cc' => 'src/http/one/TeChunkedParser.cc'
--- src/ChunkedCodingParser.cc	2015-01-13 07:25:36 +0000
+++ src/http/one/TeChunkedParser.cc	2015-06-01 19:45:00 +0000
@@ -1,311 +1,214 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #include "squid.h"
 #include "base/TextException.h"
-#include "ChunkedCodingParser.h"
 #include "Debug.h"
+#include "http/one/TeChunkedParser.h"
+#include "http/ProtocolVersion.h"
 #include "MemBuf.h"
+#include "parser/Tokenizer.h"
 #include "Parsing.h"
 
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
-ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
-ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
-ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
-ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
-
-ChunkedCodingParser::ChunkedCodingParser()
+Http::One::TeChunkedParser::TeChunkedParser()
 {
-    reset();
+    // chunked encoding only exists in HTTP/1.1
+    Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
+
+    clear();
 }
 
-void ChunkedCodingParser::reset()
+void
+Http::One::TeChunkedParser::clear()
 {
-    theStep = psChunkSize;
+    parsingStage_ = Http1::HTTP_PARSE_NONE;
+    buf_.clear();
     theChunkSize = theLeftBodySize = 0;
-    doNeedMoreData = false;
-    theIn = theOut = NULL;
+    theOut = NULL;
     useOriginBody = -1;
-    inQuoted = inSlashed = false;
 }
 
-bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
+bool
+Http::One::TeChunkedParser::parse(const SBuf &aBuf)
 {
-    Must(rawData && parsedContent);
-    theIn = rawData;
-    theOut = parsedContent;
-
-    // we must reset this all the time so that mayContinue() lets us
-    // output more content if we stopped due to needsMoreSpace() before
-    doNeedMoreData = !theIn->hasContent();
+    buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
 
-    while (mayContinue()) {
-        (this->*theStep)();
-    }
+    if (buf_.isEmpty()) // nothing to do (yet)
+        return false;
 
-    return theStep == psMessageEnd;
-}
+    debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 
-bool ChunkedCodingParser::needsMoreData() const
-{
-    return doNeedMoreData;
-}
+    Must(!buf_.isEmpty() && theOut);
 
-bool ChunkedCodingParser::needsMoreSpace() const
-{
-    assert(theOut);
-    return theStep == psChunkBody && !theOut->hasPotentialSpace();
+    if (parsingStage_ == Http1::HTTP_PARSE_NONE)
+        parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+
+    ::Parser::Tokenizer tok(buf_);
+
+    // loop for as many chunks as we can
+    // use do-while instead of while so that we can incrementally
+    // restart in the middle of a chunk/frame
+    do {
+
+        if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
+            return false;
+
+        if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
+            return false;
+
+        if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
+            return false;
+
+        // loop for as many chunks as we can
+    } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
+
+    return !needsMoreData() && !needsMoreSpace();
 }
 
-bool ChunkedCodingParser::mayContinue() const
+bool
+Http::One::TeChunkedParser::needsMoreSpace() const
 {
-    return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
+    assert(theOut);
+    return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
 }
 
-void ChunkedCodingParser::parseChunkSize()
+/// RFC 7230 section 4.1 chunk-size
+bool
+Http::One::TeChunkedParser::parseChunkSize(::Parser::Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really
 
-    const char *p = theIn->content();
-    while (p < theIn->space() && xisxdigit(*p)) ++p;
-    if (p >= theIn->space()) {
-        doNeedMoreData = true;
-        return;
-    }
-
     int64_t size = -1;
-    if (StringToInt64(theIn->content(), size, &p, 16)) {
+    if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
             throw TexcHere("negative chunk size");
 
         theChunkSize = theLeftBodySize = size;
         debugs(94,7, "found chunk: " << theChunkSize);
-        // parse chunk extensions only in the last-chunk
-        if (theChunkSize)
-            theStep = psUnusedChunkExtension;
-        else {
-            theIn->consume(p - theIn->content());
-            theStep = psLastChunkExtension;
-        }
-    } else
-        throw TexcHere("corrupted chunk size");
-}
+        buf_ = tok.remaining(); // parse checkpoint
+        parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
+        return true;
 
-void ChunkedCodingParser::parseUnusedChunkExtension()
-{
-    size_t crlfBeg = 0;
-    size_t crlfEnd = 0;
-    if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
-        inQuoted = inSlashed = false;
-        theIn->consume(crlfEnd);
-        theStep = theChunkSize ? psChunkBody : psTrailer;
-    } else {
-        theIn->consume(theIn->contentSize());
-        doNeedMoreData = true;
+    } else if (tok.atEnd()) {
+        return false; // need more data
     }
+
+    // else error
+    throw TexcHere("corrupted chunk size");
+    return false; // should not be reachable
 }
 
-void ChunkedCodingParser::parseChunkBody()
+/**
+ * Parses a set of RFC 7230 section 4.1.1 chunk-ext
+ * http://tools.ietf.org/html/rfc7230#section-4.1.1
+ *
+ *   chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+ *   chunk-ext-name = token
+ *   chunk-ext-val  = token / quoted-string
+ *
+ * ICAP 'use-original-body=N' extension is supported.
+ */
+bool
+Http::One::TeChunkedParser::parseChunkExtension(::Parser::Tokenizer &tok, bool skipKnown)
 {
-    Must(theLeftBodySize > 0); // Should, really
+    // TODO implement a proper quoted-string Tokenizer method
+    static const CharacterSet qString = CharacterSet("qString","\"\r\n").add('\0').complement();
 
-    const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
-    const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
+    SBuf ext;
+    while (tok.skip(';') && tok.prefix(ext, CharacterSet::TCHAR)) {
 
-    doNeedMoreData = availSize < theLeftBodySize;
-    // and we may also need more space
+        // whole value part is optional. if no '=' expect next chunk-ext
+        if (tok.skip('=')) {
+
+            if (!skipKnown) {
+                if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
+                    debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
+                    buf_ = tok.remaining(); // parse checkpoint
+                    continue;
+                }
+            }
 
-    theOut->append(theIn->content(), safeSize);
-    theIn->consume(safeSize);
-    theLeftBodySize -= safeSize;
+            debugs(94, 5, "skipping unknown chunk extension " << ext);
 
-    if (theLeftBodySize == 0)
-        theStep = psChunkEnd;
-    else
-        Must(needsMoreData() || needsMoreSpace());
-}
-
-void ChunkedCodingParser::parseChunkEnd()
-{
-    Must(theLeftBodySize == 0); // Should(), really
+            // unknown might have a value token ...
+            if (tok.skipAll(CharacterSet::TCHAR) && !tok.atEnd()) {
+                buf_ = tok.remaining(); // parse checkpoint
+                continue;
+            }
 
-    size_t crlfBeg = 0;
-    size_t crlfEnd = 0;
+            // ... or a quoted-string
+            if (tok.skipOne(CharacterSet::DQUOTE) && tok.skipAll(qString) && tok.skipOne(CharacterSet::DQUOTE)) {
+                buf_ = tok.remaining(); // parse checkpoint
+                continue;
+            }
 
-    if (findCrlf(crlfBeg, crlfEnd)) {
-        if (crlfBeg != 0) {
-            throw TexcHere("found data between chunk end and CRLF");
-            return;
+            // otherwise need more data OR corrupt syntax
+            break;
         }
 
-        theIn->consume(crlfEnd);
-        theChunkSize = 0; // done with the current chunk
-        theStep = psChunkSize;
-        return;
+        if (!tok.atEnd())
+            buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
     }
 
-    doNeedMoreData = true;
-}
-
-void ChunkedCodingParser::parseTrailer()
-{
-    Must(theChunkSize == 0); // Should(), really
-
-    while (mayContinue())
-        parseTrailerHeader();
-}
-
-void ChunkedCodingParser::parseTrailerHeader()
-{
-    size_t crlfBeg = 0;
-    size_t crlfEnd = 0;
-
-    if (findCrlf(crlfBeg, crlfEnd)) {
+    if (tok.atEnd())
+        return false;
 
-#if TRAILERS_ARE_SUPPORTED
-        if (crlfBeg > 0)
-            theTrailer.append(theIn->content(), crlfEnd);
-#endif
-
-        theIn->consume(crlfEnd);
-
-        if (crlfBeg == 0)
-            theStep = psMessageEnd;
-
-        return;
+    if (skipLineTerminator(tok)) {
+        buf_ = tok.remaining(); // checkpoint
+        // non-0 chunk means data, 0-size means optional Trailer follows
+        parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
+        return true;
     }
 
-    doNeedMoreData = true;
-}
-
-void ChunkedCodingParser::parseMessageEnd()
-{
-    // termination step, should not be called
-    Must(false); // Should(), really
-}
-
-/// Finds next CRLF. Does not store parsing state.
-bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
-{
-    bool quoted = false;
-    bool slashed = false;
-    return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
+    throw TexcHere("corrupted chunk extension value");
+    return false;
 }
 
-/// Finds next CRLF. Parsing state stored in quoted and slashed
-/// parameters. Incremental: can resume when more data is available.
-bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed)
+bool
+Http::One::TeChunkedParser::parseChunkBody(::Parser::Tokenizer &tok)
 {
-    // XXX: This code was copied, with permission, from another software.
-    // There is a similar and probably better code inside httpHeaderParse
-    // but it seems difficult to isolate due to parsing-unrelated bloat.
-    // Such isolation should probably be done before this class is used
-    // for handling of traffic "more external" than ICAP.
-
-    const char *buf = theIn->content();
-    size_t size = theIn->contentSize();
-
-    ssize_t crOff = -1;
-
-    for (size_t i = 0; i < size; ++i) {
-        if (slashed) {
-            slashed = false;
-            continue;
-        }
+    Must(theLeftBodySize > 0); // Should, really
 
-        const char c = buf[i];
+    buf_ = tok.remaining(); // sync buffers before buf_ use
 
-        // handle quoted strings
-        if (quoted) {
-            if (c == '\\')
-                slashed = true;
-            else if (c == '"')
-                quoted = false;
-
-            continue;
-        } else if (c == '"') {
-            quoted = true;
-            crOff = -1;
-            continue;
-        }
-
-        if (crOff < 0) { // looking for the first CR or LF
+    // TODO fix type mismatches and casting for these
+    const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
+    const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
 
-            if (c == '\n') {
-                crlfBeg = i;
-                crlfEnd = ++i;
-                return true;
-            }
+    theOut->append(buf_.rawContent(), safeSize);
+    buf_.consume(safeSize);
+    theLeftBodySize -= safeSize;
 
-            if (c == '\r')
-                crOff = i;
-        } else { // skipping CRs, looking for the first LF
-
-            if (c == '\n') {
-                crlfBeg = crOff;
-                crlfEnd = ++i;
-                return true;
-            }
+    tok.reset(buf_); // sync buffers after consume()
 
-            if (c != '\r')
-                crOff = -1;
-        }
-    }
+    if (theLeftBodySize == 0)
+        return parseChunkEnd(tok);
+    else
+        Must(needsMoreData() || needsMoreSpace());
 
-    return false;
+    return true;
 }
 
-// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
-void ChunkedCodingParser::parseLastChunkExtension()
+bool
+Http::One::TeChunkedParser::parseChunkEnd(::Parser::Tokenizer &tok)
 {
-    size_t crlfBeg = 0;
-    size_t crlfEnd = 0;
-
-    if (!findCrlf(crlfBeg, crlfEnd)) {
-        doNeedMoreData = true;
-        return;
-    }
-
-    const char *const startExt = theIn->content();
-    const char *const endExt = theIn->content() + crlfBeg;
-
-    // chunk-extension starts at startExt and ends with LF at endEx
-    for (const char *p = startExt; p < endExt;) {
-
-        while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'
-
-        if (*p++ != ';') // each ext name=value pair is preceded with ';'
-            break;
-
-        while (*p == ' ' || *p == '\t') ++p; // skip spaces before name
+    Must(theLeftBodySize == 0); // Should(), really
 
-        if (p >= endExt)
-            break; // malformed extension: ';' without ext name=value pair
+    if (skipLineTerminator(tok)) {
+        buf_ = tok.remaining(); // parse checkpoint
+        theChunkSize = 0; // done with the current chunk
+        parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+        return true;
 
-        const int extSize = endExt - p;
-        // TODO: we need debugData() stream manipulator to dump data
-        debugs(94,7, "Found chunk extension; size=" << extSize);
-
-        // TODO: support implied *LWS around '='
-        if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
-            (void)StringToInt64(p+18, useOriginBody, &p, 10);
-            debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
-            break; // remove to support more than just use-original-body
-        } else {
-            debugs(94, 5, HERE << "skipping unknown chunk extension");
-            // TODO: support quoted-string chunk-ext-val
-            while (p < endExt && *p != ';') ++p; // skip until the next ';'
-        }
+    } else if (!tok.atEnd()) {
+        throw TexcHere("found data between chunk end and CRLF");
     }
 
-    theIn->consume(crlfEnd);
-    theStep = theChunkSize ? psChunkBody : psTrailer;
+    return false;
 }
 

=== renamed file 'src/ChunkedCodingParser.h' => 'src/http/one/TeChunkedParser.h'
--- src/ChunkedCodingParser.h	2015-01-13 07:25:36 +0000
+++ src/http/one/TeChunkedParser.h	2015-06-01 19:46:19 +0000
@@ -1,84 +1,65 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
-#ifndef SQUID_CHUNKEDCODINGPARSER_H
-#define SQUID_CHUNKEDCODINGPARSER_H
+#ifndef SQUID_SRC_HTTP_ONE_TeChunkedParser_H
+#define SQUID_SRC_HTTP_ONE_TeChunkedParser_H
+
+#include "http/one/Parser.h"
 
 class MemBuf;
 
+namespace Http
+{
+namespace One
+{
+
 /**
- \ingroup ChunkEncodingAPI Chunked Encoding API
- \par
- * ChunkedCodingParser is an incremental parser for chunked transfer coding
- * used by HTTP and ICAP. The parser shovels content bytes from the raw
+ * An incremental parser for chunked transfer coding
+ * defined in RFC 7230 section 4.1.
+ * http://tools.ietf.org/html/rfc7230#section-4.1
+ *
+ * The parser shovels content bytes from the raw
  * input buffer into the content output buffer, both caller-supplied.
  * Ignores chunk extensions except for ICAP's ieof.
- * Has a trailer-handling placeholder.
+ * Trailers are available via mimeHeader() if wanted.
  */
-class ChunkedCodingParser
+class TeChunkedParser : public Http1::Parser
 {
-
 public:
-    ChunkedCodingParser();
-
-    void reset();
+    TeChunkedParser();
+    virtual ~TeChunkedParser() {theOut=NULL;/* we dont own this object */}
 
-    /**
-     \retval true    complete success
-     \retval false   needs more data
-     \throws ??      error.
-     */
-    bool parse(MemBuf *rawData, MemBuf *parsedContent);
+    /// set the buffer to be used to store decoded chunk data
+    void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
 
-    bool needsMoreData() const;
     bool needsMoreSpace() const;
 
-private:
-    typedef void (ChunkedCodingParser::*Step)();
+    /* Http1::Parser API */
+    virtual void clear();
+    virtual bool parse(const SBuf &);
+    virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
 
 private:
-    bool mayContinue() const;
-
-    void parseChunkSize();
-    void parseUnusedChunkExtension();
-    void parseLastChunkExtension();
-    void parseChunkBeg();
-    void parseChunkBody();
-    void parseChunkEnd();
-    void parseTrailer();
-    void parseTrailerHeader();
-    void parseMessageEnd();
+    bool parseChunkSize(::Parser::Tokenizer &tok);
+    bool parseChunkExtension(::Parser::Tokenizer &tok, bool skipKnown);
+    bool parseChunkBody(::Parser::Tokenizer &tok);
+    bool parseChunkEnd(::Parser::Tokenizer &tok);
 
-    bool findCrlf(size_t &crlfBeg, size_t &crlfEnd);
-    bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool &quoted, bool &slashed);
-
-private:
-    static Step psChunkSize;
-    static Step psUnusedChunkExtension;
-    static Step psLastChunkExtension;
-    static Step psChunkBody;
-    static Step psChunkEnd;
-    static Step psTrailer;
-    static Step psMessageEnd;
-
-    MemBuf *theIn;
     MemBuf *theOut;
-
-    Step theStep;
     uint64_t theChunkSize;
     uint64_t theLeftBodySize;
-    bool doNeedMoreData;
-    bool inQuoted; ///< stores parsing state for incremental findCrlf
-    bool inSlashed; ///< stores parsing state for incremental findCrlf
 
 public:
     int64_t useOriginBody;
 };
 
-#endif /* SQUID_CHUNKEDCODINGPARSER_H */
+} // namespace One
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_ONE_TeChunkedParser_H */
 

=== modified file 'src/http/one/forward.h'
--- src/http/one/forward.h	2015-01-20 12:33:23 +0000
+++ src/http/one/forward.h	2015-06-01 19:44:26 +0000
@@ -1,32 +1,34 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #ifndef SQUID_SRC_HTTP_ONE_FORWARD_H
 #define SQUID_SRC_HTTP_ONE_FORWARD_H
 
 #include "base/RefCount.h"
 
 namespace Http {
 namespace One {
 
 class Parser;
 typedef RefCount<Http::One::Parser> ParserPointer;
 
+class TeChunkedParser;
+
 class RequestParser;
 typedef RefCount<Http::One::RequestParser> RequestParserPointer;
 
 class ResponseParser;
 typedef RefCount<Http::One::ResponseParser> ResponseParserPointer;
 
 } // namespace One
 } // namespace Http
 
 namespace Http1 = Http::One;
 
 #endif /* SQUID_SRC_HTTP_ONE_FORWARD_H */
 



More information about the squid-dev mailing list