[squid-dev] [PATCH] Simpler and more robust request line parser

Amos Jeffries squid3 at treenet.co.nz
Wed Nov 18 09:07:21 UTC 2015


Since this is getting kind of urgent now I have taken the liberty of
making the CharacterSet changes myself.

If there are no objections I would like to apply this tomorrow. It is at
least a vast improvement over current trunk as-is. We can continue to
debating the CTL characters and "strict parse" behaviour for a followup
change if necessary.

Amos

-------------- next part --------------
=== modified file 'src/http/one/RequestParser.cc'
--- src/http/one/RequestParser.cc	2015-07-16 05:09:30 +0000
+++ src/http/one/RequestParser.cc	2015-11-18 06:59:16 +0000
@@ -1,39 +1,44 @@
 /*
  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #include "squid.h"
 #include "Debug.h"
 #include "http/one/RequestParser.h"
 #include "http/one/Tokenizer.h"
 #include "http/ProtocolVersion.h"
 #include "profiler/Profiler.h"
 #include "SquidConfig.h"
 
+// the right debugs() level for parsing errors
+inline static int
+ErrorLevel() {
+    return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
+}
+
 Http::One::RequestParser::RequestParser() :
-    Parser(),
-    firstLineGarbage_(0)
+    Parser()
 {}
 
 Http1::Parser::size_type
 Http::One::RequestParser::firstLineSize() const
 {
     // RFC 7230 section 2.6
     /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
     return method_.image().length() + uri_.length() + 12;
 }
 
 /**
  * Attempt to parse the first line of a new request message.
  *
  * Governed by RFC 7230 section 3.5
  *  "
  *    In the interest of robustness, a server that is expecting to receive
  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  *    received prior to the request-line.
  *  "
  *
@@ -45,404 +50,304 @@
 {
     if (Config.onoff.relaxed_header_parser) {
         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
                    "CRLF bytes received ahead of request-line. " <<
                    "Ignored due to relaxed_header_parser.");
         // Be tolerant of prefix empty lines
         // ie any series of either \n or \r\n with no other characters and no repeated \r
         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
             buf_.consume(1);
         }
     }
 }
 
 /**
  * Attempt to parse the method field out of an HTTP message request-line.
  *
  * Governed by:
  *  RFC 1945 section 5.1
  *  RFC 7230 section 2.6, 3.1 and 3.5
- *
- * Parsing state is stored between calls. The current implementation uses
- * checkpoints after each successful request-line field.
- * The return value tells you whether the parsing is completed or not.
- *
- * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
- * \retval  1  successful parse. method_ is filled and buffer consumed including first delimiter.
- * \retval  0  more data is needed to complete the parse
  */
-int
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
+bool
+Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
 {
-    // scan for up to 16 valid method characters.
-    static const size_t maxMethodLength = 16; // TODO: make this configurable?
-
     // method field is a sequence of TCHAR.
-    SBuf methodFound;
-    if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
-
-        method_ = HttpRequestMethod(methodFound);
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-    } else if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find method");
-        return 0;
-
-    } // else error(s)
+    // Limit to 32 characters to prevent overly long sequences of non-HTTP
+    // being sucked in before mismatch is detected. 32 is itself annoyingly
+    // big but there are methods registered by IANA that reach 17 bytes:
+    //  http://www.iana.org/assignments/http-methods
+    static const size_t maxMethodLength = 32; // TODO: make this configurable?
 
-    // non-delimiter found after accepted method bytes means ...
-    if (methodFound.length() == maxMethodLength) {
-        // method longer than acceptible.
-        // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
-        parseStatusCode = Http::scNotImplemented;
-        debugs(33, 5, "invalid request-line. method too long");
-    } else {
-        // invalid character in the URL
-        // RFC 7230 section 3.1.1 required (SHOULD) 400 response
+    SBuf methodFound;
+    if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
+        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
         parseStatusCode = Http::scBadRequest;
-        debugs(33, 5, "invalid request-line. missing method delimiter");
+        return false;
     }
-    return -1;
+    method_ = HttpRequestMethod(methodFound);
+    return true;
 }
 
-static CharacterSet
-uriValidCharacters()
+/// the characters which truly are valid within URI
+static const CharacterSet &
+UriValidCharacters()
 {
-    CharacterSet UriChars("URI-Chars","");
-
     /* RFC 3986 section 2:
      * "
      *   A URI is composed from a limited set of characters consisting of
      *   digits, letters, and a few graphic symbols.
      * "
      */
-    // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
-    UriChars.add('%');
-    UriChars += CharacterSet::HEXDIG;
-    // RFC 3986 section 2.2 - reserved characters
-    UriChars += CharacterSet("gen-delims", ":/?#[]@");
-    UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
-    // RFC 3986 section 2.3 - unreserved characters
-    UriChars += CharacterSet::ALPHA;
-    UriChars += CharacterSet::DIGIT;
-    UriChars += CharacterSet("unreserved", "-._~");
+    static const CharacterSet UriChars =
+        CharacterSet("URI-Chars","") +
+        // RFC 3986 section 2.2 - reserved characters
+        CharacterSet("gen-delims", ":/?#[]@") +
+        CharacterSet("sub-delims", "!$&'()*+,;=") +
+        // RFC 3986 section 2.3 - unreserved characters
+        CharacterSet::ALPHA +
+        CharacterSet::DIGIT +
+        CharacterSet("unreserved", "-._~") +
+        // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
+        CharacterSet("pct-encoded", "%") +
+        CharacterSet::HEXDIG;
 
     return UriChars;
 }
 
-int
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+/// characters HTTP permits tolerant parsers to accept as delimiters
+static const CharacterSet &
+RelaxedDelimiterCharacters()
+{
+    // RFC 7230 section 3.5
+    // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
+    // or bare CR as whitespace between request-line fields
+    static const CharacterSet RelaxedDels =
+        CharacterSet::SP +
+        CharacterSet::HTAB +
+        CharacterSet("VT,FF","\x0B\x0C") +
+        CharacterSet::CR;
+
+    return RelaxedDels;
+}
+
+/// characters used to separate HTTP fields
+const CharacterSet &
+Http::One::RequestParser::DelimiterCharacters()
 {
-    // URI field is a sequence of ... what? segments all have different valid charset
-    // go with non-whitespace non-binary characters for now
-    static CharacterSet UriChars = uriValidCharacters();
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::SP;
+}
+
+/// characters which Squid will accept in the HTTP request-target (URI)
+const CharacterSet &
+Http::One::RequestParser::RequestTargetCharacters()
+{
+    if (Config.onoff.relaxed_header_parser) {
+#if USE_HTTP_VIOLATIONS
+        static const CharacterSet RelaxedExtended =
+            UriValidCharacters() +
+            // accept whitespace (extended), it will be dealt with later
+            DelimiterCharacters() +
+            // RFC 2396 unwise character set which must never be transmitted
+            // in un-escaped form. But many web services do anyway.
+            CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
+            // UTF-8 because we want to be future-proof
+            CharacterSet("UTF-8", 128, 255);
+
+        return RelaxedExtended;
+#else
+        static const CharacterSet RelaxedCompliant =
+            UriValidCharacters() +
+            // accept whitespace (extended), it will be dealt with later.
+            DelimiterCharacters();
 
+        return RelaxedCompliant;
+#endif
+    }
+
+    // strict parse only accepts what the RFC say we can
+    return UriValidCharacters();
+}
+
+bool
+Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+{
     /* Arbitrary 64KB URI upper length limit.
      *
      * Not quite as arbitrary as it seems though. Old SquidString objects
      * cannot store strings larger than 64KB, so we must limit until they
      * have all been replaced with SBuf.
      *
      * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
      * at least 8000 octets for the whole line, including method and version.
      */
-    const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
-                                    static_cast<size_t>((64*1024)-1));
+    const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
 
     SBuf uriFound;
-
-    // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
-    if (tok.prefix(uriFound, UriChars, maxUriLength) && tok.skipOne(CharacterSet::SP)) {
-        uri_ = uriFound;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-        // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
-    } else if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
-        debugs(33, 5, "HTTP/0.9 syntax request-line detected");
-        msgProtocol_ = Http::ProtocolVersion(0,9);
-        uri_ = uriFound; // found by successful prefix() call earlier.
-        parseStatusCode = Http::scOkay;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-    } else if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find URI");
-        return 0;
+    if (!tok.prefix(uriFound, RequestTargetCharacters())) {
+        parseStatusCode = Http::scBadRequest;
+        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
+        return false;
     }
 
-    // else errors...
-
-    if (uriFound.length() == maxUriLength) {
+    if (uriFound.length() > maxUriLength) {
         // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
         parseStatusCode = Http::scUriTooLong;
-        debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
-    } else {
-        // RFC 7230 section 3.1.1 required (SHOULD) 400 response
-        parseStatusCode = Http::scBadRequest;
-        debugs(33, 5, "invalid request-line. missing URI delimiter");
+        debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
+               "-byte URI exceeds " << maxUriLength << "-byte limit");
+        return false;
     }
-    return -1;
+
+    uri_ = uriFound;
+    return true;
 }
 
-int
+bool
 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
 {
-    // partial match of HTTP/1 magic prefix
-    if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
-    }
+    const auto savedTok = tok;
 
-    if (!tok.skip(Http1magic)) {
-        debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
-        parseStatusCode = Http::scHttpVersionNotSupported;
-        return -1;
+    SBuf digit;
+    // Searching for Http1magic precludes detecting HTTP/2+ versions.
+    // Rewrite if we ever _need_ to return 505 (Version Not Supported) errors.
+    if (tok.suffix(digit, CharacterSet::DIGIT) && tok.skipSuffix(Http1magic)) {
+        msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
+        return true;
     }
 
-    if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
+    // A GET request might use HTTP/0.9 syntax
+    if (method_ == Http::METHOD_GET) {
+        // RFC 1945 - no HTTP version field at all
+        tok = savedTok; // in case the URI ends with a digit
+        // report this assumption as an error if configured to triage parsing
+        debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
+        msgProtocol_ = Http::ProtocolVersion(0,9);
+        return true;
     }
 
-    // get the version minor DIGIT
-    SBuf digit;
-    if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
+    debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
+    parseStatusCode = Http::scBadRequest;
+    return false;
+}
 
-        // found version fully AND terminator
-        msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
-        parseStatusCode = Http::scOkay;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
+/**
+ * Skip characters separating request-line fields.
+ * To handle bidirectional parsing, the caller does the actual skipping and
+ * we just check how many character the caller has skipped.
+ */
+bool
+Http::One::RequestParser::skipDelimiter(const size_t count)
+{
+    if (count <= 0) {
+        debugs(33, ErrorLevel(), "invalid request-line: missing delimiter");
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
 
-    } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
+    // tolerant parser allows multiple whitespace characters between request-line fields
+    if (count > 1 && !Config.onoff.relaxed_header_parser) {
+        debugs(33, ErrorLevel(), "invalid request-line: too many delimiters");
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
 
-    } // else error ...
+    return true;
+}
 
-    // non-DIGIT. invalid version number.
-    parseStatusCode = Http::scHttpVersionNotSupported;
-    debugs(33, 5, "invalid request-line. garbage before line terminator");
-    return -1;
+/// Parse CRs at the end of request-line, just before the terminating LF.
+bool
+Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+{
+    if (Config.onoff.relaxed_header_parser) {
+        (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
+    } else {
+        if (!tok.skipOneTrailing(CharacterSet::CR)) {
+            debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
+            parseStatusCode = Http::scBadRequest;
+            return false;
+        }
+    }
+    return true;
 }
 
 /**
  * Attempt to parse the first line of a new request message.
  *
  * Governed by:
  *  RFC 1945 section 5.1
  *  RFC 7230 section 2.6, 3.1 and 3.5
  *
- * Parsing state is stored between calls. The current implementation uses
- * checkpoints after each successful request-line field.
- * The return value tells you whether the parsing is completed or not.
- *
  * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
  * \retval  1  successful parse. member fields contain the request-line items
  * \retval  0  more data is needed to complete the parse
  */
 int
 Http::One::RequestParser::parseRequestFirstLine()
 {
-    Http1::Tokenizer tok(buf_);
-
     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
     debugs(74, DBG_DATA, buf_);
 
-    // NP: would be static, except it need to change with reconfigure
-    CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
-
-    if (Config.onoff.relaxed_header_parser) {
-        // RFC 7230 section 3.5
-        // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
-        // as whitespace between request-line fields
-        WspDelim += CharacterSet::HTAB
-                    + CharacterSet("VT,FF","\x0B\x0C")
-                    + CharacterSet::CR;
-        debugs(74, 5, "using Parser relaxed WSP characters");
-    }
-
-    // only search for method if we have not yet found one
-    if (method_ == Http::METHOD_NONE) {
-        const int res = parseMethodField(tok, WspDelim);
-        if (res < 1)
-            return res;
-        // else keep going...
-    }
+    SBuf line;
 
-    // tolerant parser allows multiple whitespace characters between request-line fields
-    if (Config.onoff.relaxed_header_parser) {
-        const size_t garbage = tok.skipAll(WspDelim);
-        if (garbage > 0) {
-            firstLineGarbage_ += garbage;
-            buf_ = tok.remaining(); // re-checkpoint after garbage
-        }
-    }
-    if (tok.atEnd()) {
+    // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
+    // Now, the request line has to end at the first LF.
+    static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
+    ::Parser::Tokenizer lineTok(buf_);
+    if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
         debugs(74, 5, "Parser needs more data");
         return 0;
     }
 
-    // from here on, we have two possible parse paths: whitespace tolerant, and strict
-    if (Config.onoff.relaxed_header_parser) {
-        // whitespace tolerant
+    Http1::Tokenizer tok(line);
+    const CharacterSet &delimiters = DelimiterCharacters();
 
-        int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
+    if (!parseMethodField(tok))
+        return -1;
 
-        // NOTES:
-        // * this would be static, except WspDelim changes with reconfigure
-        // * HTTP-version charset is included by uriValidCharacters()
-        // * terminal CR is included by WspDelim here in relaxed parsing
-        CharacterSet LfDelim = uriValidCharacters() + WspDelim;
-
-        // seek the LF character, then tokenize the line in reverse
-        SBuf line;
-        if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
-            Http1::Tokenizer rTok(line);
-            SBuf nil;
-            (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
-            SBuf digit;
-            if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
-                uri_ = rTok.remaining();
-                msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
-                if (uri_.isEmpty()) {
-                    debugs(33, warnOnError, "invalid request-line. missing URL");
-                    parseStatusCode = Http::scBadRequest;
-                    return -1;
-                }
-
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-
-            } else if (method_ == Http::METHOD_GET) {
-                // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
-                debugs(33, 5, "HTTP/0.9 syntax request-line detected");
-                msgProtocol_ = Http::ProtocolVersion(0,9);
-                static const SBuf cr("\r",1);
-                uri_ = line.trim(cr,false,true);
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-            }
+    if (!skipDelimiter(tok.skipAll(delimiters)))
+        return -1;
 
-            debugs(33, warnOnError, "invalid request-line. not HTTP");
-            parseStatusCode = Http::scBadRequest;
-            return -1;
-        }
+    /* now parse backwards, to leave just the URI */
+    if (!skipTrailingCrs(tok))
+        return -1;
 
-        if (!tok.atEnd()) {
+    if (!parseHttpVersionField(tok))
+        return -1;
 
-#if USE_HTTP_VIOLATIONS
-            /*
-             * RFC 3986 explicitly lists the characters permitted in URI.
-             * A non-permitted character was found somewhere in the request-line.
-             * However, as long as we can find the LF, accept the characters
-             * which we know are invalid in any URI but actively used.
-             */
-            LfDelim.add('\0'); // Java
-            LfDelim.add(' ');  // IIS
-            LfDelim.add('\"'); // Bing
-            LfDelim.add('\\'); // MSIE, Firefox
-            LfDelim.add('|');  // Amazon
-            LfDelim.add('^');  // Microsoft News
-
-            // other ASCII characters for which RFC 2396 has explicitly disallowed use
-            // since 1998 and which were not later permitted by RFC 3986 in 2005.
-            LfDelim.add('<');  // HTML embedded in URL
-            LfDelim.add('>');  // HTML embedded in URL
-            LfDelim.add('`');  // Shell Script embedded in URL
-            LfDelim.add('{');  // JSON or Javascript embedded in URL
-            LfDelim.add('}');  // JSON or Javascript embedded in URL
-
-            // reset the tokenizer from anything the above did, then seek the LF character.
-            tok.reset(buf_);
-
-            if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
-
-                Http1::Tokenizer rTok(line);
-
-                // strip terminating CR (if any)
-                SBuf nil;
-                (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
-                line = rTok.remaining();
-
-                // strip terminating 'WSP HTTP-version' (if any)
-                if (rTok.suffix(nil,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
-                    hackExpectsMime_ = true; // client thinks its speaking HTTP, probably sent a mime block.
-                    uri_ = rTok.remaining();
-                } else
-                    uri_ = line; // no HTTP/1.x label found. Use the whole line.
-
-                if (uri_.isEmpty()) {
-                    debugs(33, warnOnError, "invalid request-line. missing URL");
-                    parseStatusCode = Http::scBadRequest;
-                    return -1;
-                }
-
-                debugs(33, warnOnError, "invalid request-line. treating as HTTP/0.9" << (hackExpectsMime_?" (with mime)":""));
-                msgProtocol_ = Http::ProtocolVersion(0,9);
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-
-            } else if (tok.atEnd()) {
-                debugs(74, 5, "Parser needs more data");
-                return 0;
-            }
-            // else, drop back to invalid request-line handling
-#endif
-            const SBuf t = tok.remaining();
-            debugs(33, warnOnError, "invalid request-line characters." << Raw("data", t.rawContent(), t.length()));
-            parseStatusCode = Http::scBadRequest;
-            return -1;
-        }
-        debugs(74, 5, "Parser needs more data");
-        return 0;
-    }
-    // else strict non-whitespace tolerant parse
+    if (!http0() && !skipDelimiter(tok.skipAllTrailing(delimiters)))
+        return -1;
 
-    // only search for request-target (URL) if we have not yet found one
-    if (uri_.isEmpty()) {
-        const int res = parseUriField(tok);
-        if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
-            return res;
-        // else keep going...
-    }
+    /* parsed everything before and after the URI */
 
-    if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data");
-        return 0;
-    }
+    if (!parseUriField(tok))
+        return -1;
 
-    // HTTP/1 version suffix (protocol magic) followed by CR*LF
-    if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
-        return parseHttpVersionField(tok);
+    if (!tok.atEnd()) {
+        debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
+        parseStatusCode = Http::scBadRequest;
+        return -1;
     }
 
-    // If we got here this method has been called too many times
-    parseStatusCode = Http::scInternalServerError;
-    debugs(33, 5, "ERROR: Parser already processed request-line");
-    return -1;
+    parseStatusCode = Http::scOkay;
+    buf_ = lineTok.remaining(); // incremental parse checkpoint
+    return 1;
 }
 
 bool
 Http::One::RequestParser::parse(const SBuf &aBuf)
 {
     buf_ = aBuf;
     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 
     // stage 1: locate the request-line
     if (parsingStage_ == HTTP_PARSE_NONE) {
         skipGarbageLines();
 
         // if we hit something before EOS treat it as a message
         if (!buf_.isEmpty())
             parsingStage_ = HTTP_PARSE_FIRST;
         else
             return false;
     }
 
     // stage 2: parse the request-line

=== modified file 'src/http/one/RequestParser.h'
--- src/http/one/RequestParser.h	2015-04-10 11:02:44 +0000
+++ src/http/one/RequestParser.h	2015-11-18 04:57:12 +0000
@@ -30,40 +30,44 @@
 class RequestParser : public Http1::Parser
 {
 public:
     RequestParser();
     virtual ~RequestParser() {}
 
     /* Http::One::Parser API */
     virtual void clear() {*this = RequestParser();}
     virtual Http1::Parser::size_type firstLineSize() const;
     virtual bool parse(const SBuf &aBuf);
 
     /// the HTTP method if this is a request message
     const HttpRequestMethod & method() const {return method_;}
 
     /// the request-line URI if this is a request message, or an empty string.
     const SBuf &requestUri() const {return uri_;}
 
 private:
     void skipGarbageLines();
     int parseRequestFirstLine();
-    int parseMethodField(Http1::Tokenizer &, const CharacterSet &);
-    int parseUriField(Http1::Tokenizer &);
-    int parseHttpVersionField(Http1::Tokenizer &);
+
+    /* all these return false and set parseStatusCode on parsing failures */
+    bool parseMethodField(Http1::Tokenizer &);
+    bool parseUriField(Http1::Tokenizer &);
+    bool parseHttpVersionField(Http1::Tokenizer &);
+    bool skipDelimiter(const size_t count);
+    bool skipTrailingCrs(Http1::Tokenizer &tok);
+
+    bool http0() const {return !msgProtocol_.major;}
+    static const CharacterSet &DelimiterCharacters();
+    static const CharacterSet &RequestTargetCharacters();
 
     /// what request method has been found on the first line
     HttpRequestMethod method_;
 
     /// raw copy of the original client request-line URI field
     SBuf uri_;
-
-    /// amount of garbage bytes tolerantly skipped inside the request-line
-    /// may be -1 if sender only omitted CR on terminator
-    int64_t firstLineGarbage_;
 };
 
 } // namespace One
 } // namespace Http
 
 #endif /*  _SQUID_SRC_HTTP_ONE_REQUESTPARSER_H */
 

=== modified file 'src/tests/testHttp1Parser.cc'
--- src/tests/testHttp1Parser.cc	2015-02-20 03:25:12 +0000
+++ src/tests/testHttp1Parser.cc	2015-11-18 05:02:38 +0000
@@ -36,55 +36,99 @@
 
     // default to strict parser. set for loose parsing specifically where behaviour differs.
     Config.onoff.relaxed_header_parser = 0;
 
     Config.maxRequestHeaderSize = 1024; // XXX: unit test the RequestParser handling of this limit
 }
 
 #if __cplusplus >= 201103L
 
 struct resultSet {
     bool parsed;
     bool needsMore;
     Http1::ParseState parserState;
     Http::StatusCode status;
     SBuf::size_type suffixSz;
     HttpRequestMethod method;
     const char *uri;
     AnyP::ProtocolVersion version;
 };
 
+// define SQUID_DEBUG_TESTS to see exactly which test sub-cases fail and where
+#ifdef SQUID_DEBUG_TESTS
+// not optimized for runtime use
+static void
+Replace(SBuf &where, const SBuf &what, const SBuf &with)
+{
+    // prevent infinite loops
+    if (!what.length() || with.find(what) != SBuf::npos)
+        return;
+
+    SBuf::size_type pos = 0;
+    while ((pos = where.find(what, pos)) != SBuf::npos) {
+        SBuf buf = where.substr(0, pos);
+        buf.append(with);
+        buf.append(where.substr(pos+what.length()));
+        where = buf;
+        pos += with.length();
+    }
+}
+
+static SBuf Pretty(SBuf raw)
+{
+    Replace(raw, SBuf("\r"), SBuf("\\r"));
+    Replace(raw, SBuf("\n"), SBuf("\\n"));
+    return raw;
+}
+#endif
+
 static void
 testResults(int line, const SBuf &input, Http1::RequestParser &output, struct resultSet &expect)
 {
-#if WHEN_TEST_DEBUG_IS_NEEDED
-    printf("TEST @%d, in=%u: " SQUIDSBUFPH "\n", line, input.length(), SQUIDSBUFPRINT(input));
+#ifdef SQUID_DEBUG_TESTS
+    std::cerr << "TEST @" << line << ", in=" << Pretty(input) << "\n";
+#endif
+
+    const bool parsed = output.parse(input);
+
+#ifdef SQUID_DEBUG_TESTS
+    if (expect.parsed != parsed)
+        std::cerr << "\tparse-FAILED: " << expect.parsed << "!=" << parsed << "\n";
+    else if (parsed && expect.method != output.method_)
+        std::cerr << "\tmethod-FAILED: " << expect.method << "!=" << output.method_ << "\n";
+    if (expect.status != output.parseStatusCode)
+        std::cerr << "\tscode-FAILED: " << expect.status << "!=" << output.parseStatusCode << "\n";
+    if (expect.suffixSz != output.buf_.length())
+        std::cerr << "\tsuffixSz-FAILED: " << expect.suffixSz << "!=" << output.buf_.length() << "\n";
 #endif
 
     // runs the parse
-    CPPUNIT_ASSERT_EQUAL(expect.parsed, output.parse(input));
+    CPPUNIT_ASSERT_EQUAL(expect.parsed, parsed);
+
+    // if parsing was successful, check easily visible field outputs
+    if (parsed) {
+        CPPUNIT_ASSERT_EQUAL(expect.method, output.method_);
+        if (expect.uri != NULL)
+            CPPUNIT_ASSERT_EQUAL(0, output.uri_.cmp(expect.uri));
+        CPPUNIT_ASSERT_EQUAL(expect.version, output.msgProtocol_);
+    }
 
-    // check easily visible field outputs
-    CPPUNIT_ASSERT_EQUAL(expect.method, output.method_);
-    if (expect.uri != NULL)
-        CPPUNIT_ASSERT_EQUAL(0, output.uri_.cmp(expect.uri));
-    CPPUNIT_ASSERT_EQUAL(expect.version, output.msgProtocol_);
     CPPUNIT_ASSERT_EQUAL(expect.status, output.parseStatusCode);
 
     // check more obscure states
     CPPUNIT_ASSERT_EQUAL(expect.needsMore, output.needsMoreData());
     if (output.needsMoreData())
         CPPUNIT_ASSERT_EQUAL(expect.parserState, output.parsingStage_);
     CPPUNIT_ASSERT_EQUAL(expect.suffixSz, output.buf_.length());
 }
 #endif /* __cplusplus >= 200103L */
 
 void
 testHttp1Parser::testParserConstruct()
 {
     // whether the constructor works
     {
         Http1::RequestParser output;
         CPPUNIT_ASSERT_EQUAL(true, output.needsMoreData());
         CPPUNIT_ASSERT_EQUAL(Http1::HTTP_PARSE_NONE, output.parsingStage_);
         CPPUNIT_ASSERT_EQUAL(Http::scNone, output.parseStatusCode); // XXX: clear() not being called.
         CPPUNIT_ASSERT(output.buf_.isEmpty());
@@ -129,41 +173,41 @@
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,0,9)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // RFC 1945 : invalid HTTP/0.9 simple-request (only GET is valid)
     {
         input.append("POST /\r\n", 8);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
-            .suffixSz = 3,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_POST),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // RFC 1945 and 7230 : HTTP/1.0 request
     {
         input.append("GET / HTTP/1.0\r\n", 16);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
@@ -200,183 +244,183 @@
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,2)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // RFC 7230 : future versions do not use request-line syntax
     {
         input.append("GET / HTTP/2.0\r\n", 16);
         struct resultSet expectA = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_MIME,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectA);
         input.clear();
 
         input.append("GET / HTTP/10.12\r\n", 18);
         struct resultSet expectB = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_MIME,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectB);
         input.clear();
     }
 
     // unknown non-HTTP protocol names
     {
         input.append("GET / FOO/1.0\r\n", 15);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // no version digits
     {
         input.append("GET / HTTP/\r\n", 13);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // no major version
     {
         input.append("GET / HTTP/.1\r\n", 15);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // no version dot
     {
         input.append("GET / HTTP/11\r\n", 15);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // negative major version (bug 3062)
     {
         input.append("GET / HTTP/-999999.1\r\n", 22);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // no minor version
     {
         input.append("GET / HTTP/1.\r\n", 15);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // negative major version (bug 3062 corollary)
     {
         input.append("GET / HTTP/1.-999999\r\n", 22);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 }
 
 void
 testHttp1Parser::testParseRequestLineStrange()
 {
     // ensure MemPools etc exist
     globalSetup();
 
     SBuf input;
     Http1::RequestParser output;
 
     // space padded URL
@@ -386,75 +430,75 @@
         Config.onoff.relaxed_header_parser = 1;
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
 
         Config.onoff.relaxed_header_parser = 0;
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
-            .suffixSz = input.length()-4,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
     // whitespace inside URI. (nasty but happens)
     {
         input.append("GET /fo o/ HTTP/1.1\r\n", 21);
         Config.onoff.relaxed_header_parser = 1;
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/fo o/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
 
         Config.onoff.relaxed_header_parser = 0;
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported, // version being "o/ HTTP/1.1"
-            .suffixSz = 13,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
     // additional data in buffer
     {
         input.append("GET / HTTP/1.1\r\nboo!", 20);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 4, // strlen("boo!")
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
@@ -481,98 +525,98 @@
         input.append("GET / HTTP/1.1\n", 15);
         Config.onoff.relaxed_header_parser = 1;
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
 
         Config.onoff.relaxed_header_parser = 0;
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = 9,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
     // alternative EOL sequence: double-NL-only
     // RFC 7230 tolerance permits omitted CR
     // NP: represents a request with no mime headers
     {
         input.append("GET / HTTP/1.1\n\n", 16);
         Config.onoff.relaxed_header_parser = 1;
         struct resultSet expect = {
             .parsed = true,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(Http::METHOD_GET),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
 
         Config.onoff.relaxed_header_parser = 0;
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = 10,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
     // space padded version
     {
         // RFC 7230 specifies version is followed by CRLF. No intermediary bytes.
         input.append("GET / HTTP/1.1 \r\n", 17);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 }
 
 void
 testHttp1Parser::testParseRequestLineMethods()
 {
     // ensure MemPools etc exist
     globalSetup();
 
     SBuf input;
     Http1::RequestParser output;
 
     // RFC 7230 : dot method
     {
         input.append(". / HTTP/1.1\r\n", 14);
@@ -628,57 +672,59 @@
     }
 
     // unknown method
     {
         input.append("HELLOWORLD / HTTP/1.1\r\n", 23);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
             .parserState = Http1::HTTP_PARSE_MIME,
             .status = Http::scOkay,
             .suffixSz = 0,
             .method = HttpRequestMethod(SBuf("HELLOWORLD")),
             .uri = "/",
             .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1)
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
+#if 0
     // too-long method (over 16 bytes)
     {
         input.append("HELLOSTRANGEWORLD / HTTP/1.1\r\n", 31);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scNotImplemented,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
+#endif
 
     // method-only
     {
         input.append("A\n", 2);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     {
@@ -865,122 +911,123 @@
     }
 
     // no method (with method delimiter)
     {
         input.append(" / HTTP/1.0\n", 12);
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
-    // binary code in method (invalid)
+    // binary code after method (invalid)
     {
-        input.append("GET\x0A / HTTP/1.1\r\n", 17);
+        input.append("GET\x16 / HTTP/1.1\r\n", 17);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
-    // binary code NUL! in method (always invalid)
+    // binary code NUL! after method (always invalid)
     {
         input.append("GET\0 / HTTP/1.1\r\n", 17);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
-    // no URL (grammer invalid, ambiguous with RFC 1945 HTTP/0.9 simple-request)
+    // Either an RFC 1945 HTTP/0.9 simple-request for an "HTTP/1.1" URI or
+    // an invalid (no URI) HTTP/1.1 request. We treat this as latter, naturally.
     {
         input.append("GET  HTTP/1.1\r\n", 15);
-        // RFC 7230 tolerance allows sequence of SP to make this ambiguous
         Config.onoff.relaxed_header_parser = 1;
         struct resultSet expect = {
-            .parsed = true,
+            .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scOkay,
-            .suffixSz = 0,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "HTTP/1.1",
-            .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,0,9)
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
+            .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
 
         Config.onoff.relaxed_header_parser = 0;
         struct resultSet expectStrict = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
-            .suffixSz = 11,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expectStrict);
         input.clear();
     }
 
-    // no URL (grammer invalid, ambiguous with RFC 1945 HTTP/0.9 simple-request)
+    // Either an RFC 1945 HTTP/0.9 simple-request for an "HTTP/1.1" URI or
+    // an invalid (no URI) HTTP/1.1 request. We treat this as latter, naturally.
     {
         input.append("GET HTTP/1.1\r\n", 14);
         struct resultSet expect = {
-            .parsed = true,
+            .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scOkay,
-            .suffixSz = 0,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "HTTP/1.1",
-            .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,0,9)
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
+            .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 
     // binary line
     {
         input.append("\xB\xC\xE\xF\n", 5);
         struct resultSet expect = {
             .parsed = false,
             .needsMore = false,
             .parserState = Http1::HTTP_PARSE_DONE,
             .status = Http::scBadRequest,
             .suffixSz = input.length(),
             .method = HttpRequestMethod(),
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
@@ -1019,43 +1066,41 @@
             .uri = NULL,
             .version = AnyP::ProtocolVersion()
         };
         output.clear();
         testResults(__LINE__, input, output, expect);
         input.clear();
     }
 }
 
 void
 testHttp1Parser::testDripFeed()
 {
     // Simulate a client drip-feeding Squid a few bytes at a time.
     // extend the size of the buffer from 0 bytes to full request length
     // calling the parser repeatedly as visible data grows.
 
     SBuf data;
     data.append("\n\n\n\n\n\n\n\n\n\n\n\n", 12);
     SBuf::size_type garbageEnd = data.length();
     data.append("GET ", 4);
-    SBuf::size_type methodEnd = data.length()-1;
     data.append("http://example.com/ ", 20);
-    SBuf::size_type uriEnd = data.length()-1;
     data.append("HTTP/1.1\r\n", 10);
     SBuf::size_type reqLineEnd = data.length() - 1;
     data.append("Host: example.com\r\n\r\n", 21);
     SBuf::size_type mimeEnd = data.length() - 1;
     data.append("...", 3); // trailer to catch mime EOS errors.
 
     SBuf ioBuf;
     Http1::RequestParser hp;
 
     // start with strict and move on to relaxed
     Config.onoff.relaxed_header_parser = 2;
 
     Config.maxRequestHeaderSize = 1024; // large enough to hold the test data.
 
     do {
 
         // state of things we expect right now
         struct resultSet expect = {
             .parsed = false,
             .needsMore = true,
@@ -1074,53 +1119,40 @@
 
         for (SBuf::size_type pos = 0; pos <= data.length(); ++pos) {
 
             // simulate reading one more byte
             ioBuf.append(data.substr(pos,1));
 
             // strict does not permit the garbage prefix
             if (pos < garbageEnd && !Config.onoff.relaxed_header_parser) {
                 ioBuf.clear();
                 continue;
             }
 
             // when the garbage is passed we expect to start seeing first-line bytes
             if (pos == garbageEnd)
                 expect.parserState = Http1::HTTP_PARSE_FIRST;
 
             // all points after garbage start to see accumulated bytes looking for end of current section
             if (pos >= garbageEnd)
                 expect.suffixSz = ioBuf.length();
 
-            // at end of request line expect to see method details
-            if (pos == methodEnd) {
-                expect.suffixSz = 0; // and a checkpoint buffer reset
-                expect.method = HttpRequestMethod(Http::METHOD_GET);
-            }
-
-            // at end of URI strict expects to see method, URI details
-            // relaxed must wait to end of line for whitespace tolerance
-            if (pos == uriEnd && !Config.onoff.relaxed_header_parser) {
-                expect.suffixSz = 0; // and a checkpoint buffer reset
-                expect.uri = "http://example.com/";
-            }
-
             // at end of request line expect to see method, URI, version details
             // and switch to seeking Mime header section
             if (pos == reqLineEnd) {
                 expect.parserState = Http1::HTTP_PARSE_MIME;
                 expect.suffixSz = 0; // and a checkpoint buffer reset
                 expect.status = Http::scOkay;
                 expect.method = HttpRequestMethod(Http::METHOD_GET);
                 expect.uri = "http://example.com/";
                 expect.version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1);
             }
 
             // one mime header is done we are expecting a new request
             // parse results say true and initial data is all gone from the buffer
             if (pos == mimeEnd) {
                 expect.parsed = true;
                 expect.needsMore = false;
                 expect.suffixSz = 0; // and a checkpoint buffer reset
             }
 
             testResults(__LINE__, ioBuf, hp, expect);



More information about the squid-dev mailing list