[squid-dev] [PATCH] implement RFC3986

Amos Jeffries squid3 at treenet.co.nz
Wed Feb 10 15:59:55 UTC 2016


This is the updated patch implementing RFC 3986 URI coding ('URL-escaping').

So far all the audit suggestions except Tokenizer usage have been
implemented. Tokenizer is omitted because we still want helpers to be
able to instantiate these template functions with std::string.

Amos
-------------- next part --------------
=== modified file 'src/Makefile.am'
--- src/Makefile.am	2016-02-09 08:57:33 +0000
+++ src/Makefile.am	2016-02-10 14:53:12 +0000
@@ -3819,6 +3819,27 @@
 	$(XTRA_LIBS)
 tests_testYesNoNone_LDFLAGS = $(LIBADD_DL)
 
+check_PROGRAMS += tests/testRFC3986
+tests_testRFC3986_SOURCES= \
+	tests/stub_debug.cc \
+	tests/stub_libmem.cc \
+	tests/stub_SBufDetailedStats.cc \
+	tests/testRFC3986.h \
+	tests/testRFC3986.cc
+nodist_tests_testRFC3986_SOURCES= \
+	$(SBUF_SOURCE) \
+	String.cc \
+	$(TESTSOURCES)
+tests_testRFC3986_LDADD= \
+	anyp/libanyp.la \
+	base/libbase.la \
+	$(top_builddir)/lib/libmiscencoding.la \
+	$(COMPAT_LIB) \
+	$(SQUID_CPPUNIT_LA) \
+	$(SQUID_CPPUNIT_LIBS) \
+	$(XTRA_LIBS)
+tests_testRFC3986_LDFLAGS= $(LIBADD_DL)
+
 TESTS += testHeaders
 
 ## Special Universal .h dependency test script

=== modified file 'src/SBuf.h'
--- src/SBuf.h	2016-02-08 11:22:48 +0000
+++ src/SBuf.h	2016-02-10 14:53:46 +0000
@@ -234,6 +234,7 @@
 
     /// Append a single character. The character may be NUL (\0).
     SBuf& append(const char c);
+    SBuf& push_back(const char c) {return append(c);} ///< std::string-style alias for append(c)
 
     /** Append operation for C-style strings.
      *

=== modified file 'src/anyp/Makefile.am'
--- src/anyp/Makefile.am	2016-01-01 00:12:18 +0000
+++ src/anyp/Makefile.am	2016-02-10 15:02:15 +0000
@@ -17,6 +17,8 @@
 	ProtocolType.cc \
 	ProtocolType.h \
 	ProtocolVersion.h \
+	Rfc3986.cc \
+	Rfc3986.h \
 	TrafficMode.h \
 	UriScheme.cc \
 	UriScheme.h

=== added file 'src/anyp/Rfc3986.cc'
--- src/anyp/Rfc3986.cc	1970-01-01 00:00:00 +0000
+++ src/anyp/Rfc3986.cc	2016-02-10 15:02:22 +0000
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "anyp/Rfc3986.h"
+
+const CharacterSet
+Rfc1738::Unsafe("rfc1738:unsafe", "<>\"# %{}|\\^~[]`'"),
+Rfc1738::Ctrls("rfc1738:ctrls", {{0x00, 0x1f}, {0x7f,0xff}}), // two {first,last} pairs: 0x00-0x1f and 0x7f-0xff
+Rfc1738::Reserved("rfc1738:reserved", ";/?:@=&"),
+Rfc1738::UnsafeAndCtrls = Rfc1738::Unsafe + Rfc1738::Ctrls, // built from the two sets defined above
+         Rfc1738::Unescaped = (Rfc1738::UnsafeAndCtrls - CharacterSet(nullptr,"%") ).rename("rfc1738:unescaped") // '%' removed so already-escaped input is not escaped again
+                              ;
+
+const CharacterSet
+Rfc3986::GenDelims("rfc3986:gen-delims",":/?#[]@"),
+        Rfc3986::SubDelims("rfc3986:sub-delims","!$&'()*+,;="),
+        Rfc3986::Reserved = (Rfc3986::GenDelims + Rfc3986::SubDelims).rename("rfc3986:reserved"),
+                 Rfc3986::Unreserved = CharacterSet("rfc3986:unreserved","-._~") +
+                                       CharacterSet::ALPHA + CharacterSet::DIGIT, // NOTE(review): ALPHA/DIGIT are not defined in this file - confirm they are initialized before this runs
+                                       Rfc3986::All = (Rfc1738::UnsafeAndCtrls + Rfc3986::Reserved).rename("rfc3986:all") // uses the Rfc1738 sets defined earlier in this file
+                                               ;
+
+const char *Rfc1738::toHexTable[256] = { // index: octet value; entry: its two uppercase hex digits
+    "00", "01", "02", "03", "04", "05", "06", "07",
+    "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
+    "10", "11", "12", "13", "14", "15", "16", "17",
+    "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
+    "20", "21", "22", "23", "24", "25", "26", "27",
+    "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
+    "30", "31", "32", "33", "34", "35", "36", "37",
+    "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
+    "40", "41", "42", "43", "44", "45", "46", "47",
+    "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
+    "50", "51", "52", "53", "54", "55", "56", "57",
+    "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
+    "60", "61", "62", "63", "64", "65", "66", "67",
+    "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
+    "70", "71", "72", "73", "74", "75", "76", "77",
+    "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
+    "80", "81", "82", "83", "84", "85", "86", "87",
+    "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
+    "90", "91", "92", "93", "94", "95", "96", "97",
+    "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
+    "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7",
+    "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
+    "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7",
+    "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
+    "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7",
+    "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
+    "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7",
+    "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
+    "E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7",
+    "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
+    "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7",
+    "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF"
+};
+
+const int16_t Rfc1738::fromHexTable[256] = { // index: character code; entry: hex digit value (0-15), -1 for anything but 0-9, A-F, a-f
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+

=== added file 'src/anyp/Rfc3986.h'
--- src/anyp/Rfc3986.h	1970-01-01 00:00:00 +0000
+++ src/anyp/Rfc3986.h	2016-02-10 15:02:24 +0000
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_ANYP_RFC3986_H
+#define SQUID_SRC_ANYP_RFC3986_H
+
+#include "base/CharacterSet.h"
+
+/// RFC 1738 symbol and charset definitions
+namespace Rfc1738
+{
+
+extern const CharacterSet
+Unsafe,         // RFC 1738 unsafe set
+Ctrls,          // CTL characters (\0x00 to \0x1f) plus octets \0x7f to \0xff
+UnsafeAndCtrls, // RFC 1738 Unsafe and Ctrls
+Unescaped,      // ctrls and unsafe (except for percent symbol)
+Reserved;       // RFC 1738 Reserved set
+
+extern const char *toHexTable[256]; // octet value -> two uppercase hex digits
+extern const int16_t fromHexTable[256]; // character code -> hex digit value, or -1
+
+/// \return the numeric value (0-15) of the HEXDIG argument ch (either case), or -1 if invalid.
+inline int16_t
+FromHex(unsigned char ch)
+{
+    // no need to check bounds, the lookup table has 256 entries
+    return fromHexTable[ch];
+}
+
+/// \return a static 2-char zero-terminated buffer with a HEXDIG
+///         representation of argument c
+inline const char*
+ToHex(const unsigned char c)
+{
+    // no need to check bounds, the lookup table has 256 entries
+    return toHexTable[c];
+}
+
+} // namespace Rfc1738
+
+/// RFC 3986 symbol and charset definitions
+namespace Rfc3986
+{
+
+extern const CharacterSet
+GenDelims,// RFC 3986 gen-delims set
+SubDelims,// RFC 3986 sub-delims set
+Reserved, // RFC 3986 reserved characters set
+Unreserved, // RFC 3986 unreserved characters set
+All; // RFC 1738 unsafe and ctrls plus RFC 3986 reserved characters
+
+/// Percent-encode the characters of s listed in escapeChars (std::string or SBuf).
+template <class Str>
+Str
+Escape(const Str &s, const CharacterSet &escapeChars = Rfc1738::UnsafeAndCtrls)
+{
+    Str out;
+    bool changed = false;
+    // XXX: SBuf lacking reserve(N)
+    // out.reserve(s.length()*2); //TODO: optimize arbitrary constant
+    for (const auto c : s) {
+        if (!escapeChars[c]) {
+            out.push_back(c); // not in the escape set: copied verbatim
+            continue;
+        }
+        const char *digits = Rfc1738::ToHex(c);
+        out.push_back('%');
+        out.push_back(digits[0]);
+        out.push_back(digits[1]);
+        changed = true;
+    }
+    if (!changed)
+        return s; // nothing was encoded: hand back the input itself
+    return out;
+}
+
+/** Decode the %XX sequences found in s. "%%" yields a single '%'; "%00" and
+ * malformed or truncated sequences are copied through unchanged.
+ * API-compatible with std::string and SBuf
+ */
+template <class Str>
+Str
+Unescape(const Str &s)
+{
+    typename Str::size_type pos=s.find('%');
+    if (pos == Str::npos) // no '%' at all: nothing to decode
+        return s;
+    Str rv;
+    // rv.reserve(s.length()); // XXX: SBuf lacking reserve(N)
+    const auto e = s.end();
+    for (auto in = s.begin(); in != e; ++in) {
+        if (*in != '%') { // normal case, copy and continue
+            rv.push_back(*in);
+            continue;
+        }
+        auto ti = in; // look-ahead iterator; 'in' stays on the '%'
+        ++ti;
+        if (ti == e) { // String ends in %
+            rv.push_back(*in);
+            break;
+        }
+        if (*ti == '%') { // double '%' escaping
+            rv.push_back(*in);
+            ++in; // also consume the second '%'
+            continue;
+        }
+        const int v1 = Rfc1738::FromHex(*ti);
+        if (v1 < 0) { // decoding failed at first hexdigit
+            rv.push_back(*in);
+            continue;
+        }
+        ++ti;
+        if (ti == e) { // String ends in '%[[:hexdigit:]]'
+            rv.push_back(*in);
+            continue;
+        }
+        const int v2 = Rfc1738::FromHex(*ti);
+        if (v2 < 0) { // decoding failed at second hexdigit
+            rv.push_back(*in);
+            continue;
+        }
+        const int x = v1 << 4 | v2;
+        if (x > 0 && x <= 255) { // x == 0 (%00) is deliberately not decoded
+            rv.push_back(static_cast<char>(x));
+            ++in; // consume the first hex digit
+            ++in; // consume the second hex digit
+            continue;
+        }
+        rv.push_back(*in); // keep the '%' literally; the "00" follows as ordinary text
+    }
+    return rv;
+}
+
+} // namespace Rfc3986
+
+#endif /* SQUID_SRC_ANYP_RFC3986_H */
+

=== added file 'src/tests/testRFC3986.cc'
--- src/tests/testRFC3986.cc	1970-01-01 00:00:00 +0000
+++ src/tests/testRFC3986.cc	2016-02-10 15:30:17 +0000
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "anyp/Rfc3986.h"
+#include "rfc1738.h"
+#include "SBuf.h"
+#include "testRFC3986.h"
+#include "unitTestMain.h"
+
+#include <cassert>
+
+CPPUNIT_TEST_SUITE_REGISTRATION( testRFC3986 );
+
+static void
+performDecodingTest(const std::string &encoded_str, const std::string &plaintext_str)
+{
+    // std::string instantiation of the decoder
+    CPPUNIT_ASSERT_EQUAL(plaintext_str, Rfc3986::Unescape(encoded_str));
+
+    // SBuf instantiation, fed the very same input and expectation
+    const SBuf encodedBuf(encoded_str);
+    const SBuf expectedBuf(plaintext_str);
+    CPPUNIT_ASSERT_EQUAL(expectedBuf, Rfc3986::Unescape(encodedBuf));
+}
+
+/* Decoding tests: well-formed escapes are decoded, malformed ones pass through unchanged */
+void testRFC3986::testUrlDecode()
+{
+    performDecodingTest("%2Fdata%2Fsource%2Fpath","/data/source/path");
+    performDecodingTest("http://foo.invalid%2Fdata%2Fsource%2Fpath",
+                        "http://foo.invalid/data/source/path");
+    // TODO: add query string cases
+
+    performDecodingTest("1 w%0Ard","1 w\nrd"); // Newline %0A encoded
+    performDecodingTest("2 w%rd","2 w%rd"); // Un-encoded % is kept as-is
+    performDecodingTest("3 w%%rd","3 w%rd"); // "%%" decodes to a single %
+    performDecodingTest("5 Bad String %1","5 Bad String %1"); // truncated escape is kept as-is
+    performDecodingTest("6 Bad String %1A%3","6 Bad String \032%3"); // valid %1A decoded, truncated %3 kept
+    performDecodingTest("7 Good String %1A","7 Good String \032"); // well-formed escape
+    // various string endings
+    performDecodingTest("8 word%","8 word%"); // lone trailing %
+    performDecodingTest("9 word%z","9 word%z"); // first digit is not hex
+    performDecodingTest("10 word%1","10 word%1"); // only one hex digit
+    performDecodingTest("11 word%1q","11 word%1q"); // second digit is not hex
+    performDecodingTest("12 word%1a","12 word\032"); // lowercase hex digits are accepted
+}
+
+// Verifies Rfc3986::Escape() for std::string and for SBuf input; a non-zero
+//  rfc1738flag additionally cross-checks the legacy rfc1738 escaper
+static void
+performEncodingTest(const char *plaintext_str, const char *encoded_str, int rfc1738flag, const CharacterSet  &rfc3986CSet)
+{
+    const std::string expected(encoded_str);
+    CPPUNIT_ASSERT_EQUAL(expected, Rfc3986::Escape(std::string(plaintext_str), rfc3986CSet));
+    CPPUNIT_ASSERT_EQUAL(SBuf(encoded_str), Rfc3986::Escape(SBuf(plaintext_str), rfc3986CSet));
+    if (rfc1738flag != 0) {
+        CPPUNIT_ASSERT_EQUAL(expected, std::string(rfc1738_do_escape(plaintext_str, rfc1738flag)));
+    }
+}
+
+void testRFC3986::testUrlEncode()
+{
+    /* TEST: Escaping only unsafe characters */
+    performEncodingTest("http://foo.invalid/data/source/path",
+                        "http://foo.invalid/data/source/path",
+                        RFC1738_ESCAPE_UNSAFE, Rfc1738::Unsafe);
+
+    /* regular URL (no encoding needed); NOTE(review): identical to the case above */
+    performEncodingTest("http://foo.invalid/data/source/path",
+                        "http://foo.invalid/data/source/path",
+                        RFC1738_ESCAPE_UNSAFE, Rfc1738::Unsafe);
+
+    /* long string of unsafe # characters, separated by (also unsafe) spaces */
+    performEncodingTest("################ ################ ################ ################ ################ ################ ################ ################",
+                        "%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23",
+                        RFC1738_ESCAPE_UNSAFE, Rfc1738::Unsafe);
+
+    /* TEST: escaping only reserved characters */
+
+    /* regular URL (full encoding requested) */
+    performEncodingTest("http://foo.invalid/data/source/path",
+                        "http%3A%2F%2Ffoo.invalid%2Fdata%2Fsource%2Fpath",
+                        RFC1738_ESCAPE_RESERVED, Rfc3986::Reserved);
+
+    /* regular path (encoding wanted for ALL special chars) */
+    performEncodingTest("/data/source/path",
+                        "%2Fdata%2Fsource%2Fpath",
+                        RFC1738_ESCAPE_RESERVED, Rfc3986::Reserved);
+
+    /* TEST: safety-escaping a string already partially escaped */
+
+    /* dangerous characters get escaped while the existing %2F is left untouched */
+    performEncodingTest("http://foo.invalid/data%2Fsource[]",
+                        "http://foo.invalid/data%2Fsource%5B%5D",
+                        RFC1738_ESCAPE_UNESCAPED, Rfc1738::Unescaped);
+
+    /* escaping of hexadecimal 0xFF characters in a partially escaped string */
+    performEncodingTest("http://foo.invalid/data%2Fsource\xFF\xFF",
+                        "http://foo.invalid/data%2Fsource%FF%FF",
+                        RFC1738_ESCAPE_UNESCAPED, Rfc1738::Unescaped);
+}
+
+/** SECURITY BUG TESTS: avoid null truncation attacks by skipping %00 bytes */
+void testRFC3986::PercentZeroNullDecoding()
+{
+    /* Attack with %00 encoded NUL: must stay encoded */
+    performDecodingTest("w%00rd", "w%00rd");
+
+    /* Attack with %0 encoded NUL: truncated escape, must stay as-is */
+    performDecodingTest("w%0rd", "w%0rd");
+
+    /* Handle '0' bytes embedded after an encoded % ("%%") */
+    performDecodingTest("w%%00%rd", "w%00%rd");
+
+    /* Handle an encoded % ("%%") directly followed by %00 */
+    performDecodingTest("w%%%00%rd", "w%%00%rd");
+}
+

=== added file 'src/tests/testRFC3986.h'
--- src/tests/testRFC3986.h	1970-01-01 00:00:00 +0000
+++ src/tests/testRFC3986.h	2016-02-10 15:43:56 +0000
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_LIB_TEST_RFC3986_H
+#define SQUID_LIB_TEST_RFC3986_H
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/**
+ * CppUnit fixture exercising Rfc3986::Escape() and Rfc3986::Unescape()
+ */
+class testRFC3986 : public CPPUNIT_NS::TestFixture
+{
+    CPPUNIT_TEST_SUITE( testRFC3986 );
+    CPPUNIT_TEST( testUrlDecode );
+    CPPUNIT_TEST( testUrlEncode );
+    CPPUNIT_TEST( PercentZeroNullDecoding );
+    CPPUNIT_TEST_SUITE_END();
+
+protected:
+    void testUrlDecode(); // Unescape() on well-formed and malformed input
+    void testUrlEncode(); // Escape() with the various character sets
+
+    // regression tests for specific bugs
+    void PercentZeroNullDecoding(); // %00 must never decode to a NUL byte
+};
+
+#endif /* SQUID_LIB_TEST_RFC3986_H */
+



More information about the squid-dev mailing list