[squid-dev] [PATCH Bug 4534 and N-bit fixes for CacheDigest

Amos Jeffries squid3 at treenet.co.nz
Sat Jul 9 13:37:02 UTC 2016


This patch converts the CacheDigest members and method parameters to use
explicitly sized data types more appropriate for what details they hold.

* 64-bit Digest capacity (entry count)
* 32-bit Mask Size (byte count)
*  8-bit Bit count per entry

Due to various store_digest.cc code still relying on masks not exceeding
2^31-1 worth of memory space we have to still assert that bitCount
calculation does not exceed that value.

Amos

-------------- next part --------------
=== modified file 'src/CacheDigest.cc'
--- src/CacheDigest.cc	2016-01-01 00:12:18 +0000
+++ src/CacheDigest.cc	2016-07-09 08:12:56 +0000
@@ -18,89 +18,89 @@
 
 #include "CacheDigest.h"
 #include "util.h"
 
 /* local types */
 
 typedef struct {
     int bit_count;      /* total number of bits */
     int bit_on_count;       /* #bits turned on */
     int bseq_len_sum;       /* sum of all bit seq length */
     int bseq_count;     /* number of bit seqs */
 } CacheDigestStats;
 
 /* local functions */
 static void cacheDigestHashKey(const CacheDigest * cd, const cache_key * key);
 
 /* static array used by cacheDigestHashKey for optimization purposes */
 static uint32_t hashed_keys[4];
 
 void
-CacheDigest::init(int newCapacity)
+CacheDigest::init(uint64_t newCapacity)
 {
     const auto newMaskSz = CacheDigest::CalcMaskSize(newCapacity, bits_per_entry);
     assert(newCapacity > 0 && bits_per_entry > 0);
-    assert(newMaskSz > 0);
+    assert(newMaskSz != 0);
     capacity = newCapacity;
     mask_size = newMaskSz;
     mask = static_cast<char *>(xcalloc(mask_size,1));
     debugs(70, 2, "capacity: " << capacity << " entries, bpe: " << bits_per_entry << "; size: "
            << mask_size << " bytes");
 }
 
-CacheDigest::CacheDigest(int aCapacity, int bpe) :
+CacheDigest::CacheDigest(uint64_t aCapacity, uint8_t bpe) :
     mask(nullptr),
     mask_size(0),
     capacity(0),
     bits_per_entry(bpe),
     count(0),
     del_count(0)
 {
     assert(SQUID_MD5_DIGEST_LENGTH == 16);  /* our hash functions rely on 16 byte keys */
     updateCapacity(aCapacity);
 }
 
 CacheDigest::~CacheDigest()
 {
     xfree(mask);
 }
 
 CacheDigest *
 CacheDigest::clone() const
 {
     CacheDigest *cl = new CacheDigest(capacity, bits_per_entry);
     cl->count = count;
     cl->del_count = del_count;
     assert(mask_size == cl->mask_size);
     memcpy(cl->mask, mask, mask_size);
     return cl;
 }
 
 void
 CacheDigest::clear()
 {
     count = del_count = 0;
     memset(mask, 0, mask_size);
 }
 
 void
-CacheDigest::updateCapacity(int newCapacity)
+CacheDigest::updateCapacity(uint64_t newCapacity)
 {
     safe_free(mask);
     init(newCapacity); // will re-init mask and mask_size
 }
 
 bool
 CacheDigest::contains(const cache_key * key) const
 {
     assert(key);
     /* hash */
     cacheDigestHashKey(this, key);
     /* test corresponding bits */
     return
         CBIT_TEST(mask, hashed_keys[0]) &&
         CBIT_TEST(mask, hashed_keys[1]) &&
         CBIT_TEST(mask, hashed_keys[2]) &&
         CBIT_TEST(mask, hashed_keys[3]);
 }
 
 void
@@ -244,64 +244,65 @@
                       stats->falseHits, xpercent(stats->falseHits, tot_count),
                       stats->falseMisses, xpercent(stats->falseMisses, tot_count),
                       false_count, xpercent(false_count, tot_count));
     storeAppendPrintf(sentry, "all\t %d\t %.2f\t %d\t %.2f\t %d\t %.2f\n",
                       hit_count, xpercent(hit_count, tot_count),
                       miss_count, xpercent(miss_count, tot_count),
                       tot_count, xpercent(tot_count, tot_count));
     storeAppendPrintf(sentry, "\tclose_hits: %d ( %d%%) /* cd said hit, doc was in the peer cache, but we got a miss */\n",
                       stats->closeHits, xpercentInt(stats->closeHits, stats->falseHits));
 }
 
 void
 cacheDigestReport(CacheDigest * cd, const char *label, StoreEntry * e)
 {
     CacheDigestStats stats;
     assert(cd && e);
     cacheDigestStats(cd, &stats);
     storeAppendPrintf(e, "%s digest: size: %d bytes\n",
                       label ? label : "", stats.bit_count / 8
                      );
-    storeAppendPrintf(e, "\t entries: count: %d capacity: %d util: %d%%\n",
+    storeAppendPrintf(e, "\t entries: count: %d capacity: %" PRIu64 " util: %d%%\n",
                       cd->count,
                       cd->capacity,
                       xpercentInt(cd->count, cd->capacity)
                      );
     storeAppendPrintf(e, "\t deletion attempts: %d\n",
                       cd->del_count
                      );
     storeAppendPrintf(e, "\t bits: per entry: %d on: %d capacity: %d util: %d%%\n",
                       cd->bits_per_entry,
                       stats.bit_on_count, stats.bit_count,
                       xpercentInt(stats.bit_on_count, stats.bit_count)
                      );
     storeAppendPrintf(e, "\t bit-seq: count: %d avg.len: %.2f\n",
                       stats.bseq_count,
                       xdiv(stats.bseq_len_sum, stats.bseq_count)
                      );
 }
 
-size_t
-CacheDigest::CalcMaskSize(int cap, int bpe)
+uint32_t
+CacheDigest::CalcMaskSize(uint64_t cap, uint8_t bpe)
 {
-    // XXX: might 32-bit overflow during multiply
-    return (size_t) (cap * bpe + 7) / 8;
+    uint64_t bitCount = (cap * bpe) + 7;
+    assert(bitCount < INT_MAX); // dont 31-bit overflow later
+    return static_cast<uint32_t>(bitCount / 8);
 }
 
 static void
 cacheDigestHashKey(const CacheDigest * cd, const cache_key * key)
 {
-    const unsigned int bit_count = cd->mask_size * 8;
+    const uint32_t bit_count = cd->mask_size * 8;
     unsigned int tmp_keys[4];
     /* we must memcpy to ensure alignment */
     memcpy(tmp_keys, key, sizeof(tmp_keys));
     hashed_keys[0] = htonl(tmp_keys[0]) % bit_count;
     hashed_keys[1] = htonl(tmp_keys[1]) % bit_count;
     hashed_keys[2] = htonl(tmp_keys[2]) % bit_count;
     hashed_keys[3] = htonl(tmp_keys[3]) % bit_count;
     debugs(70, 9, "cacheDigestHashKey: " << storeKeyText(key) << " -(" <<
            bit_count << ")-> " << hashed_keys[0] << " " << hashed_keys[1] <<
            " " << hashed_keys[2] << " " << hashed_keys[3]);
 }
 
 #endif
 

=== modified file 'src/CacheDigest.h'
--- src/CacheDigest.h	2016-01-01 00:12:18 +0000
+++ src/CacheDigest.h	2016-07-09 05:33:28 +0000
@@ -4,65 +4,65 @@
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 /* DEBUG: section 70    Cache Digest */
 
 #ifndef SQUID_CACHEDIGEST_H_
 #define SQUID_CACHEDIGEST_H_
 
 #include "mem/forward.h"
 #include "store_key_md5.h"
 
 class CacheDigestGuessStats;
 class StoreEntry;
 
 class CacheDigest
 {
     MEMPROXY_CLASS(CacheDigest);
 public:
-    CacheDigest(int capacity, int bpe);
+    CacheDigest(uint64_t capacity, uint8_t bpe);
     ~CacheDigest();
 
     // NP: only used by broken unit-test
     /// produce a new identical copy of the digest object
     CacheDigest *clone() const;
 
     /// reset the digest mask and counters
     void clear();
 
     /// changes mask size to fit newCapacity, resets bits to 0
-    void updateCapacity(int newCapacity);
+    void updateCapacity(uint64_t newCapacity);
 
     void add(const cache_key * key);
     void remove(const cache_key * key);
 
     /// \returns true if the key belongs to the digest
     bool contains(const cache_key * key) const;
 
     /// percentage of mask bits which are used
     double usedMaskPercent() const;
 
     /// calculate the size of mask required to digest up to
     /// a specified capacity and bitsize.
-    static size_t CalcMaskSize(int cap, int bpe);
+    static uint32_t CalcMaskSize(uint64_t cap, uint8_t bpe);
 
 private:
-    void init(int newCapacity);
+    void init(uint64_t newCapacity);
 
 public:
     /* public, read-only */
     char *mask;         /* bit mask */
-    int mask_size;      /* mask size in bytes */
-    int capacity;       /* expected maximum for .count, not a hard limit */
-    int bits_per_entry;     /* number of bits allocated for each entry from capacity */
+    uint32_t mask_size; /* mask size in bytes */
+    uint64_t capacity;       /* expected maximum for .count, not a hard limit */
+    int8_t bits_per_entry;     /* number of bits allocated for each entry from capacity */
     int count;          /* number of digested entries */
     int del_count;      /* number of deletions performed so far */
 };
 
 void cacheDigestGuessStatsUpdate(CacheDigestGuessStats * stats, int real_hit, int guess_hit);
 void cacheDigestGuessStatsReport(const CacheDigestGuessStats * stats, StoreEntry * sentry, const char *label);
 void cacheDigestReport(CacheDigest * cd, const char *label, StoreEntry * e);
 
 #endif /* SQUID_CACHEDIGEST_H_ */
 

=== modified file 'src/PeerDigest.h'
--- src/PeerDigest.h	2016-01-01 00:12:18 +0000
+++ src/PeerDigest.h	2016-07-09 04:03:58 +0000
@@ -39,41 +39,41 @@
 
 class HttpRequest;
 class PeerDigest;
 class store_client;
 
 class DigestFetchState
 {
     CBDATA_CLASS(DigestFetchState);
 
 public:
     DigestFetchState(PeerDigest *,HttpRequest *);
     ~DigestFetchState();
 
     PeerDigest *pd;
     StoreEntry *entry;
     StoreEntry *old_entry;
     store_client *sc;
     store_client *old_sc;
     HttpRequest *request;
     int offset;
-    int mask_offset;
+    uint32_t mask_offset;
     time_t start_time;
     time_t resp_time;
     time_t expires;
 
     struct {
         int msg;
         int bytes;
     } sent, recv;
 
     char buf[SM_PAGE_SIZE];
     ssize_t bufofs;
     digest_read_state_t state;
 };
 
 class PeerDigest
 {
     CBDATA_CLASS(PeerDigest);
 
 public:
     CachePeer *peer;          /**< pointer back to peer structure, argh */

=== modified file 'src/peer_digest.cc'
--- src/peer_digest.cc	2016-03-11 18:00:51 +0000
+++ src/peer_digest.cc	2016-07-09 04:07:04 +0000
@@ -729,41 +729,41 @@
     debugs(72, 6, step_name << ": peer " << host << ", offset: " <<
            fetch->offset << " size: " << size << ".");
 
     /* continue checking (with pd and host known and valid) */
 
     if (!reason) {
         if (!cbdataReferenceValid(pd->peer))
             reason = "peer disappeared";
         else if (size < 0)
             reason = "swap failure";
         else if (!fetch->entry)
             reason = "swap aborted?!";
         else if (EBIT_TEST(fetch->entry->flags, ENTRY_ABORTED))
             reason = "swap aborted";
     }
 
     /* continue checking (maybe-successful eof case) */
     if (!reason && !size) {
         if (!pd->cd)
             reason = "null digest?!";
-        else if (fetch->mask_offset != (int)pd->cd->mask_size)
+        else if (fetch->mask_offset != pd->cd->mask_size)
             reason = "premature end of digest?!";
         else if (!peerDigestUseful(pd))
             reason = "useless digest";
         else
             reason = no_bug = "success";
     }
 
     /* finish if we have a reason */
     if (reason) {
         const int level = strstr(reason, "?!") ? 1 : 3;
         debugs(72, level, "" << step_name << ": peer " << host << ", exiting after '" << reason << "'");
         peerDigestReqFinish(fetch, buf,
                             1, pdcb_valid, pcb_valid, reason, !no_bug);
     } else {
         /* paranoid check */
         assert(pdcb_valid && pcb_valid);
     }
 
     return reason != NULL;
 }

=== modified file 'src/store_digest.cc'
--- src/store_digest.cc	2016-01-01 00:12:18 +0000
+++ src/store_digest.cc	2016-07-09 10:54:40 +0000
@@ -423,105 +423,103 @@
     sd_state.rewrite_lock = NULL;
     ++sd_state.rewrite_count;
     eventAdd("storeDigestRewriteStart", storeDigestRewriteStart, NULL, (double)
              Config.digest.rewrite_period, 1);
     /* resume pending Rebuild if any */
 
     if (sd_state.rebuild_lock)
         storeDigestRebuildResume();
 }
 
 /* swaps out one digest "chunk" per invocation; schedules next swap out */
 static void
 storeDigestSwapOutStep(void *data)
 {
     StoreEntry *e = static_cast<StoreEntry *>(data);
     int chunk_size = Config.digest.swapout_chunk_size;
     assert(e == sd_state.rewrite_lock);
     assert(e);
     /* _add_ check that nothing bad happened while we were waiting @?@ @?@ */
 
-    if (sd_state.rewrite_offset + chunk_size > store_digest->mask_size)
+    if (static_cast<uint32_t>(sd_state.rewrite_offset + chunk_size) > store_digest->mask_size)
         chunk_size = store_digest->mask_size - sd_state.rewrite_offset;
 
     e->append(store_digest->mask + sd_state.rewrite_offset, chunk_size);
 
     debugs(71, 3, "storeDigestSwapOutStep: size: " << store_digest->mask_size <<
            " offset: " << sd_state.rewrite_offset << " chunk: " <<
            chunk_size << " bytes");
 
     sd_state.rewrite_offset += chunk_size;
 
     /* are we done ? */
-    if (sd_state.rewrite_offset >= store_digest->mask_size)
+    if (static_cast<uint32_t>(sd_state.rewrite_offset) >= store_digest->mask_size)
         storeDigestRewriteFinish(e);
     else
         eventAdd("storeDigestSwapOutStep", storeDigestSwapOutStep, data, 0.0, 1, false);
 }
 
 static void
 storeDigestCBlockSwapOut(StoreEntry * e)
 {
     memset(&sd_state.cblock, 0, sizeof(sd_state.cblock));
     sd_state.cblock.ver.current = htons(CacheDigestVer.current);
     sd_state.cblock.ver.required = htons(CacheDigestVer.required);
     sd_state.cblock.capacity = htonl(store_digest->capacity);
     sd_state.cblock.count = htonl(store_digest->count);
     sd_state.cblock.del_count = htonl(store_digest->del_count);
     sd_state.cblock.mask_size = htonl(store_digest->mask_size);
-    sd_state.cblock.bits_per_entry = (unsigned char)
-                                     Config.digest.bits_per_entry;
+    sd_state.cblock.bits_per_entry = Config.digest.bits_per_entry;
     sd_state.cblock.hash_func_count = (unsigned char) CacheDigestHashFuncCount;
     e->append((char *) &sd_state.cblock, sizeof(sd_state.cblock));
 }
 
 /* calculates digest capacity */
 static int
 storeDigestCalcCap(void)
 {
     /*
      * To-Do: Bloom proved that the optimal filter utilization is 50% (half of
      * the bits are off). However, we do not have a formula to calculate the
      * number of _entries_ we want to pre-allocate for.
      */
     const int hi_cap = Store::Root().maxSize() / Config.Store.avgObjectSize;
     const int lo_cap = 1 + Store::Root().currentSize() / Config.Store.avgObjectSize;
     const int e_count = StoreEntry::inUseCount();
     int cap = e_count ? e_count :hi_cap;
     debugs(71, 2, "storeDigestCalcCap: have: " << e_count << ", want " << cap <<
            " entries; limits: [" << lo_cap << ", " << hi_cap << "]");
 
     if (cap < lo_cap)
         cap = lo_cap;
 
     /* do not enforce hi_cap limit, average-based estimation may be wrong
      *if (cap > hi_cap)
      *  cap = hi_cap;
      */
     return cap;
 }
 
 /* returns true if we actually resized the digest */
 static int
 storeDigestResize(void)
 {
     const int cap = storeDigestCalcCap();
-    int diff;
     assert(store_digest);
-    diff = abs(cap - store_digest->capacity);
+    uint64_t diff = abs(cap - store_digest->capacity);
     debugs(71, 2, "storeDigestResize: " <<
            store_digest->capacity << " -> " << cap << "; change: " <<
            diff << " (" << xpercentInt(diff, store_digest->capacity) << "%)" );
     /* avoid minor adjustments */
 
     if (diff <= store_digest->capacity / 10) {
         debugs(71, 2, "storeDigestResize: small change, will not resize.");
         return 0;
     } else {
         debugs(71, 2, "storeDigestResize: big change, resizing.");
         store_digest->updateCapacity(cap);
         return 1;
     }
 }
 
 #endif /* USE_CACHE_DIGESTS */
 

=== modified file 'src/tests/stub_CacheDigest.cc'
--- src/tests/stub_CacheDigest.cc	2016-01-01 00:12:18 +0000
+++ src/tests/stub_CacheDigest.cc	2016-07-09 10:24:06 +0000
@@ -1,33 +1,33 @@
 /*
  * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
 
 #include "squid.h"
 #include "store_key_md5.h"
 
 #define STUB_API "CacheDigest.cc"
 #include "tests/STUB.h"
 
 class CacheDigest;
 class CacheDigestGuessStats;
 class StoreEntry;
 
 #include "CacheDigest.h"
-CacheDigest::CacheDigest(int, int) {STUB}
+CacheDigest::CacheDigest(uint64_t, uint8_t) {STUB}
 CacheDigest::~CacheDigest() {STUB}
 CacheDigest *CacheDigest::clone() const STUB_RETVAL(nullptr)
 void CacheDigest::clear() STUB
-void CacheDigest::updateCapacity(int) STUB
+void CacheDigest::updateCapacity(uint64_t) STUB
 bool CacheDigest::contains(const cache_key *) const STUB_RETVAL(false)
 void CacheDigest::add(const cache_key *) STUB
 void CacheDigest::remove(const cache_key *) STUB
 double CacheDigest::usedMaskPercent() const STUB_RETVAL(0.0)
 void cacheDigestGuessStatsUpdate(CacheDigestGuessStats *, int, int) STUB
 void cacheDigestGuessStatsReport(const CacheDigestGuessStats *, StoreEntry *, const char *) STUB
 void cacheDigestReport(CacheDigest *, const char *, StoreEntry *) STUB
-size_t CacheDigest::CalcMaskSize(int, int) STUB_RETVAL(1)
+uint32_t CacheDigest::CalcMaskSize(uint64_t, uint8_t) STUB_RETVAL(1)
 



More information about the squid-dev mailing list