From f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Tue, 12 Feb 2013 10:53:05 +0100
Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
 after hsplit()

Triggering a hsplit due to long chain length allows an attacker
to create a carefully chosen set of keys which can cause the hash
to use 2 * (2**32) * sizeof(void *) bytes of RAM, a.k.a. a DoS via
memory exhaustion. Doing so also takes non-trivial time.

Eliminating this check, and only inspecting chain length after a
normal hsplit() (triggered when keys>buckets) prevents the attack
entirely, and makes such attacks relatively benign.

(cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
---
 ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
 hv.c                                | 26 ++++++--------------------
 t/op/hash.t                         | 20 +++++++++++++++++---
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
index 2cfb4e8..d58f053 100644
--- a/ext/Hash-Util-FieldHash/t/10_hash.t
+++ b/ext/Hash-Util-FieldHash/t/10_hash.t
@@ -38,15 +38,29 @@ use constant START => "a";
 
 # some initial hash data
 fieldhash my %h2;
-%h2 = map {$_ => 1} 'a'..'cc';
+my $counter= "a";
+$h2{$counter++}++ while $counter ne 'cd';
 
 ok (!Internals::HvREHASH(%h2),
     "starting with pre-populated non-pathological hash (rehash flag if off)");
 
 my @keys = get_keys(\%h2);
+my $buckets= buckets(\%h2);
 $h2{$_}++ for @keys;
+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
 ok (Internals::HvREHASH(%h2),
-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
+    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
+
+# returns the number of buckets in a hash
+sub buckets {
+    my $hr = shift;
+    my $keys_buckets= scalar(%$hr);
+    if ($keys_buckets=~m!/([0-9]+)\z!) {
+        return 0+$1;
+    } else {
+        return 8;
+    }
+}
 
 sub get_keys {
     my $hr = shift;
diff --git a/hv.c b/hv.c
index 89c6456..8659678 100644
--- a/hv.c
+++ b/hv.c
@@ -35,7 +35,8 @@ holds the key and hash value.
 #define PERL_HASH_INTERNAL_ACCESS
 #include "perl.h"
 
-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
+#define HV_MAX_LENGTH_BEFORE_REHASH 14
+#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
 
 static const char S_strtab_error[]
     = "Cannot modify shared string table in hv_%s";
@@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
         xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
         if (!counter) { /* initial entry? */
             xhv->xhv_fill++; /* HvFILL(hv)++ */
-        } else if (xhv->xhv_keys > (IV)xhv->xhv_max) {
+        } else if ( SHOULD_DO_HSPLIT(xhv) ) {
             hsplit(hv);
-        } else if(!HvREHASH(hv)) {
-            U32 n_links = 1;
-
-            while ((counter = HeNEXT(counter)))
-                n_links++;
-
-            if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
-                /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
-                   bucket splits on a rehashed hash, as we're not going to
-                   split it again, and if someone is lucky (evil) enough to
-                   get all the keys in one list they could exhaust our memory
-                   as we repeatedly double the number of buckets on every
-                   entry. Linear search feels a less worse thing to do. */
-                hsplit(hv);
-            }
         }
     }
 
@@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv)
 
 
     /* Pick your policy for "hashing isn't working" here: */
-    if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
+    if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */
         || HvREHASH(hv)) {
         return;
     }
@@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
         xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
         if (!next) { /* initial entry? */
             xhv->xhv_fill++; /* HvFILL(hv)++ */
-        } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
-                hsplit(PL_strtab);
+        } else if ( SHOULD_DO_HSPLIT(xhv) ) {
+            hsplit(PL_strtab);
         }
     }
 
diff --git a/t/op/hash.t b/t/op/hash.t
index 9bde518..45eb782 100644
--- a/t/op/hash.t
+++ b/t/op/hash.t
@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
 use constant START => "a";
 
 # some initial hash data
-my %h2 = map {$_ => 1} 'a'..'cc';
+my %h2;
+my $counter= "a";
+$h2{$counter++}++ while $counter ne 'cd';
 
 ok (!Internals::HvREHASH(%h2),
     "starting with pre-populated non-pathological hash (rehash flag if off)");
 
 my @keys = get_keys(\%h2);
+my $buckets= buckets(\%h2);
 $h2{$_}++ for @keys;
+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
 ok (Internals::HvREHASH(%h2),
-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
+    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
+
+# returns the number of buckets in a hash
+sub buckets {
+    my $hr = shift;
+    my $keys_buckets= scalar(%$hr);
+    if ($keys_buckets=~m!/([0-9]+)\z!) {
+        return 0+$1;
+    } else {
+        return 8;
+    }
+}
 
 sub get_keys {
     my $hr = shift;
 
     # the minimum of bits required to mount the attack on a hash
     my $min_bits = log(THRESHOLD)/log(2);
-
     # if the hash has already been populated with a significant amount
     # of entries the number of mask bits can be higher
     my $keys = scalar keys %$hr;
-- 
1.8.1.3
