1From f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001
2From: Yves Orton <demerphq@gmail.com>
3Date: Tue, 12 Feb 2013 10:53:05 +0100
4Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
5 after hsplit()
6
7Triggering a hsplit due to long chain length allows an attacker
8to create a carefully chosen set of keys which can cause the hash
9to use 2 * (2**32) * sizeof(void *) bytes of RAM, i.e. a DoS via memory
10exhaustion. Doing so also takes non-trivial time.
11
12Eliminating this check, and only inspecting chain length after a
13normal hsplit() (triggered when keys>buckets) prevents the memory
14exhaustion attack entirely, and makes key-collision attacks relatively benign.
15
16(cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
17---
18 ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
19 hv.c                                | 26 ++++++--------------------
20 t/op/hash.t                         | 20 +++++++++++++++++---
21 3 files changed, 39 insertions(+), 25 deletions(-)
22
23diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
24index 2cfb4e8..d58f053 100644
25--- a/ext/Hash-Util-FieldHash/t/10_hash.t
26+++ b/ext/Hash-Util-FieldHash/t/10_hash.t
27@@ -38,15 +38,29 @@ use constant START     => "a";
28 
29 # some initial hash data
30 fieldhash my %h2;
31-%h2 = map {$_ => 1} 'a'..'cc';
32+my $counter= "a";
33+$h2{$counter++}++ while $counter ne 'cd';
34 
35 ok (!Internals::HvREHASH(%h2), 
36     "starting with pre-populated non-pathological hash (rehash flag if off)");
37 
38 my @keys = get_keys(\%h2);
39+my $buckets= buckets(\%h2);
40 $h2{$_}++ for @keys;
41+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
42 ok (Internals::HvREHASH(%h2), 
43-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
44+    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
45+
46+# returns the number of buckets in a hash
47+sub buckets {
48+    my $hr = shift;
49+    my $keys_buckets= scalar(%$hr);
50+    if ($keys_buckets=~m!/([0-9]+)\z!) {
51+        return 0+$1;
52+    } else {
53+        return 8;
54+    }
55+}
56 
57 sub get_keys {
58     my $hr = shift;
59diff --git a/hv.c b/hv.c
60index 89c6456..8659678 100644
61--- a/hv.c
62+++ b/hv.c
63@@ -35,7 +35,8 @@ holds the key and hash value.
64 #define PERL_HASH_INTERNAL_ACCESS
65 #include "perl.h"
66 
67-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
68+#define HV_MAX_LENGTH_BEFORE_REHASH 14
69+#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
70 
71 static const char S_strtab_error[]
72     = "Cannot modify shared string table in hv_%s";
73@@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
74 	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
75 	if (!counter) {				/* initial entry? */
76 	    xhv->xhv_fill++; /* HvFILL(hv)++ */
77-	} else if (xhv->xhv_keys > (IV)xhv->xhv_max) {
78+	} else if ( SHOULD_DO_HSPLIT(xhv) ) {
79 	    hsplit(hv);
80-	} else if(!HvREHASH(hv)) {
81-	    U32 n_links = 1;
82-
83-	    while ((counter = HeNEXT(counter)))
84-		n_links++;
85-
86-	    if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
87-		/* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
88-		   bucket splits on a rehashed hash, as we're not going to
89-		   split it again, and if someone is lucky (evil) enough to
90-		   get all the keys in one list they could exhaust our memory
91-		   as we repeatedly double the number of buckets on every
92-		   entry. Linear search feels a less worse thing to do.  */
93-		hsplit(hv);
94-	    }
95 	}
96     }
97 
98@@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv)
99 
100 
101     /* Pick your policy for "hashing isn't working" here:  */
102-    if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked?  */
103+    if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked?  */
104 	|| HvREHASH(hv)) {
105 	return;
106     }
107@@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
108 	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
109 	if (!next) {			/* initial entry? */
110 	    xhv->xhv_fill++; /* HvFILL(hv)++ */
111-	} else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
112-		hsplit(PL_strtab);
113+	} else if ( SHOULD_DO_HSPLIT(xhv) ) {
114+            hsplit(PL_strtab);
115 	}
116     }
117 
118diff --git a/t/op/hash.t b/t/op/hash.t
119index 9bde518..45eb782 100644
120--- a/t/op/hash.t
121+++ b/t/op/hash.t
122@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
123 use constant START     => "a";
124 
125 # some initial hash data
126-my %h2 = map {$_ => 1} 'a'..'cc';
127+my %h2;
128+my $counter= "a";
129+$h2{$counter++}++ while $counter ne 'cd';
130 
131 ok (!Internals::HvREHASH(%h2), 
132     "starting with pre-populated non-pathological hash (rehash flag if off)");
133 
134 my @keys = get_keys(\%h2);
135+my $buckets= buckets(\%h2);
136 $h2{$_}++ for @keys;
137+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
138 ok (Internals::HvREHASH(%h2), 
139-    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
140+    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
141+
142+# returns the number of buckets in a hash
143+sub buckets {
144+    my $hr = shift;
145+    my $keys_buckets= scalar(%$hr);
146+    if ($keys_buckets=~m!/([0-9]+)\z!) {
147+        return 0+$1;
148+    } else {
149+        return 8;
150+    }
151+}
152 
153 sub get_keys {
154     my $hr = shift;
155 
156     # the minimum of bits required to mount the attack on a hash
157     my $min_bits = log(THRESHOLD)/log(2);
158-
159     # if the hash has already been populated with a significant amount
160     # of entries the number of mask bits can be higher
161     my $keys = scalar keys %$hr;
162-- 
1631.8.1.3
164
165