1#!./perl
2
3# Test that setting PERL_HASH_SEED and PERL_PERTURB_KEYS in different
4# combinations works as expected, and that changing the values provided
5# produces the expected results
6#
7# We do this by first executing Perl with a given PERL_PERTURB_KEYS
8# mode, and then extract the randomly chosen PERL_HASH_SEED it ran under
9# from its debug output which was printed to STDERR, and then use it for
10# further tests. This allows the tests to be robust to the choice of hash
11# function and seed sizes that might be in use in the perl being tested.
12# We do not ask perl to output any keys on this run, as our subsequent
13# runs will use different environment variables (specifically
14# PERL_HASH_SEED) which will change any key order results we see.
15#
16# We then execute perl a further three times and ask perl to build a
17# hash with a specific number of buckets and a specific set of keys. We
18# then have perl print the raw keys to STDOUT.
19#
20# For two of these three runs we supply the same seed, and both of those
21# times we supply the same perturb mode, but in different ways, once as
22# a name and once as a digit. The debug output should be identical in
23# both cases regardless of mode. For PERL_PERTURB_KEYS mode 0=NO, and
24# 2=DETERMINISTIC the key order should match. For mode 1=RANDOM the key
25# order should differ the vast majority of the time, however the test is
26# probabilistic and occasionally may result in the same key order.
27#
28# The third run we supply a different seed, with a 1 bit difference, but
29# with the same PERL_PERTURB_KEYS mode. In this case we expect the key
30# order to differ for all three modes, but again the test is
31# probabilistic and we may get the same key order in a small percentage
32# of the times we try this.
33#
34# To address the probabilistic nature of these tests we run them
35# multiple times and count how many times we get the same key order.
36# Most times this should be zero, but occasionally it might be higher.
37# Therefore we use a threshold $allowed_fails to determine how many
38# times the key order may be unchanged before we consider the tests
39# actually failed. We also use a largish number of keys in a hash with
40# a large number of buckets, which means we produce a lot a large temp
41# files as we test, so we aggressively clean them up as we go.
42
43
44BEGIN {
45    chdir 't' if -d 't';
46    @INC = '../lib';
47    require './test.pl';
48    require Config;
49    Config->import;
50}
51
52skip_all_without_config('d_fork');
53skip_all("NO_PERL_HASH_ENV or NO_PERL_HASH_SEED_DEBUG set")
54    if $Config{ccflags} =~ /-DNO_PERL_HASH_ENV\b/
55    || $Config{ccflags} =~ /-DNO_PERL_HASH_SEED_DEBUG\b/;
56use strict;
57use warnings;
58
59# enable DEBUG_RUNENV if you want to see what is being returned
60# by the executed perl.
61sub my_runperl {
62    my ($cmd_array, $perturb, $set_seed) = @_;
63    my $opts_hash= {
64        PERL_HASH_SEED_DEBUG => 1,
65        PERL_PERTURB_KEYS => $perturb
66    };
67    $opts_hash->{PERL_HASH_SEED}= $set_seed if $set_seed;
68
69    my ( $out, $err )
70        = runperl_and_capture( $opts_hash, $cmd_array );
71    my @err= split /\n/, $err;
72
73    my $seed;
74    my $mode_name;
75    my $mode_digit;
76    my @err_got_data;
77    my @rand_bits;
78    foreach my $line (@err) {
79        if ($line=~/^Got.*/) {
80            push @err_got_data, $line;
81        }
82        elsif ($line=~/^PL_hash_rand_bits=.*/) {
83            push @rand_bits, $line;
84        }
85        elsif ($line=~/HASH_SEED = (0x[a-f0-9]+)/) {
86            $seed= $1;
87            $line =~ /PERTURB_KEYS = (\d) \((\w+)\)/
88                or die "Failed to extract perturb mode: $err";
89            $mode_digit = $1;
90            $mode_name = $2;
91
92        }
93    }
94    if (!$seed){
95        die "Failed to extract seed: $err";
96    }
97    my $err_got_data= join("\n",@err_got_data);
98    return ($seed, $mode_digit, $mode_name, $out, $err_got_data, \@rand_bits);
99}
100
101my @mode_names = (
102    'NO',            # 0
103    'RANDOM',        # 1
104    'DETERMINISTIC', # 2
105);
106
107my $repeat = 50; # if this changes adjust the comments below.
108my $min_buckets = 100_000;
109my $actual_buckets = 1;
110$actual_buckets *= 2 while $actual_buckets <= $min_buckets;
111my $key_expr = '0..999, "aa".."zz", map { $_ x 30 } "a".."z"'; #1702 keys
112my @keys = eval $key_expr
113    or die "bad '$key_expr': $@";
114my $allowed_fails = 2; # Adjust this up to make the test tolerate
115                       # more "errors". Maybe one day we will compute
116                       # it from the value of $repeat, and $actual_buckets
117                       # and the number of @keys.
118
119plan tests => (4 * $repeat)     # DETERMINISTIC
120            + (1 * $repeat)     # NO
121            + 1                 # RANDOM mode
122            + (8 * @mode_names) # validation per mode
123            + @mode_names;      # all modes
124
125
126# Note the keys(%h) = $n will cause perl to allocate the power of 2 larger
127# than $n buckets, so if $n = 100_000, then $actual_buckets will be 131072.
128
129my @perl_args = (
130    '-I../lib',
131    (is_miniperl() ? () # no Hash::Util here!
132                   : '-MHash::Util=hash_traversal_mask,num_buckets'),
133    '-e',
134    'my %h; keys(%h)=' . $min_buckets . '; ' .
135    '@h{' . $key_expr . '}=(); @k=keys %h; ' .
136      'print join ":", 0+@k, ' .
137      (is_miniperl() ? '' :  # no Hash::Util here!
138          'num_buckets(%h),hash_traversal_mask(\\%h), ') .
139      'join ",", @k;'
140  );
141
142for my $test_mode_digit (0 .. $#mode_names) {
143    my $test_mode_name = $mode_names[$test_mode_digit];
144    my $descr_mode = "mode = $test_mode_name";
145
146    my $print_keys= [ ($test_mode_name eq "DETERMINISTIC")
147                      ? "-Dh" : (), # enable hash diags
148                      @perl_args ];
149
150    my $validated_mode= 0;
151    my $random_same = 0;
152    my $seed_change_same = 0;
153    for my $try (1 .. $repeat) {
154
155        my $descr = sprintf "%s, try %2d:", $descr_mode, $try;
156
157        # First let perl choose the seed. We only use the $seed and $err
158        # output here. We extract the seed that perl chose, which
159        # hardens us against the use of different hash functions with
160        # different seed sizes. Also the act of adding the PERL_HASH_SEED
161        # to the environment later on will likely change the $out.
162        my ( $seed, $digit, $mode )
163            = my_runperl( ['-e1'], $test_mode_name );
164
165        # Now we have to run it again.
166        my ( $seed1, $digit1, $mode1, $out1, $err_got_data1, $rand_bits1 )
167            = my_runperl( $print_keys, $test_mode_name, $seed );
168
169        # And once more, these two should do the same thing for
170        # DETERMINISTIC and NO, and be different for RANDOM.
171        # We set the mode via the digit not the name here.
172        my ( $seed2, $digit2, $mode2, $out2, $err_got_data2, $rand_bits2 )
173            = my_runperl( $print_keys, $test_mode_digit, $seed );
174
175        if (!$validated_mode++) {
176            is($digit, $test_mode_digit,
177                "$descr base run set the mode digit as expected");
178
179            is($mode, $test_mode_name,
180                "$descr base run set the mode name as expected");
181
182            is( $seed1, $seed,
183                "$descr retry 1 set the seed as expected");
184
185            is( $mode1, $test_mode_name,
186                "$descr retry 1 set the mode by name as expected");
187
188            is( $digit2, $test_mode_digit,
189                "$descr retry 2 set the mode by digit as expected");
190
191            is( $seed1, $seed2,
192                "$descr seeds match between retries");
193
194            is( $digit1, $digit2,
195                "$descr mode digits match between retries");
196
197            is( $mode1, $mode2,
198                "$descr mode names match between retries");
199        }
200
201        {
202            # We also test that a 1 bit change to the seed will
203            # actually change the output in all modes. It should
204            # most of the time.
205            my $munged_seed = $seed;
206            substr($munged_seed,-1)=~tr/0-9a-f/1-9a-f0/;
207            if ( $munged_seed eq $seed ) {
208                die "Failed to munge seed '$seed'";
209            }
210
211            my ( $new_seed, $new_digit, $new_mode, $new_out )
212                = my_runperl( \@perl_args, $test_mode_name, $munged_seed );
213            if ($new_seed ne $munged_seed) {
214                die "panic: seed change didn't seem to propagate";
215            }
216            if (
217                $new_mode  ne $test_mode_name or
218                $new_digit ne $test_mode_digit
219            ) {
220                die "panic: mode setting not as expected";
221            }
222
223            # The result should be different most times, but there
224            # is a small chance that we got the same result, so
225            # count how many times it happens and then check if it
226            # exceeds $allowed_fails later.
227            $seed_change_same++ if $out1 eq $new_out;
228        }
229
230        if ( $test_mode_name eq 'RANDOM' ) {
231            # The result should be different most times, but there is a
232            # small chance that we get the same result, so count how
233            # many times it happens and then check if it exceeds
234            # $allowed_fails later.
235            $random_same++ if $out1 eq $out2;
236            next;
237        }
238
239        # From this point on we are testing DETERMINISTIC and NO
240        # modes only.
241
242        is( $out1, $out2,
243            "$descr results in the same key order each time"
244        );
245
246        next if $test_mode_name eq "NO";
247
248        # From this point on we are testing the DETERMINISTIC
249        # mode only.
250
251        SKIP: {
252            # skip these tests if we are not running in a DEBUGGING perl.
253            skip "$descr not testing rand bits, not a DEBUGGING perl", 3
254                if @$rand_bits1 + @$rand_bits2 == 0;
255
256            is ( 0+@$rand_bits1, 0+@$rand_bits2,
257                "$descr same count of rand_bits entries each time");
258
259            my $max_i = $#$rand_bits1 > $#$rand_bits2
260                      ? $#$rand_bits1 : $#$rand_bits2;
261
262            my $bad_idx;
263            for my $i (0 .. $max_i) {
264                if (($rand_bits2->[$i] // "") ne
265                    ($rand_bits1->[$i] // ""))
266                {
267                    $bad_idx = $i;
268                    last;
269                }
270            }
271            is($bad_idx, undef,
272                "$descr bad rand bits data index should be undef");
273            if (defined $bad_idx) {
274                # we use is() to see the differing data, but this test
275                # is expected to fail - the description seems a little
276                # odd here, but since it will always fail it makes sense
277                # in context.
278                is($rand_bits2->[$bad_idx],$rand_bits1->[$bad_idx],
279                    "$descr rand bits data is the same at idx $bad_idx");
280            } else {
281                pass("$descr rand bits data is the same");
282            }
283        }
284    }
285    continue {
286        # We create a lot of big temp files so clean them up as we go.
287        # This is in a continue block so we can do this cleanup after
288        # each iteration even if we call next in the middle of the loop.
289        unlink_tempfiles();
290    }
291
292    # We just finished $repeat tests, now deal with the probabilistic
293    # results and ensure that we are under the $allowed_fails threshold
294
295    if ($test_mode_name eq "RANDOM") {
296        # There is a small chance we got the same result a few times
297        # even when everything is working as expected. So allow a
298        # small number number of fails determined by $allowed_fails.
299        ok( $random_same <= $allowed_fails,
300            "$descr_mode same key order no more than $allowed_fails times")
301            or diag(
302                "Key order was the same $random_same/$repeat times in",
303                "RANDOM mode. This test is probabilistic so if the number",
304                "is low and you re-run the tests and it does not fail",
305                "again then you can ignore this test fail.");
306
307    }
308
309    # There is a small chance we got the same result a few times even
310    # when everything is working as expected. So allow a small number
311    # of fails as determined by $allowed_fails.
312    ok( $seed_change_same <= $allowed_fails,
313        "$descr_mode same key order with different seed no more " .
314        "than $allowed_fails times" )
315        or diag(
316            "Key order was the same $random_same/$repeat times with",
317            "a different seed. This test is probabilistic so if the number",
318            "is low and you re-run the tests and it does not fail",
319            "again then you can ignore this test fail.");
320}
321