1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# Kselftest framework requirement - SKIP code is 4.
5ksft_skip=4
6
7set -e
8
9if [[ $(id -u) -ne 0 ]]; then
10  echo "This test must be run as root. Skipping..."
11  exit $ksft_skip
12fi
13
14nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
15
16fault_limit_file=limit_in_bytes
17reservation_limit_file=rsvd.limit_in_bytes
18fault_usage_file=usage_in_bytes
19reservation_usage_file=rsvd.usage_in_bytes
20
21if [[ "$1" == "-cgroup-v2" ]]; then
22  cgroup2=1
23  fault_limit_file=max
24  reservation_limit_file=rsvd.max
25  fault_usage_file=current
26  reservation_usage_file=rsvd.current
27fi
28
29if [[ $cgroup2 ]]; then
30  cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
31  if [[ -z "$cgroup_path" ]]; then
32    cgroup_path=/dev/cgroup/memory
33    mount -t cgroup2 none $cgroup_path
34    do_umount=1
35  fi
36  echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
37else
38  cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
39  if [[ -z "$cgroup_path" ]]; then
40    cgroup_path=/dev/cgroup/memory
41    mount -t cgroup memory,hugetlb $cgroup_path
42    do_umount=1
43  fi
44fi
45export cgroup_path
46
47function cleanup() {
48  if [[ $cgroup2 ]]; then
49    echo $$ >$cgroup_path/cgroup.procs
50  else
51    echo $$ >$cgroup_path/tasks
52  fi
53
54  if [[ -e /mnt/huge ]]; then
55    rm -rf /mnt/huge/*
56    umount /mnt/huge || echo error
57    rmdir /mnt/huge
58  fi
59  if [[ -e $cgroup_path/hugetlb_cgroup_test ]]; then
60    rmdir $cgroup_path/hugetlb_cgroup_test
61  fi
62  if [[ -e $cgroup_path/hugetlb_cgroup_test1 ]]; then
63    rmdir $cgroup_path/hugetlb_cgroup_test1
64  fi
65  if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then
66    rmdir $cgroup_path/hugetlb_cgroup_test2
67  fi
68  echo 0 >/proc/sys/vm/nr_hugepages
69  echo CLEANUP DONE
70}
71
72function expect_equal() {
73  local expected="$1"
74  local actual="$2"
75  local error="$3"
76
77  if [[ "$expected" != "$actual" ]]; then
78    echo "expected ($expected) != actual ($actual): $3"
79    cleanup
80    exit 1
81  fi
82}
83
84function get_machine_hugepage_size() {
85  hpz=$(grep -i hugepagesize /proc/meminfo)
86  kb=${hpz:14:-3}
87  mb=$(($kb / 1024))
88  echo $mb
89}
90
91MB=$(get_machine_hugepage_size)
92
93function setup_cgroup() {
94  local name="$1"
95  local cgroup_limit="$2"
96  local reservation_limit="$3"
97
98  mkdir $cgroup_path/$name
99
100  echo writing cgroup limit: "$cgroup_limit"
101  echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
102
103  echo writing reseravation limit: "$reservation_limit"
104  echo "$reservation_limit" > \
105    $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
106
107  if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then
108    echo 0 >$cgroup_path/$name/cpuset.cpus
109  fi
110  if [ -e "$cgroup_path/$name/cpuset.mems" ]; then
111    echo 0 >$cgroup_path/$name/cpuset.mems
112  fi
113}
114
115function wait_for_hugetlb_memory_to_get_depleted() {
116  local cgroup="$1"
117  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
118  # Wait for hugetlbfs memory to get depleted.
119  while [ $(cat $path) != 0 ]; do
120    echo Waiting for hugetlb memory to get depleted.
121    cat $path
122    sleep 0.5
123  done
124}
125
126function wait_for_hugetlb_memory_to_get_reserved() {
127  local cgroup="$1"
128  local size="$2"
129
130  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
131  # Wait for hugetlbfs memory to get written.
132  while [ $(cat $path) != $size ]; do
133    echo Waiting for hugetlb memory reservation to reach size $size.
134    cat $path
135    sleep 0.5
136  done
137}
138
139function wait_for_hugetlb_memory_to_get_written() {
140  local cgroup="$1"
141  local size="$2"
142
143  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
144  # Wait for hugetlbfs memory to get written.
145  while [ $(cat $path) != $size ]; do
146    echo Waiting for hugetlb memory to reach size $size.
147    cat $path
148    sleep 0.5
149  done
150}
151
152function write_hugetlbfs_and_get_usage() {
153  local cgroup="$1"
154  local size="$2"
155  local populate="$3"
156  local write="$4"
157  local path="$5"
158  local method="$6"
159  local private="$7"
160  local expect_failure="$8"
161  local reserve="$9"
162
163  # Function return values.
164  reservation_failed=0
165  oom_killed=0
166  hugetlb_difference=0
167  reserved_difference=0
168
169  local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file
170  local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file
171
172  local hugetlb_before=$(cat $hugetlb_usage)
173  local reserved_before=$(cat $reserved_usage)
174
175  echo
176  echo Starting:
177  echo hugetlb_usage="$hugetlb_before"
178  echo reserved_usage="$reserved_before"
179  echo expect_failure is "$expect_failure"
180
181  output=$(mktemp)
182  set +e
183  if [[ "$method" == "1" ]] || [[ "$method" == 2 ]] ||
184    [[ "$private" == "-r" ]] && [[ "$expect_failure" != 1 ]]; then
185
186    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
187      "$cgroup" "$path" "$method" "$private" "-l" "$reserve" 2>&1 | tee $output &
188
189    local write_result=$?
190    local write_pid=$!
191
192    until grep -q -i "DONE" $output; do
193      echo waiting for DONE signal.
194      if ! ps $write_pid > /dev/null
195      then
196        echo "FAIL: The write died"
197        cleanup
198        exit 1
199      fi
200      sleep 0.5
201    done
202
203    echo ================= write_hugetlb_memory.sh output is:
204    cat $output
205    echo ================= end output.
206
207    if [[ "$populate" == "-o" ]] || [[ "$write" == "-w" ]]; then
208      wait_for_hugetlb_memory_to_get_written "$cgroup" "$size"
209    elif [[ "$reserve" != "-n" ]]; then
210      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
211    else
212      # This case doesn't produce visible effects, but we still have
213      # to wait for the async process to start and execute...
214      sleep 0.5
215    fi
216
217    echo write_result is $write_result
218  else
219    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
220      "$cgroup" "$path" "$method" "$private" "$reserve"
221    local write_result=$?
222
223    if [[ "$reserve" != "-n" ]]; then
224      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
225    fi
226  fi
227  set -e
228
229  if [[ "$write_result" == 1 ]]; then
230    reservation_failed=1
231  fi
232
233  # On linus/master, the above process gets SIGBUS'd on oomkill, with
234  # return code 135. On earlier kernels, it gets actual oomkill, with return
235  # code 137, so just check for both conditions in case we're testing
236  # against an earlier kernel.
237  if [[ "$write_result" == 135 ]] || [[ "$write_result" == 137 ]]; then
238    oom_killed=1
239  fi
240
241  local hugetlb_after=$(cat $hugetlb_usage)
242  local reserved_after=$(cat $reserved_usage)
243
244  echo After write:
245  echo hugetlb_usage="$hugetlb_after"
246  echo reserved_usage="$reserved_after"
247
248  hugetlb_difference=$(($hugetlb_after - $hugetlb_before))
249  reserved_difference=$(($reserved_after - $reserved_before))
250}
251
252function cleanup_hugetlb_memory() {
253  set +e
254  local cgroup="$1"
255  if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then
256    echo killing write_to_hugetlbfs
257    killall -2 write_to_hugetlbfs
258    wait_for_hugetlb_memory_to_get_depleted $cgroup
259  fi
260  set -e
261
262  if [[ -e /mnt/huge ]]; then
263    rm -rf /mnt/huge/*
264    umount /mnt/huge
265    rmdir /mnt/huge
266  fi
267}
268
269function run_test() {
270  local size=$(($1 * ${MB} * 1024 * 1024))
271  local populate="$2"
272  local write="$3"
273  local cgroup_limit=$(($4 * ${MB} * 1024 * 1024))
274  local reservation_limit=$(($5 * ${MB} * 1024 * 1024))
275  local nr_hugepages="$6"
276  local method="$7"
277  local private="$8"
278  local expect_failure="$9"
279  local reserve="${10}"
280
281  # Function return values.
282  hugetlb_difference=0
283  reserved_difference=0
284  reservation_failed=0
285  oom_killed=0
286
287  echo nr hugepages = "$nr_hugepages"
288  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
289
290  setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit"
291
292  mkdir -p /mnt/huge
293  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
294
295  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \
296    "$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \
297    "$reserve"
298
299  cleanup_hugetlb_memory "hugetlb_cgroup_test"
300
301  local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file)
302  local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file)
303
304  echo $hugetlb_difference
305  echo $reserved_difference
306  expect_equal "0" "$final_hugetlb" "final hugetlb is not zero"
307  expect_equal "0" "$final_reservation" "final reservation is not zero"
308}
309
310function run_multiple_cgroup_test() {
311  local size1="$1"
312  local populate1="$2"
313  local write1="$3"
314  local cgroup_limit1="$4"
315  local reservation_limit1="$5"
316
317  local size2="$6"
318  local populate2="$7"
319  local write2="$8"
320  local cgroup_limit2="$9"
321  local reservation_limit2="${10}"
322
323  local nr_hugepages="${11}"
324  local method="${12}"
325  local private="${13}"
326  local expect_failure="${14}"
327  local reserve="${15}"
328
329  # Function return values.
330  hugetlb_difference1=0
331  reserved_difference1=0
332  reservation_failed1=0
333  oom_killed1=0
334
335  hugetlb_difference2=0
336  reserved_difference2=0
337  reservation_failed2=0
338  oom_killed2=0
339
340  echo nr hugepages = "$nr_hugepages"
341  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
342
343  setup_cgroup "hugetlb_cgroup_test1" "$cgroup_limit1" "$reservation_limit1"
344  setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2"
345
346  mkdir -p /mnt/huge
347  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
348
349  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \
350    "$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \
351    "$expect_failure" "$reserve"
352
353  hugetlb_difference1=$hugetlb_difference
354  reserved_difference1=$reserved_difference
355  reservation_failed1=$reservation_failed
356  oom_killed1=$oom_killed
357
358  local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file
359  local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file
360  local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file
361  local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file
362
363  local usage_before_second_write=$(cat $cgroup1_hugetlb_usage)
364  local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage)
365
366  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test2" "$size2" \
367    "$populate2" "$write2" "/mnt/huge/test2" "$method" "$private" \
368    "$expect_failure" "$reserve"
369
370  hugetlb_difference2=$hugetlb_difference
371  reserved_difference2=$reserved_difference
372  reservation_failed2=$reservation_failed
373  oom_killed2=$oom_killed
374
375  expect_equal "$usage_before_second_write" \
376    "$(cat $cgroup1_hugetlb_usage)" "Usage changed."
377  expect_equal "$reservation_usage_before_second_write" \
378    "$(cat $cgroup1_reservation_usage)" "Reservation usage changed."
379
380  cleanup_hugetlb_memory
381
382  local final_hugetlb=$(cat $cgroup1_hugetlb_usage)
383  local final_reservation=$(cat $cgroup1_reservation_usage)
384
385  expect_equal "0" "$final_hugetlb" \
386    "hugetlbt_cgroup_test1 final hugetlb is not zero"
387  expect_equal "0" "$final_reservation" \
388    "hugetlbt_cgroup_test1 final reservation is not zero"
389
390  local final_hugetlb=$(cat $cgroup2_hugetlb_usage)
391  local final_reservation=$(cat $cgroup2_reservation_usage)
392
393  expect_equal "0" "$final_hugetlb" \
394    "hugetlb_cgroup_test2 final hugetlb is not zero"
395  expect_equal "0" "$final_reservation" \
396    "hugetlb_cgroup_test2 final reservation is not zero"
397}
398
399cleanup
400
401for populate in "" "-o"; do
402  for method in 0 1 2; do
403    for private in "" "-r"; do
404      for reserve in "" "-n"; do
405
406        # Skip mmap(MAP_HUGETLB | MAP_SHARED). Doesn't seem to be supported.
407        if [[ "$method" == 1 ]] && [[ "$private" == "" ]]; then
408          continue
409        fi
410
411        # Skip populated shmem tests. Doesn't seem to be supported.
412        if [[ "$method" == 2"" ]] && [[ "$populate" == "-o" ]]; then
413          continue
414        fi
415
416        if [[ "$method" == 2"" ]] && [[ "$reserve" == "-n" ]]; then
417          continue
418        fi
419
420        cleanup
421        echo
422        echo
423        echo
424        echo Test normal case.
425        echo private=$private, populate=$populate, method=$method, reserve=$reserve
426        run_test 5 "$populate" "" 10 10 10 "$method" "$private" "0" "$reserve"
427
428        echo Memory charged to hugtlb=$hugetlb_difference
429        echo Memory charged to reservation=$reserved_difference
430
431        if [[ "$populate" == "-o" ]]; then
432          expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
433            "Reserved memory charged to hugetlb cgroup."
434        else
435          expect_equal "0" "$hugetlb_difference" \
436            "Reserved memory charged to hugetlb cgroup."
437        fi
438
439        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
440          expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
441            "Reserved memory not charged to reservation usage."
442        else
443          expect_equal "0" "$reserved_difference" \
444            "Reserved memory not charged to reservation usage."
445        fi
446
447        echo 'PASS'
448
449        cleanup
450        echo
451        echo
452        echo
453        echo Test normal case with write.
454        echo private=$private, populate=$populate, method=$method, reserve=$reserve
455        run_test 5 "$populate" '-w' 5 5 10 "$method" "$private" "0" "$reserve"
456
457        echo Memory charged to hugtlb=$hugetlb_difference
458        echo Memory charged to reservation=$reserved_difference
459
460        expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
461          "Reserved memory charged to hugetlb cgroup."
462
463        expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
464          "Reserved memory not charged to reservation usage."
465
466        echo 'PASS'
467
468        cleanup
469        continue
470        echo
471        echo
472        echo
473        echo Test more than reservation case.
474        echo private=$private, populate=$populate, method=$method, reserve=$reserve
475
476        if [ "$reserve" != "-n" ]; then
477          run_test "5" "$populate" '' "10" "2" "10" "$method" "$private" "1" \
478            "$reserve"
479
480          expect_equal "1" "$reservation_failed" "Reservation succeeded."
481        fi
482
483        echo 'PASS'
484
485        cleanup
486
487        echo
488        echo
489        echo
490        echo Test more than cgroup limit case.
491        echo private=$private, populate=$populate, method=$method, reserve=$reserve
492
493        # Not sure if shm memory can be cleaned up when the process gets sigbus'd.
494        if [[ "$method" != 2 ]]; then
495          run_test 5 "$populate" "-w" 2 10 10 "$method" "$private" "1" "$reserve"
496
497          expect_equal "1" "$oom_killed" "Not oom killed."
498        fi
499        echo 'PASS'
500
501        cleanup
502
503        echo
504        echo
505        echo
506        echo Test normal case, multiple cgroups.
507        echo private=$private, populate=$populate, method=$method, reserve=$reserve
508        run_multiple_cgroup_test "3" "$populate" "" "10" "10" "5" \
509          "$populate" "" "10" "10" "10" \
510          "$method" "$private" "0" "$reserve"
511
512        echo Memory charged to hugtlb1=$hugetlb_difference1
513        echo Memory charged to reservation1=$reserved_difference1
514        echo Memory charged to hugtlb2=$hugetlb_difference2
515        echo Memory charged to reservation2=$reserved_difference2
516
517        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
518          expect_equal "3" "$reserved_difference1" \
519            "Incorrect reservations charged to cgroup 1."
520
521          expect_equal "5" "$reserved_difference2" \
522            "Incorrect reservation charged to cgroup 2."
523
524        else
525          expect_equal "0" "$reserved_difference1" \
526            "Incorrect reservations charged to cgroup 1."
527
528          expect_equal "0" "$reserved_difference2" \
529            "Incorrect reservation charged to cgroup 2."
530        fi
531
532        if [[ "$populate" == "-o" ]]; then
533          expect_equal "3" "$hugetlb_difference1" \
534            "Incorrect hugetlb charged to cgroup 1."
535
536          expect_equal "5" "$hugetlb_difference2" \
537            "Incorrect hugetlb charged to cgroup 2."
538
539        else
540          expect_equal "0" "$hugetlb_difference1" \
541            "Incorrect hugetlb charged to cgroup 1."
542
543          expect_equal "0" "$hugetlb_difference2" \
544            "Incorrect hugetlb charged to cgroup 2."
545        fi
546        echo 'PASS'
547
548        cleanup
549        echo
550        echo
551        echo
552        echo Test normal case with write, multiple cgroups.
553        echo private=$private, populate=$populate, method=$method, reserve=$reserve
554        run_multiple_cgroup_test "3" "$populate" "-w" "10" "10" "5" \
555          "$populate" "-w" "10" "10" "10" \
556          "$method" "$private" "0" "$reserve"
557
558        echo Memory charged to hugtlb1=$hugetlb_difference1
559        echo Memory charged to reservation1=$reserved_difference1
560        echo Memory charged to hugtlb2=$hugetlb_difference2
561        echo Memory charged to reservation2=$reserved_difference2
562
563        expect_equal "3" "$hugetlb_difference1" \
564          "Incorrect hugetlb charged to cgroup 1."
565
566        expect_equal "3" "$reserved_difference1" \
567          "Incorrect reservation charged to cgroup 1."
568
569        expect_equal "5" "$hugetlb_difference2" \
570          "Incorrect hugetlb charged to cgroup 2."
571
572        expect_equal "5" "$reserved_difference2" \
573          "Incorrected reservation charged to cgroup 2."
574        echo 'PASS'
575
576        cleanup
577
578      done # reserve
579    done   # private
580  done     # populate
581done       # method
582
583if [[ $do_umount ]]; then
584  umount $cgroup_path
585  rmdir $cgroup_path
586fi
587
588echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
589