1#!/usr/bin/python3
2
3# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
4#
5# SPDX-License-Identifier: MPL-2.0
6#
7# This Source Code Form is subject to the terms of the Mozilla Public
8# License, v. 2.0.  If a copy of the MPL was not distributed with this
9# file, you can obtain one at https://mozilla.org/MPL/2.0/.
10#
11# See the COPYRIGHT file distributed with this work for additional
12# information regarding copyright ownership.
13
14from concurrent.futures import ThreadPoolExecutor, as_completed
15import os
16import random
17import signal
18import subprocess
19from string import ascii_lowercase as letters
20import time
21
22import pytest
23
24pytest.importorskip("dns", minversion="2.0.0")
25import dns.exception
26import dns.resolver
27
28
def do_work(named_proc, resolver, rndc_cmd, kill_method, n_workers, n_queries):
    """Send A queries in parallel and shut named down while they are in flight.

    The work items are submitted to a thread pool in this order:

    * the first ``n_queries // 2`` items are A queries sent through
      ``resolver``, each randomly either ``www.test`` (tagged "good") or a
      random 4-10 letter name under ``.test`` (tagged "bad");
    * the next item asks named to shut down;
    * every remaining item launches ``rndc status`` so that control
      connections are opened while named is shutting down (parallel
      control connections used to crash named).

    if kill_method=="rndc" named will be asked to shutdown by
    means of rndc stop, and the exit code of that rndc run must be 0.
    For any other value (normally "sigterm") ``named_proc.terminate()`` is
    called, i.e. SIGTERM on POSIX systems or TerminateProcess() on Windows
    systems; the caller is then responsible for checking how named exited.

    :param named_proc: named process instance
    :type named_proc: subprocess.Popen

    :param resolver: target resolver
    :type resolver: dns.resolver.Resolver

    :param rndc_cmd: rndc command with default arguments
    :type rndc_cmd: list of strings, e.g. ["rndc", "-p", "23750"]

    :param kill_method: "rndc" or "sigterm"
    :type kill_method: str

    :param n_workers: Number of worker threads to create
    :type n_workers: int

    :param n_queries: Total number of work items to submit
    :type n_queries: int

    :raises AssertionError: if kill_method is "rndc" and ``rndc stop`` did
        not exit with code 0 (or was never submitted).

    Exceptions raised by a work item other than NXDOMAIN, NoNameservers and
    Timeout are not handled here and propagate to the caller.
    """
    # pylint: disable-msg=too-many-arguments
    # pylint: disable-msg=too-many-locals

    def launch_rndc(args):
        """Run rndc with *args* appended and return its exit code.

        args must be a list: it is concatenated to the rndc_cmd list.
        """
        return subprocess.call(rndc_cmd + args, timeout=10)

    # We're going to execute queries in parallel by means of a thread pool.
    # dnspython functions block, so we need to circumvent that.
    with ThreadPoolExecutor(n_workers + 1) as executor:
        # Helper dict, where keys=Future objects and values are tags used
        # to process results later.
        futures = {}

        # 50% of work will be A queries.
        # 1 work will be the shutdown request.
        # Remaining work will be rndc status (so we test parallel control
        #  connections that were crashing named).
        shutdown = True
        for i in range(n_queries):
            if i < (n_queries // 2):
                # Among the A queries we split 50% queries relname='www',
                # 50% queries relname=random characters
                if random.randrange(2) == 1:
                    tag = "good"
                    relname = "www"
                else:
                    tag = "bad"
                    length = random.randint(4, 10)
                    relname = "".join(random.choice(letters) for _ in range(length))

                qname = relname + ".test"
                futures[executor.submit(resolver.resolve, qname, "A")] = tag
            elif shutdown:  # We attempt to stop named in the middle
                shutdown = False
                if kill_method == "rndc":
                    futures[executor.submit(launch_rndc, ["stop"])] = "stop"
                else:
                    futures[executor.submit(named_proc.terminate)] = "kill"
            else:
                # We attempt to send a couple of rndc commands while named
                # is being shut down
                futures[executor.submit(launch_rndc, ["status"])] = "status"

        # -1 is a sentinel meaning "no rndc stop result was collected".
        ret_code = -1
        for future in as_completed(futures):
            try:
                result = future.result()
            except (
                dns.resolver.NXDOMAIN,
                dns.resolver.NoNameservers,
                dns.exception.Timeout,
            ):
                # Queries are expected to fail once named goes away.
                continue

            # launch_rndc() wraps subprocess.call(), so for the "stop" tag
            # the result is the integer exit code of rndc stop.
            #
            # If the tag is "kill" then the main function will check if
            # the named process exited gracefully after the SIGTERM signal.
            if futures[future] == "stop":
                ret_code = result

        if kill_method == "rndc":
            assert ret_code == 0, f"rndc stop failed with exit code {ret_code}"
132
133
def wait_for_named_loaded(resolver, retries=10):
    """Poll the server until it answers a query or the attempts run out.

    Each attempt sends a CH-class TXT query for ``version.bind`` through
    ``resolver``.  When the query raises NoNameservers or Timeout the
    function sleeps for one second before the next attempt; any other
    exception propagates to the caller.

    :param resolver: resolver pointed at the server under test
    :type resolver: dns.resolver.Resolver

    :param retries: maximum number of attempts
    :type retries: int

    :return: True as soon as one query completes without raising, False
        when every attempt failed
    :rtype: bool
    """
    for _attempt in range(retries):
        try:
            resolver.resolve("version.bind", "TXT", "CH")
        except (dns.resolver.NoNameservers, dns.exception.Timeout):
            # Server not answering yet: back off before the next attempt.
            time.sleep(1)
        else:
            return True
    return False
142
143
def wait_for_proc_termination(proc, max_timeout=10):
    """Wait for a process to exit, sending SIGABRT if it does not.

    The process is polled up to ``max_timeout`` times, sleeping one second
    after each unsuccessful poll.  If it is still running afterwards,
    SIGABRT is sent to it and the same polling round is repeated once.

    :param proc: process to wait for
    :type proc: subprocess.Popen

    :param max_timeout: number of one-second polls per round
    :type max_timeout: int

    :return: True if the process exited during either round, False if it
        was still running after both
    :rtype: bool
    """

    def exited_within(polls):
        """Poll proc up to *polls* times, one second apart."""
        for _ in range(polls):
            if proc.poll() is not None:
                return True
            time.sleep(1)
        return False

    if exited_within(max_timeout):
        return True

    # The process did not exit in time: abort it and wait one more round.
    proc.send_signal(signal.SIGABRT)
    return exited_within(max_timeout)
157
158
159# We test named shutting down using two methods:
# Method 1: using rndc stop
161# Method 2: killing with SIGTERM
162# In both methods named should exit gracefully.
@pytest.mark.parametrize("kill_method", ["rndc", "sigterm"])
def test_named_shutdown(named_port, control_port, kill_method):
    """Start named, load it with queries, shut it down and check its exit.

    named is launched in the foreground from the ``resolver`` directory
    under the current working directory.  Once it answers queries,
    do_work() sends queries and rndc commands while requesting shutdown via
    ``kill_method``.  The test passes only if named terminates and its
    return code is 0.

    The NAMED and RNDC environment variables must hold the paths of the
    respective binaries.
    """
    # pylint: disable-msg=too-many-locals
    conf_root = os.path.join(os.getcwd(), "resolver")
    assert os.path.isdir(conf_root)

    named_conf = os.path.join(conf_root, "named.conf")
    assert os.path.isfile(named_conf)

    named_bin = os.getenv("NAMED")
    assert named_bin is not None

    rndc_bin = os.getenv("RNDC")
    assert rndc_bin is not None

    # rndc configuration resides in ../_common/rndc.conf
    rndc_conf = os.path.join("..", "_common", "rndc.conf")
    assert os.path.isfile(rndc_conf)

    # Address used both for rndc control connections and for DNS queries.
    server_addr = "10.53.0.3"

    # rndc command with default arguments.
    rndc_cmd = [
        rndc_bin,
        "-c",
        rndc_conf,
        "-p",
        str(control_port),
        "-s",
        server_addr,
    ]

    # Resolver instance used to send queries to the server under test.
    resolver = dns.resolver.Resolver()
    resolver.nameservers = [server_addr]
    resolver.port = named_port

    # -f keeps named in the foreground so the Popen object tracks it.
    with subprocess.Popen(
        [named_bin, "-c", named_conf, "-f"], cwd=conf_root
    ) as named_proc:
        try:
            assert named_proc.poll() is None, "named isn't running"
            assert wait_for_named_loaded(resolver)
            do_work(
                named_proc,
                resolver,
                rndc_cmd,
                kill_method,
                n_workers=12,
                n_queries=16,
            )
            assert wait_for_proc_termination(named_proc)
            assert named_proc.returncode == 0, "named crashed"
        finally:  # Ensure named is terminated in case of an exception
            named_proc.kill()
207