1#
2# Copyright 2020 Haiku, Inc. All rights reserved.
3# Distributed under the terms of the MIT License.
4#
5# Authors:
6#  Kyle Ambroff-Kao, kyle@ambroffkao.com
7#
8
9"""
10HTTP(S) server used for integration testing of ServicesKit.
11
12This service receives HTTP requests and just echos them back in the response.
13
14This is intentionally not using any fancy frameworks or libraries so as to not
15require any dependencies, and also to allow for adding endpoints to replicate
16behavior of other servers in the future.
17"""
18
19import abc
20import base64
21import gzip
22import hashlib
23import http.server
24import io
25import optparse
26import os
27import re
28import socket
29import ssl
30import subprocess
31import sys
32import tempfile
33import zlib
34
35
# Matches a multipart/form-data Content-Type header value and captures the
# boundary identifier (a fixed run of dashes followed by digits) as group 1.
MULTIPART_FORM_BOUNDARY_RE = re.compile(
    r'^multipart/form-data; boundary=(----------------------------\d+)$')
# Matches request paths that start with
# /auth/<strategy>/<username>/<password>, where <strategy> is "basic" or
# "digest". The whole pattern is matched case-insensitively.
AUTH_PATH_RE = re.compile(
    r'^/auth/(?P<strategy>(basic|digest))'
    '/(?P<username>[a-z0-9]+)/(?P<password>[a-z0-9]+)',
    re.IGNORECASE)
42
43
class RequestHandler(http.server.BaseHTTPRequestHandler):
    """
    Any GET or POST request just gets echoed back to the sender. If the path
    ends with a numeric component like "/404" or "/500", then that value will
    be set as the status code in the response.

    Note that this isn't meant to replicate expected functionality exactly.
    Rather than implementing all of these status codes as expected per RFC,
    such as having an empty response body for 201 response, only the
    functionality that is required to handle requests from HttpTests is
    implemented.

    There can also be endpoints here that are intentionally non-compliant in
    order to exercise the HTTP client's behavior when a server is badly
    behaved.
    """
    def do_GET(self, write_response=True):
        """
        Echo the request back to the client.

        :param write_response: When False only the status line and headers
                               are sent. do_HEAD() relies on this.
        """
        authorized, extra_headers = self._authorize()
        if not authorized:
            # The 401 response has already been sent by _authorize().
            return

        encoding, response_body = self._build_response_body()

        status_code = extract_desired_status_code_from_path(self.path, 200)
        self.send_response(status_code)
        if 300 <= status_code < 400:
            self.send_header('Location', '/')

        if status_code == 204:
            # "No Content": send neither entity headers nor a body.
            write_response = False
        else:
            self.send_header('Content-Type', 'text/plain')
            self.send_header('Content-Length', str(len(response_body)))
            if encoding:
                self.send_header('Content-Encoding', encoding)

        for header_name, header_value in extra_headers:
            self.send_header(header_name, header_value)
        self.end_headers()

        if write_response:
            self.wfile.write(response_body)

    def do_HEAD(self):
        """Send the same headers as GET, but without the response body."""
        self.do_GET(False)

    def do_POST(self):
        """Echo the request, including its body, back to the client."""
        authorized, extra_headers = self._authorize()
        if not authorized:
            # The 401 response has already been sent by _authorize().
            return

        encoding, response_body = self._build_response_body()
        self.send_response(
            extract_desired_status_code_from_path(self.path, 200))
        self.send_header('Content-Type', 'text/plain')
        self.send_header('Content-Length', str(len(response_body)))
        if encoding:
            self.send_header('Content-Encoding', encoding)
        for header_name, header_value in extra_headers:
            self.send_header(header_name, header_value)

        self.end_headers()
        self.wfile.write(response_body)

    def do_DELETE(self):
        """DELETE is not implemented; respond with 405."""
        self._not_supported()

    def do_PATCH(self):
        """PATCH is not implemented; respond with 405."""
        self._not_supported()

    def do_OPTIONS(self):
        """OPTIONS is not implemented; respond with 405."""
        self._not_supported()

    def send_response(self, code, message=None):
        """
        Log the request and queue the status line plus fixed headers.

        The Server and Date headers are hard-coded so that responses are
        deterministic for the tests.
        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', 'Test HTTP Server for Haiku')
        self.send_header('Date', 'Sun, 09 Feb 2020 19:32:42 GMT')

    def _build_response_body(self):
        """
        Build the echo body describing the current request.

        :return: Tuple of (content encoding name or None, body bytes). The
                 body is already encoded with the returned content encoding.
        """
        # The post-body may be multi-part/form-data, in which case the client
        # will have generated some random identifier to identify the boundary.
        # If that's the case, we'll replace it here in order to allow the test
        # client to validate the response data without needing to predict the
        # boundary identifier. This makes the response body deterministic even
        # though the boundary will change with every request, and lets the
        # tests in HttpTests hard-code the entire expected response body for
        # validation.
        boundary_id_value = None

        supported_encodings = [
            e.strip()
            for e in self.headers.get('Accept-Encoding', '').split(',')
            if e.strip()]
        if 'gzip' in supported_encodings:
            encoding = 'gzip'
            output_stream = GzipResponseBodyBuilder()
        elif 'deflate' in supported_encodings:
            encoding = 'deflate'
            output_stream = DeflateResponseBodyBuilder()
        else:
            encoding = None
            output_stream = RawResponseBodyBuilder()

        output_stream.write(
            'Path: {}\r\n\r\n'.format(self.path).encode('utf-8'))
        output_stream.write(b'Headers:\r\n')
        output_stream.write(b'--------\r\n')
        # items() yields every (name, value) pair exactly once, in order.
        # Iterating the names and calling get_all() for each one would echo
        # a header that was sent N times N*N times.
        for header, header_value in self.headers.items():
            if header in ('Host', 'Referer', 'X-Forwarded-For'):
                # The server port can change between runs which will change
                # the size and contents of the response body. To make tests
                # that verify the contents of the response body easier the
                # server port will be stripped from these headers when
                # echoed to the response body.
                header_value = re.sub(r':[0-9]+', ':PORT', header_value)

                # The scheme will also be in this header value, and we want
                # to return the same regardless of whether http:// or
                # https:// was used.
                header_value = re.sub(
                    r'https?://',
                    'SCHEME://',
                    header_value)
            if header == 'Content-Type':
                match = MULTIPART_FORM_BOUNDARY_RE.match(header_value)
                if match is not None:
                    boundary_id_value = match.group(1)
                    header_value = header_value.replace(
                        boundary_id_value,
                        '<<BOUNDARY-ID>>')
            output_stream.write(
                '{}: {}\r\n'.format(header, header_value).encode('utf-8'))

        try:
            content_length = int(self.headers.get('Content-Length', 0))
        except ValueError:
            # A malformed Content-Length is treated as "no body" rather than
            # crashing the handler.
            content_length = 0

        if content_length > 0:
            output_stream.write(b'\r\n')
            output_stream.write(b'Request body:\r\n')
            output_stream.write(b'-------------\r\n')

            # Work on bytes so that bodies which are not valid UTF-8 can
            # still be echoed. The boundary is plain ASCII, so replacing it
            # at the byte level is equivalent for UTF-8 bodies.
            body_bytes = self.rfile.read(content_length)
            if boundary_id_value:
                body_bytes = body_bytes.replace(
                    boundary_id_value.encode('utf-8'),
                    b'<<BOUNDARY-ID>>')

            output_stream.write(body_bytes)
            output_stream.write(b'\r\n')

        return encoding, output_stream.get_bytes()

    def _not_supported(self):
        """Respond with 405 and a short message naming the method."""
        self.send_response(405, '{} not supported'.format(self.command))
        self.end_headers()
        self.wfile.write(
            '{} not supported\r\n'.format(self.command).encode('utf-8'))

    def _authorize(self):
        """
        Authorizes the request.

        :return: Tuple of (authorized, extra_headers). If authorized is False
                 the request was not authorized and the 401 response has
                 already been sent to the client. extra_headers is a list of
                 (name, value) pairs to add to a successful response.
        """
        # We only authorize paths like
        # /auth/<strategy>/<expected-username>/<expected-password>
        match = AUTH_PATH_RE.match(self.path)
        if match is None:
            return True, []

        # The path regex is case-insensitive, so normalize before comparing.
        strategy = match.group('strategy').lower()
        expected_username = match.group('username')
        expected_password = match.group('password')

        if strategy == 'basic':
            return self._handle_basic_auth(
                expected_username,
                expected_password)
        elif strategy == 'digest':
            return self._handle_digest_auth(
                expected_username,
                expected_password)
        else:
            raise NotImplementedError(
                'Unimplemented authorization strategy ' + strategy)

    def _handle_basic_auth(self, expected_username, expected_password):
        """
        Check HTTP Basic credentials against the expected ones.

        A missing, malformed or undecodable Authorization header is treated
        as a failed login (401) instead of raising.

        :return: Tuple of (authorized, extra_headers), see _authorize().
        """
        authorization = self.headers.get('Authorization', None)
        auth_type = None
        username = None
        password = None

        if authorization:
            parts = authorization.split(maxsplit=1)
            if len(parts) == 2:
                auth_type, encoded_credentials = parts
                try:
                    decoded = base64.decodebytes(
                        encoded_credentials.encode('utf-8')).decode('utf-8')
                except ValueError:
                    # binascii.Error and UnicodeDecodeError both derive from
                    # ValueError.
                    decoded = ''
                # Only the first colon separates the user from the password;
                # the password itself may contain colons.
                name, separator, secret = decoded.partition(':')
                if separator:
                    username, password = name, secret

        if auth_type != 'Basic' \
                or username != expected_username \
                or password != expected_password:
            self.send_response(401, 'Not authorized')
            self.send_header('Www-Authenticate', 'Basic realm="Fake Realm"')
            self.end_headers()
            return False, []

        return True, [('Www-Authenticate', 'Basic realm="Fake Realm"')]

    def _handle_digest_auth(self, expected_username, expected_password):
        """
        Implement enough of the digest auth RFC to make tests pass.

        :return: Tuple of (authorized, extra_headers), see _authorize().
        """
        # Note: These values will always be the same because we want the
        # response to be deterministic for testing purposes.
        NONCE = 'f3a95f20879dd891a5544bf96a3e5518'
        OPAQUE = 'f0bb55f1221a51b6d38117c331611799'

        extra_headers = []
        authorization = self.headers.get('Authorization', None)
        credentials = None
        if authorization is not None:
            parts = authorization.split(maxsplit=1)
            if len(parts) == 2 and parts[0] == 'Digest':
                try:
                    credentials = parse_kv_pair_header(parts[1])
                except ValueError:
                    # Unparseable credentials are simply not authorized.
                    credentials = None

        authorized = False
        # Require every field the hash depends on, and make sure the client
        # is authenticating as the user named in the path.
        if credentials \
                and credentials.get('realm') is not None \
                and credentials.get('response') is not None \
                and credentials.get('username') == expected_username:
            expected_response_hash = compute_digest_challenge_response_hash(
                self.command,
                self.path,
                '',
                credentials,
                expected_password)
            # A hash of None means "could not be computed" and must never be
            # treated as a match.
            authorized = expected_response_hash is not None \
                and expected_response_hash == credentials.get('response')

        if not authorized:
            self.send_response(401, 'Not authorized')
            self.send_header(
                'Www-Authenticate',
                'Digest realm="user@shredder",'
                ' nonce="{}",'
                ' qop="auth",'
                ' opaque={},'
                ' algorithm=MD5,'
                ' stale=FALSE'.format(NONCE, OPAQUE))
            self.send_header('Set-Cookie', 'stale_after=never; Path=/')
            self.send_header('Set-Cookie', 'fake=fake_value; Path=/')
            self.end_headers()
            return False, extra_headers

        return True, extra_headers
300
301
class ResponseBodyBuilder(abc.ABC):
    """
    Interface for objects that accumulate a response body.

    Deriving from abc.ABC is what makes the @abc.abstractmethod markers
    effective: a subclass that forgets to implement write() or get_bytes()
    cannot be instantiated.
    """

    @abc.abstractmethod
    def write(self, bytes):
        """Append a chunk of bytes to the response body."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_bytes(self):
        """Return the complete response body as bytes."""
        raise NotImplementedError()
312
313
class RawResponseBodyBuilder(ResponseBodyBuilder):
    """Collects the response body as-is, without any content encoding."""

    def __init__(self):
        # In-memory buffer holding everything written so far.
        self.buf = io.BytesIO()

    def write(self, bytes):
        """Append a chunk of bytes to the buffer."""
        sink = self.buf
        sink.write(bytes)

    def get_bytes(self):
        """Return everything that has been written so far."""
        contents = self.buf.getvalue()
        return contents
323
324
class GzipResponseBodyBuilder(ResponseBodyBuilder):
    """Collects the response body and gzip-compresses it on the fly."""

    def __init__(self):
        # Compressed output is accumulated in self.buf by the GzipFile.
        self.buf = io.BytesIO()
        gzip_options = {
            'mode': 'wb',
            'compresslevel': 4,
            'fileobj': self.buf,
        }
        self.compressor = gzip.GzipFile(**gzip_options)

    def write(self, bytes):
        """Feed a chunk of uncompressed bytes to the compressor."""
        self.compressor.write(bytes)

    def get_bytes(self):
        """Close the gzip stream and return the compressed body."""
        # Closing the compressor is what emits the gzip trailer into buf.
        self.compressor.close()
        compressed = self.buf.getvalue()
        return compressed
339
340
class DeflateResponseBodyBuilder(ResponseBodyBuilder):
    """Collects the response body and zlib-compresses it when requested."""

    def __init__(self):
        # Uncompressed bytes are buffered here until get_bytes() is called.
        self.raw = RawResponseBodyBuilder()

    def write(self, bytes):
        """Append a chunk of uncompressed bytes."""
        uncompressed = self.raw
        uncompressed.write(bytes)

    def get_bytes(self):
        """Return the whole buffered body compressed with zlib.compress()."""
        plain = self.raw.get_bytes()
        return zlib.compress(plain)
350
351
def extract_desired_status_code_from_path(path, default=200):
    """
    Return the HTTP status code requested by the last path component.

    A request path such as "/404" or "/foo/500" asks the server to respond
    with that status code. Only a component made up entirely of ASCII
    digits whose value is in the range 100-599 is honored. Anything else
    (for example "-1", "+5", "1_0" or "99999", all of which int() would
    accept) falls back to the default so that the server never emits an
    invalid status line.

    :param path: The request path.
    :param default: Status code to use when the path does not request one.
    :return: The status code to respond with.
    """
    # This is a URL path, not a filesystem path, so split on '/' explicitly.
    last_component = path.rpartition('/')[2]
    if not last_component:
        return default
    if any(char not in '0123456789' for char in last_component):
        return default

    try:
        status_code = int(last_component)
    except ValueError:
        # Newer Python versions refuse to convert absurdly long digit strings.
        return default

    if 100 <= status_code <= 599:
        return status_code
    return default
360
361
def generate_self_signed_tls_cert(common_name, cert_path, key_path):
    """
    Create a self-signed certificate and private key with the openssl CLI.

    :param common_name: Value used for the CN field of the subject.
    :param cert_path: Path the certificate is written to.
    :param key_path: Path the unencrypted private key is written to.
    :raises subprocess.CalledProcessError: If openssl exits with an error.
    """
    subject = '/CN={}'.format(common_name)
    openssl_command = ['openssl', 'req', '-x509', '-nodes']
    openssl_command += ['-subj', subject]
    openssl_command += ['-newkey', 'rsa:4096']
    openssl_command += ['-keyout', key_path]
    openssl_command += ['-out', cert_path]
    openssl_command += ['-days', '1']
    subprocess.check_call(openssl_command)
374
375
def compute_digest_challenge_response_hash(
        request_method,
        request_uri,
        request_body,
        credentials,
        expected_password):
    """
    Compute the expected digest "response" value for a request, as defined
    by RFC 2069 (no qop) and RFC 2617 (qop "auth" / "auth-int"). This isn't
    an attempt to be perfect, just enough for basic integration tests in
    HttpTests to work.

    :param request_method: HTTP method of the request, e.g. "GET".
    :param request_uri: The request URI used by the client for the hash.
    :param request_body: The entity body (str or bytes). Only used for
                         qop "auth-int".
    :param credentials: Map of values parsed from the Authorization header
                        from the client.
    :param expected_password: The known correct password of the user
                              attempting to authenticate.
    :return: None if a hash cannot be produced (unsupported algorithm or
             qop, or missing username/realm), otherwise the hex digest.
    """
    hash_functions = {
        'MD5': hashlib.md5,
        'SHA-256': hashlib.sha256,
        'SHA-512': hashlib.sha512,
    }
    # Per RFC 2617 the algorithm is assumed to be MD5 when not specified.
    hashfunc = hash_functions.get(credentials.get('algorithm', 'MD5'))
    if hashfunc is None:
        return None

    def hexdigest(*components):
        # H(a:b:c) over the colon-joined components.
        return hashfunc(':'.join(components).encode('utf-8')).hexdigest()

    realm = credentials.get('realm')
    username = credentials.get('username')
    if username is None or realm is None or expected_password is None:
        # Without these HA1 cannot be computed; report "no hash" instead of
        # failing with a TypeError inside str.join().
        return None

    ha1 = hexdigest(username, realm, expected_password)

    qop = credentials.get('qop')
    if qop is None or qop == 'auth':
        ha2 = hexdigest(request_method, request_uri)
    elif qop == 'auth-int':
        # RFC 2617 section 3.2.2.3: A2 = method ":" uri ":" H(entity-body).
        if request_body is None:
            body_bytes = b''
        elif isinstance(request_body, str):
            body_bytes = request_body.encode('utf-8')
        else:
            body_bytes = bytes(request_body)
        ha2 = hexdigest(
            request_method,
            request_uri,
            hashfunc(body_bytes).hexdigest())
    else:
        # Unknown quality-of-protection value.
        return None

    nonce = credentials.get('nonce', '')
    if qop is None:
        # RFC 2069 style response.
        return hexdigest(ha1, nonce, ha2)

    return hexdigest(
        ha1,
        nonce,
        credentials.get('nc', ''),
        credentials.get('cnonce', ''),
        qop,
        ha2)
441
442
def _split_outside_quotes(text, sep):
    """Split text on sep, ignoring separators inside double-quoted strings."""
    if not sep:
        raise ValueError('empty separator')

    pieces = []
    current = []
    in_quotes = False
    index = 0
    while index < len(text):
        char = text[index]
        if in_quotes and char == '\\' and index + 1 < len(text):
            # Backslash escape inside a quoted string: keep both characters
            # so an escaped quote does not end the string.
            current.append(text[index:index + 2])
            index += 2
            continue
        if char == '"':
            in_quotes = not in_quotes
        elif not in_quotes and text.startswith(sep, index):
            pieces.append(''.join(current))
            current = []
            index += len(sep)
            continue
        current.append(char)
        index += 1
    pieces.append(''.join(current))
    return pieces


def parse_kv_pair_header(header_value, sep=','):
    """
    Parse a header value made of key=value pairs into a dict.

    Values may be wrapped in double quotes, which are stripped. Separators
    inside a quoted value do not split the pair, and only the first '=' of
    a pair separates the key from the value, so values such as
    uri="/path?a=b" or a base64 nonce ending in "==" are parsed correctly.
    Empty segments (for example from a trailing separator) are ignored, and
    a segment without '=' maps its key to an empty string.

    :param header_value: The raw header value, e.g. 'a=1, b="two"'.
    :param sep: The separator between pairs.
    :return: Dict mapping each key to its unquoted value.
    :raises ValueError: If sep is empty.
    """
    d = {}
    for kvpair in _split_outside_quotes(header_value, sep):
        kvpair = kvpair.strip()
        if not kvpair:
            continue
        key, _, value = kvpair.partition('=')
        d[key.strip()] = value.strip().strip('"')
    return d
449
450
def main():
    """
    Parse the command line, set up the (optionally TLS) server and serve
    requests until interrupted.

    The listening socket is either created and bound here, or inherited
    from the parent process through the --fd option.
    """
    options = parse_args(sys.argv)

    # Compare against None so that file descriptor 0 is still honored.
    use_inherited_socket = options.server_socket_fd is not None

    bind_addr = (
        options.bind_addr,
        0 if options.port is None else options.port)

    server = http.server.HTTPServer(
        bind_addr,
        RequestHandler,
        bind_and_activate=False)

    if use_inherited_socket:
        # Release the socket created by the constructor before replacing it,
        # otherwise it would be leaked.
        server.socket.close()
        server.socket = socket.fromfd(
            options.server_socket_fd,
            socket.AF_INET,
            socket.SOCK_STREAM)
        # server_bind() is skipped for inherited sockets, so the port to
        # report has to be determined here.
        if options.port is None:
            server.server_port = server.socket.getsockname()[1]
        else:
            server.server_port = options.port

    try:
        if options.use_tls:
            with tempfile.TemporaryDirectory() as temp_cert_dir:
                common_name = options.bind_addr
                if options.port is not None:
                    common_name += ':' + str(options.port)
                cert_file = os.path.join(temp_cert_dir, 'cert.pem')
                key_file = os.path.join(temp_cert_dir, 'key.pem')
                generate_self_signed_tls_cert(
                    common_name,
                    cert_file,
                    key_file)
                # ssl.wrap_socket() was removed in Python 3.12; an
                # SSLContext is the supported replacement. The certificate
                # and key are loaded into the context here, so the files
                # can be deleted when the temporary directory goes away.
                tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
                tls_context.load_cert_chain(
                    certfile=cert_file,
                    keyfile=key_file)
            server.socket = tls_context.wrap_socket(
                server.socket,
                server_side=True,
                do_handshake_on_connect=False)

        if not use_inherited_socket:
            # server_bind() also updates server.server_port with the port
            # that was actually bound, which matters when a random port was
            # requested.
            server.server_bind()
            server.server_activate()

        print(
            'Test server listening on port',
            server.server_port,
            file=sys.stderr)
        server.serve_forever(0.01)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        server.server_close()
504
505
def parse_args(argv):
    """
    Parse the command line options of the test server.

    :param argv: Full argument vector, including the program name as its
                 first element.
    :return: The optparse options object with the attributes bind_addr,
             use_tls, port and server_socket_fd.
    """
    parser = optparse.OptionParser(
        usage='Usage: %prog [OPTIONS]',
        description=__doc__)

    option_specs = [
        ('--bind-addr', {
            'default': '127.0.0.1',
            'dest': 'bind_addr',
            'help': 'By default only bind to loopback',
        }),
        ('--use-tls', {
            'dest': 'use_tls',
            'default': False,
            'action': 'store_true',
            'help': 'If set, a self-signed TLS certificate, key and CA will be'
                    ' generated for testing purposes.',
        }),
        ('--port', {
            'dest': 'port',
            'default': None,
            'type': 'int',
            'help': 'If not specified a random port will be used.',
        }),
        ('--fd', {
            'dest': 'server_socket_fd',
            'default': None,
            'type': 'int',
            'help': 'A socket FD to use for accept() instead of binding a'
                    ' new one.',
        }),
    ]
    for flag, settings in option_specs:
        parser.add_option(flag, **settings)

    options, positional = parser.parse_args(argv)
    # The first positional argument is the program name itself.
    unexpected = positional[1:]
    if unexpected:
        parser.error('Unexpected arguments: {}'.format(', '.join(unexpected)))
    return options
538
539
# Only start the server when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
542