1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * mod_setenvif.c
19 * Set environment variables based on matching request headers or
20 * attributes against regex strings
21 *
22 * Paul Sutton <paul@ukweb.com> 27 Oct 1996
23 * Based on mod_browser by Alexei Kosut <akosut@organic.com>
24 */
25
26/*
27 * Used to set environment variables based on the incoming request headers,
28 * or some selected other attributes of the request (e.g., the remote host
29 * name).
30 *
31 * Usage:
32 *
33 *   SetEnvIf name regex var ...
34 *
 * where name is either a HTTP request header name, or one of the
 * special values (see below). 'name' may be a regex when it is used
 * to specify an HTTP request header name. The 'value' of the header
 * (or the value of the special value from below) is compared against
 * the regex argument. If this is a simple string, a simple sub-string
 * match is performed. Otherwise, a regular expression match is
 * done. If the value matches the string or regular expression, the
 * environment variables listed as var ... are set. Each var can
 * be in one of three formats: var, which sets the named variable
 * (to the value "1"); var=value, which sets the variable to
 * the given value; or !var, which unsets the variable if it has
 * been previously set.
47 *
48 * Normally the strings are compared with regard to case. To ignore
49 * case, use the directive SetEnvIfNoCase instead.
50 *
 * Special values for 'name' are:
 *
 *   server_addr        IP address of interface on which request arrived
 *                      (analogous to SERVER_ADDR set in ap_add_common_vars())
 *   remote_host        Remote host name (if available)
 *   remote_addr        Remote IP address
 *   request_method     Request method (GET, POST, etc)
 *   request_protocol   Request protocol name and version (e.g., HTTP/1.1)
 *   request_uri        Requested URI
59 *
60 * Examples:
61 *
62 * To set the environment variable LOCALHOST if the client is the local
63 * machine:
64 *
65 *    SetEnvIf remote_addr 127.0.0.1 LOCALHOST
66 *
67 * To set LOCAL if the client is the local host, or within our company's
68 * domain (192.168.10):
69 *
70 *    SetEnvIf remote_addr 192.168.10. LOCAL
 *    SetEnvIf remote_addr 127.0.0.1   LOCAL
72 *
73 * This could be written as:
74 *
75 *    SetEnvIf remote_addr (127.0.0.1|192.168.10.) LOCAL
76 *
77 * To set HAVE_TS if the client request contains any header beginning
78 * with "TS" with a value beginning with a lower case alphabet:
79 *
80 *    SetEnvIf ^TS* ^[a-z].* HAVE_TS
81 */
82
83#include "apr.h"
84#include "apr_strings.h"
85#include "apr_strmatch.h"
86
87#define APR_WANT_STRFUNC
88#include "apr_want.h"
89
90#include "ap_config.h"
91#include "httpd.h"
92#include "http_config.h"
93#include "http_core.h"
94#include "http_log.h"
95#include "http_protocol.h"
96
/*
 * Identifies what a SetEnvIf-style entry is matched against.  SPECIAL_NOT
 * means the entry names an ordinary request header (or a header-name regex);
 * every other value selects one of the special request attributes recognised
 * by add_setenvif_core() and looked up in match_headers().
 */
enum special {
    SPECIAL_NOT,                /* ordinary header name, not a special value */
    SPECIAL_REMOTE_ADDR,        /* "remote_addr" */
    SPECIAL_REMOTE_HOST,        /* "remote_host" */
    SPECIAL_REQUEST_URI,        /* "request_uri" */
    SPECIAL_REQUEST_METHOD,     /* "request_method" */
    SPECIAL_REQUEST_PROTOCOL,   /* "request_protocol" */
    SPECIAL_SERVER_ADDR         /* "server_addr" */
};
/*
 * One configured condition together with the environment changes to apply
 * when it matches.  Entries created by add_setenvif_core() use either
 * 'pattern' (plain sub-string) or 'preg' (regex); entries created by
 * add_setenvifexpr() use 'expr' and have no name or regex.
 */
typedef struct {
    char *name;                 /* header name */
    ap_regex_t *pnamereg;       /* compiled header name regex */
    char *regex;                /* regex to match against */
    ap_regex_t *preg;           /* compiled regex */
    const apr_strmatch_pattern *pattern; /* non-regex pattern to match */
    ap_expr_info_t *expr;       /* parsed expression */
    apr_table_t *features;      /* env vars to set (or unset) */
    enum special special_type;  /* is it a "special" header ? */
    int icase;                  /* ignoring case? */
} sei_entry;
117
/*
 * Module configuration record, used for both the server-wide and the
 * per-directory configuration.
 */
typedef struct {
    apr_array_header_t *conditionals;   /* array of sei_entry */
} sei_cfg_rec;

/* Forward declaration; the module record itself is defined at end of file. */
module AP_MODULE_DECLARE_DATA setenvif_module;
123
124/*
125 * These routines, the create- and merge-config functions, are called
126 * for both the server-wide and the per-directory contexts.  This is
127 * because the different definitions are used at different times; the
128 * server-wide ones are used in the post-read-request phase, and the
129 * per-directory ones are used during the header-parse phase (after
130 * the URI has been mapped to a file and we have anything from the
131 * .htaccess file and <Directory> and <Files> containers).
132 */
133static void *create_setenvif_config(apr_pool_t *p)
134{
135    sei_cfg_rec *new = (sei_cfg_rec *) apr_palloc(p, sizeof(sei_cfg_rec));
136
137    new->conditionals = apr_array_make(p, 20, sizeof(sei_entry));
138    return (void *) new;
139}
140
141static void *create_setenvif_config_svr(apr_pool_t *p, server_rec *dummy)
142{
143    return create_setenvif_config(p);
144}
145
146static void *create_setenvif_config_dir(apr_pool_t *p, char *dummy)
147{
148    return create_setenvif_config(p);
149}
150
151static void *merge_setenvif_config(apr_pool_t *p, void *basev, void *overridesv)
152{
153    sei_cfg_rec *a = apr_pcalloc(p, sizeof(sei_cfg_rec));
154    sei_cfg_rec *base = basev, *overrides = overridesv;
155
156    a->conditionals = apr_array_append(p, base->conditionals,
157                                       overrides->conditionals);
158    return a;
159}
160
161/*
162 * any non-NULL magic constant will do... used to indicate if AP_REG_ICASE should
163 * be used
164 */
#define ICASE_MAGIC  ((void *)(&setenvif_module))
/*
 * Marker stored in the request config by the first match_headers() call so
 * that the second call for the same request can tell it is the later one.
 */
#define SEI_MAGIC_HEIRLOOM "setenvif-phase-flag"

/*
 * Compiled once in register_hooks(); matches names consisting solely of
 * the characters [-A-Za-z0-9_].  Used by is_header_regex().
 */
static ap_regex_t *is_header_regex_regex;
169
170static int is_header_regex(apr_pool_t *p, const char* name)
171{
172    /* If a Header name contains characters other than:
173     *    -,_,[A-Z\, [a-z] and [0-9].
174     * assume the header name is a regular expression.
175     */
176    if (ap_regexec(is_header_regex_regex, name, 0, NULL, 0)) {
177        return 1;
178    }
179
180    return 0;
181}
182
183/* If the input string does not take advantage of regular
184 * expression metacharacters, return a pointer to an equivalent
185 * string that can be searched using apr_strmatch().  (The
186 * returned string will often be the input string.  But if
187 * the input string contains escaped characters, the returned
188 * string will be a copy with the escapes removed.)
189 */
190static const char *non_regex_pattern(apr_pool_t *p, const char *s)
191{
192    const char *src = s;
193    int escapes_found = 0;
194    int in_escape = 0;
195
196    while (*src) {
197        switch (*src) {
198        case '^':
199        case '.':
200        case '$':
201        case '|':
202        case '(':
203        case ')':
204        case '[':
205        case ']':
206        case '*':
207        case '+':
208        case '?':
209        case '{':
210        case '}':
211            if (!in_escape) {
212                return NULL;
213            }
214            in_escape = 0;
215            break;
216        case '\\':
217            if (!in_escape) {
218                in_escape = 1;
219                escapes_found = 1;
220            }
221            else {
222                in_escape = 0;
223            }
224            break;
225        default:
226            if (in_escape) {
227                return NULL;
228            }
229            break;
230        }
231        src++;
232    }
233    if (!escapes_found) {
234        return s;
235    }
236    else {
237        char *unescaped = (char *)apr_palloc(p, src - s + 1);
238        char *dst = unescaped;
239        src = s;
240        do {
241            if (*src == '\\') {
242                src++;
243            }
244        } while ((*dst++ = *src++));
245        return unescaped;
246    }
247}
248
249static const char *add_envvars(cmd_parms *cmd, const char *args, sei_entry *new)
250{
251    const char *feature;
252    int beenhere = 0;
253    char *var;
254
255    for ( ; ; ) {
256        feature = ap_getword_conf(cmd->pool, &args);
257        if (!*feature) {
258            break;
259        }
260        beenhere++;
261
262        var = ap_getword(cmd->pool, &feature, '=');
263        if (*feature) {
264            apr_table_setn(new->features, var, feature);
265        }
266        else if (*var == '!') {
267            apr_table_setn(new->features, var + 1, "!");
268        }
269        else {
270            apr_table_setn(new->features, var, "1");
271        }
272    }
273
274    if (!beenhere) {
275        return apr_pstrcat(cmd->pool, "Missing envariable expression for ",
276                           cmd->cmd->name, NULL);
277    }
278
279    return NULL;
280}
281
282static const char *add_setenvif_core(cmd_parms *cmd, void *mconfig,
283                                     char *fname, const char *args)
284{
285    char *regex;
286    const char *simple_pattern;
287    sei_cfg_rec *sconf;
288    sei_entry *new;
289    sei_entry *entries;
290    int i;
291    int icase;
292
293    /*
294     * Determine from our context into which record to put the entry.
295     * cmd->path == NULL means we're in server-wide context; otherwise,
296     * we're dealing with a per-directory setting.
297     */
298    sconf = (cmd->path != NULL)
299      ? (sei_cfg_rec *) mconfig
300      : (sei_cfg_rec *) ap_get_module_config(cmd->server->module_config,
301                                               &setenvif_module);
302    entries = (sei_entry *) sconf->conditionals->elts;
303    /* get regex */
304    regex = ap_getword_conf(cmd->pool, &args);
305    if (!*regex) {
306        return apr_pstrcat(cmd->pool, "Missing regular expression for ",
307                           cmd->cmd->name, NULL);
308    }
309
310    /*
311     * If we've already got a sei_entry with the same name we want to
312     * just copy the name pointer... so that later on we can compare
313     * two header names just by comparing the pointers.
314     */
315    for (i = 0; i < sconf->conditionals->nelts; ++i) {
316        new = &entries[i];
317        if (new->name && !strcasecmp(new->name, fname)) {
318            fname = new->name;
319            break;
320        }
321    }
322
323    /* if the last entry has an identical headername and regex then
324     * merge with it
325     */
326    i = sconf->conditionals->nelts - 1;
327    icase = cmd->info == ICASE_MAGIC;
328    if (i < 0
329        || entries[i].name != fname
330        || entries[i].icase != icase
331        || strcmp(entries[i].regex, regex)) {
332
333        /* no match, create a new entry */
334        new = apr_array_push(sconf->conditionals);
335        new->name = fname;
336        new->regex = regex;
337        new->icase = icase;
338        if ((simple_pattern = non_regex_pattern(cmd->pool, regex))) {
339            new->pattern = apr_strmatch_precompile(cmd->pool,
340                                                   simple_pattern, !icase);
341            if (new->pattern == NULL) {
342                return apr_pstrcat(cmd->pool, cmd->cmd->name,
343                                   " pattern could not be compiled.", NULL);
344            }
345            new->preg = NULL;
346        }
347        else {
348            new->preg = ap_pregcomp(cmd->pool, regex,
349                                    (AP_REG_EXTENDED | (icase ? AP_REG_ICASE : 0)));
350            if (new->preg == NULL) {
351                return apr_pstrcat(cmd->pool, cmd->cmd->name,
352                                   " regex could not be compiled.", NULL);
353            }
354            new->pattern = NULL;
355        }
356        new->features = apr_table_make(cmd->pool, 2);
357
358        if (!strcasecmp(fname, "remote_addr")) {
359            new->special_type = SPECIAL_REMOTE_ADDR;
360        }
361        else if (!strcasecmp(fname, "remote_host")) {
362            new->special_type = SPECIAL_REMOTE_HOST;
363        }
364        else if (!strcasecmp(fname, "request_uri")) {
365            new->special_type = SPECIAL_REQUEST_URI;
366        }
367        else if (!strcasecmp(fname, "request_method")) {
368            new->special_type = SPECIAL_REQUEST_METHOD;
369        }
370        else if (!strcasecmp(fname, "request_protocol")) {
371            new->special_type = SPECIAL_REQUEST_PROTOCOL;
372        }
373        else if (!strcasecmp(fname, "server_addr")) {
374            new->special_type = SPECIAL_SERVER_ADDR;
375        }
376        else {
377            new->special_type = SPECIAL_NOT;
378            /* Handle fname as a regular expression.
379             * If fname a simple header string, identify as such
380             * (new->pnamereg = NULL) to avoid the overhead of searching
381             * through headers_in for a regex match.
382             */
383            if (is_header_regex(cmd->temp_pool, fname)) {
384                new->pnamereg = ap_pregcomp(cmd->pool, fname,
385                                            (AP_REG_EXTENDED | AP_REG_NOSUB
386                                             | (icase ? AP_REG_ICASE : 0)));
387                if (new->pnamereg == NULL)
388                    return apr_pstrcat(cmd->pool, cmd->cmd->name,
389                                       "Header name regex could not be "
390                                       "compiled.", NULL);
391            }
392            else {
393                new->pnamereg = NULL;
394            }
395        }
396    }
397    else {
398        new = &entries[i];
399    }
400
401    return add_envvars(cmd, args, new);
402}
403
404static const char *add_setenvif(cmd_parms *cmd, void *mconfig,
405                                const char *args)
406{
407    char *fname;
408
409    /* get header name */
410    fname = ap_getword_conf(cmd->pool, &args);
411    if (!*fname) {
412        return apr_pstrcat(cmd->pool, "Missing header-field name for ",
413                           cmd->cmd->name, NULL);
414    }
415    return add_setenvif_core(cmd, mconfig, fname, args);
416}
417
418static const char *add_setenvifexpr(cmd_parms *cmd, void *mconfig,
419                                    const char *args)
420{
421    char *expr;
422    sei_cfg_rec *sconf;
423    sei_entry *new;
424    const char *err;
425
426    /*
427     * Determine from our context into which record to put the entry.
428     * cmd->path == NULL means we're in server-wide context; otherwise,
429     * we're dealing with a per-directory setting.
430     */
431    sconf = (cmd->path != NULL)
432      ? (sei_cfg_rec *) mconfig
433      : (sei_cfg_rec *) ap_get_module_config(cmd->server->module_config,
434                                               &setenvif_module);
435    /* get expr */
436    expr = ap_getword_conf(cmd->pool, &args);
437    if (!*expr) {
438        return apr_pstrcat(cmd->pool, "Missing expression for ",
439                           cmd->cmd->name, NULL);
440    }
441
442    new = apr_array_push(sconf->conditionals);
443    new->features = apr_table_make(cmd->pool, 2);
444    new->name = NULL;
445    new->regex = NULL;
446    new->pattern = NULL;
447    new->preg = NULL;
448    new->expr = ap_expr_parse_cmd(cmd, expr, 0, &err, NULL);
449    if (err)
450        return apr_psprintf(cmd->pool, "Could not parse expression \"%s\": %s",
451                            expr, err);
452
453    return add_envvars(cmd, args, new);
454}
455
456/*
457 * This routine handles the BrowserMatch* directives.  It simply turns around
458 * and feeds them, with the appropriate embellishments, to the general-purpose
459 * command handler.
460 */
461static const char *add_browser(cmd_parms *cmd, void *mconfig, const char *args)
462{
463    return add_setenvif_core(cmd, mconfig, "User-Agent", args);
464}
465
/*
 * Directive table.  All directives receive their raw argument string and
 * are registered with OR_FILEINFO.  The *NoCase variants pass ICASE_MAGIC
 * as cmd->info to request case-insensitive matching.
 */
static const command_rec setenvif_module_cmds[] =
{
    AP_INIT_RAW_ARGS("SetEnvIf", add_setenvif, NULL, OR_FILEINFO,
                     "A header-name, regex and a list of variables."),
    AP_INIT_RAW_ARGS("SetEnvIfNoCase", add_setenvif, ICASE_MAGIC, OR_FILEINFO,
                     "a header-name, regex and a list of variables."),
    AP_INIT_RAW_ARGS("SetEnvIfExpr", add_setenvifexpr, NULL, OR_FILEINFO,
                     "an expression and a list of variables."),
    AP_INIT_RAW_ARGS("BrowserMatch", add_browser, NULL, OR_FILEINFO,
                     "A browser regex and a list of variables."),
    AP_INIT_RAW_ARGS("BrowserMatchNoCase", add_browser, ICASE_MAGIC,
                     OR_FILEINFO,
                     "A browser regex and a list of variables."),
    { NULL },   /* end-of-table marker */
};
481
482/*
483 * This routine gets called at two different points in request processing:
484 * once before the URI has been translated (during the post-read-request
485 * phase) and once after (during the header-parse phase).  We use different
486 * config records for the two different calls to reduce overhead (by not
487 * re-doing the server-wide settings during directory processing), and
488 * signal which call it is by having the earlier one pass a flag to the
489 * later one.
490 */
491static int match_headers(request_rec *r)
492{
493    sei_cfg_rec *sconf;
494    sei_entry *entries;
495    const apr_table_entry_t *elts;
496    const char *val, *err;
497    apr_size_t val_len = 0;
498    int i, j;
499    char *last_name;
500    ap_regmatch_t regm[AP_MAX_REG_MATCH];
501
502    if (!ap_get_module_config(r->request_config, &setenvif_module)) {
503        ap_set_module_config(r->request_config, &setenvif_module,
504                             SEI_MAGIC_HEIRLOOM);
505        sconf  = (sei_cfg_rec *) ap_get_module_config(r->server->module_config,
506                                                      &setenvif_module);
507    }
508    else {
509        sconf = (sei_cfg_rec *) ap_get_module_config(r->per_dir_config,
510                                                     &setenvif_module);
511    }
512    entries = (sei_entry *) sconf->conditionals->elts;
513    last_name = NULL;
514    val = NULL;
515    for (i = 0; i < sconf->conditionals->nelts; ++i) {
516        sei_entry *b = &entries[i];
517
518        if (!b->expr) {
519            /* Optimize the case where a bunch of directives in a row use the
520             * same header.  Remember we don't need to strcmp the two header
521             * names because we made sure the pointers were equal during
522             * configuration.
523             */
524            if (b->name != last_name) {
525                last_name = b->name;
526                switch (b->special_type) {
527                case SPECIAL_REMOTE_ADDR:
528                    val = r->useragent_ip;
529                    break;
530                case SPECIAL_SERVER_ADDR:
531                    val = r->connection->local_ip;
532                    break;
533                case SPECIAL_REMOTE_HOST:
534                    val =  ap_get_remote_host(r->connection, r->per_dir_config,
535                                              REMOTE_NAME, NULL);
536                    break;
537                case SPECIAL_REQUEST_URI:
538                    val = r->uri;
539                    break;
540                case SPECIAL_REQUEST_METHOD:
541                    val = r->method;
542                    break;
543                case SPECIAL_REQUEST_PROTOCOL:
544                    val = r->protocol;
545                    break;
546                case SPECIAL_NOT:
547                    if (b->pnamereg) {
548                        /* Matching headers_in against a regex. Iterate through
549                         * the headers_in until we find a match or run out of
550                         * headers.
551                         */
552                        const apr_array_header_t
553                            *arr = apr_table_elts(r->headers_in);
554
555                        elts = (const apr_table_entry_t *) arr->elts;
556                        val = NULL;
557                        for (j = 0; j < arr->nelts; ++j) {
558                            if (!ap_regexec(b->pnamereg, elts[j].key, 0, NULL, 0)) {
559                                val = elts[j].val;
560                            }
561                        }
562                    }
563                    else {
564                        /* Not matching against a regex */
565                        val = apr_table_get(r->headers_in, b->name);
566                        if (val == NULL) {
567                            val = apr_table_get(r->subprocess_env, b->name);
568                        }
569                    }
570                }
571                val_len = val ? strlen(val) : 0;
572            }
573
574        }
575
576        /*
577         * A NULL value indicates that the header field or special entity
578         * wasn't present or is undefined.  Represent that as an empty string
579         * so that REs like "^$" will work and allow envariable setting
580         * based on missing or empty field. This is also necessary to make
581         * ap_pregsub work after evaluating an ap_expr_t which does set the
582         * regexp backref data.
583         */
584        if (val == NULL) {
585            val = "";
586            val_len = 0;
587        }
588
589        if ((b->pattern && apr_strmatch(b->pattern, val, val_len)) ||
590            (b->preg && !ap_regexec(b->preg, val, AP_MAX_REG_MATCH, regm, 0)) ||
591            (b->expr && ap_expr_exec_re(r, b->expr, AP_MAX_REG_MATCH, regm, &val, &err) > 0))
592        {
593            const apr_array_header_t *arr = apr_table_elts(b->features);
594            elts = (const apr_table_entry_t *) arr->elts;
595
596            for (j = 0; j < arr->nelts; ++j) {
597                if (*(elts[j].val) == '!') {
598                    apr_table_unset(r->subprocess_env, elts[j].key);
599                }
600                else {
601                    if (!b->pattern) {
602                        char *replaced = ap_pregsub(r->pool, elts[j].val, val,
603                                                    AP_MAX_REG_MATCH, regm);
604                        if (replaced) {
605                            apr_table_setn(r->subprocess_env, elts[j].key,
606                                           replaced);
607                        }
608                        else {
609                            ap_log_rerror(APLOG_MARK, APLOG_CRIT, 0, r, APLOGNO(01505)
610                                          "Regular expression replacement "
611                                          "failed for '%s', value too long?",
612                                          elts[j].key);
613                            return HTTP_INTERNAL_SERVER_ERROR;
614                        }
615                    }
616                    else {
617                        apr_table_setn(r->subprocess_env, elts[j].key,
618                                       elts[j].val);
619                    }
620                }
621                ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, "Setting %s",
622                              elts[j].key);
623            }
624        }
625    }
626
627    return DECLINED;
628}
629
630static void register_hooks(apr_pool_t *p)
631{
632    ap_hook_header_parser(match_headers, NULL, NULL, APR_HOOK_MIDDLE);
633    ap_hook_post_read_request(match_headers, NULL, NULL, APR_HOOK_MIDDLE);
634
635    is_header_regex_regex = ap_pregcomp(p, "^[-A-Za-z0-9_]*$",
636                                        (AP_REG_EXTENDED | AP_REG_NOSUB ));
637    ap_assert(is_header_regex_regex != NULL);
638}
639
/*
 * Module record.  The same merge function serves both per-directory and
 * per-server configs; it appends the overriding entries to the base ones.
 */
AP_DECLARE_MODULE(setenvif) =
{
    STANDARD20_MODULE_STUFF,
    create_setenvif_config_dir, /* dir config creator */
    merge_setenvif_config,      /* dir merger: base entries, then overrides */
    create_setenvif_config_svr, /* server config creator */
    merge_setenvif_config,      /* merge server configs */
    setenvif_module_cmds,       /* command table */
    register_hooks              /* register hooks */
};
650