1/*
2 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
3 * Use is subject to license terms.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *  http://www.apache.org/licenses/LICENSE-2.0.
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13 * or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include "httpd.h"
19#include "http_config.h"
20#include "http_log.h"
21#include "apr_strings.h"
22#include "apr_general.h"
23#include "util_filter.h"
24#include "apr_buckets.h"
25#include "http_request.h"
26#include "libsed.h"
27
/* Name under which both the output and the input filter are registered
 * (see register_hooks).
 */
static const char *sed_filter_name = "Sed";
/* Value assigned to sed_filter_ctxt.bufsize by init_context(): the number of
 * output bytes buffered before they are turned into a bucket.
 */
#define MODSED_OUTBUF_SIZE 8000
/* Once this many transient buckets have been queued, append_bucket() pushes
 * the brigade to the next filter and clears the temporary pool.
 */
#define MAX_TRANSIENT_BUCKETS 50
31
/* One sed script (the accumulated expressions of one directive) together
 * with the last compilation error reported for it.
 */
typedef struct sed_expr_config
{
    /* Compiled commands; NULL when nothing is configured or after a failed
     * compilation (see compile_sed_expr). */
    sed_commands_t *sed_cmds;
    /* Message stored by sed_compile_errf(); reported by sed_add_expr(). */
    const char *last_error;
} sed_expr_config;
37
/* Per-directory configuration: one script for the response body and one for
 * the request body. The directive table addresses the two members by their
 * offset inside this structure.
 */
typedef struct sed_config
{
    sed_expr_config output;     /* used by sed_response_filter */
    sed_expr_config input;      /* used by sed_request_filter */
} sed_config;
43
/* Context for filter invocation for single HTTP request.
 * Created by init_context() and stored in f->ctx.
 */
typedef struct sed_filter_ctxt
{
    sed_eval_t eval;            /* libsed evaluation state */
    ap_filter_t *f;             /* filter this context belongs to */
    request_rec *r;             /* request being filtered */
    apr_bucket_brigade *bb;     /* brigade collecting the sed output */
    apr_bucket_brigade *bbinp;  /* input filter only: data fetched from the
                                 * next filter */
    char *outbuf;               /* start of the current output buffer, or
                                 * NULL after clear_ctxpool() */
    char *curoutbuf;            /* next write position inside outbuf */
    int bufsize;                /* number of bytes buffered in outbuf before
                                 * it is emitted as a bucket */
    apr_pool_t *tpool;          /* pool the output buffers come from: either
                                 * a sub-pool of r->pool or r->pool itself */
    int numbuckets;             /* transient buckets queued since the last
                                 * clear_ctxpool() */
} sed_filter_ctxt;
58
/* Forward declaration of the module record defined at the end of the file;
 * the filter entry points pass its address to ap_get_module_config().
 */
module AP_MODULE_DECLARE_DATA sed_module;
60
61/* This function will be call back from libsed functions if there is any error
62 * happend during execution of sed scripts
63 */
64static apr_status_t log_sed_errf(void *data, const char *error)
65{
66    request_rec *r = (request_rec *) data;
67    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "%s", error);
68    return APR_SUCCESS;
69}
70
71/* This function will be call back from libsed functions if there is any
72 * compilation error.
73 */
74static apr_status_t sed_compile_errf(void *data, const char *error)
75{
76    sed_expr_config *sed_cfg = (sed_expr_config *) data;
77    sed_cfg->last_error = error;
78    return APR_SUCCESS;
79}
80
81/* clear the temporary pool (used for transient buckets)
82 */
83static void clear_ctxpool(sed_filter_ctxt* ctx)
84{
85    apr_pool_clear(ctx->tpool);
86    ctx->outbuf = NULL;
87    ctx->curoutbuf = NULL;
88    ctx->numbuckets = 0;
89}
90
91/* alloc_outbuf
92 * allocate output buffer
93 */
94static void alloc_outbuf(sed_filter_ctxt* ctx)
95{
96    ctx->outbuf = apr_palloc(ctx->tpool, ctx->bufsize + 1);
97    ctx->curoutbuf = ctx->outbuf;
98}
99
100/* append_bucket
101 * Allocate a new bucket from buf and sz and append to ctx->bb
102 */
103static apr_status_t append_bucket(sed_filter_ctxt* ctx, char* buf, int sz)
104{
105    apr_status_t status = APR_SUCCESS;
106    apr_bucket *b;
107    if (ctx->tpool == ctx->r->pool) {
108        /* We are not using transient bucket */
109        b = apr_bucket_pool_create(buf, sz, ctx->r->pool,
110                                   ctx->r->connection->bucket_alloc);
111        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
112    }
113    else {
114        /* We are using transient bucket */
115        b = apr_bucket_transient_create(buf, sz,
116                                        ctx->r->connection->bucket_alloc);
117        APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
118        ctx->numbuckets++;
119        if (ctx->numbuckets >= MAX_TRANSIENT_BUCKETS) {
120            b = apr_bucket_flush_create(ctx->r->connection->bucket_alloc);
121            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
122            status = ap_pass_brigade(ctx->f->next, ctx->bb);
123            apr_brigade_cleanup(ctx->bb);
124            clear_ctxpool(ctx);
125        }
126    }
127    return status;
128}
129
130/*
131 * flush_output_buffer
132 * Flush the  output data (stored in ctx->outbuf)
133 */
134static apr_status_t flush_output_buffer(sed_filter_ctxt *ctx)
135{
136    int size = ctx->curoutbuf - ctx->outbuf;
137    char *out;
138    apr_status_t status = APR_SUCCESS;
139    if ((ctx->outbuf == NULL) || (size <=0))
140        return status;
141    out = apr_pmemdup(ctx->tpool, ctx->outbuf, size);
142    status = append_bucket(ctx, out, size);
143    ctx->curoutbuf = ctx->outbuf;
144    return status;
145}
146
147/* This is a call back function. When libsed wants to generate the output,
148 * this function will be invoked.
149 */
150static apr_status_t sed_write_output(void *dummy, char *buf, int sz)
151{
152    /* dummy is basically filter context. Context is passed during invocation
153     * of sed_eval_buffer
154     */
155    int remainbytes = 0;
156    apr_status_t status = APR_SUCCESS;
157    sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
158    if (ctx->outbuf == NULL) {
159        alloc_outbuf(ctx);
160    }
161    remainbytes = ctx->bufsize - (ctx->curoutbuf - ctx->outbuf);
162    if (sz >= remainbytes) {
163        if (remainbytes > 0) {
164            memcpy(ctx->curoutbuf, buf, remainbytes);
165            buf += remainbytes;
166            sz -= remainbytes;
167            ctx->curoutbuf += remainbytes;
168        }
169        /* buffer is now full */
170        status = append_bucket(ctx, ctx->outbuf, ctx->bufsize);
171        /* old buffer is now used so allocate new buffer */
172        alloc_outbuf(ctx);
173        /* if size is bigger than the allocated buffer directly add to output
174         * brigade */
175        if ((status == APR_SUCCESS) && (sz >= ctx->bufsize)) {
176            char* newbuf = apr_pmemdup(ctx->tpool, buf, sz);
177            status = append_bucket(ctx, newbuf, sz);
178            /* pool might get clear after append_bucket */
179            if (ctx->outbuf == NULL) {
180                alloc_outbuf(ctx);
181            }
182        }
183        else {
184            memcpy(ctx->curoutbuf, buf, sz);
185            ctx->curoutbuf += sz;
186        }
187    }
188    else {
189        memcpy(ctx->curoutbuf, buf, sz);
190        ctx->curoutbuf += sz;
191    }
192    return status;
193}
194
195/* Compile a sed expression. Compiled context is saved in sed_cfg->sed_cmds.
196 * Memory required for compilation context is allocated from cmd->pool.
197 */
198static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
199                                     cmd_parms *cmd,
200                                     const char *expr)
201{
202    apr_status_t status = APR_SUCCESS;
203
204    if (!sed_cfg->sed_cmds) {
205        sed_commands_t *sed_cmds;
206        sed_cmds = apr_pcalloc(cmd->pool, sizeof(sed_commands_t));
207        status = sed_init_commands(sed_cmds, sed_compile_errf, sed_cfg,
208                                   cmd->pool);
209        if (status != APR_SUCCESS) {
210            sed_destroy_commands(sed_cmds);
211            return status;
212        }
213        sed_cfg->sed_cmds = sed_cmds;
214    }
215    status = sed_compile_string(sed_cfg->sed_cmds, expr);
216    if (status != APR_SUCCESS) {
217        sed_destroy_commands(sed_cfg->sed_cmds);
218        sed_cfg->sed_cmds = NULL;
219    }
220    return status;
221}
222
223/* sed eval cleanup function */
224static apr_status_t sed_eval_cleanup(void *data)
225{
226    sed_eval_t *eval = (sed_eval_t *) data;
227    sed_destroy_eval(eval);
228    return APR_SUCCESS;
229}
230
231/* Initialize sed filter context. If successful then context is set in f->ctx
232 */
233static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg, int usetpool)
234{
235    apr_status_t status;
236    sed_filter_ctxt* ctx;
237    request_rec *r = f->r;
238    /* Create the context. Call sed_init_eval. libsed will generated
239     * output by calling sed_write_output and generates any error by
240     * invoking log_sed_errf.
241     */
242    ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
243    ctx->r = r;
244    ctx->bb = NULL;
245    ctx->numbuckets = 0;
246    ctx->f = f;
247    status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
248                           r, &sed_write_output, r->pool);
249    if (status != APR_SUCCESS) {
250        return status;
251    }
252    apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
253                              apr_pool_cleanup_null);
254    ctx->bufsize = MODSED_OUTBUF_SIZE;
255    if (usetpool) {
256        apr_pool_create(&(ctx->tpool), r->pool);
257    }
258    else {
259        ctx->tpool = r->pool;
260    }
261    alloc_outbuf(ctx);
262    f->ctx = ctx;
263    return APR_SUCCESS;
264}
265
266/* Entry function for Sed output filter */
267static apr_status_t sed_response_filter(ap_filter_t *f,
268                                        apr_bucket_brigade *bb)
269{
270    apr_bucket *b;
271    apr_status_t status;
272    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
273                                           &sed_module);
274    sed_filter_ctxt *ctx = f->ctx;
275    sed_expr_config *sed_cfg = &cfg->output;
276
277    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
278        /* No sed expressions */
279        ap_remove_output_filter(f);
280        return ap_pass_brigade(f->next, bb);
281    }
282
283    if (ctx == NULL) {
284
285        if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
286            /* no need to run sed filter for Head requests */
287            ap_remove_output_filter(f);
288            return ap_pass_brigade(f->next, bb);
289        }
290
291        status = init_context(f, sed_cfg, 1);
292        if (status != APR_SUCCESS)
293             return status;
294        ctx = f->ctx;
295        apr_table_unset(f->r->headers_out, "Content-Length");
296    }
297
298    ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
299
300    /* Here is the main logic. Iterate through all the buckets, read the
301     * content of the bucket, call sed_eval_buffer on the data.
302     * sed_eval_buffer will read the data line by line, run filters on each
303     * line. sed_eval_buffer will generates the output by calling
304     * sed_write_output which will add the output to ctx->bb. At the end of
305     * the loop, ctx->bb is passed to the next filter in chain. At the end of
306     * the data, if new line is not found then sed_eval_buffer will store the
307     * data in its own buffer.
308     *
309     * Once eos bucket is found then sed_finalize_eval will flush the rest of
310     * the data. If there is no new line in last line of data, new line is
311     * appended (that is a solaris sed behavior). libsed's internal memory for
312     * evaluation is allocated on request's pool so it will be cleared once
313     * request is over.
314     *
315     * If flush bucket is found then append the the flush bucket to ctx->bb
316     * and pass it to next filter. There may be some data which will still be
317     * in sed's internal buffer which can't be flushed until new line
318     * character is arrived.
319     */
320    for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
321        const char *buf = NULL;
322        apr_size_t bytes = 0;
323        if (APR_BUCKET_IS_EOS(b)) {
324            apr_bucket *b1 = APR_BUCKET_NEXT(b);
325            /* Now clean up the internal sed buffer */
326            sed_finalize_eval(&ctx->eval, ctx);
327            status = flush_output_buffer(ctx);
328            if (status != APR_SUCCESS) {
329                clear_ctxpool(ctx);
330                return status;
331            }
332            APR_BUCKET_REMOVE(b);
333            /* Insert the eos bucket to ctx->bb brigade */
334            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
335            b = b1;
336        }
337        else if (APR_BUCKET_IS_FLUSH(b)) {
338            apr_bucket *b1 = APR_BUCKET_NEXT(b);
339            APR_BUCKET_REMOVE(b);
340            status = flush_output_buffer(ctx);
341            if (status != APR_SUCCESS) {
342                clear_ctxpool(ctx);
343                return status;
344            }
345            APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
346            b = b1;
347        }
348        else if (APR_BUCKET_IS_METADATA(b)) {
349            b = APR_BUCKET_NEXT(b);
350        }
351        else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
352                 == APR_SUCCESS) {
353            apr_bucket *b1 = APR_BUCKET_NEXT(b);
354            status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
355            if (status != APR_SUCCESS) {
356                clear_ctxpool(ctx);
357                return status;
358            }
359            APR_BUCKET_REMOVE(b);
360            apr_bucket_delete(b);
361            b = b1;
362        }
363        else {
364            apr_bucket *b1 = APR_BUCKET_NEXT(b);
365            APR_BUCKET_REMOVE(b);
366            b = b1;
367        }
368    }
369    apr_brigade_cleanup(bb);
370    status = flush_output_buffer(ctx);
371    if (status != APR_SUCCESS) {
372        clear_ctxpool(ctx);
373        return status;
374    }
375    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
376        status = ap_pass_brigade(f->next, ctx->bb);
377        apr_brigade_cleanup(ctx->bb);
378    }
379    clear_ctxpool(ctx);
380    return status;
381}
382
383/* Entry function for Sed input filter */
384static apr_status_t sed_request_filter(ap_filter_t *f,
385                                       apr_bucket_brigade *bb,
386                                       ap_input_mode_t mode,
387                                       apr_read_type_e block,
388                                       apr_off_t readbytes)
389{
390    sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
391                                           &sed_module);
392    sed_filter_ctxt *ctx = f->ctx;
393    apr_status_t status;
394    apr_bucket_brigade *bbinp;
395    sed_expr_config *sed_cfg = &cfg->input;
396
397    if (mode != AP_MODE_READBYTES) {
398        return ap_get_brigade(f->next, bb, mode, block, readbytes);
399    }
400
401    if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
402        /* No sed expression */
403        return ap_get_brigade(f->next, bb, mode, block, readbytes);
404    }
405
406    if (!ctx) {
407        if (!ap_is_initial_req(f->r)) {
408            ap_remove_input_filter(f);
409            /* XXX : Should we filter the sub requests too */
410            return ap_get_brigade(f->next, bb, mode, block, readbytes);
411        }
412        status = init_context(f, sed_cfg, 0);
413        if (status != APR_SUCCESS)
414             return status;
415        ctx = f->ctx;
416        ctx->bb    = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
417        ctx->bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
418    }
419
420    bbinp = ctx->bbinp;
421
422    /* Here is the logic :
423     * Read the readbytes data from next level fiter into bbinp. Loop through
424     * the buckets in bbinp and read the data from buckets and invoke
425     * sed_eval_buffer on the data. libsed will generate its output using
426     * sed_write_output which will add data in ctx->bb. Do it until it have
427     * atleast one bucket bucket in ctx->bb. At the end of data eos bucket
428     * should be there.
429     *
430     * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
431     * output. If the last byte of data is not a new line character then sed
432     * will add a new line to the data that is default sed behaviour. Note
433     * that using this filter with POST data, caller may not expect this
434     * behaviour.
435     *
436     * If next level fiter generate the flush bucket, we can't do much about
437     * it. If we want to return the flush bucket in brigade bb (to the caller)
438     * the question is where to add it?
439     */
440    while (APR_BRIGADE_EMPTY(ctx->bb)) {
441        apr_bucket *b;
442
443        /* read the bytes from next level filter */
444        apr_brigade_cleanup(bbinp);
445        status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
446        if (status != APR_SUCCESS) {
447            return status;
448        }
449        for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
450             b = APR_BUCKET_NEXT(b)) {
451            const char *buf = NULL;
452            apr_size_t bytes;
453
454            if (APR_BUCKET_IS_EOS(b)) {
455                /* eos bucket. Clear the internal sed buffers */
456                sed_finalize_eval(&ctx->eval, ctx);
457                flush_output_buffer(ctx);
458                APR_BUCKET_REMOVE(b);
459                APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
460                break;
461            }
462            else if (APR_BUCKET_IS_FLUSH(b)) {
463                /* What should we do with flush bucket */
464                continue;
465            }
466            if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
467                     == APR_SUCCESS) {
468                status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
469                if (status != APR_SUCCESS)
470                    return status;
471                flush_output_buffer(ctx);
472            }
473        }
474    }
475
476    if (!APR_BRIGADE_EMPTY(ctx->bb)) {
477        apr_bucket *b = NULL;
478
479        if (apr_brigade_partition(ctx->bb, readbytes, &b) == APR_INCOMPLETE) {
480            APR_BRIGADE_CONCAT(bb, ctx->bb);
481        }
482        else {
483            APR_BRIGADE_CONCAT(bb, ctx->bb);
484            apr_brigade_split_ex(bb, b, ctx->bb);
485        }
486    }
487    return APR_SUCCESS;
488}
489
490static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
491{
492    int offset = (int) (long) cmd->info;
493    sed_expr_config *sed_cfg =
494                (sed_expr_config *) (((char *) cfg) + offset);
495    if (compile_sed_expr(sed_cfg, cmd, arg) != APR_SUCCESS) {
496        return apr_psprintf(cmd->temp_pool,
497                            "Failed to compile sed expression. %s",
498                            sed_cfg->last_error);
499    }
500    return NULL;
501}
502
503static void *create_sed_dir_config(apr_pool_t *p, char *s)
504{
505    sed_config *cfg = apr_pcalloc(p, sizeof(sed_config));
506    return cfg;
507}
508
/* Configuration directives of this module. Both are handled by
 * sed_add_expr(); the extra argument is the offset of the sed_expr_config
 * (output or input) inside sed_config that the directive adds to.
 */
static const command_rec sed_filter_cmds[] = {
    AP_INIT_TAKE1("OutputSed", sed_add_expr,
                  (void *) APR_OFFSETOF(sed_config, output),
                  ACCESS_CONF,
                  "Sed regular expression for Response"),
    AP_INIT_TAKE1("InputSed", sed_add_expr,
                  (void *) APR_OFFSETOF(sed_config, input),
                  ACCESS_CONF,
                  "Sed regular expression for Request"),
    {NULL}      /* end of table */
};
520
521static void register_hooks(apr_pool_t *p)
522{
523    ap_register_output_filter(sed_filter_name, sed_response_filter, NULL,
524                              AP_FTYPE_RESOURCE);
525    ap_register_input_filter(sed_filter_name, sed_request_filter, NULL,
526                             AP_FTYPE_RESOURCE);
527}
528
/* Module record: only a per-directory configuration (without a merge
 * function), the directive table and the hook registration are provided.
 */
AP_DECLARE_MODULE(sed) = {
    STANDARD20_MODULE_STUFF,
    create_sed_dir_config,      /* dir config creator */
    NULL,                       /* dir merger --- default is to override */
    NULL,                       /* server config */
    NULL,                       /* merge server config */
    sed_filter_cmds,            /* command table */
    register_hooks              /* register hooks */
};
538