1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22#include "setup.h"
23
24#ifndef CURL_DISABLE_HTTP
25/* -- WIN32 approved -- */
26#include <stdio.h>
27#include <string.h>
28#include <stdarg.h>
29#include <stdlib.h>
30#include <ctype.h>
31
32#include "urldata.h" /* it includes http_chunks.h */
33#include "sendf.h"   /* for the client write stuff */
34
35#include "content_encoding.h"
36#include "http.h"
37#include "curl_memory.h"
38#include "non-ascii.h" /* for Curl_convert_to_network prototype */
39
40#define _MPRINTF_REPLACE /* use our functions only */
41#include <curl/mprintf.h>
42
43/* The last #include file should be: */
44#include "memdebug.h"
45
46/*
47 * Chunk format (simplified):
48 *
49 * <HEX SIZE>[ chunk extension ] CRLF
50 * <DATA> CRLF
51 *
52 * Highlights from RFC2616 section 3.6 say:
53
54   The chunked encoding modifies the body of a message in order to
55   transfer it as a series of chunks, each with its own size indicator,
56   followed by an OPTIONAL trailer containing entity-header fields. This
57   allows dynamically produced content to be transferred along with the
58   information necessary for the recipient to verify that it has
59   received the full message.
60
61       Chunked-Body   = *chunk
62                        last-chunk
63                        trailer
64                        CRLF
65
66       chunk          = chunk-size [ chunk-extension ] CRLF
67                        chunk-data CRLF
68       chunk-size     = 1*HEX
69       last-chunk     = 1*("0") [ chunk-extension ] CRLF
70
71       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
72       chunk-ext-name = token
73       chunk-ext-val  = token | quoted-string
74       chunk-data     = chunk-size(OCTET)
75       trailer        = *(entity-header CRLF)
76
77   The chunk-size field is a string of hex digits indicating the size of
78   the chunk. The chunked encoding is ended by any chunk whose size is
79   zero, followed by the trailer, which is terminated by an empty line.
80
81 */
82
/* Tell whether 'digit' is an ASCII hexadecimal digit: 0-9, A-F or a-f.
   The ranges are written as ASCII code points instead of calling isxdigit()
   so that the answer is the same on non-ASCII hosts. */
static bool Curl_isxdigit(char digit)
{
  const bool is_decimal = (bool)(digit >= 0x30 && digit <= 0x39); /* 0-9 */
  const bool is_upper   = (bool)(digit >= 0x41 && digit <= 0x46); /* A-F */
  const bool is_lower   = (bool)(digit >= 0x61 && digit <= 0x66); /* a-f */

  return (bool)(is_decimal || is_upper || is_lower);
}
91
92void Curl_httpchunk_init(struct connectdata *conn)
93{
94  struct Curl_chunker *chunk = &conn->chunk;
95  chunk->hexindex=0; /* start at 0 */
96  chunk->dataleft=0; /* no data left yet! */
97  chunk->state = CHUNK_HEX; /* we get hex first! */
98}
99
100/*
101 * chunk_read() returns a OK for normal operations, or a positive return code
102 * for errors. STOP means this sequence of chunks is complete.  The 'wrote'
103 * argument is set to tell the caller how many bytes we actually passed to the
104 * client (for byte-counting and whatever).
105 *
106 * The states and the state-machine is further explained in the header file.
107 *
108 * This function always uses ASCII hex values to accommodate non-ASCII hosts.
109 * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
110 */
111CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
112                              char *datap,
113                              ssize_t datalen,
114                              ssize_t *wrotep)
115{
116  CURLcode result=CURLE_OK;
117  struct SessionHandle *data = conn->data;
118  struct Curl_chunker *ch = &conn->chunk;
119  struct SingleRequest *k = &data->req;
120  size_t piece;
121  size_t length = (size_t)datalen;
122  size_t *wrote = (size_t *)wrotep;
123
124  *wrote = 0; /* nothing's written yet */
125
126  /* the original data is written to the client, but we go on with the
127     chunk read process, to properly calculate the content length*/
128  if(data->set.http_te_skip && !k->ignorebody) {
129    result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
130    if(result)
131      return CHUNKE_WRITE_ERROR;
132  }
133
134  while(length) {
135    switch(ch->state) {
136    case CHUNK_HEX:
137      if(Curl_isxdigit(*datap)) {
138        if(ch->hexindex < MAXNUM_SIZE) {
139          ch->hexbuffer[ch->hexindex] = *datap;
140          datap++;
141          length--;
142          ch->hexindex++;
143        }
144        else {
145          return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
146        }
147      }
148      else {
149        if(0 == ch->hexindex) {
150          /* This is illegal data, we received junk where we expected
151             a hexadecimal digit. */
152          return CHUNKE_ILLEGAL_HEX;
153        }
154        /* length and datap are unmodified */
155        ch->hexbuffer[ch->hexindex]=0;
156
157        /* convert to host encoding before calling strtoul */
158        result = Curl_convert_from_network(conn->data, ch->hexbuffer,
159                                           ch->hexindex);
160        if(result) {
161          /* Curl_convert_from_network calls failf if unsuccessful */
162          /* Treat it as a bad hex character */
163          return(CHUNKE_ILLEGAL_HEX);
164        }
165
166        ch->datasize=strtoul(ch->hexbuffer, NULL, 16);
167        ch->state = CHUNK_POSTHEX;
168      }
169      break;
170
171    case CHUNK_POSTHEX:
172      /* In this state, we're waiting for CRLF to arrive. We support
173         this to allow so called chunk-extensions to show up here
174         before the CRLF comes. */
175      if(*datap == 0x0d)
176        ch->state = CHUNK_CR;
177      length--;
178      datap++;
179      break;
180
181    case CHUNK_CR:
182      /* waiting for the LF */
183      if(*datap == 0x0a) {
184        /* we're now expecting data to come, unless size was zero! */
185        if(0 == ch->datasize) {
186          ch->state = CHUNK_TRAILER; /* now check for trailers */
187          conn->trlPos=0;
188        }
189        else {
190          ch->state = CHUNK_DATA;
191        }
192      }
193      else
194        /* previously we got a fake CR, go back to CR waiting! */
195        ch->state = CHUNK_CR;
196      datap++;
197      length--;
198      break;
199
200    case CHUNK_DATA:
201      /* we get pure and fine data
202
203         We expect another 'datasize' of data. We have 'length' right now,
204         it can be more or less than 'datasize'. Get the smallest piece.
205      */
206      piece = (ch->datasize >= length)?length:ch->datasize;
207
208      /* Write the data portion available */
209#ifdef HAVE_LIBZ
210      switch (conn->data->set.http_ce_skip?
211              IDENTITY : data->req.auto_decoding) {
212      case IDENTITY:
213#endif
214        if(!k->ignorebody) {
215          if(!data->set.http_te_skip)
216            result = Curl_client_write(conn, CLIENTWRITE_BODY, datap,
217                                       piece);
218          else
219            result = CURLE_OK;
220        }
221#ifdef HAVE_LIBZ
222        break;
223
224      case DEFLATE:
225        /* update data->req.keep.str to point to the chunk data. */
226        data->req.str = datap;
227        result = Curl_unencode_deflate_write(conn, &data->req,
228                                             (ssize_t)piece);
229        break;
230
231      case GZIP:
232        /* update data->req.keep.str to point to the chunk data. */
233        data->req.str = datap;
234        result = Curl_unencode_gzip_write(conn, &data->req,
235                                          (ssize_t)piece);
236        break;
237
238      case COMPRESS:
239      default:
240        failf (conn->data,
241               "Unrecognized content encoding type. "
242               "libcurl understands `identity', `deflate' and `gzip' "
243               "content encodings.");
244        return CHUNKE_BAD_ENCODING;
245      }
246#endif
247
248      if(result)
249        return CHUNKE_WRITE_ERROR;
250
251      *wrote += piece;
252
253      ch->datasize -= piece; /* decrease amount left to expect */
254      datap += piece;    /* move read pointer forward */
255      length -= piece;   /* decrease space left in this round */
256
257      if(0 == ch->datasize)
258        /* end of data this round, we now expect a trailing CRLF */
259        ch->state = CHUNK_POSTCR;
260      break;
261
262    case CHUNK_POSTCR:
263      if(*datap == 0x0d) {
264        ch->state = CHUNK_POSTLF;
265        datap++;
266        length--;
267      }
268      else
269        return CHUNKE_BAD_CHUNK;
270
271      break;
272
273    case CHUNK_POSTLF:
274      if(*datap == 0x0a) {
275        /*
276         * The last one before we go back to hex state and start all
277         * over.
278         */
279        Curl_httpchunk_init(conn);
280        datap++;
281        length--;
282      }
283      else
284        return CHUNKE_BAD_CHUNK;
285
286      break;
287
288    case CHUNK_TRAILER:
289      if(*datap == 0x0d) {
290        /* this is the end of a trailer, but if the trailer was zero bytes
291           there was no trailer and we move on */
292
293        if(conn->trlPos) {
294          /* we allocate trailer with 3 bytes extra room to fit this */
295          conn->trailer[conn->trlPos++]=0x0d;
296          conn->trailer[conn->trlPos++]=0x0a;
297          conn->trailer[conn->trlPos]=0;
298
299          /* Convert to host encoding before calling Curl_client_write */
300          result = Curl_convert_from_network(conn->data, conn->trailer,
301                                             conn->trlPos);
302          if(result)
303            /* Curl_convert_from_network calls failf if unsuccessful */
304            /* Treat it as a bad chunk */
305            return CHUNKE_BAD_CHUNK;
306
307          if(!data->set.http_te_skip) {
308            result = Curl_client_write(conn, CLIENTWRITE_HEADER,
309                                       conn->trailer, conn->trlPos);
310            if(result)
311              return CHUNKE_WRITE_ERROR;
312          }
313          conn->trlPos=0;
314          ch->state = CHUNK_TRAILER_CR;
315        }
316        else {
317          /* no trailer, we're on the final CRLF pair */
318          ch->state = CHUNK_TRAILER_POSTCR;
319          break; /* don't advance the pointer */
320        }
321      }
322      else {
323        /* conn->trailer is assumed to be freed in url.c on a
324           connection basis */
325        if(conn->trlPos >= conn->trlMax) {
326          /* we always allocate three extra bytes, just because when the full
327             header has been received we append CRLF\0 */
328          char *ptr;
329          if(conn->trlMax) {
330            conn->trlMax *= 2;
331            ptr = realloc(conn->trailer, conn->trlMax + 3);
332          }
333          else {
334            conn->trlMax=128;
335            ptr = malloc(conn->trlMax + 3);
336          }
337          if(!ptr)
338            return CHUNKE_OUT_OF_MEMORY;
339          conn->trailer = ptr;
340        }
341        conn->trailer[conn->trlPos++]=*datap;
342      }
343      datap++;
344      length--;
345      break;
346
347    case CHUNK_TRAILER_CR:
348      if(*datap == 0x0a) {
349        ch->state = CHUNK_TRAILER_POSTCR;
350        datap++;
351        length--;
352      }
353      else
354        return CHUNKE_BAD_CHUNK;
355      break;
356
357    case CHUNK_TRAILER_POSTCR:
358      /* We enter this state when a CR should arrive so we expect to
359         have to first pass a CR before we wait for LF */
360      if(*datap != 0x0d) {
361        /* not a CR then it must be another header in the trailer */
362        ch->state = CHUNK_TRAILER;
363        break;
364      }
365      datap++;
366      length--;
367      /* now wait for the final LF */
368      ch->state = CHUNK_STOP;
369      break;
370
371    case CHUNK_STOPCR:
372      /* Read the final CRLF that ends all chunk bodies */
373
374      if(*datap == 0x0d) {
375        ch->state = CHUNK_STOP;
376        datap++;
377        length--;
378      }
379      else
380        return CHUNKE_BAD_CHUNK;
381      break;
382
383    case CHUNK_STOP:
384      if(*datap == 0x0a) {
385        length--;
386
387        /* Record the length of any data left in the end of the buffer
388           even if there's no more chunks to read */
389
390        ch->dataleft = length;
391        return CHUNKE_STOP; /* return stop */
392      }
393      else
394        return CHUNKE_BAD_CHUNK;
395
396    default:
397      return CHUNKE_STATE_ERROR;
398    }
399  }
400  return CHUNKE_OK;
401}
402#endif /* CURL_DISABLE_HTTP */
403