1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23#include "curl_setup.h"
24
25#ifndef CURL_DISABLE_HTTP
26
27#include "urldata.h" /* it includes http_chunks.h */
28#include "sendf.h"   /* for the client write stuff */
29
30#include "content_encoding.h"
31#include "http.h"
32#include "curl_memory.h"
33#include "non-ascii.h" /* for Curl_convert_to_network prototype */
34#include "strtoofft.h"
35#include "warnless.h"
36
37#define _MPRINTF_REPLACE /* use our functions only */
38#include <curl/mprintf.h>
39
40/* The last #include file should be: */
41#include "memdebug.h"
42
43/*
44 * Chunk format (simplified):
45 *
46 * <HEX SIZE>[ chunk extension ] CRLF
47 * <DATA> CRLF
48 *
49 * Highlights from RFC2616 section 3.6 say:
50
51   The chunked encoding modifies the body of a message in order to
52   transfer it as a series of chunks, each with its own size indicator,
53   followed by an OPTIONAL trailer containing entity-header fields. This
54   allows dynamically produced content to be transferred along with the
55   information necessary for the recipient to verify that it has
56   received the full message.
57
58       Chunked-Body   = *chunk
59                        last-chunk
60                        trailer
61                        CRLF
62
63       chunk          = chunk-size [ chunk-extension ] CRLF
64                        chunk-data CRLF
65       chunk-size     = 1*HEX
66       last-chunk     = 1*("0") [ chunk-extension ] CRLF
67
68       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
69       chunk-ext-name = token
70       chunk-ext-val  = token | quoted-string
71       chunk-data     = chunk-size(OCTET)
72       trailer        = *(entity-header CRLF)
73
74   The chunk-size field is a string of hex digits indicating the size of
75   the chunk. The chunked encoding is ended by any chunk whose size is
76   zero, followed by the trailer, which is terminated by an empty line.
77
78 */
79
/* Tell whether 'digit' is a hexadecimal digit in ASCII encoding.
   The ranges are compared against explicit ASCII code points rather than
   going through isxdigit(), so the answer is the same on non-ASCII hosts. */
static bool Curl_isxdigit(char digit)
{
  if(digit >= 0x30 && digit <= 0x39)
    return TRUE; /* 0-9 */

  if(digit >= 0x41 && digit <= 0x46)
    return TRUE; /* A-F */

  if(digit >= 0x61 && digit <= 0x66)
    return TRUE; /* a-f */

  return FALSE;
}
88
89void Curl_httpchunk_init(struct connectdata *conn)
90{
91  struct Curl_chunker *chunk = &conn->chunk;
92  chunk->hexindex=0;        /* start at 0 */
93  chunk->dataleft=0;        /* no data left yet! */
94  chunk->state = CHUNK_HEX; /* we get hex first! */
95}
96
97/*
98 * chunk_read() returns a OK for normal operations, or a positive return code
99 * for errors. STOP means this sequence of chunks is complete.  The 'wrote'
100 * argument is set to tell the caller how many bytes we actually passed to the
101 * client (for byte-counting and whatever).
102 *
103 * The states and the state-machine is further explained in the header file.
104 *
105 * This function always uses ASCII hex values to accommodate non-ASCII hosts.
106 * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
107 */
108CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
109                              char *datap,
110                              ssize_t datalen,
111                              ssize_t *wrotep)
112{
113  CURLcode result=CURLE_OK;
114  struct SessionHandle *data = conn->data;
115  struct Curl_chunker *ch = &conn->chunk;
116  struct SingleRequest *k = &data->req;
117  size_t piece;
118  curl_off_t length = (curl_off_t)datalen;
119  size_t *wrote = (size_t *)wrotep;
120
121  *wrote = 0; /* nothing's written yet */
122
123  /* the original data is written to the client, but we go on with the
124     chunk read process, to properly calculate the content length*/
125  if(data->set.http_te_skip && !k->ignorebody) {
126    result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
127    if(result)
128      return CHUNKE_WRITE_ERROR;
129  }
130
131  while(length) {
132    switch(ch->state) {
133    case CHUNK_HEX:
134      if(Curl_isxdigit(*datap)) {
135        if(ch->hexindex < MAXNUM_SIZE) {
136          ch->hexbuffer[ch->hexindex] = *datap;
137          datap++;
138          length--;
139          ch->hexindex++;
140        }
141        else {
142          return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
143        }
144      }
145      else {
146        char *endptr;
147        if(0 == ch->hexindex)
148          /* This is illegal data, we received junk where we expected
149             a hexadecimal digit. */
150          return CHUNKE_ILLEGAL_HEX;
151
152        /* length and datap are unmodified */
153        ch->hexbuffer[ch->hexindex]=0;
154
155        /* convert to host encoding before calling strtoul */
156        result = Curl_convert_from_network(conn->data, ch->hexbuffer,
157                                           ch->hexindex);
158        if(result) {
159          /* Curl_convert_from_network calls failf if unsuccessful */
160          /* Treat it as a bad hex character */
161          return CHUNKE_ILLEGAL_HEX ;
162        }
163
164        ch->datasize=curlx_strtoofft(ch->hexbuffer, &endptr, 16);
165        if((ch->datasize == CURL_OFF_T_MAX) && (errno == ERANGE))
166          /* overflow is an error */
167          return CHUNKE_ILLEGAL_HEX;
168        ch->state = CHUNK_LF; /* now wait for the CRLF */
169      }
170      break;
171
172    case CHUNK_LF:
173      /* waiting for the LF after a chunk size */
174      if(*datap == 0x0a) {
175        /* we're now expecting data to come, unless size was zero! */
176        if(0 == ch->datasize) {
177          ch->state = CHUNK_TRAILER; /* now check for trailers */
178          conn->trlPos=0;
179        }
180        else
181          ch->state = CHUNK_DATA;
182      }
183
184      datap++;
185      length--;
186      break;
187
188    case CHUNK_DATA:
189      /* We expect 'datasize' of data. We have 'length' right now, it can be
190         more or less than 'datasize'. Get the smallest piece.
191      */
192      piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
193
194      /* Write the data portion available */
195#ifdef HAVE_LIBZ
196      switch (conn->data->set.http_ce_skip?
197              IDENTITY : data->req.auto_decoding) {
198      case IDENTITY:
199#endif
200        if(!k->ignorebody) {
201          if(!data->set.http_te_skip)
202            result = Curl_client_write(conn, CLIENTWRITE_BODY, datap,
203                                       piece);
204          else
205            result = CURLE_OK;
206        }
207#ifdef HAVE_LIBZ
208        break;
209
210      case DEFLATE:
211        /* update data->req.keep.str to point to the chunk data. */
212        data->req.str = datap;
213        result = Curl_unencode_deflate_write(conn, &data->req,
214                                             (ssize_t)piece);
215        break;
216
217      case GZIP:
218        /* update data->req.keep.str to point to the chunk data. */
219        data->req.str = datap;
220        result = Curl_unencode_gzip_write(conn, &data->req,
221                                          (ssize_t)piece);
222        break;
223
224      case COMPRESS:
225      default:
226        failf (conn->data,
227               "Unrecognized content encoding type. "
228               "libcurl understands `identity', `deflate' and `gzip' "
229               "content encodings.");
230        return CHUNKE_BAD_ENCODING;
231      }
232#endif
233
234      if(result)
235        return CHUNKE_WRITE_ERROR;
236
237      *wrote += piece;
238
239      ch->datasize -= piece; /* decrease amount left to expect */
240      datap += piece;    /* move read pointer forward */
241      length -= piece;   /* decrease space left in this round */
242
243      if(0 == ch->datasize)
244        /* end of data this round, we now expect a trailing CRLF */
245        ch->state = CHUNK_POSTLF;
246      break;
247
248    case CHUNK_POSTLF:
249      if(*datap == 0x0a) {
250        /* The last one before we go back to hex state and start all over. */
251        Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
252      }
253      else if(*datap != 0x0d)
254        return CHUNKE_BAD_CHUNK;
255      datap++;
256      length--;
257      break;
258
259    case CHUNK_TRAILER:
260      if((*datap == 0x0d) || (*datap == 0x0a)) {
261        /* this is the end of a trailer, but if the trailer was zero bytes
262           there was no trailer and we move on */
263
264        if(conn->trlPos) {
265          /* we allocate trailer with 3 bytes extra room to fit this */
266          conn->trailer[conn->trlPos++]=0x0d;
267          conn->trailer[conn->trlPos++]=0x0a;
268          conn->trailer[conn->trlPos]=0;
269
270          /* Convert to host encoding before calling Curl_client_write */
271          result = Curl_convert_from_network(conn->data, conn->trailer,
272                                             conn->trlPos);
273          if(result)
274            /* Curl_convert_from_network calls failf if unsuccessful */
275            /* Treat it as a bad chunk */
276            return CHUNKE_BAD_CHUNK;
277
278          if(!data->set.http_te_skip) {
279            result = Curl_client_write(conn, CLIENTWRITE_HEADER,
280                                       conn->trailer, conn->trlPos);
281            if(result)
282              return CHUNKE_WRITE_ERROR;
283          }
284          conn->trlPos=0;
285          ch->state = CHUNK_TRAILER_CR;
286          if(*datap == 0x0a)
287            /* already on the LF */
288            break;
289        }
290        else {
291          /* no trailer, we're on the final CRLF pair */
292          ch->state = CHUNK_TRAILER_POSTCR;
293          break; /* don't advance the pointer */
294        }
295      }
296      else {
297        /* conn->trailer is assumed to be freed in url.c on a
298           connection basis */
299        if(conn->trlPos >= conn->trlMax) {
300          /* we always allocate three extra bytes, just because when the full
301             header has been received we append CRLF\0 */
302          char *ptr;
303          if(conn->trlMax) {
304            conn->trlMax *= 2;
305            ptr = realloc(conn->trailer, conn->trlMax + 3);
306          }
307          else {
308            conn->trlMax=128;
309            ptr = malloc(conn->trlMax + 3);
310          }
311          if(!ptr)
312            return CHUNKE_OUT_OF_MEMORY;
313          conn->trailer = ptr;
314        }
315        conn->trailer[conn->trlPos++]=*datap;
316      }
317      datap++;
318      length--;
319      break;
320
321    case CHUNK_TRAILER_CR:
322      if(*datap == 0x0a) {
323        ch->state = CHUNK_TRAILER_POSTCR;
324        datap++;
325        length--;
326      }
327      else
328        return CHUNKE_BAD_CHUNK;
329      break;
330
331    case CHUNK_TRAILER_POSTCR:
332      /* We enter this state when a CR should arrive so we expect to
333         have to first pass a CR before we wait for LF */
334      if((*datap != 0x0d) && (*datap != 0x0a)) {
335        /* not a CR then it must be another header in the trailer */
336        ch->state = CHUNK_TRAILER;
337        break;
338      }
339      if(*datap == 0x0d) {
340        /* skip if CR */
341        datap++;
342        length--;
343      }
344      /* now wait for the final LF */
345      ch->state = CHUNK_STOP;
346      break;
347
348    case CHUNK_STOP:
349      if(*datap == 0x0a) {
350        length--;
351
352        /* Record the length of any data left in the end of the buffer
353           even if there's no more chunks to read */
354        ch->dataleft = curlx_sotouz(length);
355
356        return CHUNKE_STOP; /* return stop */
357      }
358      else
359        return CHUNKE_BAD_CHUNK;
360    }
361  }
362  return CHUNKE_OK;
363}
364
365const char *Curl_chunked_strerror(CHUNKcode code)
366{
367  switch (code) {
368  default:
369    return "OK";
370  case CHUNKE_TOO_LONG_HEX:
371    return "Too long hexadecimal number";
372  case CHUNKE_ILLEGAL_HEX:
373    return "Illegal or missing hexadecimal sequence";
374  case CHUNKE_BAD_CHUNK:
375    return "Malformed encoding found";
376  case CHUNKE_WRITE_ERROR:
377    return "Write error";
378  case CHUNKE_BAD_ENCODING:
379    return "Bad content-encoding found";
380  case CHUNKE_OUT_OF_MEMORY:
381    return "Out of memory";
382  }
383}
384
385#endif /* CURL_DISABLE_HTTP */
386