1/*
2 * Copyright 2022 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Niels Sascha Reedijk, niels.reedijk@gmail.com
7 */
8
9#include "HttpParser.h"
10
11#include <stdexcept>
12#include <string>
13
14#include <HttpFields.h>
15#include <NetServicesDefs.h>
16#include <ZlibCompressionAlgorithm.h>
17
18using namespace std::literals;
19using namespace BPrivate::Network;
20
21
22// #pragma mark -- HttpParser
23
24
25/*!
26	\brief Explicitly mark the response as having no content.
27
28	This is done in cases where the request was a HEAD request. Setting it to no content, will
29	instruct the parser to move to completion after all the header fields have been parsed.
30*/
31void
32HttpParser::SetNoContent() noexcept
33{
34	if (fStreamState > HttpInputStreamState::Fields)
35		debugger("Cannot set the parser to no content after parsing of the body has started");
36	fBodyType = HttpBodyType::NoContent;
37};
38
39
40/*!
41	\brief Parse the status from the \a buffer and store it in \a status.
42
43	\retval true The status was succesfully parsed
44	\retval false There is not enough data in the buffer for a full status.
45
46	\exception BNetworkRequestException The status does not conform to the HTTP spec.
47*/
48bool
49HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status)
50{
51	if (fStreamState != HttpInputStreamState::StatusLine)
52		debugger("The Status line has already been parsed");
53
54	auto statusLine = buffer.GetNextLine();
55	if (!statusLine)
56		return false;
57
58	auto codeStart = statusLine->FindFirst(' ') + 1;
59	if (codeStart < 0)
60		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
61
62	auto codeEnd = statusLine->FindFirst(' ', codeStart);
63
64	if (codeEnd < 0 || (codeEnd - codeStart) != 3)
65		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
66
67	std::string statusCodeString(statusLine->String() + codeStart, 3);
68
69	// build the output
70	try {
71		status.code = std::stol(statusCodeString);
72	} catch (...) {
73		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
74	}
75
76	status.text = std::move(statusLine.value());
77	fStatus.code = status.code; // cache the status code
78	fStreamState = HttpInputStreamState::Fields;
79	return true;
80}
81
82
83/*!
84	\brief Parse the fields from the \a buffer and store it in \a fields.
85
86	The fields are parsed incrementally, meaning that even if the full header is not yet in the
87	\a buffer, it will still parse all complete fields and store them in the \a fields.
88
89	After all fields have been parsed, it will determine the properties of the request body.
90	This means it will determine whether there is any content compression, if there is a body,
91	and if so if it has a fixed size or not.
92
93	\retval true All fields were succesfully parsed
94	\retval false There is not enough data in the buffer to complete parsing of fields.
95
96	\exception BNetworkRequestException The fields not conform to the HTTP spec.
97*/
98bool
99HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields)
100{
101	if (fStreamState != HttpInputStreamState::Fields)
102		debugger("The parser is not expecting header fields at this point");
103
104	auto fieldLine = buffer.GetNextLine();
105
106	while (fieldLine && !fieldLine.value().IsEmpty()) {
107		// Parse next header line
108		fields.AddField(fieldLine.value());
109		fieldLine = buffer.GetNextLine();
110	}
111
112	if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())) {
113		// there is more to parse
114		return false;
115	}
116
117	// Determine the properties for the body
118	// RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body:
119	std::optional<off_t> bodyBytesTotal = std::nullopt;
120	if (fBodyType == HttpBodyType::NoContent || fStatus.StatusCode() == BHttpStatusCode::NoContent
121		|| fStatus.StatusCode() == BHttpStatusCode::NotModified) {
122		// [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304,
123		// no content [2] NOT SUPPORTED: when doing a CONNECT request, no content
124		fBodyType = HttpBodyType::NoContent;
125		fStreamState = HttpInputStreamState::Done;
126	} else if (auto header = fields.FindField("Transfer-Encoding"sv);
127			   header != fields.end() && header->Value() == "chunked"sv) {
128		// [3] If there is a Transfer-Encoding heading set to 'chunked'
129		// TODO: support the more advanced rules in the RFC around the meaning of this field
130		fBodyType = HttpBodyType::Chunked;
131		fStreamState = HttpInputStreamState::Body;
132	} else if (fields.CountFields("Content-Length"sv) > 0) {
133		// [4] When there is no Transfer-Encoding, then look for Content-Encoding:
134		//	- If there are more than one, the values must match
135		//	- The value must be a valid number
136		// [5] If there is a valid value, then that is the expected size of the body
137		try {
138			auto contentLength = std::string();
139			for (const auto& field: fields) {
140				if (field.Name() == "Content-Length"sv) {
141					if (contentLength.size() == 0)
142						contentLength = field.Value();
143					else if (contentLength != field.Value()) {
144						throw BNetworkRequestError(__PRETTY_FUNCTION__,
145							BNetworkRequestError::ProtocolError,
146							"Multiple Content-Length fields with differing values");
147					}
148				}
149			}
150			bodyBytesTotal = std::stol(contentLength);
151			if (*bodyBytesTotal == 0) {
152				fBodyType = HttpBodyType::NoContent;
153				fStreamState = HttpInputStreamState::Done;
154			} else {
155				fBodyType = HttpBodyType::FixedSize;
156				fStreamState = HttpInputStreamState::Body;
157			}
158		} catch (const std::logic_error& e) {
159			throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
160				"Cannot parse Content-Length field value (logic_error)");
161		}
162	} else {
163		// [6] Applies to request messages only (this is a response)
164		// [7] If nothing else then the received message is all data until connection close
165		// (this is the default)
166		fStreamState = HttpInputStreamState::Body;
167	}
168
169	// Set up the body parser based on the logic above.
170	switch (fBodyType) {
171		case HttpBodyType::VariableSize:
172			fBodyParser = std::make_unique<HttpRawBodyParser>();
173			break;
174		case HttpBodyType::FixedSize:
175			fBodyParser = std::make_unique<HttpRawBodyParser>(*bodyBytesTotal);
176			break;
177		case HttpBodyType::Chunked:
178			fBodyParser = std::make_unique<HttpChunkedBodyParser>();
179			break;
180		case HttpBodyType::NoContent:
181		default:
182			return true;
183	}
184
185	// Check Content-Encoding for compression
186	auto header = fields.FindField("Content-Encoding"sv);
187	if (header != fields.end() && (header->Value() == "gzip" || header->Value() == "deflate")) {
188		fBodyParser = std::make_unique<HttpBodyDecompression>(std::move(fBodyParser));
189	}
190
191	return true;
192}
193
194
195/*!
196	\brief Parse the body from the \a buffer and use \a writeToBody function to save.
197
198	The \a readEnd parameter indicates to the parser that the buffer currently contains all the
199	expected data for this request.
200*/
201size_t
202HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
203{
204	if (fStreamState < HttpInputStreamState::Body || fStreamState == HttpInputStreamState::Done)
205		debugger("The parser is not in the correct state to parse a body");
206
207	auto parseResult = fBodyParser->ParseBody(buffer, writeToBody, readEnd);
208
209	if (parseResult.complete)
210		fStreamState = HttpInputStreamState::Done;
211
212	return parseResult.bytesParsed;
213}
214
215
216/*!
217	\brief Return if the body is currently expecting to having content.
218
219	This may change if the header fields have not yet been parsed, as these may contain
220	instructions about the body having no content.
221*/
222bool
223HttpParser::HasContent() const noexcept
224{
225	return fBodyType != HttpBodyType::NoContent;
226}
227
228
229/*!
230	\brief Return the total size of the body, if known.
231*/
232std::optional<off_t>
233HttpParser::BodyBytesTotal() const noexcept
234{
235	if (fBodyParser)
236		return fBodyParser->TotalBodySize();
237	return std::nullopt;
238}
239
240
241/*!
242	\brief Return the number of body bytes transferred from the response.
243*/
244off_t
245HttpParser::BodyBytesTransferred() const noexcept
246{
247	if (fBodyParser)
248		return fBodyParser->TransferredBodySize();
249	return 0;
250}
251
252
253/*!
254	\brief Check if the body is fully parsed.
255*/
256bool
257HttpParser::Complete() const noexcept
258{
259	return fStreamState == HttpInputStreamState::Done;
260}
261
262
263// #pragma mark -- HttpBodyParser
264
265
266/*!
267	\brief Default implementation to return std::nullopt.
268*/
269std::optional<off_t>
270HttpBodyParser::TotalBodySize() const noexcept
271{
272	return std::nullopt;
273}
274
275
276/*!
277	\brief Return the number of body bytes read from the stream so far.
278
279	For chunked transfers, this excludes the chunk headers and other metadata.
280*/
281off_t
282HttpBodyParser::TransferredBodySize() const noexcept
283{
284	return fTransferredBodySize;
285}
286
287
288// #pragma mark -- HttpRawBodyParser
289/*!
290	\brief Construct a HttpRawBodyParser with an unknown content size.
291*/
292HttpRawBodyParser::HttpRawBodyParser()
293{
294}
295
296
297/*!
298	\brief Construct a HttpRawBodyParser with expected \a bodyBytesTotal size.
299*/
300HttpRawBodyParser::HttpRawBodyParser(off_t bodyBytesTotal)
301	:
302	fBodyBytesTotal(bodyBytesTotal)
303{
304}
305
306
307/*!
308	\brief Parse a regular (non-chunked) body from a buffer.
309
310	The buffer is parsed into a target using the \a writeToBody function.
311
312	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
313	In case the total body size is known, and the remaining bytes in the buffer are smaller than
314	the expected remainder, a ProtocolError will be raised. The data in the buffer will *not* be
315	copied to the target.
316
317	Also, if the body size is known, and the data in the \a buffer is larger than the expected
318	expected length, then it will only read the bytes needed and leave the remainder in the buffer.
319
320	It is required that the \a writeToBody function writes all the bytes it is asked to; this
321	method does not support partial writes and throws an exception when it fails.
322
323	\exception BNetworkRequestError In case the buffer contains too little or invalid data.
324
325	\returns The number of bytes parsed from the \a buffer.
326*/
327BodyParseResult
328HttpRawBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
329{
330	auto bytesToRead = buffer.RemainingBytes();
331	if (fBodyBytesTotal) {
332		auto expectedRemainingBytes = *fBodyBytesTotal - fTransferredBodySize;
333		if (expectedRemainingBytes < static_cast<off_t>(buffer.RemainingBytes()))
334			bytesToRead = expectedRemainingBytes;
335		else if (readEnd && expectedRemainingBytes > static_cast<off_t>(buffer.RemainingBytes())) {
336			throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError,
337				"Message body is incomplete; less data received than expected");
338		}
339	}
340
341	// Copy the data
342	auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
343	fTransferredBodySize += bytesRead;
344
345	if (bytesRead != bytesToRead) {
346		// Fail if not all expected bytes are written.
347		throw BNetworkRequestError(__PRETTY_FUNCTION__, BNetworkRequestError::SystemError,
348			"Could not write all available body bytes to the target.");
349	}
350
351	if (fBodyBytesTotal) {
352		if (*fBodyBytesTotal == fTransferredBodySize)
353			return {bytesRead, bytesRead, true};
354		else
355			return {bytesRead, bytesRead, false};
356	} else
357		return {bytesRead, bytesRead, readEnd};
358}
359
360
361/*!
362	\brief Override default implementation and return known body size (or std::nullopt)
363*/
364std::optional<off_t>
365HttpRawBodyParser::TotalBodySize() const noexcept
366{
367	return fBodyBytesTotal;
368}
369
370
371// #pragma mark -- HttpChunkedBodyParser
372/*!
373	\brief Parse a chunked body from a buffer.
374
375	The contents of the cunks are copied into a target using the \a writeToBody function.
376
377	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
378	In case the chunk argument indicates that more data was to come, an exception is thrown.
379
380	It is required that the \a writeToBody function writes all the bytes it is asked to; this
381	method does not support partial writes and throws an exception when it fails.
382
383	\exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
384		little data.
385
386	\returns The number of bytes parsed from the \a buffer.
387*/
388BodyParseResult
389HttpChunkedBodyParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
390{
391	size_t totalBytesRead = 0;
392	while (buffer.RemainingBytes() > 0) {
393		switch (fChunkParserState) {
394			case ChunkSize:
395			{
396				// Read the next chunk size from the buffer; if unsuccesful wait for more data
397				auto chunkSizeString = buffer.GetNextLine();
398				if (!chunkSizeString)
399					return {totalBytesRead, totalBytesRead, false};
400				auto chunkSizeStr = std::string(chunkSizeString.value().String());
401				try {
402					size_t pos = 0;
403					fRemainingChunkSize = std::stoll(chunkSizeStr, &pos, 16);
404					if (pos < chunkSizeStr.size() && chunkSizeStr[pos] != ';') {
405						throw BNetworkRequestError(
406							__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
407					}
408				} catch (const std::invalid_argument&) {
409					throw BNetworkRequestError(
410						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
411				} catch (const std::out_of_range&) {
412					throw BNetworkRequestError(
413						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
414				}
415
416				if (fRemainingChunkSize > 0)
417					fChunkParserState = Chunk;
418				else
419					fChunkParserState = Trailers;
420				break;
421			}
422
423			case Chunk:
424			{
425				size_t bytesToRead;
426				if (fRemainingChunkSize > static_cast<off_t>(buffer.RemainingBytes()))
427					bytesToRead = buffer.RemainingBytes();
428				else
429					bytesToRead = fRemainingChunkSize;
430
431				auto bytesRead = buffer.WriteTo(writeToBody, bytesToRead);
432				if (bytesRead != bytesToRead) {
433					// Fail if not all expected bytes are written.
434					throw BNetworkRequestError(__PRETTY_FUNCTION__,
435						BNetworkRequestError::SystemError,
436						"Could not write all available body bytes to the target.");
437				}
438
439				fTransferredBodySize += bytesRead;
440				totalBytesRead += bytesRead;
441				fRemainingChunkSize -= bytesRead;
442				if (fRemainingChunkSize == 0)
443					fChunkParserState = ChunkEnd;
444				break;
445			}
446
447			case ChunkEnd:
448			{
449				if (buffer.RemainingBytes() < 2) {
450					// not enough data in the buffer to finish the chunk
451					return {totalBytesRead, totalBytesRead, false};
452				}
453				auto chunkEndString = buffer.GetNextLine();
454				if (!chunkEndString || chunkEndString.value().Length() != 0) {
455					// There should have been an empty chunk
456					throw BNetworkRequestError(
457						__PRETTY_FUNCTION__, BNetworkRequestError::ProtocolError);
458				}
459
460				fChunkParserState = ChunkSize;
461				break;
462			}
463
464			case Trailers:
465			{
466				auto trailerString = buffer.GetNextLine();
467				if (!trailerString) {
468					// More data to come
469					return {totalBytesRead, totalBytesRead, false};
470				}
471
472				if (trailerString.value().Length() > 0) {
473					// Ignore empty trailers for now
474					// TODO: review if the API should support trailing headers
475				} else {
476					fChunkParserState = Complete;
477					return {totalBytesRead, totalBytesRead, true};
478				}
479				break;
480			}
481
482			case Complete:
483				return {totalBytesRead, totalBytesRead, true};
484		}
485	}
486	return {totalBytesRead, totalBytesRead, false};
487}
488
489
490// #pragma mark -- HttpBodyDecompression
491/*!
492	\brief Set up a decompression stream that decompresses the data read by \a bodyParser.
493*/
494HttpBodyDecompression::HttpBodyDecompression(std::unique_ptr<HttpBodyParser> bodyParser)
495{
496	fDecompressorStorage = std::make_unique<BMallocIO>();
497
498	BDataIO* stream = nullptr;
499	auto result = BZlibCompressionAlgorithm().CreateDecompressingOutputStream(
500		fDecompressorStorage.get(), nullptr, stream);
501
502	if (result != B_OK) {
503		throw BNetworkRequestError("BZlibCompressionAlgorithm().CreateCompressingOutputStream",
504			BNetworkRequestError::SystemError, result);
505	}
506
507	fDecompressingStream = std::unique_ptr<BDataIO>(stream);
508	fBodyParser = std::move(bodyParser);
509}
510
511
512/*!
513	\brief Read a compressed body into a target..
514
515	The stream captures chunked or raw data, and decompresses it. The decompressed data is then
516	copied into a target using the \a writeToBody function.
517
518	The \a readEnd argument indicates whether the current \a buffer contains all the expected data.
519	It is up for the underlying parser to determine if more data was expected, and therefore, if
520	there is an error.
521
522	It is required that the \a writeToBody function writes all the bytes it is asked to; this
523	method does not support partial writes and throws an exception when it fails.
524
525	\exception BNetworkRequestError In case there is an error parsing the buffer, or there is too
526		little data.
527
528	\returns The number of bytes parsed from the \a buffer.
529*/
530BodyParseResult
531HttpBodyDecompression::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody, bool readEnd)
532{
533	// Get the underlying raw or chunked parser to write data to our decompressionstream
534	auto parseResults = fBodyParser->ParseBody(
535		buffer,
536		[this](const std::byte* buffer, size_t bufferSize) {
537			auto status = fDecompressingStream->WriteExactly(buffer, bufferSize);
538			if (status != B_OK) {
539				throw BNetworkRequestError(
540					"BDataIO::WriteExactly()", BNetworkRequestError::SystemError, status);
541			}
542			return bufferSize;
543		},
544		readEnd);
545	fTransferredBodySize += parseResults.bytesParsed;
546
547	if (readEnd || parseResults.complete) {
548		// No more bytes expected so flush out the final bytes
549		if (auto status = fDecompressingStream->Flush(); status != B_OK) {
550			throw BNetworkRequestError(
551				"BZlibDecompressionStream::Flush()", BNetworkRequestError::SystemError, status);
552		}
553	}
554
555	size_t bytesWritten = 0;
556	if (auto bodySize = fDecompressorStorage->Position(); bodySize > 0) {
557		bytesWritten
558			= writeToBody(static_cast<const std::byte*>(fDecompressorStorage->Buffer()), bodySize);
559		if (static_cast<off_t>(bytesWritten) != bodySize) {
560			throw BNetworkRequestError(
561				__PRETTY_FUNCTION__, BNetworkRequestError::SystemError, B_PARTIAL_WRITE);
562		}
563		fDecompressorStorage->Seek(0, SEEK_SET);
564	}
565	return {parseResults.bytesParsed, bytesWritten, parseResults.complete};
566}
567
568
569/*!
570	\brief Return the TotalBodySize() from the underlying chunked or raw parser.
571*/
572std::optional<off_t>
573HttpBodyDecompression::TotalBodySize() const noexcept
574{
575	return fBodyParser->TotalBodySize();
576}
577