1/*
2 * Copyright 2013-2014, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include <package/hpkg/PackageFileHeapWriter.h>
8
9#include <algorithm>
10#include <new>
11
12#include <ByteOrder.h>
13#include <List.h>
14#include <package/hpkg/ErrorOutput.h>
15#include <package/hpkg/HPKGDefs.h>
16
17#include <AutoDeleter.h>
18#include <package/hpkg/DataReader.h>
19#include <package/hpkg/PackageFileHeapReader.h>
20#include <RangeArray.h>
21#include <CompressionAlgorithm.h>
22
23
// minimum length (in bytes) a chunk's data must have before we even attempt
// to compress it; anything smaller is always stored uncompressed
static const size_t kCompressionSizeThreshold = 64;
26
27
28namespace BPackageKit {
29
30namespace BHPKG {
31
32namespace BPrivate {
33
34
// Bookkeeping for one compressed chunk of the existing heap whose data
// (partially) survive a RemoveDataRanges() operation.
struct PackageFileHeapWriter::Chunk {
	uint64	offset;				// chunk offset in the compressed heap
	uint32	compressedSize;		// on-disk (compressed) size of the chunk
	uint32	uncompressedSize;	// size of the chunk data once decompressed
	void*	buffer;				// compressed chunk data; NULL until read
};
41
42
// A range within a chunk that shall be kept when removing data ranges.
struct PackageFileHeapWriter::ChunkSegment {
	ssize_t	chunkIndex;		// index of the chunk in the ChunkBuffer
	uint32	toKeepOffset;	// start of the data to keep, relative to the
							// beginning of the uncompressed chunk data
	uint32	toKeepSize;		// number of bytes to keep
};
48
49
// Helper for RemoveDataRanges(): tracks the (possibly partial) chunks whose
// data shall be kept and buffers their compressed data, so chunks can be read
// ahead of the position at which the heap is being rewritten and are never
// overwritten before having been buffered.
struct PackageFileHeapWriter::ChunkBuffer {
	ChunkBuffer(PackageFileHeapWriter* writer, size_t bufferSize)
		:
		fWriter(writer),
		fChunks(),
		fCurrentChunkIndex(0),
		fNextReadIndex(0),
		fSegments(),
		fCurrentSegmentIndex(0),
		fBuffers(),
		fUnusedBuffers(),
		fBufferSize(bufferSize)
	{
	}

	~ChunkBuffer()
	{
		// fBuffers tracks every buffer ever allocated; BList::ItemAt()
		// returns NULL past the end, which terminates the loop
		for (int32 i = 0; void* buffer = fBuffers.ItemAt(i); i++)
			free(buffer);
	}

	// Appends the segment [toKeepOffset, toKeepOffset + toKeepSize) of the
	// chunk starting at (compressed) heap offset chunkOffset to the list of
	// data to keep. Consecutive segments referring to the same chunk share a
	// single Chunk entry. Returns false on memory shortage.
	bool PushChunkSegment(uint64 chunkOffset, uint32 compressedSize,
		uint32 uncompressedSize, uint32 toKeepOffset, uint32 toKeepSize)
	{
		ChunkSegment segment;
		segment.toKeepOffset = toKeepOffset;
		segment.toKeepSize = toKeepSize;

		// might refer to the last chunk
		segment.chunkIndex = fChunks.Count() - 1;

		if (segment.chunkIndex < 0
			|| fChunks.ElementAt(segment.chunkIndex).offset != chunkOffset) {
			// no, need to push a new chunk
			segment.chunkIndex++;

			Chunk chunk;
			chunk.offset = chunkOffset;
			chunk.compressedSize = compressedSize;
			chunk.uncompressedSize = uncompressedSize;
			chunk.buffer = NULL;
			if (!fChunks.Add(chunk))
				return false;
		}

		return fSegments.Add(segment);
	}

	// Returns whether no segments have been pushed at all.
	bool IsEmpty() const
	{
		return fSegments.IsEmpty();
	}

	// Returns whether unprocessed segments remain.
	bool HasMoreSegments() const
	{
		return fCurrentSegmentIndex < fSegments.Count();
	}

	// Returns the segment to be processed next.
	const ChunkSegment& CurrentSegment() const
	{
		return fSegments[fCurrentSegmentIndex];
	}

	const Chunk& ChunkAt(ssize_t index) const
	{
		return fChunks[index];
	}

	// Returns whether there are chunks whose data haven't been read yet.
	bool HasMoreChunksToRead() const
	{
		return fNextReadIndex < fChunks.Count();
	}

	// Returns whether the data of the currently processed chunk have already
	// been read into a buffer.
	bool HasBufferedChunk() const
	{
		return fCurrentChunkIndex < fNextReadIndex;
	}

	// Returns the (compressed) heap offset of the next chunk to be read.
	uint64 NextReadOffset() const
	{
		return fChunks[fNextReadIndex].offset;
	}

	// Reads the compressed data of the next unread chunk into a (possibly
	// recycled) buffer. Throws status_t on read error or if no chunk is left.
	void ReadNextChunk()
	{
		if (!HasMoreChunksToRead())
			throw status_t(B_BAD_VALUE);

		Chunk& chunk = fChunks[fNextReadIndex++];
		chunk.buffer = _GetBuffer();

		status_t error = fWriter->ReadFileData(chunk.offset, chunk.buffer,
			chunk.compressedSize);
		if (error != B_OK)
			throw error;
	}

	void CurrentSegmentDone()
	{
		// Unless the next segment refers to the same chunk, advance to the next
		// chunk, recycling the finished chunk's buffer.
		const ChunkSegment& segment = fSegments[fCurrentSegmentIndex++];
		if (!HasMoreSegments()
			|| segment.chunkIndex != CurrentSegment().chunkIndex) {
			_PutBuffer(fChunks[fCurrentChunkIndex++].buffer);
		}
	}

private:
	// Returns a buffer of fBufferSize bytes, preferring to recycle an unused
	// one. Throws std::bad_alloc on memory shortage.
	void* _GetBuffer()
	{
		if (!fUnusedBuffers.IsEmpty())
			return fUnusedBuffers.RemoveItem(fUnusedBuffers.CountItems() - 1);

		void* buffer = malloc(fBufferSize);
		if (buffer == NULL || !fBuffers.AddItem(buffer)) {
			free(buffer);
			throw std::bad_alloc();
		}

		return buffer;
	}

	// Returns a buffer to the unused list for reuse. If that fails, the
	// buffer is freed right away (and dropped from the all-buffers list).
	void _PutBuffer(void* buffer)
	{
		if (buffer != NULL && !fUnusedBuffers.AddItem(buffer)) {
			fBuffers.RemoveItem(buffer);
			free(buffer);
		}
	}

private:
	PackageFileHeapWriter*	fWriter;

	Array<Chunk>			fChunks;
	ssize_t					fCurrentChunkIndex;	// chunk being processed
	ssize_t					fNextReadIndex;		// next chunk to read

	Array<ChunkSegment>		fSegments;
	ssize_t					fCurrentSegmentIndex;

	BList					fBuffers;			// all buffers ever allocated
	BList					fUnusedBuffers;		// buffers available for reuse
	size_t					fBufferSize;
};
195
196
/*!	Creates a heap writer for the heap starting at \a heapOffset in \a file.
	\a compressionAlgorithm may be NULL, in which case chunks are always
	written uncompressed. A reference to the compression algorithm is
	acquired here and released in the destructor.
	Call Init() before use.
*/
PackageFileHeapWriter::PackageFileHeapWriter(BErrorOutput* errorOutput,
	BPositionIO* file, off_t heapOffset,
	CompressionAlgorithmOwner* compressionAlgorithm,
	DecompressionAlgorithmOwner* decompressionAlgorithm)
	:
	PackageFileHeapAccessorBase(errorOutput, file, heapOffset,
		decompressionAlgorithm),
	fPendingDataBuffer(NULL),
	fCompressedDataBuffer(NULL),
	fPendingDataSize(0),
	fOffsets(),
	fCompressionAlgorithm(compressionAlgorithm)
{
	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->AcquireReference();
}
213
214
PackageFileHeapWriter::~PackageFileHeapWriter()
{
	// free the data buffers
	_Uninit();

	// drop the compression algorithm reference acquired in the constructor
	if (fCompressionAlgorithm != NULL)
		fCompressionAlgorithm->ReleaseReference();
}
222
223
224void
225PackageFileHeapWriter::Init()
226{
227	// allocate data buffers
228	fPendingDataBuffer = malloc(kChunkSize);
229	fCompressedDataBuffer = malloc(kChunkSize);
230	if (fPendingDataBuffer == NULL || fCompressedDataBuffer == NULL)
231		throw std::bad_alloc();
232}
233
234
/*!	Re-initializes the writer with the state of the given heap reader, so
	that writing can continue at the end of an already existing heap.
	The chunk offset table is copied from the reader and a trailing partial
	chunk, if any, is read back into the pending data buffer (and logically
	removed from the on-disk heap), so new data can be merged into it.
	Throws std::bad_alloc on memory shortage, status_t on I/O errors.
*/
void
PackageFileHeapWriter::Reinit(PackageFileHeapReader* heapReader)
{
	fHeapOffset = heapReader->HeapOffset();
	fCompressedHeapSize = heapReader->CompressedHeapSize();
	fUncompressedHeapSize = heapReader->UncompressedHeapSize();
	fPendingDataSize = 0;

	// copy the offsets array
	size_t chunkCount = (fUncompressedHeapSize + kChunkSize - 1) / kChunkSize;
	if (chunkCount > 0) {
		if (!fOffsets.AddUninitialized(chunkCount))
			throw std::bad_alloc();

		for (size_t i = 0; i < chunkCount; i++)
			fOffsets[i] = heapReader->Offsets()[i];
	}

	_UnwriteLastPartialChunk();
}
255
256
257status_t
258PackageFileHeapWriter::AddData(BDataReader& dataReader, off_t size,
259	uint64& _offset)
260{
261	_offset = fUncompressedHeapSize;
262
263	// copy the data to the heap
264	off_t readOffset = 0;
265	off_t remainingSize = size;
266	while (remainingSize > 0) {
267		// read data into pending data buffer
268		size_t toCopy = std::min(remainingSize,
269			off_t(kChunkSize - fPendingDataSize));
270		status_t error = dataReader.ReadData(readOffset,
271			(uint8*)fPendingDataBuffer + fPendingDataSize, toCopy);
272		if (error != B_OK) {
273			fErrorOutput->PrintError("Failed to read data: %s\n",
274				strerror(error));
275			return error;
276		}
277
278		fPendingDataSize += toCopy;
279		fUncompressedHeapSize += toCopy;
280		remainingSize -= toCopy;
281		readOffset += toCopy;
282
283		if (fPendingDataSize == kChunkSize) {
284			error = _FlushPendingData();
285			if (error != B_OK)
286				return error;
287		}
288	}
289
290	return B_OK;
291}
292
293
294void
295PackageFileHeapWriter::AddDataThrows(const void* buffer, size_t size)
296{
297	BBufferDataReader reader(buffer, size);
298	uint64 dummyOffset;
299	status_t error = AddData(reader, size, dummyOffset);
300	if (error != B_OK)
301		throw status_t(error);
302}
303
304
305void
306PackageFileHeapWriter::RemoveDataRanges(
307	const ::BPrivate::RangeArray<uint64>& ranges)
308{
309	ssize_t rangeCount = ranges.CountRanges();
310	if (rangeCount == 0)
311		return;
312
313	if (fUncompressedHeapSize == 0) {
314		fErrorOutput->PrintError("Can't remove ranges from empty heap\n");
315		throw status_t(B_BAD_VALUE);
316	}
317
318	// Before we begin flush any pending data, so we don't need any special
319	// handling and also can use the pending data buffer.
320	status_t status = _FlushPendingData();
321	if (status != B_OK)
322		throw status_t(status);
323
324	// We potentially have to recompress all data from the first affected chunk
325	// to the end (minus the removed ranges, of course). As a basic algorithm we
326	// can use our usual data writing strategy, i.e. read a chunk, decompress it
327	// to a temporary buffer, and write the data to keep via AddData(). There
328	// are a few complications/optimizations, though:
329	// * As data moves to other chunks, it may actually compress worse than
330	//   before. While unlikely, we still have to take care of this case by
331	//   making sure our reading end is at least a complete uncompressed chunk
332	//   ahead of the writing end.
333	// * When we run into the situation that we have to move complete aligned
334	//   chunks, we want to avoid uncompressing and recompressing them
335	//   needlessly.
336
337	// Build a list of (possibly partial) chunks we want to keep.
338
339	// the first partial chunk (if any) and all chunks between ranges
340	ChunkBuffer chunkBuffer(this, kChunkSize);
341	uint64 writeOffset = ranges[0].offset - ranges[0].offset % kChunkSize;
342	uint64 readOffset = writeOffset;
343	for (ssize_t i = 0; i < rangeCount; i++) {
344		const Range<uint64>& range = ranges[i];
345		if (range.size > 0) {
346			_PushChunks(chunkBuffer, readOffset, range.offset);
347			readOffset = range.offset + range.size;
348		}
349	}
350
351	if (readOffset == writeOffset) {
352		fErrorOutput->PrintError("Only empty ranges to remove from heap\n");
353		throw status_t(B_BAD_VALUE);
354	}
355
356	// all chunks after the last range
357	_PushChunks(chunkBuffer, readOffset, fUncompressedHeapSize);
358
359	// Reset our state to look like all chunks from the first affected one have
360	// been removed and re-add all data we want to keep.
361
362	// truncate the offsets array and reset the heap sizes
363	ssize_t firstChunkIndex = ssize_t(writeOffset / kChunkSize);
364	fCompressedHeapSize = fOffsets[firstChunkIndex];
365	fUncompressedHeapSize = (uint64)firstChunkIndex * kChunkSize;
366	fOffsets.Remove(firstChunkIndex, fOffsets.Count() - firstChunkIndex);
367
368	// we need a decompression buffer
369	void* decompressionBuffer = malloc(kChunkSize);
370	if (decompressionBuffer == NULL)
371		throw std::bad_alloc();
372	MemoryDeleter decompressionBufferDeleter(decompressionBuffer);
373
374	const Chunk* decompressedChunk = NULL;
375
376	while (chunkBuffer.HasMoreSegments()) {
377		const ChunkSegment& segment = chunkBuffer.CurrentSegment();
378
379		// If we have an aligned, complete chunk, copy its compressed data.
380		bool copyCompressed = fPendingDataSize == 0 && segment.toKeepOffset == 0
381			&& segment.toKeepSize == kChunkSize;
382
383		// Read more chunks. We need at least one buffered one to do anything
384		// and we want to buffer as many as necessary to ensure we don't
385		// overwrite one we haven't buffered yet.
386		while (chunkBuffer.HasMoreChunksToRead()
387			&& (!chunkBuffer.HasBufferedChunk()
388				|| (!copyCompressed
389					&& chunkBuffer.NextReadOffset()
390						< fCompressedHeapSize + kChunkSize))) {
391			// read chunk
392			chunkBuffer.ReadNextChunk();
393		}
394
395		// copy compressed chunk data, if possible
396		const Chunk& chunk = chunkBuffer.ChunkAt(segment.chunkIndex);
397		if (copyCompressed) {
398			status_t error = _WriteChunk(chunk.buffer, chunk.compressedSize,
399				false);
400			if (error != B_OK)
401				throw error;
402			continue;
403		}
404
405		// decompress chunk, if compressed
406		void* uncompressedData;
407		if (chunk.uncompressedSize == chunk.compressedSize) {
408			uncompressedData = chunk.buffer;
409		} else if (decompressedChunk == &chunk) {
410			uncompressedData = decompressionBuffer;
411		} else {
412			status_t error = DecompressChunkData(chunk.buffer,
413				chunk.compressedSize, decompressionBuffer,
414				chunk.uncompressedSize);
415			if (error != B_OK)
416				throw error;
417
418			decompressedChunk = &chunk;
419			uncompressedData = decompressionBuffer;
420		}
421
422		// add chunk data
423		AddDataThrows((uint8*)uncompressedData + segment.toKeepOffset,
424			segment.toKeepSize);
425
426		chunkBuffer.CurrentSegmentDone();
427	}
428
429	// Make sure a last partial chunk ends up in the pending data buffer. This
430	// is only necessary when we didn't have to move any chunk segments, since
431	// the loop would otherwise have read it in and left it in the pending data
432	// buffer.
433	if (chunkBuffer.IsEmpty())
434		_UnwriteLastPartialChunk();
435}
436
437
/*!	Finalizes the heap: flushes pending data and appends the chunk-size
	table. The table stores, for every chunk but the last, its compressed
	size minus one as a big-endian uint16 (chunks are never empty, so the
	value fits). The last chunk's size is implied by the total heap size
	minus the sum of the others. No table is written when no compression
	algorithm is used or when fewer than two chunks exist.
	\return \c B_OK on success, another error code otherwise
*/
status_t
PackageFileHeapWriter::Finish()
{
	// flush pending data, if any
	status_t error = _FlushPendingData();
	if (error != B_OK)
		return error;

	// write chunk sizes table

	// We don't need to do that, if we don't use any compression.
	if (fCompressionAlgorithm == NULL)
		return B_OK;

	// We don't need to write the last chunk size, since it is implied by the
	// total size minus the sum of all other chunk sizes.
	ssize_t offsetCount = fOffsets.Count();
	if (offsetCount < 2)
		return B_OK;

	// Convert the offsets to 16 bit sizes and write them. We use the (no longer
	// used) pending data buffer for the conversion, processing at most
	// kChunkSize / 2 uint16 entries per batch.
	uint16* buffer = (uint16*)fPendingDataBuffer;
	for (ssize_t offsetIndex = 1; offsetIndex < offsetCount;) {
		ssize_t toWrite = std::min(offsetCount - offsetIndex,
			ssize_t(kChunkSize / 2));

		for (ssize_t i = 0; i < toWrite; i++, offsetIndex++) {
			// store chunkSize - 1, so it fits 16 bit (chunks cannot be empty)
			buffer[i] = B_HOST_TO_BENDIAN_INT16(
				uint16(fOffsets[offsetIndex] - fOffsets[offsetIndex - 1] - 1));
		}

		error = _WriteDataUncompressed(buffer, toWrite * 2);
		if (error != B_OK)
			return error;
	}

	return B_OK;
}
478
479
/*!	Provides the uncompressed data of the chunk with the given index.
	A chunk past the flushed heap end has not been written to the file yet --
	its data still sit in the pending data buffer and are copied from there.
	Otherwise the chunk is located via the offsets table, read from the file,
	and decompressed.
*/
status_t
PackageFileHeapWriter::ReadAndDecompressChunk(size_t chunkIndex,
	void* compressedDataBuffer, void* uncompressedDataBuffer)
{
	if (uint64(chunkIndex + 1) * kChunkSize > fUncompressedHeapSize) {
		// The chunk has not been written to disk yet. Its data are still in the
		// pending data buffer.
		memcpy(uncompressedDataBuffer, fPendingDataBuffer, fPendingDataSize);
		// TODO: This can be optimized. Since we write to a BDataIO anyway,
		// there's no need to copy the data.
		return B_OK;
	}

	// The last chunk's compressed size is implied by the total compressed
	// heap size; all others by the next chunk's offset.
	uint64 offset = fOffsets[chunkIndex];
	size_t compressedSize = chunkIndex + 1 == (size_t)fOffsets.Count()
		? fCompressedHeapSize - offset
		: fOffsets[chunkIndex + 1] - offset;

	return ReadAndDecompressChunkData(offset, compressedSize, kChunkSize,
		compressedDataBuffer, uncompressedDataBuffer);
}
501
502
503void
504PackageFileHeapWriter::_Uninit()
505{
506	free(fPendingDataBuffer);
507	free(fCompressedDataBuffer);
508	fPendingDataBuffer = NULL;
509	fCompressedDataBuffer = NULL;
510}
511
512
513status_t
514PackageFileHeapWriter::_FlushPendingData()
515{
516	if (fPendingDataSize == 0)
517		return B_OK;
518
519	status_t error = _WriteChunk(fPendingDataBuffer, fPendingDataSize, true);
520	if (error == B_OK)
521		fPendingDataSize = 0;
522
523	return error;
524}
525
526
527status_t
528PackageFileHeapWriter::_WriteChunk(const void* data, size_t size,
529	bool mayCompress)
530{
531	// add offset
532	if (!fOffsets.Add(fCompressedHeapSize)) {
533		fErrorOutput->PrintError("Out of memory!\n");
534		return B_NO_MEMORY;
535	}
536
537	// Try to use compression only for data large enough.
538	bool compress = mayCompress && size >= (off_t)kCompressionSizeThreshold;
539	if (compress) {
540		status_t error = _WriteDataCompressed(data, size);
541		if (error != B_OK) {
542			if (error != B_BUFFER_OVERFLOW)
543				return error;
544			compress = false;
545		}
546	}
547
548	// Write uncompressed, if necessary.
549	if (!compress) {
550		status_t error = _WriteDataUncompressed(data, size);
551		if (error != B_OK)
552			return error;
553	}
554
555	return B_OK;
556}
557
558
/*!	Tries to compress the given data into the compressed data buffer and
	write the result. Returns \c B_BUFFER_OVERFLOW when no compression
	algorithm is set or compression would not save any space -- the caller
	is then expected to write the data uncompressed. Other error codes
	indicate genuine failures.
*/
status_t
PackageFileHeapWriter::_WriteDataCompressed(const void* data, size_t size)
{
	if (fCompressionAlgorithm == NULL)
		return B_BUFFER_OVERFLOW;

	// The output capacity is capped at the input size, so the algorithm
	// reports B_BUFFER_OVERFLOW when the result would be larger than the
	// input.
	size_t compressedSize;
	status_t error = fCompressionAlgorithm->algorithm->CompressBuffer(data,
		size, fCompressedDataBuffer, size, compressedSize,
		fCompressionAlgorithm->parameters);
	if (error != B_OK) {
		if (error != B_BUFFER_OVERFLOW) {
			fErrorOutput->PrintError("Failed to compress chunk data: %s\n",
				strerror(error));
		}
		return error;
	}

	// only use compressed data when we've actually saved space
	if (compressedSize == size)
		return B_BUFFER_OVERFLOW;

	return _WriteDataUncompressed(fCompressedDataBuffer, compressedSize);
}
583
584
585status_t
586PackageFileHeapWriter::_WriteDataUncompressed(const void* data, size_t size)
587{
588	status_t error = fFile->WriteAtExactly(
589		fHeapOffset + (off_t)fCompressedHeapSize, data, size);
590	if (error != B_OK) {
591		fErrorOutput->PrintError("Failed to write data: %s\n", strerror(error));
592		return error;
593	}
594
595	fCompressedHeapSize += size;
596
597	return B_OK;
598}
599
600
/*!	Registers with \a chunkBuffer all (partial) chunk segments overlapping
	the uncompressed heap range [\a startOffset, \a endOffset).
	Throws status_t(B_BAD_VALUE) when the range exceeds the heap and
	std::bad_alloc when buffering a segment fails.
*/
void
PackageFileHeapWriter::_PushChunks(ChunkBuffer& chunkBuffer, uint64 startOffset,
	uint64 endOffset)
{
	if (endOffset > fUncompressedHeapSize) {
		fErrorOutput->PrintError("Invalid range to remove from heap\n");
		throw status_t(B_BAD_VALUE);
	}

	ssize_t chunkIndex = startOffset / kChunkSize;
	uint64 uncompressedChunkOffset = (uint64)chunkIndex * kChunkSize;

	while (startOffset < endOffset) {
		// Only the heap's last chunk may be smaller than kChunkSize; its
		// sizes are implied by the total heap sizes.
		bool isLastChunk = fUncompressedHeapSize - uncompressedChunkOffset
			<= kChunkSize;
		uint32 inChunkOffset = uint32(startOffset - uncompressedChunkOffset);
		uint32 uncompressedChunkSize = isLastChunk
			? fUncompressedHeapSize - uncompressedChunkOffset
			: kChunkSize;
		uint64 compressedChunkOffset = fOffsets[chunkIndex];
		uint32 compressedChunkSize = isLastChunk
			? fCompressedHeapSize - compressedChunkOffset
			: fOffsets[chunkIndex + 1] - compressedChunkOffset;
		// keep either up to the chunk end or up to endOffset, whichever
		// comes first
		uint32 toKeepSize = uint32(std::min(
			(uint64)uncompressedChunkSize - inChunkOffset,
			endOffset - startOffset));

		if (!chunkBuffer.PushChunkSegment(compressedChunkOffset,
				compressedChunkSize, uncompressedChunkSize, inChunkOffset,
				toKeepSize)) {
			throw std::bad_alloc();
		}

		startOffset += toKeepSize;
		chunkIndex++;
		uncompressedChunkOffset += uncompressedChunkSize;
	}
}
639
640
/*!	If the heap ends in a partial (not fully sized) chunk, reads that chunk
	back into the pending data buffer and removes it from the on-disk heap
	state (offsets table and compressed heap size), so that subsequently
	added data are merged into it rather than starting a new chunk.
	Throws status_t on read/decompression errors.
*/
void
PackageFileHeapWriter::_UnwriteLastPartialChunk()
{
	// If the last chunk is partial, read it in and remove it from the offsets.
	size_t lastChunkSize = fUncompressedHeapSize % kChunkSize;
	if (lastChunkSize != 0) {
		uint64 lastChunkOffset = fOffsets[fOffsets.Count() - 1];
		size_t compressedSize = fCompressedHeapSize - lastChunkOffset;

		status_t error = ReadAndDecompressChunkData(lastChunkOffset,
			compressedSize, lastChunkSize, fCompressedDataBuffer,
			fPendingDataBuffer);
		if (error != B_OK)
			throw error;

		fPendingDataSize = lastChunkSize;
		fCompressedHeapSize = lastChunkOffset;
		fOffsets.Remove(fOffsets.Count() - 1);
	}
}
661
662
663}	// namespace BPrivate
664
665}	// namespace BHPKG
666
667}	// namespace BPackageKit
668