1/*
2 * Copyright 2003-2006, Haiku, Inc. All rights reserved.
3 * Copyright 2004-2005 yellowTAB GmbH. All Rights Reserved.
4 * Copyright 2006 Bernd Korz. All Rights Reserved
5 * Distributed under the terms of the MIT License.
6 *
7 * Authors:
8 *		Michael Pfeiffer, laplace@haiku-os.org
9 *		Ryan Leavengood, leavengood@gmail.com
10 *		yellowTAB GmbH
11 *		Bernd Korz
12 */
13
14#include <scheduler.h>
15#include <Debug.h>
16#include <Screen.h>
17
18#include <syscalls.h>
19
20#include "Filter.h"
21
22
23// Implementation of FilterThread
24FilterThread::FilterThread(Filter* filter, int32 i, int32 n,
25	bool runInCurrentThread)
26	:
27	fFilter(filter),
28	fI(i),
29	fN(n)
30{
31	if (runInCurrentThread)
32		Run();
33	else {
34		thread_id tid;
35		tid = spawn_thread(worker_thread, "filter",
36			suggest_thread_priority(B_STATUS_RENDERING), this);
37		if (tid >= 0)
38			resume_thread(tid);
39		else
40			delete this;
41	}
42}
43
44
45FilterThread::~FilterThread()
46{
47	fFilter->FilterThreadDone();
48}
49
50
51status_t
52FilterThread::worker_thread(void* data)
53{
54	FilterThread* thread = (FilterThread*)data;
55	return thread->Run();
56}
57
58
59status_t
60FilterThread::Run()
61{
62	if (fI == 0) {
63		BBitmap* bm;
64		// create destination image in first thread
65		bm = fFilter->GetBitmap();
66		if (bm == NULL) {
67			fFilter->FilterThreadInitFailed();
68			return B_ERROR;
69		}
70		// and start other filter threads
71		for (int32 i = fI + 1; i < fN; i ++) {
72			new FilterThread(fFilter, i, fN);
73		}
74	}
75	if (fFilter->GetBitmap())
76		fFilter->Run(fI, fN);
77
78	delete this;
79	return B_OK;
80}
81
82// Implementation of Filter
83Filter::Filter(BBitmap* image, BMessenger listener, uint32 what)
84	:
85	fListener(listener),
86	fWhat(what),
87	fStarted(false),
88	fN(0),
89	fNumberOfThreads(0),
90	fIsRunning(false),
91	fSrcImage(image),
92	fDestImageInitialized(false),
93	fDestImage(NULL)
94{
95	fCPUCount = NumberOfActiveCPUs();
96
97	fWaitForThreads = create_sem(0, "wait_for_threads");
98
99	#if TIME_FILTER
100	fStopWatch = NULL;
101	#endif
102}
103
104
105Filter::~Filter()
106{
107	delete fDestImage;
108	delete_sem(fWaitForThreads);
109}
110
111
112BBitmap*
113Filter::GetBitmap()
114{
115	if (!fDestImageInitialized) {
116		fDestImageInitialized = true;
117		fDestImage = CreateDestImage(fSrcImage);
118	}
119	return fDestImage;
120}
121
122
123BBitmap*
124Filter::DetachBitmap()
125{
126	BBitmap* image = fDestImage;
127	fDestImage = NULL;
128	return image;
129}
130
131
132void
133Filter::Start(bool async)
134{
135	if (fStarted || fSrcImage == NULL) return;
136
137	#if TIME_FILTER
138		fStopWatch = new BStopWatch("Filter Time");
139	#endif
140
141	fN = NumberOfThreads();
142	fNumberOfThreads = fN;
143	fIsRunning = true;
144	fStarted = true;
145
146	// start first filter thread
147	new FilterThread(this, 0, fN, !async);
148
149	if (!async)
150		Wait();
151}
152
153
154void
155Filter::Wait()
156{
157	if (fStarted) {
158		// wait for threads to exit
159		while (acquire_sem_etc(fWaitForThreads, fN, 0, 0) == B_INTERRUPTED);
160		// ready to start again
161		fStarted = false;
162	}
163}
164
165
166void
167Filter::Stop()
168{
169	// tell FilterThreads to stop calculations
170	fIsRunning = false;
171	Wait();
172}
173
174
175bool
176Filter::IsRunning() const
177{
178	return fIsRunning;
179}
180
181
182void
183Filter::Completed()
184{
185}
186
187
188void
189Filter::FilterThreadDone()
190{
191	if (atomic_add(&fNumberOfThreads, -1) == 1) {
192		#if TIME_FILTER
193			delete fStopWatch; fStopWatch = NULL;
194		#endif
195		Completed();
196		if (fIsRunning)
197			fListener.SendMessage(fWhat);
198
199		fIsRunning = false;
200	}
201	release_sem(fWaitForThreads);
202}
203
204
205void
206Filter::FilterThreadInitFailed()
207{
208	ASSERT(fNumberOfThreads == fN);
209	fNumberOfThreads = 0;
210	Completed();
211	fIsRunning = false;
212	release_sem_etc(fWaitForThreads, fN, 0);
213}
214
215
216bool
217Filter::IsBitmapValid(BBitmap* bitmap) const
218{
219	return bitmap != NULL && bitmap->InitCheck() == B_OK && bitmap->IsValid();
220}
221
222
223int32
224Filter::NumberOfThreads()
225{
226	const int32 units = GetNumberOfUnits();
227	int32 n;
228	n = units / 32; // at least 32 units per CPU
229	if (n > CPUCount())
230		n = CPUCount();
231	else if (n <= 0)
232		n = 1; // at least one thread!
233
234	return n;
235}
236
237
238BBitmap*
239Filter::GetSrcImage()
240{
241	return fSrcImage;
242}
243
244
245BBitmap*
246Filter::GetDestImage()
247{
248	return fDestImage;
249}
250
251
252int32
253Filter::NumberOfActiveCPUs() const
254{
255	int count;
256	system_info info;
257	get_system_info(&info);
258	count = info.cpu_count;
259	int32 cpuCount = 0;
260	for (int i = 0; i < count; i ++) {
261		if (_kern_cpu_enabled(i))
262			cpuCount++;
263	}
264	if (cpuCount == 0)
265		cpuCount = 1;
266
267	return cpuCount;
268}
269
270
271// Implementation of (bilinear) Scaler
272Scaler::Scaler(BBitmap* image, BRect rect, BMessenger listener, uint32 what,
273	bool dither)
274	:
275	Filter(image, listener, what),
276	fScaledImage(NULL),
277	fRect(rect),
278	fDither(dither)
279{
280}
281
282
283Scaler::~Scaler()
284{
285	if (GetDestImage() != fScaledImage) {
286		delete fScaledImage;
287		fScaledImage = NULL;
288	}
289}
290
291
292BBitmap*
293Scaler::CreateDestImage(BBitmap* srcImage)
294{
295	if (srcImage == NULL || (srcImage->ColorSpace() != B_RGB32
296		&& srcImage->ColorSpace() != B_RGBA32))
297			return NULL;
298
299	BRect dest(0, 0, fRect.IntegerWidth(), fRect.IntegerHeight());
300	BBitmap* destImage = new BBitmap(dest,
301		fDither ? B_CMAP8 : srcImage->ColorSpace());
302
303	if (!IsBitmapValid(destImage)) {
304		delete destImage;
305		return NULL;
306	}
307
308	if (fDither)
309	{
310		BRect dest_rect(0, 0, fRect.IntegerWidth(), fRect.IntegerHeight());
311		fScaledImage = new BBitmap(dest_rect, srcImage->ColorSpace());
312		if (!IsBitmapValid(fScaledImage)) {
313			delete destImage;
314			delete fScaledImage;
315			fScaledImage = NULL;
316			return NULL;
317		}
318	} else
319		fScaledImage = destImage;
320
321	return destImage;
322}
323
324
325bool
326Scaler::Matches(BRect rect, bool dither) const
327{
328	return fRect.IntegerWidth() == rect.IntegerWidth()
329		&& fRect.IntegerHeight() == rect.IntegerHeight()
330		&& fDither == dither;
331}
332
333
334// Scale bilinear using floating point calculations
335typedef struct {
336	intType srcColumn;
337	float alpha0;
338	float alpha1;
339} ColumnData;
340
341
342void
343Scaler::ScaleBilinear(intType fromRow, int32 toRow)
344{
345	BBitmap* src;
346	BBitmap* dest;
347	intType srcW, srcH;
348	intType destW, destH;
349	intType x, y, i;
350	ColumnData* columnData;
351	ColumnData* cd;
352	const uchar* srcBits;
353	uchar* destBits;
354	intType srcBPR, destBPR;
355	const uchar* srcData;
356	uchar* destDataRow;
357	uchar* destData;
358	const int32 kBPP = 4;
359
360	src = GetSrcImage();
361	dest = fScaledImage;
362
363	srcW = src->Bounds().IntegerWidth();
364	srcH = src->Bounds().IntegerHeight();
365	destW = dest->Bounds().IntegerWidth();
366	destH = dest->Bounds().IntegerHeight();
367
368	srcBits = (uchar*)src->Bits();
369	destBits = (uchar*)dest->Bits();
370	srcBPR = src->BytesPerRow();
371	destBPR = dest->BytesPerRow();
372
373	columnData = new ColumnData[destW];
374	cd = columnData;
375	for (i = 0; i < destW; i++, cd++) {
376		float column = (float)i * (float)srcW / (float)destW;
377		cd->srcColumn = (intType)column;
378		cd->alpha1 = column - cd->srcColumn;
379		cd->alpha0 = 1.0 - cd->alpha1;
380	}
381
382	destDataRow = destBits + fromRow * destBPR;
383
384	for (y = fromRow; IsRunning() && y <= toRow; y++, destDataRow += destBPR) {
385		float row;
386		intType srcRow;
387		float alpha0, alpha1;
388
389		if (destH == 0)
390			row = 0;
391		else
392			row = (float)y * (float)srcH / (float)destH;
393
394		srcRow = (intType)row;
395		alpha1 = row - srcRow;
396		alpha0 = 1.0 - alpha1;
397
398		srcData = srcBits + srcRow * srcBPR;
399		destData = destDataRow;
400
401		if (y < destH) {
402			float a0, a1;
403			const uchar *a, *b, *c, *d;
404
405			for (x = 0; x < destW; x ++, destData += kBPP) {
406				a = srcData + columnData[x].srcColumn * kBPP;
407				b = a + kBPP;
408				c = a + srcBPR;
409				d = c + kBPP;
410
411				a0 = columnData[x].alpha0;
412				a1 = columnData[x].alpha1;
413
414				destData[0] = static_cast<uchar>(
415								(a[0] * a0 + b[0] * a1) * alpha0 +
416								(c[0] * a0 + d[0] * a1) * alpha1);
417				destData[1] = static_cast<uchar>(
418								(a[1] * a0 + b[1] * a1) * alpha0 +
419								(c[1] * a0 + d[1] * a1) * alpha1);
420				destData[2] = static_cast<uchar>(
421								(a[2] * a0 + b[2] * a1) * alpha0 +
422								(c[2] * a0 + d[2] * a1) * alpha1);
423				destData[3] = static_cast<uchar>(
424								(a[3] * a0 + b[3] * a1) * alpha0 +
425								(c[3] * a0 + d[3] * a1) * alpha1);
426			}
427
428			// right column
429			a = srcData + srcW * kBPP;
430			c = a + srcBPR;
431
432			destData[0] = static_cast<uchar>(a[0] * alpha0 + c[0] * alpha1);
433			destData[1] = static_cast<uchar>(a[1] * alpha0 + c[1] * alpha1);
434			destData[2] = static_cast<uchar>(a[2] * alpha0 + c[2] * alpha1);
435			destData[3] = static_cast<uchar>(a[3] * alpha0 + c[3] * alpha1);
436		} else {
437			float a0, a1;
438			const uchar *a, *b;
439			for (x = 0; x < destW; x ++, destData += kBPP) {
440				a = srcData + columnData[x].srcColumn * kBPP;
441				b = a + kBPP;
442
443				a0 = columnData[x].alpha0;
444				a1 = columnData[x].alpha1;
445
446				destData[0] = static_cast<uchar>(a[0] * a0 + b[0] * a1);
447				destData[1] = static_cast<uchar>(a[1] * a0 + b[1] * a1);
448				destData[2] = static_cast<uchar>(a[2] * a0 + b[2] * a1);
449				destData[3] = static_cast<uchar>(a[3] * a0 + b[3] * a1);
450			}
451
452			// bottom, right pixel
453			a = srcData + srcW * kBPP;
454
455			destData[0] = a[0];
456			destData[1] = a[1];
457			destData[2] = a[2];
458			destData[3] = a[3];
459		}
460
461	}
462
463	delete[] columnData;
464}
465
466
467// Scale bilinear using fixed point calculations
468// Is already more than two times faster than floating point version
469// on AMD Athlon 1 GHz and Dual Intel Pentium III 866 MHz.
470
471typedef struct {
472	int32 srcColumn;
473	fixed_point alpha0;
474	fixed_point alpha1;
475} ColumnDataFP;
476
477
478void
479Scaler::ScaleBilinearFP(intType fromRow, int32 toRow)
480{
481	BBitmap* src;
482	BBitmap* dest;
483	intType srcW, srcH;
484	intType destW, destH;
485	intType x, y, i;
486	ColumnDataFP* columnData;
487	ColumnDataFP* cd;
488	const uchar* srcBits;
489	uchar* destBits;
490	intType srcBPR, destBPR;
491	const uchar* srcData;
492	uchar* destDataRow;
493	uchar* destData;
494	const int32 kBPP = 4;
495
496	src = GetSrcImage();
497	dest = fScaledImage;
498
499	srcW = src->Bounds().IntegerWidth();
500	srcH = src->Bounds().IntegerHeight();
501	destW = dest->Bounds().IntegerWidth();
502	destH = dest->Bounds().IntegerHeight();
503
504	srcBits = (uchar*)src->Bits();
505	destBits = (uchar*)dest->Bits();
506	srcBPR = src->BytesPerRow();
507	destBPR = dest->BytesPerRow();
508
509	fixed_point fpSrcW = to_fixed_point(srcW);
510	fixed_point fpDestW = to_fixed_point(destW);
511	fixed_point fpSrcH = to_fixed_point(srcH);
512	fixed_point fpDestH = to_fixed_point(destH);
513
514	columnData = new ColumnDataFP[destW];
515	cd = columnData;
516	for (i = 0; i < destW; i++, cd++) {
517		fixed_point column = to_fixed_point(i) * (long_fixed_point)fpSrcW
518			/ fpDestW;
519		cd->srcColumn = from_fixed_point(column);
520		cd->alpha1 = tail_value(column); // weigth for left pixel value
521		cd->alpha0 = kFPOne - cd->alpha1; // weigth for right pixel value
522	}
523
524	destDataRow = destBits + fromRow * destBPR;
525
526	for (y = fromRow; IsRunning() && y <= toRow; y++, destDataRow += destBPR) {
527		fixed_point row;
528		intType srcRow;
529		fixed_point alpha0, alpha1;
530
531		if (fpDestH == 0)
532			row = 0;
533		else
534			row = to_fixed_point(y) * (long_fixed_point)fpSrcH / fpDestH;
535
536		srcRow = from_fixed_point(row);
537		alpha1 = tail_value(row); // weight for row y + 1
538		alpha0 = kFPOne - alpha1; // weight for row y
539
540		srcData = srcBits + srcRow * srcBPR;
541		destData = destDataRow;
542
543		// Need mult_correction for "outer" multiplication only
544		#define I4(i) from_fixed_point(mult_correction(\
545							(a[i] * a0 + b[i] * a1) * alpha0 + \
546							(c[i] * a0 + d[i] * a1) * alpha1))
547		#define V2(i) from_fixed_point(a[i] * alpha0 + c[i] * alpha1);
548		#define H2(i) from_fixed_point(a[i] * a0 + b[i] * a1);
549
550		if (y < destH) {
551			fixed_point a0, a1;
552			const uchar *a, *b, *c, *d;
553
554			for (x = 0; x < destW; x ++, destData += kBPP) {
555				a = srcData + columnData[x].srcColumn * kBPP;
556				b = a + kBPP;
557				c = a + srcBPR;
558				d = c + kBPP;
559
560				a0 = columnData[x].alpha0;
561				a1 = columnData[x].alpha1;
562
563				destData[0] = I4(0);
564				destData[1] = I4(1);
565				destData[2] = I4(2);
566				destData[3] = I4(3);
567			}
568
569			// right column
570			a = srcData + srcW * kBPP;
571			c = a + srcBPR;
572
573			destData[0] = V2(0);
574			destData[1] = V2(1);
575			destData[2] = V2(2);
576			destData[3] = V2(3);
577		} else {
578			fixed_point a0, a1;
579			const uchar *a, *b;
580			for (x = 0; x < destW; x ++, destData += kBPP) {
581				a = srcData + columnData[x].srcColumn * kBPP;
582				b = a + kBPP;
583
584				a0 = columnData[x].alpha0;
585				a1 = columnData[x].alpha1;
586
587				destData[0] = H2(0);
588				destData[1] = H2(1);
589				destData[2] = H2(2);
590				destData[3] = H2(3);
591			}
592
593			// bottom, right pixel
594			a = srcData + srcW * kBPP;
595
596			destData[0] = a[0];
597			destData[1] = a[1];
598			destData[2] = a[2];
599			destData[3] = a[3];
600		}
601	}
602
603	delete[] columnData;
604}
605
606
607void
608Scaler::RowValues(float* sum, const uchar* src, intType srcW, intType fromX,
609	intType toX, const float a0X, const float a1X, const int32 kBPP)
610{
611	sum[0] = a0X * src[0];
612	sum[1] = a0X * src[1];
613	sum[2] = a0X * src[2];
614
615	src += kBPP;
616
617	for (int32 x = fromX + 1; x < toX; x++, src += kBPP) {
618		sum[0] += src[0];
619		sum[1] += src[1];
620		sum[2] += src[2];
621	}
622
623	if (toX <= srcW) {
624		sum[0] += a1X * src[0];
625		sum[1] += a1X * src[1];
626		sum[2] += a1X * src[2];
627	}
628}
629
630
631typedef struct {
632	int32 from;
633	int32 to;
634	float alpha0;
635	float alpha1;
636} DownScaleColumnData;
637
638
639void
640Scaler::DownScaleBilinear(intType fromRow, int32 toRow)
641{
642	BBitmap* src;
643	BBitmap* dest;
644	intType srcW, srcH;
645	intType destW, destH;
646	intType x, y;
647	const uchar* srcBits;
648	uchar* destBits;
649	intType srcBPR, destBPR;
650	const uchar* srcData;
651	uchar* destDataRow;
652	uchar* destData;
653	const int32 kBPP = 4;
654	DownScaleColumnData* columnData;
655
656	src = GetSrcImage();
657	dest = fScaledImage;
658
659	srcW = src->Bounds().IntegerWidth();
660	srcH = src->Bounds().IntegerHeight();
661	destW = dest->Bounds().IntegerWidth();
662	destH = dest->Bounds().IntegerHeight();
663
664	srcBits = (uchar*)src->Bits();
665	destBits = (uchar*)dest->Bits();
666	srcBPR = src->BytesPerRow();
667	destBPR = dest->BytesPerRow();
668
669	destDataRow = destBits + fromRow * destBPR;
670
671	const float deltaX = (srcW + 1.0) / (destW + 1.0);
672	const float deltaY = (srcH + 1.0) / (destH + 1.0);
673	const float deltaXY = deltaX * deltaY;
674
675	columnData = new DownScaleColumnData[destW + 1];
676	DownScaleColumnData* cd = columnData;
677	for (x = 0; x <= destW; x++, cd++) {
678		const float fFromX = x * deltaX;
679		const float fToX = fFromX + deltaX;
680
681		cd->from = (intType)fFromX;
682		cd->to = (intType)fToX;
683
684		cd->alpha0 = 1.0 - (fFromX - cd->from);
685		cd->alpha1 = fToX - cd->to;
686	}
687
688	for (y = fromRow; IsRunning() && y <= toRow; y ++, destDataRow += destBPR) {
689		const float fFromY = y * deltaY;
690		const float fToY = fFromY + deltaY;
691
692		const intType fromY = (intType)fFromY;
693		const intType toY = (intType)fToY;
694
695		const float a0Y = 1.0 - (fFromY - fromY);
696		const float a1Y = fToY - toY;
697
698		const uchar* srcDataRow = srcBits + fromY * srcBPR;
699		destData = destDataRow;
700
701		cd = columnData;
702		for (x = 0; x <= destW; x++, destData += kBPP, cd++) {
703			const intType fromX = cd->from;
704			const intType toX = cd->to;
705
706			const float a0X = cd->alpha0;
707			const float a1X = cd->alpha1;
708
709			srcData = srcDataRow + fromX * kBPP;
710
711			float totalSum[3];
712			float sum[3];
713
714			RowValues(sum, srcData, srcW, fromX, toX, a0X, a1X, kBPP);
715			totalSum[0] = a0Y * sum[0];
716			totalSum[1] = a0Y * sum[1];
717			totalSum[2] = a0Y * sum[2];
718
719			srcData += srcBPR;
720
721			for (int32 r = fromY + 1; r < toY; r++, srcData += srcBPR) {
722				RowValues(sum, srcData, srcW, fromX, toX, a0X, a1X, kBPP);
723				totalSum[0] += sum[0];
724				totalSum[1] += sum[1];
725				totalSum[2] += sum[2];
726			}
727
728			if (toY <= srcH) {
729				RowValues(sum, srcData, srcW, fromX, toX, a0X, a1X, kBPP);
730				totalSum[0] += a1Y * sum[0];
731				totalSum[1] += a1Y * sum[1];
732				totalSum[2] += a1Y * sum[2];
733			}
734
735			destData[0] = static_cast<uchar>(totalSum[0] / deltaXY);
736			destData[1] = static_cast<uchar>(totalSum[1] / deltaXY);
737			destData[2] = static_cast<uchar>(totalSum[2] / deltaXY);
738		}
739	}
740
741	delete[] columnData;
742}
743
744
// Floyd-Steinberg Dithering
746// Filter (distribution of error to adjacent pixels, X is current pixel):
747// 0 X 7
748// 3 5 1
749
750typedef struct {
751	intType error[3];
752} DitheringColumnData;
753
754
755uchar
756Scaler::Limit(intType value)
757{
758	if (value < 0) {
759		value = 0;
760	} else if (value > 255) {
761		value = 255;
762	}
763	return value;
764}
765
766
767void
768Scaler::Dither(int32 fromRow, int32 toRow)
769{
770	BBitmap* src;
771	BBitmap* dest;
772	intType destW;
773	intType x, y;
774
775	uchar* srcBits;
776	intType srcBPR;
777	uchar* srcDataRow;
778	uchar* srcData;
779
780	uchar* destBits;
781	intType destBPR;
782	uchar* destDataRow;
783	uchar* destData;
784	const int32 kBPP = 4;
785	DitheringColumnData* columnData0;
786	DitheringColumnData* columnData;
787	DitheringColumnData* cd;
788	BScreen screen;
789	intType error[3], err[3];
790
791	src = fScaledImage;
792	dest = GetDestImage();
793
794	ASSERT(src->ColorSpace() == B_RGB32 || src->ColorSpace() == B_RGBA32);
795	ASSERT(dest->ColorSpace() == B_CMAP8);
796	ASSERT(src->Bounds().IntegerWidth() == dest->Bounds().IntegerWidth());
797	ASSERT(src->Bounds().IntegerHeight() == dest->Bounds().IntegerHeight());
798
799	destW = dest->Bounds().IntegerWidth();
800
801	srcBits = (uchar*)src->Bits();
802	srcBPR = src->BytesPerRow();
803	destBits = (uchar*)dest->Bits();
804	destBPR = dest->BytesPerRow();
805
806	// Allocate space for sentinel at left and right bounds,
807	// so that columnData[-1] and columnData[destW + 1] can be safely accessed
808	columnData0 = new DitheringColumnData[destW + 3];
809	columnData = columnData0 + 1;
810
811	// clear error
812	cd = columnData;
813	for (x = destW; x >= 0; x --, cd++) {
814		cd->error[0] = cd->error[1] = cd->error[2] = 0;
815	}
816
817	srcDataRow = srcBits + fromRow * srcBPR;
818	destDataRow = destBits + fromRow * destBPR;
819	for (y = fromRow; IsRunning() && y <= toRow; y++, srcDataRow += srcBPR,
820		destDataRow += destBPR) {
821		// left to right
822		error[0] = error[1] = error[2] = 0;
823		srcData = srcDataRow;
824		destData = destDataRow;
825		for (x = 0; x <= destW; x ++, srcData += kBPP, destData += 1) {
826			rgb_color color, actualColor;
827			uint8 index;
828
829			color.red = Limit(srcData[2] + error[0] / 16);
830			color.green = Limit(srcData[1] + error[1] / 16);
831			color.blue = Limit(srcData[0] + error[2] / 16);
832			color.alpha = UINT8_MAX;
833
834			index = screen.IndexForColor(color);
835			actualColor = screen.ColorForIndex(index);
836
837			*destData = index;
838
839			err[0] = color.red - actualColor.red;
840			err[1] = color.green - actualColor.green;
841			err[2] = color.blue - actualColor.blue;
842
843			// distribute error
844			// get error for next pixel
845			cd = &columnData[x + 1];
846			error[0] = cd->error[0] + 7 * err[0];
847			error[1] = cd->error[1] + 7 * err[1];
848			error[2] = cd->error[2] + 7 * err[2];
849
850			// set error for right pixel below current pixel
851			cd->error[0] = err[0];
852			cd->error[1] = err[1];
853			cd->error[2] = err[2];
854
855			// add error for pixel below current pixel
856			cd--;
857			cd->error[0] += 5 * err[0];
858			cd->error[1] += 5 * err[1];
859			cd->error[2] += 5 * err[2];
860
861			// add error for left pixel below current pixel
862			cd--;
863			cd->error[0] += 3 * err[0];
864			cd->error[1] += 3 * err[1];
865			cd->error[2] += 3 * err[2];
866		}
867		// Note: Alogrithm has good results with "left to right" already
868		// Optionally remove code to end of block:
869		y++;
870		srcDataRow += srcBPR; destDataRow += destBPR;
871		if (y > toRow) break;
872		// right to left
873		error[0] = error[1] = error[2] = 0;
874		srcData = srcDataRow + destW * kBPP;
875		destData = destDataRow + destW;
876		for (x = 0; x <= destW; x++, srcData -= kBPP, destData -= 1) {
877			rgb_color color, actualColor;
878			uint8 index;
879
880			color.red = Limit(srcData[2] + error[0] / 16);
881			color.green = Limit(srcData[1] + error[1] / 16);
882			color.blue = Limit(srcData[0] + error[2] / 16);
883			color.alpha = UINT8_MAX;
884
885			index = screen.IndexForColor(color);
886			actualColor = screen.ColorForIndex(index);
887
888			*destData = index;
889
890			err[0] = color.red - actualColor.red;
891			err[1] = color.green - actualColor.green;
892			err[2] = color.blue - actualColor.blue;
893
894			// distribute error
895			// get error for next pixel
896			cd = &columnData[x - 1];
897			error[0] = cd->error[0] + 7 * err[0];
898			error[1] = cd->error[1] + 7 * err[1];
899			error[2] = cd->error[2] + 7 * err[2];
900
901			// set error for left pixel below current pixel
902			cd->error[0] = err[0];
903			cd->error[1] = err[1];
904			cd->error[2] = err[2];
905
906			// add error for pixel below current pixel
907			cd++;
908			cd->error[0] += 5 * err[0];
909			cd->error[1] += 5 * err[1];
910			cd->error[2] += 5 * err[2];
911
912			// add error for right pixel below current pixel
913			cd++;
914			cd->error[0] += 3 * err[0];
915			cd->error[1] += 3 * err[1];
916			cd->error[2] += 3 * err[2];
917		}
918	}
919
920	delete[] columnData0;
921}
922
923
924int32
925Scaler::GetNumberOfUnits()
926{
927	return fRect.IntegerHeight() + 1;
928}
929
930
931void
932Scaler::Run(int32 i, int32 n)
933{
934	int32 from, to, height, imageHeight;
935	imageHeight = GetDestImage()->Bounds().IntegerHeight() + 1;
936	height = imageHeight / n;
937	from = i * height;
938	if (i + 1 == n)
939		to = imageHeight - 1;
940	else
941		to = from + height - 1;
942
943	if (GetDestImage()->Bounds().Width() >= GetSrcImage()->Bounds().Width())
944		ScaleBilinearFP(from, to);
945	else
946		DownScaleBilinear(from, to);
947
948	if (fDither)
949		Dither(from, to);
950
951}
952
953
954void
955Scaler::Completed()
956{
957	if (GetDestImage() != fScaledImage)
958		delete fScaledImage;
959
960	fScaledImage = NULL;
961}
962
963
964// Implementation of ImageProcessor
965ImageProcessor::ImageProcessor(enum operation op, BBitmap* image,
966	BMessenger listener, uint32 what)
967	:
968	Filter(image, listener, what),
969	fOp(op),
970	fBPP(0),
971	fWidth(0),
972	fHeight(0),
973	fSrcBPR(0),
974	fDestBPR(0)
975{
976}
977
978
979BBitmap*
980ImageProcessor::CreateDestImage(BBitmap* /* srcImage */)
981{
982	color_space cs;
983	BBitmap* bm;
984	BRect rect;
985
986	if (GetSrcImage() == NULL)
987		return NULL;
988
989	cs = GetSrcImage()->ColorSpace();
990	fBPP = BytesPerPixel(cs);
991	if (fBPP < 1)
992		return NULL;
993
994	fWidth = GetSrcImage()->Bounds().IntegerWidth();
995	fHeight = GetSrcImage()->Bounds().IntegerHeight();
996
997	if (fOp == kRotateClockwise || fOp == kRotateCounterClockwise)
998		rect.Set(0, 0, fHeight, fWidth);
999	else
1000		rect.Set(0, 0, fWidth, fHeight);
1001
1002	bm = new BBitmap(rect, cs);
1003	if (!IsBitmapValid(bm)) {
1004		delete bm;
1005		return NULL;
1006	}
1007
1008	fSrcBPR = GetSrcImage()->BytesPerRow();
1009	fDestBPR = bm->BytesPerRow();
1010
1011	return bm;
1012}
1013
1014
1015int32
1016ImageProcessor::GetNumberOfUnits()
1017{
1018	return GetSrcImage()->Bounds().IntegerHeight() + 1;
1019}
1020
1021
1022int32
1023ImageProcessor::BytesPerPixel(color_space cs) const
1024{
1025	switch (cs) {
1026		case B_RGB32:		// fall through
1027		case B_RGB32_BIG:	// fall through
1028		case B_RGBA32:		// fall through
1029		case B_RGBA32_BIG:	return 4;
1030
1031		case B_RGB24_BIG:	// fall through
1032		case B_RGB24:		return 3;
1033
1034		case B_RGB16:		// fall through
1035		case B_RGB16_BIG:	// fall through
1036		case B_RGB15:		// fall through
1037		case B_RGB15_BIG:	// fall through
1038		case B_RGBA15:		// fall through
1039		case B_RGBA15_BIG:	return 2;
1040
1041		case B_GRAY8:		// fall through
1042		case B_CMAP8:		return 1;
1043		case B_GRAY1:		return 0;
1044		default: return -1;
1045	}
1046}
1047
1048
1049void
1050ImageProcessor::CopyPixel(uchar* dest, int32 destX, int32 destY,
1051	const uchar* src, int32 x, int32 y)
1052{
1053	// Note: On my systems (Dual Intel P3 866MHz and AMD Athlon 1GHz),
1054	// replacing the multiplications below with pointer arithmethics showed
1055	// no speedup at all!
1056	dest += fDestBPR * destY + destX * fBPP;
1057	src += fSrcBPR * y + x * fBPP;
1058	// Replacing memcpy with this switch statement is slightly faster
1059	switch (fBPP) {
1060		case 4:
1061			dest[3] = src[3];
1062		case 3:
1063			dest[2] = src[2];
1064		case 2:
1065			dest[1] = src[1];
1066		case 1:
1067			dest[0] = src[0];
1068			break;
1069	}
1070}
1071
1072
1073// Note: For B_CMAP8 InvertPixel inverts the color index not the color value!
1074void
1075ImageProcessor::InvertPixel(int32 x, int32 y, uchar* dest, const uchar* src)
1076{
1077	dest += fDestBPR * y + x * fBPP;
1078	src += fSrcBPR * y + x * fBPP;
1079	switch (fBPP) {
1080		case 4:
1081			// dest[3] = ~src[3]; DON'T invert alpha channel
1082		case 3:
1083			dest[2] = ~src[2];
1084		case 2:
1085			dest[1] = ~src[1];
1086		case 1:
1087			dest[0] = ~src[0];
1088			break;
1089	}
1090}
1091
1092
1093// Note: On my systems, the operation kInvert shows a speedup on
1094// multiple CPUs only!
1095void
1096ImageProcessor::Run(int32 i, int32 n)
1097{
1098	int32 from, to;
1099	int32 height = (fHeight + 1) / n;
1100	from = i * height;
1101	if (i + 1 == n)
1102		to = fHeight;
1103	else
1104		to = from + height - 1;
1105
1106	int32 x, y, destX, destY;
1107	const uchar* src = (uchar*)GetSrcImage()->Bits();
1108	uchar* dest = (uchar*)GetDestImage()->Bits();
1109
1110	switch (fOp) {
1111		case kRotateClockwise:
1112			for (y = from; y <= to; y++) {
1113				for (x = 0; x <= fWidth; x++) {
1114					destX = fHeight - y;
1115					destY = x;
1116					CopyPixel(dest, destX, destY, src, x, y);
1117				}
1118			}
1119			break;
1120		case kRotateCounterClockwise:
1121			for (y = from; y <= to; y ++) {
1122				for (x = 0; x <= fWidth; x ++) {
1123					destX = y;
1124					destY = fWidth - x;
1125					CopyPixel(dest, destX, destY, src, x, y);
1126				}
1127			}
1128			break;
1129		case kFlipTopToBottom:
1130			for (y = from; y <= to; y ++) {
1131				for (x = 0; x <= fWidth; x ++) {
1132					destX = x;
1133					destY = fHeight - y;
1134					CopyPixel(dest, destX, destY, src, x, y);
1135				}
1136			}
1137			break;
1138		case kFlipLeftToRight:
1139			for (y = from; y <= to; y ++) {
1140				for (x = 0; x <= fWidth; x ++) {
1141					destX = fWidth - x;
1142					destY = y;
1143					CopyPixel(dest, destX, destY, src, x, y);
1144				}
1145			}
1146			break;
1147		case kInvert:
1148			for (y = from; y <= to; y ++) {
1149				for (x = 0; x <= fWidth; x ++) {
1150					InvertPixel(x, y, dest, src);
1151				}
1152			}
1153			break;
1154	}
1155}
1156