1/*
 * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include "cdda.h"
8
9#include <KernelExport.h>
10#include <device/scsi.h>
11
12#include <algorithm>
13#include <ctype.h>
14#include <errno.h>
15#include <stdlib.h>
16#include <string.h>
17#include <strings.h>
18
19
/*!	Layout of a single CD-Text pack as consumed by parse_pack_data().
	The structure is declared packed; its members add up to 18 bytes.
*/
struct cdtext_pack_data {
	uint8	id;
		// pack type; compared against kTrackID, kArtistID, ... kMessageID
	uint8	track;
		// track the text belongs to; read_cdtext() treats 0 as "whole disc"
	uint8	number;
		// sequence number; parse_pack_data() expects it to grow by one
		// from pack to pack
	uint8	character_position : 4;
		// number of characters of the current string that were already
		// stored in the previous pack
	uint8	block_number : 3;
		// only used by the (disabled) debug output in this file
	uint8	double_byte : 1;
		// set if the text uses two bytes per character
	char	text[12];
		// text payload of this pack
	uint8	crc[2];
		// not evaluated anywhere in this file
} _PACKED;
30
// Pack IDs (cdtext_pack_data::id) this file knows about.
enum {
	kTrackID	= 0x80,
		// album title (track 0) or track title, see read_cdtext()
	kArtistID	= 0x81,
		// album artist (track 0) or track artist, see read_cdtext()
	kMessageID	= 0x85,
		// last ID that is_string_id() still regards as text
};
36
// Size in bytes of the buffer read_cdtext() allocates for the CD-Text data.
static const uint32 kBufferSize = 16384;
// Size in bytes of the sense data buffer allocated by
// read_table_of_contents().
static const uint32 kSenseSize = 1024;
39
40
41//	#pragma mark - string functions
42
43
/*!	Duplicates \a string on the heap.
	Returns \c NULL if \a string is \c NULL or empty, or if the allocation
	fails. The caller owns the returned copy and has to free() it.
*/
static char *
copy_string(const char *string)
{
	if (string != NULL && string[0] != '\0')
		return strdup(string);

	return NULL;
}
52
53
54static char *
55to_utf8(const char* string)
56{
57	char buffer[256];
58	size_t out = 0;
59
60	// TODO: assume CP1252 or ISO-8859-1 character set for now
61	while (uint32 c = (uint8)string[0]) {
62
63		if (c < 0x80) {
64			if (out >= sizeof(buffer) - 1)
65				break;
66			// ASCII character: no change needed
67			buffer[out++] = c;
68		} else {
69			if (c < 0xA0) {
70				// Windows CP-1252 - Use a lookup table
71				static const uint32 lookup[] = {
72					0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
73					0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0,
74					0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
75					0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178
76				};
77
78				c = lookup[c - 0x80];
79			}
80
81			// Convert to 2 or 3-byte representation
82			if (c == 0) {
83				// invalid character, ignore
84			} else if (c < 0x800) {
85				if (out >= sizeof(buffer) - 2)
86					break;
87				buffer[out++] = 0xc0 | (c >> 6);
88				buffer[out++] = 0x80 | (c & 0x3f);
89			} else {
90				if (out >= sizeof(buffer) - 3)
91					break;
92				buffer[out++] = 0xe0 | (c >> 12);
93				buffer[out++] = 0x80 | ((c >> 6) & 0x3f);
94				buffer[out++] = 0x80 | (c & 0x3f);
95			}
96		}
97
98		string++;
99	}
100	buffer[out++] = '\0';
101
102	char *copy = (char *)malloc(out);
103	if (copy == NULL)
104		return NULL;
105
106	memcpy(copy, buffer, out);
107	return copy;
108}
109
110
/*!	Returns whether \a c is a character that should be stripped from the
	beginning of a string: white space, '-', '/', or '\\'.
	The terminating NUL is never garbage.
*/
static bool
is_garbage(char c)
{
	// The <ctype.h> functions are only defined for unsigned char values
	// (and EOF); the strings handled here are UTF-8 and contain bytes that
	// are negative when stored in a plain char.
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
116
117
118static void
119sanitize_string(char *&string)
120{
121	if (string == NULL)
122		return;
123
124	// strip garbage at the start
125
126	uint32 length = strlen(string);
127	uint32 garbage = 0;
128	while (is_garbage(string[garbage])) {
129		garbage++;
130	}
131
132	length -= garbage;
133	if (garbage)
134		memmove(string, string + garbage, length + 1);
135
136	// strip garbage from the end
137
138	while (length > 1 && isspace(string[length - 1])) {
139		string[--length] = '\0';
140	}
141
142	if (!string[0]) {
143		// free string if it's empty
144		free(string);
145		string = NULL;
146	}
147}
148
149
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer to the match within \a string, \a string itself if
	\a find is empty, or \c NULL if there is no match or one of the
	arguments is \c NULL.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	if (find[0] == '\0')
		return (char *)string;

	// tolower() is only defined for unsigned char values; keep the result
	// as int so that both sides of the comparison below use the same type.
	const int first = tolower((unsigned char)find[0]);
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		// cheap check of the first character before the full comparison
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
173
174
175static void
176cut_string(char *string, const char *cut)
177{
178	if (string == NULL || cut == NULL)
179		return;
180
181	char *found = find_string(string, cut);
182	if (found != NULL) {
183		uint32 foundLength = strlen(found);
184		uint32 cutLength = strlen(cut);
185		memmove(found, found + cutLength, foundLength + 1 - cutLength);
186	}
187}
188
189
190static void
191sanitize_album(cdtext &text)
192{
193	cut_string(text.album, text.artist);
194	sanitize_string(text.album);
195
196	if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
197		// don't laugh, people really do that!
198		free(text.album);
199		text.album = NULL;
200	}
201
202	if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
203		// try to extract artist from album
204		char *space = strstr(text.album, "  ");
205		if (space != NULL) {
206			space[0] = '\0';
207			text.artist = text.album;
208			text.album = copy_string(space + 2);
209
210			sanitize_string(text.artist);
211			sanitize_string(text.album);
212		}
213	}
214}
215
216
217static void
218sanitize_titles(cdtext &text)
219{
220	for (uint8 i = 0; i < text.track_count; i++) {
221		cut_string(text.titles[i], "(Album Version)");
222		sanitize_string(text.titles[i]);
223		sanitize_string(text.artists[i]);
224
225		if (text.artists[i] != NULL && text.artist != NULL
226			&& !strcasecmp(text.artists[i], text.artist)) {
227			// if the title artist is the same as the main artist, remove it
228			free(text.artists[i]);
229			text.artists[i] = NULL;
230		}
231
232		if (text.titles[i] != NULL && text.titles[i][0] == '\t' && i > 0)
233			text.titles[i] = copy_string(text.titles[i - 1]);
234	}
235}
236
237
/*!	Checks whether all letters of \a string share the same case.
	\a first and \a upper carry the state across several strings: while
	\a first is \c true no letter has been seen yet; the first letter found
	sets \a upper and clears \a first. Non-letters are ignored.
	Returns \c false as soon as a letter with a different case is found,
	\c true otherwise (also for a \c NULL or letter-less string).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions need an unsigned char value
		const unsigned char c = (unsigned char)string[0];
		if (!isalpha(c)) {
			// Skip non-letters one at a time, so that the loop condition
			// notices the terminating NUL instead of running past it.
			continue;
		}

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
260
261
/*!	Capitalizes \a string in place: the first letter of every word is made
	upper case, all other letters lower case. Any character that is neither
	a letter nor an apostrophe starts a new word.
	Bytes >= 0x80 are left untouched and are regarded as part of the
	current word, so that the bytes of a UTF-8 sequence do not split a word
	in two.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	const unsigned char start = (unsigned char)string[0];
	bool newWord = isalpha(start) || isspace(start);

	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions need an unsigned char value
		const unsigned char c = (unsigned char)string[0];

		if (c >= 0x80) {
			// Part of a multi-byte character; we cannot change its case,
			// but the following letters belong to the same word.
			newWord = false;
		} else if (isalpha(c)) {
			if (newWord) {
				string[0] = toupper(c);
				newWord = false;
			} else
				string[0] = tolower(c);
		} else if (c != '\'')
			newWord = true;
	}
}
282
283
284static void
285correct_case(cdtext &text)
286{
287	// check if all titles share a single case
288	bool first = true;
289	bool upper;
290	if (!single_case(text.album, upper, first)
291		|| !single_case(text.artist, upper, first))
292		return;
293
294	for (int32 i = 0; i < text.track_count; i++) {
295		if (!single_case(text.titles[i], upper, first)
296			|| !single_case(text.artists[i], upper, first))
297			return;
298	}
299
300	// If we get here, everything has a single case; we fix that
301	// and capitalize each word
302
303	capitalize_string(text.album);
304	capitalize_string(text.artist);
305	for (int32 i = 0; i < text.track_count; i++) {
306		capitalize_string(text.titles[i]);
307		capitalize_string(text.artists[i]);
308	}
309}
310
311
312//	#pragma mark - CD-Text
313
314
315cdtext::cdtext()
316	:
317	artist(NULL),
318	album(NULL),
319	genre(NULL),
320	track_count(0)
321{
322	memset(titles, 0, sizeof(titles));
323	memset(artists, 0, sizeof(artists));
324}
325
326
327cdtext::~cdtext()
328{
329	free(album);
330	free(artist);
331	free(genre);
332
333	for (uint8 i = 0; i < track_count; i++) {
334		free(titles[i]);
335		free(artists[i]);
336	}
337}
338
339
340static bool
341is_string_id(uint8 id)
342{
343	return id >= kTrackID && id <= kMessageID;
344}
345
346
347/*!	Parses a \a pack data into the provided text buffer; the corresponding
348	track number will be left in \a track, and the type of the data in \a id.
349	The pack data is explained in SCSI MMC-3.
350
351	\a id, \a track, and \a state must stay constant between calls to this
352	function. \a state must be initialized to zero for the first call.
353*/
354static bool
355parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
356	cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
357	char *buffer, size_t &length)
358{
359	if (packLeft < sizeof(cdtext_pack_data))
360		return false;
361
362	uint8 number = pack->number;
363	size_t size = length;
364
365	if (state != 0) {
366		// we had a terminated string and a missing track
367		track++;
368
369		memcpy(buffer, lastPack->text + state, 12 - state);
370		if (pack->track - track == 1)
371			state = 0;
372		else
373			state += strnlen(buffer, 12 - state);
374		return true;
375	}
376
377	id = pack->id;
378	track = pack->track;
379
380	buffer[0] = '\0';
381	length = 0;
382
383	size_t position = pack->character_position;
384	if (position > 0 && lastPack != NULL) {
385		memcpy(buffer, &lastPack->text[12 - position], position);
386		length = position;
387	}
388
389	while (id == pack->id && track == pack->track) {
390#if 0
391		dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
392			pack->double_byte, pack->block_number, pack->character_position);
393		for (int32 i = 0; i < 12; i++) {
394			if (isprint(pack->text[i]))
395				dprintf("%c", pack->text[i]);
396			else
397				dprintf("-");
398		}
399		dprintf("\n");
400#endif
401		if (is_string_id(id)) {
402			// TODO: support double byte characters
403			if (length + 12 < size) {
404				memcpy(buffer + length, pack->text, 12);
405				length += 12;
406			}
407		}
408
409		packLeft -= sizeof(cdtext_pack_data);
410		if (packLeft < sizeof(cdtext_pack_data))
411			return false;
412
413		lastPack = pack;
414		number++;
415		pack++;
416
417		if (pack->number != number)
418			return false;
419	}
420
421	if (id == pack->id) {
422		length -= pack->character_position;
423		if (length >= size)
424			length = size - 1;
425		buffer[length] = '\0';
426
427		if (pack->track > lastPack->track + 1) {
428			// there is a missing track
429			for (int32 i = 0; i < 12; i++) {
430				if (lastPack->text[i] == '\0') {
431					state = i + (lastPack->double_byte ? 2 : 1);
432					break;
433				}
434			}
435		}
436	}
437
438	return true;
439}
440
441
442static void
443dump_cdtext(cdtext &text)
444{
445	if (text.album)
446		dprintf("Album:    \"%s\"\n", text.album);
447	if (text.artist)
448		dprintf("Artist:   \"%s\"\n", text.artist);
449	for (uint8 i = 0; i < text.track_count; i++) {
450		dprintf("Track %02u: \"%s\"%s%s%s\n", i + 1, text.titles[i],
451			text.artists[i] ? " (" : "", text.artists[i] ? text.artists[i] : "",
452			text.artists[i] ? ")" : "");
453	}
454}
455
456
457static void
458dump_toc(scsi_toc_toc *toc)
459{
460	int32 numTracks = toc->last_track + 1 - toc->first_track;
461
462	for (int32 i = 0; i < numTracks; i++) {
463		scsi_toc_track& track = toc->tracks[i];
464		scsi_cd_msf& next = toc->tracks[i + 1].start.time;
465			// the last track is always lead-out
466		scsi_cd_msf& start = toc->tracks[i].start.time;
467		scsi_cd_msf length;
468
469		uint64 diff = next.minute * kFramesPerMinute
470			+ next.second * kFramesPerSecond + next.frame
471			- start.minute * kFramesPerMinute
472			- start.second * kFramesPerSecond - start.frame;
473		length.minute = diff / kFramesPerMinute;
474		length.second = (diff % kFramesPerMinute) / kFramesPerSecond;
475		length.frame = diff % kFramesPerSecond;
476
477		dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
478			track.track_number, start.minute, start.second, start.frame,
479			length.minute, length.second, length.frame);
480	}
481}
482
483
484static status_t
485read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
486{
487	size_t framesLeft = count;
488
489	while (framesLeft > 0) {
490		// If the initial count was >= 32, and not a multiple of 8, and the
491		// ioctl fails, we switch to reading 8 frames at a time. However the
492		// last read can read between 1 and 7 frames only, to not overflow
493		// the buffer.
494		count = std::min(count, framesLeft);
495
496		scsi_read_cd read;
497		read.start_m = firstFrame / kFramesPerMinute;
498		read.start_s = (firstFrame / kFramesPerSecond) % 60;
499		read.start_f = firstFrame % kFramesPerSecond;
500
501		read.length_m = count / kFramesPerMinute;
502		read.length_s = (count / kFramesPerSecond) % 60;
503		read.length_f = count % kFramesPerSecond;
504
505		read.buffer_length = count * kFrameSize;
506		read.buffer = (char *)buffer;
507		read.play = false;
508
509		if (ioctl(fd, B_SCSI_READ_CD, &read, sizeof(scsi_read_cd)) < 0) {
510			// drive couldn't read data - try again to read with a smaller block size
511			if (count == 1)
512				return errno;
513
514			if (count >= 32)
515				count = 8;
516			else
517				count = 1;
518
519			continue;
520		}
521
522		buffer += count * kFrameSize;
523		framesLeft -= count;
524		firstFrame += count;
525	}
526
527	return B_OK;
528}
529
530
531static status_t
532read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
533	size_t bufferSize)
534{
535	raw_device_command raw;
536	uint8 *senseData = (uint8 *)malloc(kSenseSize);
537	if (senseData == NULL)
538		return B_NO_MEMORY;
539
540	memset(&raw, 0, sizeof(raw_device_command));
541	memset(senseData, 0, kSenseSize);
542	memset(buffer, 0, bufferSize);
543
544	scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
545	toc.opcode = SCSI_OP_READ_TOC;
546	toc.time = 1;
547	toc.format = format;
548	toc.track = track;
549	toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);
550
551	raw.command_length = 10;
552	raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
553		| B_RAW_DEVICE_SHORT_READ_VALID;
554	raw.scsi_status = 0;
555	raw.cam_status = 0;
556	raw.data = buffer;
557	raw.data_length = bufferSize;
558	raw.timeout = 10000000LL;	// 10 secs
559	raw.sense_data = senseData;
560	raw.sense_data_length = sizeof(kSenseSize);
561
562	if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw, sizeof(raw)) == 0
563		&& raw.scsi_status == 0 && raw.cam_status == 1) {
564		free(senseData);
565		return B_OK;
566	}
567
568	free(senseData);
569	return B_ERROR;
570}
571
572
573//	#pragma mark - exported functions
574
575
576status_t
577read_cdtext(int fd, struct cdtext &cdtext)
578{
579	uint8 *buffer = (uint8 *)malloc(kBufferSize);
580	if (buffer == NULL)
581		return B_NO_MEMORY;
582
583	// do it twice, just in case...
584	// (at least my CD-ROM sometimes returned broken data on first try)
585	read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
586		kBufferSize);
587	if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
588			kBufferSize) != B_OK) {
589		free(buffer);
590		return B_ERROR;
591	}
592
593	scsi_toc_general *header = (scsi_toc_general *)buffer;
594
595	uint32 packLength = B_BENDIAN_TO_HOST_INT16(header->data_length) - 2;
596	cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
597	cdtext_pack_data *lastPack = NULL;
598	uint8 state = 0;
599	uint8 track = 0;
600	uint8 id = 0;
601	char text[256];
602
603	// TODO: determine encoding!
604
605	while (true) {
606		size_t length = sizeof(text);
607
608		if (!parse_pack_data(pack, packLength, lastPack, id, track,
609				state, text, length))
610			break;
611
612		switch (id) {
613			case kTrackID:
614				if (track == 0) {
615					if (cdtext.album == NULL)
616						cdtext.album = to_utf8(text);
617				} else if (track <= kMaxTracks) {
618					if (cdtext.titles[track - 1] == NULL)
619						cdtext.titles[track - 1] = to_utf8(text);
620					if (track > cdtext.track_count)
621						cdtext.track_count = track;
622				}
623				break;
624
625			case kArtistID:
626				if (track == 0) {
627					if (cdtext.artist == NULL)
628						cdtext.artist = to_utf8(text);
629				} else if (track <= kMaxTracks) {
630					if (cdtext.artists[track - 1] == NULL)
631						cdtext.artists[track - 1] = to_utf8(text);
632				}
633				break;
634
635			default:
636				if (is_string_id(id))
637					dprintf("UNKNOWN %u: \"%s\"\n", id, text);
638				break;
639		}
640	}
641
642	free(buffer);
643
644	if (cdtext.artist == NULL && cdtext.album == NULL)
645		return B_ERROR;
646
647	for (int i = 0; i < cdtext.track_count; i++) {
648		if (cdtext.titles[i] == NULL)
649			return B_ERROR;
650	}
651
652	sanitize_string(cdtext.artist);
653	sanitize_album(cdtext);
654	sanitize_titles(cdtext);
655	correct_case(cdtext);
656
657	dump_cdtext(cdtext);
658	return B_OK;
659}
660
661
662status_t
663read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
664{
665	status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
666		(uint8*)toc, length);
667	if (status < B_OK)
668		return status;
669
670	// make sure the values in the TOC make sense
671
672	int32 lastTrack = toc->last_track + 1 - toc->first_track;
673	size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
674	if (dataLength < sizeof(scsi_toc_toc) || lastTrack <= 0)
675		return B_BAD_DATA;
676
677	if (length > dataLength)
678		length = dataLength;
679
680	length -= sizeof(scsi_toc_general);
681
682	if (lastTrack * sizeof(scsi_toc_track) > length)
683		toc->last_track = length / sizeof(scsi_toc_track) + toc->first_track;
684
685	dump_toc(toc);
686	return B_OK;
687}
688
689
690status_t
691read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
692	off_t bufferOffset, void *buffer, size_t bufferSize)
693{
694	if (bufferOffset >= 0 && bufferOffset <= offset + (off_t)length
695		&& bufferOffset + (off_t)bufferSize > offset) {
696		if (offset >= bufferOffset) {
697			// buffer reaches into the beginning of the request
698			off_t dataOffset = offset - bufferOffset;
699			size_t bytes = min_c(bufferSize - dataOffset, length);
700			if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
701				return B_BAD_ADDRESS;
702
703			data = (void *)((uint8 *)data + bytes);
704			length -= bytes;
705			offset += bytes;
706		} else if (offset < bufferOffset
707			&& offset + length < bufferOffset + bufferSize) {
708			// buffer overlaps at the end of the request
709			off_t dataOffset = bufferOffset - offset;
710			size_t bytes = length - dataOffset;
711			if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
712				return B_BAD_ADDRESS;
713
714			length -= bytes;
715		}
716		// we don't handle the case where we would need to split the request
717	}
718
719	while (length > 0) {
720		off_t frame = offset / kFrameSize;
721		uint32 count = bufferSize / kFrameSize;
722		if (frame + count > endFrame)
723			count = endFrame - frame;
724
725		status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
726		if (status < B_OK)
727			return status;
728
729		off_t dataOffset = offset % kFrameSize;
730		size_t bytes = bufferSize - dataOffset;
731		if (bytes > length)
732			bytes = length;
733
734		if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
735			return B_BAD_ADDRESS;
736
737		data = (void *)((uint8 *)data + bytes);
738		length -= bytes;
739		offset += bytes;
740	}
741
742	return B_OK;
743}
744