1/*
2 * Copyright 2010-2018 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Christophe Huriaux, c.huriaux@gmail.com
7 *		Andrew Lindesay, apl@lindesay.co.nz
8 */
9
10
11#include <Url.h>
12
13#include <ctype.h>
14#include <cstdio>
15#include <cstdlib>
16#include <new>
17
18#include <MimeType.h>
19#include <Roster.h>
20
21#ifdef HAIKU_TARGET_PLATFORM_HAIKU
22	#include <ICUWrapper.h>
23#endif
24
25#ifdef HAIKU_TARGET_PLATFORM_HAIKU
26	#include <unicode/idna.h>
27	#include <unicode/stringpiece.h>
28#endif
29
30
// Name of the BMessage field in which Archive() stores the URL string and
// from which the archive constructor reads it back.
static const char* kArchivedUrl = "be:url string";

/*! These flags can be combined to control the parse process. */

// No special handling: the parsed path is normalized through SetPath().
const uint32 PARSE_NO_MASK_BIT				= 0x00000000;
// Keep the parsed path exactly as written (stored via _SetPathUnsafe()), so
// that "." and ".." segments survive the parse.
const uint32 PARSE_RAW_PATH_MASK_BIT		= 0x00000001;
37
38
// Constructs a URL by parsing the given string with SetUrlString().
// SetUrlString() discards the parse status, so a string that cannot be
// parsed simply leaves the object with reset fields.
BUrl::BUrl(const char* url)
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fHasHost(false),
	fHasFragment(false)
{
	// The parse starts by resetting every field and flag, including the
	// ones not listed in the initializer list above.
	SetUrlString(url);
}
54
55
// Constructs a URL from an archive message. The URL string is read from the
// kArchivedUrl field; when that field is missing the object is left empty.
// NOTE(review): archive is dereferenced without a NULL check - callers are
// presumably expected to pass a valid message; confirm.
BUrl::BUrl(BMessage* archive)
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fHasHost(false),
	fHasFragment(false)
{
	BString url;

	if (archive->FindString(kArchivedUrl, &url) == B_OK)
		SetUrlString(url);
	else
		_ResetFields();
}
76
77
// Copy constructor. All components and "has" flags are copied from other.
// The cached composite strings (full URL, authority, user info) are only
// copied when other marks them as valid; otherwise they stay empty and are
// rebuilt on demand by the matching accessor.
BUrl::BUrl(const BUrl& other)
	:
	BArchivable(),
	fUrlString(),
	fProtocol(other.fProtocol),
	fUser(other.fUser),
	fPassword(other.fPassword),
	fHost(other.fHost),
	fPort(other.fPort),
	fPath(other.fPath),
	fRequest(other.fRequest),
	fFragment(other.fFragment),
	fUrlStringValid(other.fUrlStringValid),
	fAuthorityValid(other.fAuthorityValid),
	fUserInfoValid(other.fUserInfoValid),
	fHasProtocol(other.fHasProtocol),
	fHasUserName(other.fHasUserName),
	fHasPassword(other.fHasPassword),
	fHasHost(other.fHasHost),
	fHasPort(other.fHasPort),
	fHasPath(other.fHasPath),
	fHasRequest(other.fHasRequest),
	fHasFragment(other.fHasFragment)
{
	// Only take over the caches that are known to be up to date.
	if (fUrlStringValid)
		fUrlString = other.fUrlString;

	if (fAuthorityValid)
		fAuthority = other.fAuthority;

	if (fUserInfoValid)
		fUserInfo = other.fUserInfo;

}
112
113
// Constructs a URL by resolving the reference "location" against "base".
// Components present in location take precedence; missing ones are taken
// from base.
BUrl::BUrl(const BUrl& base, const BString& location)
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fAuthorityValid(false),
	fUserInfoValid(false),
	fHasUserName(false),
	fHasPassword(false),
	fHasHost(false),
	fHasPort(false),
	fHasFragment(false)
{
	// This implements the algorithm in RFC3986, Section 5.2.

	BUrl relative;
	relative._ExplodeUrlString(location, PARSE_RAW_PATH_MASK_BIT);
		// This parse will leave the path 'raw' so that it still carries any
		// special sequences such as '..' and '.' in it.  This way it can be
		// later combined with the base.

	if (relative.HasProtocol()) {
		// The reference has its own protocol: nothing but the fragment
		// handling below is shared with the base.
		SetProtocol(relative.Protocol());
		if (relative.HasAuthority())
			SetAuthority(relative.Authority());
		SetPath(relative.Path());
		SetRequest(relative.Request());
	} else {
		if (relative.HasAuthority()) {
			// Network-path reference: only the protocol comes from the base.
			SetAuthority(relative.Authority());
			SetPath(relative.Path());
			SetRequest(relative.Request());
		} else {
			if (relative.Path().IsEmpty()) {
				// Same document: keep the base path as is, and the base
				// request unless the reference carries its own.
				_SetPathUnsafe(base.Path());
				if (relative.HasRequest())
					SetRequest(relative.Request());
				else
					SetRequest(base.Request());
			} else {
				// An absolute path replaces the base path, a relative one is
				// merged with it; SetPath() removes the dot segments.
				if (relative.Path()[0] == '/')
					SetPath(relative.Path());
				else {
					BString path = base._MergePath(relative.Path());
					SetPath(path);
				}
				SetRequest(relative.Request());
			}

			if (base.HasAuthority())
				SetAuthority(base.Authority());
		}
		SetProtocol(base.Protocol());
	}

	// The fragment always comes from the reference, never from the base.
	if (relative.HasFragment())
		SetFragment(relative.Fragment());
}
177
178
// Constructs an empty URL: every component is cleared and every "has" flag
// and cache-validity flag is set to false by _ResetFields().
BUrl::BUrl()
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fHasHost(false),
	fHasFragment(false)
{
	_ResetFields();
}
194
195
// Constructs a "file" URL from a file system path. The path is URL-encoded
// in strict, directory mode (slashes are kept as they are) and parsed as a
// URL string; afterwards the protocol is set to "file".
BUrl::BUrl(const BPath& path)
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fHasHost(false),
	fHasFragment(false)
{
	SetUrlString(UrlEncode(path.Path(), true, true));
	SetProtocol("file");
}
212
213
// Destructor. Nothing to do explicitly: the body is empty and the members
// are destroyed implicitly.
BUrl::~BUrl()
{
}
217
218
219// #pragma mark URL fields modifiers
220
221
// Replaces the whole URL by parsing the given string with path normalization
// enabled. Returns *this to allow chaining. The status returned by the parser
// is discarded; use IsValid() to check the result.
BUrl&
BUrl::SetUrlString(const BString& url)
{
	_ExplodeUrlString(url, PARSE_NO_MASK_BIT);
	return *this;
}
228
229
230BUrl&
231BUrl::SetProtocol(const BString& protocol)
232{
233	fProtocol = protocol;
234	fHasProtocol = !fProtocol.IsEmpty();
235	fUrlStringValid = false;
236	return *this;
237}
238
239
240BUrl&
241BUrl::SetUserName(const BString& user)
242{
243	fUser = user;
244	fHasUserName = !fUser.IsEmpty();
245	fUrlStringValid = false;
246	fAuthorityValid = false;
247	fUserInfoValid = false;
248	return *this;
249}
250
251
252BUrl&
253BUrl::SetPassword(const BString& password)
254{
255	fPassword = password;
256	fHasPassword = !fPassword.IsEmpty();
257	fUrlStringValid = false;
258	fAuthorityValid = false;
259	fUserInfoValid = false;
260	return *this;
261}
262
263
264BUrl&
265BUrl::SetHost(const BString& host)
266{
267	fHost = host;
268	fHasHost = !fHost.IsEmpty();
269	fUrlStringValid = false;
270	fAuthorityValid = false;
271	return *this;
272}
273
274
275BUrl&
276BUrl::SetPort(int port)
277{
278	fPort = port;
279	fHasPort = (port != 0);
280	fUrlStringValid = false;
281	fAuthorityValid = false;
282	return *this;
283}
284
285
286void
287BUrl::_RemoveLastPathComponent(BString& path)
288{
289	int32 outputLastSlashIdx = path.FindLast('/');
290
291	if (outputLastSlashIdx == B_ERROR)
292		path.Truncate(0);
293	else
294		path.Truncate(outputLastSlashIdx);
295}
296
297
298BUrl&
299BUrl::SetPath(const BString& path)
300{
301	// Implements RFC3986 section 5.2.4, "Remove dot segments"
302
303	// 1.
304	BString output;
305	BString input(path);
306
307	// 2.
308	while (!input.IsEmpty()) {
309		// 2.A.
310		if (input.StartsWith("./")) {
311			input.Remove(0, 2);
312			continue;
313		}
314
315		if (input.StartsWith("../")) {
316			input.Remove(0, 3);
317			continue;
318		}
319
320		// 2.B.
321		if (input.StartsWith("/./")) {
322			input.Remove(0, 2);
323			continue;
324		}
325
326		if (input == "/.") {
327			input.Remove(1, 1);
328			continue;
329		}
330
331		// 2.C.
332		if (input.StartsWith("/../")) {
333			input.Remove(0, 3);
334			_RemoveLastPathComponent(output);
335			continue;
336		}
337
338		if (input == "/..") {
339			input.Remove(1, 2);
340			_RemoveLastPathComponent(output);
341			continue;
342		}
343
344		// 2.D.
345		if (input == "." || input == "..") {
346			break;
347		}
348
349		if (input == "/.") {
350			input.Remove(1, 1);
351			continue;
352		}
353
354		// 2.E.
355		int slashpos = input.FindFirst('/', 1);
356		if (slashpos > 0) {
357			output.Append(input, slashpos);
358			input.Remove(0, slashpos);
359		} else {
360			output.Append(input);
361			break;
362		}
363	}
364
365	_SetPathUnsafe(output);
366	return *this;
367}
368
369
370BUrl&
371BUrl::SetRequest(const BString& request)
372{
373	fRequest = request;
374	fHasRequest = !fRequest.IsEmpty();
375	fUrlStringValid = false;
376	return *this;
377}
378
379
380BUrl&
381BUrl::SetFragment(const BString& fragment)
382{
383	fFragment = fragment;
384	fHasFragment = true;
385	fUrlStringValid = false;
386	return *this;
387}
388
389
390// #pragma mark URL fields access
391
392
393const BString&
394BUrl::UrlString() const
395{
396	if (!fUrlStringValid) {
397		fUrlString.Truncate(0);
398
399		if (HasProtocol()) {
400			fUrlString << fProtocol << ':';
401		}
402
403		if (HasAuthority()) {
404			fUrlString << "//";
405			fUrlString << Authority();
406		}
407		fUrlString << Path();
408
409		if (HasRequest())
410			fUrlString << '?' << fRequest;
411
412		if (HasFragment())
413			fUrlString << '#' << fFragment;
414
415		fUrlStringValid = true;
416	}
417
418	return fUrlString;
419}
420
421
// Returns the protocol (scheme) of the URL.
const BString&
BUrl::Protocol() const
{
	return fProtocol;
}
427
428
// Returns the user name part of the authority.
const BString&
BUrl::UserName() const
{
	return fUser;
}
434
435
// Returns the password part of the authority.
const BString&
BUrl::Password() const
{
	return fPassword;
}
441
442
443const BString&
444BUrl::UserInfo() const
445{
446	if (!fUserInfoValid) {
447		fUserInfo = fUser;
448
449		if (HasPassword())
450			fUserInfo << ':' << fPassword;
451
452		fUserInfoValid = true;
453	}
454
455	return fUserInfo;
456}
457
458
// Returns the host part of the authority.
const BString&
BUrl::Host() const
{
	return fHost;
}
464
465
// Returns the port part of the authority; 0 when no port is set.
int
BUrl::Port() const
{
	return fPort;
}
471
472
473const BString&
474BUrl::Authority() const
475{
476	if (!fAuthorityValid) {
477		fAuthority.Truncate(0);
478
479		if (HasUserInfo())
480			fAuthority << UserInfo() << '@';
481		fAuthority << Host();
482
483		if (HasPort())
484			fAuthority << ':' << fPort;
485
486		fAuthorityValid = true;
487	}
488	return fAuthority;
489}
490
491
// Returns the path part of the URL.
const BString&
BUrl::Path() const
{
	return fPath;
}
497
498
// Returns the request (query) part of the URL, without the leading '?'
// that UrlString() puts in front of it.
const BString&
BUrl::Request() const
{
	return fRequest;
}
504
505
// Returns the fragment part of the URL, without the leading '#' that
// UrlString() puts in front of it.
const BString&
BUrl::Fragment() const
{
	return fFragment;
}
511
512
513// #pragma mark URL fields tests
514
515
516bool
517BUrl::IsValid() const
518{
519	if (!fHasProtocol)
520		return false;
521
522	if (!_IsProtocolValid())
523		return false;
524
525	// it is possible that there can be an authority but no host.
526	// wierd://tea:tree@/x
527	if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid())
528		return false;
529
530	if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
531		|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
532		|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
533		|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
534		return HasHost() && !fHost.IsEmpty();
535	}
536
537	if (fProtocol == "file")
538		return fHasPath;
539
540	return true;
541}
542
543
// Returns whether a protocol is set.
bool
BUrl::HasProtocol() const
{
	return fHasProtocol;
}
549
550
// Returns whether the URL has an authority part, i.e. a host or a user name.
bool
BUrl::HasAuthority() const
{
	return fHasHost || fHasUserName;
}
556
557
// Returns whether a user name is set.
bool
BUrl::HasUserName() const
{
	return fHasUserName;
}
563
564
// Returns whether a password is set.
bool
BUrl::HasPassword() const
{
	return fHasPassword;
}
570
571
// Returns whether the URL has user info, i.e. a user name or a password.
bool
BUrl::HasUserInfo() const
{
	return fHasUserName || fHasPassword;
}
577
578
// Returns whether a host is set. The flag can be true with an empty host:
// SetAuthority() always sets it, even for an empty authority.
bool
BUrl::HasHost() const
{
	return fHasHost;
}
584
585
// Returns whether a (non-zero) port is set.
bool
BUrl::HasPort() const
{
	return fHasPort;
}
591
592
// Returns whether a path is set. An empty path counts as a path once it has
// been set.
bool
BUrl::HasPath() const
{
	return fHasPath;
}
598
599
// Returns whether a request (query) is set.
bool
BUrl::HasRequest() const
{
	return fHasRequest;
}
605
606
// Returns whether a fragment is set.
bool
BUrl::HasFragment() const
{
	return fHasFragment;
}
612
613
614// #pragma mark URL encoding/decoding of needed fields
615
616
617void
618BUrl::UrlEncode(bool strict)
619{
620	fUser = _DoUrlEncodeChunk(fUser, strict);
621	fPassword = _DoUrlEncodeChunk(fPassword, strict);
622	fHost = _DoUrlEncodeChunk(fHost, strict);
623	fFragment = _DoUrlEncodeChunk(fFragment, strict);
624	fPath = _DoUrlEncodeChunk(fPath, strict, true);
625}
626
627
628void
629BUrl::UrlDecode(bool strict)
630{
631	fUser = _DoUrlDecodeChunk(fUser, strict);
632	fPassword = _DoUrlDecodeChunk(fPassword, strict);
633	fHost = _DoUrlDecodeChunk(fHost, strict);
634	fFragment = _DoUrlDecodeChunk(fFragment, strict);
635	fPath = _DoUrlDecodeChunk(fPath, strict);
636}
637
638
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Converts the host name to its ASCII form using an ICU UTS #46 converter.
// Returns B_OK on success. Returns B_ERROR, leaving the host untouched, when
// the converter cannot be created or the conversion reports a failure.
status_t
BUrl::IDNAToAscii()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (U_FAILURE(err) || converter == NULL) {
		// Creating the converter failed; it must not be dereferenced.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The cached authority and URL strings embed the old host name.
	fUrlStringValid = false;
	fAuthorityValid = false;

	return B_OK;
}
#endif
661
662
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Converts the host name to its Unicode form (UTF-8) using an ICU UTS #46
// converter. Returns B_OK on success. Returns B_ERROR, leaving the host
// untouched, when the converter cannot be created or the conversion reports
// a failure.
status_t
BUrl::IDNAToUnicode()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (U_FAILURE(err) || converter == NULL) {
		// Creating the converter failed; it must not be dereferenced.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The cached authority and URL strings embed the old host name.
	fUrlStringValid = false;
	fAuthorityValid = false;

	return B_OK;
}
#endif
685
686
687// #pragma mark - utility functionality
688
689
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Returns whether a preferred application is registered for the protocol of
// this URL: its signature must start with "application/" (compared without
// regard to case) and be a valid MIME type.
bool
BUrl::HasPreferredApplication() const
{
	const BString signature = PreferredApplication();
	BMimeType signatureType(signature.String());

	const bool looksLikeApplication
		= signature.IFindFirst("application/") == 0;

	return looksLikeApplication && signatureType.IsValid();
}
#endif
704
705
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Returns the signature of the application preferred for the MIME type
// associated with the protocol of this URL (see _UrlMimeType()).
// An empty string is returned when the lookup fails.
BString
BUrl::PreferredApplication() const
{
	BString appSignature;
	BMimeType mime(_UrlMimeType().String());

	char* buffer = appSignature.LockBuffer(B_MIME_TYPE_LENGTH);
	if (buffer == NULL)
		return BString();

	// Make sure the buffer holds a valid (empty) string whatever
	// GetPreferredApp() does with it on failure.
	buffer[0] = '\0';
	if (mime.GetPreferredApp(buffer) != B_OK)
		buffer[0] = '\0';

	appSignature.UnlockBuffer();

	return appSignature;
}
#endif
718
719
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Launches the application registered for the MIME type of this URL's
// protocol, passing the URL string as its only argument.
// Returns B_BAD_VALUE for an invalid URL, B_NAME_TOO_LONG when the URL
// string exceeds B_PATH_NAME_LENGTH, and the status of the launch otherwise.
// onProblemAskUser is currently unused (see the TODO below).
status_t
BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
{
	if (!IsValid())
		return B_BAD_VALUE;

	BString target = UrlString();
	if (target.Length() > B_PATH_NAME_LENGTH) {
		// TODO: BAlert
		//	if (onProblemAskUser)
		//		BAlert ... Too long URL!
#if DEBUG
		fprintf(stderr, "URL too long");
#endif
		return B_NAME_TOO_LONG;
	}

#if DEBUG
	if (HasPreferredApplication())
		printf("HasPreferredApplication() == true\n");
	else
		printf("HasPreferredApplication() == false\n");
#endif

	// Only the URL itself is handed to the launched application; the first
	// entry is skipped when calling Launch().
	char* arguments[] = {
		const_cast<char*>("BUrlInvokedApplication"),
		const_cast<char*>(target.String()),
		NULL
	};

	const status_t result = be_roster->Launch(_UrlMimeType().String(), 1,
		arguments + 1);

#if DEBUG
	if (result != B_OK)
		fprintf(stderr, "Opening URL failed: %s\n", strerror(result));
#endif

	return result;
}
#endif
761
762
763// #pragma mark Url encoding/decoding of string
764
765
// Returns a URL-encoded copy of the given string. In non-strict mode spaces
// are encoded as '+'. In directory mode '/' and '\' are left unencoded.
/*static*/ BString
BUrl::UrlEncode(const BString& url, bool strict, bool directory)
{
	return _DoUrlEncodeChunk(url, strict, directory);
}
771
772
// Returns a URL-decoded copy of the given string. In non-strict mode '+' is
// decoded as a space.
/*static*/ BString
BUrl::UrlDecode(const BString& url, bool strict)
{
	return _DoUrlDecodeChunk(url, strict);
}
778
779
780// #pragma mark BArchivable members
781
782
783status_t
784BUrl::Archive(BMessage* into, bool deep) const
785{
786	status_t ret = BArchivable::Archive(into, deep);
787
788	if (ret == B_OK)
789		ret = into->AddString(kArchivedUrl, UrlString());
790
791	return ret;
792}
793
794
795/*static*/ BArchivable*
796BUrl::Instantiate(BMessage* archive)
797{
798	if (validate_instantiation(archive, "BUrl"))
799		return new(std::nothrow) BUrl(archive);
800	return NULL;
801}
802
803
804// #pragma mark URL comparison
805
806
807bool
808BUrl::operator==(BUrl& other) const
809{
810	UrlString();
811	other.UrlString();
812
813	return fUrlString == other.fUrlString;
814}
815
816
// Negation of operator==: true when the full URL strings differ.
bool
BUrl::operator!=(BUrl& other) const
{
	return !(*this == other);
}
822
823
824// #pragma mark URL assignment
825
826
827const BUrl&
828BUrl::operator=(const BUrl& other)
829{
830	fUrlStringValid = other.fUrlStringValid;
831	if (fUrlStringValid)
832		fUrlString = other.fUrlString;
833
834	fAuthorityValid = other.fAuthorityValid;
835	if (fAuthorityValid)
836		fAuthority = other.fAuthority;
837
838	fUserInfoValid = other.fUserInfoValid;
839	if (fUserInfoValid)
840		fUserInfo = other.fUserInfo;
841
842	fProtocol = other.fProtocol;
843	fUser = other.fUser;
844	fPassword = other.fPassword;
845	fHost = other.fHost;
846	fPort = other.fPort;
847	fPath = other.fPath;
848	fRequest = other.fRequest;
849	fFragment = other.fFragment;
850
851	fHasProtocol = other.fHasProtocol;
852	fHasUserName = other.fHasUserName;
853	fHasPassword = other.fHasPassword;
854	fHasHost = other.fHasHost;
855	fHasPort = other.fHasPort;
856	fHasPath = other.fHasPath;
857	fHasRequest = other.fHasRequest;
858	fHasFragment = other.fHasFragment;
859
860	return *this;
861}
862
863
// Replaces this URL by parsing the given string; see SetUrlString().
const BUrl&
BUrl::operator=(const BString& string)
{
	SetUrlString(string);
	return *this;
}
870
871
// Replaces this URL by parsing the given C string; see SetUrlString().
const BUrl&
BUrl::operator=(const char* string)
{
	SetUrlString(string);
	return *this;
}
878
879
880// #pragma mark URL to string conversion
881
882
// Converts the URL to a C string holding the full URL. The result comes
// from the string returned by UrlString(), i.e. the cached member string.
BUrl::operator const char*() const
{
	return UrlString();
}
887
888
889void
890BUrl::_ResetFields()
891{
892	fHasProtocol = false;
893	fHasUserName = false;
894	fHasPassword = false;
895	fHasHost = false;
896	fHasPort = false;
897	fHasPath = false;
898	fHasRequest = false;
899	fHasFragment = false;
900
901	fProtocol.Truncate(0);
902	fUser.Truncate(0);
903	fPassword.Truncate(0);
904	fHost.Truncate(0);
905	fPort = 0;
906	fPath.Truncate(0);
907	fRequest.Truncate(0);
908	fFragment.Truncate(0);
909
910	// Force re-generation of these fields
911	fUrlStringValid = false;
912	fUserInfoValid = false;
913	fAuthorityValid = false;
914}
915
916
917bool
918BUrl::_ContainsDelimiter(const BString& url)
919{
920	int32 len = url.Length();
921
922	for (int32 i = 0; i < len; i++) {
923		switch (url[i]) {
924			case ' ':
925			case '\n':
926			case '\t':
927			case '\r':
928			case '<':
929			case '>':
930			case '"':
931				return true;
932		}
933	}
934
935	return false;
936}
937
938
// States of the URL parser in BUrl::_ExplodeUrlString(), listed in the order
// in which the components appear in a URL.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL,				// reading the protocol characters
	EXPLODE_PROTOCOLTERMINATOR,		// expecting the ':' after the protocol
	EXPLODE_AUTHORITYORPATH,		// deciding whether "//" authority follows
	EXPLODE_AUTHORITY,				// reading the authority
	EXPLODE_PATH,					// reading the path
	EXPLODE_REQUEST, // query
	EXPLODE_FRAGMENT,				// reading the fragment after '#'
	EXPLODE_COMPLETE				// parsing is finished
};
949
950
// Predicate telling whether a character belongs to the URL component that is
// currently being scanned; used by char_offset_until_fn_false().
typedef bool (*explode_char_match_fn)(char c);
952
953
// Returns whether c may be part of a protocol (scheme): an alphanumeric
// character, '+', '.' or '-'.
static bool
explode_is_protocol_char(char c)
{
	// The <ctype.h> functions require a value representable as unsigned
	// char (or EOF); a plain char may be negative for bytes above 127.
	return isalnum((unsigned char)c) || c == '+' || c == '.' || c == '-';
}
959
960
// Returns whether c may be part of the authority, which ends at the first
// '/', '?' or '#'.
static bool
explode_is_authority_char(char c)
{
	return c != '/' && c != '?' && c != '#';
}
966
967
// Returns whether c may be part of the path, which ends at the first '?'
// or '#'.
static bool
explode_is_path_char(char c)
{
	return c != '#' && c != '?';
}
973
974
// Returns whether c may be part of the request (query), which ends at the
// first '#'.
static bool
explode_is_request_char(char c)
{
	return !(c == '#');
}
980
981
982static int32
983char_offset_until_fn_false(const char* url, int32 len, int32 offset,
984	explode_char_match_fn fn)
985{
986	while (offset < len && fn(url[offset]))
987		offset++;
988
989	return offset;
990}
991
/*
 * This function takes a URL in string-form and parses the components of the
 * URL out into the fields of this object. All fields are reset first.
 *
 * url:   the URL string to parse.
 * flags: PARSE_* bits; with PARSE_RAW_PATH_MASK_BIT the path is stored as
 *        written instead of being normalized by SetPath().
 *
 * Returns B_BAD_VALUE (with all fields reset) when the string contains
 * whitespace or delimiter characters, B_OK otherwise.
 */
status_t
BUrl::_ExplodeUrlString(const BString& url, uint32 flags)
{
	_ResetFields();

	// RFC3986, Appendix C; the URL should not contain whitespace or delimiters
	// by this point.

	if (_ContainsDelimiter(url))
		return B_BAD_VALUE;

	explode_url_parse_state state = EXPLODE_PROTOCOL;
	int32 offset = 0;
	int32 length = url.Length();
	bool forceHasHost = false;
	const char *url_c = url.String();

	// The regexp is provided in RFC3986 (URI generic syntax), Appendix B
	// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
	// The ensuing logic attempts to simulate the behaviour of extracting the groups
	// from the string without requiring a group-capable regex engine.

	// Each iteration handles one state; the loop ends as soon as the whole
	// string has been consumed, whatever the state is at that point.
	while (offset < length) {
		switch (state) {

			case EXPLODE_PROTOCOL:
			{
				int32 end_protocol = char_offset_until_fn_false(url_c, length,
					offset, explode_is_protocol_char);

				if (end_protocol < length) {
					// Tentatively take the leading characters as protocol;
					// the next state verifies that a ':' follows.
					SetProtocol(BString(&url_c[offset], end_protocol - offset));
					state = EXPLODE_PROTOCOLTERMINATOR;
					offset = end_protocol;
				} else {
					// No protocol was found, try parsing from the string
					// start, beginning with authority or path
					SetProtocol("");
					offset = 0;
					state = EXPLODE_AUTHORITYORPATH;
				}
				break;
			}

			case EXPLODE_PROTOCOLTERMINATOR:
			{
				if (url[offset] == ':') {
					offset++;
				} else {
					// No protocol was found, try parsing from the string
					// start, beginning with authority or path
					SetProtocol("");
					offset = 0;
				}
				state = EXPLODE_AUTHORITYORPATH;
				break;
			}

			case EXPLODE_AUTHORITYORPATH:
			{
				// The authority must start with //. If it isn't there, skip
				// to parsing the path.
				if (strncmp(&url_c[offset], "//", 2) == 0) {
					state = EXPLODE_AUTHORITY;
					// if we see the // then this would imply that a host is
					// to be rendered even if no host has been parsed.
					forceHasHost = true;
					offset += 2;
				} else {
					state = EXPLODE_PATH;
				}
				break;
			}

			case EXPLODE_AUTHORITY:
			{
				// The authority runs up to the next '/', '?' or '#';
				// SetAuthority() splits it into user, password, host, port.
				int end_authority = char_offset_until_fn_false(url_c, length,
					offset, explode_is_authority_char);
				SetAuthority(BString(&url_c[offset], end_authority - offset));
				state = EXPLODE_PATH;
				offset = end_authority;
				break;
			}

			case EXPLODE_PATH:
			{
				// The path runs up to the next '?' or '#'.
				int end_path = char_offset_until_fn_false(url_c, length, offset,
					explode_is_path_char);
				BString path(&url_c[offset], end_path - offset);

				// Normalize the path unless the caller asked for it raw.
				if ((flags & PARSE_RAW_PATH_MASK_BIT) == 0)
					SetPath(path);
				else
					_SetPathUnsafe(path);
				state = EXPLODE_REQUEST;
				offset = end_path;
				break;
			}

			case EXPLODE_REQUEST: // query
			{
				if (url_c[offset] == '?') {
					offset++;
					int end_request = char_offset_until_fn_false(url_c, length,
						offset, explode_is_request_char);
					SetRequest(BString(&url_c[offset], end_request - offset));
					offset = end_request;
					// if there is a "?" in the parse then it is clear that
					// there is a 'request' / query present regardless if there
					// are any valid key-value pairs.
					fHasRequest = true;
				}
				state = EXPLODE_FRAGMENT;
				break;
			}

			case EXPLODE_FRAGMENT:
			{
				// Everything after the '#' is the fragment.
				if (url_c[offset] == '#') {
					offset++;
					SetFragment(BString(&url_c[offset], length - offset));
					offset = length;
				}
				state = EXPLODE_COMPLETE;
				break;
			}

			case EXPLODE_COMPLETE:
				// should never be reached - keeps the compiler happy
				break;

		}
	}

	// A "//" was seen, so a host is present even when it is empty.
	if (forceHasHost)
		fHasHost = true;

	return B_OK;
}
1134
1135
1136BString
1137BUrl::_MergePath(const BString& relative) const
1138{
1139	// This implements RFC3986, Section 5.2.3.
1140	if (HasAuthority() && fPath == "") {
1141		BString result("/");
1142		result << relative;
1143		return result;
1144	}
1145
1146	int32 lastSlashIndex = fPath.FindLast("/");
1147
1148	if (lastSlashIndex == B_ERROR)
1149		return relative;
1150
1151	BString result;
1152	result.SetTo(fPath, lastSlashIndex + 1);
1153	result << relative;
1154
1155	return result;
1156}
1157
1158
1159// This sets the path without normalizing it. If fed with a path that has . or
1160// .. segments, this would make the URL invalid.
1161void
1162BUrl::_SetPathUnsafe(const BString& path)
1163{
1164	fPath = path;
1165	fHasPath = true; // RFC says an empty path is still a path
1166	fUrlStringValid = false;
1167}
1168
1169
// States of the authority parser in BUrl::SetAuthority(), listed in the
// order in which the parts appear in an authority string.
enum authority_parse_state {
	AUTHORITY_USERNAME,		// reading the user name (only if '@' is present)
	AUTHORITY_PASSWORD,		// reading the optional ':' password and the '@'
	AUTHORITY_HOST,			// reading the host, possibly a bracketed IPv6 one
	AUTHORITY_PORT,			// reading the optional ':' port
	AUTHORITY_COMPLETE		// parsing is finished
};
1177
1178void
1179BUrl::SetAuthority(const BString& authority)
1180{
1181	fAuthority = authority;
1182
1183	fUser.Truncate(0);
1184	fPassword.Truncate(0);
1185	fHost.Truncate(0);
1186	fPort = 0;
1187	fHasPort = false;
1188	fHasUserName = false;
1189	fHasPassword = false;
1190
1191	bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1192	authority_parse_state state = AUTHORITY_USERNAME;
1193	int32 offset = 0;
1194	int32 length = authority.Length();
1195	const char *authority_c = authority.String();
1196
1197	while (AUTHORITY_COMPLETE != state && offset < length) {
1198
1199		switch (state) {
1200
1201			case AUTHORITY_USERNAME:
1202			{
1203				if (hasUsernamePassword) {
1204					int32 end_username = char_offset_until_fn_false(
1205						authority_c, length, offset, _IsUsernameChar);
1206
1207					SetUserName(BString(&authority_c[offset],
1208						end_username - offset));
1209
1210					state = AUTHORITY_PASSWORD;
1211					offset = end_username;
1212				} else {
1213					state = AUTHORITY_HOST;
1214				}
1215				break;
1216			}
1217
1218			case AUTHORITY_PASSWORD:
1219			{
1220				if (hasUsernamePassword && ':' == authority[offset]) {
1221					offset++; // move past the delimiter
1222					int32 end_password = char_offset_until_fn_false(
1223						authority_c, length, offset, _IsPasswordChar);
1224
1225					SetPassword(BString(&authority_c[offset],
1226						end_password - offset));
1227
1228					offset = end_password;
1229				}
1230
1231				// if the host was preceded by a username + password couple
1232				// then there will be an '@' delimiter to avoid.
1233
1234				if (authority_c[offset] == '@') {
1235					offset++;
1236				}
1237
1238				state = AUTHORITY_HOST;
1239				break;
1240			}
1241
1242			case AUTHORITY_HOST:
1243			{
1244
1245				// the host may be enclosed within brackets in order to express
1246				// an IPV6 address.
1247
1248				if (authority_c[offset] == '[') {
1249					int32 end_ipv6_host = char_offset_until_fn_false(
1250						authority_c, length, offset + 1, _IsIPV6Char);
1251
1252					if (authority_c[end_ipv6_host] == ']') {
1253						SetHost(BString(&authority_c[offset],
1254							(end_ipv6_host - offset) + 1));
1255						state = AUTHORITY_PORT;
1256						offset = end_ipv6_host + 1;
1257					}
1258				}
1259
1260				// if an IPV6 host was not found.
1261
1262				if (AUTHORITY_HOST == state) {
1263					int32 end_host = char_offset_until_fn_false(
1264						authority_c, length, offset, _IsHostChar);
1265
1266					SetHost(BString(&authority_c[offset], end_host - offset));
1267					state = AUTHORITY_PORT;
1268					offset = end_host;
1269				}
1270
1271				break;
1272			}
1273
1274			case AUTHORITY_PORT:
1275			{
1276				if (authority_c[offset] == ':') {
1277					offset++;
1278					int32 end_port = char_offset_until_fn_false(
1279						authority_c, length, offset, _IsPortChar);
1280					SetPort(atoi(&authority_c[offset]));
1281					offset = end_port;
1282				}
1283
1284				state = AUTHORITY_COMPLETE;
1285
1286				break;
1287			}
1288
1289			case AUTHORITY_COMPLETE:
1290				// should never be reached - keeps the compiler happy
1291				break;
1292		}
1293	}
1294
1295	// An empty authority is still an authority, making it possible to have
1296	// URLs such as file:///path/to/file.
1297	// TODO however, there is no way to unset the authority once it is set...
1298	// We may want to take a const char* parameter and allow NULL.
1299	fHasHost = true;
1300}
1301
1302
1303/*static*/ BString
1304BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1305{
1306	BString result;
1307
1308	for (int32 i = 0; i < chunk.Length(); i++) {
1309		if (_IsUnreserved(chunk[i])
1310				|| (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1311			result << chunk[i];
1312		} else {
1313			if (chunk[i] == ' ' && !strict) {
1314				result << '+';
1315					// In non-strict mode, spaces are encoded by a plus sign
1316			} else {
1317				char hexString[5];
1318				snprintf(hexString, 5, "%X", chunk[i]);
1319
1320				result << '%' << hexString;
1321			}
1322		}
1323	}
1324
1325	return result;
1326}
1327
1328
1329/*static*/ BString
1330BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1331{
1332	BString result;
1333
1334	for (int32 i = 0; i < chunk.Length(); i++) {
1335		if (chunk[i] == '+' && !strict)
1336			result << ' ';
1337		else {
1338			char decoded = 0;
1339			char* out = NULL;
1340			char hexString[3];
1341
1342			if (chunk[i] == '%' && i < chunk.Length() - 2
1343				&& isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1344				hexString[0] = chunk[i + 1];
1345				hexString[1] = chunk[i + 2];
1346				hexString[2] = 0;
1347				decoded = (char)strtol(hexString, &out, 16);
1348			}
1349
1350			if (out == hexString + 2) {
1351				i += 2;
1352				result << decoded;
1353			} else
1354				result << chunk[i];
1355		}
1356	}
1357	return result;
1358}
1359
1360
1361bool
1362BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const
1363{
1364	for (int32 i = 0; i < length; i++) {
1365		char c = fHost[offset + i];
1366		if (!_IsIPV6Char(c))
1367			return false;
1368	}
1369
1370	return length > 0;
1371}
1372
1373
1374bool
1375BUrl::_IsHostValid() const
1376{
1377	if (fHost.StartsWith("[") && fHost.EndsWith("]"))
1378		return _IsHostIPV6Valid(1, fHost.Length() - 2);
1379
1380	bool lastWasDot = false;
1381
1382	for (int32 i = 0; i < fHost.Length(); i++) {
1383		char c = fHost[i];
1384
1385		if (c == '.') {
1386			if (lastWasDot || i == 0)
1387				return false;
1388			lastWasDot = true;
1389		} else {
1390			lastWasDot = false;
1391		}
1392
1393		if (!_IsHostChar(c) && c != '.') {
1394			// the underscore is technically not allowed, but occurs sometimes
1395			// in the wild.
1396			return false;
1397		}
1398	}
1399
1400	return true;
1401}
1402
1403
1404bool
1405BUrl::_IsProtocolValid() const
1406{
1407	for (int8 index = 0; index < fProtocol.Length(); index++) {
1408		char c = fProtocol[index];
1409
1410		if (index == 0 && !isalpha(c))
1411			return false;
1412		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1413			return false;
1414	}
1415
1416	return !fProtocol.IsEmpty();
1417}
1418
1419
1420bool
1421BUrl::_IsUnreserved(char c)
1422{
1423	return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1424}
1425
1426
1427bool
1428BUrl::_IsGenDelim(char c)
1429{
1430	return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1431		|| c == ']' || c == '@';
1432}
1433
1434
1435bool
1436BUrl::_IsSubDelim(char c)
1437{
1438	return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1439		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1440		|| c == '=';
1441}
1442
1443
1444bool
1445BUrl::_IsUsernameChar(char c)
1446{
1447	return !(c == ':' || c == '@');
1448}
1449
1450
1451bool
1452BUrl::_IsPasswordChar(char c)
1453{
1454	return !(c == '@');
1455}
1456
1457
1458bool
1459BUrl::_IsHostChar(char c)
1460{
1461	return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.'
1462		|| c == '%';
1463}
1464
1465
1466bool
1467BUrl::_IsPortChar(char c)
1468{
1469	return isdigit(c);
1470}
1471
1472
1473bool
1474BUrl::_IsIPV6Char(char c)
1475{
1476	return c == ':' || isxdigit(c);
1477}
1478
1479
1480BString
1481BUrl::_UrlMimeType() const
1482{
1483	BString mime;
1484	mime << "application/x-vnd.Be.URL." << fProtocol;
1485
1486	return BString(mime);
1487}
1488