1/*
2 * Copyright 2010 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Christophe Huriaux, c.huriaux@gmail.com
7 */
8
9
10#include <ctype.h>
11#include <cstdio>
12#include <cstdlib>
13#include <new>
14
15#include <Url.h>
16
// Name of the BMessage string field used to store the URL when a BUrl is
// archived and to read it back when one is unarchived.
static const char* const kArchivedUrl = "be:url string";
18
19
20BUrl::BUrl(const char* url)
21	:
22	fUrlString(),
23	fProtocol(),
24	fUser(),
25	fPassword(),
26	fHost(),
27	fPort(0),
28	fPath(),
29	fRequest(),
30	fHasAuthority(false)
31{
32	SetUrlString(url);
33}
34
35
36BUrl::BUrl(BMessage* archive)
37	:
38	fUrlString(),
39	fProtocol(),
40	fUser(),
41	fPassword(),
42	fHost(),
43	fPort(0),
44	fPath(),
45	fRequest(),
46	fHasAuthority(false)
47{
48	BString url;
49
50	if (archive->FindString(kArchivedUrl, &url) == B_OK)
51		SetUrlString(url);
52}
53
54
55BUrl::BUrl(const BUrl& other)
56	:
57	BArchivable(),
58	fUrlString(),
59	fProtocol(),
60	fUser(),
61	fPassword(),
62	fHost(),
63	fPort(0),
64	fPath(),
65	fRequest(),
66	fHasAuthority(false)
67{
68	*this = other;
69}
70
71
72BUrl::BUrl()
73	:
74	fUrlString(),
75	fProtocol(),
76	fUser(),
77	fPassword(),
78	fHost(),
79	fPort(0),
80	fPath(),
81	fRequest(),
82	fHasAuthority(false)
83{
84}
85
86
87BUrl::~BUrl()
88{
89}
90
91
92// #pragma mark URL fields modifiers
93
94
95BUrl&
96BUrl::SetUrlString(const BString& url)
97{
98	_ExplodeUrlString(url);
99	return *this;
100}
101
102
103BUrl&
104BUrl::SetProtocol(const BString& protocol)
105{
106	fProtocol = protocol;
107	fHasProtocol = true;
108	fUrlStringValid = false;
109	return *this;
110}
111
112
113BUrl&
114BUrl::SetUserName(const BString& user)
115{
116	fUser = user;
117	fHasUserName = true;
118	fUrlStringValid = false;
119	fAuthorityValid = false;
120	fUserInfoValid = false;
121	return *this;
122}
123
124
125BUrl&
126BUrl::SetPassword(const BString& password)
127{
128	fPassword = password;
129	fHasPassword = true;
130	fUrlStringValid = false;
131	fAuthorityValid = false;
132	fUserInfoValid = false;
133	return *this;
134}
135
136
137BUrl&
138BUrl::SetHost(const BString& host)
139{
140	fHost = host;
141	fHasHost = true;
142	fUrlStringValid = false;
143	fAuthorityValid = false;
144	return *this;
145}
146
147
148BUrl&
149BUrl::SetPort(int port)
150{
151	fPort = port;
152	fHasPort = true;
153	fUrlStringValid = false;
154	fAuthorityValid = false;
155	return *this;
156}
157
158
159BUrl&
160BUrl::SetPath(const BString& path)
161{
162	fPath = path;
163	fHasPath = true;
164	fUrlStringValid = false;
165	return *this;
166}
167
168
169BUrl&
170BUrl::SetRequest(const BString& request)
171{
172	fRequest = request;
173	fHasRequest = true;
174	fUrlStringValid = false;
175	return *this;
176}
177
178
179BUrl&
180BUrl::SetFragment(const BString& fragment)
181{
182	fFragment = fragment;
183	fHasFragment = true;
184	fUrlStringValid = false;
185	return *this;
186}
187
188
189// #pragma mark URL fields access
190
191
192const BString&
193BUrl::UrlString() const
194{
195	if (!fUrlStringValid) {
196		fUrlString.Truncate(0);
197
198		if (HasProtocol()) {
199			fUrlString << fProtocol << ':';
200			if (HasAuthority())
201				fUrlString << "//";
202		}
203
204		fUrlString << Authority();
205		fUrlString << Path();
206
207		if (HasRequest())
208			fUrlString << '?' << fRequest;
209
210		if (HasFragment())
211			fUrlString << '#' << fFragment;
212
213		fUrlStringValid = true;
214	}
215
216	return fUrlString;
217}
218
219
220const BString&
221BUrl::Protocol() const
222{
223	return fProtocol;
224}
225
226
227const BString&
228BUrl::UserName() const
229{
230	return fUser;
231}
232
233
234const BString&
235BUrl::Password() const
236{
237	return fPassword;
238}
239
240
241const BString&
242BUrl::UserInfo() const
243{
244	if (!fUserInfoValid) {
245		fUserInfo = fUser;
246
247		if (HasPassword())
248			fUserInfo << ':' << fPassword;
249
250		fUserInfoValid = true;
251	}
252
253	return fUserInfo;
254}
255
256
257const BString&
258BUrl::Host() const
259{
260	return fHost;
261}
262
263
264int
265BUrl::Port() const
266{
267	return fPort;
268}
269
270
271const BString&
272BUrl::Authority() const
273{
274	if (!fAuthorityValid) {
275		fAuthority.Truncate(0);
276
277		if (HasUserInfo())
278			fAuthority << UserInfo() << '@';
279		fAuthority << Host();
280
281		if (HasPort())
282			fAuthority << ':' << fPort;
283
284		fAuthorityValid = true;
285	}
286	return fAuthority;
287}
288
289
290const BString&
291BUrl::Path() const
292{
293	return fPath;
294}
295
296
297const BString&
298BUrl::Request() const
299{
300	return fRequest;
301}
302
303
304const BString&
305BUrl::Fragment() const
306{
307	return fFragment;
308}
309
310
311// #pragma mark URL fields tests
312
313
314bool
315BUrl::IsValid() const
316{
317	// TODO
318	return false;
319}
320
321
322bool
323BUrl::HasProtocol() const
324{
325	return fHasProtocol;
326}
327
328
329bool
330BUrl::HasAuthority() const
331{
332	return fHasAuthority;
333}
334
335
336bool
337BUrl::HasUserName() const
338{
339	return fHasUserName;
340}
341
342
343bool
344BUrl::HasPassword() const
345{
346	return fHasPassword;
347}
348
349
350bool
351BUrl::HasUserInfo() const
352{
353	return fHasUserInfo;
354}
355
356
357bool
358BUrl::HasHost() const
359{
360	return fHasHost;
361}
362
363
364bool
365BUrl::HasPort() const
366{
367	return fHasPort;
368}
369
370
371bool
372BUrl::HasPath() const
373{
374	return fHasPath;
375}
376
377
378bool
379BUrl::HasRequest() const
380{
381	return fHasRequest;
382}
383
384
385bool
386BUrl::HasFragment() const
387{
388	return fHasFragment;
389}
390
391
392// #pragma mark URL encoding/decoding of needed fields
393
394
395void
396BUrl::UrlEncode(bool strict)
397{
398	fUser = _DoUrlEncodeChunk(fUser, strict);
399	fPassword = _DoUrlEncodeChunk(fPassword, strict);
400	fHost = _DoUrlEncodeChunk(fHost, strict);
401	fFragment = _DoUrlEncodeChunk(fFragment, strict);
402	fPath = _DoUrlEncodeChunk(fPath, strict, true);
403}
404
405
406void
407BUrl::UrlDecode(bool strict)
408{
409	fUser = _DoUrlDecodeChunk(fUser, strict);
410	fPassword = _DoUrlDecodeChunk(fPassword, strict);
411	fHost = _DoUrlDecodeChunk(fHost, strict);
412	fFragment = _DoUrlDecodeChunk(fFragment, strict);
413	fPath = _DoUrlDecodeChunk(fPath, strict);
414}
415
416
417// #pragma mark Url encoding/decoding of string
418
419
420/*static*/ BString
421BUrl::UrlEncode(const BString& url, bool strict, bool directory)
422{
423	return _DoUrlEncodeChunk(url, strict, directory);
424}
425
426
427/*static*/ BString
428BUrl::UrlDecode(const BString& url, bool strict)
429{
430	return _DoUrlDecodeChunk(url, strict);
431}
432
433
434// #pragma mark BArchivable members
435
436
437status_t
438BUrl::Archive(BMessage* into, bool deep) const
439{
440	status_t ret = BArchivable::Archive(into, deep);
441
442	if (ret == B_OK)
443		ret = into->AddString(kArchivedUrl, UrlString());
444
445	return ret;
446}
447
448
449/*static*/ BArchivable*
450BUrl::Instantiate(BMessage* archive)
451{
452	if (validate_instantiation(archive, "BUrl"))
453		return new(std::nothrow) BUrl(archive);
454	return NULL;
455}
456
457
458// #pragma mark URL comparison
459
460
461bool
462BUrl::operator==(BUrl& other) const
463{
464	UrlString();
465	other.UrlString();
466
467	return fUrlString == other.fUrlString;
468}
469
470
471bool
472BUrl::operator!=(BUrl& other) const
473{
474	return !(*this == other);
475}
476
477
478// #pragma mark URL assignment
479
480
481const BUrl&
482BUrl::operator=(const BUrl& other)
483{
484	fUrlStringValid		= other.fUrlStringValid;
485	if (fUrlStringValid)
486		fUrlString			= other.fUrlString;
487
488	fAuthorityValid		= other.fAuthorityValid;
489	if (fAuthorityValid)
490		fAuthority			= other.fAuthority;
491
492	fUserInfoValid		= other.fUserInfoValid;
493	if (fUserInfoValid)
494		fUserInfo			= other.fUserInfo;
495
496	fProtocol			= other.fProtocol;
497	fUser				= other.fUser;
498	fPassword			= other.fPassword;
499	fHost				= other.fHost;
500	fPort				= other.fPort;
501	fPath				= other.fPath;
502	fRequest			= other.fRequest;
503	fFragment			= other.fFragment;
504
505	fHasProtocol		= other.fHasProtocol;
506	fHasUserName		= other.fHasUserName;
507	fHasPassword		= other.fHasPassword;
508	fHasUserInfo		= other.fHasUserInfo;
509	fHasHost			= other.fHasHost;
510	fHasPort			= other.fHasPort;
511	fHasAuthority		= other.fHasAuthority;
512	fHasPath			= other.fHasPath;
513	fHasRequest			= other.fHasRequest;
514	fHasFragment		= other.fHasFragment;
515
516	return *this;
517}
518
519
520const BUrl&
521BUrl::operator=(const BString& string)
522{
523	SetUrlString(string);
524	return *this;
525}
526
527
528const BUrl&
529BUrl::operator=(const char* string)
530{
531	SetUrlString(string);
532	return *this;
533}
534
535
536// #pragma mark URL to string conversion
537
538
539BUrl::operator const char*() const
540{
541	return UrlString();
542}
543
544
545void
546BUrl::_ResetFields()
547{
548	fHasProtocol = false;
549	fHasUserName = false;
550	fHasPassword = false;
551	fHasUserInfo = false;
552	fHasHost = false;
553	fHasPort = false;
554	fHasAuthority = false;
555	fHasPath = false;
556	fHasRequest = false;
557	fHasFragment = false;
558
559	fProtocol.Truncate(0);
560	fUser.Truncate(0);
561	fPassword.Truncate(0);
562	fHost.Truncate(0);
563	fPort = 0;
564	fPath.Truncate(0);
565	fRequest.Truncate(0);
566	fFragment.Truncate(0);
567
568	// Force re-generation of these fields
569	fUrlStringValid = false;
570	fUserInfoValid = false;
571	fAuthorityValid = false;
572}
573
574
575void
576BUrl::_ExplodeUrlString(const BString& url)
577{
578	int16 urlIndex = 0;
579
580	_ResetFields();
581
582	_ExtractProtocol(url, &urlIndex);
583	_ExtractAuthority(url, &urlIndex);
584	_ExtractPath(url, &urlIndex);
585	_ExtractRequestAndFragment(url, &urlIndex);
586}
587
588
589void
590BUrl::_ExtractProtocol(const BString& urlString, int16* origin)
591{
592	int16 firstColon = urlString.FindFirst(':', *origin);
593
594	// If no colon is found, assume the protocol
595	// is not present
596	if (firstColon == -1)
597		return;
598	else {
599		urlString.CopyInto(fProtocol, *origin, firstColon - *origin);
600		*origin = firstColon + 1;
601	}
602
603	if (!_IsProtocolValid()) {
604		fHasProtocol = false;
605		fProtocol.Truncate(0);
606	} else
607		fHasProtocol = true;
608}
609
610
611void
612BUrl::_ExtractAuthority(const BString& urlString, int16* origin)
613{
614	// URI doesn't contain an authority field
615	if (urlString.FindFirst("//", *origin) != *origin)
616		return;
617
618	fHasAuthority = true;
619
620//	while (urlString.ByteAt(*origin) == '/')
621//		(*origin)++;
622
623	(*origin) += 2;
624
625
626	int16 userInfoEnd = urlString.FindFirst('@', *origin);
627
628	// URL contains userinfo field
629	if (userInfoEnd != -1) {
630		BString userInfo;
631		urlString.CopyInto(userInfo, *origin, userInfoEnd - *origin);
632
633		int16 colonDelimiter = userInfo.FindFirst(':', 0);
634
635		if (colonDelimiter == *origin) {
636			fHasPassword = true;
637			fPassword = userInfo;
638		} else if (colonDelimiter != -1) {
639			fHasUserName = true;
640			fHasPassword = true;
641
642			userInfo.CopyInto(fUser, 0, colonDelimiter);
643			userInfo.CopyInto(fPassword, colonDelimiter + 1,
644				userInfo.Length() - colonDelimiter);
645		} else {
646			fHasUserName = true;
647			fUser = userInfo;
648		}
649
650		fHasUserInfo = true;
651		*origin = userInfoEnd + 1;
652	}
653
654
655	// Extract the host part
656	int16 hostEnd = *origin;
657
658	while (hostEnd < urlString.Length()
659		&& !_IsAuthorityTerminator(urlString.ByteAt(hostEnd))
660		&& urlString.ByteAt(hostEnd) != ':') {
661		hostEnd++;
662	}
663
664	// The host is likely to be present if an authority is
665	// defined, but in some weird cases, it's not.
666	if (hostEnd != *origin) {
667		urlString.CopyInto(fHost, *origin, hostEnd - *origin);
668
669		*origin = hostEnd;
670		fHasHost = true;
671	}
672
673	// Extract the port part
674	fPort = 0;
675	if (urlString.ByteAt(*origin) == ':') {
676		int16 portEnd = ++(*origin);
677
678		while (portEnd < urlString.Length()
679			&& !_IsAuthorityTerminator(urlString.ByteAt(portEnd)))
680			portEnd++;
681
682		BString portString;
683		urlString.CopyInto(portString, *origin, portEnd - *origin);
684		fPort = atoi(portString.String());
685
686		//  Even if the port is invalid, the URL is considered to
687		// have a port.
688		fHasPort = portString.Length() > 0;
689		*origin = portEnd;
690	}
691}
692
693
694void
695BUrl::_ExtractPath(const BString& urlString, int16* origin)
696{
697	// Extract path from URL
698	if (urlString.ByteAt(*origin) == '/' || !HasAuthority()) {
699		int16 pathEnd = *origin;
700
701		while (pathEnd < urlString.Length()
702				&& !_IsPathTerminator(urlString.ByteAt(pathEnd))) {
703				pathEnd++;
704		}
705
706		urlString.CopyInto(fPath, *origin, pathEnd - *origin);
707
708		*origin = pathEnd;
709		fHasPath = true;
710	}
711}
712
713
714void
715BUrl::_ExtractRequestAndFragment(const BString& urlString, int16* origin)
716{
717	// Extract request field from URL
718	if (urlString.ByteAt(*origin) == '?') {
719		(*origin)++;
720		int16 requestEnd = urlString.FindFirst('#', *origin);
721
722		fHasRequest = true;
723
724		if (requestEnd == -1) {
725			urlString.CopyInto(fRequest, *origin, urlString.Length() - *origin);
726			return;
727		} else {
728			urlString.CopyInto(fRequest, *origin, requestEnd - *origin);
729			*origin = requestEnd;
730		}
731	}
732
733	// Extract fragment field if needed
734	if (urlString.ByteAt(*origin) == '#') {
735		(*origin)++;
736		urlString.CopyInto(fFragment, *origin, urlString.Length() - *origin);
737
738		fHasFragment = true;
739	}
740}
741
742
743/*static*/ BString
744BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
745{
746	BString result;
747
748	for (int32 i = 0; i < chunk.Length(); i++) {
749		if (_IsUnreserved(chunk[i])
750			|| (directory && (chunk[i] == '/' || chunk[i] == '\\')))
751			result << chunk[i];
752		else {
753			if (chunk[i] == ' ' && !strict) {
754				result << '+';
755					// In non-strict mode, spaces are encoded by a plus sign
756			} else {
757				char hexString[5];
758				snprintf(hexString, 5, "%X", chunk[i]);
759
760				result << '%' << hexString;
761			}
762		}
763	}
764
765	return result;
766}
767
768
769/*static*/ BString
770BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
771{
772	BString result;
773
774	for (int32 i = 0; i < chunk.Length(); i++) {
775		if (chunk[i] == '+' && !strict)
776			result << ' ';
777		else if (chunk[i] != '%')
778			result << chunk[i];
779		else {
780			char hexString[] = { chunk[i+1], chunk[i+2], 0 };
781			result << (char)strtol(hexString, NULL, 16);
782
783			i += 2;
784		}
785	}
786	return result;
787}
788
789
790bool
791BUrl::_IsProtocolValid()
792{
793	for (int8 index = 0; index < fProtocol.Length(); index++) {
794		char c = fProtocol[index];
795
796		if (index == 0 && !isalpha(c))
797			return false;
798		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
799			return false;
800	}
801
802	return true;
803}
804
805
806bool
807BUrl::_IsAuthorityTerminator(char c)
808{
809	if (c == '/' || _IsPathTerminator(c))
810		return true;
811	else
812		return false;
813}
814
815
816bool
817BUrl::_IsPathTerminator(char c)
818{
819	if (c == '?' || _IsRequestTerminator(c))
820		return true;
821	else
822		return false;
823}
824
825
826bool
827BUrl::_IsRequestTerminator(char c)
828{
829	if (c == '#')
830		return true;
831	else
832		return false;
833}
834
835
836bool
837BUrl::_IsUnreserved(char c)
838{
839	if (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~')
840		return true;
841	else
842		return false;
843}
844
845
846bool
847BUrl::_IsGenDelim(char c)
848{
849	if (c == ':' || c == '/' || c == '?' || c == '#' || c == '['
850		|| c == ']' || c == '@')
851		return true;
852	else
853		return false;
854}
855
856
857bool
858BUrl::_IsSubDelim(char c)
859{
860	if (c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
861		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
862		|| c == '=')
863		return true;
864	else
865		return false;
866}
867