uri.cpp@ 58067

Last change on this file since 58067 was 58067, checked in by vboxsync, 10 years ago
IPRT: Added RTUriFileCreateEx and made RTUriFileCreate assume OS specific input path like it was supposed to. Fixed DOS slash flipping.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 31.5 KB

Line
1	/* $Id: uri.cpp 58067 2015-10-06 20:52:48Z vboxsync $ */
2	/** @file
3	* IPRT - Uniform Resource Identifier handling.
4	*/
5
6	/*
7	* Copyright (C) 2011-2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.215389.xyz. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27
28	/*********************************************************************************************************************************
29	* Header Files *
30	*********************************************************************************************************************************/
31	#include <iprt/uri.h>
32
33	#include <iprt/assert.h>
34	#include <iprt/ctype.h>
35	#include <iprt/path.h>
36	#include <iprt/string.h>
37
38
39	/*********************************************************************************************************************************
40	* Defined Constants And Macros *
41	*********************************************************************************************************************************/
42	/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */
43	#define RTURIPARSED_MAGIC UINT32_C(0x439e0745)
44
45
/* General URI format:

    foo://example.com:8042/over/there?name=ferret#nose
    \_/   \______________/\_________/ \_________/ \__/
     |           |            |            |        |
  scheme     authority       path        query   fragment
     |   _____________________|__
    / \ /                        \
    urn:example:animal:ferret:nose
*/
56
57
/**
 * Tests whether a character has to be % escaped when encoding a URI.
 *
 * The following defines characters which have to be % escaped:
 *  control = 00-1F
 *  space   = ' '
 *  delims  = '<' , '>' , '#' , '%' , '"'
 *  unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * @note The argument is evaluated several times, so it must not have any
 *       side effects.
 */
#define URI_EXCLUDED(a) \
  (   ((a) >= 0x0  && (a) <= 0x20) \
   || ((a) >= 0x5B && (a) <= 0x5E) \
   || ((a) >= 0x7B && (a) <= 0x7D) \
   || (a) == '<' || (a) == '>' || (a) == '#' \
   || (a) == '%' || (a) == '"' || (a) == '`' )
71
72	static char rtUriPercentEncodeN(const char pszString, size_t cchMax)
73	{
74	if (!pszString)
75	return NULL;
76
77	int rc = VINF_SUCCESS;
78
79	size_t cbLen = RT_MIN(strlen(pszString), cchMax);
80	/* The new string can be max 3 times in size of the original string. */
81	char pszNew = RTStrAlloc(cbLen 3 + 1);
82	if (!pszNew)
83	return NULL;
84
85	char *pszRes = NULL;
86	size_t iIn = 0;
87	size_t iOut = 0;
88	while (iIn < cbLen)
89	{
90	if (URI_EXCLUDED(pszString[iIn]))
91	{
92	char szNum[3] = { 0, 0, 0 };
93	RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL \| RTSTR_F_ZEROPAD);
94	pszNew[iOut++] = '%';
95	pszNew[iOut++] = szNum[0];
96	pszNew[iOut++] = szNum[1];
97	}
98	else
99	pszNew[iOut++] = pszString[iIn++];
100	}
101	if (RT_SUCCESS(rc))
102	{
103	pszNew[iOut] = '\0';
104	if (iOut != iIn)
105	{
106	/* If the source and target strings have different size, recreate
107	* the target string with the correct size. */
108	pszRes = RTStrDupN(pszNew, iOut);
109	RTStrFree(pszNew);
110	}
111	else
112	pszRes = pszNew;
113	}
114	else
115	RTStrFree(pszNew);
116
117	return pszRes;
118	}
119
120
121	/**
122	* Calculates the encoded string length.
123	*
124	* @returns Number of chars (excluding the terminator).
125	* @param pszString The string to encode.
126	* @param cchMax The maximum string length (e.g. RTSTR_MAX).
127	* @param fEncodeDosSlash Whether to encode DOS slashes or not.
128	*/
129	static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
130	{
131	size_t cchEncoded = 0;
132	if (pszString)
133	{
134	size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
135	while (cchSrcLeft-- > 0)
136	{
137	char const ch = *pszString++;
138	if (!URI_EXCLUDED(ch) \|\| (ch == '\\' && !fEncodeDosSlash))
139	cchEncoded += 1;
140	else
141	cchEncoded += 3;
142	}
143	}
144	return cchEncoded;
145	}
146
147
148	/**
149	* Encodes an URI into a caller allocated buffer.
150	*
151	* @returns IPRT status code.
152	* @param pszString The string to encode.
153	* @param cchMax The maximum string length (e.g. RTSTR_MAX).
154	* @param fEncodeDosSlash Whether to encode DOS slashes or not.
155	* @param pszDst The destination buffer.
156	* @param cbDst The size of the destination buffer.
157	*/
158	static int rtUriEncodeIntoBuffer(const char pszString, size_t cchMax, bool fEncodeDosSlash, char pszDst, size_t cbDst)
159	{
160	AssertReturn(pszString, VERR_INVALID_POINTER);
161
162	/*
163	* We do buffer size checking up front and every time we encode a special
164	* character. That's faster than checking for each char.
165	*/
166	size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
167	AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
168	cbDst -= cchSrcLeft;
169
170	while (cchSrcLeft-- > 0)
171	{
172	char const ch = *pszString++;
173	if (!URI_EXCLUDED(ch) \|\| (ch == '\\' && !fEncodeDosSlash))
174	*pszDst++ = ch;
175	else
176	{
177	AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
178	cbDst -= 2;
179
180	*pszDst++ = '%';
181	ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL \| RTSTR_F_ZEROPAD);
182	Assert(cchTmp == 2); NOREF(cchTmp);
183	pszDst += 2;
184	}
185	}
186
187	*pszDst = '\0';
188	return VINF_SUCCESS;
189	}
190
191
192	static char rtUriPercentDecodeN(const char pszString, size_t cchString)
193	{
194	AssertPtrReturn(pszString, NULL);
195	AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);
196
197	/*
198	* The new string can only get smaller, so use the input length as a
199	* staring buffer size.
200	*/
201	char *pszDecoded = RTStrAlloc(cchString + 1);
202	if (pszDecoded)
203	{
204	/*
205	* Knowing that the pszString itself is valid UTF-8, we only have to
206	* validate the escape sequences.
207	*/
208	size_t cchLeft = cchString;
209	char const *pchSrc = pszString;
210	char *pchDst = pszDecoded;
211	while (cchLeft > 0)
212	{
213	const char pchPct = (const char )memchr(pchSrc, '%', cchLeft);
214	if (pchPct)
215	{
216	size_t cchBefore = pchPct - pchSrc;
217	if (cchBefore)
218	{
219	memcpy(pchDst, pchSrc, cchBefore);
220	pchDst += cchBefore;
221	pchSrc += cchBefore;
222	cchLeft -= cchBefore;
223	}
224
225	char chHigh, chLow;
226	if ( cchLeft >= 3
227	&& RT_C_IS_XDIGIT(chHigh = pchSrc[1])
228	&& RT_C_IS_XDIGIT(chLow = pchSrc[2]))
229	{
230	uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
231	b <<= 4;
232	b \|= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
233	*pchDst++ = (char)b;
234	pchSrc += 3;
235	cchLeft -= 3;
236	}
237	else
238	{
239	AssertFailed();
240	pchDst++ = pchSrc++;
241	cchLeft--;
242	}
243	}
244	else
245	{
246	memcpy(pchDst, pchSrc, cchLeft);
247	pchDst += cchLeft;
248	pchSrc += cchLeft;
249	cchLeft = 0;
250	break;
251	}
252	}
253
254	*pchDst = '\0';
255
256	/*
257	* If we've got lof space room in the result string, reallocate it.
258	*/
259	size_t cchDecoded = pchDst - pszDecoded;
260	Assert(cchDecoded <= cchString);
261	// if (cchString - cchDecoded > 64) - enable later!
262	RTStrRealloc(&pszDecoded, cchDecoded + 1);
263	}
264	return pszDecoded;
265	}
266
267
268	static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
269	{
270	/*
271	* Validate the input and clear the output.
272	*/
273	AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
274	RT_ZERO(*pParsed);
275	pParsed->uAuthorityPort = UINT32_MAX;
276
277	AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
278
279	size_t const cchUri = strlen(pszUri);
280	if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
281	else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
282
283	/*
284	* Validating escaped text sequences is much simpler if we know that
285	* that the base URI string is valid. Also, we don't necessarily trust
286	* the developer calling us to remember to do this.
287	*/
288	int rc = RTStrValidateEncoding(pszUri);
289	AssertRCReturn(rc, rc);
290
291	/*
292	* RFC-3986, section 3.1:
293	* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
294	*
295	* The scheme ends with a ':', which we also skip here.
296	*/
297	size_t off = 0;
298	char ch = pszUri[off++];
299	if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
300	else return VERR_URI_INVALID_SCHEME;
301	for (;;)
302	{
303	ch = pszUri[off];
304	if (ch == ':')
305	break;
306	if (RT_LIKELY(RT_C_IS_ALNUM(ch) \|\| ch == '.' \|\| ch == '-' \|\| ch == '+')) { /* likely */ }
307	else return VERR_URI_INVALID_SCHEME;
308	off++;
309	}
310	pParsed->cchScheme = off;
311
312	/* Require the scheme length to be at least two chars so we won't confuse
313	it with a path starting with a DOS drive letter specification. */
314	if (RT_LIKELY(off >= 2)) { /* likely */ }
315	else return VERR_URI_INVALID_SCHEME;
316
317	off++; /* (skip colon) */
318
319	/*
320	* Find the end of the path, we'll need this several times.
321	* Also, while we're potentially scanning the whole thing, check for '%'.
322	*/
323	size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
324	size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
325
326	if (memchr(pszUri, '%', cchUri) != NULL)
327	pParsed->fFlags \|= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
328
329	/*
330	* RFC-3986, section 3.2:
331	* The authority component is preceeded by a double slash ("//")...
332	*/
333	if ( pszUri[off] == '/'
334	&& pszUri[off + 1] == '/')
335	{
336	off += 2;
337	pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
338	pParsed->fFlags \|= RTURIPARSED_F_HAVE_AUTHORITY;
339
340	/*
341	* RFC-3986, section 3.2:
342	* ...and is terminated by the next slash ("/"), question mark ("?"),
343	* or number sign ("#") character, or by the end of the URI.
344	*/
345	const char *pszAuthority = &pszUri[off];
346	size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
347	cchAuthority = RT_MIN(cchAuthority, offHash - off);
348	cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
349	pParsed->cchAuthority = cchAuthority;
350
351	/* The Authority can be empty, like for: file:///usr/bin/grep */
352	if (cchAuthority > 0)
353	{
354	pParsed->cchAuthorityHost = cchAuthority;
355
356	/*
357	* If there is a userinfo part, it is ended by a '@'.
358	*/
359	const char pszAt = (const char )memchr(pszAuthority, '@', cchAuthority);
360	if (pszAt)
361	{
362	size_t cchTmp = pszAt - pszAuthority;
363	pParsed->offAuthorityHost += cchTmp + 1;
364	pParsed->cchAuthorityHost -= cchTmp + 1;
365
366	/* If there is a password part, it's separated from the username with a colon. */
367	const char pszColon = (const char )memchr(pszAuthority, ':', cchTmp);
368	if (pszColon)
369	{
370	pParsed->cchAuthorityUsername = pszColon - pszAuthority;
371	pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
372	pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
373	}
374	else
375	{
376	pParsed->cchAuthorityUsername = cchTmp;
377	pParsed->offAuthorityPassword = off + cchTmp;
378	}
379	}
380
381	/*
382	* If there is a port part, its after the last colon in the host part.
383	*/
384	const char pszColon = (const char )memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
385	if (pszColon)
386	{
387	size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
388	pParsed->cchAuthorityHost -= cchTmp + 1;
389
390	pParsed->uAuthorityPort = 0;
391	while (cchTmp-- > 0)
392	{
393	ch = *++pszColon;
394	if ( RT_C_IS_DIGIT(ch)
395	&& pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
396	{
397	pParsed->uAuthorityPort *= 10;
398	pParsed->uAuthorityPort += ch - '0';
399	}
400	else
401	return VERR_URI_INVALID_PORT_NUMBER;
402	}
403	}
404	}
405
406	/* Skip past the authority. */
407	off += cchAuthority;
408	}
409	else
410	pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
411
412	/*
413	* RFC-3986, section 3.3: Path
414	* The path is terminated by the first question mark ("?")
415	* or number sign ("#") character, or by the end of the URI.
416	*/
417	pParsed->offPath = off;
418	pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
419	off += pParsed->cchPath;
420
421	/*
422	* RFC-3986, section 3.4: Query
423	* The query component is indicated by the first question mark ("?")
424	* character and terminated by a number sign ("#") character or by the
425	* end of the URI.
426	*/
427	if ( off == offQuestionMark
428	&& off < cchUri)
429	{
430	Assert(pszUri[offQuestionMark] == '?');
431	pParsed->offQuery = ++off;
432	pParsed->cchQuery = offHash - off;
433	off = offHash;
434	}
435	else
436	{
437	Assert(!pszUri[offQuestionMark]);
438	pParsed->offQuery = off;
439	}
440
441	/*
442	* RFC-3986, section 3.5: Fragment
443	* A fragment identifier component is indicated by the presence of a
444	* number sign ("#") character and terminated by the end of the URI.
445	*/
446	if ( off == offHash
447	&& off < cchUri)
448	{
449	pParsed->offFragment = ++off;
450	pParsed->cchFragment = cchUri - off;
451	}
452	else
453	{
454	Assert(!pszUri[offHash]);
455	pParsed->offFragment = off;
456	}
457
458	/*
459	* If there are any escape sequences, validate them.
460	*
461	* This is reasonably simple as we already know that the string is valid UTF-8
462	* before they get decoded. Thus we only have to validate the escaped sequences.
463	*/
464	if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
465	{
466	const char pchSrc = (const char )memchr(pszUri, '%', cchUri);
467	AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
468	do
469	{
470	char szUtf8Seq[8];
471	unsigned cchUtf8Seq = 0;
472	unsigned cchNeeded = 0;
473	size_t cchLeft = &pszUri[cchUri] - pchSrc;
474	do
475	{
476	if (cchLeft >= 3)
477	{
478	char chHigh = pchSrc[1];
479	char chLow = pchSrc[2];
480	if ( RT_C_IS_XDIGIT(chHigh)
481	&& RT_C_IS_XDIGIT(chLow))
482	{
483	uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
484	b <<= 4;
485	b \|= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
486
487	if (!(b & 0x80))
488	{
489	/* We don't want the string to be terminated prematurely. */
490	if (RT_LIKELY(b != 0)) { /* likely */ }
491	else return VERR_URI_ESCAPED_ZERO;
492
493	/* Check that we're not expecting more UTF-8 bytes. */
494	if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
495	else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
496	}
497	/* Are we waiting UTF-8 bytes? */
498	else if (cchNeeded > 0)
499	{
500	if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
501	else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
502
503	szUtf8Seq[cchUtf8Seq++] = (char)b;
504	if (--cchNeeded == 0)
505	{
506	szUtf8Seq[cchUtf8Seq] = '\0';
507	rc = RTStrValidateEncoding(szUtf8Seq);
508	if (RT_FAILURE(rc))
509	return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
510	cchUtf8Seq = 0;
511	}
512	}
513	/* Start a new UTF-8 sequence. */
514	else
515	{
516	if ((b & 0xf8) == 0xf0)
517	cchNeeded = 3;
518	else if ((b & 0xf0) == 0xe0)
519	cchNeeded = 2;
520	else if ((b & 0xe0) == 0xc0)
521	cchNeeded = 1;
522	else
523	return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
524	szUtf8Seq[0] = (char)b;
525	cchUtf8Seq = 1;
526	}
527	pchSrc += 3;
528	cchLeft -= 3;
529	}
530	else
531	return VERR_URI_INVALID_ESCAPE_SEQ;
532	}
533	else
534	return VERR_URI_INVALID_ESCAPE_SEQ;
535	} while (cchLeft > 0 && pchSrc[0] == '%');
536
537	/* Check that we're not expecting more UTF-8 bytes. */
538	if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
539	else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
540
541	/* next */
542	pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
543	} while (pchSrc);
544	}
545
546	pParsed->u32Magic = RTURIPARSED_MAGIC;
547	return VINF_SUCCESS;
548	}
549
550
551	RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
552	{
553	return rtUriParse(pszUri, pParsed);
554	}
555
556
557	RTDECL(char ) RTUriParsedScheme(const char pszUri, PCRTURIPARSED pParsed)
558	{
559	AssertPtrReturn(pszUri, NULL);
560	AssertPtrReturn(pParsed, NULL);
561	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
562	return RTStrDupN(pszUri, pParsed->cchScheme);
563	}
564
565
566	RTDECL(char ) RTUriParsedAuthority(const char pszUri, PCRTURIPARSED pParsed)
567	{
568	AssertPtrReturn(pszUri, NULL);
569	AssertPtrReturn(pParsed, NULL);
570	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
571	if (pParsed->cchAuthority \|\| (pParsed->fFlags & RTURIPARSED_F_HAVE_AUTHORITY))
572	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
573	return NULL;
574	}
575
576
577	RTDECL(char ) RTUriParsedAuthorityUsername(const char pszUri, PCRTURIPARSED pParsed)
578	{
579	AssertPtrReturn(pszUri, NULL);
580	AssertPtrReturn(pParsed, NULL);
581	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
582	if (pParsed->cchAuthorityUsername)
583	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
584	return NULL;
585	}
586
587
588	RTDECL(char ) RTUriParsedAuthorityPassword(const char pszUri, PCRTURIPARSED pParsed)
589	{
590	AssertPtrReturn(pszUri, NULL);
591	AssertPtrReturn(pParsed, NULL);
592	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
593	if (pParsed->cchAuthorityPassword)
594	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
595	return NULL;
596	}
597
598
599	RTDECL(char ) RTUriParsedAuthorityHost(const char pszUri, PCRTURIPARSED pParsed)
600	{
601	AssertPtrReturn(pszUri, NULL);
602	AssertPtrReturn(pParsed, NULL);
603	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
604	if (pParsed->cchAuthorityHost)
605	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
606	return NULL;
607	}
608
609
610	RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
611	{
612	AssertPtrReturn(pszUri, UINT32_MAX);
613	AssertPtrReturn(pParsed, UINT32_MAX);
614	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
615	return pParsed->uAuthorityPort;
616	}
617
618
619	RTDECL(char ) RTUriParsedPath(const char pszUri, PCRTURIPARSED pParsed)
620	{
621	AssertPtrReturn(pszUri, NULL);
622	AssertPtrReturn(pParsed, NULL);
623	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
624	if (pParsed->cchPath)
625	return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
626	return NULL;
627	}
628
629
630	RTDECL(char ) RTUriParsedQuery(const char pszUri, PCRTURIPARSED pParsed)
631	{
632	AssertPtrReturn(pszUri, NULL);
633	AssertPtrReturn(pParsed, NULL);
634	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
635	if (pParsed->cchQuery)
636	return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
637	return NULL;
638	}
639
640
641	RTDECL(char ) RTUriParsedFragment(const char pszUri, PCRTURIPARSED pParsed)
642	{
643	AssertPtrReturn(pszUri, NULL);
644	AssertPtrReturn(pParsed, NULL);
645	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
646	if (pParsed->cchFragment)
647	return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
648	return NULL;
649	}
650
651
652	RTDECL(char ) RTUriCreate(const char pszScheme, const char pszAuthority, const char pszPath, const char *pszQuery,
653	const char *pszFragment)
654	{
655	if (!pszScheme) /* Scheme is minimum requirement */
656	return NULL;
657
658	char *pszResult = 0;
659	char *pszAuthority1 = 0;
660	char *pszPath1 = 0;
661	char *pszQuery1 = 0;
662	char *pszFragment1 = 0;
663
664	do
665	{
666	/* Create the percent encoded strings and calculate the necessary uri
667	* length. */
668	size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
669	if (pszAuthority)
670	{
671	pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
672	if (!pszAuthority1)
673	break;
674	cbSize += strlen(pszAuthority1) + 2;
675	}
676	if (pszPath)
677	{
678	pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
679	if (!pszPath1)
680	break;
681	cbSize += strlen(pszPath1);
682	}
683	if (pszQuery)
684	{
685	pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
686	if (!pszQuery1)
687	break;
688	cbSize += strlen(pszQuery1) + 1;
689	}
690	if (pszFragment)
691	{
692	pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
693	if (!pszFragment1)
694	break;
695	cbSize += strlen(pszFragment1) + 1;
696	}
697
698	char pszTmp = pszResult = (char )RTStrAlloc(cbSize);
699	if (!pszResult)
700	break;
701	RT_BZERO(pszTmp, cbSize);
702
703	/* Compose the target uri string. */
704	RTStrCatP(&pszTmp, &cbSize, pszScheme);
705	RTStrCatP(&pszTmp, &cbSize, ":");
706	if (pszAuthority1)
707	{
708	RTStrCatP(&pszTmp, &cbSize, "//");
709	RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
710	}
711	if (pszPath1)
712	{
713	RTStrCatP(&pszTmp, &cbSize, pszPath1);
714	}
715	if (pszQuery1)
716	{
717	RTStrCatP(&pszTmp, &cbSize, "?");
718	RTStrCatP(&pszTmp, &cbSize, pszQuery1);
719	}
720	if (pszFragment1)
721	{
722	RTStrCatP(&pszTmp, &cbSize, "#");
723	RTStrCatP(&pszTmp, &cbSize, pszFragment1);
724	}
725	} while (0);
726
727	/* Cleanup */
728	if (pszAuthority1)
729	RTStrFree(pszAuthority1);
730	if (pszPath1)
731	RTStrFree(pszPath1);
732	if (pszQuery1)
733	RTStrFree(pszQuery1);
734	if (pszFragment1)
735	RTStrFree(pszFragment1);
736
737	return pszResult;
738	}
739
740
741	RTDECL(bool) RTUriIsSchemeMatch(const char pszUri, const char pszScheme)
742	{
743	AssertPtrReturn(pszUri, false);
744	size_t const cchScheme = strlen(pszScheme);
745	return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
746	&& pszUri[cchScheme] == ':';
747	}
748
749
750	RTDECL(int) RTUriFileCreateEx(const char pszPath, uint32_t fPathStyle, char ppszUri, size_t cbUri, size_t pcchUri)
751	{
752	/*
753	* Validate and adjust input. (RTPathParse check pszPath out for us)
754	*/
755	if (pcchUri)
756	{
757	AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
758	*pcchUri = ~(size_t)0;
759	}
760	AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
761	AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
762	if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
763	fPathStyle = RTPATH_STYLE;
764
765	/*
766	* Let the RTPath code parse the stuff (no reason to duplicate path parsing
767	* and get it slightly wrong here).
768	*/
769	RTPATHPARSED ParsedPath;
770	int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle);
771	if (RT_SUCCESS(rc) \|\| rc == VERR_BUFFER_OVERFLOW)
772	{
773	/* Skip leading slashes. */
774	if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
775	{
776	if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
777	while (pszPath[0] == '/' \|\| pszPath[0] == '\\')
778	pszPath++;
779	else
780	while (pszPath[0] == '/')
781	pszPath++;
782	}
783	const size_t cchPath = strlen(pszPath);
784
785	/*
786	* Calculate the encoded length and figure destination buffering.
787	*/
788	static const char s_szPrefix[] = "file:///";
789	size_t const cchPrefix = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
790	size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
791
792	if (pcchUri)
793	*pcchUri = cchEncoded;
794
795	char *pszDst;
796	char *pszFreeMe = NULL;
797	if (!cbUri \|\| *ppszUri == NULL)
798	{
799	cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
800	*ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
801	AssertReturn(pszDst, VERR_NO_STR_MEMORY);
802	}
803	else if (cchEncoded < cbUri)
804	pszDst = *ppszUri;
805	else
806	return VERR_BUFFER_OVERFLOW;
807
808	/*
809	* Construct the URI.
810	*/
811	memcpy(pszDst, s_szPrefix, cchPrefix);
812	pszDst[cchPrefix] = '\0';
813	rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
814	if (RT_SUCCESS(rc))
815	{
816	Assert(strlen(pszDst) == cbUri - 1);
817	if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
818	RTPathChangeToUnixSlashes(pszDst, true /fForce/);
819	return VINF_SUCCESS;
820	}
821
822	AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
823	if (pszFreeMe)
824	RTStrFree(pszFreeMe);
825	}
826	return rc;
827	}
828
829
830	RTDECL(char ) RTUriFileCreate(const char pszPath)
831	{
832	char *pszUri = NULL;
833	int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /cbUri/, NULL /pcchUri/);
834	if (RT_SUCCESS(rc))
835	return pszUri;
836	return NULL;
837	}
838
839
840	RTDECL(char ) RTUriFilePath(const char pszUri, uint32_t uFormat)
841	{
842	return RTUriFileNPath(pszUri, uFormat, RTSTR_MAX);
843	}
844
845
846	RTDECL(char ) RTUriFileNPath(const char pszUri, uint32_t uFormat, size_t cchMax)
847	{
848	AssertPtrReturn(pszUri, NULL);
849	AssertReturn(uFormat == URI_FILE_FORMAT_AUTO \|\| uFormat == URI_FILE_FORMAT_UNIX \|\| uFormat == URI_FILE_FORMAT_WIN, NULL);
850
851	/* Auto is based on the current OS. */
852	if (uFormat == URI_FILE_FORMAT_AUTO)
853	#if defined(RT_OS_WINDOWS) \|\| defined(RT_OS_OS2)
854	uFormat = URI_FILE_FORMAT_WIN;
855	#else
856	uFormat = URI_FILE_FORMAT_UNIX;
857	#endif
858
859	/* Check that this is a file URI. */
860	if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) != 0)
861	return NULL;
862
863	RTURIPARSED Parsed;
864	int rc = rtUriParse(pszUri, &Parsed);
865	if (RT_SUCCESS(rc))
866	{
867	/* No path detected? Take authority as path then. */
868	if (!Parsed.cchPath)
869	{
870	Parsed.cchPath = Parsed.cchAuthority;
871	Parsed.offPath = Parsed.offAuthority;
872	Parsed.cchAuthority = 0;
873	}
874	}
875
876	if ( RT_SUCCESS(rc)
877	&& Parsed.cchPath)
878	{
879	/*
880	* Calculate the size of the encoded result.
881	*/
882	size_t cbResult = 0;
883
884	/* Skip the leading slash if a DOS drive letter (e.g. "C:") is detected right after it. */
885	if ( Parsed.cchPath >= 3
886	&& pszUri[Parsed.offPath] == '/' /* Leading slash. */
887	&& RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) /* Drive letter. */
888	&& pszUri[Parsed.offPath + 2] == ':')
889	{
890	Parsed.offPath++;
891	Parsed.cchPath--;
892	}
893
894	/* Windows: Authority given? Include authority as part of UNC path */
895	if (uFormat == URI_FILE_FORMAT_WIN && Parsed.cchAuthority)
896	{
897	cbResult += 2; /* UNC slashes "\\". */
898	cbResult += Parsed.cchAuthority;
899	}
900
901	cbResult += Parsed.cchPath;
902	cbResult += 1; /* Zero termination. */
903
904	/*
905	* Compose encoded string.
906	*/
907	char *pszResult;
908	char *pszTmp = pszResult = RTStrAlloc(cbResult);
909	if (pszTmp)
910	{
911	size_t cbTmp = cbResult;
912
913	/* Windows: If an authority is given, add the required UNC prefix. */
914	if (uFormat == URI_FILE_FORMAT_WIN && Parsed.cchAuthority)
915	{
916	rc = RTStrCatP(&pszTmp, &cbTmp, "\\\\");
917	if (RT_SUCCESS(rc))
918	rc = RTStrCatPEx(&pszTmp, &cbTmp, &pszUri[Parsed.offAuthority], Parsed.cchAuthority);
919	}
920	if (RT_SUCCESS(rc))
921	rc = RTStrCatPEx(&pszTmp, &cbTmp, &pszUri[Parsed.offPath], Parsed.cchPath);
922	AssertRC(rc); /* Shall not happen! */
923	if (RT_SUCCESS(rc))
924	{
925	/*
926	* Decode the string and switch the slashes around the request way before returning.
927	*/
928	char pszPath = rtUriPercentDecodeN(pszResult, cbResult - 1 / Minus termination */);
929	if (pszPath)
930	{
931	RTStrFree(pszResult);
932
933	if (uFormat == URI_FILE_FORMAT_UNIX)
934	return RTPathChangeToUnixSlashes(pszPath, true);
935	Assert(uFormat == URI_FILE_FORMAT_WIN);
936	return RTPathChangeToDosSlashes(pszPath, true);
937	}
938
939	/* Failed. */
940	}
941	RTStrFree(pszResult);
942	}
943	}
944	return NULL;
945	}
946

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 58067

Download in other formats: