VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 58067

Last change on this file since 58067 was 58067, checked in by vboxsync, 10 years ago

IPRT: Added RTUriFileCreateEx and made RTUriFileCreate assume OS specific input path like it was supposed to. Fixed DOS slash flipping.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 31.5 KB
Line 
1/* $Id: uri.cpp 58067 2015-10-06 20:52:48Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/uri.h>
32
33#include <iprt/assert.h>
34#include <iprt/ctype.h>
35#include <iprt/path.h>
36#include <iprt/string.h>
37
38
39/*********************************************************************************************************************************
40* Defined Constants And Macros *
41*********************************************************************************************************************************/
/** Magic value rtUriParse stores in RTURIPARSED::u32Magic on success; the
 *  RTUriParsedXxx getters refuse structures that do not carry it. */
#define RTURIPARSED_MAGIC       UINT32_C(0x439E0745)
44
45
/* General URI format:

         foo://example.com:8042/over/there?name=ferret#nose
         \_/   \______________/\_________/ \_________/ \__/
          |           |            |            |        |
       scheme     authority       path        query   fragment
          |   _____________________|__
         / \ /                        \
         urn:example:animal:ferret:nose
*/
56
57
/**
 * Tests whether a character must be percent-escaped in an URI.
 *
 * The excluded set is:
 *      - delims:  '<', '>', '#', '%', '"'
 *      - unwise:  '{', '}', '|', '\', '^', '[', ']', '`'
 *      - space and the control characters 0x00 thru 0x1F
 *
 * @note The argument is evaluated several times, so it must not have side
 *       effects.
 */
#define URI_EXCLUDED(a) \
    (   (a) == '"' || (a) == '#' || (a) == '%' \
     || (a) == '<' || (a) == '>' || (a) == '`' \
     || ((a) >= 0x5B && (a) <= 0x5E) /* [ \ ] ^ */ \
     || ((a) >= 0x7B && (a) <= 0x7D) /* { | } */ \
     || ((a) >= 0x0  && (a) <= 0x20) /* controls + space */ )
71
72static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
73{
74 if (!pszString)
75 return NULL;
76
77 int rc = VINF_SUCCESS;
78
79 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
80 /* The new string can be max 3 times in size of the original string. */
81 char *pszNew = RTStrAlloc(cbLen * 3 + 1);
82 if (!pszNew)
83 return NULL;
84
85 char *pszRes = NULL;
86 size_t iIn = 0;
87 size_t iOut = 0;
88 while (iIn < cbLen)
89 {
90 if (URI_EXCLUDED(pszString[iIn]))
91 {
92 char szNum[3] = { 0, 0, 0 };
93 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
94 pszNew[iOut++] = '%';
95 pszNew[iOut++] = szNum[0];
96 pszNew[iOut++] = szNum[1];
97 }
98 else
99 pszNew[iOut++] = pszString[iIn++];
100 }
101 if (RT_SUCCESS(rc))
102 {
103 pszNew[iOut] = '\0';
104 if (iOut != iIn)
105 {
106 /* If the source and target strings have different size, recreate
107 * the target string with the correct size. */
108 pszRes = RTStrDupN(pszNew, iOut);
109 RTStrFree(pszNew);
110 }
111 else
112 pszRes = pszNew;
113 }
114 else
115 RTStrFree(pszNew);
116
117 return pszRes;
118}
119
120
121/**
122 * Calculates the encoded string length.
123 *
124 * @returns Number of chars (excluding the terminator).
125 * @param pszString The string to encode.
126 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
127 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
128 */
129static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
130{
131 size_t cchEncoded = 0;
132 if (pszString)
133 {
134 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
135 while (cchSrcLeft-- > 0)
136 {
137 char const ch = *pszString++;
138 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
139 cchEncoded += 1;
140 else
141 cchEncoded += 3;
142 }
143 }
144 return cchEncoded;
145}
146
147
148/**
149 * Encodes an URI into a caller allocated buffer.
150 *
151 * @returns IPRT status code.
152 * @param pszString The string to encode.
153 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
154 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
155 * @param pszDst The destination buffer.
156 * @param cbDst The size of the destination buffer.
157 */
158static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
159{
160 AssertReturn(pszString, VERR_INVALID_POINTER);
161
162 /*
163 * We do buffer size checking up front and every time we encode a special
164 * character. That's faster than checking for each char.
165 */
166 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
167 AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
168 cbDst -= cchSrcLeft;
169
170 while (cchSrcLeft-- > 0)
171 {
172 char const ch = *pszString++;
173 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
174 *pszDst++ = ch;
175 else
176 {
177 AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
178 cbDst -= 2;
179
180 *pszDst++ = '%';
181 ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
182 Assert(cchTmp == 2); NOREF(cchTmp);
183 pszDst += 2;
184 }
185 }
186
187 *pszDst = '\0';
188 return VINF_SUCCESS;
189}
190
191
192static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
193{
194 AssertPtrReturn(pszString, NULL);
195 AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);
196
197 /*
198 * The new string can only get smaller, so use the input length as a
199 * staring buffer size.
200 */
201 char *pszDecoded = RTStrAlloc(cchString + 1);
202 if (pszDecoded)
203 {
204 /*
205 * Knowing that the pszString itself is valid UTF-8, we only have to
206 * validate the escape sequences.
207 */
208 size_t cchLeft = cchString;
209 char const *pchSrc = pszString;
210 char *pchDst = pszDecoded;
211 while (cchLeft > 0)
212 {
213 const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft);
214 if (pchPct)
215 {
216 size_t cchBefore = pchPct - pchSrc;
217 if (cchBefore)
218 {
219 memcpy(pchDst, pchSrc, cchBefore);
220 pchDst += cchBefore;
221 pchSrc += cchBefore;
222 cchLeft -= cchBefore;
223 }
224
225 char chHigh, chLow;
226 if ( cchLeft >= 3
227 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
228 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
229 {
230 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
231 b <<= 4;
232 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
233 *pchDst++ = (char)b;
234 pchSrc += 3;
235 cchLeft -= 3;
236 }
237 else
238 {
239 AssertFailed();
240 *pchDst++ = *pchSrc++;
241 cchLeft--;
242 }
243 }
244 else
245 {
246 memcpy(pchDst, pchSrc, cchLeft);
247 pchDst += cchLeft;
248 pchSrc += cchLeft;
249 cchLeft = 0;
250 break;
251 }
252 }
253
254 *pchDst = '\0';
255
256 /*
257 * If we've got lof space room in the result string, reallocate it.
258 */
259 size_t cchDecoded = pchDst - pszDecoded;
260 Assert(cchDecoded <= cchString);
261 // if (cchString - cchDecoded > 64) - enable later!
262 RTStrRealloc(&pszDecoded, cchDecoded + 1);
263 }
264 return pszDecoded;
265}
266
267
268static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
269{
270 /*
271 * Validate the input and clear the output.
272 */
273 AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
274 RT_ZERO(*pParsed);
275 pParsed->uAuthorityPort = UINT32_MAX;
276
277 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
278
279 size_t const cchUri = strlen(pszUri);
280 if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
281 else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
282
283 /*
284 * Validating escaped text sequences is much simpler if we know that
285 * that the base URI string is valid. Also, we don't necessarily trust
286 * the developer calling us to remember to do this.
287 */
288 int rc = RTStrValidateEncoding(pszUri);
289 AssertRCReturn(rc, rc);
290
291 /*
292 * RFC-3986, section 3.1:
293 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
294 *
295 * The scheme ends with a ':', which we also skip here.
296 */
297 size_t off = 0;
298 char ch = pszUri[off++];
299 if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
300 else return VERR_URI_INVALID_SCHEME;
301 for (;;)
302 {
303 ch = pszUri[off];
304 if (ch == ':')
305 break;
306 if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
307 else return VERR_URI_INVALID_SCHEME;
308 off++;
309 }
310 pParsed->cchScheme = off;
311
312 /* Require the scheme length to be at least two chars so we won't confuse
313 it with a path starting with a DOS drive letter specification. */
314 if (RT_LIKELY(off >= 2)) { /* likely */ }
315 else return VERR_URI_INVALID_SCHEME;
316
317 off++; /* (skip colon) */
318
319 /*
320 * Find the end of the path, we'll need this several times.
321 * Also, while we're potentially scanning the whole thing, check for '%'.
322 */
323 size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
324 size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
325
326 if (memchr(pszUri, '%', cchUri) != NULL)
327 pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
328
329 /*
330 * RFC-3986, section 3.2:
331 * The authority component is preceeded by a double slash ("//")...
332 */
333 if ( pszUri[off] == '/'
334 && pszUri[off + 1] == '/')
335 {
336 off += 2;
337 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
338 pParsed->fFlags |= RTURIPARSED_F_HAVE_AUTHORITY;
339
340 /*
341 * RFC-3986, section 3.2:
342 * ...and is terminated by the next slash ("/"), question mark ("?"),
343 * or number sign ("#") character, or by the end of the URI.
344 */
345 const char *pszAuthority = &pszUri[off];
346 size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
347 cchAuthority = RT_MIN(cchAuthority, offHash - off);
348 cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
349 pParsed->cchAuthority = cchAuthority;
350
351 /* The Authority can be empty, like for: file:///usr/bin/grep */
352 if (cchAuthority > 0)
353 {
354 pParsed->cchAuthorityHost = cchAuthority;
355
356 /*
357 * If there is a userinfo part, it is ended by a '@'.
358 */
359 const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
360 if (pszAt)
361 {
362 size_t cchTmp = pszAt - pszAuthority;
363 pParsed->offAuthorityHost += cchTmp + 1;
364 pParsed->cchAuthorityHost -= cchTmp + 1;
365
366 /* If there is a password part, it's separated from the username with a colon. */
367 const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
368 if (pszColon)
369 {
370 pParsed->cchAuthorityUsername = pszColon - pszAuthority;
371 pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
372 pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
373 }
374 else
375 {
376 pParsed->cchAuthorityUsername = cchTmp;
377 pParsed->offAuthorityPassword = off + cchTmp;
378 }
379 }
380
381 /*
382 * If there is a port part, its after the last colon in the host part.
383 */
384 const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
385 if (pszColon)
386 {
387 size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
388 pParsed->cchAuthorityHost -= cchTmp + 1;
389
390 pParsed->uAuthorityPort = 0;
391 while (cchTmp-- > 0)
392 {
393 ch = *++pszColon;
394 if ( RT_C_IS_DIGIT(ch)
395 && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
396 {
397 pParsed->uAuthorityPort *= 10;
398 pParsed->uAuthorityPort += ch - '0';
399 }
400 else
401 return VERR_URI_INVALID_PORT_NUMBER;
402 }
403 }
404 }
405
406 /* Skip past the authority. */
407 off += cchAuthority;
408 }
409 else
410 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
411
412 /*
413 * RFC-3986, section 3.3: Path
414 * The path is terminated by the first question mark ("?")
415 * or number sign ("#") character, or by the end of the URI.
416 */
417 pParsed->offPath = off;
418 pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
419 off += pParsed->cchPath;
420
421 /*
422 * RFC-3986, section 3.4: Query
423 * The query component is indicated by the first question mark ("?")
424 * character and terminated by a number sign ("#") character or by the
425 * end of the URI.
426 */
427 if ( off == offQuestionMark
428 && off < cchUri)
429 {
430 Assert(pszUri[offQuestionMark] == '?');
431 pParsed->offQuery = ++off;
432 pParsed->cchQuery = offHash - off;
433 off = offHash;
434 }
435 else
436 {
437 Assert(!pszUri[offQuestionMark]);
438 pParsed->offQuery = off;
439 }
440
441 /*
442 * RFC-3986, section 3.5: Fragment
443 * A fragment identifier component is indicated by the presence of a
444 * number sign ("#") character and terminated by the end of the URI.
445 */
446 if ( off == offHash
447 && off < cchUri)
448 {
449 pParsed->offFragment = ++off;
450 pParsed->cchFragment = cchUri - off;
451 }
452 else
453 {
454 Assert(!pszUri[offHash]);
455 pParsed->offFragment = off;
456 }
457
458 /*
459 * If there are any escape sequences, validate them.
460 *
461 * This is reasonably simple as we already know that the string is valid UTF-8
462 * before they get decoded. Thus we only have to validate the escaped sequences.
463 */
464 if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
465 {
466 const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri);
467 AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
468 do
469 {
470 char szUtf8Seq[8];
471 unsigned cchUtf8Seq = 0;
472 unsigned cchNeeded = 0;
473 size_t cchLeft = &pszUri[cchUri] - pchSrc;
474 do
475 {
476 if (cchLeft >= 3)
477 {
478 char chHigh = pchSrc[1];
479 char chLow = pchSrc[2];
480 if ( RT_C_IS_XDIGIT(chHigh)
481 && RT_C_IS_XDIGIT(chLow))
482 {
483 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
484 b <<= 4;
485 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
486
487 if (!(b & 0x80))
488 {
489 /* We don't want the string to be terminated prematurely. */
490 if (RT_LIKELY(b != 0)) { /* likely */ }
491 else return VERR_URI_ESCAPED_ZERO;
492
493 /* Check that we're not expecting more UTF-8 bytes. */
494 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
495 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
496 }
497 /* Are we waiting UTF-8 bytes? */
498 else if (cchNeeded > 0)
499 {
500 if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
501 else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
502
503 szUtf8Seq[cchUtf8Seq++] = (char)b;
504 if (--cchNeeded == 0)
505 {
506 szUtf8Seq[cchUtf8Seq] = '\0';
507 rc = RTStrValidateEncoding(szUtf8Seq);
508 if (RT_FAILURE(rc))
509 return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
510 cchUtf8Seq = 0;
511 }
512 }
513 /* Start a new UTF-8 sequence. */
514 else
515 {
516 if ((b & 0xf8) == 0xf0)
517 cchNeeded = 3;
518 else if ((b & 0xf0) == 0xe0)
519 cchNeeded = 2;
520 else if ((b & 0xe0) == 0xc0)
521 cchNeeded = 1;
522 else
523 return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
524 szUtf8Seq[0] = (char)b;
525 cchUtf8Seq = 1;
526 }
527 pchSrc += 3;
528 cchLeft -= 3;
529 }
530 else
531 return VERR_URI_INVALID_ESCAPE_SEQ;
532 }
533 else
534 return VERR_URI_INVALID_ESCAPE_SEQ;
535 } while (cchLeft > 0 && pchSrc[0] == '%');
536
537 /* Check that we're not expecting more UTF-8 bytes. */
538 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
539 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
540
541 /* next */
542 pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
543 } while (pchSrc);
544 }
545
546 pParsed->u32Magic = RTURIPARSED_MAGIC;
547 return VINF_SUCCESS;
548}
549
550
551RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
552{
553 return rtUriParse(pszUri, pParsed);
554}
555
556
557RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
558{
559 AssertPtrReturn(pszUri, NULL);
560 AssertPtrReturn(pParsed, NULL);
561 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
562 return RTStrDupN(pszUri, pParsed->cchScheme);
563}
564
565
566RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
567{
568 AssertPtrReturn(pszUri, NULL);
569 AssertPtrReturn(pParsed, NULL);
570 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
571 if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAVE_AUTHORITY))
572 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
573 return NULL;
574}
575
576
577RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
578{
579 AssertPtrReturn(pszUri, NULL);
580 AssertPtrReturn(pParsed, NULL);
581 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
582 if (pParsed->cchAuthorityUsername)
583 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
584 return NULL;
585}
586
587
588RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
589{
590 AssertPtrReturn(pszUri, NULL);
591 AssertPtrReturn(pParsed, NULL);
592 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
593 if (pParsed->cchAuthorityPassword)
594 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
595 return NULL;
596}
597
598
599RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
600{
601 AssertPtrReturn(pszUri, NULL);
602 AssertPtrReturn(pParsed, NULL);
603 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
604 if (pParsed->cchAuthorityHost)
605 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
606 return NULL;
607}
608
609
610RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
611{
612 AssertPtrReturn(pszUri, UINT32_MAX);
613 AssertPtrReturn(pParsed, UINT32_MAX);
614 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
615 return pParsed->uAuthorityPort;
616}
617
618
619RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
620{
621 AssertPtrReturn(pszUri, NULL);
622 AssertPtrReturn(pParsed, NULL);
623 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
624 if (pParsed->cchPath)
625 return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
626 return NULL;
627}
628
629
630RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
631{
632 AssertPtrReturn(pszUri, NULL);
633 AssertPtrReturn(pParsed, NULL);
634 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
635 if (pParsed->cchQuery)
636 return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
637 return NULL;
638}
639
640
641RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
642{
643 AssertPtrReturn(pszUri, NULL);
644 AssertPtrReturn(pParsed, NULL);
645 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
646 if (pParsed->cchFragment)
647 return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
648 return NULL;
649}
650
651
652RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
653 const char *pszFragment)
654{
655 if (!pszScheme) /* Scheme is minimum requirement */
656 return NULL;
657
658 char *pszResult = 0;
659 char *pszAuthority1 = 0;
660 char *pszPath1 = 0;
661 char *pszQuery1 = 0;
662 char *pszFragment1 = 0;
663
664 do
665 {
666 /* Create the percent encoded strings and calculate the necessary uri
667 * length. */
668 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
669 if (pszAuthority)
670 {
671 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
672 if (!pszAuthority1)
673 break;
674 cbSize += strlen(pszAuthority1) + 2;
675 }
676 if (pszPath)
677 {
678 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
679 if (!pszPath1)
680 break;
681 cbSize += strlen(pszPath1);
682 }
683 if (pszQuery)
684 {
685 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
686 if (!pszQuery1)
687 break;
688 cbSize += strlen(pszQuery1) + 1;
689 }
690 if (pszFragment)
691 {
692 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
693 if (!pszFragment1)
694 break;
695 cbSize += strlen(pszFragment1) + 1;
696 }
697
698 char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
699 if (!pszResult)
700 break;
701 RT_BZERO(pszTmp, cbSize);
702
703 /* Compose the target uri string. */
704 RTStrCatP(&pszTmp, &cbSize, pszScheme);
705 RTStrCatP(&pszTmp, &cbSize, ":");
706 if (pszAuthority1)
707 {
708 RTStrCatP(&pszTmp, &cbSize, "//");
709 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
710 }
711 if (pszPath1)
712 {
713 RTStrCatP(&pszTmp, &cbSize, pszPath1);
714 }
715 if (pszQuery1)
716 {
717 RTStrCatP(&pszTmp, &cbSize, "?");
718 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
719 }
720 if (pszFragment1)
721 {
722 RTStrCatP(&pszTmp, &cbSize, "#");
723 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
724 }
725 } while (0);
726
727 /* Cleanup */
728 if (pszAuthority1)
729 RTStrFree(pszAuthority1);
730 if (pszPath1)
731 RTStrFree(pszPath1);
732 if (pszQuery1)
733 RTStrFree(pszQuery1);
734 if (pszFragment1)
735 RTStrFree(pszFragment1);
736
737 return pszResult;
738}
739
740
741RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
742{
743 AssertPtrReturn(pszUri, false);
744 size_t const cchScheme = strlen(pszScheme);
745 return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
746 && pszUri[cchScheme] == ':';
747}
748
749
750RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
751{
752 /*
753 * Validate and adjust input. (RTPathParse check pszPath out for us)
754 */
755 if (pcchUri)
756 {
757 AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
758 *pcchUri = ~(size_t)0;
759 }
760 AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
761 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
762 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
763 fPathStyle = RTPATH_STYLE;
764
765 /*
766 * Let the RTPath code parse the stuff (no reason to duplicate path parsing
767 * and get it slightly wrong here).
768 */
769 RTPATHPARSED ParsedPath;
770 int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle);
771 if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
772 {
773 /* Skip leading slashes. */
774 if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
775 {
776 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
777 while (pszPath[0] == '/' || pszPath[0] == '\\')
778 pszPath++;
779 else
780 while (pszPath[0] == '/')
781 pszPath++;
782 }
783 const size_t cchPath = strlen(pszPath);
784
785 /*
786 * Calculate the encoded length and figure destination buffering.
787 */
788 static const char s_szPrefix[] = "file:///";
789 size_t const cchPrefix = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
790 size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
791
792 if (pcchUri)
793 *pcchUri = cchEncoded;
794
795 char *pszDst;
796 char *pszFreeMe = NULL;
797 if (!cbUri || *ppszUri == NULL)
798 {
799 cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
800 *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
801 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
802 }
803 else if (cchEncoded < cbUri)
804 pszDst = *ppszUri;
805 else
806 return VERR_BUFFER_OVERFLOW;
807
808 /*
809 * Construct the URI.
810 */
811 memcpy(pszDst, s_szPrefix, cchPrefix);
812 pszDst[cchPrefix] = '\0';
813 rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
814 if (RT_SUCCESS(rc))
815 {
816 Assert(strlen(pszDst) == cbUri - 1);
817 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
818 RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
819 return VINF_SUCCESS;
820 }
821
822 AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
823 if (pszFreeMe)
824 RTStrFree(pszFreeMe);
825 }
826 return rc;
827}
828
829
830RTDECL(char *) RTUriFileCreate(const char *pszPath)
831{
832 char *pszUri = NULL;
833 int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
834 if (RT_SUCCESS(rc))
835 return pszUri;
836 return NULL;
837}
838
839
840RTDECL(char *) RTUriFilePath(const char *pszUri, uint32_t uFormat)
841{
842 return RTUriFileNPath(pszUri, uFormat, RTSTR_MAX);
843}
844
845
846RTDECL(char *) RTUriFileNPath(const char *pszUri, uint32_t uFormat, size_t cchMax)
847{
848 AssertPtrReturn(pszUri, NULL);
849 AssertReturn(uFormat == URI_FILE_FORMAT_AUTO || uFormat == URI_FILE_FORMAT_UNIX || uFormat == URI_FILE_FORMAT_WIN, NULL);
850
851 /* Auto is based on the current OS. */
852 if (uFormat == URI_FILE_FORMAT_AUTO)
853#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
854 uFormat = URI_FILE_FORMAT_WIN;
855#else
856 uFormat = URI_FILE_FORMAT_UNIX;
857#endif
858
859 /* Check that this is a file URI. */
860 if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) != 0)
861 return NULL;
862
863 RTURIPARSED Parsed;
864 int rc = rtUriParse(pszUri, &Parsed);
865 if (RT_SUCCESS(rc))
866 {
867 /* No path detected? Take authority as path then. */
868 if (!Parsed.cchPath)
869 {
870 Parsed.cchPath = Parsed.cchAuthority;
871 Parsed.offPath = Parsed.offAuthority;
872 Parsed.cchAuthority = 0;
873 }
874 }
875
876 if ( RT_SUCCESS(rc)
877 && Parsed.cchPath)
878 {
879 /*
880 * Calculate the size of the encoded result.
881 */
882 size_t cbResult = 0;
883
884 /* Skip the leading slash if a DOS drive letter (e.g. "C:") is detected right after it. */
885 if ( Parsed.cchPath >= 3
886 && pszUri[Parsed.offPath] == '/' /* Leading slash. */
887 && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) /* Drive letter. */
888 && pszUri[Parsed.offPath + 2] == ':')
889 {
890 Parsed.offPath++;
891 Parsed.cchPath--;
892 }
893
894 /* Windows: Authority given? Include authority as part of UNC path */
895 if (uFormat == URI_FILE_FORMAT_WIN && Parsed.cchAuthority)
896 {
897 cbResult += 2; /* UNC slashes "\\". */
898 cbResult += Parsed.cchAuthority;
899 }
900
901 cbResult += Parsed.cchPath;
902 cbResult += 1; /* Zero termination. */
903
904 /*
905 * Compose encoded string.
906 */
907 char *pszResult;
908 char *pszTmp = pszResult = RTStrAlloc(cbResult);
909 if (pszTmp)
910 {
911 size_t cbTmp = cbResult;
912
913 /* Windows: If an authority is given, add the required UNC prefix. */
914 if (uFormat == URI_FILE_FORMAT_WIN && Parsed.cchAuthority)
915 {
916 rc = RTStrCatP(&pszTmp, &cbTmp, "\\\\");
917 if (RT_SUCCESS(rc))
918 rc = RTStrCatPEx(&pszTmp, &cbTmp, &pszUri[Parsed.offAuthority], Parsed.cchAuthority);
919 }
920 if (RT_SUCCESS(rc))
921 rc = RTStrCatPEx(&pszTmp, &cbTmp, &pszUri[Parsed.offPath], Parsed.cchPath);
922 AssertRC(rc); /* Shall not happen! */
923 if (RT_SUCCESS(rc))
924 {
925 /*
926 * Decode the string and switch the slashes around the request way before returning.
927 */
928 char *pszPath = rtUriPercentDecodeN(pszResult, cbResult - 1 /* Minus termination */);
929 if (pszPath)
930 {
931 RTStrFree(pszResult);
932
933 if (uFormat == URI_FILE_FORMAT_UNIX)
934 return RTPathChangeToUnixSlashes(pszPath, true);
935 Assert(uFormat == URI_FILE_FORMAT_WIN);
936 return RTPathChangeToDosSlashes(pszPath, true);
937 }
938
939 /* Failed. */
940 }
941 RTStrFree(pszResult);
942 }
943 }
944 return NULL;
945}
946
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette