VirtualBox

source: vbox/trunk/src/bldprogs/scmparser.cpp@ 69213

Last change on this file since 69213 was 69213, checked in by vboxsync, 8 years ago

bldprogs: scm cleanups

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 33.5 KB
Line 
1/* $Id: scmparser.cpp 69213 2017-10-24 13:56:47Z vboxsync $ */
2/** @file
3 * IPRT Testcase / Tool - Source Code Massager, Code Parsers.
4 */
5
6/*
7 * Copyright (C) 2010-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include <iprt/assert.h>
23#include <iprt/ctype.h>
24#include <iprt/dir.h>
25#include <iprt/env.h>
26#include <iprt/file.h>
27#include <iprt/err.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/mem.h>
31#include <iprt/message.h>
32#include <iprt/param.h>
33#include <iprt/path.h>
34#include <iprt/process.h>
35#include <iprt/stream.h>
36#include <iprt/string.h>
37
38#include "scm.h"
39
40
41/*********************************************************************************************************************************
42* Structures and Typedefs *
43*********************************************************************************************************************************/
/**
 * Comment tester callback.
 *
 * @returns The length of the comment marker found at the start of @a pchLine,
 *          0 if there is no marker there.
 * @param   pchLine     Where to look for the marker (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     false when checking for a plain comment start, true
 *                      when probing for the longer/repeated marker variant.
 */
typedef size_t (*PFNISCOMMENT)(const char *pchLine, size_t cchLine, bool fSecond);


/**
 * Comment tester for C++ style line comments.
 *
 * @returns 2 for a double slash when @a fSecond is false, 3 for a triple
 *          slash when @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to examine.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the three slash variant.
 */
static size_t isCppLineComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    /* Anything not starting with two slashes is of no interest. */
    if (cchLine < 2 || pchLine[0] != '/' || pchLine[1] != '/')
        return 0;
    if (!fSecond)
        return 2;
    return cchLine >= 3 && pchLine[2] == '/' ? 3 : 0;
}
63
64
/**
 * Comment tester for hash ('#') line comments.
 *
 * @returns 1 for a single hash when @a fSecond is false, 2 for a double hash
 *          when @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to examine.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the double hash variant.
 */
static size_t isHashComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    size_t cchMarker = 0;
    if (cchLine >= 1 && pchLine[0] == '#')
    {
        if (!fSecond)
            cchMarker = 1;
        else if (cchLine >= 2 && pchLine[1] == '#')
            cchMarker = 2;
    }
    return cchMarker;
}
79
80
/**
 * Comment tester for semicolon (';') line comments.
 *
 * @returns 1 for a single semicolon when @a fSecond is false, 2 for a double
 *          semicolon when @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to examine.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the double semicolon variant.
 */
static size_t isSemicolonComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    /* No leading semicolon, no comment. */
    if (cchLine == 0 || pchLine[0] != ';')
        return 0;

    if (fSecond)
        return cchLine >= 2 && pchLine[1] == ';' ? 2 : 0;
    return 1;
}
95
96
/**
 * Checks for a batch file comment prefix, i.e. whether a case-insensitive
 * "rem" keyword starts at offset @a a_off of the @a a_cch characters long
 * line @a a_pch and is followed by either the end of the line or a
 * whitespace character.
 *
 * @note    The arguments are evaluated more than once.
 */
#define IS_REM(a_pch, a_off, a_cch) \
    (   (a_cch) >= (a_off) + 3 \
     && ((a_pch)[(a_off)    ] == 'r' || (a_pch)[(a_off)    ] == 'R') \
     && ((a_pch)[(a_off) + 1] == 'e' || (a_pch)[(a_off) + 1] == 'E') \
     && ((a_pch)[(a_off) + 2] == 'm' || (a_pch)[(a_off) + 2] == 'M') \
     && ((a_cch) == (a_off) + 3 || RT_C_IS_SPACE((a_pch)[(a_off) + 3])) )
104
105
106/**
107 * Callback for checking if comment.
108 */
109static size_t isBatchComment(const char *pchLine, size_t cchLine, bool fSecond)
110{
111 if (!fSecond)
112 {
113 if (IS_REM(pchLine, 0, cchLine))
114 return 3;
115 }
116 else
117 {
118 /* Check for the 2nd in "rem rem" lines. */
119 if ( cchLine >= 4
120 && RT_C_IS_SPACE(*pchLine)
121 && IS_REM(pchLine, 1, cchLine))
122 return 4;
123 }
124 return 0;
125}
126
127
128/**
129 * Common worker for enumeratePythonComments and enumerateSimpleLineComments.
130 *
131 * @returns IPRT status code.
132 * @param pIn The input stream.
133 * @param pfnIsComment Comment tester function.
134 * @param pfnCallback The callback.
135 * @param pvUser The user argument for the callback.
136 * @param ppchLine Pointer to the line variable.
137 * @param pcchLine Pointer to the line length variable.
138 * @param penmEol Pointer to the line ending type variable.
139 * @param piLine Pointer to the line number variable.
140 * @param poff Pointer to the line offset variable. On input this
141 * is positioned at the start of the comment.
142 */
143static int handleLineComment(PSCMSTREAM pIn, PFNISCOMMENT pfnIsComment,
144 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser,
145 const char **ppchLine, size_t *pcchLine, PSCMEOL penmEol,
146 uint32_t *piLine, size_t *poff)
147{
148 /* Unpack input/output variables. */
149 uint32_t iLine = *piLine;
150 const char *pchLine = *ppchLine;
151 size_t cchLine = *pcchLine;
152 size_t off = *poff;
153 SCMEOL enmEol = *penmEol;
154
155 /*
156 * Take down the basic info about the comment.
157 */
158 SCMCOMMENTINFO Info;
159 Info.iLineStart = iLine;
160 Info.iLineEnd = iLine;
161 Info.offStart = (uint32_t)off;
162 Info.offEnd = (uint32_t)cchLine;
163
164 size_t cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false);
165 Assert(cchSkip > 0);
166 off += cchSkip;
167
168 /* Determin comment type. */
169 Info.enmType = kScmCommentType_Line;
170 char ch;
171 cchSkip = 1;
172 if ( off < cchLine
173 && ( (ch = pchLine[off]) == '!'
174 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, true)) > 0) )
175 {
176 unsigned ch2;
177 if ( off + cchSkip == cchLine
178 || RT_C_IS_SPACE(ch2 = pchLine[off + cchSkip]) )
179 {
180 Info.enmType = ch != '!' ? kScmCommentType_Line_JavaDoc : kScmCommentType_Line_Qt;
181 off += cchSkip;
182 }
183 else if ( ch2 == '<'
184 && ( off + cchSkip + 1 == cchLine
185 || RT_C_IS_SPACE(pchLine[off + cchSkip + 1]) ))
186 {
187 Info.enmType = ch == '!' ? kScmCommentType_Line_JavaDoc_After : kScmCommentType_Line_Qt_After;
188 off += cchSkip + 1;
189 }
190 }
191
192 /*
193 * Copy body of the first line. Like for C, we ignore a single space in the first comment line.
194 */
195 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
196 off++;
197 size_t cchBody = cchLine;
198 while (cchBody >= off && RT_C_IS_SPACE(pchLine[cchBody - 1]))
199 cchBody--;
200 cchBody -= off;
201 size_t cbBodyAlloc = RT_MAX(_1K, RT_ALIGN_Z(cchBody + 64, 128));
202 char *pszBody = (char *)RTMemAlloc(cbBodyAlloc);
203 if (!pszBody)
204 return VERR_NO_MEMORY;
205 memcpy(pszBody, &pchLine[off], cchBody);
206 pszBody[cchBody] = '\0';
207
208 Info.cBlankLinesBefore = cchBody == 0;
209
210 /*
211 * Look for more comment lines and append them to the body.
212 */
213 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
214 {
215 iLine++;
216
217 /* Skip leading spaces. */
218 off = 0;
219 while (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
220 off++;
221
222 /* Check if it's a comment. */
223 if ( off >= cchLine
224 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false)) == 0)
225 break;
226 off += cchSkip;
227
228 /* Split on doxygen comment start (if not already in one). */
229 if ( Info.enmType == kScmCommentType_Line
230 && off + 1 < cchLine
231 && ( pfnIsComment(&pchLine[off], cchLine - off, true) > 0
232 || ( pchLine[off + 1] == '!'
233 && ( off + 2 == cchLine
234 || pchLine[off + 2] != '!') ) ) )
235 {
236 off -= cchSkip;
237 break;
238 }
239
240 /* Append the body w/o trailing spaces and some leading ones. */
241 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
242 off++;
243 while (off < cchLine && off < Info.offStart + 3 && RT_C_IS_SPACE(pchLine[off]))
244 off++;
245 size_t cchAppend = cchLine;
246 while (cchAppend > off && RT_C_IS_SPACE(pchLine[cchAppend - 1]))
247 cchAppend--;
248 cchAppend -= off;
249
250 size_t cchNewBody = cchBody + 1 + cchAppend;
251 if (cchNewBody >= cbBodyAlloc)
252 {
253 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
254 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
255 if (pvNew)
256 pszBody = (char *)pvNew;
257 else
258 {
259 RTMemFree(pszBody);
260 return VERR_NO_MEMORY;
261 }
262 }
263
264 if ( cchBody > 0
265 || cchAppend > 0)
266 {
267 if (cchBody > 0)
268 pszBody[cchBody++] = '\n';
269 memcpy(&pszBody[cchBody], &pchLine[off], cchAppend);
270 cchBody += cchAppend;
271 pszBody[cchBody] = '\0';
272 }
273 else
274 Info.cBlankLinesBefore++;
275
276 /* Advance. */
277 Info.offEnd = (uint32_t)cchLine;
278 Info.iLineEnd = iLine;
279 }
280
281 /*
282 * Strip trailing empty lines in the body.
283 */
284 Info.cBlankLinesAfter = 0;
285 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
286 {
287 Info.cBlankLinesAfter++;
288 pszBody[--cchBody] = '\0';
289 }
290
291 /*
292 * Do the callback and return.
293 */
294 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
295
296 RTMemFree(pszBody);
297
298 *piLine = iLine;
299 *ppchLine = pchLine;
300 *pcchLine = cchLine;
301 *poff = off;
302 *penmEol = enmEol;
303 return rc;
304}
305
306
307
308/**
309 * Common string litteral handler.
310 *
311 * @returns new pchLine value.
312 * @param pIn The input string.
313 * @param chType The quotation type.
314 * @param pchLine The current line.
315 * @param ppchLine Pointer to the line variable.
316 * @param pcchLine Pointer to the line length variable.
317 * @param penmEol Pointer to the line ending type variable.
318 * @param piLine Pointer to the line number variable.
319 * @param poff Pointer to the line offset variable.
320 */
321static const char *handleStringLitteral(PSCMSTREAM pIn, char chType, const char *pchLine, size_t *pcchLine, PSCMEOL penmEol,
322 uint32_t *piLine, size_t *poff)
323{
324 size_t off = *poff;
325 for (;;)
326 {
327 bool fEnd = false;
328 bool fEscaped = false;
329 size_t const cchLine = *pcchLine;
330 while (off < cchLine)
331 {
332 char ch = pchLine[off++];
333 if (!fEscaped)
334 {
335 if (ch != chType)
336 {
337 if (ch != '\\')
338 { /* likely */ }
339 else
340 fEscaped = true;
341 }
342 else
343 {
344 fEnd = true;
345 break;
346 }
347 }
348 else
349 fEscaped = false;
350 }
351 if (fEnd)
352 break;
353
354 /* next line */
355 pchLine = ScmStreamGetLine(pIn, pcchLine, penmEol);
356 if (!pchLine)
357 break;
358 *piLine += 1;
359 off = 0;
360 }
361
362 *poff = off;
363 return pchLine;
364}
365
366
367/**
368 * Deals with comments in C and C++ code.
369 *
370 * @returns VBox status code / callback return code.
371 * @param pIn The stream to parse.
372 * @param pfnCallback The callback.
373 * @param pvUser The user parameter for the callback.
374 */
375static int enumerateCStyleComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
376{
377 AssertCompile('\'' < '/');
378 AssertCompile('"' < '/');
379
380 int rcRet = VINF_SUCCESS;
381 uint32_t iLine = 0;
382 SCMEOL enmEol;
383 size_t cchLine;
384 const char *pchLine;
385 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
386 {
387 size_t off = 0;
388 while (off < cchLine)
389 {
390 unsigned ch = pchLine[off++];
391 if (ch > (unsigned)'/')
392 { /* not interesting */ }
393 else if (ch == '/')
394 {
395 if (off < cchLine)
396 {
397 ch = pchLine[off++];
398 if (ch == '*')
399 {
400 /*
401 * Multiline comment. Find the end.
402 *
403 * Note! This is very similar to the python doc string handling further down.
404 */
405 SCMCOMMENTINFO Info;
406 Info.iLineStart = iLine;
407 Info.offStart = (uint32_t)off - 2;
408 Info.iLineEnd = UINT32_MAX;
409 Info.offEnd = UINT32_MAX;
410 Info.cBlankLinesBefore = 0;
411
412 /* Determin comment type (same as for line-comments). */
413 Info.enmType = kScmCommentType_MultiLine;
414 if ( off < cchLine
415 && ( (ch = pchLine[off]) == '*'
416 || ch == '!') )
417 {
418 unsigned ch2;
419 if ( off + 1 == cchLine
420 || RT_C_IS_SPACE(ch2 = pchLine[off + 1]) )
421 {
422 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc : kScmCommentType_MultiLine_Qt;
423 off += 1;
424 }
425 else if ( ch2 == '<'
426 && ( off + 2 == cchLine
427 || RT_C_IS_SPACE(pchLine[off + 2]) ))
428 {
429 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc_After
430 : kScmCommentType_MultiLine_Qt_After;
431 off += 2;
432 }
433 }
434
435 /*
436 * Copy the body and find the end of the multiline comment.
437 */
438 size_t cbBodyAlloc = 0;
439 size_t cchBody = 0;
440 char *pszBody = NULL;
441 for (;;)
442 {
443 /* Parse the line up to the end-of-comment or end-of-line. */
444 size_t offLineStart = off;
445 size_t offLastNonBlank = off;
446 size_t offFirstNonBlank = ~(size_t)0;
447 while (off < cchLine)
448 {
449 ch = pchLine[off++];
450 if (ch != '*' || off >= cchLine || pchLine[off] != '/')
451 {
452 if (RT_C_IS_BLANK(ch))
453 {/* kind of likely */}
454 else
455 {
456 offLastNonBlank = off - 1;
457 if (offFirstNonBlank != ~(size_t)0)
458 {/* likely */}
459 else if ( ch != '*' /* ignore continuation-asterisks */
460 || off > Info.offStart + 1 + 1
461 || off > cchLine
462 || ( off < cchLine
463 && !RT_C_IS_SPACE(pchLine[off]))
464 || pszBody == NULL)
465 offFirstNonBlank = off - 1;
466 }
467 }
468 else
469 {
470 Info.offEnd = (uint32_t)++off;
471 Info.iLineEnd = iLine;
472 break;
473 }
474 }
475
476 /* Append line content to the comment body string. */
477 size_t cchAppend;
478 if (offFirstNonBlank == ~(size_t)0)
479 cchAppend = 0; /* empty line */
480 else
481 {
482 if (pszBody)
483 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
484 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
485 offLineStart++;
486 cchAppend = offLastNonBlank + 1 - offLineStart;
487 Assert(cchAppend <= cchLine);
488 }
489
490 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
491 if (cchNewBody >= cbBodyAlloc)
492 {
493 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
494 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
495 if (pvNew)
496 pszBody = (char *)pvNew;
497 else
498 {
499 RTMemFree(pszBody);
500 return VERR_NO_MEMORY;
501 }
502 }
503
504 if (cchBody > 0) /* no leading blank lines */
505 pszBody[cchBody++] = '\n';
506 else if (cchAppend == 0)
507 Info.cBlankLinesBefore++;
508 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
509 cchBody += cchAppend;
510 pszBody[cchBody] = '\0';
511
512 /* Advance to the next line, if we haven't yet seen the end of this comment. */
513 if (Info.iLineEnd != UINT32_MAX)
514 break;
515 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
516 if (!pchLine)
517 {
518 Info.offEnd = (uint32_t)cchLine;
519 Info.iLineEnd = iLine;
520 break;
521 }
522 iLine++;
523 off = 0;
524 }
525
526 /* Strip trailing empty lines in the body. */
527 Info.cBlankLinesAfter = 0;
528 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
529 {
530 Info.cBlankLinesAfter++;
531 pszBody[--cchBody] = '\0';
532 }
533
534 /* Do the callback. */
535 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
536 RTMemFree(pszBody);
537 if (RT_FAILURE(rc))
538 return rc;
539 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
540 rcRet = rc;
541 }
542 else if (ch == '/')
543 {
544 /*
545 * Line comment. Join the other line comment guys.
546 */
547 off -= 2;
548 int rc = handleLineComment(pIn, isCppLineComment, pfnCallback, pvUser,
549 &pchLine, &cchLine, &enmEol, &iLine, &off);
550 if (RT_FAILURE(rc))
551 return rc;
552 if (rcRet == VINF_SUCCESS)
553 rcRet = rc;
554 }
555 }
556 }
557 else if (ch == '"')
558 {
559 /*
560 * String litterals may include sequences that looks like comments. So,
561 * they needs special handling to avoid confusion.
562 */
563 pchLine = handleStringLitteral(pIn, '"', pchLine, &cchLine, &enmEol, &iLine, &off);
564 }
565 /* else: We don't have to deal with character litterals as these shouldn't
566 include comment-like sequences. */
567 } /* for each character in the line */
568
569 iLine++;
570 } /* for each line in the stream */
571
572 int rcStream = ScmStreamGetStatus(pIn);
573 if (RT_SUCCESS(rcStream))
574 return rcRet;
575 return rcStream;
576}
577
578
579/**
580 * Deals with comments in Python code.
581 *
582 * @returns VBox status code / callback return code.
583 * @param pIn The stream to parse.
584 * @param pfnCallback The callback.
585 * @param pvUser The user parameter for the callback.
586 */
587static int enumeratePythonComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
588{
589 AssertCompile('#' < '\'');
590 AssertCompile('"' < '\'');
591
592 int rcRet = VINF_SUCCESS;
593 uint32_t iLine = 0;
594 SCMEOL enmEol;
595 size_t cchLine;
596 const char *pchLine;
597 SCMCOMMENTINFO Info;
598 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
599 {
600 size_t off = 0;
601 while (off < cchLine)
602 {
603 char ch = pchLine[off++];
604 if ((unsigned char)ch > (unsigned char)'\'')
605 { /* not interesting */ }
606 else if (ch == '#')
607 {
608 /*
609 * Line comment. Join paths with the others.
610 */
611 off -= 1;
612 int rc = handleLineComment(pIn, isHashComment, pfnCallback, pvUser,
613 &pchLine, &cchLine, &enmEol, &iLine, &off);
614 if (RT_FAILURE(rc))
615 return rc;
616 if (rcRet == VINF_SUCCESS)
617 rcRet = rc;
618 }
619 else if (ch == '"' || ch == '\'')
620 {
621 /*
622 * String litterals may be doc strings and they may legally include hashes.
623 */
624 const char chType = ch;
625 if ( off + 1 >= cchLine
626 || pchLine[off] != chType
627 || pchLine[off + 1] != chType)
628 pchLine = handleStringLitteral(pIn, chType, pchLine, &cchLine, &enmEol, &iLine, &off);
629 else
630 {
631 /*
632 * Doc string (/ long string).
633 *
634 * Note! This is very similar to the multiline C comment handling above.
635 */
636 Info.iLineStart = iLine;
637 Info.offStart = (uint32_t)off - 1;
638 Info.iLineEnd = UINT32_MAX;
639 Info.offEnd = UINT32_MAX;
640 Info.cBlankLinesBefore = 0;
641 Info.enmType = kScmCommentType_DocString;
642
643 off += 2;
644
645 /* Copy the body and find the end of the doc string comment. */
646 size_t cbBodyAlloc = 0;
647 size_t cchBody = 0;
648 char *pszBody = NULL;
649 for (;;)
650 {
651 /* Parse the line up to the end-of-comment or end-of-line. */
652 size_t offLineStart = off;
653 size_t offLastNonBlank = off;
654 size_t offFirstNonBlank = ~(size_t)0;
655 bool fEscaped = false;
656 while (off < cchLine)
657 {
658 ch = pchLine[off++];
659 if (!fEscaped)
660 {
661 if ( off + 1 >= cchLine
662 || ch != chType
663 || pchLine[off] != chType
664 || pchLine[off + 1] != chType)
665 {
666 if (RT_C_IS_BLANK(ch))
667 {/* kind of likely */}
668 else
669 {
670 offLastNonBlank = off - 1;
671 if (offFirstNonBlank != ~(size_t)0)
672 {/* likely */}
673 else if ( ch != '*' /* ignore continuation-asterisks */
674 || off > Info.offStart + 1 + 1
675 || off > cchLine
676 || ( off < cchLine
677 && !RT_C_IS_SPACE(pchLine[off]))
678 || pszBody == NULL)
679 offFirstNonBlank = off - 1;
680
681 if (ch != '\\')
682 {/* likely */ }
683 else
684 fEscaped = true;
685 }
686 }
687 else
688 {
689 off += 2;
690 Info.offEnd = (uint32_t)off;
691 Info.iLineEnd = iLine;
692 break;
693 }
694 }
695 else
696 fEscaped = false;
697 }
698
699 /* Append line content to the comment body string. */
700 size_t cchAppend;
701 if (offFirstNonBlank == ~(size_t)0)
702 cchAppend = 0; /* empty line */
703 else
704 {
705 if (pszBody)
706 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
707 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
708 offLineStart++;
709 cchAppend = offLastNonBlank + 1 - offLineStart;
710 Assert(cchAppend <= cchLine);
711 }
712
713 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
714 if (cchNewBody >= cbBodyAlloc)
715 {
716 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
717 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
718 if (pvNew)
719 pszBody = (char *)pvNew;
720 else
721 {
722 RTMemFree(pszBody);
723 return VERR_NO_MEMORY;
724 }
725 }
726
727 if (cchBody > 0) /* no leading blank lines */
728 pszBody[cchBody++] = '\n';
729 else if (cchAppend == 0)
730 Info.cBlankLinesBefore++;
731 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
732 cchBody += cchAppend;
733 pszBody[cchBody] = '\0';
734
735 /* Advance to the next line, if we haven't yet seen the end of this comment. */
736 if (Info.iLineEnd != UINT32_MAX)
737 break;
738 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
739 if (!pchLine)
740 {
741 Info.offEnd = (uint32_t)cchLine;
742 Info.iLineEnd = iLine;
743 break;
744 }
745 iLine++;
746 off = 0;
747 }
748
749 /* Strip trailing empty lines in the body. */
750 Info.cBlankLinesAfter = 0;
751 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
752 {
753 Info.cBlankLinesAfter++;
754 pszBody[--cchBody] = '\0';
755 }
756
757 /* Do the callback. */
758 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
759 RTMemFree(pszBody);
760 if (RT_FAILURE(rc))
761 return rc;
762 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
763 rcRet = rc;
764 }
765 }
766 /* else: We don't have to deal with character litterals as these shouldn't
767 include comment-like sequences. */
768 } /* for each character in the line */
769
770 iLine++;
771 } /* for each line in the stream */
772
773 int rcStream = ScmStreamGetStatus(pIn);
774 if (RT_SUCCESS(rcStream))
775 return rcRet;
776 return rcStream;
777}
778
779
780/**
781 * Deals with comments in DOS batch files.
782 *
783 * @returns VBox status code / callback return code.
784 * @param pIn The stream to parse.
785 * @param pfnCallback The callback.
786 * @param pvUser The user parameter for the callback.
787 */
788static int enumerateBatchComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
789{
790 int rcRet = VINF_SUCCESS;
791 uint32_t iLine = 0;
792 SCMEOL enmEol;
793 size_t cchLine;
794 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
795 while (pchLine != NULL)
796 {
797 /*
798 * Skip leading blanks and check for 'rem'.
799 * At the moment we do not parse '::lable-comments'.
800 */
801 size_t off = 0;
802 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
803 off++;
804 if (!IS_REM(pchLine, off, cchLine))
805 {
806 iLine++;
807 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
808 }
809 else
810 {
811 int rc = handleLineComment(pIn, isBatchComment, pfnCallback, pvUser,
812 &pchLine, &cchLine, &enmEol, &iLine, &off);
813 if (RT_FAILURE(rc))
814 return rc;
815 if (rcRet == VINF_SUCCESS)
816 rcRet = rc;
817 }
818 }
819
820 int rcStream = ScmStreamGetStatus(pIn);
821 if (RT_SUCCESS(rcStream))
822 return rcRet;
823 return rcStream;
824}
825
826
827/**
828 * Deals with simple line comments.
829 *
830 * @returns VBox status code / callback return code.
831 * @param pIn The stream to parse.
832 * @param chStart The start of comment character.
833 * @param pfnIsComment Comment tester function.
834 * @param pfnCallback The callback.
835 * @param pvUser The user parameter for the callback.
836 */
837static int enumerateSimpleLineComments(PSCMSTREAM pIn, char chStart, PFNISCOMMENT pfnIsComment,
838 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
839{
840 int rcRet = VINF_SUCCESS;
841 uint32_t iLine = 0;
842 SCMEOL enmEol;
843 size_t cchLine;
844 const char *pchLine;
845 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
846 {
847 size_t off = 0;
848 while (off < cchLine)
849 {
850 char ch = pchLine[off++];
851 if (ch != chStart)
852 { /* not interesting */ }
853 else
854 {
855 off -= 1;
856 int rc = handleLineComment(pIn, pfnIsComment, pfnCallback, pvUser,
857 &pchLine, &cchLine, &enmEol, &iLine, &off);
858 if (RT_FAILURE(rc))
859 return rc;
860 if (rcRet == VINF_SUCCESS)
861 rcRet = rc;
862 }
863 } /* for each character in the line */
864
865 iLine++;
866 } /* for each line in the stream */
867
868 int rcStream = ScmStreamGetStatus(pIn);
869 if (RT_SUCCESS(rcStream))
870 return rcRet;
871 return rcStream;
872}
873
874
875/**
876 * Enumerates the comments in the given stream, calling @a pfnCallback for each.
877 *
878 * @returns IPRT status code.
879 * @param pIn The stream to parse.
880 * @param enmCommentStyle The comment style of the source stream.
881 * @param pfnCallback The function to call.
882 * @param pvUser User argument to the callback.
883 */
884int ScmEnumerateComments(PSCMSTREAM pIn, SCMCOMMENTSTYLE enmCommentStyle, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
885{
886 switch (enmCommentStyle)
887 {
888 case kScmCommentStyle_C:
889 return enumerateCStyleComments(pIn, pfnCallback, pvUser);
890
891 case kScmCommentStyle_Python:
892 return enumeratePythonComments(pIn, pfnCallback, pvUser);
893
894 case kScmCommentStyle_Semicolon:
895 return enumerateSimpleLineComments(pIn, ';', isSemicolonComment, pfnCallback, pvUser);
896
897 case kScmCommentStyle_Hash:
898 return enumerateSimpleLineComments(pIn, '#', isHashComment, pfnCallback, pvUser);
899
900 case kScmCommentStyle_Rem_Upper:
901 case kScmCommentStyle_Rem_Lower:
902 case kScmCommentStyle_Rem_Camel:
903 return enumerateBatchComments(pIn, pfnCallback, pvUser);
904
905 default:
906 AssertFailedReturn(VERR_INVALID_PARAMETER);
907 }
908}
909
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette