VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 96407

Last change on this file since 96407 was 96407, checked in by vboxsync, 3 years ago

scm copyright and license note update

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 89.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 96407 2022-08-22 17:43:14Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2022 Oracle and/or its affiliates.
20
21This file is part of VirtualBox base platform packages, as
22available from https://www.215389.xyz.
23
24This program is free software; you can redistribute it and/or
25modify it under the terms of the GNU General Public License
26as published by the Free Software Foundation, in version 3 of the
27License.
28
29This program is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of
31MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32General Public License for more details.
33
34You should have received a copy of the GNU General Public License
35along with this program; if not, see <https://www.gnu.org/licenses>.
36
37The contents of this file may alternatively be used under the terms
38of the Common Development and Distribution License Version 1.0
39(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
40in the VirtualBox distribution, in which case the provisions of the
41CDDL are applicable instead of those of the GPL.
42
43You may elect to license modified versions of this file under the
44terms and conditions of either the GPL or the CDDL or both.
45
46SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
47"""
48__version__ = "$Revision: 96407 $"
49
50
51# Standard python imports
52import hashlib;
53import os;
54import re;
55import smtplib;
56#import subprocess;
57import sys;
58from email.mime.multipart import MIMEMultipart;
59from email.mime.text import MIMEText;
60from email.utils import COMMASPACE;
61
62if sys.version_info[0] >= 3:
63 from io import BytesIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
64else:
65 from StringIO import StringIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
66from optparse import OptionParser; # pylint: disable=deprecated-module
67from PIL import Image; # pylint: disable=import-error
68
69# Add Test Manager's modules path
70g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
71sys.path.append(g_ksTestManagerDir);
72
73# Test Manager imports
74from common import utils;
75from testmanager.core.db import TMDatabaseConnection;
76from testmanager.core.build import BuildDataEx;
77from testmanager.core.failurereason import FailureReasonLogic;
78from testmanager.core.testbox import TestBoxLogic, TestBoxData;
79from testmanager.core.testcase import TestCaseDataEx;
80from testmanager.core.testgroup import TestGroupData;
81from testmanager.core.testset import TestSetLogic, TestSetData;
82from testmanager.core.testresults import TestResultLogic, TestResultFileData;
83from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
84from testmanager.core.useraccount import UserAccountLogic;
85from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
86 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
87
88# Python 3 hacks:
89if sys.version_info[0] >= 3:
90 xrange = range; # pylint: disable=redefined-builtin,invalid-name
91
92
93class VirtualTestSheriffCaseFile(object):
94 """
95 A failure investigation case file.
96
97 """
98
99
100 ## Max log file we'll read into memory. (256 MB)
101 kcbMaxLogRead = 0x10000000;
102
103 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
104 self.oSheriff = oSheriff;
105 self.oTestSet = oTestSet; # TestSetData
106 self.oTree = oTree; # TestResultDataEx
107 self.oBuild = oBuild; # BuildDataEx
108 self.oTestBox = oTestBox; # TestBoxData
109 self.oTestGroup = oTestGroup; # TestGroupData
110 self.oTestCase = oTestCase; # TestCaseDataEx
111 self.sMainLog = ''; # The main log file. Empty string if not accessible.
112 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
113
114 # Generate a case file name.
115 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
116 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
117 % ( self.oTestSet.idTestSet,
118 self.oTestCase.sName,
119 self.oTestBox.sName,
120 self.oTestBox.sOs,
121 self.oTestBox.sOsVersion,
122 self.oTestBox.sCpuArch,
123 self.oTestBox.sCpuName,
124 self.oTestBox.sCpuVendor,
125 self.oBuild.oCat.sProduct,
126 self.oBuild.oCat.sBranch,
127 self.oBuild.oCat.sType,
128 self.oBuild.iRevision, );
129
130 # Investigation notes.
131 self.tReason = None; # None or one of the ktReason_XXX constants.
132 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
133 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
134
135 #
136 # Reason.
137 #
138
139 def noteReason(self, tReason):
140 """ Notes down a possible reason. """
141 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
142 self.tReason = tReason;
143 return True;
144
145 def noteReasonForId(self, tReason, idTestResult, sComment = None):
146 """ Notes down a possible reason for a specific test result. """
147 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
148 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
149 (u' (%s)' % (sComment,)) if sComment is not None else ''));
150 self.dReasonForResultId[idTestResult] = tReason;
151 if sComment is not None:
152 self.dCommentForResultId[idTestResult] = sComment;
153 return True;
154
155
156 #
157 # Test classification.
158 #
159
160 def isVBoxTest(self):
161 """ Test classification: VirtualBox (using the build) """
162 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
163
164 def isVBoxUnitTest(self):
165 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
166 return self.isVBoxTest() \
167 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower().startswith('misc: unit tests'));
168
169 def isVBoxInstallTest(self):
170 """ Test case classification: VirtualBox Guest installation test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('install:');
173
174 def isVBoxUnattendedInstallTest(self):
175 """ Test case classification: VirtualBox Guest installation test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('uinstall:');
178
179 def isVBoxUSBTest(self):
180 """ Test case classification: VirtualBox USB test. """
181 return self.isVBoxTest() \
182 and self.oTestCase.sName.lower().startswith('usb:');
183
184 def isVBoxStorageTest(self):
185 """ Test case classification: VirtualBox Storage test. """
186 return self.isVBoxTest() \
187 and self.oTestCase.sName.lower().startswith('storage:');
188
189 def isVBoxGAsTest(self):
190 """ Test case classification: VirtualBox Guest Additions test. """
191 return self.isVBoxTest() \
192 and ( self.oTestCase.sName.lower().startswith('guest additions')
193 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
194
195 def isVBoxAPITest(self):
196 """ Test case classification: VirtualBox API test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('api:');
199
200 def isVBoxBenchmarkTest(self):
201 """ Test case classification: VirtualBox Benchmark test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('benchmark:');
204
205 def isVBoxSmokeTest(self):
206 """ Test case classification: Smoke test. """
207 return self.isVBoxTest() \
208 and self.oTestCase.sName.lower().startswith('smoketest');
209
210 def isVBoxSerialTest(self):
211 """ Test case classification: Smoke test. """
212 return self.isVBoxTest() \
213 and self.oTestCase.sName.lower().startswith('serial:');
214
215
216 #
217 # Utility methods.
218 #
219
220 def getMainLog(self):
221 """
222 Tries to read the main log file since this will be the first source of information.
223 """
224 if self.sMainLog:
225 return self.sMainLog;
226 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
227 if oFile is not None:
228 try:
229 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
230 except Exception as oXcpt:
231 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
232 self.sMainLog = '';
233 else:
234 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
235 return self.sMainLog;
236
237 def getLogFile(self, oFile):
238 """
239 Tries to read the given file as a utf-8 log file.
240 oFile is a TestFileDataEx instance.
241 Returns empty string if problems opening or reading the file.
242 """
243 sContent = '';
244 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
245 if oFile is not None:
246 try:
247 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
248 except Exception as oXcpt:
249 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
250 else:
251 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
252 return sContent;
253
254 def getSvcLog(self):
255 """
256 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
257 Note! Returns the first VBoxSVC log file we find.
258 """
259 if not self.sSvcLog:
260 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
261 if aoSvcLogFiles:
262 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
263 return self.sSvcLog;
264
265 def getScreenshotSha256(self, oFile):
266 """
267 Tries to read the given screenshot file, uncompress it, and do SHA-2
268 on the raw pixels.
269 Returns SHA-2 digest string on success, None on failure.
270 """
271 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
272 try:
273 abImageFile = oImgFile.read();
274 except Exception as oXcpt:
275 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
276 else:
277 try:
278 oImage = Image.open(BytesIO(abImageFile));
279 except Exception as oXcpt:
280 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
281 else:
282 try:
283 oHash = hashlib.sha256();
284 if hasattr(oImage, 'tobytes'):
285 oHash.update(oImage.tobytes());
286 else:
287 oHash.update(oImage.tostring()); # pylint: disable=no-member
288 except Exception as oXcpt:
289 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
290 else:
291 return oHash.hexdigest();
292 return None;
293
294
295
296 def isSingleTestFailure(self):
297 """
298 Figure out if this is a single test failing or if it's one of the
299 more complicated ones.
300 """
301 if self.oTree.cErrors == 1:
302 return True;
303 if self.oTree.deepCountErrorContributers() <= 1:
304 return True;
305 return False;
306
307
308
309class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
310 """
311 Add build info into Test Manager database.
312 """
313
314 ## The user account for the virtual sheriff.
315 ksLoginName = 'vsheriff';
316
317 def __init__(self):
318 """
319 Parse command line.
320 """
321 self.oDb = None;
322 self.tsNow = None;
323 self.oTestResultLogic = None;
324 self.oTestSetLogic = None;
325 self.oFailureReasonLogic = None; # FailureReasonLogic;
326 self.oTestResultFailureLogic = None; # TestResultFailureLogic
327 self.oLogin = None;
328 self.uidSelf = -1;
329 self.oLogFile = None;
330 self.asBsodReasons = [];
331 self.asUnitTestReasons = [];
332
333 oParser = OptionParser();
334 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
335 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
336 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
337 help = 'Work period specified in hours. Defauls is 2 hours.');
338 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
339 help = 'Whether to commit the findings to the database. Default is a dry run.');
340 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
341 help = 'Only investigate this one. Accumulates IDs when repeated.');
342 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
343 help = 'Quiet execution');
344 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
345 help = 'Where to log messages.');
346 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
347 help = 'Enables debug mode.');
348
349 (self.oConfig, _) = oParser.parse_args();
350
351 if self.oConfig.sLogFile:
352 self.oLogFile = open(self.oConfig.sLogFile, "a"); # pylint: disable=consider-using-with
353 self.oLogFile.write('VirtualTestSheriff: $Revision: 96407 $ \n');
354
355
356 def eprint(self, sText):
357 """
358 Prints error messages.
359 Returns 1 (for exit code usage.)
360 """
361 print('error: %s' % (sText,));
362 if self.oLogFile is not None:
363 if sys.version_info[0] >= 3:
364 self.oLogFile.write(u'error: %s\n' % (sText,));
365 else:
366 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
367 return 1;
368
369 def dprint(self, sText):
370 """
371 Prints debug info.
372 """
373 if self.oConfig.fDebug:
374 if not self.oConfig.fQuiet:
375 print('debug: %s' % (sText, ));
376 if self.oLogFile is not None:
377 if sys.version_info[0] >= 3:
378 self.oLogFile.write(u'debug: %s\n' % (sText,));
379 else:
380 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
381 return 0;
382
383 def vprint(self, sText):
384 """
385 Prints verbose info.
386 """
387 if not self.oConfig.fQuiet:
388 print('info: %s' % (sText,));
389 if self.oLogFile is not None:
390 if sys.version_info[0] >= 3:
391 self.oLogFile.write(u'info: %s\n' % (sText,));
392 else:
393 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
394 return 0;
395
396 def getFailureReason(self, tReason):
397 """ Gets the failure reason object for tReason. """
398 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
399
400 def selfCheck(self):
401 """ Does some self checks, looking up things we expect to be in the database and such. """
402 rcExit = 0;
403 for sAttr in dir(self.__class__):
404 if sAttr.startswith('ktReason_'):
405 tReason = getattr(self.__class__, sAttr);
406 oFailureReason = self.getFailureReason(tReason);
407 if oFailureReason is None:
408 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
409 % (tReason[1], tReason[0],));
410
411 # Check the user account as well.
412 if self.oLogin is None:
413 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
414 if oLogin is None:
415 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
416 return rcExit;
417
418 def sendEmailAlert(self, uidAuthor, sBodyText):
419 """
420 Sends email alert.
421 """
422
423 # Get author email
424 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
425 sFrom = self.oDb.fetchOne();
426 if sFrom is not None:
427 sFrom = sFrom[0];
428 else:
429 sFrom = g_ksAlertFrom;
430
431 # Gather recipient list.
432 asEmailList = [];
433 for sUser in g_asAlertList:
434 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
435 sEmail = self.oDb.fetchOne();
436 if sEmail:
437 asEmailList.append(sEmail[0]);
438 if not asEmailList:
439 return self.eprint('No email addresses to send alter to!');
440
441 # Compose the message.
442 oMsg = MIMEMultipart();
443 oMsg['From'] = sFrom;
444 oMsg['To'] = COMMASPACE.join(asEmailList);
445 oMsg['Subject'] = g_ksAlertSubject;
446 oMsg.attach(MIMEText(sBodyText, 'plain'))
447
448 # Try send it.
449 try:
450 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
451 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
452 oSMTP.quit()
453 except smtplib.SMTPException as oXcpt:
454 return self.eprint('Failed to send mail: %s' % (oXcpt,));
455
456 return 0;
457
458 def badTestBoxManagement(self):
459 """
460 Looks for bad test boxes and first tries once to reboot them then disables them.
461 """
462 rcExit = 0;
463
464 #
465 # We skip this entirely if we're running in the past and not in harmless debug mode.
466 #
467 if self.oConfig.cStartHoursAgo != 0 \
468 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
469 return rcExit;
470 tsNow = self.tsNow if self.oConfig.fDebug else None;
471 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
472 oTestBoxLogic = TestBoxLogic(self.oDb);
473
474 #
475 # Generate a list of failures reasons we consider bad-testbox behavior.
476 #
477 aidFailureReasons = [
478 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
479 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
480 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
481 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
482 ];
483
484 #
485 # Get list of bad test boxes for given period and check them out individually.
486 #
487 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
488 aidFailureReasons = aidFailureReasons);
489 for idTestBox in aidBadTestBoxes:
490 # Skip if the testbox is already disabled or has a pending reboot command.
491 try:
492 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
493 except Exception as oXcpt:
494 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
495 continue;
496 if not oTestBox.fEnabled:
497 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
498 % ( idTestBox, oTestBox.sName, ));
499 continue;
500 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
501 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
502 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
503 continue;
504
505 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
506 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
507 cOkay = 0;
508 cBad = 0;
509 iFirstOkay = len(aoSets);
510 for iSet, oSet in enumerate(aoSets):
511 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
512 cBad += 1;
513 else:
514 # Check for bad failure reasons.
515 oFailure = None;
516 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
517 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
518 aoFailedResults = oTree.getListOfFailures();
519 for oFailedResult in aoFailedResults:
520 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
521 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
522 break;
523 oFailure = None;
524 if oFailure is not None:
525 cBad += 1;
526 else:
527 # This is an okay test result then.
528 ## @todo maybe check the elapsed time here, it could still be a bad run?
529 cOkay += 1;
530 iFirstOkay = min(iFirstOkay, iSet);
531 if iSet > 10:
532 break;
533
534 # We react if there are two or more bad-testbox statuses at the head of the
535 # history and at least three in the last 10 results.
536 if iFirstOkay >= 2 and cBad > 2:
537 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
538 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
539 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
540 self.vprint(sComment);
541 self.sendEmailAlert(self.uidSelf, sComment);
542 if self.oConfig.fRealRun is True:
543 try:
544 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
545 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
546 % (iFirstOkay, cBad, cOkay),);
547 except Exception as oXcpt:
548 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
549 else:
550 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
551 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
552 self.vprint(sComment);
553 self.sendEmailAlert(self.uidSelf, sComment);
554 if self.oConfig.fRealRun is True:
555 try:
556 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
557 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
558 % (iFirstOkay, cBad, cOkay),);
559 except Exception as oXcpt:
560 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
561 else:
562 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
563 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
564
565 ## @todo r=bird: review + rewrite;
566 ## - no selecting here, that belongs in the core/*.py files.
567 ## - preserve existing comments.
568 ## - doing way too much in the try/except block.
569 ## - No password quoting in the sshpass command that always fails (127).
570 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
571 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
572 ## Besides, it need to be constant elsewhere in the file, not a variable here.
573 ##
574 ##
575 ## Reset hanged testboxes
576 ##
577 #cStatusTimeoutMins = 10;
578 #
579 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
580 # ' FROM TestBoxStatuses, TestBoxes\n'
581 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
582 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
583 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
584 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
585 #for idTestBox in self.oDb.fetchAll():
586 # idTestBox = idTestBox[0];
587 # try:
588 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
589 # except Exception as oXcpt:
590 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
591 # continue;
592 # # Skip if the testbox is already disabled, already reset or there's no iLOM
593 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
594 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
595 # % ( idTestBox, oTestBox.sName, ));
596 # continue;
597 # ## @todo get iLOM credentials from a table?
598 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
599 # try:
600 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
601 # sStdout = oPs.communicate()[0];
602 # iRC = oPs.wait();
603 #
604 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
605 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
606 #
607 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
608 # self.vprint(sComment);
609 # self.sendEmailAlert(self.uidSelf, sComment);
610 #
611 # except Exception as oXcpt:
612 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
613 #
614 return rcExit;
615
616
617 ## @name Failure reasons we know.
618 ## @{
619
620 ktReason_Add_Installer_Win_Failed = ( 'Additions', 'Win GA install' );
621 ktReason_Add_ShFl_Automount = ( 'Additions', 'Automounting' );
622 ktReason_Add_ShFl_FsPerf = ( 'Additions', 'FsPerf' );
623 ktReason_Add_ShFl_FsPerf_Abend = ( 'Additions', 'FsPerf abend' );
624 ktReason_Add_GstCtl_Preparations = ( 'Additions', 'GstCtl preparations' );
625 ktReason_Add_GstCtl_SessionBasics = ( 'Additions', 'Session basics' );
626 ktReason_Add_GstCtl_SessionProcRefs = ( 'Additions', 'Session process' );
627 ktReason_Add_GstCtl_Session_Reboot = ( 'Additions', 'Session reboot' );
628 ktReason_Add_GstCtl_CopyFromGuest_Timeout = ( 'Additions', 'CopyFromGuest timeout' );
629 ktReason_Add_GstCtl_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest timeout' );
630 ktReason_Add_GstCtl_CopyToGuest_DstEmpty = ( 'Additions', 'CopyToGuest dst empty' );
631 ktReason_Add_GstCtl_CopyToGuest_DstExists = ( 'Additions', 'CopyToGuest dst exists' );
632 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
633 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
634 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
635 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
636 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
637 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
638 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
639 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
640 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
641 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
642 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
643 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
644 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
645 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
646 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
647 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
648 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
649 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
650 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
651 ktReason_Host_InstallationWantReboot = ( 'Host', 'Installation want reboot' );
652 ktReason_Host_InvalidPackage = ( 'Host', 'ERROR_INSTALL_PACKAGE_INVALID' );
653 ktReason_Host_InstallSourceAbsent = ( 'Host', 'ERROR_INSTALL_SOURCE_ABSENT' );
654 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
655 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
656 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
657 ktReason_Host_win32com_gen_py = ( 'Host', 'win32com.gen_py' );
658 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
659 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
660 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
661 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
662 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
663 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
664 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
665 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
666 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
667 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
668 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
669 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
670 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
671 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
672 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
673 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
674 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
675 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
676 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
677 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
678 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
679 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
680 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
681 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
682 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
683 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
684 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
685 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
686 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
687 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
688 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
689 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
690 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
691 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
692 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
693 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
694 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
695 ## @}
696
697 ## BSOD category.
698 ksBsodCategory = 'BSOD';
699 ## Special reason indicating that the flesh and blood sheriff has work to do.
700 ksBsodAddNew = 'Add new BSOD';
701
702 ## Unit test category.
703 ksUnitTestCategory = 'Unit';
704 ## Special reason indicating that the flesh and blood sheriff has work to do.
705 ksUnitTestAddNew = 'Add new';
706
707 ## Used for indica that we shouldn't report anything for this test result ID and
708 ## consider promoting the previous error to test set level if it's the only one.
709 ktHarmless = ( 'Probably', 'Caused by previous error' );
710
711
712 def caseClosed(self, oCaseFile):
713 """
714 Reports the findings in the case and closes it.
715 """
716 #
717 # Log it and create a dReasonForReasultId we can use below.
718 #
719 dCommentForResultId = oCaseFile.dCommentForResultId;
720 if oCaseFile.dReasonForResultId:
721 # Must weed out ktHarmless.
722 dReasonForResultId = {};
723 for idKey, tReason in oCaseFile.dReasonForResultId.items():
724 if tReason is not self.ktHarmless:
725 dReasonForResultId[idKey] = tReason;
726 if not dReasonForResultId:
727 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
728 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
729 return False;
730
731 # Try promote to single reason.
732 atValues = dReasonForResultId.values();
733 fSingleReason = True;
734 if len(dReasonForResultId) == 1 and next(iter(dReasonForResultId.keys())) != oCaseFile.oTestSet.idTestResult:
735 self.dprint(u'Promoting single reason to whole set: %s' % (next(iter(atValues)),));
736 elif len(dReasonForResultId) > 1 and len(atValues) == list(atValues).count(next(iter(atValues))):
737 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), next(iter(atValues))));
738 else:
739 fSingleReason = False;
740 if fSingleReason:
741 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(atValues)), };
742 if dCommentForResultId:
743 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(dCommentForResultId.values())), };
744 elif oCaseFile.tReason is not None:
745 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
746 else:
747 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
748 return False;
749
750 self.vprint(u'Closing %s with following reason%s: %s'
751 % ( oCaseFile.sName, 's' if len(dReasonForResultId) > 1 else '', dReasonForResultId, ));
752
753 #
754 # Add the test failure reason record(s).
755 #
756 for idTestResult, tReason in dReasonForResultId.items():
757 oFailureReason = self.getFailureReason(tReason);
758 if oFailureReason is not None:
759 sComment = 'Set by $Revision: 96407 $' # Handy for reverting later.
760 if idTestResult in dCommentForResultId:
761 sComment += ': ' + dCommentForResultId[idTestResult];
762
763 oAdd = TestResultFailureData();
764 oAdd.initFromValues(idTestResult = idTestResult,
765 idFailureReason = oFailureReason.idFailureReason,
766 uidAuthor = self.uidSelf,
767 idTestSet = oCaseFile.oTestSet.idTestSet,
768 sComment = sComment,);
769 if self.oConfig.fRealRun:
770 try:
771 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
772 except Exception as oXcpt:
773 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
774 % (oXcpt, oAdd, oCaseFile.sLongName,));
775 else:
776 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
777 return True;
778
779 #
780 # Tools for assiting log parsing.
781 #
782
783 @staticmethod
784 def matchFollowedByLines(sStr, off, asFollowingLines):
785 """ Worker for isThisFollowedByTheseLines. """
786
787 # Advance off to the end of the line.
788 off = sStr.find('\n', off);
789 if off < 0:
790 return False;
791 off += 1;
792
793 # Match each string with the subsequent lines.
794 for iLine, sLine in enumerate(asFollowingLines):
795 offEnd = sStr.find('\n', off);
796 if offEnd < 0:
797 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
798 if sLine and sStr.find(sLine, off, offEnd) < 0:
799 return False;
800
801 # next line.
802 off = offEnd + 1;
803
804 return True;
805
806 @staticmethod
807 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
808 """
809 Looks for a line contining sFirst which is then followed by lines
810 with the strings in asFollowingLines. (No newline chars anywhere!)
811 Returns True / False.
812 """
813 off = sStr.find(sFirst, 0);
814 while off >= 0:
815 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
816 return True;
817 off = sStr.find(sFirst, off + 1);
818 return False;
819
820 @staticmethod
821 def findAndReturnRestOfLine(sHaystack, sNeedle):
822 """
823 Looks for sNeedle in sHaystack.
824 Returns The text following the needle up to the end of the line.
825 Returns None if not found.
826 """
827 if sHaystack is None:
828 return None;
829 off = sHaystack.find(sNeedle);
830 if off < 0:
831 return None;
832 off += len(sNeedle)
833 offEol = sHaystack.find('\n', off);
834 if offEol < 0:
835 offEol = len(sHaystack);
836 return sHaystack[off:offEol]
837
838 @staticmethod
839 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
840 """
841 Looks for sNeedle in zeroe or more haystacks (asHaystack).
842 Returns The text following the first needed found up to the end of the line.
843 Returns None if not found.
844 """
845 for sHaystack in asHaystacks:
846 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
847 if sRet is not None:
848 return sRet;
849 return None;
850
851
852 #
853 # The investigative units.
854 #
855
856 katSimpleInstallUninstallMainLogReasons = [
857 # ( Whether to stop on hit, reason tuple, needle text. )
858 ( False, ktReason_Host_LeftoverService,
859 'SERVICE_NAME: vbox' ),
860 ( False, ktReason_Host_LeftoverService,
861 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
862 ];
863
864 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
865 'darwin': [
866 # ( Whether to stop on hit, reason tuple, needle text. )
867 ( True, ktReason_Host_DriverNotUnloading,
868 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
869 ],
870 'linux': [
871 # ( Whether to stop on hit, reason tuple, needle text. )
872 ( True, ktReason_Host_DriverNotCompilable,
873 'This system is not currently set up to build kernel modules' ),
874 ( True, ktReason_Host_DriverNotCompilable,
875 'This system is currently not set up to build kernel modules' ),
876 ( True, ktReason_Host_InstallationFailed,
877 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
878 ( True, ktReason_Host_DriverNotUnloading,
879 'Cannot unload module vboxdrv'),
880 ],
881 'solaris': [
882 # ( Whether to stop on hit, reason tuple, needle text. )
883 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
884 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
885 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
886 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
887 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
888 ],
889 'win': [
890 # ( Whether to stop on hit, reason tuple, needle text. )
891 ( True, ktReason_Host_InstallationWantReboot, 'ERROR_SUCCESS_REBOOT_REQUIRED' ),
892 ( False, ktReason_Host_InstallationFailed, 'Installation error.' ),
893 ( True, ktReason_Host_InvalidPackage, 'Uninstaller failed, exit code: 1620' ),
894 ( True, ktReason_Host_InstallSourceAbsent, 'Uninstaller failed, exit code: 1612' ),
895 ],
896 };
897
898
899 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
900 """
901 Investigates an install or uninstall failure.
902
903 We lump the two together since the installation typically also performs
904 an uninstall first and will be seeing similar issues to the uninstall.
905 """
906 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
907
908 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
909 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
910 return True;
911
912 atSimple = self.katSimpleInstallUninstallMainLogReasons;
913 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
914 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
915
916 fFoundSomething = False;
917 for fStopOnHit, tReason, sNeedle in atSimple:
918 if sResultLog.find(sNeedle) > 0:
919 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
920 if fStopOnHit:
921 return True;
922 fFoundSomething = True;
923
924 return fFoundSomething if fFoundSomething else None;
925
926
927 def investigateBadTestBox(self, oCaseFile):
928 """
929 Checks out bad-testbox statuses.
930 """
931 _ = oCaseFile;
932 return False;
933
934
935 def investigateVBoxUnitTest(self, oCaseFile):
936 """
937 Checks out a VBox unittest problem.
938 """
939
940 #
941 # Process simple test case failures first, using their name as reason.
942 # We do the reason management just like for BSODs.
943 #
944 cRelevantOnes = 0;
945 sMainLog = oCaseFile.getMainLog();
946 aoFailedResults = oCaseFile.oTree.getListOfFailures();
947 for oFailedResult in aoFailedResults:
948 if oFailedResult is oCaseFile.oTree:
949 self.vprint('TODO: toplevel failure');
950 cRelevantOnes += 1
951
952 elif oFailedResult.sName == 'Installing VirtualBox':
953 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
954 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
955 cRelevantOnes += 1
956
957 elif oFailedResult.sName == 'Uninstalling VirtualBox':
958 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
959 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
960 cRelevantOnes += 1
961
962 elif oFailedResult.oParent is not None:
963 # Get the 2nd level node because that's where we'll find the unit test name.
964 while oFailedResult.oParent.oParent is not None:
965 oFailedResult = oFailedResult.oParent;
966
967 # Only report a failure once.
968 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
969 sKey = oFailedResult.sName;
970 if sKey.startswith('testcase/'):
971 sKey = sKey[9:];
972 if sKey in self.asUnitTestReasons:
973 tReason = ( self.ksUnitTestCategory, sKey );
974 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
975 else:
976 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
977 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
978 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
979 cRelevantOnes += 1
980 else:
981 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
982
983 #
984 # If we've caught all the relevant ones by now, report the result.
985 #
986 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
987 return self.caseClosed(oCaseFile);
988 return False;
989
990 def extractGuestCpuStack(self, sInfoText):
991 """
992 Extracts the guest CPU stacks from the input file.
993
994 Returns a dictionary keyed by the CPU number, value being a list of
995 raw stack lines (no header).
996 Returns empty dictionary if no stacks where found.
997 """
998 dRet = {};
999 off = 0;
1000 while True:
1001 # Find the stack.
1002 offStart = sInfoText.find('=== start guest stack VCPU ', off);
1003 if offStart < 0:
1004 break;
1005 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
1006 if offEnd >= 0:
1007 offEnd += 3;
1008 else:
1009 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
1010 if offEnd < 0:
1011 offEnd = len(sInfoText);
1012
1013 sStack = sInfoText[offStart : offEnd];
1014 sStack = sStack.replace('\r',''); # paranoia
1015 asLines = sStack.split('\n');
1016
1017 # Figure the CPU.
1018 asWords = asLines[0].split();
1019 if len(asWords) < 6 or not asWords[5].isdigit():
1020 break;
1021 iCpu = int(asWords[5]);
1022
1023 # Add it and advance.
1024 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
1025 off = offEnd;
1026 return dRet;
1027
1028 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
1029 """ Investigates kvm_lock_spinning deadlocks """
1030 #
1031 # Extract the stacks. We need more than one CPU to create a deadlock.
1032 #
1033 dStacks = self.extractGuestCpuStack(sInfoText);
1034 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1035 if len(dStacks) >= 2:
1036 #
1037 # Examin each of the stacks. Each must have kvm_lock_spinning in
1038 # one of the first three entries.
1039 #
1040 cHits = 0;
1041 for asBacktrace in dStacks.values():
1042 for iFrame in xrange(min(3, len(asBacktrace))):
1043 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1044 cHits += 1;
1045 break;
1046 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1047 if cHits == len(dStacks):
1048 return (True, self.ktReason_VMM_kvm_lock_spinning);
1049
1050 _ = dLogs; _ = oCaseFile;
1051 return (False, None);
1052
1053 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1054 """ Investigates HalReturnToFirmware hangs """
1055 del oCaseFile
1056 del sInfoText
1057 del dLogs
1058 # hope that's sufficient
1059 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1060
1061 ## Things we search a main or VM log for to figure out why something went bust.
1062 ## @note DO NOT ADD MORE STUFF HERE!
1063 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1064 katSimpleMainAndVmLogReasonsDeprecated = [
1065 # ( Whether to stop on hit, reason tuple, needle text. )
1066 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1067 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1068 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1069 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1070 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1071 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1072 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1073 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1074 ( True, ktReason_Networking_Nonexistent_host_nic,
1075 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1076 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1077 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1078 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1079 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1080 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1081 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1082 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1083 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1084 ( True, ktReason_Unknown_File_Not_Found,
1085 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1086 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1087 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1088 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1089 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1090 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1091 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1092 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1093 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1094 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1095 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1096 ];
1097
1098 ## This we search a main log for to figure out why something went bust.
1099 katSimpleMainLogReasons = [
1100 # ( Whether to stop on hit, reason tuple, needle text. )
1101 ( False, ktReason_Host_win32com_gen_py, 'ModuleNotFoundError: No module named \'win32com.gen_py' ),
1102
1103 ];
1104
1105 ## This we search a VM log for to figure out why something went bust.
1106 katSimpleVmLogReasons = [
1107 # ( Whether to stop on hit, reason tuple, needle text. )
1108 ];
1109
1110 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1111 katSimpleVBoxHardeningLogReasons = [
1112 # ( Whether to stop on hit, reason tuple, needle text. )
1113 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1114 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1115 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1116 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1117 ];
1118
1119 ## Things we search a kernel.log file for to figure out why something went bust.
1120 katSimpleKernelLogReasons = [
1121 # ( Whether to stop on hit, reason tuple, needle text. )
1122 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1123 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1124 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1125 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1126 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1127 ];
1128
1129 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1130 katSimpleVgaTextReasons = [
1131 # ( Whether to stop on hit, reason tuple, needle text. )
1132 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1133 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1134 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1135 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1136 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1137 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1138 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1139 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1140 "and send a report. Then try booting with the 'noapic' option\n"
1141 "\n" ),
1142 ( True, ktReason_OSInstall_GRUB_hang,
1143 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1144 ( True, ktReason_OSInstall_GRUB_hang,
1145 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1146 ( True, ktReason_OSInstall_GRUB_hang,
1147 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1148 ( True, ktReason_OSInstall_GRUB_hang,
1149 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1150 ( True, ktReason_OSInstall_Udev_hang,
1151 "\nStarting udev:\n\n\n\n" ),
1152 ( True, ktReason_OSInstall_Udev_hang,
1153 "\nStarting udev:\n------" ),
1154 ( True, ktReason_Panic_BootManagerC000000F,
1155 "Windows failed to start. A recent hardware or software change might be the" ),
1156 ( True, ktReason_BootManager_Image_corrupt,
1157 "BOOTMGR image is corrupt. The system cannot boot." ),
1158 ];
1159
1160 ## Things we search for in the info.txt file. Require handlers for now.
1161 katInfoTextHandlers = [
1162 # ( Trigger text, handler method )
1163 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1164 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1165 ];
1166
1167 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1168 katSimpleScreenshotHashReasons = [
1169 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1170 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1171 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1172 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1173 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1174 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1175 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1176 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1177 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1178 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1179 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1180 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1181 ];
1182
1183
1184 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1185 """
1186 Scans for atNeedles in sLog.
1187
1188 Returns True if a stop-on-hit neelde was found.
1189 Returns None if a no-stop reason was found.
1190 Returns False if no hit.
1191 """
1192 fRet = False;
1193 for fStopOnHit, tReason, oNeedle in atNeedles:
1194 fMatch = False;
1195 if utils.isString(oNeedle):
1196 for sLog in asLogs:
1197 if sLog:
1198 fMatch |= sLog.find(oNeedle) > 0;
1199 else:
1200 for sLog in asLogs:
1201 if sLog:
1202 fMatch |= oNeedle.search(sLog) is not None;
1203 if fMatch:
1204 oCaseFile.noteReasonForId(tReason, idTestResult);
1205 if fStopOnHit:
1206 return True;
1207 fRet = None;
1208 return fRet;
1209
1210
1211 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1212 """
1213 Investigates a failed VM run.
1214 """
1215 enmReason = None;
1216 sParentName = oFailedResult.oParent.sName if oFailedResult.oParent else '';
1217 if oFailedResult.sName == 'VBoxWindowsAdditions.exe' or sResultLog.find('VBoxWindowsAdditions.exe" failed with') > 0:
1218 enmReason = self.ktReason_Add_Installer_Win_Failed;
1219 # guest control:
1220 elif sParentName == 'Guest Control' and oFailedResult.sName == 'Preparations':
1221 enmReason = self.ktReason_Add_GstCtl_Preparations;
1222 elif oFailedResult.sName == 'Session Basics':
1223 enmReason = self.ktReason_Add_GstCtl_SessionBasics;
1224 elif oFailedResult.sName == 'Session Process References':
1225 enmReason = self.ktReason_Add_GstCtl_SessionProcRefs;
1226 elif oFailedResult.sName == 'Copy from guest':
1227 if sResultLog.find('*** abort action ***') >= 0:
1228 enmReason = self.ktReason_Add_GstCtl_CopyFromGuest_Timeout;
1229 elif oFailedResult.sName == 'Copy to guest':
1230 off = sResultLog.find('"Guest directory "');
1231 if off > 0 and sResultLog.find('" already exists"', off, off + 80):
1232 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstExists;
1233 elif sResultLog.find('Guest destination must not be empty') >= 0:
1234 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstEmpty;
1235 elif sResultLog.find('*** abort action ***') >= 0:
1236 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_Timeout;
1237 elif oFailedResult.sName.find('Session w/ Guest Reboot') >= 0:
1238 enmReason = self.ktReason_Add_GstCtl_Session_Reboot;
1239 # shared folders:
1240 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Automounting':
1241 enmReason = self.ktReason_Add_ShFl_Automount;
1242 elif oFailedResult.sName == 'mmap':
1243 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1244 enmReason = self.ktReason_Add_Mmap_Coherency;
1245 elif sResultLog.find('FlushViewOfFile') >= 0:
1246 enmReason = self.ktReason_Add_FlushViewOfFile;
1247 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Running FsPerf':
1248 enmReason = self.ktReason_Add_ShFl_FsPerf; ## Maybe it would be better to be more specific...
1249
1250 if enmReason is not None:
1251 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1252
1253 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1254 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1255 return False;
1256
1257 def isResultFromGATest(self, oCaseFile, oFailedResult):
1258 """
1259 Checks if this result and corresponding log snippet looks like a GA test run.
1260 """
1261 while oFailedResult is not None:
1262 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', 'VBoxWindowsAdditions.exe' ]:
1263 return True;
1264 if oCaseFile.oTestCase.sName == 'Guest Additions' and oFailedResult.sName in [ 'Install', ]:
1265 return True;
1266 oFailedResult = oFailedResult.oParent;
1267 return False;
1268
1269
1270 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1271 """
1272 Investigates a failed VM run.
1273 """
1274
1275 def investigateLogSet():
1276 """
1277 Investigates the current set of VM related logs.
1278 """
1279 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1280 % ( len(sResultLog if sResultLog else ''),
1281 len(sVMLog if sVMLog else ''),
1282 len(sKrnlLog if sKrnlLog else ''),
1283 len(sVgaText if sVgaText else ''),
1284 len(sInfoText if sInfoText else ''),
1285 len(sNtHardLog if sNtHardLog else ''),));
1286
1287 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1288 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1289 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1290 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1291 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1292 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1293
1294 # TODO: more
1295
1296 #
1297 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1298 #
1299 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1300 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1301 if sDetails is not None:
1302 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1303 sKey = sDetails.split(' ', 1)[0];
1304 try: sKey = '0x%08X' % (int(sKey, 16),);
1305 except: pass;
1306 if sKey in self.asBsodReasons:
1307 tReason = ( self.ksBsodCategory, sKey );
1308 elif sKey.lower() in self.asBsodReasons: # just in case.
1309 tReason = ( self.ksBsodCategory, sKey.lower() );
1310 else:
1311 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1312 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1313 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1314
1315 fFoundSomething = False;
1316
1317 #
1318 # Look for linux panic.
1319 #
1320 if sKrnlLog is not None:
1321 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1322 if fRet is True:
1323 return fRet;
1324 fFoundSomething |= fRet is None;
1325
1326 #
1327 # Loop thru the simple stuff.
1328 #
1329
1330 # Main log.
1331 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1332 if fRet is True:
1333 return fRet;
1334 fFoundSomething |= fRet is None;
1335
1336 # VM log.
1337 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1338 if fRet is True:
1339 return fRet;
1340 fFoundSomething |= fRet is None;
1341
1342 # Old main + vm log.
1343 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1344 oCaseFile, oFailedResult.idTestResult);
1345 if fRet is True:
1346 return fRet;
1347 fFoundSomething |= fRet is None;
1348
1349 # Continue with vga text.
1350 if sVgaText:
1351 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1352 if fRet is True:
1353 return fRet;
1354 fFoundSomething |= fRet is None;
1355
1356 # Continue with screen hashes.
1357 if sScreenHash is not None:
1358 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1359 if sScreenHash == sHash:
1360 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1361 if fStopOnHit:
1362 return True;
1363 fFoundSomething = True;
1364
1365 # Check VBoxHardening.log.
1366 if sNtHardLog is not None:
1367 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1368 if fRet is True:
1369 return fRet;
1370 fFoundSomething |= fRet is None;
1371
1372 #
1373 # Complicated stuff.
1374 #
1375 dLogs = {
1376 'sVMLog': sVMLog,
1377 'sNtHardLog': sNtHardLog,
1378 'sScreenHash': sScreenHash,
1379 'sKrnlLog': sKrnlLog,
1380 'sVgaText': sVgaText,
1381 'sInfoText': sInfoText,
1382 };
1383
1384 # info.txt.
1385 if sInfoText:
1386 for sNeedle, fnHandler in self.katInfoTextHandlers:
1387 if sInfoText.find(sNeedle) > 0:
1388 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1389 if tReason is not None:
1390 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1391 if fStop:
1392 return True;
1393 fFoundSomething = True;
1394
1395 #
1396 # Check for repeated reboots...
1397 #
1398 if sVMLog is not None:
1399 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1400 if cResets > 10:
1401 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1402 sComment = 'Counted %s reboots' % (cResets,));
1403
1404 return fFoundSomething;
1405
1406 #
1407 # Check if we got any VM or/and kernel logs. Treat them as sets in
1408 # case we run multiple VMs here (this is of course ASSUMING they
1409 # appear in the order that terminateVmBySession uploads them).
1410 #
1411 cTimes = 0;
1412 sVMLog = None;
1413 sNtHardLog = None;
1414 sScreenHash = None;
1415 sKrnlLog = None;
1416 sVgaText = None;
1417 sInfoText = None;
1418 for oFile in oFailedResult.aoFiles:
1419 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1420 if 'VBoxHardening.log' not in oFile.sFile:
1421 if sVMLog is not None:
1422 if investigateLogSet() is True:
1423 return True;
1424 cTimes += 1;
1425 sInfoText = None;
1426 sVgaText = None;
1427 sKrnlLog = None;
1428 sScreenHash = None;
1429 sNtHardLog = None;
1430 sVMLog = oCaseFile.getLogFile(oFile);
1431 else:
1432 sNtHardLog = oCaseFile.getLogFile(oFile);
1433 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1434 sKrnlLog = oCaseFile.getLogFile(oFile);
1435 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1436 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1437 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1438 sInfoText = oCaseFile.getLogFile(oFile);
1439 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1440 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1441 if sScreenHash is not None:
1442 sScreenHash = sScreenHash.lower();
1443 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1444
1445 if ( sVMLog is not None \
1446 or sNtHardLog is not None \
1447 or cTimes == 0) \
1448 and investigateLogSet() is True:
1449 return True;
1450
1451 return None;
1452
1453 def isResultFromVMRun(self, oFailedResult, sResultLog):
1454 """
1455 Checks if this result and corresponding log snippet looks like a VM run.
1456 """
1457
1458 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1459 if sResultLog.find(' startVm') > 0:
1460 return True;
1461
1462 # Any other indicators? No?
1463 _ = oFailedResult;
1464 return False;
1465
1466
1467 ## Things we search a VBoxSVC log for to figure out why something went bust.
1468 katSimpleSvcLogReasons = [
1469 # ( Whether to stop on hit, reason tuple, needle text. )
1470 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1471 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1472 ];
1473
1474 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1475 """
1476 Check the VBoxSVC log for a single VM run.
1477 """
1478 if sSvcLog:
1479 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1480 if fRet is True or fRet is None:
1481 return True;
1482 return False;
1483
1484 def investigateNtHardLogForVMRun(self, oCaseFile):
1485 """
1486 Check if the hardening log for a single VM run contains VM crash indications.
1487 """
1488 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1489 for oLogFile in aoLogFiles:
1490 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1491 sLog = oCaseFile.getLogFile(oLogFile);
1492 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1493 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1494 return False;
1495
1496
1497 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1498 """
1499 Checks out a VBox VM test.
1500
1501 This is generic investigation of a test running one or more VMs, like
1502 for example a smoke test or a guest installation test.
1503
1504 The fSingleVM parameter is a hint, which probably won't come in useful.
1505 """
1506 _ = fSingleVM;
1507
1508 #
1509 # Get a list of test result failures we should be looking into and the main log.
1510 #
1511 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1512 sMainLog = oCaseFile.getMainLog();
1513
1514 #
1515 # There are a set of errors ending up on the top level result record.
1516 # Should deal with these first.
1517 #
1518 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1519 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1520 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1521 # ignore other failures in the test if we're not a little bit careful here.
1522 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1523 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1524 return self.caseClosed(oCaseFile);
1525
1526 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1527 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1528 (' Timeout: ', ' Attempting to abort child...',) ):
1529 if sMainLog.find('*** glibc detected *** /') > 0:
1530 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1531 else:
1532 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1533 return self.caseClosed(oCaseFile);
1534
1535 # Look for heap corruption without visible hang.
1536 if sMainLog.find('*** glibc detected *** /') > 0 \
1537 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1538 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1539 return self.caseClosed(oCaseFile);
1540
1541 # Out of memory w/ timeout.
1542 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1543 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1544 return self.caseClosed(oCaseFile);
1545
1546 # Stale files like vts_rm.exe (windows).
1547 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1548 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1549 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1550 return self.caseClosed(oCaseFile);
1551
1552 #
1553 # XPCOM screwup
1554 #
1555 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1556 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1557 return self.caseClosed(oCaseFile);
1558
1559 #
1560 # Go thru each failed result.
1561 #
1562 for oFailedResult in aoFailedResults:
1563 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1564 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1565 if oFailedResult.sName == 'Installing VirtualBox':
1566 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1567
1568 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1569 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1570
1571 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1572 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1573
1574 elif self.isResultFromGATest(oCaseFile, oFailedResult):
1575 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1576
1577 elif sResultLog.find('most likely not unique') > 0:
1578 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1579 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1580 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1581
1582 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1583 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1584 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1585
1586 elif sResultLog.find('** error: no action was specified') > 0 \
1587 or sResultLog.find('(len(self._asXml, asText))') > 0:
1588 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1589
1590 else:
1591 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1592 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1593
1594 #
1595 # Windows python/com screwup.
1596 #
1597 if sMainLog.find('ModuleNotFoundError: No module named \'win32com.gen_py') > 0:
1598 oCaseFile.noteReason(self.ktReason_Host_win32com_gen_py);
1599 return self.caseClosed(oCaseFile);
1600
1601 #
1602 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1603 #
1604 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1605 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1606 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1607 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1608 return self.caseClosed(oCaseFile);
1609 if self.investigateNtHardLogForVMRun(oCaseFile):
1610 return self.caseClosed(oCaseFile);
1611
1612 #
1613 # Report home and close the case if we got them all, otherwise log it.
1614 #
1615 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1616 return self.caseClosed(oCaseFile);
1617
1618 if oCaseFile.dReasonForResultId:
1619 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1620 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1621 else:
1622 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1623 return False;
1624
1625
1626 ## Things we search a main log for to figure out why something in the API test went bust.
1627 katSimpleApiMainLogReasons = [
1628 # ( Whether to stop on hit, reason tuple, needle text. )
1629 ( True, ktReason_Networking_Nonexistent_host_nic,
1630 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1631 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1632 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1633 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1634 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1635 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1636 ( True, ktReason_API_MoveVM_InvalidParameter,
1637 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1638 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1639 ];
1640
1641 def investigateVBoxApiTest(self, oCaseFile):
1642 """
1643 Checks out a VBox API test.
1644 """
1645
1646 #
1647 # Get a list of test result failures we should be looking into and the main log.
1648 #
1649 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1650 sMainLog = oCaseFile.getMainLog();
1651
1652 #
1653 # Go thru each failed result.
1654 #
1655 for oFailedResult in aoFailedResults:
1656 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1657 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1658 if oFailedResult.sName == 'Installing VirtualBox':
1659 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1660
1661 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1662 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1663
1664 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1665 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1666
1667 else:
1668 fFoundSomething = False;
1669 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1670 if sResultLog.find(sNeedle) > 0:
1671 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1672 fFoundSomething = True;
1673 if fStopOnHit:
1674 break;
1675 if fFoundSomething:
1676 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1677 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1678
1679 #
1680 # Report home and close the case if we got them all, otherwise log it.
1681 #
1682 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1683 return self.caseClosed(oCaseFile);
1684
1685 if oCaseFile.dReasonForResultId:
1686 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1687 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1688 else:
1689 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1690 return False;
1691
1692
1693 def reasoningFailures(self):
1694 """
1695 Guess the reason for failures.
1696 """
1697 #
1698 # Get a list of failed test sets without any assigned failure reason.
1699 #
1700 cGot = 0;
1701 if not self.oConfig.aidTestSets:
1702 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1703 tsNow = self.tsNow);
1704 else:
1705 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1706 for oTestSet in aoTestSets:
1707 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1708 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1709
1710 #
1711 # Open a case file and assign it to the right investigator.
1712 #
1713 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1714 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1715 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1716 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1717 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1718
1719 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1720
1721 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1722 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1723 fRc = self.investigateBadTestBox(oCaseFile);
1724
1725 elif oCaseFile.isVBoxUnitTest():
1726 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1727 fRc = self.investigateVBoxUnitTest(oCaseFile);
1728
1729 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1730 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1731 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1732
1733 elif oCaseFile.isVBoxUSBTest():
1734 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1735 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1736
1737 elif oCaseFile.isVBoxStorageTest():
1738 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1739 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1740
1741 elif oCaseFile.isVBoxGAsTest():
1742 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1743 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1744
1745 elif oCaseFile.isVBoxAPITest():
1746 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1747 fRc = self.investigateVBoxApiTest(oCaseFile);
1748
1749 elif oCaseFile.isVBoxBenchmarkTest():
1750 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1751 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1752
1753 elif oCaseFile.isVBoxSmokeTest():
1754 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1755 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1756
1757 elif oCaseFile.isVBoxSerialTest():
1758 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1759 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1760
1761 else:
1762 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1763 fRc = False;
1764 cGot += fRc is True;
1765
1766 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1767 return 0;
1768
1769
1770 def main(self):
1771 """
1772 The 'main' function.
1773 Return exit code (0, 1, etc).
1774 """
1775 # Database stuff.
1776 self.oDb = TMDatabaseConnection()
1777 self.oTestResultLogic = TestResultLogic(self.oDb);
1778 self.oTestSetLogic = TestSetLogic(self.oDb);
1779 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1780 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1781 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1782 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1783
1784 # Get a fix on our 'now' before we do anything..
1785 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1786 self.tsNow = self.oDb.fetchOne();
1787
1788 # If we're suppost to commit anything we need to get our user ID.
1789 rcExit = 0;
1790 if self.oConfig.fRealRun:
1791 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1792 if self.oLogin is None:
1793 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1794 else:
1795 self.uidSelf = self.oLogin.uid;
1796
1797 #
1798 # Do the stuff.
1799 #
1800 if rcExit == 0:
1801 rcExit = self.selfCheck();
1802 if rcExit == 0:
1803 rcExit = self.badTestBoxManagement();
1804 rcExit2 = self.reasoningFailures();
1805 if rcExit == 0:
1806 rcExit = rcExit2;
1807 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1808 if rcExit == 0:
1809 rcExit = self.badTestBoxManagement();
1810
1811 # Cleanup.
1812 self.oFailureReasonLogic = None;
1813 self.oTestResultFailureLogic = None;
1814 self.oTestSetLogic = None;
1815 self.oTestResultLogic = None;
1816 self.oDb.close();
1817 self.oDb = None;
1818 if self.oLogFile is not None:
1819 self.oLogFile.close();
1820 self.oLogFile = None;
1821 return rcExit;
1822
1823if __name__ == '__main__':
1824 sys.exit(VirtualTestSheriff().main());
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette