VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@107893

Last change on this file since 107893 was 107893, checked in by vboxsync, 4 months ago

VMM,VBox/types.h,VBox/err.h: Added VM target platform arch members to the VM structures (mostly for ring-0). Also added the structure sizes and svn revision to VMMR0_DO_GVMM_CREATE_VM. jiraref:VBP-1470

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 124.1 KB
1/* $Id: GVMMR0.cpp 107893 2025-01-22 15:31:45Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.215389.xyz.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * serves both session cleanup purposes and provides a point where usage
39 * policies can be implemented later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-CPU
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency value for all VMs running on a CPU over the last
48 * ~160 ms (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * TMCalcHostTimerFrequency(), for its part, takes the max
52 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
53 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#ifdef RT_ARCH_AMD64
81# include <iprt/asm-amd64-x86.h>
82#endif
83#include <iprt/critsect.h>
84#include <iprt/mem.h>
85#include <iprt/semaphore.h>
86#include <iprt/time.h>
87#include <VBox/log.h>
88#include <iprt/thread.h>
89#include <iprt/process.h>
90#include <iprt/param.h>
91#include <iprt/string.h>
92#include <iprt/assert.h>
93#include <iprt/mem.h>
94#include <iprt/memobj.h>
95#include <iprt/mp.h>
96#include <iprt/cpuset.h>
97#include <iprt/spinlock.h>
98#include <iprt/timer.h>
99
100#include "dtrace/VBoxVMM.h"
101
102
103/*********************************************************************************************************************************
104* Defined Constants And Macros *
105*********************************************************************************************************************************/
106#if (defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)) \
107 && !defined(VBOX_WITH_MINIMAL_R0)
108/** Define this to enable the periodic preemption timer. */
109# define GVMM_SCHED_WITH_PPT
110#endif
111
112#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
113/** Define this to enable the per-EMT high resolution wakeup timers. */
114# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
115#endif
116
117
118/** Special value that GVMMR0DeregisterVCpu sets. */
119#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
120AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
121
122
123/*********************************************************************************************************************************
124* Structures and Typedefs *
125*********************************************************************************************************************************/
126
127/**
128 * Global VM handle.
129 */
130typedef struct GVMHANDLE
131{
132 /** The index of the next handle in the list (free or used). (0 is nil.) */
133 uint16_t volatile iNext;
134 /** Our own index / handle value. */
135 uint16_t iSelf;
136 /** The process ID of the handle owner.
137 * This is used for access checks. */
138 RTPROCESS ProcId;
139 /** The pointer to the ring-0 only (aka global) VM structure. */
140 PGVM pGVM;
141 /** The virtual machine object. */
142 void *pvObj;
143 /** The session this VM is associated with. */
144 PSUPDRVSESSION pSession;
145 /** The ring-0 handle of the EMT0 thread.
146 * This is used for ownership checks as well as looking up a VM handle by thread
147 * at times like assertions. */
148 RTNATIVETHREAD hEMT0;
149} GVMHANDLE;
150/** Pointer to a global VM handle. */
151typedef GVMHANDLE *PGVMHANDLE;
152
153/** Number of GVM handles (including the NIL handle). */
154#if HC_ARCH_BITS == 64
155# define GVMM_MAX_HANDLES 8192
156#else
157# define GVMM_MAX_HANDLES 128
158#endif
159
160/**
161 * Per host CPU GVMM data.
162 */
163typedef struct GVMMHOSTCPU
164{
165 /** Magic number (GVMMHOSTCPU_MAGIC). */
166 uint32_t volatile u32Magic;
167 /** The CPU ID. */
168 RTCPUID idCpu;
169 /** The CPU set index. */
170 uint32_t idxCpuSet;
171
172#ifdef GVMM_SCHED_WITH_PPT
173 /** Periodic preemption timer data. */
174 struct
175 {
176 /** The handle to the periodic preemption timer. */
177 PRTTIMER pTimer;
178 /** Spinlock protecting the data below. */
179 RTSPINLOCK hSpinlock;
180 /** The smallest Hz that we need to care about. (static) */
181 uint32_t uMinHz;
182 /** The number of ticks between each historization. */
183 uint32_t cTicksHistoriziationInterval;
184 /** The current historization tick (counting up to
185 * cTicksHistoriziationInterval and then resetting). */
186 uint32_t iTickHistorization;
187 /** The current timer interval. This is set to 0 when inactive. */
188 uint32_t cNsInterval;
189 /** The current timer frequency. This is set to 0 when inactive. */
190 uint32_t uTimerHz;
191 /** The current max frequency reported by the EMTs.
192 * This gets historicized and reset by the timer callback. This is
193 * read without holding the spinlock, so needs atomic updating. */
194 uint32_t volatile uDesiredHz;
195 /** Whether the timer was started or not. */
196 bool volatile fStarted;
197 /** Set if we're starting the timer. */
198 bool volatile fStarting;
199 /** The index of the next history entry (mod it). */
200 uint32_t iHzHistory;
201 /** Historicized uDesiredHz values. The array wraps around, new entries
202 * are added at iHzHistory. This is updated approximately every
203 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
204 uint32_t aHzHistory[8];
205 /** Statistics counter for recording the number of interval changes. */
206 uint32_t cChanges;
207 /** Statistics counter for recording the number of timer starts. */
208 uint32_t cStarts;
209 } Ppt;
210#endif /* GVMM_SCHED_WITH_PPT */
211
212} GVMMHOSTCPU;
213/** Pointer to the per host CPU GVMM data. */
214typedef GVMMHOSTCPU *PGVMMHOSTCPU;
215/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
216#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
217/** The interval each history entry should cover (approximately), given in
218 * nanoseconds. */
219#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
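
/* Illustrative arithmetic: with the eight-entry aHzHistory ring buffer above
 * and 20 ms per slot, the history window is 8 * 20 000 000 ns =
 * 160 000 000 ns, i.e. the ~160 ms quoted in the @page docs. A compile-time
 * check along these lines would pin that down (sketch, not wired into the
 * build):
 */
#ifdef GVMM_SCHED_WITH_PPT
AssertCompile(  RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory)
              * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS == UINT32_C(160000000));
#endif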
220
221
222/**
223 * The GVMM instance data.
224 */
225typedef struct GVMM
226{
227 /** Eyecatcher / magic. */
228 uint32_t u32Magic;
229 /** The index of the head of the free handle chain. (0 is nil.) */
230 uint16_t volatile iFreeHead;
231 /** The index of the head of the active handle chain. (0 is nil.) */
232 uint16_t volatile iUsedHead;
233 /** The number of VMs. */
234 uint16_t volatile cVMs;
235 /** Alignment padding. */
236 uint16_t u16Reserved;
237 /** The number of EMTs. */
238 uint32_t volatile cEMTs;
239 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
240 uint32_t volatile cHaltedEMTs;
241 /** Mini lock for restricting early wake-ups to one thread. */
242 bool volatile fDoingEarlyWakeUps;
243 bool afPadding[3]; /**< explicit alignment padding. */
244 /** When the next halted or sleeping EMT will wake up.
245 * This is set to 0 when it needs recalculating and to UINT64_MAX when
246 * there are no halted or sleeping EMTs in the GVMM. */
247 uint64_t uNsNextEmtWakeup;
248 /** The lock used to serialize VM creation, destruction and associated events that
249 * aren't performance critical. Owners may acquire the list lock. */
250 RTCRITSECT CreateDestroyLock;
251 /** The lock used to serialize used list updates and accesses.
252 * This indirectly includes scheduling since the scheduler will have to walk the
253 * used list to examine running VMs. Owners may not acquire any other locks. */
254 RTCRITSECTRW UsedLock;
255 /** The handle array.
256 * The size of this array defines the maximum number of currently running VMs.
257 * The first entry is unused as it represents the NIL handle. */
258 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
259
260 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
261 * The number of EMTs at which we no longer consider ourselves alone on a
262 * CPU/Core.
263 */
264 uint32_t cEMTsMeansCompany;
265 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
266 * The minimum sleep time for when we're alone, in nanoseconds.
267 */
268 uint32_t nsMinSleepAlone;
269 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
270 * The minimum sleep time for when we've got company, in nanoseconds.
271 */
272 uint32_t nsMinSleepCompany;
273#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
274 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
275 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
276 * nanoseconds.
277 */
278 uint32_t nsMinSleepWithHrTimer;
279#endif
280 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
281 * The limit for the first round of early wake-ups, given in nanoseconds.
282 */
283 uint32_t nsEarlyWakeUp1;
284 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
285 * The limit for the second round of early wake-ups, given in nanoseconds.
286 */
287 uint32_t nsEarlyWakeUp2;
288
289 /** Set if we're doing early wake-ups.
290 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
291 bool volatile fDoEarlyWakeUps;
292
293 /** The number of entries in the host CPU array (aHostCpus). */
294 uint32_t cHostCpus;
295 /** Per host CPU data (variable length). */
296 GVMMHOSTCPU aHostCpus[1];
297} GVMM;
298AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
299AssertCompileMemberAlignment(GVMM, UsedLock, 8);
300AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
301/** Pointer to the GVMM instance data. */
302typedef GVMM *PGVMM;
303
304/** The GVMM::u32Magic value (Charlie Haden). */
305#define GVMM_MAGIC UINT32_C(0x19370806)
306
307
308
309/*********************************************************************************************************************************
310* Global Variables *
311*********************************************************************************************************************************/
312/** Pointer to the GVMM instance data.
313 * (Just my general dislike for global variables.) */
314static PGVMM g_pGVMM = NULL;
315
316/** Macro for obtaining and validating the g_pGVMM pointer.
317 * On failure it will return from the invoking function with the specified return value.
318 *
319 * @param pGVMM The name of the pGVMM variable.
320 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
321 * status codes.
322 */
323#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
324 do { \
325 (pGVMM) = g_pGVMM;\
326 AssertPtrReturn((pGVMM), (rc)); \
327 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
328 } while (0)
329
330/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
331 * On failure it will return from the invoking function.
332 *
333 * @param pGVMM The name of the pGVMM variable.
334 */
335#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
336 do { \
337 (pGVMM) = g_pGVMM;\
338 AssertPtrReturnVoid((pGVMM)); \
339 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
340 } while (0)
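
/* Minimal usage sketch (gvmmR0ExampleQueryVMCount is hypothetical; the real
 * entry points below use the macro the same way): the macro returns from the
 * caller with the given status code whenever the instance pointer is missing
 * or its magic is stale.
 */
#if 0 /* illustration only */
static int gvmmR0ExampleQueryVMCount(uint32_t *pcVMs)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
    *pcVMs = pGVMM->cVMs;
    return VINF_SUCCESS;
}
#endif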
341
342
343/*********************************************************************************************************************************
344* Internal Functions *
345*********************************************************************************************************************************/
346static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession);
347static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
348static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
349static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
350
351#ifdef GVMM_SCHED_WITH_PPT
352static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
355static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
356#endif
357
358
359/**
360 * Initializes the GVMM.
361 *
362 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
363 *
364 * @returns VBox status code.
365 */
366GVMMR0DECL(int) GVMMR0Init(void)
367{
368 LogFlow(("GVMMR0Init:\n"));
369
370 /*
371 * Allocate and initialize the instance data.
372 */
373 uint32_t cHostCpus = RTMpGetArraySize();
374 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
375
376 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
377 if (!pGVMM)
378 return VERR_NO_MEMORY;
379 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
380 "GVMM-CreateDestroyLock");
381 if (RT_SUCCESS(rc))
382 {
383 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
384 if (RT_SUCCESS(rc))
385 {
386 pGVMM->u32Magic = GVMM_MAGIC;
387 pGVMM->iUsedHead = 0;
388 pGVMM->iFreeHead = 1;
389
390 /* the nil handle */
391 pGVMM->aHandles[0].iSelf = 0;
392 pGVMM->aHandles[0].iNext = 0;
393
394 /* the tail */
395 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
396 pGVMM->aHandles[i].iSelf = i;
397 pGVMM->aHandles[i].iNext = 0; /* nil */
398
399 /* the rest */
400 while (i-- > 1)
401 {
402 pGVMM->aHandles[i].iSelf = i;
403 pGVMM->aHandles[i].iNext = i + 1;
404 }
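 /* The free chain now reads 1 -> 2 -> ... -> GVMM_MAX_HANDLES-1 -> 0 (nil),
    matching iFreeHead = 1 above, while the used list stays empty (iUsedHead = 0). */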
405
406 /* The default configuration values. */
407 uint32_t cNsResolution = RTSemEventMultiGetResolution();
408 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the CPU count or something... */
409 if (cNsResolution >= 5*RT_NS_100US)
410 {
411 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
412 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
413 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
414 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
415 }
416 else if (cNsResolution > RT_NS_100US)
417 {
418 pGVMM->nsMinSleepAlone = cNsResolution / 2;
419 pGVMM->nsMinSleepCompany = cNsResolution / 4;
420 pGVMM->nsEarlyWakeUp1 = 0;
421 pGVMM->nsEarlyWakeUp2 = 0;
422 }
423 else
424 {
425 pGVMM->nsMinSleepAlone = 2000;
426 pGVMM->nsMinSleepCompany = 2000;
427 pGVMM->nsEarlyWakeUp1 = 0;
428 pGVMM->nsEarlyWakeUp2 = 0;
429 }
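 /* Summary of the defaults chosen above: a coarse timer resolution
    (>= 0.5 ms) gets fixed values (750/15 us minimum sleeps, 25/50 us early
    wake-up limits); a medium one (> 0.1 ms) sleeps for half respectively a
    quarter of the resolution with early wake-ups disabled; a finer one gets
    flat 2 us minimum sleeps and no early wake-ups. */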
430#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
431 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
432#endif
433 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
434
435 /* The host CPU data. */
436 pGVMM->cHostCpus = cHostCpus;
437 uint32_t iCpu = cHostCpus;
438 RTCPUSET PossibleSet;
439 RTMpGetSet(&PossibleSet);
440 while (iCpu-- > 0)
441 {
442 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
443#ifdef GVMM_SCHED_WITH_PPT
444 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
445 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
446 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
447 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
448 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
450 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
451 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
453 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
454 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
455 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
456#endif
457
458 if (RTCpuSetIsMember(&PossibleSet, iCpu))
459 {
460 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
461 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
462
463#ifdef GVMM_SCHED_WITH_PPT
464 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
465 50*1000*1000 /* whatever */,
466 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
467 gvmmR0SchedPeriodicPreemptionTimerCallback,
468 &pGVMM->aHostCpus[iCpu]);
469 if (RT_SUCCESS(rc))
470 {
471 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
472 if (RT_FAILURE(rc))
473 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
474 }
475 else
476 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
477 if (RT_FAILURE(rc))
478 {
479 while (iCpu < cHostCpus)
480 {
481 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
482 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
483 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
484 iCpu++;
485 }
486 break;
487 }
488#endif
489 }
490 else
491 {
492 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
493 pGVMM->aHostCpus[iCpu].u32Magic = 0;
494 }
495 }
496 if (RT_SUCCESS(rc))
497 {
498 g_pGVMM = pGVMM;
499 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
500 return VINF_SUCCESS;
501 }
502
503 /* bail out. */
504 RTCritSectRwDelete(&pGVMM->UsedLock);
505 }
506 else
507 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
508 RTCritSectDelete(&pGVMM->CreateDestroyLock);
509 }
510 else
511 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
631 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 pGVMM->nsMinSleepWithHrTimer = u64Value;
635 else
636 rc = VERR_OUT_OF_RANGE;
637 }
638#endif
639 else if (!strcmp(pszName, "EarlyWakeUp1"))
640 {
641 if (u64Value <= RT_NS_100MS)
642 {
643 pGVMM->nsEarlyWakeUp1 = u64Value;
644 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
645 }
646 else
647 rc = VERR_OUT_OF_RANGE;
648 }
649 else if (!strcmp(pszName, "EarlyWakeUp2"))
650 {
651 if (u64Value <= RT_NS_100MS)
652 {
653 pGVMM->nsEarlyWakeUp2 = u64Value;
654 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
655 }
656 else
657 rc = VERR_OUT_OF_RANGE;
658 }
659 else
660 rc = VERR_CFGM_VALUE_NOT_FOUND;
661 return rc;
662}
663
664
665/**
666 * A quick hack for getting global config values.
667 *
668 * @returns VBox status code.
669 *
670 * @param pSession The session handle. Used for authentication.
671 * @param pszName The variable name.
672 * @param pu64Value Where to return the value.
673 */
674GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
675{
676 /*
677 * Validate input.
678 */
679 PGVMM pGVMM;
680 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
681 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
682 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
683 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
684
685 /*
686 * String switch time!
687 */
688 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
689 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
690 int rc = VINF_SUCCESS;
691 pszName += sizeof("/GVMM/") - 1;
692 if (!strcmp(pszName, "cEMTsMeansCompany"))
693 *pu64Value = pGVMM->cEMTsMeansCompany;
694 else if (!strcmp(pszName, "MinSleepAlone"))
695 *pu64Value = pGVMM->nsMinSleepAlone;
696 else if (!strcmp(pszName, "MinSleepCompany"))
697 *pu64Value = pGVMM->nsMinSleepCompany;
698#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
699 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
700 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
701#endif
702 else if (!strcmp(pszName, "EarlyWakeUp1"))
703 *pu64Value = pGVMM->nsEarlyWakeUp1;
704 else if (!strcmp(pszName, "EarlyWakeUp2"))
705 *pu64Value = pGVMM->nsEarlyWakeUp2;
706 else
707 rc = VERR_CFGM_VALUE_NOT_FOUND;
708 return rc;
709}
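
/* Usage sketch with hypothetical values (any valid session will do; the names
 * match the gcfgm entries documented on the GVMM structure above):
 *
 *   int rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 25000);
 *   uint64_t u64 = 0;
 *   if (RT_SUCCESS(rc))
 *       rc = GVMMR0QueryConfig(pSession, "/GVMM/EarlyWakeUp1", &u64);
 *   Assert(RT_FAILURE(rc) || u64 == 25000);
 */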
710
711
712/**
713 * Acquire the 'used' lock in shared mode.
714 *
715 * This prevents destruction of the VM while we're in ring-0.
716 *
717 * @returns IPRT status code, see RTCritSectRwEnterShared.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
720 */
721#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Release the 'used' lock when owning it in shared mode.
725 *
726 * @returns IPRT status code, see RTCritSectRwLeaveShared.
727 * @param a_pGVMM The GVMM instance data.
728 * @sa GVMMR0_USED_SHARED_LOCK
729 */
730#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
731
732/**
733 * Acquire the 'used' lock in exclusive mode.
734 *
735 * Only use this function when making changes to the used list.
736 *
737 * @returns IPRT status code, see RTCritSectRwEnterExcl.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
742
743/**
744 * Release the 'used' lock when owning it in exclusive mode.
745 *
746 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
747 * @param a_pGVMM The GVMM instance data.
748 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
749 */
750#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
751
752
753/**
754 * Acquire the 'create & destroy' lock.
755 *
756 * @returns IPRT status code, see RTCritSectEnter.
757 * @param pGVMM The GVMM instance data.
758 */
759DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
760{
761 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
762 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
763 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
764 return rc;
765}
766
767
768/**
769 * Release the 'create & destroy' lock.
770 *
771 * @returns IPRT status code, see RTCritSectLeave.
772 * @param pGVMM The GVMM instance data.
773 */
774DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
775{
776 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
777 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
778 AssertRC(rc);
779 return rc;
780}
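
/* Lock-order sketch, per the GVMM structure docs above: the create & destroy
 * lock may be held while acquiring the 'used' lock, never the other way
 * around ('used' lock owners may not acquire any other locks):
 *
 *   gvmmR0CreateDestroyLock(pGVMM);             // outer lock
 *   int rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM); // inner lock, allowed
 *   ...
 *   GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
 *   gvmmR0CreateDestroyUnlock(pGVMM);
 */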
781
782
783/**
784 * Request wrapper for the GVMMR0CreateVM API.
785 *
786 * @returns VBox status code.
787 * @param pReq The request buffer.
788 * @param pSession The session handle. The VM will be associated with this.
789 */
790GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
791{
792 /*
793 * Validate the request.
794 */
795 if (!RT_VALID_PTR(pReq))
796 return VERR_INVALID_POINTER;
797 if (pReq->Hdr.cbReq != sizeof(*pReq))
798 return VERR_INVALID_PARAMETER;
799 if (pReq->pSession != pSession)
800 return VERR_INVALID_POINTER;
801
802 /* Check that VBoxVMM and VMMR0 are likely to have the same idea about the structures. */
803 if (pReq->cbVM != sizeof(VM))
804 {
805 LogRel(("GVMMR0CreateVMReq: cbVM=%#x, expected %#x\n", pReq->cbVM, sizeof(VM)));
806 return VINF_GVM_MISMATCH_VM_SIZE;
807 }
808 if (pReq->cbVCpu != sizeof(VMCPU))
809 {
810 LogRel(("GVMMR0CreateVMReq: cbVCpu=%#x, expected %#x\n", pReq->cbVCpu, sizeof(VMCPU)));
811 return VINF_GVM_MISMATCH_VMCPU_SIZE;
812 }
813 if (pReq->uStructVersion != 1)
814 {
815 LogRel(("GVMMR0CreateVMReq: uStructVersion=%#x, expected %#x\n", pReq->uStructVersion, 1));
816 return VINF_GVM_MISMATCH_VM_STRUCT_VER;
817 }
818 if (pReq->uSvnRevision != VMMGetSvnRev())
819 {
820 LogRel(("GVMMR0CreateVMReq: uSvnRevision=%u, expected %u\n", pReq->uSvnRevision, VMMGetSvnRev()));
821 return VINF_GVM_MISMATCH_VMCPU_SIZE;
822 }
823
824 /*
825 * Execute it.
826 */
827 PGVM pGVM;
828 pReq->pVMR0 = NULL;
829 pReq->pVMR3 = NIL_RTR3PTR;
830 int rc = GVMMR0CreateVM(pSession, pReq->enmTarget, pReq->cCpus, &pGVM);
831 if (RT_SUCCESS(rc))
832 {
833 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
834 pReq->pVMR3 = pGVM->pVMR3;
835 }
836 return rc;
837}
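
/* Ring-3 sketch of filling in the request validated above; the surrounding
 * ioctl plumbing (SUPLib/VMMR3) is not shown and the header magic setup is
 * assumed to happen there:
 */
#if 0 /* illustration only */
GVMMCREATEVMREQ Req;
RT_ZERO(Req);
Req.Hdr.cbReq      = sizeof(Req);
Req.pSession       = pSession;
Req.cCpus          = 2;                 /* hypothetical value */
Req.enmTarget      = VMTARGET_X86;
Req.cbVM           = sizeof(VM);
Req.cbVCpu         = sizeof(VMCPU);
Req.uStructVersion = 1;
Req.uSvnRevision   = VMMGetSvnRev();
Req.pVMR0          = NULL;
Req.pVMR3          = NIL_RTR3PTR;
/* ... hand Req down as VMMR0_DO_GVMM_CREATE_VM; on success Req.pVMR3 holds
 *     the ring-3 mapping of the new VM structure. */
#endif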
838
839
840/**
841 * Allocates the VM structure and registers it with GVMM.
842 *
843 * The caller will become the VM owner and thereby the EMT.
844 *
845 * @returns VBox status code.
846 * @param pSession The support driver session.
 * @param enmTarget The target platform architecture of the VM.
847 * @param cCpus Number of virtual CPUs for the new VM.
848 * @param ppGVM Where to store the pointer to the VM structure.
849 *
850 * @thread EMT.
851 */
852GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, VMTARGET enmTarget, uint32_t cCpus, PGVM *ppGVM)
853{
854 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
855 PGVMM pGVMM;
856 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
857
858 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
859 *ppGVM = NULL;
860
861 if ( cCpus == 0
862 || cCpus > VMM_MAX_CPU_COUNT)
863 return VERR_INVALID_PARAMETER;
864 if ( enmTarget != VMTARGET_X86
865 && enmTarget != VMTARGET_ARMV8)
866 return VERR_INVALID_PARAMETER;
867
868 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
869 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
870 RTPROCESS ProcId = RTProcSelf();
871 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
872
873 /*
874 * The whole allocation process is protected by the lock.
875 */
876 int rc = gvmmR0CreateDestroyLock(pGVMM);
877 AssertRCReturn(rc, rc);
878
879 /*
880 * Only one VM per session.
881 */
882 if (SUPR0GetSessionVM(pSession) != NULL)
883 {
884 gvmmR0CreateDestroyUnlock(pGVMM);
885 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
886 return VERR_ALREADY_EXISTS;
887 }
888
889 /*
890 * Allocate a handle first so we don't waste resources unnecessarily.
891 */
892 uint16_t iHandle = pGVMM->iFreeHead;
893 if (iHandle)
894 {
895 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
896
897 /* consistency checks, a bit paranoid as always. */
898 if ( !pHandle->pGVM
899 && !pHandle->pvObj
900 && pHandle->iSelf == iHandle)
901 {
902 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
903 if (pHandle->pvObj)
904 {
905 /*
906 * Move the handle from the free to used list and perform permission checks.
907 */
908 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
909 AssertRC(rc);
910
911 pGVMM->iFreeHead = pHandle->iNext;
912 pHandle->iNext = pGVMM->iUsedHead;
913 pGVMM->iUsedHead = iHandle;
914 pGVMM->cVMs++;
915
916 pHandle->pGVM = NULL;
917 pHandle->pSession = pSession;
918 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
919 pHandle->ProcId = NIL_RTPROCESS;
920
921 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
922
923 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
924 if (RT_SUCCESS(rc))
925 {
926 /*
927 * Allocate memory for the VM structure (combined VM + GVM).
928 */
929 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
930 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
931 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
932 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
933 if (RT_SUCCESS(rc))
934 {
935 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
936 AssertPtr(pGVM);
937
938 /*
939 * Initialise the structure.
940 */
941 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
942 gvmmR0InitPerVMData(pGVM, iHandle, enmTarget, cCpus, pSession);
943 pGVM->gvmm.s.VMMemObj = hVMMemObj;
944#ifndef VBOX_WITH_MINIMAL_R0
945 rc = GMMR0InitPerVMData(pGVM);
946 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
947#else
948 int rc2 = VINF_SUCCESS;
949#endif
950 int rc3 = VMMR0InitPerVMData(pGVM);
951#ifndef VBOX_WITH_MINIMAL_R0
952 CPUMR0InitPerVMData(pGVM);
953 DBGFR0InitPerVMData(pGVM);
954 PDMR0InitPerVMData(pGVM);
955 IOMR0InitPerVMData(pGVM);
956 TMR0InitPerVMData(pGVM);
957#endif
958 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
959 {
960 /*
961 * Allocate page array.
962 * This currently has to be made available to ring-3, but this should change eventually.
963 */
964 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
965 if (RT_SUCCESS(rc))
966 {
967 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
968 for (uint32_t iPage = 0; iPage < cPages; iPage++)
969 {
970 paPages[iPage].uReserved = 0;
971 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
972 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
973 }
974
975 /*
976 * Map the page array, VM and VMCPU structures into ring-3.
977 */
978 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
979 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
980 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
981 0 /*offSub*/, sizeof(VM));
982 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
983 {
984 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
985 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
986 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
987 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
988 }
989 if (RT_SUCCESS(rc))
990 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
991 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
992 NIL_RTR0PROCESS);
993 if (RT_SUCCESS(rc))
994 {
995 /*
996 * Initialize all the VM pointers.
997 */
998 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
999 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
1000
1001 for (VMCPUID i = 0; i < cCpus; i++)
1002 {
1003 pGVM->aCpus[i].pVMR0 = pGVM;
1004 pGVM->aCpus[i].pVMR3 = pVMR3;
1005 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
1006 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
1007 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
1008 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
1009 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
1010 }
1011
1012 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1013 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
1014 ("%p\n", pGVM->paVMPagesR3));
1015
1016#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1017 /*
1018 * Create the high resolution wake-up timer for EMT 0, ignore failures.
1019 */
1020 if (RTTimerCanDoHighResolution())
1021 {
1022 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
1023 0 /*one-shot, no interval*/,
1024 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
1025 &pGVM->aCpus[0]);
1026 if (RT_FAILURE(rc4))
1027 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
1028 }
1029#endif
1030
1031 /*
1032 * Complete the handle - take the UsedLock sem just to be careful.
1033 */
1034 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1035 AssertRC(rc);
1036
1037 pHandle->pGVM = pGVM;
1038 pHandle->hEMT0 = hEMT0;
1039 pHandle->ProcId = ProcId;
1040 pGVM->pVMR3 = pVMR3;
1041 pGVM->pVMR3Unsafe = pVMR3;
1042 pGVM->aCpus[0].hEMT = hEMT0;
1043 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1044 pGVM->aCpus[0].cEmtHashCollisions = 0;
1045 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1046 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1047 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1048 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1049 pGVMM->cEMTs += cCpus;
1050
1051 /* Associate it with the session and create the context hook for EMT0. */
1052 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1053 if (RT_SUCCESS(rc))
1054 {
1055 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1056 if (RT_SUCCESS(rc))
1057 {
1058 /*
1059 * Done!
1060 */
1061 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1062
1063 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1064 gvmmR0CreateDestroyUnlock(pGVMM);
1065
1066#ifndef VBOX_WITH_MINIMAL_R0
1067 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1068#endif
1069
1070 *ppGVM = pGVM;
1071 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1072 return VINF_SUCCESS;
1073 }
1074
1075 SUPR0SetSessionVM(pSession, NULL, NULL);
1076 }
1077 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1078 }
1079
1080 /* Cleanup mappings. */
1081 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1082 {
1083 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1084 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1085 }
1086 for (VMCPUID i = 0; i < cCpus; i++)
1087 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1088 {
1089 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1090 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1091 }
1092 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1093 {
1094 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1095 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1096 }
1097 }
1098 }
1099 else
1100 {
1101 if (RT_SUCCESS_NP(rc))
1102 rc = rc2;
1103 if (RT_SUCCESS_NP(rc))
1104 rc = rc3;
1105 AssertStmt(RT_FAILURE_NP(rc), rc = VERR_IPE_UNEXPECTED_STATUS);
1106 }
1107 }
1108 }
1109 /* else: The user wasn't permitted to create this VM. */
1110
1111 /*
1112 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1113 * object reference here. A little extra mess because of the non-recursive lock.
1114 */
1115 void *pvObj = pHandle->pvObj;
1116 pHandle->pvObj = NULL;
1117 gvmmR0CreateDestroyUnlock(pGVMM);
1118
1119 SUPR0ObjRelease(pvObj, pSession);
1120
1121 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1122 return rc;
1123 }
1124
1125 rc = VERR_NO_MEMORY;
1126 }
1127 else
1128 rc = VERR_GVMM_IPE_1;
1129 }
1130 else
1131 rc = VERR_GVM_TOO_MANY_VMS;
1132
1133 gvmmR0CreateDestroyUnlock(pGVMM);
1134 return rc;
1135}
1136
1137
1138/**
1139 * Initializes the per VM data belonging to GVMM.
1140 *
1141 * @param pGVM Pointer to the global VM structure.
1142 * @param hSelf The handle.
1143 * @param enmTarget The target platform architecture of the VM.
1144 * @param cCpus The CPU count.
1145 * @param pSession The session this VM is associated with.
1146 */
1147static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession)
1148{
1149 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1150 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1151 AssertCompileMemberAlignment(VM, cpum, 64);
1152 AssertCompileMemberAlignment(VM, tm, 64);
1153
1154 /* GVM: */
1155 pGVM->u32Magic = GVM_MAGIC;
1156 pGVM->hSelf = hSelf;
1157 pGVM->cCpus = cCpus;
1158 pGVM->enmTarget = enmTarget;
1159 pGVM->pSession = pSession;
1160 pGVM->pSelf = pGVM;
1161
1162 /* VM: */
1163 pGVM->enmVMState = VMSTATE_CREATING;
1164 pGVM->hSelfUnsafe = hSelf;
1165 pGVM->pSessionUnsafe = pSession;
1166 pGVM->pVMR0ForCall = pGVM;
1167 pGVM->cCpusUnsafe = cCpus;
1168 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1169 pGVM->uStructVersion = 1;
1170 pGVM->cbSelf = sizeof(VM);
1171 pGVM->cbVCpu = sizeof(VMCPU);
1172 pGVM->enmTargetUnsafe = enmTarget;
1173
1174 /* GVMM: */
1175 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1176 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1177 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1178 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1179 pGVM->gvmm.s.fDoneVMMR0Init = false;
1180 pGVM->gvmm.s.fDoneVMMR0Term = false;
1181
1182 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1183 {
1184 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1185 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1186 }
1187 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1188
1189 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1190 {
1191 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1192 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1193 }
1194
1195 /*
1196 * Per virtual CPU.
1197 */
1198 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1199 {
1200 pGVM->aCpus[i].idCpu = i;
1201 pGVM->aCpus[i].idCpuUnsafe = i;
1202 pGVM->aCpus[i].enmTarget = enmTarget;
1203 pGVM->aCpus[i].enmTargetUnsafe = enmTarget;
1204 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1205 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1206 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1207 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1208 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1209 pGVM->aCpus[i].pGVM = pGVM;
1210 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1211 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1212 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1213 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1214 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1215 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1216 }
1217}
1218
1219
1220/**
1221 * Does the VM initialization.
1222 *
1223 * @returns VBox status code.
1224 * @param pGVM The global (ring-0) VM structure.
1225 */
1226GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1227{
1228 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1229
1230 int rc = VERR_INTERNAL_ERROR_3;
1231 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1232 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1233 {
1234 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1235 {
1236 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1237 if (RT_FAILURE(rc))
1238 {
1239 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1240 break;
1241 }
1242 }
1243 }
1244 else
1245 rc = VERR_WRONG_ORDER;
1246
1247 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1248 return rc;
1249}
1250
1251
1252/**
1253 * Indicates that we're done with the ring-0 initialization
1254 * of the VM.
1255 *
1256 * @param pGVM The global (ring-0) VM structure.
1257 * @thread EMT(0)
1258 */
1259GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1260{
1261 /* Set the indicator. */
1262 pGVM->gvmm.s.fDoneVMMR0Init = true;
1263}
1264
1265
1266/**
1267 * Indicates that we're doing the ring-0 termination of the VM.
1268 *
1269 * @returns true if termination hasn't been done already, false if it has.
1270 * @param pGVM Pointer to the global VM structure. Optional.
1271 * @thread EMT(0) or session cleanup thread.
1272 */
1273GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1274{
1275 /* Validate the VM structure, state and handle. */
1276 AssertPtrReturn(pGVM, false);
1277
1278 /* Set the indicator. */
1279 if (pGVM->gvmm.s.fDoneVMMR0Term)
1280 return false;
1281 pGVM->gvmm.s.fDoneVMMR0Term = true;
1282 return true;
1283}
1284
1285
1286/**
1287 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1288 *
1289 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1290 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1291 * would've been nice if the caller were actually the EMT thread or if we somehow
1292 * could've associated the calling thread with the VM up front.
1293 *
1294 * @returns VBox status code.
1295 * @param pGVM The global (ring-0) VM structure.
1296 *
1297 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1298 */
1299GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1300{
1301 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1302 PGVMM pGVMM;
1303 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1304
1305 /*
1306 * Validate the VM structure, state and caller.
1307 */
1308 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1309 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1310 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1311 VERR_WRONG_ORDER);
1312
1313 uint32_t hGVM = pGVM->hSelf;
1314 ASMCompilerBarrier();
1315 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1316 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1317
1318 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1319 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1320
1321 RTPROCESS ProcId = RTProcSelf();
1322 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1323 AssertReturn( ( pHandle->hEMT0 == hSelf
1324 && pHandle->ProcId == ProcId)
1325 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1326
1327 /*
1328 * Lookup the handle and destroy the object.
1329 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1330 * object, we take some precautions against racing callers just in case...
1331 */
1332 int rc = gvmmR0CreateDestroyLock(pGVMM);
1333 AssertRC(rc);
1334
1335 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1336 if ( pHandle->pGVM == pGVM
1337 && ( ( pHandle->hEMT0 == hSelf
1338 && pHandle->ProcId == ProcId)
1339 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1340 && RT_VALID_PTR(pHandle->pvObj)
1341 && RT_VALID_PTR(pHandle->pSession)
1342 && RT_VALID_PTR(pHandle->pGVM)
1343 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1344 {
1345 /* Check that other EMTs have deregistered. */
1346 uint32_t cNotDeregistered = 0;
1347 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1348 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1349 if (cNotDeregistered == 0)
1350 {
1351 /* Grab the object pointer. */
1352 void *pvObj = pHandle->pvObj;
1353 pHandle->pvObj = NULL;
1354 gvmmR0CreateDestroyUnlock(pGVMM);
1355
1356 SUPR0ObjRelease(pvObj, pHandle->pSession);
1357 }
1358 else
1359 {
1360 gvmmR0CreateDestroyUnlock(pGVMM);
1361 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1362 }
1363 }
1364 else
1365 {
1366 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1367 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1368 gvmmR0CreateDestroyUnlock(pGVMM);
1369 rc = VERR_GVMM_IPE_2;
1370 }
1371
1372 return rc;
1373}
1374
1375
1376/**
1377 * Performs VM cleanup tasks as part of object destruction.
1378 *
1379 * @param pGVM The GVM pointer.
1380 */
1381static void gvmmR0CleanupVM(PGVM pGVM)
1382{
1383 if ( pGVM->gvmm.s.fDoneVMMR0Init
1384 && !pGVM->gvmm.s.fDoneVMMR0Term)
1385 {
1386 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1387 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1388 {
1389 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1390 VMMR0TermVM(pGVM, NIL_VMCPUID);
1391 }
1392 else
1393 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1394 }
1395
1396#ifndef VBOX_WITH_MINIMAL_R0
1397 GMMR0CleanupVM(pGVM);
1398# ifdef VBOX_WITH_NEM_R0
1399 NEMR0CleanupVM(pGVM);
1400# endif
1401 PDMR0CleanupVM(pGVM);
1402 IOMR0CleanupVM(pGVM);
1403 DBGFR0CleanupVM(pGVM);
1404 PGMR0CleanupVM(pGVM);
1405 TMR0CleanupVM(pGVM);
1406#endif
1407 VMMR0CleanupVM(pGVM);
1408}
1409
1410
1411/**
1412 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1413 *
1414 * pvUser1 is the GVM instance pointer.
1415 * pvUser2 is the handle pointer.
1416 */
1417static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1418{
1419 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1420
1421 NOREF(pvObj);
1422
1423 /*
1424 * Some quick, paranoid, input validation.
1425 */
1426 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1427 AssertPtr(pHandle);
1428 PGVMM pGVMM = (PGVMM)pvUser1;
1429 Assert(pGVMM == g_pGVMM);
1430 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1431 if ( !iHandle
1432 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1433 || iHandle != pHandle->iSelf)
1434 {
1435 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1436 return;
1437 }
1438
1439 int rc = gvmmR0CreateDestroyLock(pGVMM);
1440 AssertRC(rc);
1441 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1442 AssertRC(rc);
1443
1444 /*
1445 * This is a tad slow but a doubly linked list is too much hassle.
1446 */
1447 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1448 {
1449 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1450 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1451 gvmmR0CreateDestroyUnlock(pGVMM);
1452 return;
1453 }
1454
1455 if (pGVMM->iUsedHead == iHandle)
1456 pGVMM->iUsedHead = pHandle->iNext;
1457 else
1458 {
1459 uint16_t iPrev = pGVMM->iUsedHead;
1460 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1461 while (iPrev)
1462 {
1463 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1464 {
1465 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1466 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1467 gvmmR0CreateDestroyUnlock(pGVMM);
1468 return;
1469 }
1470 if (RT_UNLIKELY(c-- <= 0))
1471 {
1472 iPrev = 0;
1473 break;
1474 }
1475
1476 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1477 break;
1478 iPrev = pGVMM->aHandles[iPrev].iNext;
1479 }
1480 if (!iPrev)
1481 {
1482 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1483 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1484 gvmmR0CreateDestroyUnlock(pGVMM);
1485 return;
1486 }
1487
1488 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1489 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1490 }
1491 pHandle->iNext = 0;
1492 pGVMM->cVMs--;
1493
1494 /*
1495 * Do the global cleanup round.
1496 */
1497 PGVM pGVM = pHandle->pGVM;
1498 if ( RT_VALID_PTR(pGVM)
1499 && pGVM->u32Magic == GVM_MAGIC)
1500 {
1501 pGVMM->cEMTs -= pGVM->cCpus;
1502
1503 if (pGVM->pSession)
1504 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1505
1506 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1507
1508 gvmmR0CleanupVM(pGVM);
1509
1510 /*
1511 * Do the GVMM cleanup - must be done last.
1512 */
1513 /* The VM and VM pages mappings/allocations. */
1514 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1515 {
1516 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1517 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1518 }
1519
1520 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1521 {
1522 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1523 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1524 }
1525
1526 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1527 {
1528 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1529 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1530 }
1531
1532 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1533 {
1534 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1535 {
1536 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1537 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1538 }
1539 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1540 {
1541 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1542 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1543 }
1544#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1545 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1546 {
1547 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1548 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1549 }
1550#endif
1551 }
1552
1553 /* the GVM structure itself. */
1554 pGVM->u32Magic |= UINT32_C(0x80000000);
1555 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1556 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1557 pGVM = NULL;
1558
1559 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1560 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1561 AssertRC(rc);
1562 }
1563 /* else: GVMMR0CreateVM cleanup. */
1564
1565 /*
1566 * Free the handle.
1567 */
1568 pHandle->iNext = pGVMM->iFreeHead;
1569 pGVMM->iFreeHead = iHandle;
1570 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1571 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1572 ASMAtomicWriteNullPtr(&pHandle->pSession);
1573 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1574 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1575
1576 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1577 gvmmR0CreateDestroyUnlock(pGVMM);
1578 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1579}
1580
1581
1582/**
1583 * Registers the calling thread as the EMT of a Virtual CPU.
1584 *
1585 * Note that VCPU 0 is automatically registered during VM creation.
1586 *
1587 * @returns VBox status code
1588 * @param pGVM The global (ring-0) VM structure.
1589 * @param idCpu VCPU id to register the current thread as.
1590 */
1591GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1592{
1593 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1594
1595 /*
1596 * Validate the VM structure, state and handle.
1597 */
1598 PGVMM pGVMM;
1599 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1600 if (RT_SUCCESS(rc))
1601 {
1602 if (idCpu < pGVM->cCpus)
1603 {
1604 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1605 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1606
1607 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1608
1609 /* Check that the EMT isn't already assigned to a thread. */
1610 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1611 {
1612 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1613
1614 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1615 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1616 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1617 if (RT_SUCCESS(rc))
1618 {
1619 /*
1620 * Do the assignment, then try to set up the hook. Undo if that fails.
1621 */
1622 unsigned cCollisions = 0;
1623 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1624 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1625 {
1626 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1627 do
1628 {
1629 cCollisions++;
1630 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1631 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1632 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1633 }
1634 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1635 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1636
1637 pGVCpu->hNativeThreadR0 = hNativeSelf;
1638 pGVCpu->hEMT = hNativeSelf;
1639 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1640 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1641
1642 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1643 if (RT_SUCCESS(rc))
1644 {
1645#ifndef VBOX_WITH_MINIMAL_R0
1646 CPUMR0RegisterVCpuThread(pGVCpu);
1647#endif
1648
1649#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1650 /*
1651 * Create the high resolution wake-up timer, ignore failures.
1652 */
1653 if (RTTimerCanDoHighResolution())
1654 {
1655 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1656 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1657 if (RT_FAILURE(rc2))
1658 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1659 }
1660#endif
1661 }
1662 else
1663 {
1664 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1665 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1666 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1667 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1668 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1669 }
1670 }
1671 }
1672 else
1673 rc = VERR_ACCESS_DENIED;
1674
1675 gvmmR0CreateDestroyUnlock(pGVMM);
1676 }
1677 else
1678 rc = VERR_INVALID_CPU_ID;
1679 }
1680 return rc;
1681}
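
/* The registration above inserts into an open-addressing hash using double
 * hashing. A lookup following the same probe sequence might look like this
 * sketch (gvmmR0ExampleEmtHashLookup is hypothetical; the real lookup lives
 * elsewhere in this file):
 */
#if 0 /* illustration only */
static VMCPUID gvmmR0ExampleEmtHashLookup(PGVM pGVM, RTNATIVETHREAD hNativeSelf)
{
    uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
    if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf)
    {
        uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
        unsigned       cProbes  = 0;
        do
        {
            idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
                return NIL_VMCPUID; /* a never-used slot ends the probe chain */
        } while (   pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf
                 && ++cProbes < GVMM_EMT_HASH_SIZE);
        if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf)
            return NIL_VMCPUID;
    }
    return pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
}
#endif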
1682
1683
1684/**
1685 * Deregisters the calling thread as the EMT of a Virtual CPU.
1686 *
1687 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1688 *
1689 * @returns VBox status code
1690 * @param pGVM The global (ring-0) VM structure.
1691 * @param idCpu VCPU id to deregister the current thread as.
1692 */
1693GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1694{
1695 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1696
1697 /*
1698 * Validate the VM structure, state and handle.
1699 */
1700 PGVMM pGVMM;
1701 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1702 if (RT_SUCCESS(rc))
1703 {
1704 /*
1705 * Take the destruction lock and recheck the handle state to
1706 * prevent racing GVMMR0DestroyVM.
1707 */
1708 gvmmR0CreateDestroyLock(pGVMM);
1709
1710 uint32_t hSelf = pGVM->hSelf;
1711 ASMCompilerBarrier();
1712 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1713 && pGVMM->aHandles[hSelf].pvObj != NULL
1714 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1715 {
1716 /*
1717 * Do per-EMT cleanups.
1718 */
1719 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1720
1721 /*
1722 * Invalidate hEMT. We don't use NIL here as that would allow
1723 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1724 */
1725 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1726 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1727
1728 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1729 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1730 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1731 }
1732
1733 gvmmR0CreateDestroyUnlock(pGVMM);
1734 }
1735 return rc;
1736}
1737
1738
1739/**
1740 * Registers the caller as a given worker thread.
1741 *
1742 * This enables the thread to operate critical sections in ring-0.
1743 *
1744 * @returns VBox status code.
1745 * @param pGVM The global (ring-0) VM structure.
1746 * @param enmWorker The worker thread this is supposed to be.
1747 * @param hNativeSelfR3 The ring-3 native self of the caller.
1748 */
1749GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1750{
1751 /*
1752 * Validate input.
1753 */
1754 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1755 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1756 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1757 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1758 PGVMM pGVMM;
1759 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1760 AssertRCReturn(rc, rc);
1761 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1762
1763 /*
1764 * Grab the big lock and check the VM state again.
1765 */
1766 uint32_t const hSelf = pGVM->hSelf;
1767 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1768 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1769 && pGVMM->aHandles[hSelf].pvObj != NULL
1770 && pGVMM->aHandles[hSelf].pGVM == pGVM
1771 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1772 {
1773 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1774 {
1775 /*
1776 * Check that the thread isn't an EMT or serving in some other worker capacity.
1777 */
1778 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1779 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1780 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1781                AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1782 rc = VERR_INVALID_PARAMETER);
1783 if (RT_SUCCESS(rc))
1784 {
1785 /*
1786 * Do the registration.
1787 */
1788 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1789 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1790 {
1791 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1792 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1793 rc = VINF_SUCCESS;
1794 }
1795 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1796 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1797 rc = VERR_ALREADY_EXISTS;
1798 else
1799 rc = VERR_RESOURCE_BUSY;
1800 }
1801 }
1802 else
1803 rc = VERR_VM_INVALID_VM_STATE;
1804 }
1805 else
1806 rc = VERR_INVALID_VM_HANDLE;
1807 gvmmR0CreateDestroyUnlock(pGVMM);
1808 return rc;
1809}
1810
1811
1812/**
1813 * Deregisters a worker thread (the caller).
1814 *
1815 * The worker thread cannot be re-created and re-registered; instead the given
1816 * @a enmWorker slot becomes invalid.
1817 *
1818 * @returns VBox status code.
1819 * @param pGVM The global (ring-0) VM structure.
1820 * @param enmWorker The worker thread this is supposed to be.
1821 */
1822GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1823{
1824 /*
1825 * Validate input.
1826 */
1827 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1828 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1829 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1830 PGVMM pGVMM;
1831 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1832 AssertRCReturn(rc, rc);
1833
1834 /*
1835 * Grab the big lock and check the VM state again.
1836 */
1837 uint32_t const hSelf = pGVM->hSelf;
1838 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1839 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1840 && pGVMM->aHandles[hSelf].pvObj != NULL
1841 && pGVMM->aHandles[hSelf].pGVM == pGVM
1842 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1843 {
1844 /*
1845 * Do the deregistration.
1846         * This will prevent any other thread from registering as this worker later.
1847 */
1848 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1849 {
1850 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1851 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1852 rc = VINF_SUCCESS;
1853 }
1854 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1855 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1856 rc = VINF_SUCCESS;
1857 else
1858 rc = VERR_NOT_OWNER;
1859 }
1860 else
1861 rc = VERR_INVALID_VM_HANDLE;
1862 gvmmR0CreateDestroyUnlock(pGVMM);
1863 return rc;
1864}
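
/*
 * [Editor's note] The GVMM_RTNATIVETHREAD_DESTROYED tombstone used above
 * (rather than NIL) is what enforces the "cannot re-register" rule: the
 * registration path only accepts a slot holding NIL, so a destroyed slot
 * matches neither NIL nor any live thread.  A minimal model of that design:
 */
#if 0 /* illustrative only, not built */
# include <stdint.h>

# define SLOT_NIL        ((uintptr_t)0)
# define SLOT_DESTROYED  (~(uintptr_t)1)    /* hypothetical tombstone value */

static int slotRegister(uintptr_t *pSlot, uintptr_t hThread)
{
    if (*pSlot != SLOT_NIL)     /* NIL only: a tombstoned slot stays dead */
        return -1;
    *pSlot = hThread;
    return 0;
}

static void slotDeregister(uintptr_t *pSlot)
{
    *pSlot = SLOT_DESTROYED;    /* not NIL, so slotRegister() keeps failing */
}
#endif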
1865
1866
1867/**
1868 * Lookup a GVM structure by its handle.
1869 *
1870 * @returns The GVM pointer on success, NULL on failure.
1871 * @param hGVM The global VM handle. Asserts on bad handle.
1872 */
1873GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1874{
1875 PGVMM pGVMM;
1876 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1877
1878 /*
1879 * Validate.
1880 */
1881 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1882 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1883
1884 /*
1885 * Look it up.
1886 */
1887 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1888 AssertPtrReturn(pHandle->pvObj, NULL);
1889 PGVM pGVM = pHandle->pGVM;
1890 AssertPtrReturn(pGVM, NULL);
1891
1892 return pGVM;
1893}
1894
1895
1896/**
1897 * Check that the given GVM and VM structures match up.
1898 *
1899 * The calling thread must be in the same process as the VM. All current lookups
1900 * are by threads inside the same process, so this will not be an issue.
1901 *
1902 * @returns VBox status code.
1903 * @param pGVM The global (ring-0) VM structure.
1904 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1905 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1906 * shared mode when requested.
1907 *
1908 * Be very careful if not taking the lock as it's
1909 * possible that the VM will disappear then!
1910 *
1911 * @remark  This will not assert on an invalid pGVM but will try to return silently.
1912 */
1913static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1914{
1915 /*
1916 * Check the pointers.
1917 */
1918 int rc;
1919 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1920 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1921 {
1922 /*
1923 * Get the pGVMM instance and check the VM handle.
1924 */
1925 PGVMM pGVMM;
1926 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1927
1928 uint16_t hGVM = pGVM->hSelf;
1929 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1930 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1931 {
1932 RTPROCESS const pidSelf = RTProcSelf();
1933 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1934 if (fTakeUsedLock)
1935 {
1936 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1937 AssertRCReturn(rc, rc);
1938 }
1939
1940 if (RT_LIKELY( pHandle->pGVM == pGVM
1941 && pHandle->ProcId == pidSelf
1942 && RT_VALID_PTR(pHandle->pvObj)))
1943 {
1944 /*
1945 * Some more VM data consistency checks.
1946 */
1947 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1948 && pGVM->hSelfUnsafe == hGVM
1949 && pGVM->pSelf == pGVM))
1950 {
1951 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1952 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1953 {
1954 *ppGVMM = pGVMM;
1955 return VINF_SUCCESS;
1956 }
1957 rc = VERR_INCONSISTENT_VM_HANDLE;
1958 }
1959 else
1960 rc = VERR_INCONSISTENT_VM_HANDLE;
1961 }
1962 else
1963 rc = VERR_INVALID_VM_HANDLE;
1964
1965 if (fTakeUsedLock)
1966 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1967 }
1968 else
1969 rc = VERR_INVALID_VM_HANDLE;
1970 }
1971 else
1972 rc = VERR_INVALID_POINTER;
1973 return rc;
1974}
1975
1976
1977/**
1978 * Validates a GVM/VM pair.
1979 *
1980 * @returns VBox status code.
1981 * @param pGVM The global (ring-0) VM structure.
1982 */
1983GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1984{
1985 PGVMM pGVMM;
1986 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1987}
1988
1989
1990/**
1991 * Check that the given GVM and VM structures match up.
1992 *
1993 * The calling thread must be in the same process as the VM. All current lookups
1994 * are by threads inside the same process, so this will not be an issue.
1995 *
1996 * @returns VBox status code.
1997 * @param pGVM The global (ring-0) VM structure.
1998 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1999 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2000 * @thread EMT
2001 *
2002 * @remarks This will assert in all failure paths.
2003 */
2004static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
2005{
2006 /*
2007 * Check the pointers.
2008 */
2009 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2010 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2011
2012 /*
2013 * Get the pGVMM instance and check the VM handle.
2014 */
2015 PGVMM pGVMM;
2016 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2017
2018 uint16_t hGVM = pGVM->hSelf;
2019 ASMCompilerBarrier();
2020 AssertReturn( hGVM != NIL_GVM_HANDLE
2021 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2022
2023 RTPROCESS const pidSelf = RTProcSelf();
2024 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2025 AssertReturn( pHandle->pGVM == pGVM
2026 && pHandle->ProcId == pidSelf
2027 && RT_VALID_PTR(pHandle->pvObj),
2028 VERR_INVALID_HANDLE);
2029
2030 /*
2031 * Check the EMT claim.
2032 */
2033 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2034 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2035 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2036
2037 /*
2038 * Some more VM data consistency checks.
2039 */
2040 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2041 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2042 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
2043 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2044
2045 *ppGVMM = pGVMM;
2046 return VINF_SUCCESS;
2047}
2048
2049
2050/**
2051 * Validates a GVM/EMT pair.
2052 *
2053 * @returns VBox status code.
2054 * @param pGVM The global (ring-0) VM structure.
2055 * @param idCpu The Virtual CPU ID of the calling EMT.
2056 * @thread EMT(idCpu)
2057 */
2058GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2059{
2060 PGVMM pGVMM;
2061 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2062}
2063
2064
2065/**
2066 * Looks up the VM belonging to the specified EMT thread.
2067 *
2068 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2069 * unnecessary kernel panics when the EMT thread hits an assertion. The
2070 * caller may or may not be an EMT thread.
2071 *
2072 * @returns Pointer to the VM on success, NULL on failure.
2073 * @param hEMT The native thread handle of the EMT.
2074 * NIL_RTNATIVETHREAD means the current thread
2075 */
2076GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2077{
2078 /*
2079     * No assertions here as we're usually called in an AssertMsgN or
2080 * RTAssert* context.
2081 */
2082 PGVMM pGVMM = g_pGVMM;
2083 if ( !RT_VALID_PTR(pGVMM)
2084 || pGVMM->u32Magic != GVMM_MAGIC)
2085 return NULL;
2086
2087 if (hEMT == NIL_RTNATIVETHREAD)
2088 hEMT = RTThreadNativeSelf();
2089 RTPROCESS ProcId = RTProcSelf();
2090
2091 /*
2092 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2093 */
2094/** @todo introduce some pid hash table here, please. */
2095 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2096 {
2097 if ( pGVMM->aHandles[i].iSelf == i
2098 && pGVMM->aHandles[i].ProcId == ProcId
2099 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2100 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2101 {
2102 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2103 return pGVMM->aHandles[i].pGVM;
2104
2105            /* This is fairly safe with the current process-per-VM approach. */
2106 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2107 VMCPUID const cCpus = pGVM->cCpus;
2108 ASMCompilerBarrier();
2109 if ( cCpus < 1
2110 || cCpus > VMM_MAX_CPU_COUNT)
2111 continue;
2112 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2113 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2114 return pGVMM->aHandles[i].pGVM;
2115 }
2116 }
2117 return NULL;
2118}
2119
2120
2121/**
2122 * Looks up the GVMCPU belonging to the specified EMT thread.
2123 *
2124 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2125 * unnecessary kernel panics when the EMT thread hits an assertion. The
2126 * caller may or may not be an EMT thread.
2127 *
2128 * @returns Pointer to the GVMCPU on success, NULL on failure.
2129 * @param hEMT The native thread handle of the EMT.
2130 * NIL_RTNATIVETHREAD means the current thread
2131 */
2132GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2133{
2134 /*
2135     * No assertions here as we're usually called in an AssertMsgN,
2136 * RTAssert*, Log and LogRel contexts.
2137 */
2138 PGVMM pGVMM = g_pGVMM;
2139 if ( !RT_VALID_PTR(pGVMM)
2140 || pGVMM->u32Magic != GVMM_MAGIC)
2141 return NULL;
2142
2143 if (hEMT == NIL_RTNATIVETHREAD)
2144 hEMT = RTThreadNativeSelf();
2145 RTPROCESS ProcId = RTProcSelf();
2146
2147 /*
2148 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2149 */
2150/** @todo introduce some pid hash table here, please. */
2151 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2152 {
2153 if ( pGVMM->aHandles[i].iSelf == i
2154 && pGVMM->aHandles[i].ProcId == ProcId
2155 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2156 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2157 {
2158 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2159 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2160 return &pGVM->aCpus[0];
2161
2162            /* This is fairly safe with the current process-per-VM approach. */
2163 VMCPUID const cCpus = pGVM->cCpus;
2164            ASMCompilerBarrier();
2166 if ( cCpus < 1
2167 || cCpus > VMM_MAX_CPU_COUNT)
2168 continue;
2169 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2170 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2171 return &pGVM->aCpus[idCpu];
2172 }
2173 }
2174 return NULL;
2175}
2176
2177
2178/**
2179 * Get the GVMCPU structure for the given EMT.
2180 *
2181 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2182 * @param pGVM The global (ring-0) VM structure.
2183 * @param hEMT The native thread handle of the EMT.
2184 *                  NIL_RTNATIVETHREAD means the current thread.
2185 */
2186GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2187{
2188 /*
2189 * Validate & adjust input.
2190 */
2191 AssertPtr(pGVM);
2192 Assert(pGVM->u32Magic == GVM_MAGIC);
2193 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2194 {
2195 hEMT = RTThreadNativeSelf();
2196 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2197 }
2198
2199 /*
2200 * Find the matching hash table entry.
2201 * See similar code in GVMMR0GetRing3ThreadForSelf.
2202 */
2203 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2204 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2205 { /* likely */ }
2206 else
2207 {
2208#ifdef VBOX_STRICT
2209 unsigned cCollisions = 0;
2210#endif
2211 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2212 for (;;)
2213 {
2214 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2215 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2216 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2217 break;
2218 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2219 {
2220#ifdef VBOX_STRICT
2221 uint32_t idxCpu = pGVM->cCpus;
2222 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2223 while (idxCpu-- > 0)
2224 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2225#endif
2226 return NULL;
2227 }
2228 }
2229 }
2230
2231 /*
2232 * Validate the VCpu number and translate it into a pointer.
2233 */
2234 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2235 AssertReturn(idCpu < pGVM->cCpus, NULL);
2236 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2237 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2238 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2239 return pGVCpu;
2240}
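
/*
 * [Editor's sketch] The lookup counterpart of the insert sketch after
 * GVMMR0RegisterVCpu: same hypothetical stand-ins, same probe sequence, but
 * hitting a NIL slot means "never inserted".  Note that deregistration
 * writes a tombstone instead of NIL precisely so probe chains stay intact.
 */
#if 0 /* illustrative only, not built */
# include <stdint.h>

# define EMT_HASH_SIZE 32
# define EMT_HASH_1(h) ((uint32_t)((h) % EMT_HASH_SIZE))
# define EMT_HASH_2(h) (((uint32_t)((h) % (EMT_HASH_SIZE - 1))) | 1)

typedef struct { uintptr_t hNativeEmt; uint32_t idVCpu; } EMTHASHENTRY;

/* Returns 0 and sets *pidVCpu on a hit, -1 if hEmt was never inserted. */
static int emtHashLookup(EMTHASHENTRY const *paTable, uintptr_t hEmt, uint32_t *pidVCpu)
{
    uint32_t idxHash = EMT_HASH_1(hEmt);
    if (paTable[idxHash].hNativeEmt != hEmt)
    {
        uint32_t const idxHash2 = EMT_HASH_2(hEmt);
        for (;;)
        {
            idxHash = (idxHash + idxHash2) % EMT_HASH_SIZE;
            if (paTable[idxHash].hNativeEmt == hEmt)
                break;
            if (paTable[idxHash].hNativeEmt == 0)   /* NIL slot ends the chain */
                return -1;
        }
    }
    *pidVCpu = paTable[idxHash].idVCpu;
    return 0;
}
#endif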
2241
2242
2243/**
2244 * Get the native ring-3 thread handle for the caller.
2245 *
2246 * This works for EMTs and registered workers.
2247 *
2248 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2249 * @param pGVM The global (ring-0) VM structure.
2250 */
2251GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2252{
2253 /*
2254 * Validate input.
2255 */
2256 AssertPtr(pGVM);
2257 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2258 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2259 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2260
2261 /*
2262 * Find the matching hash table entry.
2263 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2264 */
2265 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2266 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2267 { /* likely */ }
2268 else
2269 {
2270#ifdef VBOX_STRICT
2271 unsigned cCollisions = 0;
2272#endif
2273 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2274 for (;;)
2275 {
2276 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2277 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2278 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2279 break;
2280 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2281 {
2282#ifdef VBOX_STRICT
2283 uint32_t idxCpu = pGVM->cCpus;
2284 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2285 while (idxCpu-- > 0)
2286 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2287#endif
2288
2289 /*
2290 * Not an EMT, so see if it's a worker thread.
2291 */
2292 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2293 while (--idx > GVMMWORKERTHREAD_INVALID)
2294 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2295 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2296
2297 return NIL_RTNATIVETHREAD;
2298 }
2299 }
2300 }
2301
2302 /*
2303 * Validate the VCpu number and translate it into a pointer.
2304 */
2305 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2306 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2307 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2308 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2309 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2310 return pGVCpu->hNativeThread;
2311}
2312
2313
2314/**
2315 * Converts a pointer with the GVM structure to a host physical address.
2316 *
2317 * @returns Host physical address.
2318 * @param pGVM The global (ring-0) VM structure.
2319 * @param pv The address to convert.
2320 * @thread EMT
2321 */
2322GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2323{
2324 AssertPtr(pGVM);
2325 Assert(pGVM->u32Magic == GVM_MAGIC);
2326 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2327 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2328 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2329}
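
/*
 * [Editor's sketch] Hypothetical use of the converter above, e.g. when a
 * ring-0 caller must hand the host physical address of a GVMCPU structure
 * to hardware; the pointer must lie within the GVM allocation (asserted
 * above).
 */
#if 0 /* illustrative only, not built */
static RTHCPHYS myGetGVCpuHCPhys(PGVM pGVM, VMCPUID idCpu)
{
    return GVMMR0ConvertGVMPtr2HCPhys(pGVM, &pGVM->aCpus[idCpu]);
}
#endif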
2330
2331
2332/**
2333 * This will wake up expired and soon-to-be-expired VMs.
2334 *
2335 * @returns Number of VMs that have been woken up.
2336 * @param pGVMM Pointer to the GVMM instance data.
2337 * @param u64Now The current time.
2338 */
2339static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2340{
2341 /*
2342     * Skip this if it has been disabled because of high-resolution wake-ups or by
2343     * the user.
2344 */
2345 if (!pGVMM->fDoEarlyWakeUps)
2346 return 0;
2347
2348/** @todo Rewrite this algorithm. See performance defect XYZ. */
2349
2350 /*
2351 * A cheap optimization to stop wasting so much time here on big setups.
2352 */
2353 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2354 if ( pGVMM->cHaltedEMTs == 0
2355 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2356 return 0;
2357
2358 /*
2359 * Only one thread doing this at a time.
2360 */
2361 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2362 return 0;
2363
2364 /*
2365 * The first pass will wake up VMs which have actually expired
2366 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2367 */
2368 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2369 uint64_t u64Min = UINT64_MAX;
2370 unsigned cWoken = 0;
2371 unsigned cHalted = 0;
2372 unsigned cTodo2nd = 0;
2373 unsigned cTodo3rd = 0;
2374 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2375 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2376 i = pGVMM->aHandles[i].iNext)
2377 {
2378 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2379 if ( RT_VALID_PTR(pCurGVM)
2380 && pCurGVM->u32Magic == GVM_MAGIC)
2381 {
2382 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2383 {
2384 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2385 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2386 if (u64)
2387 {
2388 if (u64 <= u64Now)
2389 {
2390 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2391 {
2392 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2393 AssertRC(rc);
2394 cWoken++;
2395 }
2396 }
2397 else
2398 {
2399 cHalted++;
2400 if (u64 <= uNsEarlyWakeUp1)
2401 cTodo2nd++;
2402 else if (u64 <= uNsEarlyWakeUp2)
2403 cTodo3rd++;
2404 else if (u64 < u64Min)
2405                            u64Min = u64; /* remember the earliest expiry for the next-wakeup calc below */
2406 }
2407 }
2408 }
2409 }
2410 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2411 }
2412
2413 if (cTodo2nd)
2414 {
2415 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2416 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2417 i = pGVMM->aHandles[i].iNext)
2418 {
2419 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2420 if ( RT_VALID_PTR(pCurGVM)
2421 && pCurGVM->u32Magic == GVM_MAGIC)
2422 {
2423 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2424 {
2425 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2426 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2427 if ( u64
2428 && u64 <= uNsEarlyWakeUp1)
2429 {
2430 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2431 {
2432 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2433 AssertRC(rc);
2434 cWoken++;
2435 }
2436 }
2437 }
2438 }
2439 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2440 }
2441 }
2442
2443 if (cTodo3rd)
2444 {
2445 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2446 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2447 i = pGVMM->aHandles[i].iNext)
2448 {
2449 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2450 if ( RT_VALID_PTR(pCurGVM)
2451 && pCurGVM->u32Magic == GVM_MAGIC)
2452 {
2453 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2454 {
2455 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2456 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2457 if ( u64
2458 && u64 <= uNsEarlyWakeUp2)
2459 {
2460 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2461 {
2462 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2463 AssertRC(rc);
2464 cWoken++;
2465 }
2466 }
2467 }
2468 }
2469 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2470 }
2471 }
2472
2473 /*
2474 * Set the minimum value.
2475 */
2476 pGVMM->uNsNextEmtWakeup = u64Min;
2477
2478 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2479 return cWoken;
2480}
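
/*
 * [Editor's sketch] Standalone restatement of the three-pass classification
 * above: pass 1 wakes EMTs whose expiry has already passed, pass 2 those
 * expiring within nsEarlyWakeUp1 of now, pass 3 those within nsEarlyWakeUp2
 * (nsEarlyWakeUp1 <= nsEarlyWakeUp2); anything later only feeds the u64Min
 * calculation.  The thresholds are plain parameters here.
 */
#if 0 /* illustrative only, not built */
# include <stdint.h>

typedef enum { WAKE_NOW = 1, WAKE_PASS2, WAKE_PASS3, WAKE_LATER } WAKEPASS;

static WAKEPASS wakeUpPass(uint64_t u64Expire, uint64_t u64Now,
                           uint64_t nsEarly1, uint64_t nsEarly2)
{
    if (u64Expire <= u64Now)            return WAKE_NOW;    /* 1st pass: expired */
    if (u64Expire <= u64Now + nsEarly1) return WAKE_PASS2;  /* 2nd pass: wake early */
    if (u64Expire <= u64Now + nsEarly2) return WAKE_PASS3;  /* 3rd pass: wake earlier still */
    return WAKE_LATER;                                      /* contributes to u64Min only */
}
#endif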
2481
2482
2483#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2484/**
2485 * Timer callback for the EMT high-resolution wake-up timer.
2486 *
2487 * @param pTimer The timer handle.
2488 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2489 * @param iTick The current tick.
2490 */
2491static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2492{
2493 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2494 NOREF(pTimer); NOREF(iTick);
2495
2496 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2497 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2498 {
2499 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2500 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2501 }
2502 else
2503 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2504
2505 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2506 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2507}
2508#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2509
2510
2511/**
2512 * Halt the EMT thread.
2513 *
2514 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2515 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2516 * @param pGVM The global (ring-0) VM structure.
2517 * @param pGVCpu The global (ring-0) CPU structure of the calling
2518 * EMT.
2519 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2520 * @thread EMT(pGVCpu).
2521 */
2522GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2523{
2524 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2525 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2526 PGVMM pGVMM;
2527 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2528
2529 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2530 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2531
2532 /*
2533 * If we're doing early wake-ups, we must take the UsedList lock before we
2534 * start querying the current time.
2535 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2536 */
2537 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2538 if (fDoEarlyWakeUps)
2539 {
2540 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2541 }
2542
2543    /* GIP hack: We are frequently sleeping for short intervals where the
2544 difference between GIP and system time matters on systems with high resolution
2545 system time. So, convert the input from GIP to System time in that case. */
2546 Assert(ASMIntAreEnabled());
2547 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2548 const uint64_t u64NowGip = RTTimeNanoTS();
2549
2550 if (fDoEarlyWakeUps)
2551 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2552
2553 /*
2554 * Go to sleep if we must...
2555 * Cap the sleep time to 1 second to be on the safe side.
2556 */
2557 int rc;
2558 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2559 if ( u64NowGip < u64ExpireGipTime
2560 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2561 ? pGVMM->nsMinSleepCompany
2562 : pGVMM->nsMinSleepAlone)
2563#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2564 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2565#endif
2566 )
2567 )
2568 {
2569 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2570 if (cNsInterval > RT_NS_1SEC)
2571 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2572 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2573 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2574 if (fDoEarlyWakeUps)
2575 {
2576 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2577 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2578 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2579 }
2580
2581#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2582 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2583 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2584 {
2585 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2586 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2587 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2588 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2589 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2590 }
2591#endif
2592
2593 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2594 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2595 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2596
2597 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2598 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2599
2600#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2601 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2602 { /* likely */ }
2603 else
2604 {
2605 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2606 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2607 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2608 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2609 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2610 }
2611#endif
2612
2613        /* Reset the semaphore to try to prevent a few false wake-ups. */
2614 if (rc == VINF_SUCCESS)
2615 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2616 else if (rc == VERR_TIMEOUT)
2617 {
2618 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2619 rc = VINF_SUCCESS;
2620 }
2621 }
2622 else
2623 {
2624 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2625 if (fDoEarlyWakeUps)
2626 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2627 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2628 rc = VINF_SUCCESS;
2629 }
2630
2631 return rc;
2632}
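
/*
 * [Editor's sketch] Hypothetical wrapper showing the calling convention:
 * the expiry is an absolute GIP timestamp, so callers typically pass
 * "now + timeout".
 */
#if 0 /* illustrative only, not built */
static int myHaltFor(PGVM pGVM, PGVMCPU pGVCpu, uint64_t cNsTimeout)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + cNsTimeout;  /* absolute deadline */
    return GVMMR0SchedHalt(pGVM, pGVCpu, u64ExpireGipTime);         /* VINF_SUCCESS or VERR_INTERRUPTED */
}
#endif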
2633
2634
2635/**
2636 * Halt the EMT thread.
2637 *
2638 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2639 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2640 * @param pGVM The global (ring-0) VM structure.
2641 * @param idCpu The Virtual CPU ID of the calling EMT.
2642 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2643 * @thread EMT(idCpu).
2644 */
2645GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2646{
2647 PGVMM pGVMM;
2648 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2649 if (RT_SUCCESS(rc))
2650 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2651 return rc;
2652}
2653
2654
2655
2656/**
2657 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2658 * a sleeping EMT.
2659 *
2660 * @retval VINF_SUCCESS if successfully woken up.
2661 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2662 *
2663 * @param pGVM The global (ring-0) VM structure.
2664 * @param pGVCpu The global (ring-0) VCPU structure.
2665 */
2666DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2667{
2668 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2669
2670 /*
2671     * Signal the semaphore regardless of whether it's currently blocked on it.
2672     *
2673     * The reason for this is that there is absolutely no way we can be 100%
2674     * certain that it isn't *about* to go to sleep on it and just got
2675     * delayed a bit en route. So, we will always signal the semaphore when
2676     * it is flagged as halted in the VMM.
2677 */
2678/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2679 int rc;
2680 if (pGVCpu->gvmm.s.u64HaltExpire)
2681 {
2682 rc = VINF_SUCCESS;
2683 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2684 }
2685 else
2686 {
2687 rc = VINF_GVM_NOT_BLOCKED;
2688 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2689 }
2690
2691 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2692 AssertRC(rc2);
2693
2694 return rc;
2695}
2696
2697
2698/**
2699 * Wakes up the halted EMT thread so it can service a pending request.
2700 *
2701 * @returns VBox status code.
2702 * @retval VINF_SUCCESS if successfully woken up.
2703 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2704 *
2705 * @param pGVM The global (ring-0) VM structure.
2706 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2707 * @param   fTakeUsedLock   Take the used lock or not.
2708 * @thread Any but EMT(idCpu).
2709 */
2710GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2711{
2712 /*
2713 * Validate input and take the UsedLock.
2714 */
2715 PGVMM pGVMM;
2716 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2717 if (RT_SUCCESS(rc))
2718 {
2719 if (idCpu < pGVM->cCpus)
2720 {
2721 /*
2722 * Do the actual job.
2723 */
2724 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2725
2726 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2727 {
2728 /*
2729 * While we're here, do a round of scheduling.
2730 */
2731 Assert(ASMIntAreEnabled());
2732 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2733 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2734 }
2735 }
2736 else
2737 rc = VERR_INVALID_CPU_ID;
2738
2739 if (fTakeUsedLock)
2740 {
2741 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2742 AssertRC(rc2);
2743 }
2744 }
2745
2746 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2747 return rc;
2748}
2749
2750
2751/**
2752 * Wakes up the halted EMT thread so it can service a pending request.
2753 *
2754 * @returns VBox status code.
2755 * @retval VINF_SUCCESS if successfully woken up.
2756 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2757 *
2758 * @param pGVM The global (ring-0) VM structure.
2759 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2760 * @thread Any but EMT(idCpu).
2761 */
2762GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2763{
2764 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2765}
2766
2767
2768/**
2769 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2770 * parameter and no used locking.
2771 *
2772 * @returns VBox status code.
2773 * @retval VINF_SUCCESS if successfully woken up.
2774 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2775 *
2776 * @param pGVM The global (ring-0) VM structure.
2777 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2778 * @thread Any but EMT(idCpu).
2779 * @deprecated Don't use in new code if possible! Use the GVM variant.
2780 */
2781GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2782{
2783 PGVMM pGVMM;
2784 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2785 if (RT_SUCCESS(rc))
2786 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2787 return rc;
2788}
2789
2790
2791/**
2792 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2793 * the Virtual CPU if it's still busy executing guest code.
2794 *
2795 * @returns VBox status code.
2796 * @retval VINF_SUCCESS if poked successfully.
2797 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2798 *
2799 * @param pGVM The global (ring-0) VM structure.
2800 * @param pVCpu The cross context virtual CPU structure.
2801 */
2802DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2803{
2804 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2805
2806 RTCPUID idHostCpu = pVCpu->idHostCpu;
2807 if ( idHostCpu == NIL_RTCPUID
2808 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2809 {
2810 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2811 return VINF_GVM_NOT_BUSY_IN_GC;
2812 }
2813
2814 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2815 RTMpPokeCpu(idHostCpu);
2816 return VINF_SUCCESS;
2817}
2818
2819
2820/**
2821 * Pokes an EMT if it's still busy running guest code.
2822 *
2823 * @returns VBox status code.
2824 * @retval VINF_SUCCESS if poked successfully.
2825 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2826 *
2827 * @param pGVM The global (ring-0) VM structure.
2828 * @param idCpu The ID of the virtual CPU to poke.
2829 * @param   fTakeUsedLock   Take the used lock or not.
2830 */
2831GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2832{
2833 /*
2834 * Validate input and take the UsedLock.
2835 */
2836 PGVMM pGVMM;
2837 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2838 if (RT_SUCCESS(rc))
2839 {
2840 if (idCpu < pGVM->cCpus)
2841 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2842 else
2843 rc = VERR_INVALID_CPU_ID;
2844
2845 if (fTakeUsedLock)
2846 {
2847 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2848 AssertRC(rc2);
2849 }
2850 }
2851
2852    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2853 return rc;
2854}
2855
2856
2857/**
2858 * Pokes an EMT if it's still busy running guest code.
2859 *
2860 * @returns VBox status code.
2861 * @retval VINF_SUCCESS if poked successfully.
2862 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2863 *
2864 * @param pGVM The global (ring-0) VM structure.
2865 * @param idCpu The ID of the virtual CPU to poke.
2866 */
2867GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2868{
2869 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2870}
2871
2872
2873/**
2874 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2875 * used locking.
2876 *
2877 * @returns VBox status code.
2878 * @retval VINF_SUCCESS if poked successfully.
2879 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2880 *
2881 * @param pGVM The global (ring-0) VM structure.
2882 * @param idCpu The ID of the virtual CPU to poke.
2883 *
2884 * @deprecated Don't use in new code if possible! Use the GVM variant.
2885 */
2886GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2887{
2888 PGVMM pGVMM;
2889 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2890 if (RT_SUCCESS(rc))
2891 {
2892 if (idCpu < pGVM->cCpus)
2893 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2894 else
2895 rc = VERR_INVALID_CPU_ID;
2896 }
2897 return rc;
2898}
2899
2900
2901/**
2902 * Wakes up a set of halted EMT threads so they can service pending requests.
2903 *
2904 * @returns VBox status code, no informational stuff.
2905 *
2906 * @param pGVM The global (ring-0) VM structure.
2907 * @param pSleepSet The set of sleepers to wake up.
2908 * @param pPokeSet The set of CPUs to poke.
2909 */
2910GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2911{
2912 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2913 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2914 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2915
2916 /*
2917 * Validate input and take the UsedLock.
2918 */
2919 PGVMM pGVMM;
2920 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2921 if (RT_SUCCESS(rc))
2922 {
2923 rc = VINF_SUCCESS;
2924 VMCPUID idCpu = pGVM->cCpus;
2925 while (idCpu-- > 0)
2926 {
2927            /* Don't try to poke or wake up ourselves. */
2928 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2929 continue;
2930
2931 /* just ignore errors for now. */
2932 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2933 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2934 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2935 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2936 }
2937
2938 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2939 AssertRC(rc2);
2940 }
2941
2942 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2943 return rc;
2944}
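
/*
 * [Editor's sketch] Hypothetical caller waking the halted VCPU 1 and poking
 * VCPU 2 out of guest execution, using the VMCPUSET_* helpers from
 * VBox/vmm/vmcpuset.h (included at the top of this file).
 */
#if 0 /* illustrative only, not built */
static int myWakeAndPoke(PGVM pGVM)
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    VMCPUSET_ADD(&SleepSet, 1 /*idCpu*/);   /* halted EMT: signal its semaphore */
    VMCPUSET_ADD(&PokeSet,  2 /*idCpu*/);   /* EMT in guest code: IPI it out */
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
}
#endif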
2945
2946
2947/**
2948 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2949 *
2950 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2951 * @param pGVM The global (ring-0) VM structure.
2952 * @param pReq Pointer to the request packet.
2953 */
2954GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2955{
2956 /*
2957 * Validate input and pass it on.
2958 */
2959 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2960 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2961
2962 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2963}
2964
2965
2966
2967/**
2968 * Poll the schedule to see if someone else should get a chance to run.
2969 *
2970 * This is a bit hackish and will not work too well if the machine is
2971 * under heavy load from non-VM processes.
2972 *
2973 * @returns VINF_SUCCESS if not yielded.
2974 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2975 * @param pGVM The global (ring-0) VM structure.
2976 * @param idCpu The Virtual CPU ID of the calling EMT.
2977 * @param fYield Whether to yield or not.
2978 * This is for when we're spinning in the halt loop.
2979 * @thread EMT(idCpu).
2980 */
2981GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2982{
2983 /*
2984 * Validate input.
2985 */
2986 PGVMM pGVMM;
2987 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2988 if (RT_SUCCESS(rc))
2989 {
2990 /*
2991         * We currently only implement helping with wake-ups (fYield = false), so don't
2992 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2993 */
2994 if (!fYield && pGVMM->fDoEarlyWakeUps)
2995 {
2996 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2997 pGVM->gvmm.s.StatsSched.cPollCalls++;
2998
2999 Assert(ASMIntAreEnabled());
3000 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
3001
3002 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
3003
3004 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3005 }
3006 /*
3007 * Not quite sure what we could do here...
3008 */
3009 else if (fYield)
3010 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
3011 else
3012 rc = VINF_SUCCESS;
3013 }
3014
3015    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
3016 return rc;
3017}
3018
3019
3020#ifdef GVMM_SCHED_WITH_PPT
3021/**
3022 * Timer callback for the periodic preemption timer.
3023 *
3024 * @param pTimer The timer handle.
3025 * @param pvUser Pointer to the per cpu structure.
3026 * @param iTick The current tick.
3027 */
3028static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3029{
3030 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
3031 NOREF(pTimer); NOREF(iTick);
3032
3033 /*
3034 * Termination check
3035 */
3036 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
3037 return;
3038
3039 /*
3040     * Do the housekeeping.
3041 */
3042 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3043
3044 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3045 {
3046 /*
3047 * Historicize the max frequency.
3048 */
3049 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3050 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3051 pCpu->Ppt.iTickHistorization = 0;
3052 pCpu->Ppt.uDesiredHz = 0;
3053
3054 /*
3055         * Check if the current timer frequency needs changing.
3056 */
3057 uint32_t uHistMaxHz = 0;
3058 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3059 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3060 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3061 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3062 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3063 else if (uHistMaxHz)
3064 {
3065 /*
3066 * Reprogram it.
3067 */
3068 pCpu->Ppt.cChanges++;
3069 pCpu->Ppt.iTickHistorization = 0;
3070 pCpu->Ppt.uTimerHz = uHistMaxHz;
3071 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3072 pCpu->Ppt.cNsInterval = cNsInterval;
3073 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3074 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3075 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3076 / cNsInterval;
3077 else
3078 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3079 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3080
3081 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3082 RTTimerChangeInterval(pTimer, cNsInterval);
3083 }
3084 else
3085 {
3086 /*
3087 * Stop it.
3088 */
3089 pCpu->Ppt.fStarted = false;
3090 pCpu->Ppt.uTimerHz = 0;
3091 pCpu->Ppt.cNsInterval = 0;
3092 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3093
3094 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3095 RTTimerStop(pTimer);
3096 }
3097 }
3098 else
3099 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3100}
3101#endif /* GVMM_SCHED_WITH_PPT */
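
/*
 * [Editor's note] Worked example of the reprogramming math above, assuming
 * (illustratively) GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 20 000 000 (20 ms).
 * For uHistMaxHz = 2000: cNsInterval = RT_NS_1SEC / 2000 = 500 000 ns, and
 * cTicksHistoriziationInterval = (20 000 000 + 10 000 000 - 1) / 500 000
 * = 59, i.e. the frequency history advances roughly every 20 ms no matter
 * how fast the timer ticks.
 */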
3102
3103
3104/**
3105 * Updates the periodic preemption timer for the calling CPU.
3106 *
3107 * The caller must have disabled preemption!
3108 * The caller must check that the host can do high resolution timers.
3109 *
3110 * @param pGVM The global (ring-0) VM structure.
3111 * @param idHostCpu The current host CPU id.
3112 * @param uHz The desired frequency.
3113 */
3114GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3115{
3116 NOREF(pGVM);
3117#ifdef GVMM_SCHED_WITH_PPT
3118 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3119 Assert(RTTimerCanDoHighResolution());
3120
3121 /*
3122 * Resolve the per CPU data.
3123 */
3124 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3125 PGVMM pGVMM = g_pGVMM;
3126 if ( !RT_VALID_PTR(pGVMM)
3127 || pGVMM->u32Magic != GVMM_MAGIC)
3128 return;
3129 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3130 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3131 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3132 && pCpu->idCpu == idHostCpu,
3133 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3134
3135 /*
3136 * Check whether we need to do anything about the timer.
3137     * We have to be a little bit careful since we might race the timer
3138 * callback here.
3139 */
3140 if (uHz > 16384)
3141 uHz = 16384; /** @todo add a query method for this! */
3142 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3143 && uHz >= pCpu->Ppt.uMinHz
3144 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3145 {
3146 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3147
3148 pCpu->Ppt.uDesiredHz = uHz;
3149 uint32_t cNsInterval = 0;
3150 if (!pCpu->Ppt.fStarted)
3151 {
3152 pCpu->Ppt.cStarts++;
3153 pCpu->Ppt.fStarted = true;
3154 pCpu->Ppt.fStarting = true;
3155 pCpu->Ppt.iTickHistorization = 0;
3156 pCpu->Ppt.uTimerHz = uHz;
3157 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3158 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3159 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3160 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3161 / cNsInterval;
3162 else
3163 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3164 }
3165
3166 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3167
3168 if (cNsInterval)
3169 {
3170 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3171 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3172 AssertRC(rc);
3173
3174 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3175 if (RT_FAILURE(rc))
3176 pCpu->Ppt.fStarted = false;
3177 pCpu->Ppt.fStarting = false;
3178 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3179 }
3180 }
3181#else /* !GVMM_SCHED_WITH_PPT */
3182 NOREF(idHostCpu); NOREF(uHz);
3183#endif /* !GVMM_SCHED_WITH_PPT */
3184}
3185
3186
3187/**
3188 * Calls @a pfnCallback for each VM in the system.
3189 *
3190 * This will enumerate the VMs while holding the global VM used list lock in
3191 * shared mode. So, only suitable for simple work. If more expensive work
3192 * needs doing, a different approach must be taken as using this API would
3193 * otherwise block VM creation and destruction.
3194 *
3195 * @returns VBox status code.
3196 * @param pfnCallback The callback function.
3197 * @param pvUser User argument to the callback.
3198 */
3199GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3200{
3201 PGVMM pGVMM;
3202 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3203
3204 int rc = VINF_SUCCESS;
3205 GVMMR0_USED_SHARED_LOCK(pGVMM);
3206 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3207 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3208 i = pGVMM->aHandles[i].iNext, cLoops++)
3209 {
3210 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3211 if ( RT_VALID_PTR(pGVM)
3212 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3213 && pGVM->u32Magic == GVM_MAGIC)
3214 {
3215 rc = pfnCallback(pGVM, pvUser);
3216 if (rc != VINF_SUCCESS)
3217 break;
3218 }
3219
3220 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3221 }
3222 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3223 return rc;
3224}
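
/*
 * [Editor's sketch] Hypothetical callback matching the pfnCallback(pGVM,
 * pvUser) contract above; any status other than VINF_SUCCESS stops the
 * enumeration and is returned to the caller.
 */
#if 0 /* illustrative only, not built */
static DECLCALLBACK(int) myCountVMsCallback(PGVM pGVM, void *pvUser)
{
    NOREF(pGVM);
    *(uint32_t *)pvUser += 1;
    return VINF_SUCCESS;                /* keep enumerating */
}

static uint32_t myCountVMs(void)
{
    uint32_t cVMs = 0;
    GVMMR0EnumVMs(myCountVMsCallback, &cVMs);
    return cVMs;
}
#endif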
3225
3226
3227/**
3228 * Retrieves the GVMM statistics visible to the caller.
3229 *
3230 * @returns VBox status code.
3231 *
3232 * @param pStats Where to put the statistics.
3233 * @param pSession The current session.
3234 * @param pGVM The GVM to obtain statistics for. Optional.
3235 */
3236GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3237{
3238 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3239
3240 /*
3241 * Validate input.
3242 */
3243 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3244 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3245 pStats->cVMs = 0; /* (crash before taking the sem...) */
3246
3247 /*
3248 * Take the lock and get the VM statistics.
3249 */
3250 PGVMM pGVMM;
3251 if (pGVM)
3252 {
3253 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3254 if (RT_FAILURE(rc))
3255 return rc;
3256 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3257
3258 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3259 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3260 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3261 while (iCpu-- > 0)
3262 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3263 }
3264 else
3265 {
3266 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3267 RT_ZERO(pStats->SchedVM);
3268 RT_ZERO(pStats->aVCpus);
3269
3270 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3271 AssertRCReturn(rc, rc);
3272 }
3273
3274 /*
3275 * Enumerate the VMs and add the ones visible to the statistics.
3276 */
3277 pStats->cVMs = 0;
3278 pStats->cEMTs = 0;
3279 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3280
3281 for (unsigned i = pGVMM->iUsedHead;
3282 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3283 i = pGVMM->aHandles[i].iNext)
3284 {
3285 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3286 void *pvObj = pGVMM->aHandles[i].pvObj;
3287 if ( RT_VALID_PTR(pvObj)
3288 && RT_VALID_PTR(pOtherGVM)
3289 && pOtherGVM->u32Magic == GVM_MAGIC
3290 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3291 {
3292 pStats->cVMs++;
3293 pStats->cEMTs += pOtherGVM->cCpus;
3294
3295 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3296 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3297 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3298 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3299 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3300
3301 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3302 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3303 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3304
3305 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3306 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3307
3308 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3309 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3310 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3311 }
3312 }
3313
3314 /*
3315 * Copy out the per host CPU statistics.
3316 */
3317 uint32_t iDstCpu = 0;
3318 uint32_t cSrcCpus = pGVMM->cHostCpus;
3319 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3320 {
3321 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3322 {
3323 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3324 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3325#ifdef GVMM_SCHED_WITH_PPT
3326 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3327 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3328 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3329 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3330#else
3331 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3332 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3333 pStats->aHostCpus[iDstCpu].cChanges = 0;
3334 pStats->aHostCpus[iDstCpu].cStarts = 0;
3335#endif
3336 iDstCpu++;
3337 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3338 break;
3339 }
3340 }
3341 pStats->cHostCpus = iDstCpu;
3342
3343 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3344
3345 return VINF_SUCCESS;
3346}
3347
3348
3349/**
3350 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3351 *
3352 * @returns see GVMMR0QueryStatistics.
3353 * @param pGVM The global (ring-0) VM structure. Optional.
3354 * @param pReq Pointer to the request packet.
3355 * @param pSession The current session.
3356 */
3357GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3358{
3359 /*
3360 * Validate input and pass it on.
3361 */
3362 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3363 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3364 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3365
3366 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3367}
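
/*
 * [Editor's sketch] How a caller might fill the request packet validated
 * above.  Only the fields checked here are shown; the rest of the request
 * header (magic etc.) and the actual VMMR0 dispatch are omitted.
 */
#if 0 /* illustrative only, not built */
static void myInitQueryStatsReq(PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
{
    RT_ZERO(*pReq);
    pReq->Hdr.cbReq = sizeof(*pReq);    /* must equal sizeof(*pReq), see above */
    pReq->pSession  = pSession;         /* must match the calling session */
}
#endif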
3368
3369
3370/**
3371 * Resets the specified GVMM statistics.
3372 *
3373 * @returns VBox status code.
3374 *
3375 * @param   pStats      Which statistics to reset, that is, non-zero fields indicate which to reset.
3376 * @param pSession The current session.
3377 * @param pGVM The GVM to reset statistics for. Optional.
3378 */
3379GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3380{
3381 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3382
3383 /*
3384 * Validate input.
3385 */
3386 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3387 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3388
3389 /*
3390 * Take the lock and get the VM statistics.
3391 */
3392 PGVMM pGVMM;
3393 if (pGVM)
3394 {
3395 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3396 if (RT_FAILURE(rc))
3397 return rc;
3398# define MAYBE_RESET_FIELD(field) \
3399 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3400 MAYBE_RESET_FIELD(cHaltCalls);
3401 MAYBE_RESET_FIELD(cHaltBlocking);
3402 MAYBE_RESET_FIELD(cHaltTimeouts);
3403 MAYBE_RESET_FIELD(cHaltNotBlocking);
3404 MAYBE_RESET_FIELD(cHaltWakeUps);
3405 MAYBE_RESET_FIELD(cWakeUpCalls);
3406 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3407 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3408 MAYBE_RESET_FIELD(cPokeCalls);
3409 MAYBE_RESET_FIELD(cPokeNotBusy);
3410 MAYBE_RESET_FIELD(cPollCalls);
3411 MAYBE_RESET_FIELD(cPollHalts);
3412 MAYBE_RESET_FIELD(cPollWakeUps);
3413# undef MAYBE_RESET_FIELD
3414 }
3415 else
3416 {
3417 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3418
3419 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3420 AssertRCReturn(rc, rc);
3421 }
3422
3423 /*
3424 * Enumerate the VMs and add the ones visible to the statistics.
3425 */
3426 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3427 {
3428 for (unsigned i = pGVMM->iUsedHead;
3429 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3430 i = pGVMM->aHandles[i].iNext)
3431 {
3432 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3433 void *pvObj = pGVMM->aHandles[i].pvObj;
3434 if ( RT_VALID_PTR(pvObj)
3435 && RT_VALID_PTR(pOtherGVM)
3436 && pOtherGVM->u32Magic == GVM_MAGIC
3437 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3438 {
3439# define MAYBE_RESET_FIELD(field) \
3440 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3441 MAYBE_RESET_FIELD(cHaltCalls);
3442 MAYBE_RESET_FIELD(cHaltBlocking);
3443 MAYBE_RESET_FIELD(cHaltTimeouts);
3444 MAYBE_RESET_FIELD(cHaltNotBlocking);
3445 MAYBE_RESET_FIELD(cHaltWakeUps);
3446 MAYBE_RESET_FIELD(cWakeUpCalls);
3447 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3448 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3449 MAYBE_RESET_FIELD(cPokeCalls);
3450 MAYBE_RESET_FIELD(cPokeNotBusy);
3451 MAYBE_RESET_FIELD(cPollCalls);
3452 MAYBE_RESET_FIELD(cPollHalts);
3453 MAYBE_RESET_FIELD(cPollWakeUps);
3454# undef MAYBE_RESET_FIELD
3455 }
3456 }
3457 }
3458
3459 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3460
3461 return VINF_SUCCESS;
3462}
3463
3464
3465/**
3466 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3467 *
3468 * @returns see GVMMR0ResetStatistics.
3469 * @param pGVM The global (ring-0) VM structure. Optional.
3470 * @param pReq Pointer to the request packet.
3471 * @param pSession The current session.
3472 */
3473GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3474{
3475 /*
3476 * Validate input and pass it on.
3477 */
3478 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3479 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3480 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3481
3482 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3483}
3484