VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@54325

Last change on this file since 54325 was 54325, checked in by vboxsync, 10 years ago

HostDrivers/Support: nits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 330.5 KB
1/* $Id: SUPDrv.c 54325 2015-02-20 10:54:23Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
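/*
 * Editor's illustration (disabled, not part of the driver): how the level
 * assignments above are meant to be used.  The rc/pSession/pszSymbol names
 * are placeholders.
 */
#if 0
    Log(("supdrvExample: allocation failed, rc=%Rrc\n", rc));    /* failures & useful stuff */
    LogFlow(("supdrvExample: pSession=%p\n", pSession));         /* program flow */
    Log3(("supdrvExample: resolved symbol '%s'\n", pszSymbol));  /* loader flow noise */
    Log4(("supdrvExample: calling VMMR0 entry point\n"));        /* VMMR0 call flow noise */
#endif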
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency at which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops during which we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrency safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrency safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
127#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
128/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
129#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
130/** The TSC delta value for the initial GIP master - 0 in regular builds.
131 * To test the delta code this can be set to a non-zero value. */
132#if 0
133# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
134#else
135# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
136#endif
137
138AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
139AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
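/*
 * Editor's sketch (disabled): how the loop constants above partition a single
 * TSC-delta measurement run; the compile-time assertions above guarantee that
 * the three phases fit.  The real code lives in supdrvMeasureTscDeltaOne and
 * its helpers further down in this file.
 */
#if 0
    unsigned iLoop;
    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
        {
            /* Loops 0..3: cache priming only, results discarded. */
        }
        else if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
        {
            /* Loops 4..27: keep refining the minimum TSC read time. */
        }
        else
        {
            /* Loops 28..95: the delta measurement proper. */
        }
    }
#endif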
140
141/** @def VBOX_SVN_REV
142 * The makefile should define this if it can. */
143#ifndef VBOX_SVN_REV
144# define VBOX_SVN_REV 0
145#endif
146
147#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
148# define DO_NOT_START_GIP
149#endif
150
151/*******************************************************************************
152* Internal Functions *
153*******************************************************************************/
154static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
155static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
156static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
157static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
158static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
159static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
160static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
161static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt);
162static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
163static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
164static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
165static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
166static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
167static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
168DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
169DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
170static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
171static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
172static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
173static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
174static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
175static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
176static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
177static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
178static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
179static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
180static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
181 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
182static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
183static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
184static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
185static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
186 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
187static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
188static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
189static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
190static int supdrvIOCtl_ResumeSuspendedKbds(void);
191
192
193/*******************************************************************************
194* Global Variables *
195*******************************************************************************/
196DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
197
198
199/**
200 * Array of the R0 SUP API.
201 *
202 * While making changes to these exports, make sure to update the IOC
203 * minor version (SUPDRV_IOC_VERSION).
204 */
205static SUPFUNC g_aFunctions[] =
206{
207/* SED: START */
208 /* name function */
209 /* Entries with absolute addresses determined at runtime, fixup
210 code makes ugly ASSUMPTIONS about the order here: */
211 { "SUPR0AbsIs64bit", (void *)0 },
212 { "SUPR0Abs64bitKernelCS", (void *)0 },
213 { "SUPR0Abs64bitKernelSS", (void *)0 },
214 { "SUPR0Abs64bitKernelDS", (void *)0 },
215 { "SUPR0AbsKernelCS", (void *)0 },
216 { "SUPR0AbsKernelSS", (void *)0 },
217 { "SUPR0AbsKernelDS", (void *)0 },
218 { "SUPR0AbsKernelES", (void *)0 },
219 { "SUPR0AbsKernelFS", (void *)0 },
220 { "SUPR0AbsKernelGS", (void *)0 },
221 /* Normal function pointers: */
222 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
223 { "SUPGetGIP", (void *)SUPGetGIP },
224 { "SUPReadTscWithDelta", (void *)SUPReadTscWithDelta },
225 { "SUPGetTscDeltaSlow", (void *)SUPGetTscDeltaSlow },
226 { "SUPGetCpuHzFromGipForAsyncMode", (void *)SUPGetCpuHzFromGipForAsyncMode },
227 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
228 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
229 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
230 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
231 { "SUPR0ContFree", (void *)SUPR0ContFree },
232 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
233 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
234 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
235 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
236 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
237 { "SUPR0GetSvmUsability", (void *)SUPR0GetSvmUsability },
238 { "SUPR0GetVmxUsability", (void *)SUPR0GetVmxUsability },
239 { "SUPR0LockMem", (void *)SUPR0LockMem },
240 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
241 { "SUPR0LowFree", (void *)SUPR0LowFree },
242 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
243 { "SUPR0MemFree", (void *)SUPR0MemFree },
244 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
245 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
246 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
247 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
248 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
249 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
250 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
251 { "SUPR0PageFree", (void *)SUPR0PageFree },
252 { "SUPR0Printf", (void *)SUPR0Printf },
253 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
254 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
255 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
256 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
257 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
258 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
259 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
260 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
261 { "SUPSemEventClose", (void *)SUPSemEventClose },
262 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
263 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
264 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
265 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
266 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
267 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
268 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
269 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
270 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
271 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
272 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
273 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
274 { "SUPSemEventWait", (void *)SUPSemEventWait },
275 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
276 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
277 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
278
279 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
280 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
281 { "RTAssertMsg1", (void *)RTAssertMsg1 },
282 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
283 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
284 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
285 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
286 { "RTCrc32", (void *)RTCrc32 },
287 { "RTCrc32Finish", (void *)RTCrc32Finish },
288 { "RTCrc32Process", (void *)RTCrc32Process },
289 { "RTCrc32Start", (void *)RTCrc32Start },
290 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
291 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
292 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
293 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
294 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
295 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
296 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
297 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
298 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
299 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
300 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
301 { "RTLogPrintfV", (void *)RTLogPrintfV },
302 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
303 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
304 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
305 { "RTMemAllocTag", (void *)RTMemAllocTag },
306 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
307 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
308 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
309 { "RTMemDupExTag", (void *)RTMemDupExTag },
310 { "RTMemDupTag", (void *)RTMemDupTag },
311 { "RTMemFree", (void *)RTMemFree },
312 { "RTMemFreeEx", (void *)RTMemFreeEx },
313 { "RTMemReallocTag", (void *)RTMemReallocTag },
314 { "RTMpCpuId", (void *)RTMpCpuId },
315 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
316 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
317 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
318 { "RTMpGetCount", (void *)RTMpGetCount },
319 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
320 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
321 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
322 { "RTMpGetSet", (void *)RTMpGetSet },
323 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
324 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
325 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
326 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
327 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
328 { "RTMpOnAll", (void *)RTMpOnAll },
329 { "RTMpOnOthers", (void *)RTMpOnOthers },
330 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
331 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
332 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
333 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
334 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
335 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
336 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
337 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
338 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
339 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
340 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
341 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
342 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
343 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
344 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
345 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
346 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
347 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
348 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
349 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
350 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
351 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
352 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
353 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
354 { "RTProcSelf", (void *)RTProcSelf },
355 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
356 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
357 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
358 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
359 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
360 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
361 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
362 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
363 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
364 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
365 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
366 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
367 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
368 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
369 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
370 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
371 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
372 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
373 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
374 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
375 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
376 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
377 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
378 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
379 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
380 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
381 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
382 { "RTSemEventCreate", (void *)RTSemEventCreate },
383 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
384 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
385 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
386 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
387 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
388 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
389 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
390 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
391 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
392 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
393 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
394 { "RTSemEventSignal", (void *)RTSemEventSignal },
395 { "RTSemEventWait", (void *)RTSemEventWait },
396 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
397 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
398 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
399 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
400 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
401 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
402 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
403 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
404 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
405 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
406 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
407 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
408 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
409 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
410 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
411 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
412 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
413 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
414 { "RTStrCopy", (void *)RTStrCopy },
415 { "RTStrDupTag", (void *)RTStrDupTag },
416 { "RTStrFormat", (void *)RTStrFormat },
417 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
418 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
419 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
420 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
421 { "RTStrFormatV", (void *)RTStrFormatV },
422 { "RTStrFree", (void *)RTStrFree },
423 { "RTStrNCmp", (void *)RTStrNCmp },
424 { "RTStrPrintf", (void *)RTStrPrintf },
425 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
426 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
427 { "RTStrPrintfV", (void *)RTStrPrintfV },
428 { "RTThreadCreate", (void *)RTThreadCreate },
429 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
430 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
431 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
432 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
433 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
434 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
435 { "RTThreadGetName", (void *)RTThreadGetName },
436 { "RTThreadGetNative", (void *)RTThreadGetNative },
437 { "RTThreadGetType", (void *)RTThreadGetType },
438 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
439 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
440 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
441 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
442 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
443 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
444 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
445 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
446 { "RTThreadSelf", (void *)RTThreadSelf },
447 { "RTThreadSelfName", (void *)RTThreadSelfName },
448 { "RTThreadSleep", (void *)RTThreadSleep },
449 { "RTThreadUserReset", (void *)RTThreadUserReset },
450 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
451 { "RTThreadUserWait", (void *)RTThreadUserWait },
452 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
453 { "RTThreadWait", (void *)RTThreadWait },
454 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
455 { "RTThreadYield", (void *)RTThreadYield },
456 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
457 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
458 { "RTTimeNow", (void *)RTTimeNow },
459 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
460 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
461 { "RTTimerCreate", (void *)RTTimerCreate },
462 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
463 { "RTTimerDestroy", (void *)RTTimerDestroy },
464 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
465 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
466 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
467 { "RTTimerStart", (void *)RTTimerStart },
468 { "RTTimerStop", (void *)RTTimerStop },
469 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
470 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
471 { "RTUuidCompare", (void *)RTUuidCompare },
472 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
473 { "RTUuidFromStr", (void *)RTUuidFromStr },
474/* SED: END */
475};
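/*
 * Editor's illustration (disabled): how a name could be resolved against the
 * export table above; the driver's real lookups are done by
 * supdrvIOCtl_LdrGetSymbol and supdrvIDC_LdrGetSymbol.  The SUPFUNC name
 * member is assumed to be called szName here.
 */
#if 0
static void *supdrvExampleResolve(const char *pszSymbol)
{
    uint32_t i;
    for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
        if (!strcmp(g_aFunctions[i].szName, pszSymbol))
            return g_aFunctions[i].pfn;
    return NULL;
}
#endif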
476
477#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
478/**
479 * Drag in the rest of IPRT since we share it with the
480 * rest of the kernel modules on darwin.
481 */
482PFNRT g_apfnVBoxDrvIPRTDeps[] =
483{
484 /* VBoxNetAdp */
485 (PFNRT)RTRandBytes,
486 /* VBoxUSB */
487 (PFNRT)RTPathStripFilename,
488 NULL
489};
490#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
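/*
 * Editor's note: merely taking the addresses in the array above forces the
 * linker to keep those IPRT symbols in the module even though this file never
 * calls them.  A further dependency would be added the same way, e.g.
 * (hypothetical entry):
 */
#if 0
    (PFNRT)RTStrToUInt32,
#endif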
491
492
493/**
494 * Initializes the device extension structure.
495 *
496 * @returns IPRT status code.
497 * @param pDevExt The device extension to initialize.
498 * @param cbSession The size of the session structure. The size of
499 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
500 * defined because we're skipping the OS specific members
501 * then.
502 */
503int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
504{
505 int rc;
506
507#ifdef SUPDRV_WITH_RELEASE_LOGGER
508 /*
509 * Create the release log.
510 */
511 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
512 PRTLOGGER pRelLogger;
513 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
514 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
515 if (RT_SUCCESS(rc))
516 RTLogRelSetDefaultInstance(pRelLogger);
517 /** @todo Add native hook for getting logger config parameters and setting
518 * them. On linux we should use the module parameter stuff... */
519#endif
520
521 /*
522 * Initialize it.
523 */
524 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
525 pDevExt->Spinlock = NIL_RTSPINLOCK;
526 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
527 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
528 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
529 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
530 if (RT_SUCCESS(rc))
531 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
532 if (RT_SUCCESS(rc))
533 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
534
535 if (RT_SUCCESS(rc))
536#ifdef SUPDRV_USE_MUTEX_FOR_LDR
537 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
538#else
539 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
540#endif
541 if (RT_SUCCESS(rc))
542 {
543 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
544 if (RT_SUCCESS(rc))
545 {
536#ifdef SUPDRV_USE_MUTEX_FOR_GIP
547 rc = RTSemMutexCreate(&pDevExt->mtxGip);
548#else
549 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
550#endif
551 if (RT_SUCCESS(rc))
552 {
553 rc = supdrvGipCreate(pDevExt);
554 if (RT_SUCCESS(rc))
555 {
556 rc = supdrvTracerInit(pDevExt);
557 if (RT_SUCCESS(rc))
558 {
559 pDevExt->pLdrInitImage = NULL;
560 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
561 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
562 pDevExt->cbSession = (uint32_t)cbSession;
563
564 /*
565 * Fixup the absolute symbols.
566 *
567 * Because of the table indexing assumptions we'll have a little #ifdef orgy
568 * here rather than distributing this to OS specific files. At least for now.
569 */
570#ifdef RT_OS_DARWIN
571# if ARCH_BITS == 32
572 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
573 {
574 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
575 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
576 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
577 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
578 }
579 else
580 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0;
581 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
582 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
583 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
584 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
585 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
586 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
587# else /* 64-bit darwin: */
588 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
589 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
590 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
591 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
592 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
593 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
594 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
595 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
596 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
597 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
598
599# endif
600#else /* !RT_OS_DARWIN */
601# if ARCH_BITS == 64
602 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
603 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
604 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
605 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
606# else
607 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0;
608# endif
609 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
610 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
611 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
612 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
613 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
614 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
615#endif /* !RT_OS_DARWIN */
616 return VINF_SUCCESS;
617 }
618
619 supdrvGipDestroy(pDevExt);
620 }
621
622#ifdef SUPDRV_USE_MUTEX_FOR_GIP
623 RTSemMutexDestroy(pDevExt->mtxGip);
624 pDevExt->mtxGip = NIL_RTSEMMUTEX;
625#else
626 RTSemFastMutexDestroy(pDevExt->mtxGip);
627 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
628#endif
629 }
630 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
631 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
632 }
633#ifdef SUPDRV_USE_MUTEX_FOR_LDR
634 RTSemMutexDestroy(pDevExt->mtxLdr);
635 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
636#else
637 RTSemFastMutexDestroy(pDevExt->mtxLdr);
638 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
639#endif
640 }
641
642 RTSpinlockDestroy(pDevExt->Spinlock);
643 pDevExt->Spinlock = NIL_RTSPINLOCK;
644 RTSpinlockDestroy(pDevExt->hGipSpinlock);
645 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
646 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
647 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
648
649#ifdef SUPDRV_WITH_RELEASE_LOGGER
650 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
651 RTLogDestroy(RTLogSetDefaultInstance(NULL));
652#endif
653
654 return rc;
655}
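/*
 * Editor's sketch (disabled, hypothetical OS glue): the expected pairing of
 * the init/delete routines; g_DevExt is a placeholder for wherever the OS
 * specific code keeps the device extension.
 */
#if 0
    static SUPDRVDEVEXT g_DevExt;
    int rc = supdrvInitDevExt(&g_DevExt, sizeof(SUPDRVSESSION));
    if (RT_SUCCESS(rc))
    {
        /* ... create the device node and serve ioctl requests ... */
        supdrvDeleteDevExt(&g_DevExt);
    }
#endif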
656
657
658/**
659 * Delete the device extension (e.g. cleanup members).
660 *
661 * @param pDevExt The device extension to delete.
662 */
663void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
664{
665 PSUPDRVOBJ pObj;
666 PSUPDRVUSAGE pUsage;
667
668 /*
669 * Kill mutexes and spinlocks.
670 */
671#ifdef SUPDRV_USE_MUTEX_FOR_GIP
672 RTSemMutexDestroy(pDevExt->mtxGip);
673 pDevExt->mtxGip = NIL_RTSEMMUTEX;
674#else
675 RTSemFastMutexDestroy(pDevExt->mtxGip);
676 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
677#endif
678#ifdef SUPDRV_USE_MUTEX_FOR_LDR
679 RTSemMutexDestroy(pDevExt->mtxLdr);
680 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
681#else
682 RTSemFastMutexDestroy(pDevExt->mtxLdr);
683 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
684#endif
685 RTSpinlockDestroy(pDevExt->Spinlock);
686 pDevExt->Spinlock = NIL_RTSPINLOCK;
687 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
688 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
689 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
690 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
691
692 /*
693 * Free lists.
694 */
695 /* objects. */
696 pObj = pDevExt->pObjs;
697 Assert(!pObj); /* (can trigger on forced unloads) */
698 pDevExt->pObjs = NULL;
699 while (pObj)
700 {
701 void *pvFree = pObj;
702 pObj = pObj->pNext;
703 RTMemFree(pvFree);
704 }
705
706 /* usage records. */
707 pUsage = pDevExt->pUsageFree;
708 pDevExt->pUsageFree = NULL;
709 while (pUsage)
710 {
711 void *pvFree = pUsage;
712 pUsage = pUsage->pNext;
713 RTMemFree(pvFree);
714 }
715
716 /* kill the GIP. */
717 supdrvGipDestroy(pDevExt);
718 RTSpinlockDestroy(pDevExt->hGipSpinlock);
719 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
720
721 supdrvTracerTerm(pDevExt);
722
723#ifdef SUPDRV_WITH_RELEASE_LOGGER
724 /* destroy the loggers. */
725 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
726 RTLogDestroy(RTLogSetDefaultInstance(NULL));
727#endif
728}
729
730
731/**
732 * Create session.
733 *
734 * @returns IPRT status code.
735 * @param pDevExt Device extension.
736 * @param fUser Flag indicating whether this is a user or kernel
737 * session.
738 * @param fUnrestricted Unrestricted access (system) or restricted access
739 * (user)?
740 * @param ppSession Where to store the pointer to the session data.
741 */
742int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
743{
744 int rc;
745 PSUPDRVSESSION pSession;
746
747 if (!SUP_IS_DEVEXT_VALID(pDevExt))
748 return VERR_INVALID_PARAMETER;
749
750 /*
751 * Allocate memory for the session data.
752 */
753 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
754 if (pSession)
755 {
756 /* Initialize session data. */
757 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
758 if (!rc)
759 {
760 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
761 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
762 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
763 if (RT_SUCCESS(rc))
764 {
765 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
766 pSession->pDevExt = pDevExt;
767 pSession->u32Cookie = BIRD_INV;
768 pSession->fUnrestricted = fUnrestricted;
769 /*pSession->fInHashTable = false; */
770 pSession->cRefs = 1;
771 /*pSession->pCommonNextHash = NULL;
772 pSession->ppOsSessionPtr = NULL; */
773 if (fUser)
774 {
775 pSession->Process = RTProcSelf();
776 pSession->R0Process = RTR0ProcHandleSelf();
777 }
778 else
779 {
780 pSession->Process = NIL_RTPROCESS;
781 pSession->R0Process = NIL_RTR0PROCESS;
782 }
783 /*pSession->pLdrUsage = NULL;
784 pSession->pVM = NULL;
785 pSession->pUsage = NULL;
786 pSession->pGip = NULL;
787 pSession->fGipReferenced = false;
788 pSession->Bundle.cUsed = 0; */
789 pSession->Uid = NIL_RTUID;
790 pSession->Gid = NIL_RTGID;
791 /*pSession->uTracerData = 0;*/
792 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
793 RTListInit(&pSession->TpProviders);
794 /*pSession->cTpProviders = 0;*/
795 /*pSession->cTpProbesFiring = 0;*/
796 RTListInit(&pSession->TpUmods);
797 /*RT_ZERO(pSession->apTpLookupTable);*/
798
799 VBOXDRV_SESSION_CREATE(pSession, fUser);
800 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
801 return VINF_SUCCESS;
802 }
803
804 RTSpinlockDestroy(pSession->Spinlock);
805 }
806 RTMemFree(pSession);
807 *ppSession = NULL;
808 Log(("Failed to create spinlock, rc=%d!\n", rc));
809 }
810 else
811 rc = VERR_NO_MEMORY;
812
813 return rc;
814}
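/*
 * Editor's sketch (disabled, hypothetical open handler): creating a session
 * and balancing the initial reference (cRefs starts at 1 above) when done.
 */
#if 0
    PSUPDRVSESSION pSession;
    int rc = supdrvCreateSession(pDevExt, false /*fUser*/, true /*fUnrestricted*/, &pSession);
    if (RT_SUCCESS(rc))
    {
        /* ... hand the session to the caller, do work ... */
        supdrvSessionRelease(pSession);
    }
#endif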
815
816
817/**
818 * Cleans up the session in the context of the process to which it belongs, the
819 * caller will free the session and the session spinlock.
820 *
821 * This should normally occur when the session is closed or as the process
822 * exits. Careful reference counting in the OS specific code makes sure that
823 * there cannot be any races between process/handle cleanup callbacks and
824 * threads doing I/O control calls.
825 *
826 * @param pDevExt The device extension.
827 * @param pSession Session data.
828 */
829static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
830{
831 int rc;
832 PSUPDRVBUNDLE pBundle;
833 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
834
835 Assert(!pSession->fInHashTable);
836 Assert(!pSession->ppOsSessionPtr);
837 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
838 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", pSession->R0Process, RTR0ProcHandleSelf(), pSession->Process, RTProcSelf()));
839
840 /*
841 * Remove logger instances related to this session.
842 */
843 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
844
845 /*
846 * Destroy the handle table.
847 */
848 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
849 AssertRC(rc);
850 pSession->hHandleTable = NIL_RTHANDLETABLE;
851
852 /*
853 * Release object references made in this session.
854 * In theory there should be no one racing us in this session.
855 */
856 Log2(("release objects - start\n"));
857 if (pSession->pUsage)
858 {
859 PSUPDRVUSAGE pUsage;
860 RTSpinlockAcquire(pDevExt->Spinlock);
861
862 while ((pUsage = pSession->pUsage) != NULL)
863 {
864 PSUPDRVOBJ pObj = pUsage->pObj;
865 pSession->pUsage = pUsage->pNext;
866
867 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
868 if (pUsage->cUsage < pObj->cUsage)
869 {
870 pObj->cUsage -= pUsage->cUsage;
871 RTSpinlockRelease(pDevExt->Spinlock);
872 }
873 else
874 {
875 /* Destroy the object and free the record. */
876 if (pDevExt->pObjs == pObj)
877 pDevExt->pObjs = pObj->pNext;
878 else
879 {
880 PSUPDRVOBJ pObjPrev;
881 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
882 if (pObjPrev->pNext == pObj)
883 {
884 pObjPrev->pNext = pObj->pNext;
885 break;
886 }
887 Assert(pObjPrev);
888 }
889 RTSpinlockRelease(pDevExt->Spinlock);
890
891 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
892 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
893 if (pObj->pfnDestructor)
894 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
895 RTMemFree(pObj);
896 }
897
898 /* free it and continue. */
899 RTMemFree(pUsage);
900
901 RTSpinlockAcquire(pDevExt->Spinlock);
902 }
903
904 RTSpinlockRelease(pDevExt->Spinlock);
905 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during destruction!\n"));
906 }
907 Log2(("release objects - done\n"));
908
909 /*
910 * Do tracer cleanups related to this session.
911 */
912 Log2(("release tracer stuff - start\n"));
913 supdrvTracerCleanupSession(pDevExt, pSession);
914 Log2(("release tracer stuff - end\n"));
915
916 /*
917 * Release memory allocated in the session.
918 *
919 * We do not serialize this as we assume that the application will
920 * not allocate memory while closing the file handle object.
921 */
922 Log2(("freeing memory:\n"));
923 pBundle = &pSession->Bundle;
924 while (pBundle)
925 {
926 PSUPDRVBUNDLE pToFree;
927 unsigned i;
928
929 /*
930 * Check and unlock all entries in the bundle.
931 */
932 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
933 {
934 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
935 {
936 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
937 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
938 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
939 {
940 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
941 AssertRC(rc); /** @todo figure out how to handle this. */
942 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
943 }
944 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
945 AssertRC(rc); /** @todo figure out how to handle this. */
946 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
947 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
948 }
949 }
950
951 /*
952 * Advance and free previous bundle.
953 */
954 pToFree = pBundle;
955 pBundle = pBundle->pNext;
956
957 pToFree->pNext = NULL;
958 pToFree->cUsed = 0;
959 if (pToFree != &pSession->Bundle)
960 RTMemFree(pToFree);
961 }
962 Log2(("freeing memory - done\n"));
963
964 /*
965 * Deregister component factories.
966 */
967 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
968 Log2(("deregistering component factories:\n"));
969 if (pDevExt->pComponentFactoryHead)
970 {
971 PSUPDRVFACTORYREG pPrev = NULL;
972 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
973 while (pCur)
974 {
975 if (pCur->pSession == pSession)
976 {
977 /* unlink it */
978 PSUPDRVFACTORYREG pNext = pCur->pNext;
979 if (pPrev)
980 pPrev->pNext = pNext;
981 else
982 pDevExt->pComponentFactoryHead = pNext;
983
984 /* free it */
985 pCur->pNext = NULL;
986 pCur->pSession = NULL;
987 pCur->pFactory = NULL;
988 RTMemFree(pCur);
989
990 /* next */
991 pCur = pNext;
992 }
993 else
994 {
995 /* next */
996 pPrev = pCur;
997 pCur = pCur->pNext;
998 }
999 }
1000 }
1001 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
1002 Log2(("deregistering component factories - done\n"));
1003
1004 /*
1005 * Loaded images need to be dereferenced and possibly freed.
1006 */
1007 supdrvLdrLock(pDevExt);
1008 Log2(("freeing images:\n"));
1009 if (pSession->pLdrUsage)
1010 {
1011 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1012 pSession->pLdrUsage = NULL;
1013 while (pUsage)
1014 {
1015 void *pvFree = pUsage;
1016 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1017 if (pImage->cUsage > pUsage->cUsage)
1018 pImage->cUsage -= pUsage->cUsage;
1019 else
1020 supdrvLdrFree(pDevExt, pImage);
1021 pUsage->pImage = NULL;
1022 pUsage = pUsage->pNext;
1023 RTMemFree(pvFree);
1024 }
1025 }
1026 supdrvLdrUnlock(pDevExt);
1027 Log2(("freeing images - done\n"));
1028
1029 /*
1030 * Unmap the GIP.
1031 */
1032 Log2(("unmapping GIP:\n"));
1033 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1034 {
1035 SUPR0GipUnmap(pSession);
1036 pSession->fGipReferenced = 0;
1037 }
1038 Log2(("unmapping GIP - done\n"));
1039}
1040
1041
1042/**
1043 * Common code for freeing a session when the reference count reaches zero.
1044 *
1045 * @param pDevExt Device extension.
1046 * @param pSession Session data.
1047 * This data will be freed by this routine.
1048 */
1049static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1050{
1051 VBOXDRV_SESSION_CLOSE(pSession);
1052
1053 /*
1054 * Cleanup the session first.
1055 */
1056 supdrvCleanupSession(pDevExt, pSession);
1057 supdrvOSCleanupSession(pDevExt, pSession);
1058
1059 /*
1060 * Free the rest of the session stuff.
1061 */
1062 RTSpinlockDestroy(pSession->Spinlock);
1063 pSession->Spinlock = NIL_RTSPINLOCK;
1064 pSession->pDevExt = NULL;
1065 RTMemFree(pSession);
1066 LogFlow(("supdrvDestroySession: returns\n"));
1067}
1068
1069
1070/**
1071 * Inserts the session into the global hash table.
1072 *
1073 * @retval VINF_SUCCESS on success.
1074 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1075 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1076 * session (asserted).
1077 * @retval VERR_DUPLICATE if there is already a session for that pid, or VERR_RESOURCE_IN_USE if it also shares the ring-0 process handle.
1078 *
1079 * @param pDevExt The device extension.
1080 * @param pSession The session.
1081 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1082 * available and used. This will be set to point to the
1083 * session while under the protection of the session
1084 * hash table spinlock. It will also be kept in
1085 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1086 * cleanup use.
1087 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1088 */
1089int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1090 void *pvUser)
1091{
1092 PSUPDRVSESSION pCur;
1093 unsigned iHash;
1094
1095 /*
1096 * Validate input.
1097 */
1098 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1099 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1100
1101 /*
1102 * Calculate the hash table index and acquire the spinlock.
1103 */
1104 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1105
1106 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1107
1108 /*
1109 * If there is a collision, we need to carefully check whether we got a
1110 * duplicate. There can only be one open session per process.
1111 */
1112 pCur = pDevExt->apSessionHashTab[iHash];
1113 if (pCur)
1114 {
1115 while (pCur && pCur->Process != pSession->Process)
1116 pCur = pCur->pCommonNextHash;
1117
1118 if (pCur)
1119 {
1120 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1121 if (pCur == pSession)
1122 {
1123 Assert(pSession->fInHashTable);
1124 AssertFailed();
1125 return VERR_WRONG_ORDER;
1126 }
1127 Assert(!pSession->fInHashTable);
1128 if (pCur->R0Process == pSession->R0Process)
1129 return VERR_RESOURCE_IN_USE;
1130 return VERR_DUPLICATE;
1131 }
1132 }
1133 Assert(!pSession->fInHashTable);
1134 Assert(!pSession->ppOsSessionPtr);
1135
1136 /*
1137 * Insert it, doing a callout to the OS specific code in case it has
1138 * anything it wishes to do while we're holding the spinlock.
1139 */
1140 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1141 pDevExt->apSessionHashTab[iHash] = pSession;
1142 pSession->fInHashTable = true;
1143 ASMAtomicIncS32(&pDevExt->cSessions);
1144
1145 pSession->ppOsSessionPtr = ppOsSessionPtr;
1146 if (ppOsSessionPtr)
1147 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1148
1149 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1150
1151 /*
1152 * Retain a reference for the pointer in the session table.
1153 */
1154 ASMAtomicIncU32(&pSession->cRefs);
1155
1156 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1157 return VINF_SUCCESS;
1158}
1159
1160
1161/**
1162 * Removes the session from the global hash table.
1163 *
1164 * @retval VINF_SUCCESS on success.
1165 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1166 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1167 * session (asserted).
1168 *
1169 * @param pDevExt The device extension.
1170 * @param pSession The session. The caller is expected to have a reference
1171 * to this so it won't croak on us when we release the hash
1172 * table reference.
1173 * @param pvUser OS specific context value for the
1174 * supdrvOSSessionHashTabRemoved callback.
1175 */
1176int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1177{
1178 PSUPDRVSESSION pCur;
1179 unsigned iHash;
1180 int32_t cRefs;
1181
1182 /*
1183 * Validate input.
1184 */
1185 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1186 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1187
1188 /*
1189 * Calculate the hash table index and acquire the spinlock.
1190 */
1191 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1192
1193 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1194
1195 /*
1196 * Unlink it.
1197 */
1198 pCur = pDevExt->apSessionHashTab[iHash];
1199 if (pCur == pSession)
1200 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1201 else
1202 {
1203 PSUPDRVSESSION pPrev = pCur;
1204 while (pCur && pCur != pSession)
1205 {
1206 pPrev = pCur;
1207 pCur = pCur->pCommonNextHash;
1208 }
1209 if (pCur)
1210 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1211 else
1212 {
1213 Assert(!pSession->fInHashTable);
1214 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1215 return VERR_NOT_FOUND;
1216 }
1217 }
1218
1219 pSession->pCommonNextHash = NULL;
1220 pSession->fInHashTable = false;
1221
1222 ASMAtomicDecS32(&pDevExt->cSessions);
1223
1224 /*
1225 * Clear OS specific session pointer if available and do the OS callback.
1226 */
1227 if (pSession->ppOsSessionPtr)
1228 {
1229 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1230 pSession->ppOsSessionPtr = NULL;
1231 }
1232
1233 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1234
1235 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1236
1237 /*
1238 * Drop the reference the hash table had to the session. This shouldn't
1239 * be the last reference!
1240 */
1241 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1242 Assert(cRefs > 0 && cRefs < _1M);
1243 if (cRefs == 0)
1244 supdrvDestroySession(pDevExt, pSession);
1245
1246 return VINF_SUCCESS;
1247}
1248
1249
1250/**
1251 * Looks up the session for the current process in the global hash table or in
1252 * OS specific pointer.
1253 *
1254 * @returns Pointer to the session with a reference that the caller must
1255 * release. If no valid session was found, NULL is returned.
1256 *
1257 * @param pDevExt The device extension.
1258 * @param Process The process ID.
1259 * @param R0Process The ring-0 process handle.
1260 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1261 * this is used instead of the hash table. For
1262 * additional safety it must then be equal to the
1263 * SUPDRVSESSION::ppOsSessionPtr member.
1264 * This can be NULL even if the OS has a session
1265 * pointer.
1266 */
1267PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1268 PSUPDRVSESSION *ppOsSessionPtr)
1269{
1270 PSUPDRVSESSION pCur;
1271 unsigned iHash;
1272
1273 /*
1274 * Validate input.
1275 */
1276 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1277
1278 /*
1279 * Calculate the hash table index and acquire the spinlock.
1280 */
1281 iHash = SUPDRV_SESSION_HASH(Process);
1282
1283 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1284
1285 /*
1286 * If an OS session pointer is provided, always use it.
1287 */
1288 if (ppOsSessionPtr)
1289 {
1290 pCur = *ppOsSessionPtr;
1291 if ( pCur
1292 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1293 || pCur->Process != Process
1294 || pCur->R0Process != R0Process) )
1295 pCur = NULL;
1296 }
1297 else
1298 {
1299 /*
1300 * Otherwise, do the hash table lookup.
1301 */
1302 pCur = pDevExt->apSessionHashTab[iHash];
1303 while ( pCur
1304 && ( pCur->Process != Process
1305 || pCur->R0Process != R0Process) )
1306 pCur = pCur->pCommonNextHash;
1307 }
1308
1309 /*
1310 * Retain the session.
1311 */
1312 if (pCur)
1313 {
1314 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1315 NOREF(cRefs);
1316 Assert(cRefs > 1 && cRefs < _1M);
1317 }
1318
1319 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1320
1321 return pCur;
1322}
1323
1324
1325/**
1326 * Retain a session to make sure it doesn't go away while it is in use.
1327 *
1328 * @returns New reference count on success, UINT32_MAX on failure.
1329 * @param pSession Session data.
1330 */
1331uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1332{
1333 uint32_t cRefs;
1334 AssertPtrReturn(pSession, UINT32_MAX);
1335 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1336
1337 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1338 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1339 return cRefs;
1340}
1341
1342
1343/**
1344 * Releases a given session.
1345 *
1346 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1347 * @param pSession Session data.
1348 */
1349uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1350{
1351 uint32_t cRefs;
1352 AssertPtrReturn(pSession, UINT32_MAX);
1353 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1354
1355 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1356 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1357 if (cRefs == 0)
1358 supdrvDestroySession(pSession->pDevExt, pSession);
1359 return cRefs;
1360}
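/*
 * Editor's sketch (disabled): the lookup/release pattern the functions above
 * support.  supdrvSessionHashTabLookup already returns a referenced session,
 * so one release balances it.
 */
#if 0
    PSUPDRVSESSION pSession = supdrvSessionHashTabLookup(pDevExt, RTProcSelf(), RTR0ProcHandleSelf(),
                                                         NULL /*ppOsSessionPtr*/);
    if (pSession)
    {
        /* ... use the session ... */
        supdrvSessionRelease(pSession);
    }
#endif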
1361
1362
1363/**
1364 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1365 *
1366 * @returns IPRT status code, see SUPR0ObjAddRef.
1367 * @param hHandleTable The handle table handle. Ignored.
1368 * @param pvObj The object pointer.
1369 * @param pvCtx Context, the handle type. Ignored.
1370 * @param pvUser Session pointer.
1371 */
1372static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1373{
1374 NOREF(pvCtx);
1375 NOREF(hHandleTable);
1376 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1377}
1378
1379
1380/**
1381 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1382 *
1383 * @param hHandleTable The handle table handle. Ignored.
1384 * @param h The handle value. Ignored.
1385 * @param pvObj The object pointer.
1386 * @param pvCtx Context, the handle type. Ignored.
1387 * @param pvUser Session pointer.
1388 */
1389static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1390{
1391 NOREF(pvCtx);
1392 NOREF(h);
1393 NOREF(hHandleTable);
1394 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1395}
1396
1397
1398/**
1399 * Fast path I/O Control worker.
1400 *
1401 * @returns VBox status code that should be passed down to ring-3 unchanged.
1402 * @param uIOCtl Function number.
1403 * @param idCpu VMCPU id.
1404 * @param pDevExt Device extension.
1405 * @param pSession Session data.
1406 */
1407int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1408{
1409 /*
1410 * We check the two prereqs in one combined expression to let the compiler optimize things better.
1411 */
1412 if (RT_LIKELY( RT_VALID_PTR(pSession)
1413 && pSession->pVM
1414 && pDevExt->pfnVMMR0EntryFast))
1415 {
1416 switch (uIOCtl)
1417 {
1418 case SUP_IOCTL_FAST_DO_RAW_RUN:
1419 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1420 break;
1421 case SUP_IOCTL_FAST_DO_HM_RUN:
1422 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1423 break;
1424 case SUP_IOCTL_FAST_DO_NOP:
1425 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1426 break;
1427 default:
1428 return VERR_INTERNAL_ERROR;
1429 }
1430 return VINF_SUCCESS;
1431 }
1432 return VERR_INTERNAL_ERROR;
1433}
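/*
 * Editor's sketch (disabled, hypothetical platform glue): the fast path is
 * meant to be dispatched straight from the OS ioctl hook, with no request
 * buffer to unpack.
 */
#if 0
    if (   uIOCtl == SUP_IOCTL_FAST_DO_RAW_RUN
        || uIOCtl == SUP_IOCTL_FAST_DO_HM_RUN
        || uIOCtl == SUP_IOCTL_FAST_DO_NOP)
        return supdrvIOCtlFast(uIOCtl, idCpu, pDevExt, pSession);
#endif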
1434
1435
1436/**
1437 * Helper for supdrvIOCtl used to validate module names passed to SUP_IOCTL_LDR_OPEN.
1438 *
1439 * Check if pszName contains any of the characters we consider invalid. We
1440 * would use strpbrk here if it were on the RedHat kABI white list, see
1441 * http://www.kerneldrivers.org/RHEL5.
1442 *
1443 * @returns true if fine, false if not.
1444 * @param pszName The module name to check.
1445 */
1446static bool supdrvIsLdrModuleNameValid(const char *pszName)
1447{
1448 int chCur;
1449 while ((chCur = *pszName++) != '\0')
1450 {
1451 static const char s_szInvalidChars[] = ";:()[]{}/\\|&*%#@!~`\"'";
1452 unsigned offInv = RT_ELEMENTS(s_szInvalidChars);
1453 while (offInv-- > 0)
1454 if (s_szInvalidChars[offInv] == chCur)
1455 return false;
1456 }
1457 return true;
1458}
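/*
 * Editor's illustration (disabled): plain module names pass, names containing
 * any of the blacklisted characters are rejected.
 */
#if 0
    Assert( supdrvIsLdrModuleNameValid("VMMR0.r0"));
    Assert(!supdrvIsLdrModuleNameValid("evil|name"));
#endif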
1459
1460
1461
1462/**
1463 * I/O Control inner worker (tracing reasons).
1464 *
1465 * @returns IPRT status code.
1466 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1467 *
1468 * @param uIOCtl Function number.
1469 * @param pDevExt Device extension.
1470 * @param pSession Session data.
1471 * @param pReqHdr The request header.
1472 */
1473static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1474{
1475 /*
1476 * Validation macros
1477 */
1478#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1479 do { \
1480 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1481 { \
1482 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1483 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1484 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1485 } \
1486 } while (0)
1487
1488#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1489
1490#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1491 do { \
1492 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1493 { \
1494 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1495 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1496 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1497 } \
1498 } while (0)
1499
1500#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1501 do { \
1502 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1503 { \
1504 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1505 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1506 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1507 } \
1508 } while (0)
1509
1510#define REQ_CHECK_EXPR(Name, expr) \
1511 do { \
1512 if (RT_UNLIKELY(!(expr))) \
1513 { \
1514 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1515 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1516 } \
1517 } while (0)
1518
1519#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1520 do { \
1521 if (RT_UNLIKELY(!(expr))) \
1522 { \
1523 OSDBGPRINT( fmt ); \
1524 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1525 } \
1526 } while (0)
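
/*
 * To illustrate, REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK) below expands to
 * roughly the following (the #Name stringification supplies the prefix):
 */
#if 0 /* illustrative expansion only */
    do {
        if (RT_UNLIKELY(   pReqHdr->cbIn  != SUP_IOCTL_PAGE_UNLOCK_SIZE_IN
                        || pReqHdr->cbOut != SUP_IOCTL_PAGE_UNLOCK_SIZE_OUT))
        {
            OSDBGPRINT(("SUP_IOCTL_PAGE_UNLOCK: Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n",
                        (long)pReqHdr->cbIn, (long)SUP_IOCTL_PAGE_UNLOCK_SIZE_IN,
                        (long)pReqHdr->cbOut, (long)SUP_IOCTL_PAGE_UNLOCK_SIZE_OUT));
            return pReqHdr->rc = VERR_INVALID_PARAMETER;
        }
    } while (0);
#endif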
1527
1528 /*
1529 * The switch.
1530 */
1531 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1532 {
1533 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1534 {
1535 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1536 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1537 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1538 {
1539 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1540 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1541 return 0;
1542 }
1543
1544#if 0
1545 /*
1546 * Call out to the OS specific code and let it do permission checks on the
1547 * client process.
1548 */
1549 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1550 {
1551 pReq->u.Out.u32Cookie = 0xffffffff;
1552 pReq->u.Out.u32SessionCookie = 0xffffffff;
1553 pReq->u.Out.u32SessionVersion = 0xffffffff;
1554 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1555 pReq->u.Out.pSession = NULL;
1556 pReq->u.Out.cFunctions = 0;
1557 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1558 return 0;
1559 }
1560#endif
1561
1562 /*
1563 * Match the version.
1564 * The current logic is very simple, match the major interface version.
1565 */
1566 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1567 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1568 {
1569 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1570 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1571 pReq->u.Out.u32Cookie = 0xffffffff;
1572 pReq->u.Out.u32SessionCookie = 0xffffffff;
1573 pReq->u.Out.u32SessionVersion = 0xffffffff;
1574 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1575 pReq->u.Out.pSession = NULL;
1576 pReq->u.Out.cFunctions = 0;
1577 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1578 return 0;
1579 }
1580
1581 /*
1582 * Fill in return data and be gone.
1583 * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
1584 * u32SessionVersion <= u32ReqVersion!
1585 */
1586 /** @todo Somehow validate the client and negotiate a secure cookie... */
1587 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1588 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1589 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1590 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1591 pReq->u.Out.pSession = pSession;
1592            pReq->u.Out.cFunctions        = RT_ELEMENTS(g_aFunctions);
1593 pReq->Hdr.rc = VINF_SUCCESS;
1594 return 0;
1595 }
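
        /*
         * Worked example of the version match above (version values
         * hypothetical): with SUPDRV_IOC_VERSION = 0x001a0005, a client
         * sending u32MinVersion = 0x001a0001 passes (same 0x001a major half,
         * minor low enough), while 0x00190000 or 0x001b0000 fails with
         * VERR_VERSION_MISMATCH because the upper 16 bits differ.
         */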
1596
1597 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1598 {
1599 /* validate */
1600 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1601 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1602
1603 /* execute */
1604 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1605 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1606 pReq->Hdr.rc = VINF_SUCCESS;
1607 return 0;
1608 }
1609
1610 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1611 {
1612 /* validate */
1613 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1614 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1615 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1616 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1617 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1618
1619 /* execute */
1620 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1621 if (RT_FAILURE(pReq->Hdr.rc))
1622 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1623 return 0;
1624 }
1625
1626 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1627 {
1628 /* validate */
1629 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1630 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1631
1632 /* execute */
1633 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1634 return 0;
1635 }
1636
1637 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1638 {
1639 /* validate */
1640 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1641 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1642
1643 /* execute */
1644 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1645 if (RT_FAILURE(pReq->Hdr.rc))
1646 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1647 return 0;
1648 }
1649
1650 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1651 {
1652 /* validate */
1653 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1654 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1655
1656 /* execute */
1657 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1658 return 0;
1659 }
1660
1661 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1662 {
1663 /* validate */
1664 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1665 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1666 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1668 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1670 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1671 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1672 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1673 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, supdrvIsLdrModuleNameValid(pReq->u.In.szName));
1674 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1675
1676 /* execute */
1677 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1678 return 0;
1679 }
1680
1681 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1682 {
1683 /* validate */
1684 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1685            REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->Hdr.cbIn >= sizeof(*pReq));
1686 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1687 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1688 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1689 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1690 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1691 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1692 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1693 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1694 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1695 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1696 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1697 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1698 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1699
1700 if (pReq->u.In.cSymbols)
1701 {
1702 uint32_t i;
1703 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1704 for (i = 0; i < pReq->u.In.cSymbols; i++)
1705 {
1706 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1707 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1708 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1709 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1710 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1711 pReq->u.In.cbStrTab - paSyms[i].offName),
1712 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1713 }
1714 }
1715
1716 /* execute */
1717 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1718 return 0;
1719 }
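
        /*
         * The buffer layout the checks above enforce: the symbol and string
         * tables live inside the same cbImageWithTabs sized blob as the image
         * bits, at caller supplied offsets. Roughly:
         *
         *   abImage[0 .. cbImageBits)                                        image bits
         *   abImage[offSymbols .. offSymbols + cSymbols * sizeof(SUPLDRSYM)) symbol table
         *   abImage[offStrTab  .. offStrTab  + cbStrTab)                     string table
         *
         * Each SUPLDRSYM::offName must index a zero terminated string within
         * the string table, which is what the per-symbol RTStrEnd() check
         * verifies.
         */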
1720
1721 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1722 {
1723 /* validate */
1724 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1725 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1726
1727 /* execute */
1728 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1729 return 0;
1730 }
1731
1732 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOCK_DOWN):
1733 {
1734 /* validate */
1735 REQ_CHECK_SIZES(SUP_IOCTL_LDR_LOCK_DOWN);
1736
1737 /* execute */
1738 pReqHdr->rc = supdrvIOCtl_LdrLockDown(pDevExt);
1739 return 0;
1740 }
1741
1742 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1743 {
1744 /* validate */
1745 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1746 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1747 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1748
1749 /* execute */
1750 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1751 return 0;
1752 }
1753
1754 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1755 {
1756 /* validate */
1757 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1758 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1759 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1760
1761 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1762 {
1763 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1764
1765 /* execute */
1766 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1767 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1768 else
1769 pReq->Hdr.rc = VERR_WRONG_ORDER;
1770 }
1771 else
1772 {
1773 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1774 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1775 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1776 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1777 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1778
1779 /* execute */
1780 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1781 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1782 else
1783 pReq->Hdr.rc = VERR_WRONG_ORDER;
1784 }
1785
1786 if ( RT_FAILURE(pReq->Hdr.rc)
1787 && pReq->Hdr.rc != VERR_INTERRUPTED
1788 && pReq->Hdr.rc != VERR_TIMEOUT)
1789 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1790 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1791 else
1792 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1793 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1794 return 0;
1795 }
1796
1797 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1798 {
1799 /* validate */
1800 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1801 PSUPVMMR0REQHDR pVMMReq;
1802 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1803 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1804
1805 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1806 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1807 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1808 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1809 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1810
1811 /* execute */
1812 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1813 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1814 else
1815 pReq->Hdr.rc = VERR_WRONG_ORDER;
1816
1817 if ( RT_FAILURE(pReq->Hdr.rc)
1818 && pReq->Hdr.rc != VERR_INTERRUPTED
1819 && pReq->Hdr.rc != VERR_TIMEOUT)
1820 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1821 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1822 else
1823 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1824 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1825 return 0;
1826 }
1827
1828 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1829 {
1830 /* validate */
1831 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1832 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1833
1834 /* execute */
1835 pReq->Hdr.rc = VINF_SUCCESS;
1836 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1837 return 0;
1838 }
1839
1840 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1841 {
1842 /* validate */
1843 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1844 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1845 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1846
1847 /* execute */
1848 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1849 if (RT_FAILURE(pReq->Hdr.rc))
1850 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1851 return 0;
1852 }
1853
1854 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1855 {
1856 /* validate */
1857 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1858 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1859
1860 /* execute */
1861 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1862 return 0;
1863 }
1864
1865 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1866 {
1867 /* validate */
1868 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1869 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1870
1871 /* execute */
1872 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1873 if (RT_SUCCESS(pReq->Hdr.rc))
1874 pReq->u.Out.pGipR0 = pDevExt->pGip;
1875 return 0;
1876 }
1877
1878 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1879 {
1880 /* validate */
1881 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1882 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1883
1884 /* execute */
1885 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1886 return 0;
1887 }
1888
1889 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1890 {
1891 /* validate */
1892 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1893 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1894 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1895 || ( VALID_PTR(pReq->u.In.pVMR0)
1896 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1897 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1898 /* execute */
1899 pSession->pVM = pReq->u.In.pVMR0;
1900 pReq->Hdr.rc = VINF_SUCCESS;
1901 return 0;
1902 }
1903
1904 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1905 {
1906 /* validate */
1907 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1908 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1909 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1910 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1911 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1912 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1913 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1914 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1915 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1916
1917 /* execute */
1918 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1919 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1920 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1921 &pReq->u.Out.aPages[0]);
1922 if (RT_FAILURE(pReq->Hdr.rc))
1923 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1924 return 0;
1925 }
1926
1927 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1928 {
1929 /* validate */
1930 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1931 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1932 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1933 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1934 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1935 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1936
1937 /* execute */
1938 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1939 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1940 if (RT_FAILURE(pReq->Hdr.rc))
1941 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1942 return 0;
1943 }
1944
1945 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1946 {
1947 /* validate */
1948 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1949 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1950 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1951 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1952 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1953 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1954 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1955
1956 /* execute */
1957 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1958 return 0;
1959 }
1960
1961 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1962 {
1963 /* validate */
1964 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1965 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1966
1967 /* execute */
1968 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1969 return 0;
1970 }
1971
1972 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1973 {
1974 /* validate */
1975 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1976 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1977 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1978
1979 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1980 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1981 else
1982 {
1983 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1984 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1985 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1986 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1987 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1988 }
1989 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1990
1991 /* execute */
1992 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1993 return 0;
1994 }
1995
1996 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1997 {
1998 /* validate */
1999 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
2000 size_t cbStrTab;
2001 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
2002 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
2003 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
2004 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
2005 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
2006 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
2007 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
2008 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
2009 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2010 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2011 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2012
2013 /* execute */
2014 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2015 return 0;
2016 }
2017
2018 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2019 {
2020 /* validate */
2021 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2022 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2023 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2024
2025 /* execute */
2026 switch (pReq->u.In.uType)
2027 {
2028 case SUP_SEM_TYPE_EVENT:
2029 {
2030 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2031 switch (pReq->u.In.uOp)
2032 {
2033 case SUPSEMOP2_WAIT_MS_REL:
2034 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2035 break;
2036 case SUPSEMOP2_WAIT_NS_ABS:
2037 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2038 break;
2039 case SUPSEMOP2_WAIT_NS_REL:
2040 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2041 break;
2042 case SUPSEMOP2_SIGNAL:
2043 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2044 break;
2045 case SUPSEMOP2_CLOSE:
2046 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2047 break;
2048 case SUPSEMOP2_RESET:
2049 default:
2050 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2051 break;
2052 }
2053 break;
2054 }
2055
2056 case SUP_SEM_TYPE_EVENT_MULTI:
2057 {
2058 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2059 switch (pReq->u.In.uOp)
2060 {
2061 case SUPSEMOP2_WAIT_MS_REL:
2062 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2063 break;
2064 case SUPSEMOP2_WAIT_NS_ABS:
2065 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2066 break;
2067 case SUPSEMOP2_WAIT_NS_REL:
2068 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2069 break;
2070 case SUPSEMOP2_SIGNAL:
2071 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2072 break;
2073 case SUPSEMOP2_CLOSE:
2074 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2075 break;
2076 case SUPSEMOP2_RESET:
2077 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2078 break;
2079 default:
2080 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2081 break;
2082 }
2083 break;
2084 }
2085
2086 default:
2087 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2088 break;
2089 }
2090 return 0;
2091 }
2092
2093 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2094 {
2095 /* validate */
2096 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2097 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2098 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2099
2100 /* execute */
2101 switch (pReq->u.In.uType)
2102 {
2103 case SUP_SEM_TYPE_EVENT:
2104 {
2105 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2106 switch (pReq->u.In.uOp)
2107 {
2108 case SUPSEMOP3_CREATE:
2109 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2110 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2111 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2112 break;
2113 case SUPSEMOP3_GET_RESOLUTION:
2114 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2115 pReq->Hdr.rc = VINF_SUCCESS;
2116 pReq->Hdr.cbOut = sizeof(*pReq);
2117 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2118 break;
2119 default:
2120 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2121 break;
2122 }
2123 break;
2124 }
2125
2126 case SUP_SEM_TYPE_EVENT_MULTI:
2127 {
2128 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2129 switch (pReq->u.In.uOp)
2130 {
2131 case SUPSEMOP3_CREATE:
2132 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2133 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2134 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2135 break;
2136 case SUPSEMOP3_GET_RESOLUTION:
2137 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2138 pReq->Hdr.rc = VINF_SUCCESS;
2139 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2140 break;
2141 default:
2142 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2143 break;
2144 }
2145 break;
2146 }
2147
2148 default:
2149 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2150 break;
2151 }
2152 return 0;
2153 }
2154
2155 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2156 {
2157 /* validate */
2158 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2159 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2160
2161 /* execute */
2162 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2163 if (RT_FAILURE(pReq->Hdr.rc))
2164 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2165 return 0;
2166 }
2167
2168 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2169 {
2170 /* validate */
2171 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2172 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2173
2174 /* execute */
2175 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2176 return 0;
2177 }
2178
2179 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2180 {
2181 /* validate */
2182 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2183
2184 /* execute */
2185 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2186 return 0;
2187 }
2188
2189 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2190 {
2191 /* validate */
2192 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2193 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2194
2195 /* execute */
2196 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2197 return 0;
2198 }
2199
2200 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2201 {
2202 /* validate */
2203 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2204 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2205 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2206 return VERR_INVALID_PARAMETER;
2207
2208 /* execute */
2209 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2210 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2211 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2212 pReq->u.In.szName, pReq->u.In.fFlags);
2213 return 0;
2214 }
2215
2216 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2217 {
2218 /* validate */
2219 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2220 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2221
2222 /* execute */
2223 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2224 return 0;
2225 }
2226
2227 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2228 {
2229 /* validate */
2230 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2231 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2232
2233 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2234 pReqHdr->rc = VINF_SUCCESS;
2235 return 0;
2236 }
2237
2238 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2239 {
2240 /* validate */
2241 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2242 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2243 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2244 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2245
2246 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2247 return 0;
2248 }
2249
2250 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2251 {
2252 /* validate */
2253 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2254
2255 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2256 return 0;
2257 }
2258
2259 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2260 {
2261 /* validate */
2262 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2263 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2264
2265 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pSession, pReq);
2266 return 0;
2267 }
2268
2269 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2270 {
2271 /* validate */
2272 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2273 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2274
2275 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pSession, pReq);
2276 return 0;
2277 }
2278
2279 default:
2280 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2281 break;
2282 }
2283 return VERR_GENERAL_FAILURE;
2284}
2285
2286
2287/**
2288 * I/O Control inner worker for the restricted operations.
2289 *
2290 * @returns IPRT status code.
2291 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2292 *
2293 * @param uIOCtl Function number.
2294 * @param pDevExt Device extension.
2295 * @param pSession Session data.
2296 * @param pReqHdr The request header.
2297 */
2298static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2299{
2300 /*
2301 * The switch.
2302 */
2303 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2304 {
2305 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2306 {
2307 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2308 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2309 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2310 {
2311 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2312 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2313 return 0;
2314 }
2315
2316 /*
2317 * Match the version.
2318 * The current logic is very simple, match the major interface version.
2319 */
2320 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2321 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2322 {
2323 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2324 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2325 pReq->u.Out.u32Cookie = 0xffffffff;
2326 pReq->u.Out.u32SessionCookie = 0xffffffff;
2327 pReq->u.Out.u32SessionVersion = 0xffffffff;
2328 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2329 pReq->u.Out.pSession = NULL;
2330 pReq->u.Out.cFunctions = 0;
2331 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2332 return 0;
2333 }
2334
2335 /*
2336 * Fill in return data and be gone.
2337 * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
2338 * u32SessionVersion <= u32ReqVersion!
2339 */
2340 /** @todo Somehow validate the client and negotiate a secure cookie... */
2341 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2342 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2343 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2344 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2345 pReq->u.Out.pSession = pSession;
2346 pReq->u.Out.cFunctions = 0;
2347 pReq->Hdr.rc = VINF_SUCCESS;
2348 return 0;
2349 }
2350
2351 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2352 {
2353 /* validate */
2354 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2355 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2356
2357 /* execute */
2358 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2359 if (RT_FAILURE(pReq->Hdr.rc))
2360 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2361 return 0;
2362 }
2363
2364 default:
2365 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2366 break;
2367 }
2368 return VERR_GENERAL_FAILURE;
2369}
2370
2371
2372/**
2373 * I/O Control worker.
2374 *
2375 * @returns IPRT status code.
2376 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2377 *
2378 * @param uIOCtl Function number.
2379 * @param pDevExt Device extension.
2380 * @param pSession Session data.
2381 * @param pReqHdr The request header.
 * @param cbReq The size of the request buffer.
2382 */
2383int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2384{
2385 int rc;
2386 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2387
2388 /*
2389 * Validate the request.
2390 */
2391 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2392 {
2393 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2394 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2395 return VERR_INVALID_PARAMETER;
2396 }
2397 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2398 || pReqHdr->cbIn < sizeof(*pReqHdr)
2399 || pReqHdr->cbIn > cbReq
2400 || pReqHdr->cbOut < sizeof(*pReqHdr)
2401 || pReqHdr->cbOut > cbReq))
2402 {
2403 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2404 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2405 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2406 return VERR_INVALID_PARAMETER;
2407 }
2408 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2409 {
2410 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2411 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2412 return VERR_INVALID_PARAMETER;
2413 }
2414 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2415 {
2416 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2417 {
2418 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2419 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2420 return VERR_INVALID_PARAMETER;
2421 }
2422 }
2423 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2424 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2425 {
2426 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2427 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2428 return VERR_INVALID_PARAMETER;
2429 }
2430
2431 /*
2432 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2433 */
2434 if (pSession->fUnrestricted)
2435 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2436 else
2437 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2438
2439 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2440 return rc;
2441}
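
/*
 * For reference, what a well-formed request looks like from the caller's
 * side; a minimal sketch using SUP_IOCTL_LDR_FREE. The flag constant and the
 * transport helper are assumptions here; the real code lives in SUPLib.
 */
#if 0 /* illustrative sketch only */
    SUPLDRFREE Req;
    Req.Hdr.u32Cookie        = u32DriverCookie;  /* from the SUP_IOCTL_COOKIE exchange */
    Req.Hdr.u32SessionCookie = u32SessionCookie; /* ditto */
    Req.Hdr.cbIn             = SUP_IOCTL_LDR_FREE_SIZE_IN;
    Req.Hdr.cbOut            = SUP_IOCTL_LDR_FREE_SIZE_OUT;
    Req.Hdr.fFlags           = SUPREQHDR_FLAGS_DEFAULT;
    Req.Hdr.rc               = VERR_INTERNAL_ERROR;
    Req.u.In.pvImageBase     = pvImageBase;
    rc = suplibOsIOCtl(&g_supLibData, SUP_IOCTL_LDR_FREE, &Req, sizeof(Req)); /* hypothetical transport call */
#endif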
2442
2443
2444/**
2445 * Inter-Driver Communication (IDC) worker.
2446 *
2447 * @returns VBox status code.
2448 * @retval VINF_SUCCESS on success.
2449 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2450 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2451 *
2452 * @param uReq The request (function) code.
2453 * @param pDevExt Device extension.
2454 * @param pSession Session data.
2455 * @param pReqHdr The request header.
2456 */
2457int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2458{
2459 /*
2460 * The OS specific code has already validated the pSession
2461 * pointer and checked that the request size is greater than or
2462 * equal to the size of the header.
2463 *
2464 * So, just check that pSession is a kernel context session.
2465 */
2466 if (RT_UNLIKELY( pSession
2467 && pSession->R0Process != NIL_RTR0PROCESS))
2468 return VERR_INVALID_PARAMETER;
2469
2470/*
2471 * Validation macro.
2472 */
2473#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2474 do { \
2475 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2476 { \
2477 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2478 (long)pReqHdr->cb, (long)(cbExpect))); \
2479 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2480 } \
2481 } while (0)
2482
2483 switch (uReq)
2484 {
2485 case SUPDRV_IDC_REQ_CONNECT:
2486 {
2487 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2488 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2489
2490 /*
2491 * Validate the cookie and other input.
2492 */
2493 if (pReq->Hdr.pSession != NULL)
2494 {
2495 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2496 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2497 }
2498 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2499 {
2500 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2501 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2502 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2503 }
2504 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2505 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2506 {
2507 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2508 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2509 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2510 }
2511 if (pSession != NULL)
2512 {
2513 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2514 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2515 }
2516
2517 /*
2518 * Match the version.
2519 * The current logic is very simple, match the major interface version.
2520 */
2521 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2522 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2523 {
2524 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2525 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2526 pReq->u.Out.pSession = NULL;
2527 pReq->u.Out.uSessionVersion = 0xffffffff;
2528 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2529 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2530 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2531 return VINF_SUCCESS;
2532 }
2533
2534 pReq->u.Out.pSession = NULL;
2535 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2536 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2537 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2538
2539 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2540 if (RT_FAILURE(pReq->Hdr.rc))
2541 {
2542 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2543 return VINF_SUCCESS;
2544 }
2545
2546 pReq->u.Out.pSession = pSession;
2547 pReq->Hdr.pSession = pSession;
2548
2549 return VINF_SUCCESS;
2550 }
2551
2552 case SUPDRV_IDC_REQ_DISCONNECT:
2553 {
2554 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2555
2556 supdrvSessionRelease(pSession);
2557 return pReqHdr->rc = VINF_SUCCESS;
2558 }
2559
2560 case SUPDRV_IDC_REQ_GET_SYMBOL:
2561 {
2562 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2563 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2564
2565 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2566 return VINF_SUCCESS;
2567 }
2568
2569 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2570 {
2571 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2572 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2573
2574 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2575 return VINF_SUCCESS;
2576 }
2577
2578 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2579 {
2580 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2581 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2582
2583 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2584 return VINF_SUCCESS;
2585 }
2586
2587 default:
2588 Log(("Unknown IDC %#lx\n", (long)uReq));
2589 break;
2590 }
2591
2592#undef REQ_CHECK_IDC_SIZE
2593 return VERR_NOT_SUPPORTED;
2594}
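
/*
 * Typical connect sequence as seen from a third party kernel module; a
 * minimal sketch (how the request block reaches supdrvIDC is OS specific
 * and elided here):
 */
#if 0 /* illustrative sketch only */
    SUPDRVIDCREQCONNECT Req;
    RT_ZERO(Req);
    Req.Hdr.cb              = sizeof(Req);
    Req.Hdr.rc              = VERR_INTERNAL_ERROR;
    Req.Hdr.pSession        = NULL;                        /* must be NULL on connect */
    Req.u.In.u32MagicCookie = SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE;
    Req.u.In.uMinVersion    = SUPDRV_IDC_VERSION & UINT32_C(0xffff0000);
    Req.u.In.uReqVersion    = SUPDRV_IDC_VERSION;
    /* ... submit Req via the OS specific IDC entry point ... */
    /* On success, Req.u.Out.pSession identifies the kernel session to use
       with subsequent IDC requests (SUPDRV_IDC_REQ_GET_SYMBOL etc.). */
#endif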
2595
2596
2597/**
2598 * Register an object for reference counting.
2599 * The object is registered with one reference in the specified session.
2600 *
2601 * @returns Unique identifier on success (pointer).
2602 * All future references must use this identifier.
2603 * @returns NULL on failure.
 * @param pSession The session to associate the initial reference with.
 * @param enmType The object type.
2604 * @param pfnDestructor The destructor function which will be called when the reference count reaches 0.
2605 * @param pvUser1 The first user argument.
2606 * @param pvUser2 The second user argument.
2607 */
2608SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2609{
2610 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2611 PSUPDRVOBJ pObj;
2612 PSUPDRVUSAGE pUsage;
2613
2614 /*
2615 * Validate the input.
2616 */
2617 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2618 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2619 AssertPtrReturn(pfnDestructor, NULL);
2620
2621 /*
2622 * Allocate and initialize the object.
2623 */
2624 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2625 if (!pObj)
2626 return NULL;
2627 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2628 pObj->enmType = enmType;
2629 pObj->pNext = NULL;
2630 pObj->cUsage = 1;
2631 pObj->pfnDestructor = pfnDestructor;
2632 pObj->pvUser1 = pvUser1;
2633 pObj->pvUser2 = pvUser2;
2634 pObj->CreatorUid = pSession->Uid;
2635 pObj->CreatorGid = pSession->Gid;
2636 pObj->CreatorProcess= pSession->Process;
2637 supdrvOSObjInitCreator(pObj, pSession);
2638
2639 /*
2640 * Allocate the usage record.
2641 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2642 */
2643 RTSpinlockAcquire(pDevExt->Spinlock);
2644
2645 pUsage = pDevExt->pUsageFree;
2646 if (pUsage)
2647 pDevExt->pUsageFree = pUsage->pNext;
2648 else
2649 {
2650 RTSpinlockRelease(pDevExt->Spinlock);
2651 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2652 if (!pUsage)
2653 {
2654 RTMemFree(pObj);
2655 return NULL;
2656 }
2657 RTSpinlockAcquire(pDevExt->Spinlock);
2658 }
2659
2660 /*
2661 * Insert the object and create the session usage record.
2662 */
2663 /* The object. */
2664 pObj->pNext = pDevExt->pObjs;
2665 pDevExt->pObjs = pObj;
2666
2667 /* The session record. */
2668 pUsage->cUsage = 1;
2669 pUsage->pObj = pObj;
2670 pUsage->pNext = pSession->pUsage;
2671 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2672 pSession->pUsage = pUsage;
2673
2674 RTSpinlockRelease(pDevExt->Spinlock);
2675
2676 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2677 return pObj;
2678}
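
/*
 * Usage sketch: registering an object with a destructor. The object type and
 * user data below are made up for illustration; the destructor signature
 * matches the invocation in SUPR0ObjRelease.
 */
#if 0 /* illustrative sketch only */
static DECLCALLBACK(void) exampleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
{
    RTMemFree(pvUser1);                 /* free whatever state the object carried */
    NOREF(pvObj); NOREF(pvUser2);
}

    /* ... in some SUPR0 component ... */
    void *pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM /* for instance */,
                                   exampleObjDestructor, pMyState, NULL);
    if (!pvObj)
        return VERR_NO_MEMORY;
#endif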
2679
2680
2681/**
2682 * Increment the reference counter for the object associating the reference
2683 * with the specified session.
2684 *
2685 * @returns IPRT status code.
2686 * @param pvObj The identifier returned by SUPR0ObjRegister().
2687 * @param pSession The session which is referencing the object.
2688 *
2689 * @remarks The caller should not own any spinlocks and must carefully protect
2690 * itself against potential race with the destructor so freed memory
2691 * isn't accessed here.
2692 */
2693SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2694{
2695 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2696}
2697
2698
2699/**
2700 * Increment the reference counter for the object associating the reference
2701 * with the specified session.
2702 *
2703 * @returns IPRT status code.
2704 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2705 * couldn't be allocated. (If you see this you're not doing the right
2706 * thing and it won't ever work reliably.)
2707 *
2708 * @param pvObj The identifier returned by SUPR0ObjRegister().
2709 * @param pSession The session which is referencing the object.
2710 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2711 * first reference to an object in a session with this
2712 * argument set.
2713 *
2714 * @remarks The caller should not own any spinlocks and must carefully protect
2715 * itself against potential race with the destructor so freed memory
2716 * isn't accessed here.
2717 */
2718SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2719{
2720 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2721 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2722 int rc = VINF_SUCCESS;
2723 PSUPDRVUSAGE pUsagePre;
2724 PSUPDRVUSAGE pUsage;
2725
2726 /*
2727 * Validate the input.
2728 * Be ready for the destruction race (someone might be stuck in the
2729 * destructor waiting a lock we own).
2730 */
2731 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2732 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2733 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2734 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2735 VERR_INVALID_PARAMETER);
2736
2737 RTSpinlockAcquire(pDevExt->Spinlock);
2738
2739 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2740 {
2741 RTSpinlockRelease(pDevExt->Spinlock);
2742
2743 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2744 return VERR_WRONG_ORDER;
2745 }
2746
2747 /*
2748 * Preallocate the usage record if we can.
2749 */
2750 pUsagePre = pDevExt->pUsageFree;
2751 if (pUsagePre)
2752 pDevExt->pUsageFree = pUsagePre->pNext;
2753 else if (!fNoBlocking)
2754 {
2755 RTSpinlockRelease(pDevExt->Spinlock);
2756 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2757 if (!pUsagePre)
2758 return VERR_NO_MEMORY;
2759
2760 RTSpinlockAcquire(pDevExt->Spinlock);
2761 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2762 {
2763 RTSpinlockRelease(pDevExt->Spinlock);
2764
2765 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2766 return VERR_WRONG_ORDER;
2767 }
2768 }
2769
2770 /*
2771 * Reference the object.
2772 */
2773 pObj->cUsage++;
2774
2775 /*
2776 * Look for the session record.
2777 */
2778 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2779 {
2780 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2781 if (pUsage->pObj == pObj)
2782 break;
2783 }
2784 if (pUsage)
2785 pUsage->cUsage++;
2786 else if (pUsagePre)
2787 {
2788 /* create a new session record. */
2789 pUsagePre->cUsage = 1;
2790 pUsagePre->pObj = pObj;
2791 pUsagePre->pNext = pSession->pUsage;
2792 pSession->pUsage = pUsagePre;
2793 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2794
2795 pUsagePre = NULL;
2796 }
2797 else
2798 {
2799 pObj->cUsage--;
2800 rc = VERR_TRY_AGAIN;
2801 }
2802
2803 /*
2804     * Put any unused usage record into the free list.
2805 */
2806 if (pUsagePre)
2807 {
2808 pUsagePre->pNext = pDevExt->pUsageFree;
2809 pDevExt->pUsageFree = pUsagePre;
2810 }
2811
2812 RTSpinlockRelease(pDevExt->Spinlock);
2813
2814 return rc;
2815}
2816
2817
2818/**
2819 * Decrement / destroy a reference counter record for an object.
2820 *
2821 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2822 *
2823 * @returns IPRT status code.
2824 * @retval VINF_SUCCESS if not destroyed.
2825 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2826 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2827 * strict builds.
2828 *
2829 * @param pvObj The identifier returned by SUPR0ObjRegister().
2830 * @param pSession The session which is referencing the object.
2831 */
2832SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2833{
2834 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2835 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2836 int rc = VERR_INVALID_PARAMETER;
2837 PSUPDRVUSAGE pUsage;
2838 PSUPDRVUSAGE pUsagePrev;
2839
2840 /*
2841 * Validate the input.
2842 */
2843 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2844 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2845 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2846 VERR_INVALID_PARAMETER);
2847
2848 /*
2849 * Acquire the spinlock and look for the usage record.
2850 */
2851 RTSpinlockAcquire(pDevExt->Spinlock);
2852
2853 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2854 pUsage;
2855 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2856 {
2857 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2858 if (pUsage->pObj == pObj)
2859 {
2860 rc = VINF_SUCCESS;
2861 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2862 if (pUsage->cUsage > 1)
2863 {
2864 pObj->cUsage--;
2865 pUsage->cUsage--;
2866 }
2867 else
2868 {
2869 /*
2870 * Free the session record.
2871 */
2872 if (pUsagePrev)
2873 pUsagePrev->pNext = pUsage->pNext;
2874 else
2875 pSession->pUsage = pUsage->pNext;
2876 pUsage->pNext = pDevExt->pUsageFree;
2877 pDevExt->pUsageFree = pUsage;
2878
2879 /* What about the object? */
2880 if (pObj->cUsage > 1)
2881 pObj->cUsage--;
2882 else
2883 {
2884 /*
2885 * Object is to be destroyed, unlink it.
2886 */
2887 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2888 rc = VINF_OBJECT_DESTROYED;
2889 if (pDevExt->pObjs == pObj)
2890 pDevExt->pObjs = pObj->pNext;
2891 else
2892 {
2893 PSUPDRVOBJ pObjPrev;
2894 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2895 if (pObjPrev->pNext == pObj)
2896 {
2897 pObjPrev->pNext = pObj->pNext;
2898 break;
2899 }
2900 Assert(pObjPrev);
2901 }
2902 }
2903 }
2904 break;
2905 }
2906 }
2907
2908 RTSpinlockRelease(pDevExt->Spinlock);
2909
2910 /*
2911 * Call the destructor and free the object if required.
2912 */
2913 if (rc == VINF_OBJECT_DESTROYED)
2914 {
2915 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2916 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2917 if (pObj->pfnDestructor)
2918 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2919 RTMemFree(pObj);
2920 }
2921
2922 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2923 return rc;
2924}
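
/*
 * The reference counting life cycle in a nutshell (sessions and arguments
 * hypothetical):
 */
#if 0 /* illustrative sketch only */
    void *pvObj = SUPR0ObjRegister(pSessionA, enmType, pfnDtor, pv1, pv2); /* cUsage = 1 (A) */
    rc = SUPR0ObjAddRef(pvObj, pSessionB);                                 /* cUsage = 2 (A+B) */
    rc = SUPR0ObjRelease(pvObj, pSessionA);     /* VINF_SUCCESS, cUsage = 1 */
    rc = SUPR0ObjRelease(pvObj, pSessionB);     /* VINF_OBJECT_DESTROYED, destructor has run */
#endif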
2925
2926
2927/**
2928 * Verifies that the current process can access the specified object.
2929 *
2930 * @returns The following IPRT status code:
2931 * @retval VINF_SUCCESS if access was granted.
2932 * @retval VERR_PERMISSION_DENIED if access was denied.
2933 * @retval VERR_INVALID_PARAMETER if a parameter was invalid.
2934 *
2935 * @param pvObj The identifier returned by SUPR0ObjRegister().
2936 * @param pSession The session which wishes to access the object.
2937 * @param pszObjName Object string name. This is optional and depends on the object type.
2938 *
2939 * @remark The caller is responsible for making sure the object isn't removed while
2940 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2941 */
2942SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2943{
2944 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2945 int rc;
2946
2947 /*
2948 * Validate the input.
2949 */
2950 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2951 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2952 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2953 VERR_INVALID_PARAMETER);
2954
2955 /*
2956 * Check access. (returns true if a decision has been made.)
2957 */
2958 rc = VERR_INTERNAL_ERROR;
2959 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2960 return rc;
2961
2962 /*
2963     * The default policy is to allow access when the session's UID matches
2964     * the object creator's UID, and to deny everything else.
2965 */
2966 if (pObj->CreatorUid == pSession->Uid)
2967 return VINF_SUCCESS;
2968 return VERR_PERMISSION_DENIED;
2969}
2970
2971
2972/**
2973 * Lock pages.
2974 *
2975 * @returns IPRT status code.
2976 * @param pSession Session to which the locked memory should be associated.
2977 * @param pvR3 Start of the memory range to lock.
2978 * This must be page aligned.
2979 * @param cPages Number of pages to lock.
2980 * @param paPages Where to put the physical addresses of locked memory.
2981 */
2982SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2983{
2984 int rc;
2985 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2986 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2987 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2988
2989 /*
2990 * Verify input.
2991 */
2992 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2993 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2994 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2995 || !pvR3)
2996 {
2997 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2998 return VERR_INVALID_PARAMETER;
2999 }
3000
3001 /*
3002 * Let IPRT do the job.
3003 */
3004 Mem.eType = MEMREF_TYPE_LOCKED;
3005 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
3006 if (RT_SUCCESS(rc))
3007 {
3008 uint32_t iPage = cPages;
3009 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3010 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3011
3012 while (iPage-- > 0)
3013 {
3014 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3015 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3016 {
3017 AssertMsgFailed(("iPage=%d\n", iPage));
3018 rc = VERR_INTERNAL_ERROR;
3019 break;
3020 }
3021 }
3022 if (RT_SUCCESS(rc))
3023 rc = supdrvMemAdd(&Mem, pSession);
3024 if (RT_FAILURE(rc))
3025 {
3026 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3027 AssertRC(rc2);
3028 }
3029 }
3030
3031 return rc;
3032}
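
/*
 * Usage sketch: locking a single, page aligned ring-3 page and looking up
 * its physical address (buffer and page count made up for illustration):
 */
#if 0 /* illustrative sketch only */
    RTHCPHYS HCPhysPage;
    rc = SUPR0LockMem(pSession, pvR3Buf /* page aligned */, 1 /*cPages*/, &HCPhysPage);
    if (RT_SUCCESS(rc))
    {
        /* HCPhysPage now holds the physical address backing the page. */
        /* ... */
        rc = SUPR0UnlockMem(pSession, pvR3Buf); /* see below */
    }
#endif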
3033
3034
3035/**
3036 * Unlocks the memory pointed to by pvR3.
3037 *
3038 * @returns IPRT status code.
3039 * @param pSession Session to which the memory was locked.
3040 * @param pvR3 Memory to unlock.
3041 */
3042SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3043{
3044 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3045 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3046 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3047}
3048
3049
3050/**
3051 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3052 * backing.
3053 *
3054 * @returns IPRT status code.
3055 * @param pSession Session data.
3056 * @param cPages Number of pages to allocate.
3057 * @param ppvR0 Where to put the address of the Ring-0 mapping of the allocated memory.
3058 * @param ppvR3 Where to put the address of the Ring-3 mapping of the allocated memory.
3059 * @param pHCPhys Where to put the physical address of allocated memory.
3060 */
3061SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3062{
3063 int rc;
3064 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3065 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3066
3067 /*
3068 * Validate input.
3069 */
3070 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3071 if (!ppvR3 || !ppvR0 || !pHCPhys)
3072 {
3073 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3074 pSession, ppvR0, ppvR3, pHCPhys));
3075 return VERR_INVALID_PARAMETER;
3076
3077 }
3078 if (cPages < 1 || cPages >= 256)
3079 {
3080 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3081 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3082 }
3083
3084 /*
3085 * Let IPRT do the job.
3086 */
3087 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3088 if (RT_SUCCESS(rc))
3089 {
3090 int rc2;
3091 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3092 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3093 if (RT_SUCCESS(rc))
3094 {
3095 Mem.eType = MEMREF_TYPE_CONT;
3096 rc = supdrvMemAdd(&Mem, pSession);
3097 if (!rc)
3098 {
3099 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3100 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3101 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3102 return VINF_SUCCESS;
3103 }
3104
3105 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3106 AssertRC(rc2);
3107 }
3108 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3109 AssertRC(rc2);
3110 }
3111
3112 return rc;
3113}
3114
3115
3116/**
3117 * Frees memory allocated using SUPR0ContAlloc().
3118 *
3119 * @returns IPRT status code.
3120 * @param pSession The session to which the memory was allocated.
3121 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3122 */
3123SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3124{
3125 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3126 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3127 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3128}
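
/* Usage sketch (illustrative only): allocating two physically contiguous
 * pages with both ring-0 and ring-3 mappings; HCPhys receives the physical
 * address of the first page. Either mapping address may be passed to
 * SUPR0ContFree().
 *
 *      RTR0PTR  pvR0;
 *      RTR3PTR  pvR3;
 *      RTHCPHYS HCPhys;
 *      int rc = SUPR0ContAlloc(pSession, 2, &pvR0, &pvR3, &HCPhys);
 *      if (RT_SUCCESS(rc))
 *      {
 *          ...
 *          SUPR0ContFree(pSession, (RTHCUINTPTR)pvR0);
 *      }
 */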
3129
3130
3131/**
3132 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3133 *
3134 * The memory isn't zeroed.
3135 *
3136 * @returns IPRT status code.
3137 * @param pSession Session data.
3138 * @param cPages Number of pages to allocate.
3139 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3140 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3141 * @param paPages Where to put the physical addresses of allocated memory.
3142 */
3143SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3144{
3145 unsigned iPage;
3146 int rc;
3147 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3148 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3149
3150 /*
3151 * Validate input.
3152 */
3153 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3154 if (!ppvR3 || !ppvR0 || !paPages)
3155 {
3156 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3157 pSession, ppvR3, ppvR0, paPages));
3158 return VERR_INVALID_PARAMETER;
3159
3160 }
3161 if (cPages < 1 || cPages >= 256)
3162 {
3163 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3164 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3165 }
3166
3167 /*
3168 * Let IPRT do the work.
3169 */
3170 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3171 if (RT_SUCCESS(rc))
3172 {
3173 int rc2;
3174 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3175 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3176 if (RT_SUCCESS(rc))
3177 {
3178 Mem.eType = MEMREF_TYPE_LOW;
3179 rc = supdrvMemAdd(&Mem, pSession);
3180 if (!rc)
3181 {
3182 for (iPage = 0; iPage < cPages; iPage++)
3183 {
3184 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3185 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", iPage, paPages[iPage]));
3186 }
3187 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3188 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3189 return VINF_SUCCESS;
3190 }
3191
3192 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3193 AssertRC(rc2);
3194 }
3195
3196 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3197 AssertRC(rc2);
3198 }
3199
3200 return rc;
3201}
3202
3203
3204/**
3205 * Frees memory allocated using SUPR0LowAlloc().
3206 *
3207 * @returns IPRT status code.
3208 * @param pSession The session to which the memory was allocated.
3209 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3210 */
3211SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3212{
3213 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3214 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3215 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3216}
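
/* Usage sketch (illustrative only): allocating four pages below 4GB; unlike
 * SUPR0ContAlloc() the pages need not be contiguous, so each physical
 * address is returned separately.
 *
 *      RTR0PTR  pvR0;
 *      RTR3PTR  pvR3;
 *      RTHCPHYS aPhys[4];
 *      int rc = SUPR0LowAlloc(pSession, 4, &pvR0, &pvR3, &aPhys[0]);
 *      if (RT_SUCCESS(rc))
 *      {
 *          ... aPhys[i] is the physical address of page i ...
 *          SUPR0LowFree(pSession, (RTHCUINTPTR)pvR0);
 *      }
 */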
3217
3218
3219
3220/**
3221 * Allocates a chunk of memory with both R0 and R3 mappings.
3222 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3223 *
3224 * @returns IPRT status code.
3225 * @param pSession The session to associate the allocation with.
3226 * @param cb Number of bytes to allocate.
3227 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3228 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3229 */
3230SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3231{
3232 int rc;
3233 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3234 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3235
3236 /*
3237 * Validate input.
3238 */
3239 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3240 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3241 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3242 if (cb < 1 || cb >= _4M)
3243 {
3244 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3245 return VERR_INVALID_PARAMETER;
3246 }
3247
3248 /*
3249 * Let IPRT do the work.
3250 */
3251 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3252 if (RT_SUCCESS(rc))
3253 {
3254 int rc2;
3255 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3256 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3257 if (RT_SUCCESS(rc))
3258 {
3259 Mem.eType = MEMREF_TYPE_MEM;
3260 rc = supdrvMemAdd(&Mem, pSession);
3261 if (!rc)
3262 {
3263 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3264 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3265 return VINF_SUCCESS;
3266 }
3267
3268 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3269 AssertRC(rc2);
3270 }
3271
3272 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3273 AssertRC(rc2);
3274 }
3275
3276 return rc;
3277}
3278
3279
3280/**
3281 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3282 *
3283 * @returns IPRT status code.
3284 * @param pSession The session to which the memory was allocated.
3285 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3286 * @param paPages Where to store the physical addresses.
3287 */
3288SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3289{
3290 PSUPDRVBUNDLE pBundle;
3291 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3292
3293 /*
3294 * Validate input.
3295 */
3296 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3297 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3298 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3299
3300 /*
3301 * Search for the address.
3302 */
3303 RTSpinlockAcquire(pSession->Spinlock);
3304 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3305 {
3306 if (pBundle->cUsed > 0)
3307 {
3308 unsigned i;
3309 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3310 {
3311 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3312 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3313 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3314 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3315 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3316 )
3317 )
3318 {
3319 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3320 size_t iPage;
3321 for (iPage = 0; iPage < cPages; iPage++)
3322 {
3323 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3324 paPages[iPage].uReserved = 0;
3325 }
3326 RTSpinlockRelease(pSession->Spinlock);
3327 return VINF_SUCCESS;
3328 }
3329 }
3330 }
3331 }
3332 RTSpinlockRelease(pSession->Spinlock);
3333 Log(("Failed to find %p!!!\n", (void *)uPtr));
3334 return VERR_INVALID_PARAMETER;
3335}
3336
3337
3338/**
3339 * Free memory allocated by SUPR0MemAlloc().
3340 *
3341 * @returns IPRT status code.
3342 * @param pSession The session owning the allocation.
3343 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3344 */
3345SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3346{
3347 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3348 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3349 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3350}
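
/* Usage sketch (illustrative only): allocating 8KB and querying the physical
 * addresses of the backing pages afterwards; the SUPPAGE array must have
 * room for every page of the allocation.
 *
 *      RTR0PTR pvR0;
 *      RTR3PTR pvR3;
 *      SUPPAGE aPages[2];
 *      int rc = SUPR0MemAlloc(pSession, 2 * PAGE_SIZE, &pvR0, &pvR3);
 *      if (RT_SUCCESS(rc))
 *      {
 *          rc = SUPR0MemGetPhys(pSession, (RTHCUINTPTR)pvR0, &aPages[0]);
 *          ...
 *          SUPR0MemFree(pSession, (RTHCUINTPTR)pvR0);
 *      }
 */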
3351
3352
3353/**
3354 * Allocates a chunk of memory with a kernel and/or a user mode mapping.
3355 *
3356 * The memory is fixed and it's possible to query the physical addresses using
3357 * SUPR0MemGetPhys().
3358 *
3359 * @returns IPRT status code.
3360 * @param pSession The session to associate the allocation with.
3361 * @param cPages The number of pages to allocate.
3362 * @param fFlags Flags, reserved for the future. Must be zero.
3363 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3364 * NULL if no ring-3 mapping.
3365 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3366 * NULL if no ring-0 mapping.
3367 * @param paPages Where to store the addresses of the pages. Optional.
3368 */
3369SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3370{
3371 int rc;
3372 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3373 LogFlow(("SUPR0PageAllocEx: pSession=%p cPages=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3374
3375 /*
3376 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3377 */
3378 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3379 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3380 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3381 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3382 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3383 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3384 {
3385 Log(("SUPR0PageAllocEx: Illegal request cPages=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT / (_1M / _4K)));
3386 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3387 }
3388
3389 /*
3390 * Let IPRT do the work.
3391 */
3392 if (ppvR0)
3393 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3394 else
3395 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3396 if (RT_SUCCESS(rc))
3397 {
3398 int rc2;
3399 if (ppvR3)
3400 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3401 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3402 else
3403 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3404 if (RT_SUCCESS(rc))
3405 {
3406 Mem.eType = MEMREF_TYPE_PAGE;
3407 rc = supdrvMemAdd(&Mem, pSession);
3408 if (!rc)
3409 {
3410 if (ppvR3)
3411 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3412 if (ppvR0)
3413 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3414 if (paPages)
3415 {
3416 uint32_t iPage = cPages;
3417 while (iPage-- > 0)
3418 {
3419 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3420 Assert(paPages[iPage] != NIL_RTHCPHYS);
3421 }
3422 }
3423 return VINF_SUCCESS;
3424 }
3425
3426 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3427 AssertRC(rc2);
3428 }
3429
3430 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3431 AssertRC(rc2);
3432 }
3433 return rc;
3434}
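
/* Usage sketch (illustrative only): allocating four pages with both mappings
 * and collecting the physical address of each page (paPages is optional and
 * may be NULL):
 *
 *      RTR3PTR  pvR3;
 *      RTR0PTR  pvR0;
 *      RTHCPHYS aPhys[4];
 *      int rc = SUPR0PageAllocEx(pSession, 4, 0 /*fFlags*/, &pvR3, &pvR0, &aPhys[0]);
 *      if (RT_SUCCESS(rc))
 *      {
 *          ...
 *          SUPR0PageFree(pSession, pvR3);
 *      }
 */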
3435
3436
3437/**
3438 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3439 * space.
3440 *
3441 * @returns IPRT status code.
3442 * @param pSession The session to associate the allocation with.
3443 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3444 * @param offSub Where to start mapping. Must be page aligned.
3445 * @param cbSub How much to map. Must be page aligned.
3446 * @param fFlags Flags, MBZ.
3447 * @param ppvR0 Where to return the address of the ring-0 mapping on
3448 * success.
3449 */
3450SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3451 uint32_t fFlags, PRTR0PTR ppvR0)
3452{
3453 int rc;
3454 PSUPDRVBUNDLE pBundle;
3455 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3456 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3457
3458 /*
3459 * Validate input.
3460 */
3461 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3462 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3463 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3464 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3465 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3466 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3467
3468 /*
3469 * Find the memory object.
3470 */
3471 RTSpinlockAcquire(pSession->Spinlock);
3472 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3473 {
3474 if (pBundle->cUsed > 0)
3475 {
3476 unsigned i;
3477 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3478 {
3479 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3480 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3481 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3482 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3483 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3484 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3485 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3486 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3487 {
3488 hMemObj = pBundle->aMem[i].MemObj;
3489 break;
3490 }
3491 }
3492 }
3493 }
3494 RTSpinlockRelease(pSession->Spinlock);
3495
3496 rc = VERR_INVALID_PARAMETER;
3497 if (hMemObj != NIL_RTR0MEMOBJ)
3498 {
3499 /*
3500 * Do some further input validations before calling IPRT.
3501 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3502 */
3503 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3504 if ( offSub < cbMemObj
3505 && cbSub <= cbMemObj
3506 && offSub + cbSub <= cbMemObj)
3507 {
3508 RTR0MEMOBJ hMapObj;
3509 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3510 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3511 if (RT_SUCCESS(rc))
3512 *ppvR0 = RTR0MemObjAddress(hMapObj);
3513 }
3514 else
3515 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3516
3517 }
3518 return rc;
3519}
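
/* Usage sketch (illustrative only): mapping the second page of an earlier
 * SUPR0PageAllocEx() allocation (ring-3 address pvR3) into kernel space:
 *
 *      RTR0PTR pvR0Map;
 *      int rc = SUPR0PageMapKernel(pSession, pvR3, PAGE_SIZE /*offSub*/,
 *                                  PAGE_SIZE /*cbSub*/, 0 /*fFlags*/, &pvR0Map);
 */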
3520
3521
3522/**
3523 * Changes the page level protection of one or more pages previously allocated
3524 * by SUPR0PageAllocEx.
3525 *
3526 * @returns IPRT status code.
3527 * @param pSession The session to associate the allocation with.
3528 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3529 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3530 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3531 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3532 * @param offSub Where to start changing. Must be page aligned.
3533 * @param cbSub How much to change. Must be page aligned.
3534 * @param fProt The new page level protection, see RTMEM_PROT_*.
3535 */
3536SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3537{
3538 int rc;
3539 PSUPDRVBUNDLE pBundle;
3540 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3541 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3542 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3543
3544 /*
3545 * Validate input.
3546 */
3547 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3548 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3549 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3550 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3551 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3552
3553 /*
3554 * Find the memory object.
3555 */
3556 RTSpinlockAcquire(pSession->Spinlock);
3557 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3558 {
3559 if (pBundle->cUsed > 0)
3560 {
3561 unsigned i;
3562 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3563 {
3564 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3565 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3566 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3567 || pvR3 == NIL_RTR3PTR)
3568 && ( pvR0 == NIL_RTR0PTR
3569 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3570 && ( pvR3 == NIL_RTR3PTR
3571 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3572 {
3573 if (pvR0 != NIL_RTR0PTR)
3574 hMemObjR0 = pBundle->aMem[i].MemObj;
3575 if (pvR3 != NIL_RTR3PTR)
3576 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3577 break;
3578 }
3579 }
3580 }
3581 }
3582 RTSpinlockRelease(pSession->Spinlock);
3583
3584 rc = VERR_INVALID_PARAMETER;
3585 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3586 || hMemObjR3 != NIL_RTR0MEMOBJ)
3587 {
3588 /*
3589 * Do some further input validations before calling IPRT.
3590 */
3591 size_t cbMemObj = hMemObjR0 != NIL_RTR0MEMOBJ ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3592 if ( offSub < cbMemObj
3593 && cbSub <= cbMemObj
3594 && offSub + cbSub <= cbMemObj)
3595 {
3596 rc = VINF_SUCCESS;
3597 if (hMemObjR3 != NIL_RTR0MEMOBJ)
3598 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3599 if (hMemObjR0 != NIL_RTR0MEMOBJ && RT_SUCCESS(rc))
3600 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3601 }
3602 else
3603 SUPR0Printf("SUPR0PageProtect: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3604
3605 }
3606 return rc;
3607
3608}
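
/* Usage sketch (illustrative only): write protecting the first page of a
 * SUPR0PageAllocEx() allocation in both its ring-3 and ring-0 mappings:
 *
 *      int rc = SUPR0PageProtect(pSession, pvR3, pvR0, 0 /*offSub*/,
 *                                PAGE_SIZE /*cbSub*/, RTMEM_PROT_READ);
 */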
3609
3610
3611/**
3612 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3613 *
3614 * @returns IPRT status code.
3615 * @param pSession The session owning the allocation.
3616 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3617 * SUPR0PageAllocEx().
3618 */
3619SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3620{
3621 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3622 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3623 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3624}
3625
3626
3627/**
3628 * Gets the paging mode of the current CPU.
3629 *
3630 * @returns Paging mode, SUPPAGINGMODE_INVALID on error.
3631 */
3632SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3633{
3634 SUPPAGINGMODE enmMode;
3635
3636 RTR0UINTREG cr0 = ASMGetCR0();
3637 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3638 enmMode = SUPPAGINGMODE_INVALID;
3639 else
3640 {
3641 RTR0UINTREG cr4 = ASMGetCR4();
3642 uint32_t fNXEPlusLMA = 0;
3643 if (cr4 & X86_CR4_PAE)
3644 {
3645 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3646 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3647 {
3648 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3649 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3650 fNXEPlusLMA |= RT_BIT(0);
3651 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3652 fNXEPlusLMA |= RT_BIT(1);
3653 }
3654 }
3655
3656 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3657 {
3658 case 0:
3659 enmMode = SUPPAGINGMODE_32_BIT;
3660 break;
3661
3662 case X86_CR4_PGE:
3663 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3664 break;
3665
3666 case X86_CR4_PAE:
3667 enmMode = SUPPAGINGMODE_PAE;
3668 break;
3669
3670 case X86_CR4_PAE | RT_BIT(0):
3671 enmMode = SUPPAGINGMODE_PAE_NX;
3672 break;
3673
3674 case X86_CR4_PAE | X86_CR4_PGE:
3675 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3676 break;
3677
3678 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3679 enmMode = SUPPAGINGMODE_PAE_GLOBAL_NX;
3680 break;
3681
3682 case RT_BIT(1) | X86_CR4_PAE:
3683 enmMode = SUPPAGINGMODE_AMD64;
3684 break;
3685
3686 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3687 enmMode = SUPPAGINGMODE_AMD64_NX;
3688 break;
3689
3690 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3691 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3692 break;
3693
3694 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3695 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3696 break;
3697
3698 default:
3699 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3700 enmMode = SUPPAGINGMODE_INVALID;
3701 break;
3702 }
3703 }
3704 return enmMode;
3705}
3706
3707
3708/**
3709 * Enables or disables hardware virtualization extensions using native OS APIs.
3710 *
3711 * @returns VBox status code.
3712 * @retval VINF_SUCCESS on success.
3713 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3714 *
3715 * @param fEnable Whether to enable or disable.
3716 */
3717SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3718{
3719#ifdef RT_OS_DARWIN
3720 return supdrvOSEnableVTx(fEnable);
3721#else
3722 return VERR_NOT_SUPPORTED;
3723#endif
3724}
3725
3726
3727/**
3728 * Suspends hardware virtualization extensions using the native OS API.
3729 *
3730 * This is called prior to entering raw-mode context.
3731 *
3732 * @returns @c true if suspended, @c false if not.
3733 */
3734SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3735{
3736#ifdef RT_OS_DARWIN
3737 return supdrvOSSuspendVTxOnCpu();
3738#else
3739 return false;
3740#endif
3741}
3742
3743
3744/**
3745 * Resumes hardware virtualization extensions using the native OS API.
3746 *
3747 * This is called after leaving raw-mode context.
3748 *
3749 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3750 */
3751SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3752{
3753#ifdef RT_OS_DARWIN
3754 supdrvOSResumeVTxOnCpu(fSuspended);
3755#else
3756 Assert(!fSuspended);
3757#endif
3758}
3759
3760
3761/**
3762 * Checks if the Intel VT-x feature is usable on this CPU.
3763 *
3764 * @returns VBox status code.
3765 * @param pfIsSmxModeAmbiguous Where to write whether the SMX mode causes
3766 * ambiguity that makes us unsure whether we
3767 * really can use VT-x or not.
3768 *
3769 * @remarks Must be called with preemption disabled.
3770 */
3771SUPR0DECL(int) SUPR0GetVmxUsability(bool *pfIsSmxModeAmbiguous)
3772{
3773 uint64_t u64FeatMsr;
3774 bool fMaybeSmxMode;
3775 bool fMsrLocked;
3776 bool fSmxVmxAllowed;
3777 bool fVmxAllowed;
3778 bool fIsSmxModeAmbiguous;
3779 int rc;
3780
3781 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3782
3783 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3784 fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3785 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3786 fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3787 fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3788 fIsSmxModeAmbiguous = false;
3789 rc = VERR_INTERNAL_ERROR_5;
3790
3791 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3792 if (fMsrLocked)
3793 {
3794 if (fVmxAllowed && fSmxVmxAllowed)
3795 rc = VINF_SUCCESS;
3796 else if (!fVmxAllowed && !fSmxVmxAllowed)
3797 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3798 else if (!fMaybeSmxMode)
3799 {
3800 if (fVmxAllowed)
3801 rc = VINF_SUCCESS;
3802 else
3803 rc = VERR_VMX_MSR_VMXON_DISABLED;
3804 }
3805 else
3806 {
3807 /*
3808 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3809 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3810 * See @bugref{6873}.
3811 */
3812 Assert(fMaybeSmxMode == true);
3813 fIsSmxModeAmbiguous = true;
3814 rc = VINF_SUCCESS;
3815 }
3816 }
3817 else
3818 {
3819 /*
3820 * MSR is not yet locked; we can change it ourselves here.
3821 * Once the lock bit is set, this MSR can no longer be modified.
3822 *
3823 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3824 * accurately. See @bugref{6873}.
3825 */
3826 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3827 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3828 | MSR_IA32_FEATURE_CONTROL_VMXON;
3829 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3830
3831 /* Verify. */
3832 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3833 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3834 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3835 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3836 if (fSmxVmxAllowed && fVmxAllowed)
3837 rc = VINF_SUCCESS;
3838 else
3839 rc = VERR_VMX_MSR_LOCKING_FAILED;
3840 }
3841
3842 if (pfIsSmxModeAmbiguous)
3843 *pfIsSmxModeAmbiguous = fIsSmxModeAmbiguous;
3844
3845 return rc;
3846}
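
/* Usage sketch (illustrative only): the function must be called with
 * preemption disabled, e.g. the way SUPR0QueryVTCaps() below does it:
 *
 *      RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *      bool fSmxAmbiguous = false;
 *      int  rc;
 *      RTThreadPreemptDisable(&PreemptState);
 *      rc = SUPR0GetVmxUsability(&fSmxAmbiguous);
 *      RTThreadPreemptRestore(&PreemptState);
 */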
3847
3848
3849/**
3850 * Checks if the AMD-V SVM feature is usable on this CPU.
3851 *
3852 * @returns VBox status code.
3853 * @param fInitSvm If usable, try to initialize SVM on this CPU.
3854 *
3855 * @remarks Must be called with preemption disabled.
3856 */
3857SUPR0DECL(int) SUPR0GetSvmUsability(bool fInitSvm)
3858{
3859 int rc;
3860 uint64_t fVmCr;
3861 uint64_t fEfer;
3862
3863 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3864 fVmCr = ASMRdMsr(MSR_K8_VM_CR);
3865 if (!(fVmCr & MSR_K8_VM_CR_SVM_DISABLE))
3866 {
3867 rc = VINF_SUCCESS;
3868 if (fInitSvm)
3869 {
3870 /* Turn on SVM in the EFER MSR. */
3871 fEfer = ASMRdMsr(MSR_K6_EFER);
3872 if (fEfer & MSR_K6_EFER_SVME)
3873 rc = VERR_SVM_IN_USE;
3874 else
3875 {
3876 ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);
3877
3878 /* Paranoia. */
3879 fEfer = ASMRdMsr(MSR_K6_EFER);
3880 if (fEfer & MSR_K6_EFER_SVME)
3881 {
3882 /* Restore previous value. */
3883 ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
3884 }
3885 else
3886 rc = VERR_SVM_ILLEGAL_EFER_MSR;
3887 }
3888 }
3889 }
3890 else
3891 rc = VERR_SVM_DISABLED;
3892 return rc;
3893}
3894
3895
3896/**
3897 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3898 *
3899 * @returns VBox status code.
3900 * @retval VERR_VMX_NO_VMX
3901 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3902 * @retval VERR_VMX_MSR_VMXON_DISABLED
3903 * @retval VERR_VMX_MSR_LOCKING_FAILED
3904 * @retval VERR_SVM_NO_SVM
3905 * @retval VERR_SVM_DISABLED
3906 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3907 * (centaur) CPU.
3908 *
3909 * @param pSession The session handle.
3910 * @param pfCaps Where to store the capabilities.
3911 */
3912SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3913{
3914 int rc = VERR_UNSUPPORTED_CPU;
3915 bool fIsSmxModeAmbiguous = false;
3916 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3917
3918 /*
3919 * Input validation.
3920 */
3921 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3922 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3923
3924 *pfCaps = 0;
3925 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3926 RTThreadPreemptDisable(&PreemptState);
3927 if (ASMHasCpuId())
3928 {
3929 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3930 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3931
3932 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3933 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3934
3935 if ( ASMIsValidStdRange(uMaxId)
3936 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3937 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3938 )
3939 {
3940 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3941 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3942 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3943 )
3944 {
3945 rc = SUPR0GetVmxUsability(&fIsSmxModeAmbiguous);
3946 if (rc == VINF_SUCCESS)
3947 {
3948 VMXCAPABILITY vtCaps;
3949
3950 *pfCaps |= SUPVTCAPS_VT_X;
3951
3952 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3953 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3954 {
3955 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3956 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3957 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3958 }
3959 }
3960 }
3961 else
3962 rc = VERR_VMX_NO_VMX;
3963 }
3964 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3965 && ASMIsValidStdRange(uMaxId))
3966 {
3967 uint32_t fExtFeaturesEcx, uExtMaxId;
3968 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3969 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3970
3971 /* Check if SVM is available. */
3972 if ( ASMIsValidExtRange(uExtMaxId)
3973 && uExtMaxId >= 0x8000000a
3974 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3975 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3976 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3977 )
3978 {
3979 rc = SUPR0GetSvmUsability(false /* fInitSvm */);
3980 if (RT_SUCCESS(rc))
3981 {
3982 uint32_t fSvmFeatures;
3983 *pfCaps |= SUPVTCAPS_AMD_V;
3984
3985 /* Query AMD-V features. */
3986 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3987 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3988 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3989 }
3990 }
3991 else
3992 rc = VERR_SVM_NO_SVM;
3993 }
3994 }
3995
3996 RTThreadPreemptRestore(&PreemptState);
3997 if (fIsSmxModeAmbiguous)
3998 SUPR0Printf("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n");
3999 return rc;
4000}
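
/* Usage sketch (illustrative only): checking which hardware virtualization
 * flavour (if any) is usable, and whether nested paging is available:
 *
 *      uint32_t fCaps = 0;
 *      int rc = SUPR0QueryVTCaps(pSession, &fCaps);
 *      if (RT_SUCCESS(rc))
 *      {
 *          if (fCaps & SUPVTCAPS_VT_X)             ... VT-x ...
 *          else if (fCaps & SUPVTCAPS_AMD_V)       ... AMD-V ...
 *          if (fCaps & SUPVTCAPS_NESTED_PAGING)    ... EPT/NPT ...
 *      }
 */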
4001
4002
4003/**
4004 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
4005 * updating.
4006 *
4007 * @param pGip Pointer to the GIP.
4008 * @param pGipCpu The per CPU structure for this CPU.
4009 * @param u64NanoTS The current time.
4010 */
4011static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
4012{
4013 /*
4014 * Here we don't really care about applying the TSC delta. Re-initializing
4015 * this value is not that relevant while (re)starting the GIP, as the first
4016 * few updates will be ignored anyway; see supdrvGipDoUpdateCpu().
4017 */
4018 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
4019 pGipCpu->u64NanoTS = u64NanoTS;
4020}
4021
4022
4023/**
4024 * Set the current TSC and NanoTS value for the CPU.
4025 *
4026 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
4027 * @param pvUser1 Pointer to the ring-0 GIP mapping.
4028 * @param pvUser2 Pointer to the variable holding the current time.
4029 */
4030static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4031{
4032 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
4033 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
4034
4035 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
4036 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
4037
4038 NOREF(pvUser2);
4039 NOREF(idCpu);
4040}
4041
4042
4043/**
4044 * State structure for supdrvGipDetectGetGipCpuCallback.
4045 */
4046typedef struct SUPDRVGIPDETECTGETCPU
4047{
4048 /** Bitmap of APIC IDs that has been seen (initialized to zero).
4049 * Bitmap of APIC IDs that have been seen (initialized to zero).
4050 uint8_t volatile bmApicId[256 / 8];
4051 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
4052 * initially). The callback clears the methods not detected. */
4053 uint32_t volatile fSupported;
4054 /** The first callback detecting any kind of range issues (initialized to
4055 * NIL_RTCPUID). */
4056 RTCPUID volatile idCpuProblem;
4057} SUPDRVGIPDETECTGETCPU;
4058/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
4059typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
4060
4061
4062/**
4063 * Checks for alternative ways of getting the CPU ID.
4064 *
4065 * This also checks the APIC ID, CPU ID and CPU set index values against the
4066 * GIP tables.
4067 *
4068 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
4069 * @param pvUser1 Pointer to the state structure.
4070 * @param pvUser2 Pointer to the GIP.
4071 */
4072static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4073{
4074 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
4075 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
4076 uint32_t fSupported = 0;
4077 uint16_t idApic;
4078 int iCpuSet;
4079
4080 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
4081
4082 /*
4083 * Check that the CPU ID and CPU set index are interchangeable.
4084 */
4085 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
4086 if ((RTCPUID)iCpuSet == idCpu)
4087 {
4088 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
4089 if ( iCpuSet >= 0
4090 && iCpuSet < RTCPUSET_MAX_CPUS
4091 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
4092 {
4093 /*
4094 * Check whether the IDTR.LIMIT contains a CPU number.
4095 */
4096#ifdef RT_ARCH_X86
4097 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
4098#else
4099 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
4100#endif
4101 RTIDTR Idtr;
4102 ASMGetIDTR(&Idtr);
4103 if (Idtr.cbIdt >= cbIdt)
4104 {
4105 uint32_t uTmp = Idtr.cbIdt - cbIdt;
4106 uTmp &= RTCPUSET_MAX_CPUS - 1;
4107 if (uTmp == idCpu)
4108 {
4109 RTIDTR Idtr2;
4110 ASMGetIDTR(&Idtr2);
4111 if (Idtr2.cbIdt == Idtr.cbIdt)
4112 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
4113 }
4114 }
4115
4116 /*
4117 * Check whether RDTSCP is an option.
4118 */
4119 if (ASMHasCpuId())
4120 {
4121 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
4122 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
4123 {
4124 uint32_t uAux;
4125 ASMReadTscWithAux(&uAux);
4126 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4127 {
4128 ASMNopPause();
4129 ASMReadTscWithAux(&uAux);
4130 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4131 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
4132 }
4133 }
4134 }
4135 }
4136 }
4137
4138 /*
4139 * Check that the APIC ID is unique.
4140 */
4141 idApic = ASMGetApicId();
4142 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
4143 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
4144 fSupported |= SUPGIPGETCPU_APIC_ID;
4145 else
4146 {
4147 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
4148 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4149 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
4150 idCpu, iCpuSet, idApic));
4151 }
4152
4153 /*
4154 * Check that the iCpuSet is within the expected range.
4155 */
4156 if (RT_UNLIKELY( iCpuSet < 0
4157 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
4158 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
4159 {
4160 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4161 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
4162 idCpu, iCpuSet, idApic));
4163 }
4164 else
4165 {
4166 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
4167 if (RT_UNLIKELY(idCpu2 != idCpu))
4168 {
4169 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4170 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
4171 idCpu, iCpuSet, idApic, idCpu2));
4172 }
4173 }
4174
4175 /*
4176 * Update the supported feature mask before we return.
4177 */
4178 ASMAtomicAndU32(&pState->fSupported, fSupported);
4179
4180 NOREF(pvUser2);
4181}
4182
4183
4184/**
4185 * Increases the timer frequency on hosts where this is possible (NT).
4186 *
4187 * The idea is that more timer interrupts are better for us... Also, it's
4188 * better that we raise the timer frequency ourselves than have someone else
4189 * do it, as we might otherwise end up with inaccurate callbacks.
4190 *
4191 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
4192 */
4193static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
4194{
4195 if (pDevExt->u32SystemTimerGranularityGrant == 0)
4196 {
4197 uint32_t u32SystemResolution;
4198 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
4199 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
4200 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
4201 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
4202 )
4203 {
4204 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
4205 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
4206 }
4207 }
4208}
4209
4210
4211/**
4212 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
4213 *
4214 * @param pDevExt Clears u32SystemTimerGranularityGrant.
4215 */
4216static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
4217{
4218 if (pDevExt->u32SystemTimerGranularityGrant)
4219 {
4220 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4221 AssertRC(rc2);
4222 pDevExt->u32SystemTimerGranularityGrant = 0;
4223 }
4224}
4225
4226
4227/**
4228 * Maps the GIP into userspace and/or gets the physical address of the GIP.
4229 *
4230 * @returns IPRT status code.
4231 * @param pSession Session to which the GIP mapping should belong.
4232 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
4233 * @param pHCPhysGip Where to store the physical address. (optional)
4234 *
4235 * @remark There is no reference counting on the mapping, so one call to this function
4236 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
4237 * and remove the session as a GIP user.
4238 */
4239SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
4240{
4241 int rc;
4242 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4243 RTR3PTR pGipR3 = NIL_RTR3PTR;
4244 RTHCPHYS HCPhys = NIL_RTHCPHYS;
4245 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
4246
4247 /*
4248 * Validate
4249 */
4250 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4251 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4252 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4253
4254#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4255 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4256#else
4257 RTSemFastMutexRequest(pDevExt->mtxGip);
4258#endif
4259 if (pDevExt->pGip)
4260 {
4261 /*
4262 * Map it?
4263 */
4264 rc = VINF_SUCCESS;
4265 if (ppGipR3)
4266 {
4267 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4268 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4269 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4270 if (RT_SUCCESS(rc))
4271 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4272 }
4273
4274 /*
4275 * Get physical address.
4276 */
4277 if (pHCPhysGip && RT_SUCCESS(rc))
4278 HCPhys = pDevExt->HCPhysGip;
4279
4280 /*
4281 * Reference globally.
4282 */
4283 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4284 {
4285 pSession->fGipReferenced = 1;
4286 pDevExt->cGipUsers++;
4287 if (pDevExt->cGipUsers == 1)
4288 {
4289 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4290 uint64_t u64NanoTS;
4291
4292 /*
4293 * GIP starts/resumes updating again. On windows we bump the
4294 * host timer frequency to make sure we don't get stuck in guest
4295 * mode and to get better timer (and possibly clock) accuracy.
4296 */
4297 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4298
4299 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4300
4301 /*
4302 * Wind the transaction IDs past the next update-Hz recalculation boundary so stale pre-suspend intervals aren't factored in.
4303 */
4304 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4305 {
4306 unsigned i;
4307 for (i = 0; i < pGipR0->cCpus; i++)
4308 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4309 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4310 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4311 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4312 }
4313
4314 /*
4315 * Re-initialize the per-CPU TSC and NanoTS values before the updating is restarted.
4316 */
4317 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4318 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4319 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4320 || RTMpGetOnlineCount() == 1)
4321 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4322 else
4323 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4324
4325 /*
4326 * Detect alternative ways to figure the CPU ID in ring-3 and
4327 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
4328 * and CPU set indexes while we're at it.
4329 */
4330 if (RT_SUCCESS(rc))
4331 {
4332 SUPDRVGIPDETECTGETCPU DetectState;
4333 RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
4334 DetectState.fSupported = UINT32_MAX;
4335 DetectState.idCpuProblem = NIL_RTCPUID;
4336 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
4337 if (DetectState.idCpuProblem == NIL_RTCPUID)
4338 {
4339 if ( DetectState.fSupported != UINT32_MAX
4340 && DetectState.fSupported != 0)
4341 {
4342 if (pGipR0->fGetGipCpu != DetectState.fSupported)
4343 {
4344 pGipR0->fGetGipCpu = DetectState.fSupported;
4345 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
4346 }
4347 }
4348 else
4349 {
4350 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
4351 DetectState.fSupported));
4352 rc = VERR_UNSUPPORTED_CPU;
4353 }
4354 }
4355 else
4356 {
4357 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
4358 DetectState.idCpuProblem, DetectState.idCpuProblem));
4359 rc = VERR_INVALID_CPU_ID;
4360 }
4361 }
4362
4363 /*
4364 * Start the GIP timer if all is well..
4365 */
4366 if (RT_SUCCESS(rc))
4367 {
4368#ifndef DO_NOT_START_GIP
4369 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4370#endif
4371 rc = VINF_SUCCESS;
4372 }
4373
4374 /*
4375 * Bail out on error.
4376 */
4377 if (RT_FAILURE(rc))
4378 {
4379 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
4380 pDevExt->cGipUsers = 0;
4381 pSession->fGipReferenced = 0;
4382 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4383 {
4384 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
4385 if (RT_SUCCESS(rc2))
4386 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4387 }
4388 HCPhys = NIL_RTHCPHYS;
4389 pGipR3 = NIL_RTR3PTR;
4390 }
4391 }
4392 }
4393 }
4394 else
4395 {
4396 rc = VERR_GENERAL_FAILURE;
4397 Log(("SUPR0GipMap: GIP is not available!\n"));
4398 }
4399#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4400 RTSemMutexRelease(pDevExt->mtxGip);
4401#else
4402 RTSemFastMutexRelease(pDevExt->mtxGip);
4403#endif
4404
4405 /*
4406 * Write returns.
4407 */
4408 if (pHCPhysGip)
4409 *pHCPhysGip = HCPhys;
4410 if (ppGipR3)
4411 *ppGipR3 = pGipR3;
4412
4413#ifdef DEBUG_DARWIN_GIP
4414 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4415#else
4416 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4417#endif
4418 return rc;
4419}
4420
4421
4422/**
4423 * Unmaps any user mapping of the GIP and terminates all GIP access
4424 * from this session.
4425 *
4426 * @returns IPRT status code.
4427 * @param pSession Session to which the GIP mapping should belong.
4428 */
4429SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4430{
4431 int rc = VINF_SUCCESS;
4432 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4433#ifdef DEBUG_DARWIN_GIP
4434 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4435 pSession,
4436 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4437 pSession->GipMapObjR3));
4438#else
4439 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4440#endif
4441 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4442
4443#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4444 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4445#else
4446 RTSemFastMutexRequest(pDevExt->mtxGip);
4447#endif
4448
4449 /*
4450 * Unmap anything?
4451 */
4452 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4453 {
4454 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4455 AssertRC(rc);
4456 if (RT_SUCCESS(rc))
4457 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4458 }
4459
4460 /*
4461 * Dereference global GIP.
4462 */
4463 if (pSession->fGipReferenced && !rc)
4464 {
4465 pSession->fGipReferenced = 0;
4466 if ( pDevExt->cGipUsers > 0
4467 && !--pDevExt->cGipUsers)
4468 {
4469 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4470#ifndef DO_NOT_START_GIP
4471 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4472#endif
4473 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4474 }
4475 }
4476
4477#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4478 RTSemMutexRelease(pDevExt->mtxGip);
4479#else
4480 RTSemFastMutexRelease(pDevExt->mtxGip);
4481#endif
4482
4483 return rc;
4484}
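
/* Usage sketch (illustrative only): mapping the GIP into the calling process
 * and releasing it again; the unmap call also drops the session's global GIP
 * reference taken by the map call:
 *
 *      RTR3PTR  pGipR3    = NIL_RTR3PTR;
 *      RTHCPHYS HCPhysGip = NIL_RTHCPHYS;
 *      int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
 *      if (RT_SUCCESS(rc))
 *      {
 *          ...
 *          SUPR0GipUnmap(pSession);
 *      }
 */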
4485
4486
4487/**
4488 * Gets the GIP pointer.
4489 *
4490 * @returns Pointer to the GIP or NULL.
4491 */
4492SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4493{
4494 return g_pSUPGlobalInfoPage;
4495}
4496
4497
4498/**
4499 * Register a component factory with the support driver.
4500 *
4501 * This is currently restricted to kernel sessions only.
4502 *
4503 * @returns VBox status code.
4504 * @retval VINF_SUCCESS on success.
4505 * @retval VERR_NO_MEMORY if we're out of memory.
4506 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4507 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4508 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4509 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4510 *
4511 * @param pSession The SUPDRV session (must be a ring-0 session).
4512 * @param pFactory Pointer to the component factory registration structure.
4513 *
4514 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4515 */
4516SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4517{
4518 PSUPDRVFACTORYREG pNewReg;
4519 const char *psz;
4520 int rc;
4521
4522 /*
4523 * Validate parameters.
4524 */
4525 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4526 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4527 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4528 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4529 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4530 AssertReturn(psz, VERR_INVALID_PARAMETER);
4531
4532 /*
4533 * Allocate and initialize a new registration structure.
4534 */
4535 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4536 if (pNewReg)
4537 {
4538 pNewReg->pNext = NULL;
4539 pNewReg->pFactory = pFactory;
4540 pNewReg->pSession = pSession;
4541 pNewReg->cchName = psz - &pFactory->szName[0];
4542
4543 /*
4544 * Add it to the tail of the list after checking for prior registration.
4545 */
4546 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4547 if (RT_SUCCESS(rc))
4548 {
4549 PSUPDRVFACTORYREG pPrev = NULL;
4550 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4551 while (pCur && pCur->pFactory != pFactory)
4552 {
4553 pPrev = pCur;
4554 pCur = pCur->pNext;
4555 }
4556 if (!pCur)
4557 {
4558 if (pPrev)
4559 pPrev->pNext = pNewReg;
4560 else
4561 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4562 rc = VINF_SUCCESS;
4563 }
4564 else
4565 rc = VERR_ALREADY_EXISTS;
4566
4567 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4568 }
4569
4570 if (RT_FAILURE(rc))
4571 RTMemFree(pNewReg);
4572 }
4573 else
4574 rc = VERR_NO_MEMORY;
4575 return rc;
4576}
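
/* Registration sketch (illustrative only; g_MyFactory, "MyComponent" and
 * myQueryFactoryInterface are hypothetical, and the call must come from a
 * kernel session). Only the szName and pfnQueryFactoryInterface members
 * validated above are assumed:
 *
 *      static SUPDRVFACTORY g_MyFactory;
 *      ...
 *      RTStrCopy(g_MyFactory.szName, sizeof(g_MyFactory.szName), "MyComponent");
 *      g_MyFactory.pfnQueryFactoryInterface = myQueryFactoryInterface;
 *      rc = SUPR0ComponentRegisterFactory(pSession, &g_MyFactory);
 */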
4577
4578
4579/**
4580 * Deregister a component factory.
4581 *
4582 * @returns VBox status code.
4583 * @retval VINF_SUCCESS on success.
4584 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4585 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4586 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4587 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4588 *
4589 * @param pSession The SUPDRV session (must be a ring-0 session).
4590 * @param pFactory Pointer to the component factory registration structure
4591 * previously passed to SUPR0ComponentRegisterFactory().
4592 *
4593 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4594 */
4595SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4596{
4597 int rc;
4598
4599 /*
4600 * Validate parameters.
4601 */
4602 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4603 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4604 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4605
4606 /*
4607 * Take the lock and look for the registration record.
4608 */
4609 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4610 if (RT_SUCCESS(rc))
4611 {
4612 PSUPDRVFACTORYREG pPrev = NULL;
4613 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4614 while (pCur && pCur->pFactory != pFactory)
4615 {
4616 pPrev = pCur;
4617 pCur = pCur->pNext;
4618 }
4619 if (pCur)
4620 {
4621 if (!pPrev)
4622 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4623 else
4624 pPrev->pNext = pCur->pNext;
4625
4626 pCur->pNext = NULL;
4627 pCur->pFactory = NULL;
4628 pCur->pSession = NULL;
4629 rc = VINF_SUCCESS;
4630 }
4631 else
4632 rc = VERR_NOT_FOUND;
4633
4634 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4635
4636 RTMemFree(pCur);
4637 }
4638 return rc;
4639}
4640
4641
4642/**
4643 * Queries a component factory.
4644 *
4645 * @returns VBox status code.
4646 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4647 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4648 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4649 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4650 *
4651 * @param pSession The SUPDRV session.
4652 * @param pszName The name of the component factory.
4653 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4654 * @param ppvFactoryIf Where to store the factory interface.
4655 */
4656SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4657{
4658 const char *pszEnd;
4659 size_t cchName;
4660 int rc;
4661
4662 /*
4663 * Validate parameters.
4664 */
4665 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4666
4667 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4668 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4669 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4670 cchName = pszEnd - pszName;
4671
4672 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4673 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4674 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4675
4676 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4677 *ppvFactoryIf = NULL;
4678
4679 /*
4680 * Take the lock and try all factories by this name.
4681 */
4682 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4683 if (RT_SUCCESS(rc))
4684 {
4685 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4686 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4687 while (pCur)
4688 {
4689 if ( pCur->cchName == cchName
4690 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4691 {
4692 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4693 if (pvFactory)
4694 {
4695 *ppvFactoryIf = pvFactory;
4696 rc = VINF_SUCCESS;
4697 break;
4698 }
4699 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4700 }
4701
4702 /* next */
4703 pCur = pCur->pNext;
4704 }
4705
4706 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4707 }
4708 return rc;
4709}
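
/* Lookup sketch (illustrative only; the component name and UUID string are
 * hypothetical):
 *
 *      void *pvIf = NULL;
 *      int rc = SUPR0ComponentQueryFactory(pSession, "MyComponent",
 *                                          "12345678-1234-1234-1234-123456789abc",
 *                                          &pvIf);
 *      if (RT_SUCCESS(rc))
 *          ... pvIf points to the requested factory interface ...
 */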
4710
4711
4712/**
4713 * Adds a memory object to the session.
4714 *
4715 * @returns IPRT status code.
4716 * @param pMem Memory tracking structure containing the
4717 * information to track.
4718 * @param pSession The session.
4719 */
4720static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4721{
4722 PSUPDRVBUNDLE pBundle;
4723
4724 /*
4725 * Find free entry and record the allocation.
4726 */
4727 RTSpinlockAcquire(pSession->Spinlock);
4728 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4729 {
4730 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4731 {
4732 unsigned i;
4733 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4734 {
4735 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4736 {
4737 pBundle->cUsed++;
4738 pBundle->aMem[i] = *pMem;
4739 RTSpinlockRelease(pSession->Spinlock);
4740 return VINF_SUCCESS;
4741 }
4742 }
4743 AssertFailed(); /* !!this can't be happening!!! */
4744 }
4745 }
4746 RTSpinlockRelease(pSession->Spinlock);
4747
4748 /*
4749 * Need to allocate a new bundle.
4750 * Insert into the last entry in the bundle.
4751 */
4752 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4753 if (!pBundle)
4754 return VERR_NO_MEMORY;
4755
4756 /* take last entry. */
4757 pBundle->cUsed++;
4758 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4759
4760 /* insert into list. */
4761 RTSpinlockAcquire(pSession->Spinlock);
4762 pBundle->pNext = pSession->Bundle.pNext;
4763 pSession->Bundle.pNext = pBundle;
4764 RTSpinlockRelease(pSession->Spinlock);
4765
4766 return VINF_SUCCESS;
4767}
4768
4769
4770/**
4771 * Releases a memory object referenced by pointer and type.
4772 *
4773 * @returns IPRT status code.
4774 * @param pSession Session data.
4775 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4776 * @param eType Memory type.
4777 */
4778static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4779{
4780 PSUPDRVBUNDLE pBundle;
4781
4782 /*
4783 * Validate input.
4784 */
4785 if (!uPtr)
4786 {
4787 Log(("Illegal address %p\n", (void *)uPtr));
4788 return VERR_INVALID_PARAMETER;
4789 }
4790
4791 /*
4792 * Search for the address.
4793 */
4794 RTSpinlockAcquire(pSession->Spinlock);
4795 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4796 {
4797 if (pBundle->cUsed > 0)
4798 {
4799 unsigned i;
4800 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4801 {
4802 if ( pBundle->aMem[i].eType == eType
4803 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4804 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4805 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4806 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4807 )
4808 {
4809 /* Make a copy of it and release it outside the spinlock. */
4810 SUPDRVMEMREF Mem = pBundle->aMem[i];
4811 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4812 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4813 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4814 RTSpinlockRelease(pSession->Spinlock);
4815
4816 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4817 {
4818 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4819 AssertRC(rc); /** @todo figure out how to handle this. */
4820 }
4821 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4822 {
4823 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4824 AssertRC(rc); /** @todo figure out how to handle this. */
4825 }
4826 return VINF_SUCCESS;
4827 }
4828 }
4829 }
4830 }
4831 RTSpinlockRelease(pSession->Spinlock);
4832 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4833 return VERR_INVALID_PARAMETER;
4834}
4835
4836
4837/**
4838 * Opens an image. If it's the first time it's opened the caller must upload
4839 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4840 *
4841 * This is the 1st step of the loading.
4842 *
4843 * @returns IPRT status code.
4844 * @param pDevExt Device globals.
4845 * @param pSession Session data.
4846 * @param pReq The open request.
4847 */
4848static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4849{
4850 int rc;
4851 PSUPDRVLDRIMAGE pImage;
4852 void *pv;
4853 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4854 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4855
4856 /*
4857 * Check if we got an instance of the image already.
4858 */
4859 supdrvLdrLock(pDevExt);
4860 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4861 {
4862 if ( pImage->szName[cchName] == '\0'
4863 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4864 {
4865 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4866 {
4867                /** @todo Check cbImageBits and cbImageWithTabs here; if they differ, the images are different. */
4868 pImage->cUsage++;
4869 pReq->u.Out.pvImageBase = pImage->pvImage;
4870 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4871 pReq->u.Out.fNativeLoader = pImage->fNative;
4872 supdrvLdrAddUsage(pSession, pImage);
4873 supdrvLdrUnlock(pDevExt);
4874 return VINF_SUCCESS;
4875 }
4876 supdrvLdrUnlock(pDevExt);
4877        Log(("supdrvIOCtl_LdrOpen: Too many existing references to '%s'!\n", pReq->u.In.szName));
4878 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4879 }
4880 }
4881 /* (not found - add it!) */
4882
4883 /* If the loader interface is locked down, make userland fail early */
4884 if (pDevExt->fLdrLockedDown)
4885 {
4886 supdrvLdrUnlock(pDevExt);
4887 Log(("supdrvIOCtl_LdrOpen: Not adding '%s' to image list, loader interface is locked down!\n", pReq->u.In.szName));
4888 return VERR_PERMISSION_DENIED;
4889 }
4890
4891 /*
4892 * Allocate memory.
4893 */
4894 Assert(cchName < sizeof(pImage->szName));
4895 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4896 if (!pv)
4897 {
4898 supdrvLdrUnlock(pDevExt);
4899 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4900 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4901 }
4902
4903 /*
4904 * Setup and link in the LDR stuff.
4905 */
4906 pImage = (PSUPDRVLDRIMAGE)pv;
4907 pImage->pvImage = NULL;
4908 pImage->pvImageAlloc = NULL;
4909 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4910 pImage->cbImageBits = pReq->u.In.cbImageBits;
4911 pImage->cSymbols = 0;
4912 pImage->paSymbols = NULL;
4913 pImage->pachStrTab = NULL;
4914 pImage->cbStrTab = 0;
4915 pImage->pfnModuleInit = NULL;
4916 pImage->pfnModuleTerm = NULL;
4917 pImage->pfnServiceReqHandler = NULL;
4918 pImage->uState = SUP_IOCTL_LDR_OPEN;
4919 pImage->cUsage = 1;
4920 pImage->pDevExt = pDevExt;
4921 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4922
4923 /*
4924 * Try load it using the native loader, if that isn't supported, fall back
4925 * on the older method.
4926 */
4927 pImage->fNative = true;
4928 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4929 if (rc == VERR_NOT_SUPPORTED)
4930 {
4931 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4932 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4933 pImage->fNative = false;
4934 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4935 }
4936 if (RT_FAILURE(rc))
4937 {
4938 supdrvLdrUnlock(pDevExt);
4939 RTMemFree(pImage);
4940 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4941 return rc;
4942 }
4943 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4944
4945 /*
4946 * Link it.
4947 */
4948 pImage->pNext = pDevExt->pLdrImages;
4949 pDevExt->pLdrImages = pImage;
4950
4951 supdrvLdrAddUsage(pSession, pImage);
4952
4953 pReq->u.Out.pvImageBase = pImage->pvImage;
4954 pReq->u.Out.fNeedsLoading = true;
4955 pReq->u.Out.fNativeLoader = pImage->fNative;
4956 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4957
4958 supdrvLdrUnlock(pDevExt);
4959 return VINF_SUCCESS;
4960}
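
/*
 * For orientation, the ring-3 loader protocol these handlers implement
 * (a sketch of the expected call order, not verbatim client code):
 *   1. SUP_IOCTL_LDR_OPEN       - supdrvIOCtl_LdrOpen() registers the image
 *                                 (or reuses an instance) and reports
 *                                 fNeedsLoading.
 *   2. SUP_IOCTL_LDR_LOAD       - supdrvIOCtl_LdrLoad() uploads the bits and
 *                                 tables, sets entry points and runs
 *                                 pfnModuleInit.
 *   3. SUP_IOCTL_LDR_GET_SYMBOL - supdrvIOCtl_LdrGetSymbol() resolves symbols
 *                                 in the loaded image.
 *   4. SUP_IOCTL_LDR_FREE       - supdrvIOCtl_LdrFree() drops the usage
 *                                 reference, unloading on the last one.
 */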
4961
4962
4963/**
4964 * Worker that validates a pointer to an image entrypoint.
4965 *
4966 * @returns IPRT status code.
4967 * @param pDevExt The device globals.
4968 * @param pImage The loader image.
4969 * @param pv The pointer into the image.
4970 * @param fMayBeNull Whether it may be NULL.
4971 * @param pszWhat What is this entrypoint? (for logging)
4972 * @param pbImageBits The image bits prepared by ring-3.
4973 *
4974 * @remarks Will release the loader lock on failure.
4975 */
4976static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4977 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4978{
4979 if (!fMayBeNull || pv)
4980 {
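        /*
         * Note: the unsigned subtraction below checks both bounds in one
         * compare. Any pv below pvImage wraps around to a huge value and so
         * also fails the ">= cbImageBits" test. E.g. (illustrative numbers)
         * with pvImage=0x1000 and cbImageBits=0x100: pv=0x0ff0 yields
         * 0xffff...fff0 (rejected), pv=0x10ff yields 0xff (accepted).
         */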
4981 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4982 {
4983 supdrvLdrUnlock(pDevExt);
4984 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4985 return VERR_INVALID_PARAMETER;
4986 }
4987
4988 if (pImage->fNative)
4989 {
4990 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4991 if (RT_FAILURE(rc))
4992 {
4993 supdrvLdrUnlock(pDevExt);
4994 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4995 return rc;
4996 }
4997 }
4998 }
4999 return VINF_SUCCESS;
5000}
5001
5002
5003/**
5004 * Loads the image bits.
5005 *
5006 * This is the 2nd step of the loading.
5007 *
5008 * @returns IPRT status code.
5009 * @param pDevExt Device globals.
5010 * @param pSession Session data.
5011 * @param pReq The request.
5012 */
5013static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
5014{
5015 PSUPDRVLDRUSAGE pUsage;
5016 PSUPDRVLDRIMAGE pImage;
5017 int rc;
5018    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithTabs=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
5019
5020 /*
5021 * Find the ldr image.
5022 */
5023 supdrvLdrLock(pDevExt);
5024 pUsage = pSession->pLdrUsage;
5025 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5026 pUsage = pUsage->pNext;
5027 if (!pUsage)
5028 {
5029 supdrvLdrUnlock(pDevExt);
5030 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
5031 return VERR_INVALID_HANDLE;
5032 }
5033 pImage = pUsage->pImage;
5034
5035 /*
5036 * Validate input.
5037 */
5038 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
5039 || pImage->cbImageBits != pReq->u.In.cbImageBits)
5040 {
5041 supdrvLdrUnlock(pDevExt);
5042 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
5043 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
5044 return VERR_INVALID_HANDLE;
5045 }
5046
5047 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
5048 {
5049 unsigned uState = pImage->uState;
5050 supdrvLdrUnlock(pDevExt);
5051 if (uState != SUP_IOCTL_LDR_LOAD)
5052 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
5053 return VERR_ALREADY_LOADED;
5054 }
5055
5056 /* If the loader interface is locked down, don't load new images */
5057 if (pDevExt->fLdrLockedDown)
5058 {
5059 supdrvLdrUnlock(pDevExt);
5060 Log(("SUP_IOCTL_LDR_LOAD: Not loading '%s' image bits, loader interface is locked down!\n", pImage->szName));
5061 return VERR_PERMISSION_DENIED;
5062 }
5063
5064 switch (pReq->u.In.eEPType)
5065 {
5066 case SUPLDRLOADEP_NOTHING:
5067 break;
5068
5069 case SUPLDRLOADEP_VMMR0:
5070            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
5071 if (RT_SUCCESS(rc))
5072 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
5073 if (RT_SUCCESS(rc))
5074 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
5075 if (RT_SUCCESS(rc))
5076 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
5077 if (RT_FAILURE(rc))
5078 return rc;
5079 break;
5080
5081 case SUPLDRLOADEP_SERVICE:
5082 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
5083 if (RT_FAILURE(rc))
5084 return rc;
5085 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
5086 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
5087 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
5088 {
5089 supdrvLdrUnlock(pDevExt);
5090 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
5091 pImage->pvImage, pReq->u.In.cbImageWithTabs,
5092 pReq->u.In.EP.Service.apvReserved[0],
5093 pReq->u.In.EP.Service.apvReserved[1],
5094 pReq->u.In.EP.Service.apvReserved[2]));
5095 return VERR_INVALID_PARAMETER;
5096 }
5097 break;
5098
5099 default:
5100 supdrvLdrUnlock(pDevExt);
5101 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
5102 return VERR_INVALID_PARAMETER;
5103 }
5104
5105 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
5106 if (RT_FAILURE(rc))
5107 return rc;
5108 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
5109 if (RT_FAILURE(rc))
5110 return rc;
5111
5112 /*
5113 * Allocate and copy the tables.
5114 * (No need to do try/except as this is a buffered request.)
5115 */
5116 pImage->cbStrTab = pReq->u.In.cbStrTab;
5117 if (pImage->cbStrTab)
5118 {
5119 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
5120 if (pImage->pachStrTab)
5121 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
5122 else
5123 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
5124 }
5125
5126 pImage->cSymbols = pReq->u.In.cSymbols;
5127 if (RT_SUCCESS(rc) && pImage->cSymbols)
5128 {
5129 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
5130 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
5131 if (pImage->paSymbols)
5132 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
5133 else
5134 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
5135 }
5136
5137 /*
5138 * Copy the bits / complete native loading.
5139 */
5140 if (RT_SUCCESS(rc))
5141 {
5142 pImage->uState = SUP_IOCTL_LDR_LOAD;
5143 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
5144 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
5145
5146 if (pImage->fNative)
5147 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
5148 else
5149 {
5150 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
5151 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
5152 }
5153 }
5154
5155 /*
5156 * Update any entry points.
5157 */
5158 if (RT_SUCCESS(rc))
5159 {
5160 switch (pReq->u.In.eEPType)
5161 {
5162 default:
5163 case SUPLDRLOADEP_NOTHING:
5164 rc = VINF_SUCCESS;
5165 break;
5166 case SUPLDRLOADEP_VMMR0:
5167 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
5168 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
5169 break;
5170 case SUPLDRLOADEP_SERVICE:
5171 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
5172 rc = VINF_SUCCESS;
5173 break;
5174 }
5175 }
5176
5177 /*
5178 * On success call the module initialization.
5179 */
5180 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
5181 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
5182 {
5183 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
5184 pDevExt->pLdrInitImage = pImage;
5185 pDevExt->hLdrInitThread = RTThreadNativeSelf();
5186 rc = pImage->pfnModuleInit(pImage);
5187 pDevExt->pLdrInitImage = NULL;
5188 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
5189 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
5190 supdrvLdrUnsetVMMR0EPs(pDevExt);
5191 }
5192 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
5193
5194 if (RT_FAILURE(rc))
5195 {
5196 /* Inform the tracing component in case ModuleInit registered TPs. */
5197 supdrvTracerModuleUnloading(pDevExt, pImage);
5198
5199 pImage->uState = SUP_IOCTL_LDR_OPEN;
5200 pImage->pfnModuleInit = NULL;
5201 pImage->pfnModuleTerm = NULL;
5202        pImage->pfnServiceReqHandler = NULL;
5203 pImage->cbStrTab = 0;
5204 RTMemFree(pImage->pachStrTab);
5205 pImage->pachStrTab = NULL;
5206 RTMemFree(pImage->paSymbols);
5207 pImage->paSymbols = NULL;
5208 pImage->cSymbols = 0;
5209 }
5210
5211 supdrvLdrUnlock(pDevExt);
5212 return rc;
5213}
5214
5215
5216/**
5217 * Frees a previously loaded (prep'ed) image.
5218 *
5219 * @returns IPRT status code.
5220 * @param pDevExt Device globals.
5221 * @param pSession Session data.
5222 * @param pReq The request.
5223 */
5224static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
5225{
5226 int rc;
5227 PSUPDRVLDRUSAGE pUsagePrev;
5228 PSUPDRVLDRUSAGE pUsage;
5229 PSUPDRVLDRIMAGE pImage;
5230 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
5231
5232 /*
5233 * Find the ldr image.
5234 */
5235 supdrvLdrLock(pDevExt);
5236 pUsagePrev = NULL;
5237 pUsage = pSession->pLdrUsage;
5238 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5239 {
5240 pUsagePrev = pUsage;
5241 pUsage = pUsage->pNext;
5242 }
5243 if (!pUsage)
5244 {
5245 supdrvLdrUnlock(pDevExt);
5246 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
5247 return VERR_INVALID_HANDLE;
5248 }
5249
5250 /*
5251 * Check if we can remove anything.
5252 */
5253 rc = VINF_SUCCESS;
5254 pImage = pUsage->pImage;
5255 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
5256 {
5257 /*
5258 * Check if there are any objects with destructors in the image, if
5259 * so leave it for the session cleanup routine so we get a chance to
5260 * clean things up in the right order and not leave them all dangling.
5261 */
5262 RTSpinlockAcquire(pDevExt->Spinlock);
5263 if (pImage->cUsage <= 1)
5264 {
5265 PSUPDRVOBJ pObj;
5266 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5267 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5268 {
5269 rc = VERR_DANGLING_OBJECTS;
5270 break;
5271 }
5272 }
5273 else
5274 {
5275 PSUPDRVUSAGE pGenUsage;
5276 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
5277 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5278 {
5279 rc = VERR_DANGLING_OBJECTS;
5280 break;
5281 }
5282 }
5283 RTSpinlockRelease(pDevExt->Spinlock);
5284 if (rc == VINF_SUCCESS)
5285 {
5286 /* unlink it */
5287 if (pUsagePrev)
5288 pUsagePrev->pNext = pUsage->pNext;
5289 else
5290 pSession->pLdrUsage = pUsage->pNext;
5291
5292 /* free it */
5293 pUsage->pImage = NULL;
5294 pUsage->pNext = NULL;
5295 RTMemFree(pUsage);
5296
5297 /*
5298 * Dereference the image.
5299 */
5300 if (pImage->cUsage <= 1)
5301 supdrvLdrFree(pDevExt, pImage);
5302 else
5303 pImage->cUsage--;
5304 }
5305 else
5306 {
5307 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
5308 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
5309 }
5310 }
5311 else
5312 {
5313 /*
5314 * Dereference both image and usage.
5315 */
5316 pImage->cUsage--;
5317 pUsage->cUsage--;
5318 }
5319
5320 supdrvLdrUnlock(pDevExt);
5321 return rc;
5322}
5323
5324
5325/**
5326 * Lock down the image loader interface.
5327 *
5328 * @returns IPRT status code.
5329 * @param pDevExt Device globals.
5330 */
5331static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt)
5332{
5333 LogFlow(("supdrvIOCtl_LdrLockDown:\n"));
5334
5335 supdrvLdrLock(pDevExt);
5336 if (!pDevExt->fLdrLockedDown)
5337 {
5338 pDevExt->fLdrLockedDown = true;
5339 Log(("supdrvIOCtl_LdrLockDown: Image loader interface locked down\n"));
5340 }
5341 supdrvLdrUnlock(pDevExt);
5342
5343 return VINF_SUCCESS;
5344}
5345
5346
5347/**
5348 * Gets the address of a symbol in an open image.
5349 *
5350 * @returns IPRT status code.
5351 * @param pDevExt Device globals.
5352 * @param pSession Session data.
5353 * @param pReq The request buffer.
5354 */
5355static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
5356{
5357 PSUPDRVLDRIMAGE pImage;
5358 PSUPDRVLDRUSAGE pUsage;
5359 uint32_t i;
5360 PSUPLDRSYM paSyms;
5361 const char *pchStrings;
5362 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5363 void *pvSymbol = NULL;
5364 int rc = VERR_GENERAL_FAILURE;
5365 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5366
5367 /*
5368 * Find the ldr image.
5369 */
5370 supdrvLdrLock(pDevExt);
5371 pUsage = pSession->pLdrUsage;
5372 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5373 pUsage = pUsage->pNext;
5374 if (!pUsage)
5375 {
5376 supdrvLdrUnlock(pDevExt);
5377 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5378 return VERR_INVALID_HANDLE;
5379 }
5380 pImage = pUsage->pImage;
5381 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5382 {
5383 unsigned uState = pImage->uState;
5384 supdrvLdrUnlock(pDevExt);
5385 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5386 return VERR_ALREADY_LOADED;
5387 }
5388
5389 /*
5390 * Search the symbol strings.
5391 *
5392 * Note! The int32_t is for native loading on Solaris where the data
5393 * and text segments are in very different places.
5394 */
5395 pchStrings = pImage->pachStrTab;
5396 paSyms = pImage->paSymbols;
5397 for (i = 0; i < pImage->cSymbols; i++)
5398 {
5399 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5400 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5401 {
5402 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5403 rc = VINF_SUCCESS;
5404 break;
5405 }
5406 }
5407 supdrvLdrUnlock(pDevExt);
5408 pReq->u.Out.pvSymbol = pvSymbol;
5409 return rc;
5410}
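
/*
 * Worked example of the lookup above (illustrative values): with
 * pachStrTab = "\0MyExport\0..." an entry with offName = 1 memcmp-matches
 * the query "MyExport", and offSymbol = 0x2340 resolves to pvImage + 0x2340.
 * A negative offSymbol (hence the int32_t cast) resolves below pvImage, as
 * natively loaded Solaris modules need.
 */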
5411
5412
5413/**
5414 * Gets the address of a symbol in an open image or the support driver.
5415 *
5416 * @returns VINF_SUCCESS on success, VBox error status code on failure.
5418 * @param pDevExt Device globals.
5419 * @param pSession Session data.
5420 * @param pReq The request buffer.
5421 */
5422static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5423{
5424 int rc = VINF_SUCCESS;
5425 const char *pszSymbol = pReq->u.In.pszSymbol;
5426 const char *pszModule = pReq->u.In.pszModule;
5427 size_t cbSymbol;
5428 char const *pszEnd;
5429 uint32_t i;
5430
5431 /*
5432 * Input validation.
5433 */
5434 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5435 pszEnd = RTStrEnd(pszSymbol, 512);
5436 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5437 cbSymbol = pszEnd - pszSymbol + 1;
5438
5439 if (pszModule)
5440 {
5441 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5442 pszEnd = RTStrEnd(pszModule, 64);
5443 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5444 }
5445 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5446
5447
5448 if ( !pszModule
5449 || !strcmp(pszModule, "SupDrv"))
5450 {
5451 /*
5452 * Search the support driver export table.
5453 */
5454 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5455 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5456 {
5457 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5458 break;
5459 }
5460 }
5461 else
5462 {
5463 /*
5464 * Find the loader image.
5465 */
5466 PSUPDRVLDRIMAGE pImage;
5467
5468 supdrvLdrLock(pDevExt);
5469
5470 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5471 if (!strcmp(pImage->szName, pszModule))
5472 break;
5473 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5474 {
5475 /*
5476 * Search the symbol strings.
5477 */
5478 const char *pchStrings = pImage->pachStrTab;
5479 PCSUPLDRSYM paSyms = pImage->paSymbols;
5480 for (i = 0; i < pImage->cSymbols; i++)
5481 {
5482 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5483 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5484 {
5485 /*
5486 * Found it! Calc the symbol address and add a reference to the module.
5487 */
5488 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5489 rc = supdrvLdrAddUsage(pSession, pImage);
5490 break;
5491 }
5492 }
5493 }
5494 else
5495 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5496
5497 supdrvLdrUnlock(pDevExt);
5498 }
5499 return rc;
5500}
5501
5502
5503/**
5504 * Updates the VMMR0 entry point pointers.
5505 *
5506 * @returns IPRT status code.
5507 * @param pDevExt Device globals.
5508 * @param   pvVMMR0             VMMR0 image handle.
5510 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5511 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5512 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5513 * @remark Caller must own the loader mutex.
5514 */
5515static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5516{
5517 int rc = VINF_SUCCESS;
5518    LogFlow(("supdrvLdrSetVMMR0EPs: pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5519
5520
5521 /*
5522 * Check if not yet set.
5523 */
5524 if (!pDevExt->pvVMMR0)
5525 {
5526 pDevExt->pvVMMR0 = pvVMMR0;
5527 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5528 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5529 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5530 }
5531 else
5532 {
5533 /*
5534 * Return failure or success depending on whether the values match or not.
5535 */
5536 if ( pDevExt->pvVMMR0 != pvVMMR0
5537 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5538 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5539 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5540 {
5541 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5542 rc = VERR_INVALID_PARAMETER;
5543 }
5544 }
5545 return rc;
5546}
5547
5548
5549/**
5550 * Unsets the VMMR0 entry points installed by supdrvLdrSetVMMR0EPs.
5551 *
5552 * @param pDevExt Device globals.
5553 */
5554static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5555{
5556 pDevExt->pvVMMR0 = NULL;
5557 pDevExt->pfnVMMR0EntryInt = NULL;
5558 pDevExt->pfnVMMR0EntryFast = NULL;
5559 pDevExt->pfnVMMR0EntryEx = NULL;
5560}
5561
5562
5563/**
5564 * Adds a usage reference in the specified session of an image.
5565 *
5566 * Called while owning the loader semaphore.
5567 *
5568 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5569 * @param pSession Session in question.
5570 * @param pImage Image which the session is using.
5571 */
5572static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5573{
5574 PSUPDRVLDRUSAGE pUsage;
5575 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5576
5577 /*
5578 * Referenced it already?
5579 */
5580 pUsage = pSession->pLdrUsage;
5581 while (pUsage)
5582 {
5583 if (pUsage->pImage == pImage)
5584 {
5585 pUsage->cUsage++;
5586 return VINF_SUCCESS;
5587 }
5588 pUsage = pUsage->pNext;
5589 }
5590
5591 /*
5592 * Allocate new usage record.
5593 */
5594 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5595 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5596 pUsage->cUsage = 1;
5597 pUsage->pImage = pImage;
5598 pUsage->pNext = pSession->pLdrUsage;
5599 pSession->pLdrUsage = pUsage;
5600 return VINF_SUCCESS;
5601}
5602
5603
5604/**
5605 * Frees a load image.
5606 *
5607 * @param pDevExt Pointer to device extension.
5608 * @param   pImage      Pointer to the image we are about to free.
5609 *                      This image must exist!
5610 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5611 */
5612static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5613{
5614 PSUPDRVLDRIMAGE pImagePrev;
5615 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5616
5617 /*
5618 * Warn if we're releasing images while the image loader interface is
5619 * locked down -- we won't be able to reload them!
5620 */
5621 if (pDevExt->fLdrLockedDown)
5622 Log(("supdrvLdrFree: Warning: unloading '%s' image, while loader interface is locked down!\n", pImage->szName));
5623
5624 /* find it - arg. should've used doubly linked list. */
5625 Assert(pDevExt->pLdrImages);
5626 pImagePrev = NULL;
5627 if (pDevExt->pLdrImages != pImage)
5628 {
5629 pImagePrev = pDevExt->pLdrImages;
5630 while (pImagePrev->pNext != pImage)
5631 pImagePrev = pImagePrev->pNext;
5632 Assert(pImagePrev->pNext == pImage);
5633 }
5634
5635 /* unlink */
5636 if (pImagePrev)
5637 pImagePrev->pNext = pImage->pNext;
5638 else
5639 pDevExt->pLdrImages = pImage->pNext;
5640
5641    /* If this is VMMR0.r0, unset its entry point pointers. */
5642 if (pDevExt->pvVMMR0 == pImage->pvImage)
5643 supdrvLdrUnsetVMMR0EPs(pDevExt);
5644
5645 /* check for objects with destructors in this image. (Shouldn't happen.) */
5646 if (pDevExt->pObjs)
5647 {
5648 unsigned cObjs = 0;
5649 PSUPDRVOBJ pObj;
5650 RTSpinlockAcquire(pDevExt->Spinlock);
5651 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5652 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5653 {
5654 pObj->pfnDestructor = NULL;
5655 cObjs++;
5656 }
5657 RTSpinlockRelease(pDevExt->Spinlock);
5658 if (cObjs)
5659 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5660 }
5661
5662 /* call termination function if fully loaded. */
5663 if ( pImage->pfnModuleTerm
5664 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5665 {
5666        LogFlow(("supdrvLdrFree: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5667 pImage->pfnModuleTerm(pImage);
5668 }
5669
5670 /* Inform the tracing component. */
5671 supdrvTracerModuleUnloading(pDevExt, pImage);
5672
5673 /* do native unload if appropriate. */
5674 if (pImage->fNative)
5675 supdrvOSLdrUnload(pDevExt, pImage);
5676
5677 /* free the image */
5678 pImage->cUsage = 0;
5679 pImage->pDevExt = NULL;
5680 pImage->pNext = NULL;
5681 pImage->uState = SUP_IOCTL_LDR_FREE;
5682 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5683 pImage->pvImageAlloc = NULL;
5684 RTMemFree(pImage->pachStrTab);
5685 pImage->pachStrTab = NULL;
5686 RTMemFree(pImage->paSymbols);
5687 pImage->paSymbols = NULL;
5688 RTMemFree(pImage);
5689}
5690
5691
5692/**
5693 * Acquires the loader lock.
5694 *
5695 * @returns IPRT status code.
5696 * @param pDevExt The device extension.
5697 */
5698DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5699{
5700#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5701 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5702#else
5703 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5704#endif
5705 AssertRC(rc);
5706 return rc;
5707}
5708
5709
5710/**
5711 * Releases the loader lock.
5712 *
5713 * @returns IPRT status code.
5714 * @param pDevExt The device extension.
5715 */
5716DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5717{
5718#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5719 return RTSemMutexRelease(pDevExt->mtxLdr);
5720#else
5721 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5722#endif
5723}
5724
5725
5726/**
5727 * Implements the service call request.
5728 *
5729 * @returns VBox status code.
5730 * @param pDevExt The device extension.
5731 * @param pSession The calling session.
5732 * @param pReq The request packet, valid.
5733 */
5734static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5735{
5736#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5737 int rc;
5738
5739 /*
5740 * Find the module first in the module referenced by the calling session.
5741 */
5742 rc = supdrvLdrLock(pDevExt);
5743 if (RT_SUCCESS(rc))
5744 {
5745 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5746 PSUPDRVLDRUSAGE pUsage;
5747
5748 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5749 if ( pUsage->pImage->pfnServiceReqHandler
5750 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5751 {
5752 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5753 break;
5754 }
5755 supdrvLdrUnlock(pDevExt);
5756
5757 if (pfnServiceReqHandler)
5758 {
5759 /*
5760 * Call it.
5761 */
5762 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5763 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5764 else
5765 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5766 }
5767 else
5768 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5769 }
5770
5771 /* log it */
5772 if ( RT_FAILURE(rc)
5773 && rc != VERR_INTERRUPTED
5774 && rc != VERR_TIMEOUT)
5775 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5776 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5777 else
5778 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5779 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5780 return rc;
5781#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5782 return VERR_NOT_IMPLEMENTED;
5783#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5784}
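
/*
 * Shape of a service request handler as dispatched above (a sketch; the
 * handler name and operation value are hypothetical, the signature follows
 * the two pfnServiceReqHandler calls in supdrvIOCtl_CallServiceModule):
 *
 *     static DECLCALLBACK(int) ExampleSrvReqHandler(PSUPDRVSESSION pSession, uint32_t uOperation,
 *                                                   uint64_t u64Arg, PSUPR0SERVICEREQHDR pReqHdr)
 *     {
 *         if (uOperation == 0 && !pReqHdr)    // operation without a payload
 *             return VINF_SUCCESS;
 *         return VERR_NOT_SUPPORTED;          // unknown operation
 *     }
 */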
5785
5786
5787/**
5788 * Implements the logger settings request.
5789 *
5790 * @returns VBox status code.
5791 * @param pDevExt The device extension.
5792 * @param pSession The caller's session.
5793 * @param pReq The request.
5794 */
5795static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5796{
5797 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5798 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5799 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5800 PRTLOGGER pLogger = NULL;
5801 int rc;
5802
5803 /*
5804 * Some further validation.
5805 */
5806 switch (pReq->u.In.fWhat)
5807 {
5808 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5809 case SUPLOGGERSETTINGS_WHAT_CREATE:
5810 break;
5811
5812 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5813 if (*pszGroup || *pszFlags || *pszDest)
5814 return VERR_INVALID_PARAMETER;
5815 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5816 return VERR_ACCESS_DENIED;
5817 break;
5818
5819 default:
5820 return VERR_INTERNAL_ERROR;
5821 }
5822
5823 /*
5824 * Get the logger.
5825 */
5826 switch (pReq->u.In.fWhich)
5827 {
5828 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5829 pLogger = RTLogGetDefaultInstance();
5830 break;
5831
5832 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5833 pLogger = RTLogRelDefaultInstance();
5834 break;
5835
5836 default:
5837 return VERR_INTERNAL_ERROR;
5838 }
5839
5840 /*
5841 * Do the job.
5842 */
5843 switch (pReq->u.In.fWhat)
5844 {
5845 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5846 if (pLogger)
5847 {
5848 rc = RTLogFlags(pLogger, pszFlags);
5849 if (RT_SUCCESS(rc))
5850 rc = RTLogGroupSettings(pLogger, pszGroup);
5851 NOREF(pszDest);
5852 }
5853 else
5854 rc = VERR_NOT_FOUND;
5855 break;
5856
5857 case SUPLOGGERSETTINGS_WHAT_CREATE:
5858 {
5859 if (pLogger)
5860 rc = VERR_ALREADY_EXISTS;
5861 else
5862 {
5863 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5864
5865 rc = RTLogCreate(&pLogger,
5866 0 /* fFlags */,
5867 pszGroup,
5868 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5869 ? "VBOX_LOG"
5870 : "VBOX_RELEASE_LOG",
5871 RT_ELEMENTS(s_apszGroups),
5872 s_apszGroups,
5873 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5874 NULL);
5875 if (RT_SUCCESS(rc))
5876 {
5877 rc = RTLogFlags(pLogger, pszFlags);
5878 NOREF(pszDest);
5879 if (RT_SUCCESS(rc))
5880 {
5881 switch (pReq->u.In.fWhich)
5882 {
5883 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5884 pLogger = RTLogSetDefaultInstance(pLogger);
5885 break;
5886 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5887 pLogger = RTLogRelSetDefaultInstance(pLogger);
5888 break;
5889 }
5890 }
5891 RTLogDestroy(pLogger);
5892 }
5893 }
5894 break;
5895 }
5896
5897 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5898 switch (pReq->u.In.fWhich)
5899 {
5900 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5901 pLogger = RTLogSetDefaultInstance(NULL);
5902 break;
5903 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5904 pLogger = RTLogRelSetDefaultInstance(NULL);
5905 break;
5906 }
5907 rc = RTLogDestroy(pLogger);
5908 break;
5909
5910 default:
5911 {
5912 rc = VERR_INTERNAL_ERROR;
5913 break;
5914 }
5915 }
5916
5917 return rc;
5918}
5919
5920
5921/**
5922 * Implements the MSR prober operations.
5923 *
5924 * @returns VBox status code.
5925 * @param pDevExt The device extension.
5926 * @param pReq The request.
5927 */
5928static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5929{
5930#ifdef SUPDRV_WITH_MSR_PROBER
5931 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5932 int rc;
5933
5934 switch (pReq->u.In.enmOp)
5935 {
5936 case SUPMSRPROBEROP_READ:
5937 {
5938 uint64_t uValue;
5939 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5940 if (RT_SUCCESS(rc))
5941 {
5942 pReq->u.Out.uResults.Read.uValue = uValue;
5943 pReq->u.Out.uResults.Read.fGp = false;
5944 }
5945 else if (rc == VERR_ACCESS_DENIED)
5946 {
5947 pReq->u.Out.uResults.Read.uValue = 0;
5948 pReq->u.Out.uResults.Read.fGp = true;
5949 rc = VINF_SUCCESS;
5950 }
5951 break;
5952 }
5953
5954 case SUPMSRPROBEROP_WRITE:
5955 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5956 if (RT_SUCCESS(rc))
5957 pReq->u.Out.uResults.Write.fGp = false;
5958 else if (rc == VERR_ACCESS_DENIED)
5959 {
5960 pReq->u.Out.uResults.Write.fGp = true;
5961 rc = VINF_SUCCESS;
5962 }
5963 break;
5964
5965 case SUPMSRPROBEROP_MODIFY:
5966 case SUPMSRPROBEROP_MODIFY_FASTER:
5967 rc = supdrvOSMsrProberModify(idCpu, pReq);
5968 break;
5969
5970 default:
5971 return VERR_INVALID_FUNCTION;
5972 }
5973 return rc;
5974#else
5975 return VERR_NOT_IMPLEMENTED;
5976#endif
5977}
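
/*
 * How a READ request is expected to be filled in and interpreted (a sketch;
 * MSR_IA32_TSC is used purely as an example register):
 *
 *     SUPMSRPROBER Req;
 *     Req.u.In.enmOp = SUPMSRPROBEROP_READ;
 *     Req.u.In.uMsr  = MSR_IA32_TSC;
 *     Req.u.In.idCpu = UINT32_MAX;            // any CPU -> NIL_RTCPUID
 *     rc = supdrvIOCtl_MsrProber(pDevExt, &Req);
 *     if (RT_SUCCESS(rc) && !Req.u.Out.uResults.Read.fGp)
 *         // Read.uValue is valid; fGp set means the RDMSR #GP'ed.
 */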
5978
5979#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5980
5981/**
5982 * Switches the TSC-delta measurement thread into the butchered state.
5983 *
5984 * @returns VBox status code.
5985 * @param pDevExt Pointer to the device instance data.
5986 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5987 * @param pszFailed An error message to log.
5988 * @param rcFailed The error code to exit the thread with.
5989 */
5990static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5991{
5992 if (!fSpinlockHeld)
5993 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5994
5995 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
5996 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5997    OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
5998 return rcFailed;
5999}
6000
6001
6002/**
6003 * The TSC-delta measurement thread.
6004 *
6005 * @returns VBox status code.
6006 * @param hThread The thread handle.
6007 * @param pvUser Opaque pointer to the device instance data.
6008 */
6009static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
6010{
6011 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6012 bool fInitialMeasurement = true;
6013 uint32_t cConsecutiveTimeouts = 0;
6014 int rc = VERR_INTERNAL_ERROR_2;
6015 for (;;)
6016 {
6017 /*
6018 * Switch on the current state.
6019 */
6020 SUPDRVTSCDELTATHREADSTATE enmState;
6021 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6022 enmState = pDevExt->enmTscDeltaThreadState;
6023 switch (enmState)
6024 {
6025 case kTscDeltaThreadState_Creating:
6026 {
6027 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6028 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
6029 if (RT_FAILURE(rc))
6030 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6031 /* fall thru */
6032 }
6033
6034 case kTscDeltaThreadState_Listening:
6035 {
6036 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6037
6038 /* Simple adaptive timeout. */
6039 if (cConsecutiveTimeouts++ == 10)
6040 {
6041                    if (pDevExt->cMsTscDeltaTimeout == 1)           /* 1 ms -> 10 ms */
6042                        pDevExt->cMsTscDeltaTimeout = 10;
6043                    else if (pDevExt->cMsTscDeltaTimeout == 10)     /* 10 ms -> 100 ms */
6044                        pDevExt->cMsTscDeltaTimeout = 100;
6045                    else if (pDevExt->cMsTscDeltaTimeout == 100)    /* 100 ms -> 500 ms */
6046                        pDevExt->cMsTscDeltaTimeout = 500;
6047 cConsecutiveTimeouts = 0;
6048 }
6049 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
6050 if ( RT_FAILURE(rc)
6051 && rc != VERR_TIMEOUT)
6052 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
6053 RTThreadUserReset(pDevExt->hTscDeltaThread);
6054 break;
6055 }
6056
6057 case kTscDeltaThreadState_WaitAndMeasure:
6058 {
6059 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
6060 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
6061 if (RT_FAILURE(rc))
6062 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6063 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6064 pDevExt->cMsTscDeltaTimeout = 1;
6065 RTThreadSleep(10);
6066 /* fall thru */
6067 }
6068
6069 case kTscDeltaThreadState_Measuring:
6070 {
6071 cConsecutiveTimeouts = 0;
6072 if (fInitialMeasurement)
6073 {
6074 int cTries = 8;
6075 int cMsWaitPerTry = 10;
6076 fInitialMeasurement = false;
6077 do
6078 {
6079 rc = supdrvMeasureInitialTscDeltas(pDevExt);
6080 if ( RT_SUCCESS(rc)
6081 || ( RT_FAILURE(rc)
6082 && rc != VERR_TRY_AGAIN
6083 && rc != VERR_CPU_OFFLINE))
6084 {
6085 break;
6086 }
6087 RTThreadSleep(cMsWaitPerTry);
6088 } while (cTries-- > 0);
6089 }
6090 else
6091 {
6092 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6093 unsigned iCpu;
6094
6095 /* Measure TSC-deltas only for the CPUs that are in the set. */
6096 rc = VINF_SUCCESS;
6097 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6098 {
6099 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6100 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
6101 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
6102 {
6103 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6104 }
6105 }
6106 }
6107 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6108 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
6109 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6110 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6111 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value. */
6112 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
6113 break;
6114 }
6115
6116 case kTscDeltaThreadState_Terminating:
6117 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
6118 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6119 return VINF_SUCCESS;
6120
6121 case kTscDeltaThreadState_Butchered:
6122 default:
6123 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
6124 }
6125 }
6126
6127 return rc;
6128}
6129
6130
6131/**
6132 * Waits for the TSC-delta measurement thread to respond to a state change.
6133 *
6134 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
6135 * other error code on internal error.
6136 *
6137 * @param   pDevExt         Pointer to the device instance data.
6138 * @param enmCurState The current state.
6139 * @param enmNewState The new state we're waiting for it to enter.
6140 */
6141static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
6142 SUPDRVTSCDELTATHREADSTATE enmNewState)
6143{
6144 /*
6145 * Wait a short while for the expected state transition.
6146 */
6147 int rc;
6148 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
6149 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6150 if (pDevExt->enmTscDeltaThreadState == enmNewState)
6151 {
6152 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6153 rc = VINF_SUCCESS;
6154 }
6155 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
6156 {
6157 /*
6158 * Wait longer if the state has not yet transitioned to the one we want.
6159 */
6160 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6161 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
6162 if ( RT_SUCCESS(rc)
6163 || rc == VERR_TIMEOUT)
6164 {
6165 /*
6166 * Check the state whether we've succeeded.
6167 */
6168 SUPDRVTSCDELTATHREADSTATE enmState;
6169 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6170 enmState = pDevExt->enmTscDeltaThreadState;
6171 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6172 if (enmState == enmNewState)
6173 rc = VINF_SUCCESS;
6174 else if (enmState == enmCurState)
6175 {
6176 rc = VERR_TIMEOUT;
6177 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
6178 enmNewState));
6179 }
6180 else
6181 {
6182 rc = VERR_INTERNAL_ERROR;
6183 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
6184 enmState, enmNewState));
6185 }
6186 }
6187 else
6188 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
6189 }
6190 else
6191 {
6192 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6193 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
6194 rc = VERR_INTERNAL_ERROR;
6195 }
6196
6197 return rc;
6198}
6199
6200
6201/**
6202 * Terminates the TSC-delta measurement thread.
6203 *
6204 * @param pDevExt Pointer to the device instance data.
6205 */
6206static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
6207{
6208 int rc;
6209 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6210 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
6211 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6212 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6213 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
6214 if (RT_FAILURE(rc))
6215 {
6216 /* Signal a few more times before giving up. */
6217 int cTriesLeft = 5;
6218 while (--cTriesLeft > 0)
6219 {
6220 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6221 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
6222 if (rc != VERR_TIMEOUT)
6223 break;
6224 }
6225 }
6226}
6227
6228
6229/**
6230 * Initializes and spawns the TSC-delta measurement thread.
6231 *
6232 * A thread is required for servicing re-measurement requests from events like
6233 * CPUs coming online, suspend/resume etc., as the measurements cannot be
6234 * done synchronously in all contexts on all OSes.
6235 *
6236 * @returns VBox status code.
6237 * @param pDevExt Pointer to the device instance data.
6238 *
6239 * @remarks Must only be called -after- initializing GIP and setting up MP
6240 * notifications!
6241 */
6242static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
6243{
6244 int rc;
6245 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6246 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
6247 if (RT_SUCCESS(rc))
6248 {
6249 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
6250 if (RT_SUCCESS(rc))
6251 {
6252 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
6253 pDevExt->cMsTscDeltaTimeout = 1;
6254 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
6255 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
6256 if (RT_SUCCESS(rc))
6257 {
6258 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
6259 if (RT_SUCCESS(rc))
6260 {
6261 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6262 return rc;
6263 }
6264
6265                OSDBGPRINT(("supdrvTscDeltaThreadInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
6266 supdrvTscDeltaThreadTerminate(pDevExt);
6267 }
6268 else
6269                OSDBGPRINT(("supdrvTscDeltaThreadInit: RTThreadCreate failed. rc=%Rrc\n", rc));
6270 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6271 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6272 }
6273 else
6274            OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
6275 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6276 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6277 }
6278 else
6279        OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
6280
6281 return rc;
6282}
6283
6284
6285/**
6286 * Terminates the TSC-delta measurement thread and cleanup.
6287 *
6288 * @param pDevExt Pointer to the device instance data.
6289 */
6290static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
6291{
6292 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
6293 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6294 {
6295 supdrvTscDeltaThreadTerminate(pDevExt);
6296 }
6297
6298 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
6299 {
6300 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6301 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6302 }
6303
6304 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6305 {
6306 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6307 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6308 }
6309
6310 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6311}
6312
6313
6314/**
6315 * Waits for TSC-delta measurements to be completed for all online CPUs.
6316 *
6317 * @returns VBox status code.
6318 * @param pDevExt Pointer to the device instance data.
6319 */
6320static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
6321{
6322 int cTriesLeft = 5;
6323 int cMsTotalWait;
6324 int cMsWaited = 0;
6325 int cMsWaitGranularity = 1;
6326
6327 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6328 AssertReturn(pGip, VERR_INVALID_POINTER);
6329
6330 if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
6331 return VERR_THREAD_NOT_WAITABLE;
6332
6333 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
6334 while (cTriesLeft-- > 0)
6335 {
6336 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
6337 return VINF_SUCCESS;
6338 RTThreadSleep(cMsWaitGranularity);
6339 cMsWaited += cMsWaitGranularity;
6340 if (cMsWaited >= cMsTotalWait)
6341 break;
6342 }
6343
6344 return VERR_TIMEOUT;
6345}
6346
6347#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6348
6349/**
6350 * Applies the TSC delta to the supplied raw TSC value.
6351 *
6352 * @returns VBox status code. (Ignored by all users, just FYI.)
6353 * @param pGip Pointer to the GIP.
6354 * @param puTsc Pointer to a valid TSC value before the TSC delta has been applied.
6355 * @param idApic The APIC ID of the CPU @c puTsc corresponds to.
6356 * @param   pfDeltaApplied  Where to store whether the TSC delta was successfully
6357 *                          applied or not (optional, can be NULL).
6358 *
6359 * @remarks May be called with interrupts disabled in ring-0!
6360 *
6361 * @note Don't you dare change the delta calculation. If you really do, make
6362 * sure you update all places where it's used (IPRT, SUPLibAll.cpp,
6363 * SUPDrv.c, supdrvGipMpEvent, and more).
6364 */
6365DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
6366{
6367 int rc;
6368
6369 /*
6370 * Validate input.
6371 */
6372 AssertPtr(puTsc);
6373 AssertPtr(pGip);
6374 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6375
6376 /*
6377 * Carefully convert the idApic into a GIPCPU entry.
6378 */
6379 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
6380 {
6381 uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
6382 if (RT_LIKELY(iCpu < pGip->cCpus))
6383 {
6384 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6385
6386 /*
6387 * Apply the delta if valid.
6388 */
6389 if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
6390 {
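                /*
                 * Subtracting the delta normalizes the value to the master
                 * CPU's TSC. With illustrative numbers: if this CPU reads
                 * 1000100 where the master reads 1000000, its stored delta
                 * is +100 and 1000100 - 100 = 1000000.
                 */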
6391 *puTsc -= pGipCpu->i64TSCDelta;
6392 if (pfDeltaApplied)
6393 *pfDeltaApplied = true;
6394 return VINF_SUCCESS;
6395 }
6396
6397 rc = VINF_SUCCESS;
6398 }
6399 else
6400 {
6401 AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
6402 rc = VERR_INVALID_CPU_INDEX;
6403 }
6404 }
6405 else
6406 {
6407 AssertMsgFailed(("idApic=%u\n", idApic));
6408 rc = VERR_INVALID_CPU_ID;
6409 }
6410 if (pfDeltaApplied)
6411 *pfDeltaApplied = false;
6412 return rc;
6413}
6414
6415
6416/**
6417 * Measures the TSC frequency of the system.
6418 *
6419 * Uses a busy-wait method for the async case, as it is intended to help push
6420 * the CPU frequency up, while a sleeping method is used for the invariant cases.
6421 *
6422 * The TSC frequency can vary on systems which are not reported as invariant.
6423 * On such systems the object of this function is to find out the nominal,
6424 * maximum TSC frequency under 'normal' CPU operation.
6425 *
6426 * @returns VBox status code.
6427 * @param pDevExt Pointer to the device instance.
6428 *
6429 * @remarks Must be called only -after- measuring the TSC deltas.
6430 */
6431static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6432{
6433 int cTriesLeft = 4;
6434 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6435
6436 /* Assert order. */
6437 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6438 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6439
6440 while (cTriesLeft-- > 0)
6441 {
6442 RTCCUINTREG uFlags;
6443 uint64_t u64NanoTsBefore;
6444 uint64_t u64NanoTsAfter;
6445 uint64_t u64TscBefore;
6446 uint64_t u64TscAfter;
6447 uint8_t idApicBefore;
6448 uint8_t idApicAfter;
6449
6450 /*
6451 * Synchronize with the host OS clock tick before reading the TSC.
6452         * Especially important on older Windows versions where the granularity is terrible.
6453 */
6454 u64NanoTsBefore = RTTimeSystemNanoTS();
6455 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6456 ASMNopPause();
6457
6458 uFlags = ASMIntDisableFlags();
6459 idApicBefore = ASMGetApicId();
6460 u64TscBefore = ASMReadTSC();
6461 u64NanoTsBefore = RTTimeSystemNanoTS();
6462 ASMSetFlags(uFlags);
6463
6464 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6465 {
6466 /*
6467             * Sleep-wait since the TSC frequency is constant; it eases host load.
6468             * A shorter interval produces more variance in the frequency (esp. Windows).
6469 */
6470 RTThreadSleep(200);
6471 u64NanoTsAfter = RTTimeSystemNanoTS();
6472 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6473 ASMNopPause();
6474 u64NanoTsAfter = RTTimeSystemNanoTS();
6475 }
6476 else
6477 {
6478 /* Busy-wait keeping the frequency up and measure. */
6479 for (;;)
6480 {
6481 u64NanoTsAfter = RTTimeSystemNanoTS();
6482 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6483 ASMNopPause();
6484 else
6485 break;
6486 }
6487 }
6488
6489 uFlags = ASMIntDisableFlags();
6490 idApicAfter = ASMGetApicId();
6491 u64TscAfter = ASMReadTSC();
6492 ASMSetFlags(uFlags);
6493
6494 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6495 {
6496 int rc;
6497 bool fAppliedBefore;
6498 bool fAppliedAfter;
6499 rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6500 rc = supdrvTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6501
6502 if ( !fAppliedBefore
6503 || !fAppliedAfter)
6504 {
6505#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6506 /*
6507 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6508 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6509 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6510 * proceed. This should be triggered just once if we're rather unlucky.
6511 */
6512 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6513 if (rc == VERR_TIMEOUT)
6514 {
6515                SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timed out waiting for TSC-delta measurements.\n");
6516 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6517 }
6518#else
6519 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6520 idApicBefore, idApicAfter, cTriesLeft);
6521#endif
6522 continue;
6523 }
6524 }
6525
6526 /*
6527 * Update GIP.
6528 */
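        /*
         * Illustrative numbers: 250000000 ticks elapsed over 100000000 ns
         * gives (250000000 * 10^9) / 100000000 = 2.5e9, i.e. 2.5 GHz. For
         * the ~100-200 ms intervals used above the multiplication stays
         * well below UINT64_MAX for realistic CPU frequencies.
         */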
6529 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6530 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6531 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6532 return VINF_SUCCESS;
6533 }
6534
6535 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6536}
6537
6538
6539/**
6540 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6541 *
6542 * @param pTimer The timer.
6543 * @param pvUser Opaque pointer to the device instance data.
6544 * @param iTick The timer tick.
6545 */
6546static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6547{
6548 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6549 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6550 bool fDeltaApplied = false;
6551 uint8_t idApic;
6552 uint64_t u64DeltaNanoTS;
6553 uint64_t u64DeltaTsc;
6554 uint64_t u64NanoTS;
6555 uint64_t u64Tsc;
6556 RTCCUINTREG uFlags;
6557
6558 /* Paranoia. */
6559 Assert(pGip);
6560 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6561
6562#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6563 u64NanoTS = RTTimeSystemNanoTS();
6564 while (RTTimeSystemNanoTS() == u64NanoTS)
6565 ASMNopPause();
6566#endif
6567 uFlags = ASMIntDisableFlags();
6568 idApic = ASMGetApicId();
6569 u64Tsc = ASMReadTSC();
6570 u64NanoTS = RTTimeSystemNanoTS();
6571 ASMSetFlags(uFlags);
6572 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6573 supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6574 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6575 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6576
6577 if (RT_UNLIKELY( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
6578 && !fDeltaApplied))
6579 {
6580        Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas are still unavailable after %d seconds!\n",
6581 GIP_TSC_REFINE_INTERVAL));
6582 return;
6583 }
6584
6585 /* Calculate the TSC frequency. */
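    /*
     * The 64-bit fast path requires u64DeltaTsc * 10^9 to fit in 64 bits,
     * i.e. u64DeltaTsc < 2^64 / 10^9 (about 1.8e10 ticks, only a handful of
     * seconds worth on GHz-class CPUs), hence the 128-bit fallback for the
     * long refinement interval.
     */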
6586 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6587 && u64DeltaNanoTS < UINT32_MAX)
6588 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6589 else
6590 {
6591 RTUINT128U CpuHz, Tmp, Divisor;
6592 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6593 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6594 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6595 pGip->u64CpuHz = CpuHz.s.Lo;
6596 }
6597
6598 /* Update rest of GIP. */
6599    Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP(). */
6600 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6601}
6602
6603
6604/**
6605 * Starts the TSC-frequency refinement phase asynchronously.
6606 *
6607 * @param pDevExt Pointer to the device instance data.
6608 */
6609static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6610{
6611 uint64_t u64NanoTS;
6612 RTCCUINTREG uFlags;
6613 uint8_t idApic;
6614 int rc;
6615 PSUPGLOBALINFOPAGE pGip;
6616
6617 /* Validate. */
6618 Assert(pDevExt);
6619 Assert(pDevExt->pGip);
6620 pGip = pDevExt->pGip;
6621
6622#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6623 /*
6624 * If the TSC-delta thread is created, wait until it's done calculating
6625 * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
6626 */
6627 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
6628 && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
6629 {
6630 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6631 if (rc == VERR_TIMEOUT)
6632 {
6633 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6634 return;
6635 }
6636 }
6637#endif
6638
6639 /*
6640 * Record the TSC and NanoTS as the starting anchor point for refinement of the
6641 * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
6642 * reading of the TSC and the NanoTS as close as possible.
6643 */
6644 u64NanoTS = RTTimeSystemNanoTS();
6645 while (RTTimeSystemNanoTS() == u64NanoTS)
6646 ASMNopPause();
6647 uFlags = ASMIntDisableFlags();
6648 idApic = ASMGetApicId();
6649 pDevExt->u64TscAnchor = ASMReadTSC();
6650 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6651 ASMSetFlags(uFlags);
6652 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6653 supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
6654
6655 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6656 if (RT_SUCCESS(rc))
6657 {
6658 /*
6659 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6660 * interval as small as possible while gaining the most consistent and accurate frequency
6661 * (compared to what the host OS might have measured).
6662 *
6663     * In theory, we gain more accuracy with longer intervals, but we want VMs to start up with the
6664 * same TSC frequency whenever possible so we need to keep the interval short.
6665 */
6666 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6667 AssertRC(rc);
6668 }
6669 else
6670 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6671}
6672
6673
6674/**
6675 * Creates the GIP.
6676 *
6677 * @returns VBox status code.
6678 * @param pDevExt Instance data. GIP stuff may be updated.
6679 */
6680static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6681{
6682 PSUPGLOBALINFOPAGE pGip;
6683 RTHCPHYS HCPhysGip;
6684 uint32_t u32SystemResolution;
6685 uint32_t u32Interval;
6686 uint32_t u32MinInterval;
6687 uint32_t uMod;
6688 unsigned cCpus;
6689 int rc;
6690
6691 LogFlow(("supdrvGipCreate:\n"));
6692
6693 /* Assert order. */
6694 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6695 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6696 Assert(!pDevExt->pGipTimer);
6697
6698 /*
6699 * Check the CPU count.
6700 */
6701 cCpus = RTMpGetArraySize();
6702 if ( cCpus > RTCPUSET_MAX_CPUS
6703 || cCpus > 256 /* ApicId is used for the mappings */)
6704 {
6705 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6706 return VERR_TOO_MANY_CPUS;
6707 }
6708
6709 /*
6710 * Allocate a contiguous set of pages with a default kernel mapping.
6711 */
6712 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6713 if (RT_FAILURE(rc))
6714 {
6715 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6716 return rc;
6717 }
6718 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6719 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6720
6721 /*
6722 * Allocate the TSC-delta sync struct on a separate cache line.
6723 */
6724 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6725 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6726 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
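    /* A minimal sketch of the over-allocate-and-align idiom used above, assuming
       a 64-byte cache line: the 63 extra bytes guarantee that a 64-byte boundary
       falls inside the allocation, and RT_ALIGN_PT rounds the pointer up to it:
           void *pv        = RTMemAllocZ(cb + 63);
           void *pvAligned = (void *)(((uintptr_t)pv + 63) & ~(uintptr_t)63);
       The raw pointer (pvTscDeltaSync) is the one that must be passed to
       RTMemFree, as done in supdrvGipDestroy(). */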
6727
6728 /*
6729 * Find a reasonable update interval and initialize the structure.
6730 */
6731 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6732    /** @todo figure out why using a 100 ms interval upsets timekeeping in VMs.
6733 * See @bugref{6710}. */
6734 u32MinInterval = RT_NS_10MS;
6735 u32SystemResolution = RTTimerGetSystemGranularity();
6736 u32Interval = u32MinInterval;
6737 uMod = u32MinInterval % u32SystemResolution;
6738 if (uMod)
6739 u32Interval += u32SystemResolution - uMod;
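    /* Example: with a 4 ms (4000000 ns) system timer granularity, the 10 ms
       minimum gives uMod = 2 ms, so the interval is rounded up to 12 ms, the
       smallest multiple of the granularity that isn't below the minimum. */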
6740
6741 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6742
6743 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
6744 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6745 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6746 {
6747        /* Basically, invariant Windows boxes should never be detected as async (i.e. TSC-deltas should be 0). */
6748 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6749 return VERR_INTERNAL_ERROR_2;
6750 }
6751
6752 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
6753 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
6754#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6755 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6756 {
6757 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6758 rc = supdrvTscDeltaThreadInit(pDevExt);
6759 }
6760#endif
6761 if (RT_SUCCESS(rc))
6762 {
6763 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6764 if (RT_SUCCESS(rc))
6765 {
6766 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6767 if (RT_SUCCESS(rc))
6768 {
6769#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6770 uint16_t iCpu;
6771 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6772 {
6773 /*
6774 * Measure the TSC deltas now that we have MP notifications.
6775 */
6776 int cTries = 5;
6777 do
6778 {
6779 rc = supdrvMeasureInitialTscDeltas(pDevExt);
6780 if ( rc != VERR_TRY_AGAIN
6781 && rc != VERR_CPU_OFFLINE)
6782 break;
6783 } while (--cTries > 0);
6784 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6785 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6786 }
6787 else
6788 {
6789 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6790 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
6791 }
6792#endif
6793 if (RT_SUCCESS(rc))
6794 {
6795 rc = supdrvGipMeasureTscFreq(pDevExt);
6796 if (RT_SUCCESS(rc))
6797 {
6798 /*
6799 * Create the timer.
6800 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6801 */
6802 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6803 {
6804 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6805 pDevExt);
6806 if (rc == VERR_NOT_SUPPORTED)
6807 {
6808 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6809 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6810 }
6811 }
6812 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6813 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
6814 supdrvGipSyncAndInvariantTimer, pDevExt);
6815 if (RT_SUCCESS(rc))
6816 {
6817 /*
6818 * We're good.
6819 */
6820 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6821 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6822
6823 g_pSUPGlobalInfoPage = pGip;
6824 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6825 supdrvRefineTscFreq(pDevExt);
6826 return VINF_SUCCESS;
6827 }
6828
6829 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6830 Assert(!pDevExt->pGipTimer);
6831 }
6832 else
6833 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6834 }
6835 else
6836 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
6837 }
6838 else
6839 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6840 }
6841 else
6842            OSDBGPRINT(("supdrvGipCreate: failed to register MP event notification. rc=%Rrc\n", rc));
6843 }
6844 else
6845        OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaThreadInit failed. rc=%Rrc\n", rc));
6846
6847 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6848 return rc;
6849}
6850
6851
6852/**
6853 * Terminates the GIP.
6854 *
6855 * @param pDevExt Instance data. GIP stuff may be updated.
6856 */
6857static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6858{
6859 int rc;
6860#ifdef DEBUG_DARWIN_GIP
6861 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6862 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6863 pDevExt->pGipTimer, pDevExt->GipMemObj));
6864#endif
6865
6866 /*
6867 * Stop receiving MP notifications before tearing anything else down.
6868 */
6869 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6870
6871#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6872 /*
6873 * Terminate the TSC-delta measurement thread and resources.
6874 */
6875 supdrvTscDeltaTerm(pDevExt);
6876#endif
6877
6878 /*
6879 * Destroy the TSC-refinement one-shot timer.
6880 */
6881 if (pDevExt->pTscRefineTimer)
6882 {
6883 RTTimerDestroy(pDevExt->pTscRefineTimer);
6884 pDevExt->pTscRefineTimer = NULL;
6885 }
6886
6887 if (pDevExt->pvTscDeltaSync)
6888 {
6889 RTMemFree(pDevExt->pvTscDeltaSync);
6890 pDevExt->pTscDeltaSync = NULL;
6891 pDevExt->pvTscDeltaSync = NULL;
6892 }
6893
6894 /*
6895     * Invalidate the GIP data.
6896 */
6897 if (pDevExt->pGip)
6898 {
6899 supdrvGipTerm(pDevExt->pGip);
6900 pDevExt->pGip = NULL;
6901 }
6902 g_pSUPGlobalInfoPage = NULL;
6903
6904 /*
6905 * Destroy the timer and free the GIP memory object.
6906 */
6907 if (pDevExt->pGipTimer)
6908 {
6909 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6910 pDevExt->pGipTimer = NULL;
6911 }
6912
6913 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6914 {
6915 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6916 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6917 }
6918
6919 /*
6920     * Finally, make sure we've released the system timer resolution request
6921 * if one actually succeeded and is still pending.
6922 */
6923 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6924}
6925
6926
6927/**
6928 * Timer callback function for the sync and invariant GIP modes.
6929 *
6930 * @param pTimer The timer.
6931 * @param pvUser Opaque pointer to the device extension.
6932 * @param iTick The timer tick.
6933 */
6934static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6935{
6936 RTCCUINTREG uFlags;
6937 uint64_t u64TSC;
6938 uint64_t u64NanoTS;
6939 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6940 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6941
6942 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6943 u64TSC = ASMReadTSC();
6944 u64NanoTS = RTTimeSystemNanoTS();
6945
6946 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6947 {
6948 /*
6949         * The calculations in supdrvGipUpdate() are very timing sensitive and don't handle
6950         * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6951         * affected a bit until we get proper TSC deltas than to implement options like
6952 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6953 *
6954         * The likelihood of this happening is really low. On Windows, Linux, and Solaris,
6955         * timers fire on the CPU they were registered/started on. Darwin timers don't
6956         * necessarily (they are high-priority threads waiting).
6957 */
6958 Assert(!ASMIntAreEnabled());
6959 supdrvTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6960 }
6961
6962 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6963
6964 ASMSetFlags(uFlags);
6965
6966#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6967 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
6968 && !RTCpuSetIsEmpty(&pDevExt->TscDeltaCpuSet))
6969 {
6970 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6971 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
6972 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
6973 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
6974 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6975 /** @todo Do the actual poking using -- RTThreadUserSignal() */
6976 }
6977#endif
6978}
6979
6980
6981/**
6982 * Timer callback function for async GIP mode.
6983 * @param pTimer The timer.
6984 * @param pvUser Opaque pointer to the device extension.
6985 * @param iTick The timer tick.
6986 */
6987static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6988{
6989 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6990 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6991 RTCPUID idCpu = RTMpCpuId();
6992 uint64_t u64TSC = ASMReadTSC();
6993 uint64_t NanoTS = RTTimeSystemNanoTS();
6994
6995 /** @todo reset the transaction number and whatnot when iTick == 1. */
6996 if (pDevExt->idGipMaster == idCpu)
6997 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6998 else
6999 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
7000
7001 ASMSetFlags(fOldFlags);
7002}
7003
7004
7005/**
7006 * Finds our (@a idCpu) entry, or allocates a new one if not found.
7007 *
7008 * @returns Index of the CPU in the GIP CPU array.
7009 * @param pGip The GIP.
7010 * @param idCpu The CPU ID.
7011 */
7012static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
7013{
7014 uint32_t i, cTries;
7015
7016 /*
7017 * ASSUMES that CPU IDs are constant.
7018 */
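    /* If the linear scan below fails, the allocation loop that follows claims a
       slot by compare-and-exchanging idCpu into an entry still holding
       NIL_RTCPUID; when several CPUs come online at once exactly one wins any
       given slot and the losers simply rescan.  The 32 retries only guard
       against the pathological case of the array filling up underneath us. */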
7019 for (i = 0; i < pGip->cCpus; i++)
7020 if (pGip->aCPUs[i].idCpu == idCpu)
7021 return i;
7022
7023 cTries = 0;
7024 do
7025 {
7026 for (i = 0; i < pGip->cCpus; i++)
7027 {
7028 bool fRc;
7029 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
7030 if (fRc)
7031 return i;
7032 }
7033 } while (cTries++ < 32);
7034 AssertReleaseFailed();
7035 return i - 1;
7036}
7037
7038
7039/**
7040 * Finds the GIP CPU index corresponding to @a idCpu.
7041 *
7042 * @returns GIP CPU array index, UINT32_MAX if not found.
7043 * @param pGip The GIP.
7044 * @param idCpu The CPU ID.
7045 */
7046static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
7047{
7048 uint32_t i;
7049 for (i = 0; i < pGip->cCpus; i++)
7050 if (pGip->aCPUs[i].idCpu == idCpu)
7051 return i;
7052 return UINT32_MAX;
7053}
7054
7055
7056/**
7057 * The calling CPU should be accounted as online, update GIP accordingly.
7058 *
7059 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
7060 *
7061 * @param pDevExt The device extension.
7062 * @param idCpu The CPU ID.
7063 */
7064static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
7065{
7066 int iCpuSet = 0;
7067 uint16_t idApic = UINT16_MAX;
7068 uint32_t i = 0;
7069 uint64_t u64NanoTS = 0;
7070 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7071
7072 AssertPtrReturnVoid(pGip);
7073 AssertRelease(idCpu == RTMpCpuId());
7074 Assert(pGip->cPossibleCpus == RTMpGetCount());
7075
7076 /*
7077 * Do this behind a spinlock with interrupts disabled as this can fire
7078 * on all CPUs simultaneously, see @bugref{6110}.
7079 */
7080 RTSpinlockAcquire(pDevExt->hGipSpinlock);
7081
7082 /*
7083 * Update the globals.
7084 */
7085 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
7086 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
7087 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
7088 if (iCpuSet >= 0)
7089 {
7090 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
7091 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
7092 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
7093 }
7094
7095 /*
7096 * Update the entry.
7097 */
7098 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
7099 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
7100 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7101 idApic = ASMGetApicId();
7102 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
7103 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
7104 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
7105
7106 /*
7107 * Update the APIC ID and CPU set index mappings.
7108 */
7109 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
7110 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
7111
7112 /* Update the Mp online/offline counter. */
7113 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
7114
7115 /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
7116 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
7117 {
7118 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
7119#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7120 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7121 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
7122 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
7123 {
7124 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
7125 }
7126 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7127#endif
7128 }
7129
7130 /* commit it */
7131 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
7132
7133 RTSpinlockRelease(pDevExt->hGipSpinlock);
7134}
7135
7136
7137/**
7138 * The CPU should be accounted as offline, update the GIP accordingly.
7139 *
7140 * This is used by supdrvGipMpEvent.
7141 *
7142 * @param pDevExt The device extension.
7143 * @param idCpu The CPU ID.
7144 */
7145static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
7146{
7147 int iCpuSet;
7148 unsigned i;
7149
7150 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7151
7152 AssertPtrReturnVoid(pGip);
7153 RTSpinlockAcquire(pDevExt->hGipSpinlock);
7154
7155 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
7156 AssertReturnVoid(iCpuSet >= 0);
7157
7158 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
7159 AssertReturnVoid(i < pGip->cCpus);
7160 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
7161
7162 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
7163 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
7164
7165 /* Update the Mp online/offline counter. */
7166 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
7167
7168 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
7169 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
7170 {
7171 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
7172 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
7173 }
7174
7175 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
7176 {
7177 /* Reset the TSC delta, we will recalculate it lazily. */
7178 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
7179 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
7180 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
7181 }
7182
7183 /* commit it */
7184 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
7185
7186 RTSpinlockRelease(pDevExt->hGipSpinlock);
7187}
7188
7189
7190/**
7191 * Multiprocessor event notification callback.
7192 *
7193 * This is used to make sure that the GIP master gets passed on to
7194 * another CPU. It also updates the associated CPU data.
7195 *
7196 * @param enmEvent The event.
7197 * @param idCpu The cpu it applies to.
7198 * @param pvUser Pointer to the device extension.
7199 *
7200 * @remarks This function -must- fire on the newly online'd CPU for the
7201 * RTMPEVENT_ONLINE case and can fire on any CPU for the
7202 * RTMPEVENT_OFFLINE case.
7203 */
7204static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
7205{
7206 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
7207 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7208
7209 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7210
7211 /*
7212 * Update the GIP CPU data.
7213 */
7214 if (pGip)
7215 {
7216 switch (enmEvent)
7217 {
7218 case RTMPEVENT_ONLINE:
7219 AssertRelease(idCpu == RTMpCpuId());
7220 supdrvGipMpEventOnline(pDevExt, idCpu);
7221 break;
7222 case RTMPEVENT_OFFLINE:
7223 supdrvGipMpEventOffline(pDevExt, idCpu);
7224 break;
7225 }
7226 }
7227
7228 /*
7229 * Make sure there is a master GIP.
7230 */
7231 if (enmEvent == RTMPEVENT_OFFLINE)
7232 {
7233 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
7234 if (idGipMaster == idCpu)
7235 {
7236 /*
7237 * The GIP master is going offline, find a new one.
7238 */
7239 bool fIgnored;
7240 unsigned i;
7241 RTCPUID idNewGipMaster = NIL_RTCPUID;
7242 RTCPUSET OnlineCpus;
7243 RTMpGetOnlineSet(&OnlineCpus);
7244
7245 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
7246 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
7247 {
7248 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
7249 if (idCurCpu != idGipMaster)
7250 {
7251 idNewGipMaster = idCurCpu;
7252 break;
7253 }
7254 }
7255
7256 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
7257 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
7258 NOREF(fIgnored);
7259 }
7260 }
7261}
7262
7263
7264/*
7265 * Select TSC delta measurement algorithm.
7266 */
7267#if 1
7268# define GIP_TSC_DELTA_METHOD_1
7269#else
7270# define GIP_TSC_DELTA_METHOD_2
7271#endif
7272
7273
7274#ifdef GIP_TSC_DELTA_METHOD_2
7275
7276/**
7277 * TSC delta measurement algorithm \#2 result entry.
7278 */
7279typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
7280{
7281 uint32_t iSeqMine;
7282 uint32_t iSeqOther;
7283 uint64_t uTsc;
7284} SUPDRVTSCDELTAMETHOD2ENTRY;
7285
7286/**
7287 * TSC delta measurement algorithm \#2 data.
7288 */
7289typedef struct SUPDRVTSCDELTAMETHOD2
7290{
7291 /** Padding to make sure the iCurSeqNo is in its own cache line.
7292 * ASSUMES cacheline sizes <= 128 bytes. */
7293 uint32_t au32CacheLinePaddingBefore[128 / sizeof(uint32_t)];
7294 /** The current sequence number of this worker. */
7295 uint32_t volatile iCurSeqNo;
7296 /** Padding to make sure the iCurSeqNo is in its own cache line.
7297 * ASSUMES cacheline sizes <= 128 bytes. */
7298 uint32_t au32CacheLinePaddingAfter[128 / sizeof(uint32_t) - 1];
7299 /** Result table. */
7300 SUPDRVTSCDELTAMETHOD2ENTRY aResults[96];
7301} SUPDRVTSCDELTAMETHOD2;
7302/** Pointer to the data for TSC delta measurement algorithm \#2. */
7303typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
7304
7305#endif /* GIP_TSC_DELTA_METHOD_2 */
7306
7307/**
7308 * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
7309 * callback worker.
7310 */
7311typedef struct SUPDRVGIPTSCDELTARGS
7312{
7313 PSUPDRVDEVEXT pDevExt;
7314 PSUPGIPCPU pWorker;
7315 PSUPGIPCPU pMaster;
7316 RTCPUID idMaster;
7317#ifdef GIP_TSC_DELTA_METHOD_2
7318 PSUPDRVTSCDELTAMETHOD2 pMasterData;
7319 PSUPDRVTSCDELTAMETHOD2 pWorkerData;
7320 uint32_t cHits;
7321 /*uint32_t cOffByOne;*/
7322    uint32_t                    iAttempt;         /**< 1-based outer loop counter. */
7323 bool fLagMaster;
7324 bool fLagWorker;
7325#endif
7326} SUPDRVGIPTSCDELTARGS;
7327typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
7328
7329
7330#ifdef GIP_TSC_DELTA_METHOD_2
7331/*
7332 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
7333 */
7334# undef GIP_TSC_DELTA_LOOPS
7335# undef GIP_TSC_DELTA_READ_TIME_LOOPS
7336# undef GIP_TSC_DELTA_PRIMER_LOOPS
7337# define GIP_TSC_DELTA_LOOPS 17
7338# define GIP_TSC_DELTA_PRIMER_LOOPS 1
7339# define GIP_TSC_DELTA_READ_TIME_LOOPS GIP_TSC_DELTA_PRIMER_LOOPS /* no read-time-loops necessary */
7340
7341
7342static int supdrvTscDeltaMethod2Init(PSUPDRVGIPTSCDELTARGS pArgs)
7343{
7344 uint32_t const fFlags = /*RTMEMALLOCEX_FLAGS_ANY_CTX |*/ RTMEMALLOCEX_FLAGS_ZEROED;
7345 int rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pWorkerData);
7346 if (RT_SUCCESS(rc))
7347 rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pMasterData);
7348 return rc;
7349}
7350
7351
7352static void supdrvTscDeltaMethod2Term(PSUPDRVGIPTSCDELTARGS pArgs)
7353{
7354 RTMemFreeEx(pArgs->pMasterData, sizeof(*pArgs->pMasterData));
7355 RTMemFreeEx(pArgs->pWorkerData, sizeof(*pArgs->pWorkerData));
7356 /*SUPR0Printf("cHits=%d cOffByOne=%d m=%d w=%d\n", pArgs->cHits, pArgs->cOffByOne, pArgs->pMaster->idApic, pArgs->pWorker->idApic);*/
7357}
7358
7359
7360static void supdrvTscDeltaMethod2Looped(PSUPDRVGIPTSCDELTARGS pArgs, RTCPUID idCpu, unsigned iLoop)
7361{
7362 if (pArgs->idMaster == idCpu)
7363 {
7364 if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
7365 {
7366 if (iLoop == 0)
7367 pArgs->iAttempt++;
7368
7369            /* Lag during the priming to be nice to everyone. */
7370 pArgs->fLagMaster = true;
7371 pArgs->fLagWorker = true;
7372 }
7373 else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4)
7374 {
7375 /* 25 % of the body without lagging. */
7376 pArgs->fLagMaster = false;
7377 pArgs->fLagWorker = false;
7378 }
7379 else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4 * 2)
7380 {
7381 /* 25 % of the body with both lagging. */
7382 pArgs->fLagMaster = true;
7383 pArgs->fLagWorker = true;
7384 }
7385 else
7386 {
7387 /* 50% of the body with alternating lag. */
7388 pArgs->fLagMaster = (iLoop & 1) == 0;
7389 pArgs->fLagWorker = (iLoop & 1) == 1;
7390 }
7391 }
7392}
7393
7394
7395/**
7396 * The core function of the 2nd TSC delta measurement algorithm.
7397 *
7398 * The idea here is that we have the two CPUs execute the exact same code
7399 * collecting a largish set of TSC samples. The code has one data dependency on
7400 * the other CPU, the intention being to synchronize their execution as well
7401 * as to help cross-reference the two sets of TSC samples (the sequence numbers).
7402 *
7403 * The @a fLag parameter is used to modify the execution a tiny bit on one or
7404 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
7405 * it will occasionally help the CPUs enter lock-step execution.
7406 *
7407 */
7408static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
7409{
7410 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
7411 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
7412
7413 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
7414 ASMSerializeInstruction();
7415 while (cLeft-- > 0)
7416 {
7417 uint64_t uTsc;
7418 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
7419 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
7420 ASMCompilerBarrier();
7421 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
7422 uTsc = ASMReadTSC();
7423 ASMAtomicIncU32(&pMyData->iCurSeqNo);
7424 ASMCompilerBarrier();
7425 ASMSerializeInstruction();
7426 pEntry->iSeqMine = iSeqMine;
7427 pEntry->iSeqOther = iSeqOther;
7428 pEntry->uTsc = uTsc;
7429 pEntry++;
7430 ASMSerializeInstruction();
7431 if (fLag)
7432 ASMNopPause();
7433 }
7434}
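/*
 * How the method \#2 sequence numbers pair up (a worked example): each side
 * bumps iCurSeqNo to an odd value (2*i + 1) just before reading the TSC for
 * entry i, and to an even value (2*i + 2) right after storing the entry.  So
 * when one side snapshots an odd iSeqOther, the other side was between its two
 * bumps, i.e. the two TSC reads overlap in time, and (iSeqOther >> 1) recovers
 * the other side's entry index.  That is the (idxOther & 1) test and shift in
 * supdrvTscDeltaMethod2ProcessDataSet() below.
 */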
7435
7436
7437static void supdrvTscDeltaMethod2ProcessDataSet(PSUPDRVGIPTSCDELTARGS pArgs, PSUPDRVTSCDELTAMETHOD2 pMyData,
7438 bool fIsMaster, uint32_t cResults,
7439 PSUPDRVTSCDELTAMETHOD2 pOtherData, int64_t iMasterTscDelta,
7440 int64_t volatile *piWorkerTscDelta)
7441{
7442 uint32_t cHits = 0;
7443#if 0
7444 uint32_t cOffByOne = 0;
7445#endif
7446 uint32_t idxResult = 0;
7447 int64_t iBestDelta = *piWorkerTscDelta;
7448
7449 if (cResults > RT_ELEMENTS(pMyData->aResults))
7450 cResults = RT_ELEMENTS(pMyData->aResults);
7451
7452 for (idxResult = 0; idxResult < cResults; idxResult++)
7453 {
7454 uint32_t idxOther = pMyData->aResults[idxResult].iSeqOther;
7455 if (idxOther & 1)
7456 {
7457 idxOther >>= 1;
7458 if (idxOther < RT_ELEMENTS(pOtherData->aResults))
7459 {
7460 if (pOtherData->aResults[idxOther].iSeqOther == pMyData->aResults[idxResult].iSeqMine)
7461 {
7462 int64_t iDelta;
7463 if (fIsMaster)
7464 iDelta = pOtherData->aResults[idxOther].uTsc
7465 - (pMyData->aResults[idxResult].uTsc - iMasterTscDelta);
7466                    else
7467                        iDelta = pMyData->aResults[idxResult].uTsc
7468                               - (pOtherData->aResults[idxOther].uTsc - iMasterTscDelta);
7469 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
7470 ? iDelta < iBestDelta
7471 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
7472 iBestDelta = iDelta;
7473 cHits++;
7474 }
7475 }
7476 }
7477#if 0 /* Can be used to detect battles between threads on the same core. Decided to change the master instead. */
7478 else
7479 {
7480 idxOther >>= 1;
7481 if ( idxOther < RT_ELEMENTS(pOtherData->aResults)
7482 && pOtherData->aResults[idxOther].iSeqOther == pMyData->aResults[idxResult].iSeqMine)
7483 cOffByOne++;
7484 }
7485#endif
7486 }
7487
7488 if (cHits > 0)
7489 *piWorkerTscDelta = iBestDelta;
7490 pArgs->cHits += cHits;
7491#if 0
7492 pArgs->cOffByOne += cOffByOne;
7493#endif
7494}
7495
7496
7497static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, bool fFinalLoop)
7498{
7499 supdrvTscDeltaMethod2ProcessDataSet(pArgs,
7500 pArgs->pMasterData,
7501 true /*fIsMaster*/,
7502 RT_ELEMENTS(pArgs->pMasterData->aResults),
7503 pArgs->pWorkerData,
7504 pArgs->pMaster->i64TSCDelta,
7505 &pArgs->pWorker->i64TSCDelta);
7506
7507 supdrvTscDeltaMethod2ProcessDataSet(pArgs,
7508 pArgs->pWorkerData,
7509 false /*fIsMaster*/,
7510 ASMAtomicReadU32(&pArgs->pWorkerData->iCurSeqNo) >> 1,
7511 pArgs->pMasterData,
7512 pArgs->pMaster->i64TSCDelta,
7513 &pArgs->pWorker->i64TSCDelta);
7514}
7515
7516#endif /* GIP_TSC_DELTA_METHOD_2 */
7517
7518
7519/**
7520 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
7521 * and compute the delta between them.
7522 *
7523 * @param idCpu The CPU we are current scheduled on.
7524 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
7525 * @param pvUser2 Unused.
7526 *
7527 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
7528 * read the TSC at exactly the same time on both the master and the
7529 * worker CPUs. Due to DMA, bus arbitration, cache locality,
7530 * contention, SMI, pipelining etc. there is no guaranteed way of
7531 * doing this on x86 CPUs.
7532 *
7533 * GIP_TSC_DELTA_METHOD_1:
7534 * We ignore the first few runs of the loop in order to prime the
7535 *          cache. Also, we need to be careful about using the 'pause' instruction
7536 * in critical busy-wait loops in this code - it can cause undesired
7537 * behaviour with hyperthreading.
7538 *
7539 * We try to minimize the measurement error by computing the minimum
7540 * read time of the compare statement in the worker by taking TSC
7541 * measurements across it.
7542 *
7543 *          It must be noted that the computed minimum read time is mostly there
7544 *          to eliminate huge deltas when the worker is too early; it doesn't by
7545 *          itself help produce more accurate deltas. We allow two times the
7546 *          computed minimum as an arbitrary acceptable threshold. Therefore,
7547 * it is still possible to get negative deltas where there are none
7548 * when the worker is earlier. As long as these occasional negative
7549 * deltas are lower than the time it takes to exit guest-context and
7550 * the OS to reschedule EMT on a different CPU we won't expose a TSC
7551 * that jumped backwards. It is because of the existence of the
7552 * negative deltas we don't recompute the delta with the master and
7553 * worker interchanged to eliminate the remaining measurement error.
7554 *
7555 * @todo document working of GIP_TSC_DELTA_METHOD_2.
7556 */
7557static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7558{
7559 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)pvUser1;
7560 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
7561 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7562 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
7563 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
7564 RTCPUID idMaster = pArgs->idMaster;
7565 int cTriesLeft;
7566
7567 /* A bit of paranoia first. */
7568 if (!pGipCpuMaster || !pGipCpuWorker)
7569 return;
7570
7571 /* If the CPU isn't part of the measurement, return immediately. */
7572 if ( idCpu != idMaster
7573 && idCpu != pGipCpuWorker->idCpu)
7574 return;
7575
7576 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
7577 with a timeout to avoid deadlocking the entire system. */
7578 if (!RTMpOnAllIsConcurrentSafe())
7579 {
7580 /** @todo This was introduced for Windows, but since Windows doesn't use this
7581 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
7582 * see @bugref{6710} comment 81), eventually phase it out. */
7583 uint64_t uTscNow;
7584 uint64_t uTscStart;
7585 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
7586
7587 ASMSerializeInstruction();
7588 uTscStart = ASMReadTSC();
7589 if (idCpu == idMaster)
7590 {
7591 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
7592 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
7593 {
7594 ASMSerializeInstruction();
7595 uTscNow = ASMReadTSC();
7596 if (uTscNow - uTscStart > cWaitTicks)
7597 {
7598 /* Set the worker delta to indicate failure, not the master. */
7599 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7600 return;
7601 }
7602
7603 ASMNopPause();
7604 }
7605 }
7606 else
7607 {
7608 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
7609 {
7610 ASMSerializeInstruction();
7611 uTscNow = ASMReadTSC();
7612 if (uTscNow - uTscStart > cWaitTicks)
7613 {
7614 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7615 return;
7616 }
7617
7618 ASMNopPause();
7619 }
7620 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
7621 }
7622 }
7623
7624 /*
7625 * ...
7626 */
7627 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
7628 cTriesLeft = 12;
7629 while (cTriesLeft-- > 0)
7630 {
7631 unsigned i;
7632 uint64_t uMinCmpReadTime = UINT64_MAX;
7633 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
7634 {
7635#ifdef GIP_TSC_DELTA_METHOD_2
7636 supdrvTscDeltaMethod2Looped(pArgs, idCpu, i);
7637#endif
7638 if (idCpu == idMaster)
7639 {
7640 /*
7641 * The master.
7642 */
7643 RTCCUINTREG uFlags;
7644 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
7645 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
7646 pGipCpuMaster->u64TSCSample, idMaster, pGipCpuWorker->idCpu, pDevExt->idGipMaster));
7647 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
7648
7649 /* Disable interrupts only in the master for as short a period
7650 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
7651 uFlags = ASMIntDisableFlags();
7652
7653 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
7654 { /* nothing */ }
7655
7656#ifdef GIP_TSC_DELTA_METHOD_1
7657 do
7658 {
7659 ASMSerializeInstruction();
7660 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
7661 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7662
7663#elif defined(GIP_TSC_DELTA_METHOD_2)
7664 supdrvTscDeltaMethod2CollectData(pArgs->pMasterData, &pArgs->pWorkerData->iCurSeqNo, pArgs->fLagMaster);
7665#else
7666# error "tsc delta method not selected"
7667#endif
7668
7669 /* Sync up with worker. */
7670 ASMSetFlags(uFlags);
7671
7672 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
7673 { /* nothing */ }
7674
7675 /* Process the data. */
7676 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7677 {
7678#ifdef GIP_TSC_DELTA_METHOD_1
7679 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
7680 {
7681 int64_t iDelta = pGipCpuWorker->u64TSCSample
7682 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
7683 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
7684 ? iDelta < pGipCpuWorker->i64TSCDelta
7685 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
7686 pGipCpuWorker->i64TSCDelta = iDelta;
7687 }
7688#elif defined(GIP_TSC_DELTA_METHOD_2)
7689 if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7690 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, i == GIP_TSC_DELTA_LOOPS - 1);
7691#else
7692# error "tsc delta method not selected"
7693#endif
7694 }
7695
7696 /* Reset our TSC sample and tell the worker to move on. */
7697 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
7698 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7699 }
7700 else
7701 {
7702 /*
7703 * The worker.
7704 */
7705 uint64_t uTscWorker;
7706 uint64_t uTscWorkerFlushed;
7707 uint64_t uCmpReadTime;
7708
7709 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
7710 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
7711 { /* nothing */ }
7712 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7713 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
7714
7715#ifdef GIP_TSC_DELTA_METHOD_1
7716 /*
7717 * Keep reading the TSC until we notice that the master has read his. Reading
7718 * the TSC -after- the master has updated the memory is way too late. We thus
7719 * compensate by trying to measure how long it took for the worker to notice
7720 * the memory flushed from the master.
7721 */
7722 do
7723 {
7724 ASMSerializeInstruction();
7725 uTscWorker = ASMReadTSC();
7726 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7727 ASMSerializeInstruction();
7728 uTscWorkerFlushed = ASMReadTSC();
7729
7730 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7731 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7732 {
7733                /* This is totally arbitrary, a.k.a. I don't like it, but I have no better ideas for now. */
7734 if (uCmpReadTime < (uMinCmpReadTime << 1))
7735 {
7736 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7737 if (uCmpReadTime < uMinCmpReadTime)
7738 uMinCmpReadTime = uCmpReadTime;
7739 }
7740 else
7741 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7742 }
7743 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7744 {
7745 if (uCmpReadTime < uMinCmpReadTime)
7746 uMinCmpReadTime = uCmpReadTime;
7747 }
7748
7749#elif defined(GIP_TSC_DELTA_METHOD_2)
7750 supdrvTscDeltaMethod2CollectData(pArgs->pWorkerData, &pArgs->pMasterData->iCurSeqNo, pArgs->fLagWorker);
7751#else
7752# error "tsc delta method not selected"
7753#endif
7754
7755 /* Tell master we're done collecting our data. */
7756 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7757
7758 /* Wait for the master to process the data. */
7759 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7760 ASMNopPause();
7761 }
7762 }
7763
7764 /*
7765 * We must reset the worker TSC sample value in case it gets picked as a
7766 * GIP master later on (it's trashed above, naturally).
7767 */
7768 if (idCpu == idMaster)
7769 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7770
7771 /*
7772 * Success? If so, stop trying.
7773 */
7774 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7775 {
7776 if (idCpu == idMaster)
7777 {
7778 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuMaster->iCpuSet);
7779 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuMaster->iCpuSet);
7780 }
7781 else
7782 {
7783 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
7784 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
7785 }
7786 break;
7787 }
7788 }
7789}
7790
7791
7792/**
7793 * Clears TSC delta related variables.
7794 *
7795 * Clears all TSC samples as well as the delta synchronization variable on the
7796 * all per-CPU structs. Optionally also clears the per-CPU deltas.
7797 *
7798 * @param pDevExt Pointer to the device instance data.
7799 * @param fClearDeltas Whether the deltas are also to be cleared.
7800 */
7801DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7802{
7803 unsigned iCpu;
7804 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7805 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7806 {
7807 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7808 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7809 if (fClearDeltas)
7810 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7811 }
7812 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7813}
7814
7815
7816/**
7817 * Measures the TSC delta between the master GIP CPU and one specified worker
7818 * CPU.
7819 *
7820 * @returns VBox status code.
7821 * @param pDevExt Pointer to the device instance data.
7822 * @param idxWorker The index of the worker CPU from the GIP's array of
7823 * CPUs.
7824 *
7825 * @remarks This must be called with preemption enabled!
7826 */
7827static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7828{
7829 int rc;
7830 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7831 RTCPUID idMaster = pDevExt->idGipMaster;
7832 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
7833 PSUPGIPCPU pGipCpuMaster;
7834 uint32_t iGipCpuMaster;
7835
7836 /* Validate input a bit. */
7837 AssertReturn(pGip, VERR_INVALID_PARAMETER);
7838 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
7839 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7840
7841 /*
7842 * Don't attempt measuring the delta for the GIP master.
7843 */
7844 if (pGipCpuWorker->idCpu == idMaster)
7845 {
7846 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
7847 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
7848 return VINF_SUCCESS;
7849 }
7850
7851 /*
7852 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
7853     * try to pick a different master. (This fudge only works with multi-core systems.)
7854 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
7855 */
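    /* Example of the heuristic: APIC IDs 2 and 3 satisfy (2 & ~1) == (3 & ~1),
       so they are taken to be two threads of the same core, whereas IDs 2 and 4
       are taken to be different cores.  This is a common layout, but it is not
       architecturally guaranteed. */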
7856 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
7857 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
7858 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
7859 if ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
7860 && ASMHasCpuId()
7861 && ASMIsValidStdRange(ASMCpuId_EAX(0))
7862 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
7863 && pGip->cOnlineCpus > 2)
7864 {
7865 uint32_t i;
7866 for (i = 0; i < pGip->cCpus; i++)
7867 if ( i != iGipCpuMaster
7868 && i != idxWorker
7869 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
7870 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
7871 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
7872 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
7873 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
7874 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
7875 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic)
7876 {
7877 iGipCpuMaster = i;
7878 pGipCpuMaster = &pGip->aCPUs[i];
7879 idMaster = pGipCpuMaster->idCpu;
7880 break;
7881 }
7882 }
7883
7884 /*
7885     * Set the TSC master as the initiator. This serializes delta measurements.
7886 */
7887 while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID))
7888 {
7889 /*
7890 * Sleep here rather than spin as there is a parallel measurement
7891 * being executed and that can take a good while to be done.
7892 */
7893 RTThreadSleep(1);
7894 }
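    /* At this point only one delta measurement can be in flight: the CmpXchg
       above (expected value NIL_RTCPUID) made us the exclusive owner of the
       sync structure, and ownership is handed back by writing NIL_RTCPUID
       once we're done below. */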
7895
7896 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
7897 {
7898 /*
7899 * Initialize data package for the RTMpOnAll callback.
7900 */
7901 SUPDRVGIPTSCDELTARGS Args;
7902 RT_ZERO(Args);
7903 Args.pWorker = pGipCpuWorker;
7904 Args.pMaster = pGipCpuMaster;
7905 Args.idMaster = idMaster;
7906 Args.pDevExt = pDevExt;
7907#ifdef GIP_TSC_DELTA_METHOD_1
7908 rc = VINF_SUCCESS;
7909#elif defined(GIP_TSC_DELTA_METHOD_2)
7910 rc = supdrvTscDeltaMethod2Init(&Args);
7911#else
7912# error "huh?"
7913#endif
7914 if (RT_SUCCESS(rc))
7915 {
7916 /*
7917 * Fire TSC-read workers on all CPUs but only synchronize between master
7918 * and one worker to ease memory contention.
7919 */
7920 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7921 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7922
7923 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, &Args, NULL);
7924 if (RT_SUCCESS(rc))
7925 {
7926 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
7927 {
7928 /*
7929 * Work the TSC delta applicability rating. It starts
7930                     * optimistic in supdrvGipInit; we downgrade it here.
7931 */
7932 SUPGIPUSETSCDELTA enmRating;
7933 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
7934 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
7935 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
7936 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
7937 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
7938 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
7939 else
7940 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
7941 if (pGip->enmUseTscDelta < enmRating)
7942 {
7943 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
7944 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
7945 }
7946 }
7947 else
7948 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7949 }
7950 }
7951
7952#ifdef GIP_TSC_DELTA_METHOD_2
7953 supdrvTscDeltaMethod2Term(&Args);
7954#endif
7955 }
7956 else
7957 rc = VERR_CPU_OFFLINE;
7958
7959 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7960 return rc;
7961}
7962
7963
7964/**
7965 * Performs the initial measurements of the TSC deltas between CPUs.
7966 *
7967 * This is called by supdrvGipCreate or triggered by it if threaded.
7968 *
7969 * @returns VBox status code.
7970 * @param pDevExt Pointer to the device instance data.
7971 *
7972 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7973 *          idCpu and the GIP's online CPU set, which are populated in
7974 * supdrvGipInitOnCpu().
7975 */
7976static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
7977{
7978 PSUPGIPCPU pGipCpuMaster;
7979 unsigned iCpu;
7980 unsigned iOddEven;
7981 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7982 uint32_t idxMaster = UINT32_MAX;
7983 int rc = VINF_SUCCESS;
7984 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7985
7986 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
7987
7988 /*
7989     * Pick the first online CPU, searching in APIC ID order, as the TSC master and
7990     * make it the new GIP master.
7991 *
7992     * Technically we can simply use "idGipMaster" but doing this gives us the master
7993     * as CPU 0 in most cases, making it nicer/easier for comparisons. It is safe to
7994     * update the GIP master at this point since the sync/async timer isn't created yet.
7995 */
7996 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7997 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7998 {
7999 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
8000 if (idxCpu != UINT16_MAX)
8001 {
8002 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
8003 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
8004 {
8005 idxMaster = idxCpu;
8006 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
8007 break;
8008 }
8009 }
8010 }
8011 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
8012 pGipCpuMaster = &pGip->aCPUs[idxMaster];
8013 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
8014
8015 /*
8016 * If there is only a single CPU online we have nothing to do.
8017 */
8018 if (pGip->cOnlineCpus <= 1)
8019 {
8020 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
8021 return VINF_SUCCESS;
8022 }
8023
8024 /*
8025 * Loop thru the GIP CPU array and get deltas for each CPU (except the
8026 * master). We do the CPUs with the even numbered APIC IDs first so that
8027 * we've got alternative master CPUs to pick from on hyper-threaded systems.
8028 */
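    /* Example: say the master is APIC ID 0 and the worker APIC ID 1 shares its
       core.  Because the even IDs (2, 4, ...) are measured in the first pass,
       by the time CPU 1 is processed supdrvMeasureTscDeltaOne() can pick, say,
       CPU 2, which already has a valid delta, as the stand-in master. */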
8029 for (iOddEven = 0; iOddEven < 2; iOddEven++)
8030 {
8031 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
8032 {
8033 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
8034 if ( iCpu != idxMaster
8035 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
8036 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
8037 {
8038 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
8039 if (RT_FAILURE(rc))
8040 {
8041                    SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u idGipMaster=%u\n", rc, iCpu,
8042                                pGipCpuWorker->idCpu, idxMaster, pGipCpuMaster->idCpu, pDevExt->idGipMaster);
8043 break;
8044 }
8045
8046 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
8047 {
8048 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
8049 rc = VERR_TRY_AGAIN;
8050 break;
8051 }
8052 }
8053 }
8054 }
8055
8056 return rc;
8057}
8058
8059
8060/**
8061 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
8062 *
8063 * @param idCpu Ignored.
8064 * @param pvUser1 Where to put the TSC.
8065 * @param pvUser2 Ignored.
8066 */
8067static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
8068{
8069 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
8070}
8071
8072
8073/**
8074 * Determine if Async GIP mode is required because of TSC drift.
8075 *
8076 * When using the default/normal timer code it is essential that the time stamp counter
8077 * (TSC) never runs backwards, that is, a read operation on the counter should return
8078 * a bigger value than any previous read operation. This is guaranteed by the latest
8079 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
8080 * case we have to choose the asynchronous timer mode.
8081 *
8082 * @param poffMin Pointer to the determined difference between different
8083 * cores (optional, can be NULL).
8084 * @return false if the time stamp counters appear to be synchronized, true otherwise.
8085 */
8086static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
8087{
8088 /*
8089 * Just iterate all the cpus 8 times and make sure that the TSC is
8090 * ever increasing. We don't bother taking TSC rollover into account.
8091 */
8092 int iEndCpu = RTMpGetArraySize();
8093 int iCpu;
8094 int cLoops = 8;
8095 bool fAsync = false;
8096 int rc = VINF_SUCCESS;
8097 uint64_t offMax = 0;
8098 uint64_t offMin = ~(uint64_t)0;
8099 uint64_t PrevTsc = ASMReadTSC();
8100
8101 while (cLoops-- > 0)
8102 {
8103 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
8104 {
8105 uint64_t CurTsc;
8106 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
8107 if (RT_SUCCESS(rc))
8108 {
8109 if (CurTsc <= PrevTsc)
8110 {
8111 fAsync = true;
8112 offMin = offMax = PrevTsc - CurTsc;
8113 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
8114 iCpu, cLoops, CurTsc, PrevTsc));
8115 break;
8116 }
8117
8118 /* Gather statistics (except the first time). */
8119 if (iCpu != 0 || cLoops != 7)
8120 {
8121 uint64_t off = CurTsc - PrevTsc;
8122 if (off < offMin)
8123 offMin = off;
8124 if (off > offMax)
8125 offMax = off;
8126 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
8127 }
8128
8129 /* Next */
8130 PrevTsc = CurTsc;
8131 }
8132 else if (rc == VERR_NOT_SUPPORTED)
8133 break;
8134 else
8135 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
8136 }
8137
8138 /* broke out of the loop. */
8139 if (iCpu < iEndCpu)
8140 break;
8141 }
8142
8143 if (poffMin)
8144 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
8145 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
8146 fAsync, iEndCpu, rc, offMin, offMax));
8147#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
8148 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
8149#endif
8150 return fAsync;
8151}
8152
8153
8154/**
8155 * supdrvGipInit() worker that determines the GIP TSC mode.
8156 *
8157 * @returns The most suitable TSC mode.
8158 * @param pDevExt Pointer to the device instance data.
8159 */
8160static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
8161{
8162 uint64_t u64DiffCoresIgnored;
8163 uint32_t uEAX, uEBX, uECX, uEDX;
8164
8165 /*
8166 * Establish whether the CPU advertises TSC as invariant, we need that in
8167 * a couple of places below.
8168 */
8169 bool fInvariantTsc = false;
8170 if (ASMHasCpuId())
8171 {
8172 uEAX = ASMCpuId_EAX(0x80000000);
8173 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
8174 {
8175 uEDX = ASMCpuId_EDX(0x80000007);
8176 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
8177 fInvariantTsc = true;
8178 }
8179 }
8180
8181 /*
8182 * On single CPU systems, we don't need to consider ASYNC mode.
8183 */
8184 if (RTMpGetCount() <= 1)
8185 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
8186
8187 /*
8188 * Allow the user and/or OS specific bits to force async mode.
8189 */
8190 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
8191 return SUPGIPMODE_ASYNC_TSC;
8192
8193
8194#if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
8195 /*
8196 * Use invariant mode if the CPU says TSC is invariant.
8197 */
8198 if (fInvariantTsc)
8199 return SUPGIPMODE_INVARIANT_TSC;
8200#endif
8201
8202 /*
8203 * TSC is not invariant and we're on SMP, this presents two problems:
8204 *
8205 *     (1) There might be a skew between the CPUs, so that cpu0
8206 *         returns a TSC that is slightly different from cpu1.
8207 *         This skew may be due to (2), bad TSC initialization
8208 * or slightly different TSC rates.
8209 *
8210 * (2) Power management (and other things) may cause the TSC
8211 * to run at a non-constant speed, and cause the speed
8212 * to be different on the cpus. This will result in (1).
8213 *
8214 * If any of the above is detected, we will have to use ASYNC mode.
8215 */
8216
8217    /* (1). Try to check for current differences between the CPUs. */
8218 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
8219 return SUPGIPMODE_ASYNC_TSC;
8220
8221#if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
8222 if (fInvariantTsc)
8223 return SUPGIPMODE_INVARIANT_TSC;
8224#endif
8225
8226 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
8227 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
8228 if ( ASMIsValidStdRange(uEAX)
8229 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
8230 {
8231 /* Check for APM support. */
8232 uEAX = ASMCpuId_EAX(0x80000000);
8233 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
8234 {
8235 uEDX = ASMCpuId_EDX(0x80000007);
8236 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
8237 return SUPGIPMODE_ASYNC_TSC;
8238 }
8239 }
8240
8241 return SUPGIPMODE_SYNC_TSC;
8242}
8243
8244
8245/**
8246 * Initializes per-CPU GIP information.
8247 *
8248 * @param pDevExt Pointer to the device instance data.
8249 * @param pGip Pointer to the GIP.
8250 * @param pCpu       Pointer to which GIP CPU to initialize.
8251 * @param u64NanoTS The current nanosecond timestamp.
8252 */
8253static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
8254{
8255    /* !!! Warning !!! The GIP may not be linked to the device instance data at this point,
8256       which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
8257 pCpu->u32TransactionId = 2;
8258 pCpu->u64NanoTS = u64NanoTS;
8259 pCpu->u64TSC = ASMReadTSC();
8260 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
8261 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
8262
8263 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
8264 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
8265 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
8266 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
8267
8268 /*
8269 * We don't know the following values until we've executed updates.
8270     * So, we'll just pretend it's a 4 GHz CPU and adjust the history on
8271 * the 2nd timer callout.
8272 */
8273 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
8274 pCpu->u32UpdateIntervalTSC
8275 = pCpu->au32TSCHistory[0]
8276 = pCpu->au32TSCHistory[1]
8277 = pCpu->au32TSCHistory[2]
8278 = pCpu->au32TSCHistory[3]
8279 = pCpu->au32TSCHistory[4]
8280 = pCpu->au32TSCHistory[5]
8281 = pCpu->au32TSCHistory[6]
8282 = pCpu->au32TSCHistory[7]
8283 = (uint32_t)(_4G / pGip->u32UpdateHz);
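    /* Example: assuming the default 10 ms interval (u32UpdateHz = 100), each
       history slot above is seeded with _4G / 100 = 42949672 ticks per
       interval, matching the pretend ~4.29 GHz rate in u64CpuHz. */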
8284}


/**
 * Initializes the GIP data.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);

    pGip->u32Magic            = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version          = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode             = supdrvGipInitDetermineTscMode(pDevExt);
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
        pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
                             ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    else
        pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    pGip->cCpus               = (uint16_t)cCpus;
    pGip->cPages              = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz         = uUpdateHz;
    pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
    pGip->fGetGipCpu          = SUPGIPGETCPU_APIC_ID;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus         = RTMpGetOnlineCount();
    pGip->cPresentCpus        = RTMpGetPresentCount();
    pGip->cPossibleCpus       = RTMpGetCount();
    pGip->idCpuMax            = RTMpGetMaxCpuId();
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i] = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip      = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;
}
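
/*
 * Illustration (not part of the driver): how the GIP allocation size above
 * follows from the CPU count.  Only the rounding mirrors the code above
 * (RT_ALIGN_Z of the header plus per-CPU array up to whole pages); the
 * structure sizes are invented for the example.
 */
#if 0 /* example only */
# include <stddef.h>
# define EXAMPLE_PAGE_SIZE 4096
static size_t exampleGipSize(size_t cbHeader, size_t cbPerCpu, unsigned cCpus)
{
    /* Header + variable-size per-CPU array, rounded up to a page boundary. */
    size_t cbGip = cbHeader + (size_t)cCpus * cbPerCpu;
    return (cbGip + EXAMPLE_PAGE_SIZE - 1) & ~(size_t)(EXAMPLE_PAGE_SIZE - 1);
}
#endif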


/**
 * On CPU initialization callback for RTMpOnAll.
 *
 * @param   idCpu       The CPU ID.
 * @param   pvUser1     The device extension.
 * @param   pvUser2     The GIP.
 */
static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    /* This is good enough, even though it will update some of the globals a
       bit too much. */
    supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
}


/**
 * Invalidates the GIP data upon termination.
 *
 * @param   pGip        Pointer to the read-write kernel mapping of the GIP.
 */
static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
{
    unsigned i;
    pGip->u32Magic = 0;
    for (i = 0; i < pGip->cCpus; i++)
    {
        pGip->aCPUs[i].u64NanoTS = 0;
        pGip->aCPUs[i].u64TSC = 0;
        pGip->aCPUs[i].iTSCHistoryHead = 0;
        pGip->aCPUs[i].u64TSCSample = 0;
        pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
    }
}


/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per-CPU data except the transaction id.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per-CPU data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t u64TSCDelta;
    uint32_t u32UpdateIntervalTSC;
    uint32_t u32UpdateIntervalTSCSlack;
    unsigned iTSCHistoryHead;
    uint64_t u64CpuHz;
    uint32_t u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* We don't need to keep recalculating the frequency when it's invariant. */
    if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        return;

    /* A delta that doesn't fit in 32 bits is bogus; substitute the previous interval. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * On the 2nd and 3rd callout, reset the history with the current TSC
     * interval since the values entered by supdrvGipInit are totally off.
     * The interval on the 1st callout is completely unreliable, the 2nd is a bit
     * better, while the 3rd should be the most reliable.
     */
    u32TransactionId = pGipCpu->u32TransactionId;
    if (RT_UNLIKELY(   (   u32TransactionId == 5
                        || u32TransactionId == 7)
                    && (   iTick == 2
                        || iTick == 3) ))
    {
        unsigned i;
        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    }

    /*
     * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
     * Wait until we have at least one full history since the above history reset.  The
     * assumption is that the majority of the previous history values will be tolerable.
     * See @bugref{6710} comment #67.
     */
    if (   u32TransactionId > 23 /* 7 + (8 * 2) */
        && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
        if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
            || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
        {
            /* The elapsed time looks fishy; replace the sampled interval with the
               average of the existing history so one bad sample doesn't pollute it. */
            uint32_t u32;
            u32 = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u64TSCDelta = pGipCpu->au32TSCHistory[4];
            u64TSCDelta += pGipCpu->au32TSCHistory[5];
            u64TSCDelta += pGipCpu->au32TSCHistory[6];
            u64TSCDelta += pGipCpu->au32TSCHistory[7];
            u64TSCDelta >>= 2;
            u64TSCDelta += u32;
            u64TSCDelta >>= 1;
        }
    }

    /*
     * TSC History.
     */
    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of the last 8, 2 or 1 intervals depending on update HZ.
     *
     * On Windows, we have an occasional (but recurring) sour value that messes up
     * the history, but taking only one interval reduces the overall precision.
     * However, this problem existed before the invariant mode was introduced.
     */
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        || pGip->u32UpdateHz >= 1000)
    {
        uint32_t u32;
        u32 = pGipCpu->au32TSCHistory[0];
        u32 += pGipCpu->au32TSCHistory[1];
        u32 += pGipCpu->au32TSCHistory[2];
        u32 += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen on a 2GHz thinkpad running windows. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
    u64CpuHz /= pGip->u32UpdateIntervalNS;
    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
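
/*
 * Illustration (not part of the driver): the high-rate branch above averages
 * the 8-entry history as ((avg of slots 0..3) + (avg of slots 4..7)) / 2, and
 * CpuHz then follows as interval_ticks * 1e9 / interval_ns.  A minimal sketch
 * with made-up sample values.
 */
#if 0 /* example only */
# include <stdint.h>
# include <assert.h>
static void exampleUpdateIntervalAndCpuHz(void)
{
    uint32_t const au32Hist[8] = { 20000000, 20000100, 19999900, 20000000,
                                   20000050, 19999950, 20000000, 20000000 };
    uint32_t uLo = (au32Hist[0] + au32Hist[1] + au32Hist[2] + au32Hist[3]) >> 2;
    uint32_t uHi = (au32Hist[4] + au32Hist[5] + au32Hist[6] + au32Hist[7]) >> 2;
    uint32_t uIntervalTsc = (uLo + uHi) >> 1;

    /* With a 10 ms update interval (10,000,000 ns), ~20M ticks/interval ~= 2 GHz. */
    uint64_t uCpuHz = (uint64_t)uIntervalTsc * UINT64_C(1000000000) / UINT64_C(10000000);
    assert(uCpuHz >= UINT64_C(1990000000) && uCpuHz <= UINT64_C(2010000000));
}
#endif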


/**
 * Updates the GIP.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* This can happen on win32 if we're taking too long and there are more
           CPUs around.  Shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* because we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
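
/*
 * Illustration (not part of the driver): u32TransactionId works like a
 * sequence lock.  The updater above leaves it odd while writing and even when
 * done, so a reader snapshots it before and after copying the fields and
 * retries on an odd value or a mismatch.  A minimal reader sketch; real
 * clients use the ring-3 GIP mapping, this merely mirrors the odd/even
 * protocol used above.
 */
#if 0 /* example only */
static void exampleGipReadSnapshot(PSUPGIPCPU pGipCpu, uint64_t *pu64NanoTS, uint64_t *pu64TSC)
{
    uint32_t u32Id;
    do
    {
        u32Id = ASMAtomicReadU32(&pGipCpu->u32TransactionId);
        ASMCompilerBarrier();
        *pu64NanoTS = pGipCpu->u64NanoTS;
        *pu64TSC    = pGipCpu->u64TSC;
        ASMCompilerBarrier();
    } while (   (u32Id & 1)                                             /* update in progress */
             || u32Id != ASMAtomicReadU32(&pGipCpu->u32TransactionId)); /* changed underneath us */
}
#endif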


/**
 * Updates the per-CPU GIP data for the calling CPU.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnline(pDevExt, idCpu);
    }

    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
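
/*
 * Illustration (not part of the driver): with fGetGipCpu set to
 * SUPGIPGETCPU_APIC_ID (see supdrvGipInit above), a caller locates its
 * per-CPU slot the same way the async paths above do.  Sketch only.
 */
#if 0 /* example only */
static PSUPGIPCPU exampleGetGipCpuByApicId(PSUPGLOBALINFOPAGE pGip)
{
    uint16_t iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
    if (iCpu < pGip->cCpus)
        return &pGip->aCPUs[iCpu];
    return NULL; /* no mapping (yet); unused entries hold UINT16_MAX */
}
#endif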


/**
 * Resumes the built-in keyboard on MacBook Air and Pro hosts.
 * If there is no built-in keyboard device, success is returned anyway.
 *
 * @returns 0 on Mac OS X, VERR_NOT_IMPLEMENTED on all other platforms.
 */
static int supdrvIOCtl_ResumeSuspendedKbds(void)
{
#if defined(RT_OS_DARWIN)
    return supdrvDarwinResumeSuspendedKbds();
#else
    return VERR_NOT_IMPLEMENTED;
#endif
}


/**
 * Services a TSC-delta measurement request.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   pReq            Pointer to the TSC-delta measurement request.
 */
static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    RTCPUID idCpuWorker;
    int rc;
    int16_t cTries;
    RTMSINTERVAL cMsWaitRetry;
    uint16_t iCpu;

    /*
     * Validate.
     */
    AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    idCpuWorker = pReq->u.In.idCpu;
    if (idCpuWorker == NIL_RTCPUID)
        return VERR_INVALID_CPU_ID;
    cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
    cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);

    /*
     * The request is a no-op if TSC deltas aren't being used.
     */
    if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
        return VINF_SUCCESS;

    rc = VERR_CPU_NOT_FOUND;
    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    {
        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
        if (pGipCpuWorker->idCpu == idCpuWorker)
        {
            if (   pGipCpuWorker->i64TSCDelta != INT64_MAX
                && !pReq->u.In.fForce)
                return VINF_SUCCESS;

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            if (pReq->u.In.fAsync)
            {
                /** @todo Async. doesn't implement options like retries, waiting. We'll need
                 *        to pass those options to the thread somehow and implement it in the
                 *        thread. Check if anyone uses/needs fAsync before implementing this. */
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
                    || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
                {
                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
                }
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                RTThreadUserSignal(pDevExt->hTscDeltaThread);
                return VINF_SUCCESS;
            }

            /*
             * If a TSC-delta measurement request is already being serviced by the thread,
             * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
             */
            while (cTries-- > 0)
            {
                SUPDRVTSCDELTATHREADSTATE enmState;
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                enmState = pDevExt->enmTscDeltaThreadState;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                if (   enmState == kTscDeltaThreadState_Measuring
                    || enmState == kTscDeltaThreadState_WaitAndMeasure)
                {
                    if (   !cTries
                        || !cMsWaitRetry)
                        return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
                    RTThreadSleep(cMsWaitRetry);
                }
                else
                    break; /* The thread isn't busy; go ahead and measure. */
            }
            cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
#endif

            while (cTries-- > 0)
            {
                rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                if (RT_SUCCESS(rc))
                {
                    Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
                    break;
                }

                if (cMsWaitRetry)
                    RTThreadSleep(cMsWaitRetry);
            }

            break;
        }
    }
    return rc;
}
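
/*
 * Illustration (not part of the driver): how a caller might fill in the
 * request serviced by supdrvIOCtl_TscDeltaMeasure() above.  Only the u.In
 * fields actually read by the code above are assumed; the dispatch helper is
 * a hypothetical placeholder, not the real ring-3 entry point.
 */
#if 0 /* example only */
static int exampleDispatchToDriver(PSUPTSCDELTAMEASURE pReq); /* hypothetical */

static int exampleRequestTscDeltaMeasurement(RTCPUID idCpu)
{
    SUPTSCDELTAMEASURE Req;
    RT_ZERO(Req);
    Req.u.In.idCpu        = idCpu; /* worker CPU whose delta we want */
    Req.u.In.cRetries     = 5;     /* the driver raises this to at least 10 tries */
    Req.u.In.cMsWaitRetry = 10;    /* the driver raises this to at least 5 ms */
    Req.u.In.fForce       = false; /* don't re-measure an already known delta */
    Req.u.In.fAsync       = false; /* wait for the result */
    return exampleDispatchToDriver(&Req);
}
#endif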


/**
 * Reads the TSC with the delta applied.
 *
 * Will try to resolve a delta value of INT64_MAX before applying it.  Handling
 * the case where the delta still needs to be determined is the main purpose of
 * this function.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   pReq            Pointer to the TSC-read request.
 */
static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    /*
     * We're usually here because we need to apply a delta, but we shouldn't be
     * upset if the GIP is in some other mode.
     */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t cTries = 0;
        for (;;)
        {
            /*
             * Start by gathering the data, using CLI for disabling preemption
             * while we do that.
             */
            RTCCUINTREG uFlags = ASMIntDisableFlags();
            int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
            int iGipCpu;
            if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                          && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            {
                int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
                pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(uFlags);

                /*
                 * If we're lucky we've got a delta, but no predictions here
                 * as this I/O control is normally only used when the TSC delta
                 * is set to INT64_MAX.
                 */
                if (i64Delta != INT64_MAX)
                {
                    pReq->u.Out.u64AdjustedTsc -= i64Delta;
                    rc = VINF_SUCCESS;
                    break;
                }

                /* Give up after a few tries. */
                if (cTries >= 4)
                {
                    rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
                    break;
                }

                /* Need to measure the delta and try again. */
                cTries++;
                rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
            }
            else
            {
                /* This really shouldn't happen. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = ASMGetApicId();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(uFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply.  Easy.  Deal with preemption the lazy way.
         */
        RTCCUINTREG uFlags = ASMIntDisableFlags();
        int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int iGipCpu;
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = ASMGetApicId();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(uFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
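
/*
 * Illustration (not part of the driver): once a CPU's delta is known, a TSC
 * reading is adjusted by subtracting it, exactly as done above.  INT64_MAX is
 * the "not yet measured" sentinel and must never be subtracted.  A minimal
 * sketch with an invented error convention.
 */
#if 0 /* example only */
# include <stdint.h>
static int exampleApplyTscDelta(uint64_t u64RawTsc, int64_t i64Delta, uint64_t *pu64AdjustedTsc)
{
    if (i64Delta == INT64_MAX)
        return -1; /* delta not measured yet; the caller should trigger a measurement */
    *pu64AdjustedTsc = u64RawTsc - (uint64_t)i64Delta;
    return 0;
}
#endif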