VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 25540

Last change on this file since 25540 was 25540, checked in by vboxsync, 15 years ago

Missing pgmUnlock

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 187.8 KB
1/* $Id: PGMAllPool.cpp 25540 2009-12-21 14:02:28Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#ifndef IN_RING3
56DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
57#endif
58#ifdef LOG_ENABLED
59static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
60#endif
61#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
62static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
63#endif
64
65int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
66PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
67void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
68void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
69
70RT_C_DECLS_END
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92/** @def PGMPOOL_PAGE_2_LOCKED_PTR
93 * Maps a pool page into the current context and locks it (RC only).
94 *
95 * @returns Pointer to the mapped page.
96 * @param pVM The VM handle.
97 * @param pPage The pool page.
98 *
99 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
100 * small page window employed by that function. Be careful.
101 * @remark There is no need to assert on the result.
102 */
103#if defined(IN_RC)
104DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
105{
106 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
107
108 /* Make sure the dynamic mapping will not be reused. */
109 if (pv)
110 PGMDynLockHCPage(pVM, (uint8_t *)pv);
111
112 return pv;
113}
114#else
115# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
116#endif
117
118/** @def PGMPOOL_UNLOCK_PTR
119 * Unlocks a previously locked dynamic page mapping (RC only).
120 *
121 * @returns nothing.
122 * @param pVM The VM handle.
123 * @param pPage The pool page.
124 *
125 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
126 * small page window employed by that function. Be careful.
127 * @remark There is no need to assert on the result.
128 */
129#if defined(IN_RC)
130DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
131{
132 if (pvPage)
133 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
134}
135#else
136# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
137#endif
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
150
151 /*
152 * Find the list head.
153 */
154 uint16_t idx = pPage->idx;
155 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 idx = pPage->iMonitoredPrev;
160 Assert(idx != pPage->idx);
161 pPage = &pPool->aPages[idx];
162 }
163 }
164
165 /*
166 * Iterate the list flushing each shadow page.
167 */
168 int rc = VINF_SUCCESS;
169 for (;;)
170 {
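        /* Grab the next link before flushing: pgmPoolFlushPage unlinks the page from the monitor chain. */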
171 idx = pPage->iMonitoredNext;
172 Assert(idx != pPage->idx);
173 if (pPage->idx >= PGMPOOL_IDX_FIRST)
174 {
175 int rc2 = pgmPoolFlushPage(pPool, pPage);
176 AssertRC(rc2);
177 }
178 /* next */
179 if (idx == NIL_PGMPOOL_IDX)
180 break;
181 pPage = &pPool->aPages[idx];
182 }
183 return rc;
184}
185
186
187/**
188 * Wrapper for getting the current context pointer to the entry being modified.
189 *
190 * @returns VBox status code suitable for scheduling.
191 * @param pVM VM Handle.
192 * @param pvDst Destination address
193 * @param pvSrc Source guest virtual address.
194 * @param GCPhysSrc The source guest physical address.
195 * @param cb Size of data to read
196 */
197DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
198{
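    /* Align the source down to the entry size (cb) so a partial/straddling write still reads the whole entry;
       in ring-3 the source is a host pointer, elsewhere we go via the guest physical address. */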
199#if defined(IN_RING3)
200 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
201 return VINF_SUCCESS;
202#else
203 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
204 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
205#endif
206}
207
208/**
209 * Process shadow entries before they are changed by the guest.
210 *
211 * For PT entries we will clear them. For PD entries, we'll simply check
212 * for mapping conflicts and set the SyncCR3 FF if found.
213 *
214 * @param pVCpu VMCPU handle
215 * @param pPool The pool.
216 * @param pPage The head page.
217 * @param GCPhysFault The guest physical fault address.
218 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
219 * In R3 this is the host context 'fault' address.
220 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
221 */
222void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
223{
224 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
225 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
226 PVM pVM = pPool->CTX_SUFF(pVM);
227
228 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
229
230 for (;;)
231 {
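        /* View of the shadow page, interpreted according to pPage->enmKind. */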
232 union
233 {
234 void *pv;
235 PX86PT pPT;
236 PX86PTPAE pPTPae;
237 PX86PD pPD;
238 PX86PDPAE pPDPae;
239 PX86PDPT pPDPT;
240 PX86PML4 pPML4;
241 } uShw;
242
243        LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
244
245 uShw.pv = NULL;
246 switch (pPage->enmKind)
247 {
248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
249 {
250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
251 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
252 const unsigned iShw = off / sizeof(X86PTE);
253 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
254 if (uShw.pPT->a[iShw].n.u1Present)
255 {
256 X86PTE GstPte;
257
258 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
259 AssertRC(rc);
260 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
261 pgmPoolTracDerefGCPhysHint(pPool, pPage,
262 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
263 GstPte.u & X86_PTE_PG_MASK);
264 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
265 }
266 break;
267 }
268
269 /* page/2 sized */
270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
271 {
272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
273 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
274 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
275 {
276 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
277 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
278 if (uShw.pPTPae->a[iShw].n.u1Present)
279 {
280 X86PTE GstPte;
281 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
282 AssertRC(rc);
283
284                    Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
289 }
290 }
291 break;
292 }
293
294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
298 {
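                /* A 32-bit guest PD is shadowed by four PAE page directories: each guest PDE (4MB reach)
                   expands to two PAE PDEs (2MB each). iShwPdpt selects the shadow PD, iShw is the first of
                   the two shadow entries corresponding to the written guest PDE. */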
299 unsigned iGst = off / sizeof(X86PDE);
300 unsigned iShwPdpt = iGst / 256;
301 unsigned iShw = (iGst % 256) * 2;
302 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
303
304 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
305 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
306 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
307 {
308 for (unsigned i = 0; i < 2; i++)
309 {
310# ifndef IN_RING0
311 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
312 {
313 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
316 break;
317 }
318 else
319# endif /* !IN_RING0 */
320 if (uShw.pPDPae->a[iShw+i].n.u1Present)
321 {
322 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
323 pgmPoolFree(pVM,
324 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
325 pPage->idx,
326 iShw + i);
327 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
328 }
329
330 /* paranoia / a bit assumptive. */
331 if ( (off & 3)
332 && (off & 3) + cbWrite > 4)
333 {
334 const unsigned iShw2 = iShw + 2 + i;
335 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
336 {
337# ifndef IN_RING0
338 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
339 {
340 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
341 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
342 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
343 break;
344 }
345 else
346# endif /* !IN_RING0 */
347 if (uShw.pPDPae->a[iShw2].n.u1Present)
348 {
349 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
350 pgmPoolFree(pVM,
351 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
352 pPage->idx,
353 iShw2);
354 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
355 }
356 }
357 }
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTEPAE);
367 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
368 if (uShw.pPTPae->a[iShw].n.u1Present)
369 {
370 X86PTEPAE GstPte;
371 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
372 AssertRC(rc);
373
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
377 GstPte.u & X86_PTE_PAE_PG_MASK);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK);
401                    ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
402 }
403 }
404 break;
405 }
406
407 case PGMPOOLKIND_32BIT_PD:
408 {
409 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
410 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
411
412 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
413 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
414# ifndef IN_RING0
415 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
416 {
417 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
418 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
419 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
421 break;
422 }
423# endif /* !IN_RING0 */
424# ifndef IN_RING0
425 else
426# endif /* !IN_RING0 */
427 {
428 if (uShw.pPD->a[iShw].n.u1Present)
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431 pgmPoolFree(pVM,
432 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
433 pPage->idx,
434 iShw);
435 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
436 }
437 }
438 /* paranoia / a bit assumptive. */
439 if ( (off & 3)
440 && (off & 3) + cbWrite > sizeof(X86PTE))
441 {
442 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
443 if ( iShw2 != iShw
444 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
445 {
446# ifndef IN_RING0
447 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
450 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
451 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
453 break;
454 }
455# endif /* !IN_RING0 */
456# ifndef IN_RING0
457 else
458# endif /* !IN_RING0 */
459 {
460 if (uShw.pPD->a[iShw2].n.u1Present)
461 {
462 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
463 pgmPoolFree(pVM,
464 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
465 pPage->idx,
466 iShw2);
467 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
468 }
469 }
470 }
471 }
472#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
473 if ( uShw.pPD->a[iShw].n.u1Present
474 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
475 {
476 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
477# ifdef IN_RC /* TLB load - we're pushing things a bit... */
478 ASMProbeReadByte(pvAddress);
479# endif
480 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
481 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
482 }
483#endif
484 break;
485 }
486
487 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
488 {
489 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
490 const unsigned iShw = off / sizeof(X86PDEPAE);
491 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
492#ifndef IN_RING0
493 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
494 {
495 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
496 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
497 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
498 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
499 break;
500 }
501#endif /* !IN_RING0 */
502 /*
503 * Causes trouble when the guest uses a PDE to refer to the whole page table level
504 * structure. (Invalidate here; faults later on when it tries to change the page
505 * table entries -> recheck; probably only applies to the RC case.)
506 */
507# ifndef IN_RING0
508 else
509# endif /* !IN_RING0 */
510 {
511 if (uShw.pPDPae->a[iShw].n.u1Present)
512 {
513 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
514 pgmPoolFree(pVM,
515 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
516 pPage->idx,
517 iShw);
518 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
519 }
520 }
521 /* paranoia / a bit assumptive. */
522 if ( (off & 7)
523 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
524 {
525 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
526 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
527
528#ifndef IN_RING0
529 if ( iShw2 != iShw
530 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
531 {
532 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
534 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
535 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
536 break;
537 }
538#endif /* !IN_RING0 */
539# ifndef IN_RING0
540 else
541# endif /* !IN_RING0 */
542 if (uShw.pPDPae->a[iShw2].n.u1Present)
543 {
544 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
545 pgmPoolFree(pVM,
546 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
547 pPage->idx,
548 iShw2);
549 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 case PGMPOOLKIND_PAE_PDPT:
556 {
557 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
558 /*
559 * Hopefully this doesn't happen very often:
560 * - touching unused parts of the page
561 * - messing with the bits of pd pointers without changing the physical address
562 */
563 /* PDPT roots are not page aligned; 32 byte only! */
564 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
565
566 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
567 const unsigned iShw = offPdpt / sizeof(X86PDPE);
568 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
569 {
570# ifndef IN_RING0
571 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
572 {
573 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
574 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
575 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
577 break;
578 }
579# endif /* !IN_RING0 */
580# ifndef IN_RING0
581 else
582# endif /* !IN_RING0 */
583 if (uShw.pPDPT->a[iShw].n.u1Present)
584 {
585 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
586 pgmPoolFree(pVM,
587 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
588 pPage->idx,
589 iShw);
590 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
591 }
592
593 /* paranoia / a bit assumptive. */
594 if ( (offPdpt & 7)
595 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
596 {
597 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
598 if ( iShw2 != iShw
599 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
600 {
601# ifndef IN_RING0
602 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
603 {
604 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
605 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
606 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
607 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
608 break;
609 }
610# endif /* !IN_RING0 */
611# ifndef IN_RING0
612 else
613# endif /* !IN_RING0 */
614 if (uShw.pPDPT->a[iShw2].n.u1Present)
615 {
616 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
617 pgmPoolFree(pVM,
618 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
619 pPage->idx,
620 iShw2);
621 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
622 }
623 }
624 }
625 }
626 break;
627 }
628
629#ifndef IN_RC
630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
631 {
632 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
633 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
634 const unsigned iShw = off / sizeof(X86PDEPAE);
635 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
636 if (uShw.pPDPae->a[iShw].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
639 pgmPoolFree(pVM,
640 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
641 pPage->idx,
642 iShw);
643 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
644 }
645 /* paranoia / a bit assumptive. */
646 if ( (off & 7)
647 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
648 {
649 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
650 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
651
652 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
653 if (uShw.pPDPae->a[iShw2].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
656 pgmPoolFree(pVM,
657 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
658 pPage->idx,
659 iShw2);
660 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
661 }
662 }
663 break;
664 }
665
666 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
667 {
668 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
669 /*
670 * Hopefully this doesn't happen very often:
671 * - messing with the bits of pd pointers without changing the physical address
672 */
673 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
674 const unsigned iShw = off / sizeof(X86PDPE);
675 if (uShw.pPDPT->a[iShw].n.u1Present)
676 {
677 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
678 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
679 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
680 }
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(X86PDPE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
686 if (uShw.pPDPT->a[iShw2].n.u1Present)
687 {
688 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
689 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
699 /*
700 * Hopefully this doesn't happen very often:
701 * - messing with the bits of pd pointers without changing the physical address
702 */
703 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
704 const unsigned iShw = off / sizeof(X86PDPE);
705 if (uShw.pPML4->a[iShw].n.u1Present)
706 {
707 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
708 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
709 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
710 }
711 /* paranoia / a bit assumptive. */
712 if ( (off & 7)
713 && (off & 7) + cbWrite > sizeof(X86PDPE))
714 {
715 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
716 if (uShw.pPML4->a[iShw2].n.u1Present)
717 {
718 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
719 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
720 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
721 }
722 }
723 break;
724 }
725#endif /* !IN_RC */
726
727 default:
728 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
729 }
730 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
731
732 /* next */
733 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
734 return;
735 pPage = &pPool->aPages[pPage->iMonitoredNext];
736 }
737}
738
739# ifndef IN_RING3
740/**
741 * Checks if an access could be a fork operation in progress.
742 *
743 * Meaning that the guest is setting up the parent process for Copy-On-Write.
744 *
745 * @returns true if it's likely that we're forking, otherwise false.
746 * @param pPool The pool.
747 * @param pDis The disassembled instruction.
748 * @param offFault The access offset.
749 */
750DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
751{
752 /*
753 * i386 Linux uses btr to clear X86_PTE_RW.
754 * The functions involved are (2.6.16 source inspection):
755 * clear_bit
756 * ptep_set_wrprotect
757 * copy_one_pte
758 * copy_pte_range
759 * copy_pmd_range
760 * copy_pud_range
761 * copy_page_range
762 * dup_mmap
763 * dup_mm
764 * copy_mm
765 * copy_process
766 * do_fork
767 */
768 if ( pDis->pCurInstr->opcode == OP_BTR
769 && !(offFault & 4)
770 /** @todo Validate that the bit index is X86_PTE_RW. */
771 )
772 {
773 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
774 return true;
775 }
776 return false;
777}
778
779
780/**
781 * Determine whether the page is likely to have been reused.
782 *
783 * @returns true if we consider the page as being reused for a different purpose.
784 * @returns false if we consider it to still be a paging page.
785 * @param pVM VM Handle.
786 * @param pVCpu VMCPU Handle.
787 * @param pRegFrame Trap register frame.
788 * @param pDis The disassembly info for the faulting instruction.
789 * @param pvFault The fault address.
790 *
791 * @remark The REP prefix check is left to the caller because of STOSD/W.
792 */
793DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
794{
795#ifndef IN_RC
796 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
797 if ( HWACCMHasPendingIrq(pVM)
798 && (pRegFrame->rsp - pvFault) < 32)
799 {
800 /* Fault caused by stack writes while trying to inject an interrupt event. */
801 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
802 return true;
803 }
804#else
805 NOREF(pVM); NOREF(pvFault);
806#endif
807
808 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
809
810 /* Non-supervisor mode write means it's used for something else. */
811 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
812 return true;
813
814 switch (pDis->pCurInstr->opcode)
815 {
816 /* call implies the actual push of the return address faulted */
817 case OP_CALL:
818 Log4(("pgmPoolMonitorIsReused: CALL\n"));
819 return true;
820 case OP_PUSH:
821 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
822 return true;
823 case OP_PUSHF:
824 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
825 return true;
826 case OP_PUSHA:
827 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
828 return true;
829 case OP_FXSAVE:
830 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
831 return true;
832 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
833 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
834 return true;
835 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
836 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
837 return true;
838 case OP_MOVSWD:
839 case OP_STOSWD:
840 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
841 && pRegFrame->rcx >= 0x40
842 )
843 {
844 Assert(pDis->mode == CPUMODE_64BIT);
845
846 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
847 return true;
848 }
849 return false;
850 }
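    /* A write addressed via ESP/RSP is almost certainly a stack access, so the page is
       very unlikely to still be in use as a page table. */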
851 if ( ( (pDis->param1.flags & USE_REG_GEN32)
852 || (pDis->param1.flags & USE_REG_GEN64))
853 && (pDis->param1.base.reg_gen == USE_REG_ESP))
854 {
855 Log4(("pgmPoolMonitorIsReused: ESP\n"));
856 return true;
857 }
858
859 return false;
860}
861
862/**
863 * Flushes the page being accessed.
864 *
865 * @returns VBox status code suitable for scheduling.
866 * @param pVM The VM handle.
867 * @param pVCpu The VMCPU handle.
868 * @param pPool The pool.
869 * @param pPage The pool page (head).
870 * @param pDis The disassembly of the write instruction.
871 * @param pRegFrame The trap register frame.
872 * @param GCPhysFault The fault address as guest physical address.
873 * @param pvFault The fault address.
874 */
875static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
876 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
877{
878 /*
879 * First, do the flushing.
880 */
881 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
882
883 /*
884 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
885 */
886 uint32_t cbWritten;
887 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
888 if (RT_SUCCESS(rc2))
889 pRegFrame->rip += pDis->opsize;
890 else if (rc2 == VERR_EM_INTERPRETER)
891 {
892#ifdef IN_RC
893 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
894 {
895 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
896 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
897 rc = VINF_SUCCESS;
898 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
899 }
900 else
901#endif
902 {
903 rc = VINF_EM_RAW_EMULATE_INSTR;
904 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
905 }
906 }
907 else
908 rc = rc2;
909
910 /* See use in pgmPoolAccessHandlerSimple(). */
911 PGM_INVL_VCPU_TLBS(pVCpu);
912 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
913 return rc;
914}
915
916/**
917 * Handles the STOSD write accesses.
918 *
919 * @returns VBox status code suitable for scheduling.
920 * @param pVM The VM handle.
921 * @param pPool The pool.
922 * @param pPage The pool page (head).
923 * @param pDis The disassembly of the write instruction.
924 * @param pRegFrame The trap register frame.
925 * @param GCPhysFault The fault address as guest physical address.
926 * @param pvFault The fault address.
927 */
928DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
929 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
930{
931 unsigned uIncrement = pDis->param1.size;
932
933 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
934 Assert(pRegFrame->rcx <= 0x20);
935
936#ifdef VBOX_STRICT
937 if (pDis->opmode == CPUMODE_32BIT)
938 Assert(uIncrement == 4);
939 else
940 Assert(uIncrement == 8);
941#endif
942
943 Log3(("pgmPoolAccessHandlerSTOSD\n"));
944
945 /*
946 * Increment the modification counter and insert it into the list
947 * of modified pages the first time.
948 */
949 if (!pPage->cModifications++)
950 pgmPoolMonitorModifiedInsert(pPool, pPage);
951
952 /*
953 * Execute REP STOSD.
954 *
955     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
956 * write situation, meaning that it's safe to write here.
957 */
958 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
959 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
960 while (pRegFrame->rcx)
961 {
962#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
963 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
964 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
965 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
966#else
967 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
968#endif
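        /* Perform the store: in RC we can write through the faulting virtual address directly,
           otherwise we go through the guest physical address. */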
969#ifdef IN_RC
970 *(uint32_t *)pu32 = pRegFrame->eax;
971#else
972 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
973#endif
974 pu32 += uIncrement;
975 GCPhysFault += uIncrement;
976 pRegFrame->rdi += uIncrement;
977 pRegFrame->rcx--;
978 }
979 pRegFrame->rip += pDis->opsize;
980
981#ifdef IN_RC
982 /* See use in pgmPoolAccessHandlerSimple(). */
983 PGM_INVL_VCPU_TLBS(pVCpu);
984#endif
985
986 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
987 return VINF_SUCCESS;
988}
989
990
991/**
992 * Handles the simple write accesses.
993 *
994 * @returns VBox status code suitable for scheduling.
995 * @param pVM The VM handle.
996 * @param pVCpu The VMCPU handle.
997 * @param pPool The pool.
998 * @param pPage The pool page (head).
999 * @param pDis The disassembly of the write instruction.
1000 * @param pRegFrame The trap register frame.
1001 * @param GCPhysFault The fault address as guest physical address.
1002 * @param pvFault The fault address.
1003 * @param pfReused Reused state (out)
1004 */
1005DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1006 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1007{
1008 Log3(("pgmPoolAccessHandlerSimple\n"));
1009 /*
1010 * Increment the modification counter and insert it into the list
1011 * of modified pages the first time.
1012 */
1013 if (!pPage->cModifications++)
1014 pgmPoolMonitorModifiedInsert(pPool, pPage);
1015
1016 /*
1017 * Clear all the pages. ASSUMES that pvFault is readable.
1018 */
1019#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1020 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1021 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1022 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1023#else
1024 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1025#endif
1026
1027 /*
1028 * Interpret the instruction.
1029 */
1030 uint32_t cb;
1031 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1032 if (RT_SUCCESS(rc))
1033 pRegFrame->rip += pDis->opsize;
1034 else if (rc == VERR_EM_INTERPRETER)
1035 {
1036 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1037 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1038 rc = VINF_EM_RAW_EMULATE_INSTR;
1039 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1040 }
1041
1042#if 0 /* experimental code */
1043 if (rc == VINF_SUCCESS)
1044 {
1045 switch (pPage->enmKind)
1046 {
1047 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1048 {
1049 X86PTEPAE GstPte;
1050 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1051 AssertRC(rc);
1052
1053 /* Check the new value written by the guest. If present and with a bogus physical address, then
1054 * it's fairly safe to assume the guest is reusing the PT.
1055 */
1056 if (GstPte.n.u1Present)
1057 {
1058 RTHCPHYS HCPhys = -1;
1059 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1060 if (rc != VINF_SUCCESS)
1061 {
1062 *pfReused = true;
1063 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1064 }
1065 }
1066 break;
1067 }
1068 }
1069 }
1070#endif
1071
1072#ifdef IN_RC
1073 /*
1074 * Quick hack, with logging enabled we're getting stale
1075 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1076 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1077 * have to be fixed to support this. But that'll have to wait till next week.
1078 *
1079 * An alternative is to keep track of the changed PTEs together with the
1080     * GCPhys from the guest PT. This may prove expensive though.
1081 *
1082 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1083 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1084 */
1085 PGM_INVL_VCPU_TLBS(pVCpu);
1086#endif
1087
1088 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1089 return rc;
1090}
1091
1092/**
1093 * \#PF Handler callback for PT write accesses.
1094 *
1095 * @returns VBox status code (appropriate for GC return).
1096 * @param pVM VM Handle.
1097 * @param uErrorCode CPU Error code.
1098 * @param pRegFrame Trap register frame.
1099 * NULL on DMA and other non-CPU access.
1100 * @param pvFault The fault address (cr2).
1101 * @param GCPhysFault The GC physical address corresponding to pvFault.
1102 * @param pvUser User argument.
1103 */
1104DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1105{
1106 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1107 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1108 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1109 PVMCPU pVCpu = VMMGetCpu(pVM);
1110 unsigned cMaxModifications;
1111 bool fForcedFlush = false;
1112
1113 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1114
1115 pgmLock(pVM);
1116 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1117 {
1118 /* Pool page changed while we were waiting for the lock; ignore. */
1119 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1120 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1121 pgmUnlock(pVM);
1122 return VINF_SUCCESS;
1123 }
1124#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1125 if (pPage->fDirty)
1126 {
1127 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1128 pgmUnlock(pVM);
1129 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1130 }
1131#endif
1132
1133#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1134 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1135 {
1136 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1137 void *pvGst;
1138 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1139 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1140 }
1141#endif
1142
1143 /*
1144 * Disassemble the faulting instruction.
1145 */
1146 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1147 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1148 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1149 {
1150 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1151 pgmUnlock(pVM);
1152 return rc;
1153 }
1154
1155 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1156
1157 /*
1158 * We should ALWAYS have the list head as user parameter. This
1159 * is because we use that page to record the changes.
1160 */
1161 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1162
1163#ifdef IN_RING0
1164 /* Maximum nr of modifications depends on the page type. */
1165 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1166 cMaxModifications = 4;
1167 else
1168 cMaxModifications = 24;
1169#else
1170 cMaxModifications = 48;
1171#endif
1172
1173 /*
1174     * Incremental page table updates should weigh more than random ones.
1175 * (Only applies when started from offset 0)
1176 */
1177 pVCpu->pgm.s.cPoolAccessHandler++;
1178 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1179 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1180 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1181 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1182 {
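        /* Same instruction region writing the immediately following entry: treat it as a sequential
           page-table update and double the modification count so the flush threshold is reached sooner. */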
1183 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1184 pPage->cModifications = pPage->cModifications * 2;
1185 pPage->pvLastAccessHandlerFault = pvFault;
1186 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1187 if (pPage->cModifications >= cMaxModifications)
1188 {
1189 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1190 fForcedFlush = true;
1191 }
1192 }
1193
1194 if (pPage->cModifications >= cMaxModifications)
1195 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1196
1197 /*
1198 * Check if it's worth dealing with.
1199 */
1200 bool fReused = false;
1201 bool fNotReusedNotForking = false;
1202 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1203 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1204 )
1205 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1206 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1207 {
1208 /*
1209 * Simple instructions, no REP prefix.
1210 */
1211 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1212 {
1213 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1214 if (fReused)
1215 goto flushPage;
1216
1217 /* A mov instruction to change the first page table entry will be remembered so we can detect
1218 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1219 */
1220 if ( rc == VINF_SUCCESS
1221 && pDis->pCurInstr->opcode == OP_MOV
1222 && (pvFault & PAGE_OFFSET_MASK) == 0)
1223 {
1224 pPage->pvLastAccessHandlerFault = pvFault;
1225 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1226 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1227 /* Make sure we don't kick out a page too quickly. */
1228 if (pPage->cModifications > 8)
1229 pPage->cModifications = 2;
1230 }
1231 else
1232 if (pPage->pvLastAccessHandlerFault == pvFault)
1233 {
1234 /* ignore the 2nd write to this page table entry. */
1235 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1236 }
1237 else
1238 {
1239 pPage->pvLastAccessHandlerFault = 0;
1240 pPage->pvLastAccessHandlerRip = 0;
1241 }
1242
1243 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1244 pgmUnlock(pVM);
1245 return rc;
1246 }
1247
1248 /*
1249         * Windows frequently does small memset() operations (netio test 4k+).
1250 * We have to deal with these or we'll kill the cache and performance.
1251 */
1252 if ( pDis->pCurInstr->opcode == OP_STOSWD
1253 && !pRegFrame->eflags.Bits.u1DF
1254 && pDis->opmode == pDis->mode
1255 && pDis->addrmode == pDis->mode)
1256 {
1257 bool fValidStosd = false;
1258
1259 if ( pDis->mode == CPUMODE_32BIT
1260 && pDis->prefix == PREFIX_REP
1261 && pRegFrame->ecx <= 0x20
1262 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1263 && !((uintptr_t)pvFault & 3)
1264 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1265 )
1266 {
1267 fValidStosd = true;
1268 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1269 }
1270 else
1271 if ( pDis->mode == CPUMODE_64BIT
1272 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1273 && pRegFrame->rcx <= 0x20
1274 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1275 && !((uintptr_t)pvFault & 7)
1276 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1277 )
1278 {
1279 fValidStosd = true;
1280 }
1281
1282 if (fValidStosd)
1283 {
1284 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1285 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1286 pgmUnlock(pVM);
1287 return rc;
1288 }
1289 }
1290
1291 /* REP prefix, don't bother. */
1292 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1293 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1294 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1295 fNotReusedNotForking = true;
1296 }
1297
1298#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1299 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1300     * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1301 */
1302 if ( pPage->cModifications >= cMaxModifications
1303 && !fForcedFlush
1304 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1305 && ( fNotReusedNotForking
1306 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1307 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1308 )
1309 )
1310 {
1311 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1312 Assert(pPage->fDirty == false);
1313
1314 /* Flush any monitored duplicates as we will disable write protection. */
1315 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1316 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1317 {
1318 PPGMPOOLPAGE pPageHead = pPage;
1319
1320 /* Find the monitor head. */
1321 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1322 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1323
1324 while (pPageHead)
1325 {
1326 unsigned idxNext = pPageHead->iMonitoredNext;
1327
1328 if (pPageHead != pPage)
1329 {
1330 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1331 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1332 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1333 AssertRC(rc2);
1334 }
1335
1336 if (idxNext == NIL_PGMPOOL_IDX)
1337 break;
1338
1339 pPageHead = &pPool->aPages[idxNext];
1340 }
1341 }
1342
1343 /* The flushing above might fail for locked pages, so double check. */
1344 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1345 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1346 {
1347 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1348
1349 /* Temporarily allow write access to the page table again. */
1350 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1351 if (rc == VINF_SUCCESS)
1352 {
1353 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1354 AssertMsg(rc == VINF_SUCCESS
1355 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1356 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1357 || rc == VERR_PAGE_NOT_PRESENT,
1358 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1359
1360 pPage->pvDirtyFault = pvFault;
1361
1362 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1363 pgmUnlock(pVM);
1364 return rc;
1365 }
1366 }
1367 }
1368#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1369
1370 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1371flushPage:
1372 /*
1373 * Not worth it, so flush it.
1374 *
1375 * If we considered it to be reused, don't go back to ring-3
1376 * to emulate failed instructions since we usually cannot
1377     * interpret them. This may be a bit risky, in which case
1378 * the reuse detection must be fixed.
1379 */
1380 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1381 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1382 && fReused)
1383 {
1384 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1385 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1386 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1387 }
1388 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1389 pgmUnlock(pVM);
1390 return rc;
1391}
1392
1393# endif /* !IN_RING3 */
1394
1395# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1396
1397# ifdef VBOX_STRICT
1398/**
1399 * Check references to guest physical memory in a PAE / PAE page table.
1400 *
1401 * @param pPool The pool.
1402 * @param pPage The page.
1403 * @param pShwPT The shadow page table (mapping of the page).
1404 * @param pGstPT The guest page table.
1405 */
1406static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1407{
1408 unsigned cErrors = 0;
1409 int LastRc = -1; /* initialized to shut up gcc */
1410 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1411 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1412
1413#ifdef VBOX_STRICT
1414 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1415 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1416#endif
1417 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1418 {
1419 if (pShwPT->a[i].n.u1Present)
1420 {
1421 RTHCPHYS HCPhys = -1;
1422 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1423 if ( rc != VINF_SUCCESS
1424 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1425 {
1426 RTHCPHYS HCPhysPT = -1;
1427 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1428 LastPTE = i;
1429 LastRc = rc;
1430 LastHCPhys = HCPhys;
1431 cErrors++;
1432
1433 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1434 AssertRC(rc);
1435
1436 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1437 {
1438 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1439
1440 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1441 {
1442 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1443
1444 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1445 {
1446 if ( pShwPT2->a[j].n.u1Present
1447 && pShwPT2->a[j].n.u1Write
1448 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1449 {
1450 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1451 }
1452 }
1453 }
1454 }
1455 }
1456 }
1457 }
1458 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1459}
1460# endif /* VBOX_STRICT */
1461
1462/**
1463 * Clear references to guest physical memory in a PAE / PAE page table.
1464 *
1465 * @returns nr of changed PTEs
1466 * @param pPool The pool.
1467 * @param pPage The page.
1468 * @param pShwPT The shadow page table (mapping of the page).
1469 * @param pGstPT The guest page table.
1470 * @param pOldGstPT The old cached guest page table.
1471 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1472 * @param pfFlush Flush reused page table (out)
1473 */
1474DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1475{
1476 unsigned cChanged = 0;
1477
1478#ifdef VBOX_STRICT
1479 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1480 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1481#endif
1482 *pfFlush = false;
1483
1484 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1485 {
1486 /* Check the new value written by the guest. If present and with a bogus physical address, then
1487 * it's fairly safe to assume the guest is reusing the PT.
1488 */
1489 if ( fAllowRemoval
1490 && pGstPT->a[i].n.u1Present)
1491 {
1492 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1493 {
1494 *pfFlush = true;
1495 return ++cChanged;
1496 }
1497 }
1498 if (pShwPT->a[i].n.u1Present)
1499 {
1500 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1501 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1502 {
1503#ifdef VBOX_STRICT
1504 RTHCPHYS HCPhys = -1;
1505 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1506 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1507#endif
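                /* Compare the attribute bits too; the shadow PTE may legitimately be more restrictive
                   (e.g. write-protected for monitoring), but it must never be more permissive than the guest PTE. */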
1508 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1509 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1510 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1511 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1512
1513 if ( uHostAttr == uGuestAttr
1514 && fHostRW <= fGuestRW)
1515 continue;
1516 }
1517 cChanged++;
1518 /* Something was changed, so flush it. */
1519 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1520 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1521 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1522 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1523 }
1524 }
1525 return cChanged;
1526}
1527
1528
1529/**
1530 * Flush a dirty page
1531 *
1532 * @param pVM VM Handle.
1533 * @param pPool The pool.
1534 * @param idxSlot Dirty array slot index
1535 * @param fAllowRemoval Allow a reused page table to be removed
1536 */
1537static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1538{
1539 PPGMPOOLPAGE pPage;
1540 unsigned idxPage;
1541
1542 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1543 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1544 return;
1545
1546 idxPage = pPool->aIdxDirtyPages[idxSlot];
1547 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1548 pPage = &pPool->aPages[idxPage];
1549 Assert(pPage->idx == idxPage);
1550 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1551
1552 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1553 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1554
1555 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1556 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1557 Assert(rc == VINF_SUCCESS);
1558 pPage->fDirty = false;
1559
1560#ifdef VBOX_STRICT
1561 uint64_t fFlags = 0;
1562 RTHCPHYS HCPhys;
1563 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1564 AssertMsg( ( rc == VINF_SUCCESS
1565 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1566 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1567 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1568 || rc == VERR_PAGE_NOT_PRESENT,
1569 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1570#endif
1571
1572 /* Flush those PTEs that have changed. */
1573 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1574 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1575 void *pvGst;
1576 bool fFlush;
1577 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1578 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1579 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1580 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1581
1582 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1583 Assert(pPage->cModifications);
1584 if (cChanges < 4)
1585 pPage->cModifications = 1; /* must use > 0 here */
1586 else
1587 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1588
1589 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1590 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1591 pPool->idxFreeDirtyPage = idxSlot;
1592
1593 pPool->cDirtyPages--;
1594 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1595 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1596 if (fFlush)
1597 {
1598 Assert(fAllowRemoval);
1599 Log(("Flush reused page table!\n"));
1600 pgmPoolFlushPage(pPool, pPage);
1601 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1602 }
1603 else
1604 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1605}
1606
1607# ifndef IN_RING3
1608/**
1609 * Add a new dirty page
1610 *
1611 * @param pVM VM Handle.
1612 * @param pPool The pool.
1613 * @param pPage The page.
1614 */
1615void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1616{
1617 unsigned idxFree;
1618
1619 Assert(PGMIsLocked(pVM));
1620 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1621 Assert(!pPage->fDirty);
1622
1623 idxFree = pPool->idxFreeDirtyPage;
1624 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1625 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1626
1627 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1628 {
1629 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1630 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1631 }
1632 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1633 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1634
1635 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1636
1637 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1638 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1639 */
1640 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1641 void *pvGst;
1642 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1643 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1644#ifdef VBOX_STRICT
1645 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1646#endif
1647
1648 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1649 pPage->fDirty = true;
1650 pPage->idxDirty = idxFree;
1651 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1652 pPool->cDirtyPages++;
1653
1654 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1655 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1656 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1657 {
1658 unsigned i;
1659 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1660 {
1661 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1662 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1663 {
1664 pPool->idxFreeDirtyPage = idxFree;
1665 break;
1666 }
1667 }
1668 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1669 }
1670
1671 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1672 return;
1673}
1674# endif /* !IN_RING3 */
1675
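/*
 * Illustrative sketch, not part of the build: the free-slot advance in
 * pgmPoolAddDirtyPage above relies on RT_ELEMENTS(pPool->aIdxDirtyPages) being
 * a power of two (8 or 16, see the AssertCompile), so masking with size - 1 is
 * the same as taking the index modulo the array size. The helper name below is
 * hypothetical.
 */
#if 0
static unsigned pgmPoolNextDirtySlotSketch(unsigned idxSlot, unsigned cSlots)
{
    /* Assumes cSlots is a power of two: (idxSlot + 1) & (cSlots - 1) == (idxSlot + 1) % cSlots. */
    return (idxSlot + 1) & (cSlots - 1);
}
#endif
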
1676/**
1677 * Check if the specified page is dirty (not write monitored)
1678 *
1679 * @returns true if the page is marked dirty, false if not.
1680 * @param pVM VM Handle.
1681 * @param GCPhys Guest physical address
1682 */
1683bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1684{
1685 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1686 Assert(PGMIsLocked(pVM));
1687 if (!pPool->cDirtyPages)
1688 return false;
1689
1690 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1691
1692 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1693 {
1694 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1695 {
1696 PPGMPOOLPAGE pPage;
1697 unsigned idxPage = pPool->aIdxDirtyPages[i];
1698
1699 pPage = &pPool->aPages[idxPage];
1700 if (pPage->GCPhys == GCPhys)
1701 return true;
1702 }
1703 }
1704 return false;
1705}
1706
1707/**
1708 * Reset all dirty pages by reinstating page monitoring.
1709 *
1710 * @param pVM VM Handle.
1711 */
1712void pgmPoolResetDirtyPages(PVM pVM)
1713{
1714 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1715 Assert(PGMIsLocked(pVM));
1716 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1717
1718 if (!pPool->cDirtyPages)
1719 return;
1720
1721 Log(("pgmPoolResetDirtyPages\n"));
1722 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1723 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1724
1725 pPool->idxFreeDirtyPage = 0;
1726 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1727 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1728 {
1729 unsigned i;
1730 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1731 {
1732 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1733 {
1734 pPool->idxFreeDirtyPage = i;
1735 break;
1736 }
1737 }
1738 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1739 }
1740
1741 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1742 return;
1743}
1744
1745/**
1746 * Invalidates (flushes) the dirty page matching the given guest page table address, reinstating its page monitoring.
1747 *
1748 * @param pVM VM Handle.
1749 * @param GCPhysPT Physical address of the page table
1750 */
1751void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1752{
1753 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1754 Assert(PGMIsLocked(pVM));
1755 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1756 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1757
1758 if (!pPool->cDirtyPages)
1759 return;
1760
1761 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1762
1763 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1764 {
1765 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1766 {
1767 unsigned idxPage = pPool->aIdxDirtyPages[i];
1768
1769 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1770 if (pPage->GCPhys == GCPhysPT)
1771 {
1772 idxDirtyPage = i;
1773 break;
1774 }
1775 }
1776 }
1777
1778 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1779 {
1780 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1781 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1782 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1783 {
1784 unsigned i;
1785 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1786 {
1787 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1788 {
1789 pPool->idxFreeDirtyPage = i;
1790 break;
1791 }
1792 }
1793 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1794 }
1795 }
1796}
1797
1798# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1799
1800/**
1801 * Inserts a page into the GCPhys hash table.
1802 *
1803 * @param pPool The pool.
1804 * @param pPage The page.
1805 */
1806DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1807{
1808 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1809 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1810 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1811 pPage->iNext = pPool->aiHash[iHash];
1812 pPool->aiHash[iHash] = pPage->idx;
1813}
1814
1815
1816/**
1817 * Removes a page from the GCPhys hash table.
1818 *
1819 * @param pPool The pool.
1820 * @param pPage The page.
1821 */
1822DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1823{
1824 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1825 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1826 if (pPool->aiHash[iHash] == pPage->idx)
1827 pPool->aiHash[iHash] = pPage->iNext;
1828 else
1829 {
1830 uint16_t iPrev = pPool->aiHash[iHash];
1831 for (;;)
1832 {
1833 const int16_t i = pPool->aPages[iPrev].iNext;
1834 if (i == pPage->idx)
1835 {
1836 pPool->aPages[iPrev].iNext = pPage->iNext;
1837 break;
1838 }
1839 if (i == NIL_PGMPOOL_IDX)
1840 {
1841 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1842 break;
1843 }
1844 iPrev = i;
1845 }
1846 }
1847 pPage->iNext = NIL_PGMPOOL_IDX;
1848}
1849
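/*
 * Illustrative sketch, not part of the build: how a lookup walks the GCPhys
 * hash maintained by pgmPoolHashInsert/pgmPoolHashRemove above. The real
 * lookups live in pgmPoolCacheAlloc and pgmPoolMonitorGetPageByGCPhys further
 * down; the helper name below is hypothetical.
 */
#if 0
static PPGMPOOLPAGE pgmPoolHashLookupSketch(PPGMPOOL pPool, RTGCPHYS GCPhys)
{
    /* Hash the guest physical address and walk the singly linked iNext chain. */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    while (i != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (pPage->GCPhys == GCPhys)
            return pPage;
        i = pPage->iNext;
    }
    return NULL;
}
#endif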
1850
1851/**
1852 * Frees up one cache page.
1853 *
1854 * @returns VBox status code.
1855 * @retval VINF_SUCCESS on success.
1856 * @param pPool The pool.
1857 * @param iUser The user index.
1858 */
1859static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1860{
1861#ifndef IN_RC
1862 const PVM pVM = pPool->CTX_SUFF(pVM);
1863#endif
1864 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1865 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1866
1867 /*
1868 * Select one page from the tail of the age list.
1869 */
1870 PPGMPOOLPAGE pPage;
1871 for (unsigned iLoop = 0; ; iLoop++)
1872 {
1873 uint16_t iToFree = pPool->iAgeTail;
1874 if (iToFree == iUser)
1875 iToFree = pPool->aPages[iToFree].iAgePrev;
1876/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1877 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1878 {
1879 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1880 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1881 {
1882 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1883 continue;
1884 iToFree = i;
1885 break;
1886 }
1887 }
1888*/
1889 Assert(iToFree != iUser);
1890 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1891 pPage = &pPool->aPages[iToFree];
1892
1893 /*
1894 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1895 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1896 */
1897 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1898 break;
1899 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1900 pgmPoolCacheUsed(pPool, pPage);
1901 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1902 }
1903
1904 /*
1905 * Found a usable page, flush it and return.
1906 */
1907 int rc = pgmPoolFlushPage(pPool, pPage);
1908 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1909 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1910 if (rc == VINF_SUCCESS)
1911 PGM_INVL_ALL_VCPU_TLBS(pVM);
1912 return rc;
1913}
1914
1915
1916/**
1917 * Checks if a kind mismatch is really a page being reused
1918 * or if it's just normal remappings.
1919 *
1920 * @returns true if reused and the cached page (enmKind1) should be flushed
1921 * @returns false if not reused.
1922 * @param enmKind1 The kind of the cached page.
1923 * @param enmKind2 The kind of the requested page.
1924 */
1925static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1926{
1927 switch (enmKind1)
1928 {
1929 /*
1930 * Never reuse them. There is no remapping in non-paging mode.
1931 */
1932 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1933 case PGMPOOLKIND_32BIT_PD_PHYS:
1934 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1935 case PGMPOOLKIND_PAE_PD_PHYS:
1936 case PGMPOOLKIND_PAE_PDPT_PHYS:
1937 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1938 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1939 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1940 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1941 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1942 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1943 return false;
1944
1945 /*
1946 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1947 */
1948 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1949 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1950 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1951 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1952 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1953 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1954 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1955 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1956 case PGMPOOLKIND_32BIT_PD:
1957 case PGMPOOLKIND_PAE_PDPT:
1958 switch (enmKind2)
1959 {
1960 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1962 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1963 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1964 case PGMPOOLKIND_64BIT_PML4:
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1966 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1967 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1968 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1969 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1970 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1971 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1972 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1973 return true;
1974 default:
1975 return false;
1976 }
1977
1978 /*
1979 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1980 */
1981 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1982 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1983 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1984 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1985 case PGMPOOLKIND_64BIT_PML4:
1986 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1987 switch (enmKind2)
1988 {
1989 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1990 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1991 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1992 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1993 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1994 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1995 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1996 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1997 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1998 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1999 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2000 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2001 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2002 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2003 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2004 return true;
2005 default:
2006 return false;
2007 }
2008
2009 /*
2010 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2011 */
2012 case PGMPOOLKIND_ROOT_NESTED:
2013 return false;
2014
2015 default:
2016 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2017 }
2018}
2019
2020
2021/**
2022 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2023 *
2024 * @returns VBox status code.
2025 * @retval VINF_PGM_CACHED_PAGE on success.
2026 * @retval VERR_FILE_NOT_FOUND if not found.
2027 * @param pPool The pool.
2028 * @param GCPhys The GC physical address of the page we're gonna shadow.
2029 * @param enmKind The kind of mapping.
2030 * @param enmAccess Access type for the mapping (only relevant for big pages)
2031 * @param iUser The shadow page pool index of the user table.
2032 * @param iUserTable The index into the user table (shadowed).
2033 * @param ppPage Where to store the pointer to the page.
2034 */
2035static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2036{
2037#ifndef IN_RC
2038 const PVM pVM = pPool->CTX_SUFF(pVM);
2039#endif
2040 /*
2041 * Look up the GCPhys in the hash.
2042 */
2043 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2044 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2045 if (i != NIL_PGMPOOL_IDX)
2046 {
2047 do
2048 {
2049 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2050 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2051 if (pPage->GCPhys == GCPhys)
2052 {
2053 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2054 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2055 {
2056 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2057 * doesn't flush it in case there are no more free use records.
2058 */
2059 pgmPoolCacheUsed(pPool, pPage);
2060
2061 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2062 if (RT_SUCCESS(rc))
2063 {
2064 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2065 *ppPage = pPage;
2066 if (pPage->cModifications)
2067 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2068 STAM_COUNTER_INC(&pPool->StatCacheHits);
2069 return VINF_PGM_CACHED_PAGE;
2070 }
2071 return rc;
2072 }
2073
2074 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2075 {
2076 /*
2077 * The kind is different. In some cases we should now flush the page
2078 * as it has been reused, but in most cases this is normal remapping
2079 * of PDs as PT or big pages using the GCPhys field in a slightly
2080 * different way than the other kinds.
2081 */
2082 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2083 {
2084 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2085 pgmPoolFlushPage(pPool, pPage);
2086 break;
2087 }
2088 }
2089 }
2090
2091 /* next */
2092 i = pPage->iNext;
2093 } while (i != NIL_PGMPOOL_IDX);
2094 }
2095
2096 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2097 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2098 return VERR_FILE_NOT_FOUND;
2099}
2100
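/*
 * Illustrative caller sketch, not part of the build: how the two documented
 * outcomes of pgmPoolCacheAlloc above are typically consumed. The real
 * consumer is pgmPoolAlloc elsewhere in this file; the helper below is only a
 * hypothetical outline of that pattern, not its actual implementation.
 */
#if 0
static int pgmPoolAllocOutlineSketch(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess,
                                     uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
    int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
    if (rc == VINF_PGM_CACHED_PAGE)
        return rc;                      /* reuse the cached shadow page as-is */
    if (rc != VERR_FILE_NOT_FOUND)
        return rc;                      /* genuine failure (e.g. from pgmPoolTrackAddUser) */
    /* ... otherwise pick a free page from the pool and register it with pgmPoolTrackInsert ... */
    return VINF_SUCCESS;
}
#endif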
2101
2102/**
2103 * Inserts a page into the cache.
2104 *
2105 * @param pPool The pool.
2106 * @param pPage The cached page.
2107 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2108 */
2109static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2110{
2111 /*
2112 * Insert into the GCPhys hash if the page is fit for that.
2113 */
2114 Assert(!pPage->fCached);
2115 if (fCanBeCached)
2116 {
2117 pPage->fCached = true;
2118 pgmPoolHashInsert(pPool, pPage);
2119 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2120 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2121 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2122 }
2123 else
2124 {
2125 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2126 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2127 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2128 }
2129
2130 /*
2131 * Insert at the head of the age list.
2132 */
2133 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2134 pPage->iAgeNext = pPool->iAgeHead;
2135 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2136 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2137 else
2138 pPool->iAgeTail = pPage->idx;
2139 pPool->iAgeHead = pPage->idx;
2140}
2141
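/*
 * Illustrative sketch, not part of the build: promoting a page to the head of
 * the LRU age list maintained by pgmPoolCacheInsert/pgmPoolCacheFlushPage.
 * pgmPoolCacheUsed (defined elsewhere in this file) performs this kind of
 * operation; the code below is only a sketch based on the list fields used
 * above and assumes pPage is currently linked on the age list.
 */
#if 0
static void pgmPoolCacheUsedSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    if (pPool->iAgeHead == pPage->idx)
        return; /* already the most recently used page */

    /* Unlink from the current position. */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;

    /* Insert at the head. */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}
#endif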
2142
2143/**
2144 * Flushes a cached page.
2145 *
2146 * @param pPool The pool.
2147 * @param pPage The cached page.
2148 */
2149static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2150{
2151 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2152
2153 /*
2154 * Remove the page from the hash.
2155 */
2156 if (pPage->fCached)
2157 {
2158 pPage->fCached = false;
2159 pgmPoolHashRemove(pPool, pPage);
2160 }
2161 else
2162 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2163
2164 /*
2165 * Remove it from the age list.
2166 */
2167 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2168 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2169 else
2170 pPool->iAgeTail = pPage->iAgePrev;
2171 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2172 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2173 else
2174 pPool->iAgeHead = pPage->iAgeNext;
2175 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2176 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2177}
2178
2179
2180/**
2181 * Looks for pages sharing the monitor.
2182 *
2183 * @returns Pointer to the head page.
2184 * @returns NULL if not found.
2185 * @param pPool The pool.
2186 * @param pNewPage The page which is going to be monitored.
2187 */
2188static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2189{
2190 /*
2191 * Look up the GCPhys in the hash.
2192 */
2193 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2194 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2195 if (i == NIL_PGMPOOL_IDX)
2196 return NULL;
2197 do
2198 {
2199 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2200 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2201 && pPage != pNewPage)
2202 {
2203 switch (pPage->enmKind)
2204 {
2205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2208 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2209 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2210 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2211 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2212 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2213 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2214 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2215 case PGMPOOLKIND_64BIT_PML4:
2216 case PGMPOOLKIND_32BIT_PD:
2217 case PGMPOOLKIND_PAE_PDPT:
2218 {
2219 /* find the head */
2220 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2221 {
2222 Assert(pPage->iMonitoredPrev != pPage->idx);
2223 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2224 }
2225 return pPage;
2226 }
2227
2228 /* ignore, no monitoring. */
2229 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2230 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2231 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2232 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2233 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2234 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2235 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2236 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2237 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2238 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2239 case PGMPOOLKIND_ROOT_NESTED:
2240 case PGMPOOLKIND_PAE_PD_PHYS:
2241 case PGMPOOLKIND_PAE_PDPT_PHYS:
2242 case PGMPOOLKIND_32BIT_PD_PHYS:
2243 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2244 break;
2245 default:
2246 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2247 }
2248 }
2249
2250 /* next */
2251 i = pPage->iNext;
2252 } while (i != NIL_PGMPOOL_IDX);
2253 return NULL;
2254}
2255
2256
2257/**
2258 * Enables write monitoring of a guest page.
2259 *
2260 * @returns VBox status code.
2261 * @retval VINF_SUCCESS on success.
2262 * @param pPool The pool.
2263 * @param pPage The cached page.
2264 */
2265static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2266{
2267 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2268
2269 /*
2270 * Filter out the relevant kinds.
2271 */
2272 switch (pPage->enmKind)
2273 {
2274 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2276 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2277 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2278 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2279 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2280 case PGMPOOLKIND_64BIT_PML4:
2281 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2282 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2283 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2284 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2285 case PGMPOOLKIND_32BIT_PD:
2286 case PGMPOOLKIND_PAE_PDPT:
2287 break;
2288
2289 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2290 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2291 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2292 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2293 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2294 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2295 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2296 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2297 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2298 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2299 case PGMPOOLKIND_ROOT_NESTED:
2300 /* Nothing to monitor here. */
2301 return VINF_SUCCESS;
2302
2303 case PGMPOOLKIND_32BIT_PD_PHYS:
2304 case PGMPOOLKIND_PAE_PDPT_PHYS:
2305 case PGMPOOLKIND_PAE_PD_PHYS:
2306 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2307 /* Nothing to monitor here. */
2308 return VINF_SUCCESS;
2309 default:
2310 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2311 }
2312
2313 /*
2314 * Install handler.
2315 */
2316 int rc;
2317 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2318 if (pPageHead)
2319 {
2320 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2321 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2322
2323#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2324 if (pPageHead->fDirty)
2325 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2326#endif
2327
2328 pPage->iMonitoredPrev = pPageHead->idx;
2329 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2330 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2331 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2332 pPageHead->iMonitoredNext = pPage->idx;
2333 rc = VINF_SUCCESS;
2334 }
2335 else
2336 {
2337 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2338 PVM pVM = pPool->CTX_SUFF(pVM);
2339 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2340 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2341 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2342 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2343 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2344 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2345 pPool->pszAccessHandler);
2346 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2347 * the heap size should suffice. */
2348 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2349 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2350 }
2351 pPage->fMonitored = true;
2352 return rc;
2353}
2354
2355
2356/**
2357 * Disables write monitoring of a guest page.
2358 *
2359 * @returns VBox status code.
2360 * @retval VINF_SUCCESS on success.
2361 * @param pPool The pool.
2362 * @param pPage The cached page.
2363 */
2364static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2365{
2366 /*
2367 * Filter out the relevant kinds.
2368 */
2369 switch (pPage->enmKind)
2370 {
2371 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2372 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2373 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2374 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2375 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2376 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2377 case PGMPOOLKIND_64BIT_PML4:
2378 case PGMPOOLKIND_32BIT_PD:
2379 case PGMPOOLKIND_PAE_PDPT:
2380 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2381 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2382 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2383 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2384 break;
2385
2386 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2387 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2388 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2389 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2390 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2391 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2392 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2393 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2394 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2395 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2396 case PGMPOOLKIND_ROOT_NESTED:
2397 case PGMPOOLKIND_PAE_PD_PHYS:
2398 case PGMPOOLKIND_PAE_PDPT_PHYS:
2399 case PGMPOOLKIND_32BIT_PD_PHYS:
2400 /* Nothing to monitor here. */
2401 return VINF_SUCCESS;
2402
2403 default:
2404 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2405 }
2406
2407 /*
2408 * Remove the page from the monitored list or uninstall it if last.
2409 */
2410 const PVM pVM = pPool->CTX_SUFF(pVM);
2411 int rc;
2412 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2413 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2414 {
2415 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2416 {
2417 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2418 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2419 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2420 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2421 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2422 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2423 pPool->pszAccessHandler);
2424 AssertFatalRCSuccess(rc);
2425 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2426 }
2427 else
2428 {
2429 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2430 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2431 {
2432 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2433 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2434 }
2435 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2436 rc = VINF_SUCCESS;
2437 }
2438 }
2439 else
2440 {
2441 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2442 AssertFatalRC(rc);
2443#ifdef VBOX_STRICT
2444 PVMCPU pVCpu = VMMGetCpu(pVM);
2445#endif
2446 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2447 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2448 }
2449 pPage->fMonitored = false;
2450
2451 /*
2452 * Remove it from the list of modified pages (if in it).
2453 */
2454 pgmPoolMonitorModifiedRemove(pPool, pPage);
2455
2456 return rc;
2457}
2458
2459
2460/**
2461 * Inserts the page into the list of modified pages.
2462 *
2463 * @param pPool The pool.
2464 * @param pPage The page.
2465 */
2466void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2467{
2468 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2469 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2470 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2471 && pPool->iModifiedHead != pPage->idx,
2472 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2473 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2474 pPool->iModifiedHead, pPool->cModifiedPages));
2475
2476 pPage->iModifiedNext = pPool->iModifiedHead;
2477 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2478 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2479 pPool->iModifiedHead = pPage->idx;
2480 pPool->cModifiedPages++;
2481#ifdef VBOX_WITH_STATISTICS
2482 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2483 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2484#endif
2485}
2486
2487
2488/**
2489 * Removes the page from the list of modified pages and resets the
2490 * modification counter.
2491 *
2492 * @param pPool The pool.
2493 * @param pPage The page which is believed to be in the list of modified pages.
2494 */
2495static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2496{
2497 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2498 if (pPool->iModifiedHead == pPage->idx)
2499 {
2500 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2501 pPool->iModifiedHead = pPage->iModifiedNext;
2502 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2503 {
2504 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2505 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2506 }
2507 pPool->cModifiedPages--;
2508 }
2509 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2510 {
2511 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2512 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2513 {
2514 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2515 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2516 }
2517 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2518 pPool->cModifiedPages--;
2519 }
2520 else
2521 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2522 pPage->cModifications = 0;
2523}
2524
2525
2526/**
2527 * Zaps the list of modified pages, resetting their modification counters in the process.
2528 *
2529 * @param pVM The VM handle.
2530 */
2531static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2532{
2533 pgmLock(pVM);
2534 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2535 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2536
2537 unsigned cPages = 0; NOREF(cPages);
2538
2539#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2540 pgmPoolResetDirtyPages(pVM);
2541#endif
2542
2543 uint16_t idx = pPool->iModifiedHead;
2544 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2545 while (idx != NIL_PGMPOOL_IDX)
2546 {
2547 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2548 idx = pPage->iModifiedNext;
2549 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2550 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2551 pPage->cModifications = 0;
2552 Assert(++cPages);
2553 }
2554 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2555 pPool->cModifiedPages = 0;
2556 pgmUnlock(pVM);
2557}
2558
2559
2560/**
2561 * Handle SyncCR3 pool tasks
2562 *
2563 * @returns VBox status code.
2564 * @retval VINF_SUCCESS on success.
2565 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2566 * @param pVCpu The VMCPU handle.
2567 * @remark Should only be used when monitoring is available, thus placed in
2568 * the PGMPOOL_WITH_MONITORING #ifdef.
2569 */
2570int pgmPoolSyncCR3(PVMCPU pVCpu)
2571{
2572 PVM pVM = pVCpu->CTX_SUFF(pVM);
2573 LogFlow(("pgmPoolSyncCR3\n"));
2574
2575 /*
2576 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2577 * Occasionally we will have to clear all the shadow page tables because we wanted
2578 * to monitor a page which was mapped by too many shadowed page tables. This operation
2579 * is sometimes referred to as a 'lightweight flush'.
2580 */
2581# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2582 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2583 pgmR3PoolClearAll(pVM);
2584# else /* !IN_RING3 */
2585 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2586 {
2587 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2588 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2589 return VINF_PGM_SYNC_CR3;
2590 }
2591# endif /* !IN_RING3 */
2592 else
2593 pgmPoolMonitorModifiedClearAll(pVM);
2594
2595 return VINF_SUCCESS;
2596}
2597
2598
2599/**
2600 * Frees up at least one user entry.
2601 *
2602 * @returns VBox status code.
2603 * @retval VINF_SUCCESS if at least one user entry was freed.
2604 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2605 * @param pPool The pool.
2606 * @param iUser The user index.
2607 */
2608static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2609{
2610 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2611 /*
2612 * Just free cached pages in a braindead fashion.
2613 */
2614 /** @todo walk the age list backwards and free the first with usage. */
2615 int rc = VINF_SUCCESS;
2616 do
2617 {
2618 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2619 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2620 rc = rc2;
2621 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2622 return rc;
2623}
2624
2625
2626/**
2627 * Inserts a page into the cache.
2628 *
2629 * This will create user node for the page, insert it into the GCPhys
2630 * hash, and insert it into the age list.
2631 *
2632 * @returns VBox status code.
2633 * @retval VINF_SUCCESS if successfully added.
2634 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2635 * @param pPool The pool.
2636 * @param pPage The cached page.
2637 * @param GCPhys The GC physical address of the page we're gonna shadow.
2638 * @param iUser The user index.
2639 * @param iUserTable The user table index.
2640 */
2641DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2642{
2643 int rc = VINF_SUCCESS;
2644 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2645
2646 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2647
2648#ifdef VBOX_STRICT
2649 /*
2650 * Check that the entry doesn't already exist.
2651 */
2652 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2653 {
2654 uint16_t i = pPage->iUserHead;
2655 do
2656 {
2657 Assert(i < pPool->cMaxUsers);
2658 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2659 i = paUsers[i].iNext;
2660 } while (i != NIL_PGMPOOL_USER_INDEX);
2661 }
2662#endif
2663
2664 /*
2665 * Find a free user node.
2666 */
2667 uint16_t i = pPool->iUserFreeHead;
2668 if (i == NIL_PGMPOOL_USER_INDEX)
2669 {
2670 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2671 if (RT_FAILURE(rc))
2672 return rc;
2673 i = pPool->iUserFreeHead;
2674 }
2675
2676 /*
2677 * Unlink the user node from the free list,
2678 * initialize and insert it into the user list.
2679 */
2680 pPool->iUserFreeHead = paUsers[i].iNext;
2681 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2682 paUsers[i].iUser = iUser;
2683 paUsers[i].iUserTable = iUserTable;
2684 pPage->iUserHead = i;
2685
2686 /*
2687 * Insert into cache and enable monitoring of the guest page if enabled.
2688 *
2689 * Until we implement caching of all levels, including the CR3 one, we'll
2690 * have to make sure we don't try to monitor & cache any recursive reuse of
2691 * a monitored CR3 page. Because all Windows versions do this, we'll
2692 * have to be able to do combined access monitoring, CR3 + PT and
2693 * PD + PT (guest PAE).
2694 *
2695 * Update:
2696 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2697 */
2698 const bool fCanBeMonitored = true;
2699 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2700 if (fCanBeMonitored)
2701 {
2702 rc = pgmPoolMonitorInsert(pPool, pPage);
2703 AssertRC(rc);
2704 }
2705 return rc;
2706}
2707
2708
2709/**
2710 * Adds a user reference to a page.
2711 *
2712 * This will move the page to the head of the age list (via pgmPoolCacheUsed).
2713 *
2714 * @returns VBox status code.
2715 * @retval VINF_SUCCESS if successfully added.
2716 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2717 * @param pPool The pool.
2718 * @param pPage The cached page.
2719 * @param iUser The user index.
2720 * @param iUserTable The user table.
2721 */
2722static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2723{
2724 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2725
2726 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2727
2728# ifdef VBOX_STRICT
2729 /*
2730 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2731 */
2732 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2733 {
2734 uint16_t i = pPage->iUserHead;
2735 do
2736 {
2737 Assert(i < pPool->cMaxUsers);
2738 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2739 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2740 i = paUsers[i].iNext;
2741 } while (i != NIL_PGMPOOL_USER_INDEX);
2742 }
2743# endif
2744
2745 /*
2746 * Allocate a user node.
2747 */
2748 uint16_t i = pPool->iUserFreeHead;
2749 if (i == NIL_PGMPOOL_USER_INDEX)
2750 {
2751 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2752 if (RT_FAILURE(rc))
2753 return rc;
2754 i = pPool->iUserFreeHead;
2755 }
2756 pPool->iUserFreeHead = paUsers[i].iNext;
2757
2758 /*
2759 * Initialize the user node and insert it.
2760 */
2761 paUsers[i].iNext = pPage->iUserHead;
2762 paUsers[i].iUser = iUser;
2763 paUsers[i].iUserTable = iUserTable;
2764 pPage->iUserHead = i;
2765
2766# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2767 if (pPage->fDirty)
2768 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2769# endif
2770
2771 /*
2772 * Tell the cache to update its replacement stats for this page.
2773 */
2774 pgmPoolCacheUsed(pPool, pPage);
2775 return VINF_SUCCESS;
2776}
2777
2778
2779/**
2780 * Frees a user record associated with a page.
2781 *
2782 * This does not clear the entry in the user table, it simply returns the
2783 * user record to the chain of free records.
2784 *
2785 * @param pPool The pool.
2786 * @param HCPhys The HC physical address of the shadow page.
2787 * @param iUser The shadow page pool index of the user table.
2788 * @param iUserTable The index into the user table (shadowed).
2789 */
2790static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2791{
2792 /*
2793 * Unlink and free the specified user entry.
2794 */
2795 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2796
2797 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2798 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2799 uint16_t i = pPage->iUserHead;
2800 if ( i != NIL_PGMPOOL_USER_INDEX
2801 && paUsers[i].iUser == iUser
2802 && paUsers[i].iUserTable == iUserTable)
2803 {
2804 pPage->iUserHead = paUsers[i].iNext;
2805
2806 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2807 paUsers[i].iNext = pPool->iUserFreeHead;
2808 pPool->iUserFreeHead = i;
2809 return;
2810 }
2811
2812 /* General: Linear search. */
2813 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2814 while (i != NIL_PGMPOOL_USER_INDEX)
2815 {
2816 if ( paUsers[i].iUser == iUser
2817 && paUsers[i].iUserTable == iUserTable)
2818 {
2819 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2820 paUsers[iPrev].iNext = paUsers[i].iNext;
2821 else
2822 pPage->iUserHead = paUsers[i].iNext;
2823
2824 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2825 paUsers[i].iNext = pPool->iUserFreeHead;
2826 pPool->iUserFreeHead = i;
2827 return;
2828 }
2829 iPrev = i;
2830 i = paUsers[i].iNext;
2831 }
2832
2833 /* Fatal: didn't find it */
2834 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2835 iUser, iUserTable, pPage->GCPhys));
2836}
2837
2838
2839/**
2840 * Gets the entry size of a shadow table.
2841 *
2842 * @param enmKind The kind of page.
2843 *
2844 * @returns The size of the entry in bytes. That is, 4 or 8.
2845 * @returns If the kind is not for a table, an assertion is raised and 0 is
2846 * returned.
2847 */
2848DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2849{
2850 switch (enmKind)
2851 {
2852 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2853 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2854 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2855 case PGMPOOLKIND_32BIT_PD:
2856 case PGMPOOLKIND_32BIT_PD_PHYS:
2857 return 4;
2858
2859 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2861 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2862 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2863 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2864 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2865 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2866 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2867 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2868 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2869 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2870 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2871 case PGMPOOLKIND_64BIT_PML4:
2872 case PGMPOOLKIND_PAE_PDPT:
2873 case PGMPOOLKIND_ROOT_NESTED:
2874 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2875 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2876 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2877 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2878 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2879 case PGMPOOLKIND_PAE_PD_PHYS:
2880 case PGMPOOLKIND_PAE_PDPT_PHYS:
2881 return 8;
2882
2883 default:
2884 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2885 }
2886}
2887
2888
2889/**
2890 * Gets the entry size of a guest table.
2891 *
2892 * @param enmKind The kind of page.
2893 *
2894 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2895 * @returns If the kind is not for a table, an assertion is raised and 0 is
2896 * returned.
2897 */
2898DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2899{
2900 switch (enmKind)
2901 {
2902 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2904 case PGMPOOLKIND_32BIT_PD:
2905 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2907 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2908 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2909 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2910 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2911 return 4;
2912
2913 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2914 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2915 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2916 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2917 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2918 case PGMPOOLKIND_64BIT_PML4:
2919 case PGMPOOLKIND_PAE_PDPT:
2920 return 8;
2921
2922 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2923 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2924 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2925 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2926 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2927 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2928 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2929 case PGMPOOLKIND_ROOT_NESTED:
2930 case PGMPOOLKIND_PAE_PD_PHYS:
2931 case PGMPOOLKIND_PAE_PDPT_PHYS:
2932 case PGMPOOLKIND_32BIT_PD_PHYS:
2933 /** @todo can we return 0? (nobody is calling this...) */
2934 AssertFailed();
2935 return 0;
2936
2937 default:
2938 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2939 }
2940}
2941
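/*
 * Illustrative usage sketch, not part of the build: deriving the number of
 * entries in a shadow table from the entry size returned by
 * pgmPoolTrackGetShadowEntrySize above. With PAGE_SIZE being 4096 this gives
 * 1024 entries for 4 byte (32-bit) entries and 512 for 8 byte (PAE/AMD64/EPT)
 * entries. The helper name below is hypothetical.
 */
#if 0
static unsigned pgmPoolCalcShadowEntriesSketch(PGMPOOLKIND enmKind)
{
    return PAGE_SIZE / pgmPoolTrackGetShadowEntrySize(enmKind);
}
#endif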
2942
2943/**
2944 * Scans one shadow page table for mappings of a physical page.
2945 *
2946 * @returns true if any relevant PTEs were kept (only updated), false if all of them were removed.
2947 * @param pVM The VM handle.
2948 * @param pPhysPage The guest page in question.
2949 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2950 * @param iShw The shadow page table.
2951 * @param cRefs The number of references made in that PT.
2953 */
2954static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2955{
2956 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2957 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2958 bool bRet = false;
2959
2960 /*
2961 * Assert sanity.
2962 */
2963 Assert(cRefs == 1);
2964 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2965 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2966
2967 /*
2968 * Then, clear the actual mappings to the page in the shadow PT.
2969 */
2970 switch (pPage->enmKind)
2971 {
2972 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2973 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2974 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2975 {
2976 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2977 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2978 uint32_t u32AndMask, u32OrMask;
2979
2980 u32AndMask = 0;
2981 u32OrMask = 0;
2982
2983 if (!fFlushPTEs)
2984 {
2985 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2986 {
2987 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2988 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2989 u32OrMask = X86_PTE_RW;
2990 u32AndMask = UINT32_MAX;
2991 bRet = true;
2992 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2993 break;
2994
2995 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2996 u32OrMask = 0;
2997 u32AndMask = ~X86_PTE_RW;
2998 bRet = true;
2999 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3000 break;
3001 default:
3002 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3003 break;
3004 }
3005 }
3006 else
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008
3009 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3010 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3011 {
3012 X86PTE Pte;
3013
3014 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3015 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3016 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3017 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3018
3019 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3020 cRefs--;
3021 if (!cRefs)
3022 return bRet;
3023 }
3024#ifdef LOG_ENABLED
3025 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3026 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3027 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3028 {
3029 Log(("i=%d cRefs=%d\n", i, cRefs--));
3030 }
3031#endif
3032 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3033 break;
3034 }
3035
3036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3037 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3038 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3039 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3040 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3041 {
3042 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3043 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3044 uint64_t u64AndMask, u64OrMask;
3045
3046 u64OrMask = 0;
3047 u64AndMask = 0;
3048 if (!fFlushPTEs)
3049 {
3050 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3051 {
3052 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3053 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3054 u64OrMask = X86_PTE_RW;
3055 u64AndMask = UINT64_MAX;
3056 bRet = true;
3057 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3058 break;
3059
3060 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3061 u64OrMask = 0;
3062 u64AndMask = ~((uint64_t)X86_PTE_RW);
3063 bRet = true;
3064 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3065 break;
3066
3067 default:
3068 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3069 break;
3070 }
3071 }
3072 else
3073 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3074
3075 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3076 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3077 {
3078 X86PTEPAE Pte;
3079
3080 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3081 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3082 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3083 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3084
3085 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3086 cRefs--;
3087 if (!cRefs)
3088 return bRet;
3089 }
3090#ifdef LOG_ENABLED
3091 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3092 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3093 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3094 {
3095 Log(("i=%d cRefs=%d\n", i, cRefs--));
3096 }
3097#endif
3098 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3099 break;
3100 }
3101
3102 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3103 {
3104 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3105 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3106 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3107 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3108 {
3109 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3110 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3111 pPT->a[i].u = 0;
3112 cRefs--;
3113 if (!cRefs)
3114 return bRet;
3115 }
3116#ifdef LOG_ENABLED
3117 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3118 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3119 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3120 {
3121 Log(("i=%d cRefs=%d\n", i, cRefs--));
3122 }
3123#endif
3124 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3125 break;
3126 }
3127
3128 default:
3129 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3130 }
3131 return bRet;
3132}
3133
3134
3135/**
3136 * Scans one shadow page table for mappings of a physical page.
3137 *
3138 * @param pVM The VM handle.
3139 * @param pPhysPage The guest page in question.
3140 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3141 * @param iShw The shadow page table.
3142 * @param cRefs The number of references made in that PT.
3143 */
3144static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3145{
3146 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3147
3148 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3149 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3150 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3151 if (!fKeptPTEs)
3152 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3153 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3154}
3155
3156
3157/**
3158 * Flushes a list of shadow page tables mapping the same physical page.
3159 *
3160 * @param pVM The VM handle.
3161 * @param pPhysPage The guest page in question.
3162 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3163 * @param iPhysExt The physical cross reference extent list to flush.
3164 */
3165static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3166{
3167 Assert(PGMIsLockOwner(pVM));
3168 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3169 bool fKeepList = false;
3170
3171 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3172 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3173
3174 const uint16_t iPhysExtStart = iPhysExt;
3175 PPGMPOOLPHYSEXT pPhysExt;
3176 do
3177 {
3178 Assert(iPhysExt < pPool->cMaxPhysExts);
3179 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3180 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3181 {
3182 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3183 {
3184 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3185 if (!fKeptPTEs)
3186 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3187 else
3188 fKeepList = true;
3189 }
3190 }
3191 /* next */
3192 iPhysExt = pPhysExt->iNext;
3193 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3194
3195 if (!fKeepList)
3196 {
3197 /* insert the list into the free list and clear the ram range entry. */
3198 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3199 pPool->iPhysExtFreeHead = iPhysExtStart;
3200 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3201 }
3202
3203 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3204}
3205
3206
3207/**
3208 * Flushes all shadow page table mappings of the given guest page.
3209 *
3210 * This is typically called when the host page backing the guest one has been
3211 * replaced or when the page protection was changed due to an access handler.
3212 *
3213 * @returns VBox status code.
3214 * @retval VINF_SUCCESS if all references have been successfully cleared.
3215 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3216 * pool cleaning. FF and sync flags are set.
3217 *
3218 * @param pVM The VM handle.
3219 * @param pPhysPage The guest page in question.
3220 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3221 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3222 * flushed, it is NOT touched if this isn't necessary.
3223 * The caller MUST initialize this to @a false.
3224 */
3225int pgmPoolTrackUpdateGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3226{
3227 PVMCPU pVCpu = VMMGetCpu(pVM);
3228 pgmLock(pVM);
3229 int rc = VINF_SUCCESS;
3230 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3231 if (u16)
3232 {
3233 /*
3234 * The zero page is currently screwing up the tracking and we'll
3235 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3236 * is defined, zero pages won't normally be mapped. Some kind of solution
3237 * will be needed for this problem of course, but it will have to wait...
3238 */
3239 if (PGM_PAGE_IS_ZERO(pPhysPage))
3240 rc = VINF_PGM_GCPHYS_ALIASED;
3241 else
3242 {
3243# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3244 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3245 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3246 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3247# endif
3248
3249 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3250 pgmPoolTrackFlushGCPhysPT(pVM,
3251 pPhysPage,
3252 fFlushPTEs,
3253 PGMPOOL_TD_GET_IDX(u16),
3254 PGMPOOL_TD_GET_CREFS(u16));
3255 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3256 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3257 else
3258 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3259 *pfFlushTLBs = true;
3260
3261# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3262 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3263# endif
3264 }
3265 }
3266
3267 if (rc == VINF_PGM_GCPHYS_ALIASED)
3268 {
3269 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3270 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3271 rc = VINF_PGM_SYNC_CR3;
3272 }
3273 pgmUnlock(pVM);
3274 return rc;
3275}
3276
3277
3278/**
3279 * Scans all shadow page tables for mappings of a physical page.
3280 *
3281 * This may be slow, but it's most likely more efficient than cleaning
3282 * out the entire page pool / cache.
3283 *
3284 * @returns VBox status code.
3285 * @retval VINF_SUCCESS if all references have been successfully cleared.
3286 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3287 * a page pool cleaning.
3288 *
3289 * @param pVM The VM handle.
3290 * @param pPhysPage The guest page in question.
3291 */
3292int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3293{
3294 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3295 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3296 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3297 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3298
3299#if 1
3300 /*
3301 * There is a limit to what makes sense.
3302 */
3303 if (pPool->cPresent > 1024)
3304 {
3305 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3306 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3307 return VINF_PGM_GCPHYS_ALIASED;
3308 }
3309#endif
3310
3311 /*
3312 * Iterate all the pages until we've encountered all those in use.
3313 * This is a simple but not quite optimal solution.
3314 */
3315 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3316 const uint32_t u32 = u64;
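    /* u64 is the comparand for PAE page table entries; u32 is its truncated
       counterpart for the legacy 32-bit entries scanned below. */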
3317 unsigned cLeft = pPool->cUsedPages;
3318 unsigned iPage = pPool->cCurPages;
3319 while (--iPage >= PGMPOOL_IDX_FIRST)
3320 {
3321 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3322 if (pPage->GCPhys != NIL_RTGCPHYS)
3323 {
3324 switch (pPage->enmKind)
3325 {
3326 /*
3327 * We only care about shadow page tables.
3328 */
3329 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3330 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3331 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3332 {
3333 unsigned cPresent = pPage->cPresent;
3334 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3335 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3336 if (pPT->a[i].n.u1Present)
3337 {
3338 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3339 {
3340 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3341 pPT->a[i].u = 0;
3342 }
3343 if (!--cPresent)
3344 break;
3345 }
3346 break;
3347 }
3348
3349 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3350 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3351 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3352 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3353 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3354 {
3355 unsigned cPresent = pPage->cPresent;
3356 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3357 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3358 if (pPT->a[i].n.u1Present)
3359 {
3360 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3361 {
3362 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3363 pPT->a[i].u = 0;
3364 }
3365 if (!--cPresent)
3366 break;
3367 }
3368 break;
3369 }
3370 }
3371 if (!--cLeft)
3372 break;
3373 }
3374 }
3375
3376 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3377 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3378 return VINF_SUCCESS;
3379}
3380
3381
3382/**
3383 * Clears the user entry in a user table.
3384 *
3385 * This is used to remove all references to a page when flushing it.
3386 */
3387static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3388{
3389 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3390 Assert(pUser->iUser < pPool->cCurPages);
3391 uint32_t iUserTable = pUser->iUserTable;
3392
3393 /*
3394 * Map the user page.
3395 */
3396 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3397 union
3398 {
3399 uint64_t *pau64;
3400 uint32_t *pau32;
3401 } u;
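    /* The union view lets the user entry be cleared as either a 32-bit or a 64-bit
       write, depending on the kind of the user page (see the switches below). */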
3402 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3403
3404 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3405
3406 /* Safety precaution in case we change the paging for other modes too in the future. */
3407 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3408
3409#ifdef VBOX_STRICT
3410 /*
3411 * Some sanity checks.
3412 */
3413 switch (pUserPage->enmKind)
3414 {
3415 case PGMPOOLKIND_32BIT_PD:
3416 case PGMPOOLKIND_32BIT_PD_PHYS:
3417 Assert(iUserTable < X86_PG_ENTRIES);
3418 break;
3419 case PGMPOOLKIND_PAE_PDPT:
3420 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3421 case PGMPOOLKIND_PAE_PDPT_PHYS:
3422 Assert(iUserTable < 4);
3423 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3424 break;
3425 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3426 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3427 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3428 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3429 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3430 case PGMPOOLKIND_PAE_PD_PHYS:
3431 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3432 break;
3433 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3434 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3435 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3436 break;
3437 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3438 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3439 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3440 break;
3441 case PGMPOOLKIND_64BIT_PML4:
3442 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3443 /* GCPhys >> PAGE_SHIFT is the index here */
3444 break;
3445 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3446 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3447 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3448 break;
3449
3450 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3451 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3452 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3453 break;
3454
3455 case PGMPOOLKIND_ROOT_NESTED:
3456 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3457 break;
3458
3459 default:
3460 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3461 break;
3462 }
3463#endif /* VBOX_STRICT */
3464
3465 /*
3466 * Clear the entry in the user page.
3467 */
3468 switch (pUserPage->enmKind)
3469 {
3470 /* 32-bit entries */
3471 case PGMPOOLKIND_32BIT_PD:
3472 case PGMPOOLKIND_32BIT_PD_PHYS:
3473 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3474 break;
3475
3476 /* 64-bit entries */
3477 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3478 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3479 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3480 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3481 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3482#if defined(IN_RC)
3483 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3484 * non-present PDPT will continue to cause page faults.
3485 */
3486 ASMReloadCR3();
3487#endif
3488 /* no break */
3489 case PGMPOOLKIND_PAE_PD_PHYS:
3490 case PGMPOOLKIND_PAE_PDPT_PHYS:
3491 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3492 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3493 case PGMPOOLKIND_64BIT_PML4:
3494 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3495 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3496 case PGMPOOLKIND_PAE_PDPT:
3497 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3498 case PGMPOOLKIND_ROOT_NESTED:
3499 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3500 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3501 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3502 break;
3503
3504 default:
3505 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3506 }
3507}
3508
3509
3510/**
3511 * Clears all users of a page.
3512 */
3513static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3514{
3515 /*
3516 * Free all the user records.
3517 */
3518 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3519
3520 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3521 uint16_t i = pPage->iUserHead;
3522 while (i != NIL_PGMPOOL_USER_INDEX)
3523 {
3524 /* Clear entry in user table. */
3525 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3526
3527 /* Free it. */
3528 const uint16_t iNext = paUsers[i].iNext;
3529 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3530 paUsers[i].iNext = pPool->iUserFreeHead;
3531 pPool->iUserFreeHead = i;
3532
3533 /* Next. */
3534 i = iNext;
3535 }
3536 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3537}
3538
3539
3540/**
3541 * Allocates a new physical cross reference extent.
3542 *
3543 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3544 * @param pVM The VM handle.
3545 * @param piPhysExt Where to store the phys ext index.
3546 */
3547PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3548{
3549 Assert(PGMIsLockOwner(pVM));
3550 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3551 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3552 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3553 {
3554 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3555 return NULL;
3556 }
3557 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3558 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3559 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3560 *piPhysExt = iPhysExt;
3561 return pPhysExt;
3562}
3563
3564
3565/**
3566 * Frees a physical cross reference extent.
3567 *
3568 * @param pVM The VM handle.
3569 * @param iPhysExt The extent to free.
3570 */
3571void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3572{
3573 Assert(PGMIsLockOwner(pVM));
3574 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3575 Assert(iPhysExt < pPool->cMaxPhysExts);
3576 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3577 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3578 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3579 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3580 pPool->iPhysExtFreeHead = iPhysExt;
3581}
3582
3583
3584/**
3585 * Frees a list of physical cross reference extents.
3586 *
3587 * @param pVM The VM handle.
3588 * @param iPhysExt The index of the first extent in the list to free.
3589 */
3590void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3591{
3592 Assert(PGMIsLockOwner(pVM));
3593 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3594
3595 const uint16_t iPhysExtStart = iPhysExt;
3596 PPGMPOOLPHYSEXT pPhysExt;
3597 do
3598 {
3599 Assert(iPhysExt < pPool->cMaxPhysExts);
3600 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3601 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3602 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3603
3604 /* next */
3605 iPhysExt = pPhysExt->iNext;
3606 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3607
3608 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3609 pPool->iPhysExtFreeHead = iPhysExtStart;
3610}
3611
3612
3613/**
3614 * Insert a reference into a list of physical cross reference extents.
3615 *
3616 * @returns The new tracking data for PGMPAGE.
3617 *
3618 * @param pVM The VM handle.
3619 * @param iPhysExt The physical extent index of the list head.
3620 * @param iShwPT The shadow page table index.
3621 *
3622 */
3623static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3624{
3625 Assert(PGMIsLockOwner(pVM));
3626 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3627 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3628
3629 /* special common case. */
3630 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3631 {
3632 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3633 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3634 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3635 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3636 }
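    /* Slot 2 is probed first above because slots 0 and 1 are already taken when an
       extent is freshly created by pgmPoolTrackPhysExtAddref. */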
3637
3638 /* general treatment. */
3639 const uint16_t iPhysExtStart = iPhysExt;
3640 unsigned cMax = 15;
3641 for (;;)
3642 {
3643 Assert(iPhysExt < pPool->cMaxPhysExts);
3644 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3645 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3646 {
3647 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3648 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3649 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3650 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3651 }
3652 if (!--cMax)
3653 {
3654 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3655 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3656 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3657 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3658 }
        /* Advance to the next extent; when the end of the chain is reached, drop out
           of the loop so a new extent can be linked in below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
3659 }
3660
3661 /* add another extent to the list. */
3662 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3663 if (!pNew)
3664 {
3665 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3666 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3667 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3668 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3669 }
3670 pNew->iNext = iPhysExtStart;
3671 pNew->aidx[0] = iShwPT;
3672 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3673 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3674}
3675
3676
3677/**
3678 * Add a reference to guest physical page where extents are in use.
3679 *
3680 * @returns The new tracking data for PGMPAGE.
3681 *
3682 * @param pVM The VM handle.
3683 * @param u16 The ram range flags (top 16-bits).
3684 * @param iShwPT The shadow page table index.
3685 */
3686uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3687{
3688 pgmLock(pVM);
3689 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3690 {
3691 /*
3692 * Convert to extent list.
3693 */
3694 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
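        /* The page currently has exactly one direct reference; seed a new extent with
           that index and the one being added. */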
3695 uint16_t iPhysExt;
3696 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3697 if (pPhysExt)
3698 {
3699 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3700 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3701 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3702 pPhysExt->aidx[1] = iShwPT;
3703 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3704 }
3705 else
3706 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3707 }
3708 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3709 {
3710 /*
3711 * Insert into the extent list.
3712 */
3713 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3714 }
3715 else
3716 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3717 pgmUnlock(pVM);
3718 return u16;
3719}
3720
3721
3722/**
3723 * Clear references to guest physical memory.
3724 *
3725 * @param pPool The pool.
3726 * @param pPage The page.
3727 * @param pPhysPage Pointer to the aPages entry in the ram range.
3728 */
3729void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3730{
3731 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3732 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3733
3734 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3735 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3736 {
3737 PVM pVM = pPool->CTX_SUFF(pVM);
3738 pgmLock(pVM);
3739
3740 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3741 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
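        /* Walk the chain, drop this page's index, and free any node that becomes empty,
           handling the lonely / head / middle-of-list cases separately below. */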
3742 do
3743 {
3744 Assert(iPhysExt < pPool->cMaxPhysExts);
3745
3746 /*
3747 * Look for the shadow page and check if it's all freed.
3748 */
3749 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3750 {
3751 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3752 {
3753 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3754
3755 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3756 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3757 {
3758 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3759 pgmUnlock(pVM);
3760 return;
3761 }
3762
3763 /* we can free the node. */
3764 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3765 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3766 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3767 {
3768 /* lonely node */
3769 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3770 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3771 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3772 }
3773 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3774 {
3775 /* head */
3776 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3777 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3778 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3779 }
3780 else
3781 {
3782 /* in list */
3783 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3784 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3785 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3786 }
3787 iPhysExt = iPhysExtNext;
3788 pgmUnlock(pVM);
3789 return;
3790 }
3791 }
3792
3793 /* next */
3794 iPhysExtPrev = iPhysExt;
3795 iPhysExt = paPhysExts[iPhysExt].iNext;
3796 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3797
3798 pgmUnlock(pVM);
3799 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3800 }
3801 else /* nothing to do */
3802 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3803}
3804
3805
3806/**
3807 * Clear references to guest physical memory.
3808 *
3809 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3810 * is assumed to be correct, so the linear search can be skipped and we can assert
3811 * at an earlier point.
3812 *
3813 * @param pPool The pool.
3814 * @param pPage The page.
3815 * @param HCPhys The host physical address corresponding to the guest page.
3816 * @param GCPhys The guest physical address corresponding to HCPhys.
3817 */
3818static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3819{
3820 /*
3821 * Walk range list.
3822 */
3823 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3824 while (pRam)
3825 {
3826 RTGCPHYS off = GCPhys - pRam->GCPhys;
3827 if (off < pRam->cb)
3828 {
3829 /* does it match? */
3830 const unsigned iPage = off >> PAGE_SHIFT;
3831 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3832#ifdef LOG_ENABLED
3833            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3834            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3835#endif
3836 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3837 {
3838 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3839 return;
3840 }
3841 break;
3842 }
3843 pRam = pRam->CTX_SUFF(pNext);
3844 }
3845 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3846}
3847
3848
3849/**
3850 * Clear references to guest physical memory.
3851 *
3852 * @param pPool The pool.
3853 * @param pPage The page.
3854 * @param HCPhys The host physical address corresponding to the guest page.
3855 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3856 */
3857void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3858{
3859 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3860
3861 /*
3862 * Walk range list.
3863 */
3864 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3865 while (pRam)
3866 {
3867 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3868 if (off < pRam->cb)
3869 {
3870 /* does it match? */
3871 const unsigned iPage = off >> PAGE_SHIFT;
3872 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3873 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3874 {
3875 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3876 return;
3877 }
3878 break;
3879 }
3880 pRam = pRam->CTX_SUFF(pNext);
3881 }
3882
3883 /*
3884 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3885 */
3886 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3887 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3888 while (pRam)
3889 {
3890 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3891 while (iPage-- > 0)
3892 {
3893 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3894 {
3895 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3896 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3897 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3898 return;
3899 }
3900 }
3901 pRam = pRam->CTX_SUFF(pNext);
3902 }
3903
3904 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3905}
3906
3907
3908/**
3909 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3910 *
3911 * @param pPool The pool.
3912 * @param pPage The page.
3913 * @param pShwPT The shadow page table (mapping of the page).
3914 * @param pGstPT The guest page table.
3915 */
3916DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3917{
3918 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3919 if (pShwPT->a[i].n.u1Present)
3920 {
3921 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3922 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3923 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3924 if (!--pPage->cPresent)
3925 break;
3926 }
3927}
3928
3929
3930/**
3931 * Clear references to guest physical memory in a PAE / 32-bit page table.
3932 *
3933 * @param pPool The pool.
3934 * @param pPage The page.
3935 * @param pShwPT The shadow page table (mapping of the page).
3936 * @param pGstPT The guest page table (just a half one).
3937 */
3938DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3939{
3940 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3941 if (pShwPT->a[i].n.u1Present)
3942 {
3943 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3944 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3945 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3946 if (!--pPage->cPresent)
3947 break;
3948 }
3949}
3950
3951
3952/**
3953 * Clear references to guest physical memory in a PAE / PAE page table.
3954 *
3955 * @param pPool The pool.
3956 * @param pPage The page.
3957 * @param pShwPT The shadow page table (mapping of the page).
3958 * @param pGstPT The guest page table.
3959 */
3960DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3961{
3962 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3963 if (pShwPT->a[i].n.u1Present)
3964 {
3965 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3966 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3967 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3968 if (!--pPage->cPresent)
3969 break;
3970 }
3971}
3972
3973
3974/**
3975 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3976 *
3977 * @param pPool The pool.
3978 * @param pPage The page.
3979 * @param pShwPT The shadow page table (mapping of the page).
3980 */
3981DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3982{
3983 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
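    /* There is no guest PT to consult for a 4 MB mapping; the guest address is derived
       from the page's base GCPhys plus the entry index. */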
3984 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3985 if (pShwPT->a[i].n.u1Present)
3986 {
3987 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3988 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3989 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3990 if (!--pPage->cPresent)
3991 break;
3992 }
3993}
3994
3995
3996/**
3997 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3998 *
3999 * @param pPool The pool.
4000 * @param pPage The page.
4001 * @param pShwPT The shadow page table (mapping of the page).
4002 */
4003DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4004{
4005 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4006 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4007 if (pShwPT->a[i].n.u1Present)
4008 {
4009 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4010 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4011 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4012 if (!--pPage->cPresent)
4013 break;
4014 }
4015}
4016
4017
4018/**
4019 * Clear references to shadowed pages in an EPT page table.
4020 *
4021 * @param pPool The pool.
4022 * @param pPage The page.
4023 * @param pShwPT The shadow page table (mapping of the page).
4024 */
4025DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4026{
4027 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4028 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4029 if (pShwPT->a[i].n.u1Present)
4030 {
4031 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4032 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
4033 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4034 if (!--pPage->cPresent)
4035 break;
4036 }
4037}
4038
4039
4040
4041/**
4042 * Clear references to shadowed pages in a 32-bit page directory.
4043 *
4044 * @param pPool The pool.
4045 * @param pPage The page.
4046 * @param pShwPD The shadow page directory (mapping of the page).
4047 */
4048DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4049{
4050 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4051 {
4052 if ( pShwPD->a[i].n.u1Present
4053 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4054 )
4055 {
4056 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4057 if (pSubPage)
4058 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4059 else
4060 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4061 }
4062 }
4063}
4064
4065/**
4066 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4067 *
4068 * @param pPool The pool.
4069 * @param pPage The page.
4070 * @param pShwPD The shadow page directory (mapping of the page).
4071 */
4072DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4073{
4074 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4075 {
4076 if ( pShwPD->a[i].n.u1Present
4077 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4078 )
4079 {
4080 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4081 if (pSubPage)
4082 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4083 else
4084 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4085 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4086 }
4087 }
4088}
4089
4090/**
4091 * Clear references to shadowed pages in a PAE page directory pointer table.
4092 *
4093 * @param pPool The pool.
4094 * @param pPage The page.
4095 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4096 */
4097DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4098{
4099 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4100 {
4101 if ( pShwPDPT->a[i].n.u1Present
4102 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4103 )
4104 {
4105 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4106 if (pSubPage)
4107 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4108 else
4109 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4110 }
4111 }
4112}
4113
4114
4115/**
4116 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4117 *
4118 * @param pPool The pool.
4119 * @param pPage The page.
4120 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4121 */
4122DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4123{
4124 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4125 {
4126 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4127 if (pShwPDPT->a[i].n.u1Present)
4128 {
4129 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4130 if (pSubPage)
4131 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4132 else
4133 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4134 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4135 }
4136 }
4137}
4138
4139
4140/**
4141 * Clear references to shadowed pages in a 64-bit level 4 page table.
4142 *
4143 * @param pPool The pool.
4144 * @param pPage The page.
4145 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4146 */
4147DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4148{
4149 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4150 {
4151 if (pShwPML4->a[i].n.u1Present)
4152 {
4153 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4154 if (pSubPage)
4155 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4156 else
4157 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4158 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4159 }
4160 }
4161}
4162
4163
4164/**
4165 * Clear references to shadowed pages in an EPT page directory.
4166 *
4167 * @param pPool The pool.
4168 * @param pPage The page.
4169 * @param pShwPD The shadow page directory (mapping of the page).
4170 */
4171DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4172{
4173 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4174 {
4175 if (pShwPD->a[i].n.u1Present)
4176 {
4177 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4178 if (pSubPage)
4179 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4180 else
4181 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4182 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4183 }
4184 }
4185}
4186
4187
4188/**
4189 * Clear references to shadowed pages in an EPT page directory pointer table.
4190 *
4191 * @param pPool The pool.
4192 * @param pPage The page.
4193 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4194 */
4195DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4196{
4197 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4198 {
4199 if (pShwPDPT->a[i].n.u1Present)
4200 {
4201 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4202 if (pSubPage)
4203 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4204 else
4205 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4206 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4207 }
4208 }
4209}
4210
4211
4212/**
4213 * Clears all references made by this page.
4214 *
4215 * This includes other shadow pages and GC physical addresses.
4216 *
4217 * @param pPool The pool.
4218 * @param pPage The page.
4219 */
4220static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4221{
4222 /*
4223 * Map the shadow page and take action according to the page kind.
4224 */
4225 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4226 switch (pPage->enmKind)
4227 {
4228 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4229 {
4230 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4231 void *pvGst;
4232 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4233 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4234 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4235 break;
4236 }
4237
4238 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4239 {
4240 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4241 void *pvGst;
4242 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4243 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4244 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4245 break;
4246 }
4247
4248 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4249 {
4250 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4251 void *pvGst;
4252 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4253 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4254 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4255 break;
4256 }
4257
4258 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4259 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4260 {
4261 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4262 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4263 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4264 break;
4265 }
4266
4267 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4270 {
4271 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4272 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4273 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4274 break;
4275 }
4276
4277 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4278 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4279 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4280 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4281 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4282 case PGMPOOLKIND_PAE_PD_PHYS:
4283 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4284 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4285 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4286 break;
4287
4288 case PGMPOOLKIND_32BIT_PD_PHYS:
4289 case PGMPOOLKIND_32BIT_PD:
4290 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4291 break;
4292
4293 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4294 case PGMPOOLKIND_PAE_PDPT:
4295 case PGMPOOLKIND_PAE_PDPT_PHYS:
4296 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4297 break;
4298
4299 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4300 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4301 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4302 break;
4303
4304 case PGMPOOLKIND_64BIT_PML4:
4305 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4306 break;
4307
4308 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4309 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4310 break;
4311
4312 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4313 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4314 break;
4315
4316 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4317 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4318 break;
4319
4320 default:
4321 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4322 }
4323
4324 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4325 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4326 ASMMemZeroPage(pvShw);
4327 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4328 pPage->fZeroed = true;
4329 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4330}
4331
4332/**
4333 * Flushes a pool page.
4334 *
4335 * This moves the page to the free list after removing all user references to it.
4336 *
4337 * @returns VBox status code.
4338 * @retval VINF_SUCCESS on success.
4339 * @param pPool The pool.
4340 * @param pPage The shadow page to flush.
4341 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4342 */
4343int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4344{
4345 PVM pVM = pPool->CTX_SUFF(pVM);
4346 bool fFlushRequired = false;
4347
4348 int rc = VINF_SUCCESS;
4349 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4350 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4351 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4352
4353 /*
4354 * Quietly reject any attempts at flushing any of the special root pages.
4355 */
4356 if (pPage->idx < PGMPOOL_IDX_FIRST)
4357 {
4358 AssertFailed(); /* can no longer happen */
4359 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4360 return VINF_SUCCESS;
4361 }
4362
4363 pgmLock(pVM);
4364
4365 /*
4366 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4367 */
4368 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4369 {
4370 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4371 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4372 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4373 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4374 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4375 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4376 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4377 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4378 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4379 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4380 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4381 pgmUnlock(pVM);
4382 return VINF_SUCCESS;
4383 }
4384
4385#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4386 /* Start a subset so we won't run out of mapping space. */
4387 PVMCPU pVCpu = VMMGetCpu(pVM);
4388 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4389#endif
4390
4391 /*
4392 * Mark the page as being in need of an ASMMemZeroPage().
4393 */
4394 pPage->fZeroed = false;
4395
4396#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4397 if (pPage->fDirty)
4398 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4399#endif
4400
4401 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4402 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4403 fFlushRequired = true;
4404
4405 /*
4406 * Clear the page.
4407 */
4408 pgmPoolTrackClearPageUsers(pPool, pPage);
4409 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4410 pgmPoolTrackDeref(pPool, pPage);
4411 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4412
4413 /*
4414 * Flush it from the cache.
4415 */
4416 pgmPoolCacheFlushPage(pPool, pPage);
4417
4418#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4419 /* Heavy stuff done. */
4420 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4421#endif
4422
4423 /*
4424 * Deregister the monitoring.
4425 */
4426 if (pPage->fMonitored)
4427 rc = pgmPoolMonitorFlush(pPool, pPage);
4428
4429 /*
4430 * Free the page.
4431 */
4432 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4433 pPage->iNext = pPool->iFreeHead;
4434 pPool->iFreeHead = pPage->idx;
4435 pPage->enmKind = PGMPOOLKIND_FREE;
4436 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4437 pPage->GCPhys = NIL_RTGCPHYS;
4438 pPage->fReusedFlushPending = false;
4439
4440 pPool->cUsedPages--;
4441
4442 /* Flush the TLBs of all VCPUs if required. */
4443 if ( fFlushRequired
4444 && fFlush)
4445 {
4446 PGM_INVL_ALL_VCPU_TLBS(pVM);
4447 }
4448
4449 pgmUnlock(pVM);
4450 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4451 return rc;
4452}
4453
4454
4455/**
4456 * Frees a usage of a pool page.
4457 *
4458 * The caller is responsible for updating the user table so that it no longer
4459 * references the shadow page.
4460 *
4461 * @param pPool The pool.
4462 * @param pPage The shadow page.
4463 * @param iUser The shadow page pool index of the user table.
4464 * @param iUserTable The index into the user table (shadowed).
4465 */
4466void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4467{
4468 PVM pVM = pPool->CTX_SUFF(pVM);
4469
4470 STAM_PROFILE_START(&pPool->StatFree, a);
4471 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4472 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4473 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4474 pgmLock(pVM);
4475 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4476 if (!pPage->fCached)
4477 pgmPoolFlushPage(pPool, pPage);
4478 pgmUnlock(pVM);
4479 STAM_PROFILE_STOP(&pPool->StatFree, a);
4480}
4481
4482
4483/**
4484 * Makes one or more pages free, growing the pool or freeing a cached page if necessary.
4485 *
4486 * @returns VBox status code.
4487 * @retval VINF_SUCCESS on success.
4488 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4489 *
4490 * @param pPool The pool.
4491 * @param enmKind Page table kind
4492 * @param iUser The user of the page.
4493 */
4494static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4495{
4496 PVM pVM = pPool->CTX_SUFF(pVM);
4497
4498 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4499
4500 /*
4501 * If the pool isn't fully grown yet, expand it.
4502 */
4503 if ( pPool->cCurPages < pPool->cMaxPages
4504#if defined(IN_RC)
4505 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4506 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4507 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4508#endif
4509 )
4510 {
4511 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4512#ifdef IN_RING3
4513 int rc = PGMR3PoolGrow(pVM);
4514#else
4515 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4516#endif
4517 if (RT_FAILURE(rc))
4518 return rc;
4519 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4520 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4521 return VINF_SUCCESS;
4522 }
4523
4524 /*
4525 * Free one cached page.
4526 */
4527 return pgmPoolCacheFreeOne(pPool, iUser);
4528}
4529
4530/**
4531 * Allocates a page from the pool.
4532 *
4533 * This page may actually be a cached page and not in need of any processing
4534 * on the caller's part.
4535 *
4536 * @returns VBox status code.
4537 * @retval VINF_SUCCESS if a NEW page was allocated.
4538 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4539 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4540 * @param pVM The VM handle.
4541 * @param GCPhys The GC physical address of the page we're gonna shadow.
4542 * For 4MB and 2MB PD entries, it's the first address the
4543 * shadow PT is covering.
4544 * @param enmKind The kind of mapping.
4545 * @param enmAccess Access type for the mapping (only relevant for big pages)
4546 * @param iUser The shadow page pool index of the user table.
4547 * @param iUserTable The index into the user table (shadowed).
4548 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4549 * @param fLockPage Lock the page
4550 */
4551int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4552{
4553 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4554 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4555 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4556 *ppPage = NULL;
4557 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4558 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4559 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4560
4561 pgmLock(pVM);
4562
4563 if (pPool->fCacheEnabled)
4564 {
4565 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4566 if (RT_SUCCESS(rc2))
4567 {
4568 if (fLockPage)
4569 pgmPoolLockPage(pPool, *ppPage);
4570 pgmUnlock(pVM);
4571 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4572 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4573 return rc2;
4574 }
4575 }
4576
4577 /*
4578 * Allocate a new one.
4579 */
4580 int rc = VINF_SUCCESS;
4581 uint16_t iNew = pPool->iFreeHead;
4582 if (iNew == NIL_PGMPOOL_IDX)
4583 {
4584 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4585 if (RT_FAILURE(rc))
4586 {
4587 pgmUnlock(pVM);
4588 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4589 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4590 return rc;
4591 }
4592 iNew = pPool->iFreeHead;
4593 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4594 }
4595
4596 /* unlink the free head */
4597 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4598 pPool->iFreeHead = pPage->iNext;
4599 pPage->iNext = NIL_PGMPOOL_IDX;
4600
4601 /*
4602 * Initialize it.
4603 */
4604 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4605 pPage->enmKind = enmKind;
4606 pPage->enmAccess = enmAccess;
4607 pPage->GCPhys = GCPhys;
4608 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4609 pPage->fMonitored = false;
4610 pPage->fCached = false;
4611#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4612 pPage->fDirty = false;
4613#endif
4614 pPage->fReusedFlushPending = false;
4615 pPage->cModifications = 0;
4616 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4617 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4618 pPage->cPresent = 0;
4619 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4620 pPage->pvLastAccessHandlerFault = 0;
4621 pPage->cLastAccessHandlerCount = 0;
4622 pPage->pvLastAccessHandlerRip = 0;
4623
4624 /*
4625 * Insert into the tracking and cache. If this fails, free the page.
4626 */
4627 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4628 if (RT_FAILURE(rc3))
4629 {
4630 pPool->cUsedPages--;
4631 pPage->enmKind = PGMPOOLKIND_FREE;
4632 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4633 pPage->GCPhys = NIL_RTGCPHYS;
4634 pPage->iNext = pPool->iFreeHead;
4635 pPool->iFreeHead = pPage->idx;
4636 pgmUnlock(pVM);
4637 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4638 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4639 return rc3;
4640 }
4641
4642 /*
4643 * Commit the allocation, clear the page and return.
4644 */
4645#ifdef VBOX_WITH_STATISTICS
4646 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4647 pPool->cUsedPagesHigh = pPool->cUsedPages;
4648#endif
4649
4650 if (!pPage->fZeroed)
4651 {
4652 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4653 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4654 ASMMemZeroPage(pv);
4655 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4656 }
4657
4658 *ppPage = pPage;
4659 if (fLockPage)
4660 pgmPoolLockPage(pPool, pPage);
4661 pgmUnlock(pVM);
4662 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4663 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4664 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4665 return rc;
4666}
4667
4668
4669/**
4670 * Frees a usage of a pool page.
4671 *
4672 * @param pVM The VM handle.
4673 * @param HCPhys The HC physical address of the shadow page.
4674 * @param iUser The shadow page pool index of the user table.
4675 * @param iUserTable The index into the user table (shadowed).
4676 */
4677void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4678{
4679 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4680 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4681 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4682}
4683
4684/**
4685 * Internal worker for finding an 'in-use' shadow page given its physical address.
4686 *
4687 * @returns Pointer to the shadow page structure.
4688 * @param pPool The pool.
4689 * @param HCPhys The HC physical address of the shadow page.
4690 */
4691PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4692{
4693 PVM pVM = pPool->CTX_SUFF(pVM);
4694
4695 Assert(PGMIsLockOwner(pVM));
4696
4697 /*
4698 * Look up the page.
4699 */
4700 pgmLock(pVM);
4701 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4702 pgmUnlock(pVM);
4703
4704 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4705 return pPage;
4706}
4707
4708#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4709/**
4710 * Flush the specified page if present
4711 *
4712 * @param pVM The VM handle.
4713 * @param GCPhys Guest physical address of the page to flush
4714 */
4715void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4716{
4717 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4718
4719 VM_ASSERT_EMT(pVM);
4720
4721 /*
4722 * Look up the GCPhys in the hash.
4723 */
4724 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4725 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4726 if (i == NIL_PGMPOOL_IDX)
4727 return;
4728
4729 do
4730 {
4731 PPGMPOOLPAGE pPage = &pPool->aPages[i];
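        /* The unsigned subtraction doubles as a range check: the result is below
           PAGE_SIZE only when pPage->GCPhys lies within the page starting at GCPhys. */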
4732 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4733 {
4734 switch (pPage->enmKind)
4735 {
4736 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4737 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4738 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4739 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4740 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4741 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4742 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4743 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4744 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4745 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4746 case PGMPOOLKIND_64BIT_PML4:
4747 case PGMPOOLKIND_32BIT_PD:
4748 case PGMPOOLKIND_PAE_PDPT:
4749 {
4750 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4751#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4752 if (pPage->fDirty)
4753 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4754 else
4755#endif
4756 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4757 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4758 pgmPoolMonitorChainFlush(pPool, pPage);
4759 return;
4760 }
4761
4762 /* ignore, no monitoring. */
4763 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4764 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4765 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4766 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4767 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4768 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4769 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4770 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4771 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4772 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4773 case PGMPOOLKIND_ROOT_NESTED:
4774 case PGMPOOLKIND_PAE_PD_PHYS:
4775 case PGMPOOLKIND_PAE_PDPT_PHYS:
4776 case PGMPOOLKIND_32BIT_PD_PHYS:
4777 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4778 break;
4779
4780 default:
4781 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4782 }
4783 }
4784
4785 /* next */
4786 i = pPage->iNext;
4787 } while (i != NIL_PGMPOOL_IDX);
4788 return;
4789}
4790#endif /* IN_RING3 */
4791
4792#ifdef IN_RING3
4793/**
4794 * Flushes the entire cache.
4795 *
4796 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4797 * this and will execute the CR3 flush.
4798 *
4799 * @param pVM The VM handle.
4800 */
4801void pgmR3PoolReset(PVM pVM)
4802{
4803 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4804
4805 Assert(PGMIsLockOwner(pVM));
4806 STAM_PROFILE_START(&pPool->StatR3Reset, a);
4807 LogFlow(("pgmR3PoolReset:\n"));
4808
4809 /*
4810 * If there are no pages in the pool, there is nothing to do.
4811 */
4812 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4813 {
4814 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4815 return;
4816 }
4817
4818 /*
4819 * Exit the shadow mode since we're going to clear everything,
4820 * including the root page.
4821 */
4822 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4823 {
4824 PVMCPU pVCpu = &pVM->aCpus[i];
4825 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4826 }
4827
4828 /*
4829 * Nuke the free list and reinsert all pages into it.
4830 */
4831 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4832 {
4833 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4834
4835 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4836 if (pPage->fMonitored)
4837 pgmPoolMonitorFlush(pPool, pPage);
4838 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4839 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4840 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4841 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4842 pPage->cModifications = 0;
4843 pPage->GCPhys = NIL_RTGCPHYS;
4844 pPage->enmKind = PGMPOOLKIND_FREE;
4845 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4846 Assert(pPage->idx == i);
4847 pPage->iNext = i + 1;
4848 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4849 pPage->fSeenNonGlobal = false;
4850 pPage->fMonitored = false;
4851#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4852 pPage->fDirty = false;
4853#endif
4854 pPage->fCached = false;
4855 pPage->fReusedFlushPending = false;
4856 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4857 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4858 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4859 pPage->cLocked = 0;
4860 }
4861 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4862 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4863 pPool->cUsedPages = 0;
4864
4865 /*
4866 * Zap and reinitialize the user records.
4867 */
4868 pPool->cPresent = 0;
4869 pPool->iUserFreeHead = 0;
4870 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4871 const unsigned cMaxUsers = pPool->cMaxUsers;
4872 for (unsigned i = 0; i < cMaxUsers; i++)
4873 {
4874 paUsers[i].iNext = i + 1;
4875 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4876 paUsers[i].iUserTable = 0xfffffffe;
4877 }
4878 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4879
4880 /*
4881 * Clear all the GCPhys links and rebuild the phys ext free list.
4882 */
4883 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4884 pRam;
4885 pRam = pRam->CTX_SUFF(pNext))
4886 {
4887 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4888 while (iPage-- > 0)
4889 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4890 }
4891
4892 pPool->iPhysExtFreeHead = 0;
4893 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4894 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4895 for (unsigned i = 0; i < cMaxPhysExts; i++)
4896 {
4897 paPhysExts[i].iNext = i + 1;
4898 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4899 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4900 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4901 }
4902 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4903
4904 /*
4905 * Just zap the modified list.
4906 */
4907 pPool->cModifiedPages = 0;
4908 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4909
4910 /*
4911 * Clear the GCPhys hash and the age list.
4912 */
4913 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4914 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4915 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4916 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4917
4918#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4919 /* Clear all dirty pages. */
4920 pPool->idxFreeDirtyPage = 0;
4921 pPool->cDirtyPages = 0;
4922 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4923 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4924#endif
4925
4926 /*
4927 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4928 */
4929 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4930 {
4931 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4932 pPage->iNext = NIL_PGMPOOL_IDX;
4933 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4934 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4935 pPage->cModifications = 0;
4936 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4937 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4938 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4939 if (pPage->fMonitored)
4940 {
4941 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4942 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4943 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4944 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4945 pPool->pszAccessHandler);
4946 AssertFatalRCSuccess(rc);
4947 pgmPoolHashInsert(pPool, pPage);
4948 }
4949 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4950 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4951 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4952 }
4953
4954 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4955 {
4956 /*
4957 * Re-enter the shadowing mode and assert Sync CR3 FF.
4958 */
4959 PVMCPU pVCpu = &pVM->aCpus[i];
4960 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4961 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4962 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4963 }
4964
4965 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4966}
4967#endif /* IN_RING3 */
4968
4969#ifdef LOG_ENABLED
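/**
 * Converts a pool page kind to a read-only string (for logging).
 *
 * @returns Name of the PGMPOOLKIND_XXX value, or "Unknown kind!".
 * @param   enmKind     The page pool kind.
 */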
4970static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4971{
4972    switch (enmKind)
4973 {
4974 case PGMPOOLKIND_INVALID:
4975 return "PGMPOOLKIND_INVALID";
4976 case PGMPOOLKIND_FREE:
4977 return "PGMPOOLKIND_FREE";
4978 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4979 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4980 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4981 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4982 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4983 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4984 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4985 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4986 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4987 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4988 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4989 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4990 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4991 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4992 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4993 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4994 case PGMPOOLKIND_32BIT_PD:
4995 return "PGMPOOLKIND_32BIT_PD";
4996 case PGMPOOLKIND_32BIT_PD_PHYS:
4997 return "PGMPOOLKIND_32BIT_PD_PHYS";
4998 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4999 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5000 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5001 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5002 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5003 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5004 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5005 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5006 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5007 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5008 case PGMPOOLKIND_PAE_PD_PHYS:
5009 return "PGMPOOLKIND_PAE_PD_PHYS";
5010 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5011 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5012 case PGMPOOLKIND_PAE_PDPT:
5013 return "PGMPOOLKIND_PAE_PDPT";
5014 case PGMPOOLKIND_PAE_PDPT_PHYS:
5015 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5016 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5017 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5018 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5019 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5020 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5021 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5022 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5023 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5024 case PGMPOOLKIND_64BIT_PML4:
5025 return "PGMPOOLKIND_64BIT_PML4";
5026 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5027 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5028 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5029 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5030 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5031 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5032 case PGMPOOLKIND_ROOT_NESTED:
5033 return "PGMPOOLKIND_ROOT_NESTED";
5034 }
5035 return "Unknown kind!";
5036}
5037#endif /* LOG_ENABLED */