VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@17497

Last change on this file since 17497 was 17496, checked in by vboxsync, 16 years ago

Better disable that code block completely

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 172.0 KB
1/* $Id: PGMAllPool.cpp 17496 2009-03-06 16:56:43Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48__BEGIN_DECLS
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70__END_DECLS
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92/** @def PGMPOOL_PAGE_2_LOCKED_PTR
93 * Maps a pool page into the current context and locks it (RC only).
94 *
95 * @returns VBox status code.
96 * @param pVM The VM handle.
97 * @param pPage The pool page.
98 *
99 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
100 * small page window employed by that function. Be careful.
101 * @remark There is no need to assert on the result.
102 */
103#if defined(IN_RC)
104DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
105{
106 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
107
108 /* Make sure the dynamic mapping will not be reused. */
109 if (pv)
110 PGMDynLockHCPage(pVM, (uint8_t *)pv);
111
112 return pv;
113}
114#else
115# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
116#endif
117
118/** @def PGMPOOL_UNLOCK_PTR
119 * Unlocks a previously locked dynamic mapping (RC only).
120 *
121 * @returns VBox status code.
122 * @param pVM The VM handle.
123 * @param pPage The pool page.
124 *
125 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
126 * small page window employed by that function. Be careful.
127 * @remark There is no need to assert on the result.
128 */
129#if defined(IN_RC)
130DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
131{
132 if (pvPage)
133 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
134}
135#else
136# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
137#endif
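
/* Usage pattern (as in pgmPoolMonitorChainChanging() below):
 *     pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);    - map the pool page, lock it in RC
 *     ...read/modify the shadow entries...
 *     PGMPOOL_UNLOCK_PTR(pVM, pv);                   - drop the RC lock (no-op elsewhere)
 */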
138
139#if !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && (defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0))
140/**
141 * Maps a pool page into the current context.
142 *
143 * @returns Pointer to the mapping.
144 * @param pPGM Pointer to the PGM instance data.
145 * @param pPage The page to map.
146 */
147void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
148{
149    /* General pages are taken care of by the inlined part; the call
150       only ends up here in case of failure. */
151 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
152
153/** @todo make sure HCPhys is valid for *all* indexes. */
154 /* special pages. */
155# ifdef IN_RC
156 switch (pPage->idx)
157 {
158 case PGMPOOL_IDX_PD:
159 return pPGM->pShw32BitPdRC;
160 case PGMPOOL_IDX_PAE_PD:
161 case PGMPOOL_IDX_PAE_PD_0:
162 return pPGM->apShwPaePDsRC[0];
163 case PGMPOOL_IDX_PAE_PD_1:
164 return pPGM->apShwPaePDsRC[1];
165 case PGMPOOL_IDX_PAE_PD_2:
166 return pPGM->apShwPaePDsRC[2];
167 case PGMPOOL_IDX_PAE_PD_3:
168 return pPGM->apShwPaePDsRC[3];
169 case PGMPOOL_IDX_PDPT:
170 return pPGM->pShwPaePdptRC;
171 default:
172 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
173 return NULL;
174 }
175
176# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
177 RTHCPHYS HCPhys;
178 switch (pPage->idx)
179 {
180 case PGMPOOL_IDX_PD:
181 HCPhys = pPGM->HCPhysShw32BitPD;
182 break;
183 case PGMPOOL_IDX_PAE_PD_0:
184 HCPhys = pPGM->aHCPhysPaePDs[0];
185 break;
186 case PGMPOOL_IDX_PAE_PD_1:
187 HCPhys = pPGM->aHCPhysPaePDs[1];
188 break;
189 case PGMPOOL_IDX_PAE_PD_2:
190 HCPhys = pPGM->aHCPhysPaePDs[2];
191 break;
192 case PGMPOOL_IDX_PAE_PD_3:
193 HCPhys = pPGM->aHCPhysPaePDs[3];
194 break;
195 case PGMPOOL_IDX_PDPT:
196 HCPhys = pPGM->HCPhysShwPaePdpt;
197 break;
198 case PGMPOOL_IDX_NESTED_ROOT:
199 HCPhys = pPGM->HCPhysShwNestedRoot;
200 break;
201 case PGMPOOL_IDX_PAE_PD:
202 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
203 return NULL;
204 default:
205 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
206 return NULL;
207 }
208 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
209
210 void *pv;
211 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
212 return pv;
213# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
214}
215#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
216
217
218#ifdef PGMPOOL_WITH_MONITORING
219/**
220 * Determine the size of a write instruction.
221 * @returns number of bytes written.
222 * @param pDis The disassembler state.
223 */
224static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
225{
226 /*
227 * This is very crude and possibly wrong for some opcodes,
228 * but since it's not really supposed to be called we can
229 * probably live with that.
230 */
231 return DISGetParamSize(pDis, &pDis->param1);
232}
233
234
235/**
236 * Flushes a chain of pages sharing the same access monitor.
237 *
238 * @returns VBox status code suitable for scheduling.
239 * @param pPool The pool.
240 * @param pPage A page in the chain.
241 */
242int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
243{
244 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
245
246 /*
247 * Find the list head.
248 */
249 uint16_t idx = pPage->idx;
250 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
251 {
252 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
253 {
254 idx = pPage->iMonitoredPrev;
255 Assert(idx != pPage->idx);
256 pPage = &pPool->aPages[idx];
257 }
258 }
259
260 /*
261 * Iterate the list flushing each shadow page.
262 */
263 int rc = VINF_SUCCESS;
264 for (;;)
265 {
266 idx = pPage->iMonitoredNext;
267 Assert(idx != pPage->idx);
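        /* Only pages allocated from the pool proper are flushed here; indexes below
           PGMPOOL_IDX_FIRST are the fixed/special root pages (see pgmPoolMapPageFallback). */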
268 if (pPage->idx >= PGMPOOL_IDX_FIRST)
269 {
270 int rc2 = pgmPoolFlushPage(pPool, pPage);
271 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
272 rc = VINF_PGM_SYNC_CR3;
273 }
274 /* next */
275 if (idx == NIL_PGMPOOL_IDX)
276 break;
277 pPage = &pPool->aPages[idx];
278 }
279 return rc;
280}
281
282
283/**
284 * Wrapper for getting the current context pointer to the entry being modified.
285 *
286 * @returns VBox status code suitable for scheduling.
287 * @param pVM VM Handle.
288 * @param pvDst Destination address
289 * @param pvSrc Source guest virtual address.
290 * @param GCPhysSrc The source guest physical address.
291 * @param cb Size of data to read
292 */
293DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
294{
295#if defined(IN_RING3)
296 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
297 return VINF_SUCCESS;
298#else
299 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
300 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
301#endif
302}
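
/* Note: both paths above align the source down to a cb-sized boundary so the read covers
   the whole (4 or 8 byte) guest entry being modified; ring-3 reads directly through the
   host mapping while R0/RC read via the guest physical address. */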
303
304/**
305 * Process shadow entries before they are changed by the guest.
306 *
307 * For PT entries we will clear them. For PD entries, we'll simply check
308 * for mapping conflicts and set the SyncCR3 FF if found.
309 *
310 * @param pPool The pool.
311 * @param pPage The head page.
312 * @param GCPhysFault The guest physical fault address.
313 * @param uAddress In R0 and GC this is the guest context fault address (flat).
314 * In R3 this is the host context 'fault' address.
315 * @param pCpu The disassembler state for figuring out the write size.
316 * This need not be specified if the caller knows we won't do cross entry accesses.
317 */
318void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pCpu)
319{
320 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
321 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
322 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
323 PVM pVM = pPool->CTX_SUFF(pVM);
324
325 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
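    /* Walk the monitor chain (pPage is the head, see the assertion above). For each shadow
       page the entry (or entries) covered by the guest write is cleared/freed, or a CR3
       sync is flagged when a mapping conflict is detected. */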
326 for (;;)
327 {
328 union
329 {
330 void *pv;
331 PX86PT pPT;
332 PX86PTPAE pPTPae;
333 PX86PD pPD;
334 PX86PDPAE pPDPae;
335 PX86PDPT pPDPT;
336 PX86PML4 pPML4;
337 } uShw;
338
339 uShw.pv = NULL;
340 switch (pPage->enmKind)
341 {
342 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
343 {
344 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
345 const unsigned iShw = off / sizeof(X86PTE);
346 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
347 if (uShw.pPT->a[iShw].n.u1Present)
348 {
349# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
350 X86PTE GstPte;
351
352 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
353 AssertRC(rc);
354 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
355 pgmPoolTracDerefGCPhysHint(pPool, pPage,
356 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
357 GstPte.u & X86_PTE_PG_MASK);
358# endif
359 uShw.pPT->a[iShw].u = 0;
360 }
361 break;
362 }
363
364 /* page/2 sized */
365 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
366 {
367 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
368 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
369 {
370 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
371 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
372 if (uShw.pPTPae->a[iShw].n.u1Present)
373 {
374# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
375 X86PTE GstPte;
376 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
377 AssertRC(rc);
378
379 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
380 pgmPoolTracDerefGCPhysHint(pPool, pPage,
381 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
382 GstPte.u & X86_PTE_PG_MASK);
383# endif
384 uShw.pPTPae->a[iShw].u = 0;
385 }
386 }
387 break;
388 }
389
390# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
391 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
392 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
393 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
394 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
395 {
396 unsigned iGst = off / sizeof(X86PDE);
397 unsigned iShwPdpt = iGst / 256;
398 unsigned iShw = (iGst % 256) * 2;
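            /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs of 512 entries
               each: iGst / 256 selects which of the four (compared against this page's kind
               below) and each guest PDE expands to two PAE PDEs, hence the '* 2'. */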
399 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
400
401 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
402 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
403 {
404 for (unsigned i = 0; i < 2; i++)
405 {
406# ifndef IN_RING0
407 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
408 {
409 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
410 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
412 break;
413 }
414 else
415# endif /* !IN_RING0 */
416 if (uShw.pPDPae->a[iShw+i].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
419 pgmPoolFree(pVM,
420 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw + i);
423 uShw.pPDPae->a[iShw+i].u = 0;
424 }
425
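                    /* If the misaligned write also spills into the next 32-bit guest PDE,
                       the shadow PDEs for that neighbour (iShw2 below) are cleared too. */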
426 /* paranoia / a bit assumptive. */
427 if ( pCpu
428 && (off & 3)
429 && (off & 3) + cbWrite > 4)
430 {
431 const unsigned iShw2 = iShw + 2 + i;
432 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
433 {
434# ifndef IN_RING0
435 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
436 {
437 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
438 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
439 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
440 break;
441 }
442 else
443# endif /* !IN_RING0 */
444 if (uShw.pPDPae->a[iShw2].n.u1Present)
445 {
446 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
447 pgmPoolFree(pVM,
448 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
449 pPage->idx,
450 iShw2);
451 uShw.pPDPae->a[iShw2].u = 0;
452 }
453 }
454 }
455 }
456 }
457 break;
458 }
459# endif
460
461
462 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
463 {
464 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
465 const unsigned iShw = off / sizeof(X86PTEPAE);
466 if (uShw.pPTPae->a[iShw].n.u1Present)
467 {
468# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
469 X86PTEPAE GstPte;
470 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
471 AssertRC(rc);
472
473 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
474 pgmPoolTracDerefGCPhysHint(pPool, pPage,
475 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
476 GstPte.u & X86_PTE_PAE_PG_MASK);
477# endif
478 uShw.pPTPae->a[iShw].u = 0;
479 }
480
481 /* paranoia / a bit assumptive. */
482 if ( pCpu
483 && (off & 7)
484 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
485 {
486 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
487 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
488
489 if (uShw.pPTPae->a[iShw2].n.u1Present)
490 {
491# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
492 X86PTEPAE GstPte;
493# ifdef IN_RING3
494 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
495# else
496 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
497# endif
498 AssertRC(rc);
499 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
500 pgmPoolTracDerefGCPhysHint(pPool, pPage,
501 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
502 GstPte.u & X86_PTE_PAE_PG_MASK);
503# endif
504 uShw.pPTPae->a[iShw2].u = 0;
505 }
506 }
507 break;
508 }
509
510# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
511 case PGMPOOLKIND_32BIT_PD:
512# else
513 case PGMPOOLKIND_ROOT_32BIT_PD:
514# endif
515 {
516 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
518
519# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
520 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
521# endif
522# ifndef IN_RING0
523 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
526 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
527 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
528 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531# endif /* !IN_RING0 */
532# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
533# ifndef IN_RING0
534 else
535# endif /* !IN_RING0 */
536 {
537 if (uShw.pPD->a[iShw].n.u1Present)
538 {
539 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
540 pgmPoolFree(pVM,
541 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
542 pPage->idx,
543 iShw);
544 uShw.pPD->a[iShw].u = 0;
545 }
546 }
547# endif
548 /* paranoia / a bit assumptive. */
549 if ( pCpu
550 && (off & 3)
551 && (off & 3) + cbWrite > sizeof(X86PTE))
552 {
553 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
554 if ( iShw2 != iShw
555 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
556 {
557# ifndef IN_RING0
558 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
559 {
560 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
561 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
562 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 }
565# endif /* !IN_RING0 */
566# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
567# ifndef IN_RING0
568 else
569# endif /* !IN_RING0 */
570 {
571 if (uShw.pPD->a[iShw2].n.u1Present)
572 {
573 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
574 pgmPoolFree(pVM,
575 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
576 pPage->idx,
577 iShw2);
578 uShw.pPD->a[iShw2].u = 0;
579 }
580 }
581# endif
582 }
583 }
584#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
585 if ( uShw.pPD->a[iShw].n.u1Present
586 && !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
587 {
588 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
589# ifdef IN_RC /* TLB load - we're pushing things a bit... */
590 ASMProbeReadByte(pvAddress);
591# endif
592 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
593 uShw.pPD->a[iShw].u = 0;
594 }
595#endif
596 break;
597 }
598
599# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
600 case PGMPOOLKIND_ROOT_PAE_PD:
601 {
602 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
603 unsigned iShwPdpt = iGst / 256;
604 unsigned iShw = (iGst % 256) * 2;
605 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
606 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
607 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
608 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage2);
609 for (unsigned i = 0; i < 2; i++, iShw++)
610 {
611 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
612 {
613 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
614 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
615 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
616 }
617 /* paranoia / a bit assumptive. */
618 else if ( pCpu
619 && (off & 3)
620 && (off & 3) + cbWrite > 4)
621 {
622 const unsigned iShw2 = iShw + 2;
623 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
624 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
625 {
626 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
627 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
628 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
629 }
630 }
631#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
632 if ( uShw.pPDPae->a[iShw].n.u1Present
633 && !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
634 {
635 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
636# ifdef IN_RC /* TLB load - we're pushing things a bit... */
637 ASMProbeReadByte(pvAddress);
638# endif
639 pgmPoolFree(pVM, uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
640 uShw.pPDPae->a[iShw].u = 0;
641 }
642#endif
643 }
644 break;
645 }
646# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
647
648 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
649 {
650 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
651 const unsigned iShw = off / sizeof(X86PDEPAE);
652#ifndef IN_RING0
653 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
654 {
655 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
656 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
657 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
658 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
659 break;
660 }
661#endif /* !IN_RING0 */
662#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
663 /*
664 * Causes trouble when the guest uses a PDE to refer to the whole page table level
665 * structure. (Invalidate here; faults later on when it tries to change the page
666 * table entries -> recheck; probably only applies to the RC case.)
667 */
668# ifndef IN_RING0
669 else
670# endif /* !IN_RING0 */
671 {
672 if (uShw.pPDPae->a[iShw].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
675 pgmPoolFree(pVM,
676 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
677# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
678 pPage->idx,
679 iShw);
680# else
681 /* Note: hardcoded PAE implementation dependency */
682 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
683 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
684# endif
685 uShw.pPDPae->a[iShw].u = 0;
686 }
687 }
688#endif
689 /* paranoia / a bit assumptive. */
690 if ( pCpu
691 && (off & 7)
692 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
693 {
694 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
695 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
696
697#ifndef IN_RING0
698 if ( iShw2 != iShw
699 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
700 {
701 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
702 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
703 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
704 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
705 }
706#endif /* !IN_RING0 */
707#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
708# ifndef IN_RING0
709 else
710# endif /* !IN_RING0 */
711 if (uShw.pPDPae->a[iShw2].n.u1Present)
712 {
713 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
714 pgmPoolFree(pVM,
715 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
716# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
717 pPage->idx,
718 iShw2);
719# else
720 /* Note: hardcoded PAE implementation dependency */
721 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
722 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
723# endif
724 uShw.pPDPae->a[iShw2].u = 0;
725 }
726#endif
727 }
728 break;
729 }
730
731# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
732 case PGMPOOLKIND_PAE_PDPT:
733# else
734 case PGMPOOLKIND_ROOT_PDPT:
735# endif
736 {
737 /*
738 * Hopefully this doesn't happen very often:
739 * - touching unused parts of the page
740 * - messing with the bits of pd pointers without changing the physical address
741 */
742# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
743 /* PDPT roots are not page aligned; 32 byte only! */
744 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
745# else
746 const unsigned offPdpt = off;
747# endif
748 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
749 const unsigned iShw = offPdpt / sizeof(X86PDPE);
750 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
751 {
752# ifndef IN_RING0
753 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
754 {
755 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
756 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
757 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
758 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
759 break;
760 }
761# endif /* !IN_RING0 */
762# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
763# ifndef IN_RING0
764 else
765# endif /* !IN_RING0 */
766 if (uShw.pPDPT->a[iShw].n.u1Present)
767 {
768 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
769 pgmPoolFree(pVM,
770 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
771 pPage->idx,
772 iShw);
773 uShw.pPDPT->a[iShw].u = 0;
774 }
775# endif
776
777 /* paranoia / a bit assumptive. */
778 if ( pCpu
779 && (offPdpt & 7)
780 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
781 {
782 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
783 if ( iShw2 != iShw
784 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
785 {
786# ifndef IN_RING0
787 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
788 {
789 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
790 STAM_COUNTER_INC(&(pVM->pgm.s.StatRZGuestCR3WriteConflict));
791 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
792 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
793 }
794# endif /* !IN_RING0 */
795# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
796# ifndef IN_RING0
797 else
798# endif /* !IN_RING0 */
799 if (uShw.pPDPT->a[iShw2].n.u1Present)
800 {
801 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
802 pgmPoolFree(pVM,
803 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
804 pPage->idx,
805 iShw2);
806 uShw.pPDPT->a[iShw2].u = 0;
807 }
808# endif
809 }
810 }
811 }
812 break;
813 }
814
815#ifndef IN_RC
816 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
817 {
818 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
819 const unsigned iShw = off / sizeof(X86PDEPAE);
820 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
821 if (uShw.pPDPae->a[iShw].n.u1Present)
822 {
823 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
824 pgmPoolFree(pVM,
825 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
826 pPage->idx,
827 iShw);
828 uShw.pPDPae->a[iShw].u = 0;
829 }
830 /* paranoia / a bit assumptive. */
831 if ( pCpu
832 && (off & 7)
833 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
834 {
835 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
836 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
837
838 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
839 if (uShw.pPDPae->a[iShw2].n.u1Present)
840 {
841 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
842 pgmPoolFree(pVM,
843 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
844 pPage->idx,
845 iShw2);
846 uShw.pPDPae->a[iShw2].u = 0;
847 }
848 }
849 break;
850 }
851
852 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
853 {
854 /*
855 * Hopefully this doesn't happen very often:
856 * - messing with the bits of pd pointers without changing the physical address
857 */
858# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
859 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
860# endif
861 {
862 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
863 const unsigned iShw = off / sizeof(X86PDPE);
864 if (uShw.pPDPT->a[iShw].n.u1Present)
865 {
866 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
867 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
868 uShw.pPDPT->a[iShw].u = 0;
869 }
870 /* paranoia / a bit assumptive. */
871 if ( pCpu
872 && (off & 7)
873 && (off & 7) + cbWrite > sizeof(X86PDPE))
874 {
875 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
876 if (uShw.pPDPT->a[iShw2].n.u1Present)
877 {
878 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
879 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
880 uShw.pPDPT->a[iShw2].u = 0;
881 }
882 }
883 }
884 break;
885 }
886
887 case PGMPOOLKIND_64BIT_PML4:
888 {
889 /*
890 * Hopefully this doesn't happen very often:
891 * - messing with the bits of pd pointers without changing the physical address
892 */
893# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
894 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
895# endif
896 {
897 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
898 const unsigned iShw = off / sizeof(X86PDPE);
899 if (uShw.pPML4->a[iShw].n.u1Present)
900 {
901 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
902 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
903 uShw.pPML4->a[iShw].u = 0;
904 }
905 /* paranoia / a bit assumptive. */
906 if ( pCpu
907 && (off & 7)
908 && (off & 7) + cbWrite > sizeof(X86PDPE))
909 {
910 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
911 if (uShw.pPML4->a[iShw2].n.u1Present)
912 {
913 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
914 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
915 uShw.pPML4->a[iShw2].u = 0;
916 }
917 }
918 }
919 break;
920 }
921#endif /* !IN_RC */
922
923 default:
924 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
925 }
926 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
927
928 /* next */
929 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
930 return;
931 pPage = &pPool->aPages[pPage->iMonitoredNext];
932 }
933}
934
935# ifndef IN_RING3
936/**
937 * Checks if an access could be a fork operation in progress.
938 *
939 * Meaning that the guest is setting up the parent process for Copy-On-Write.
940 *
941 * @returns true if it's likely that we're forking, otherwise false.
942 * @param pPool The pool.
943 * @param pCpu The disassembled instruction.
944 * @param offFault The access offset.
945 */
946DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
947{
948 /*
949 * i386 linux is using btr to clear X86_PTE_RW.
950 * The functions involved are (2.6.16 source inspection):
951 * clear_bit
952 * ptep_set_wrprotect
953 * copy_one_pte
954 * copy_pte_range
955 * copy_pmd_range
956 * copy_pud_range
957 * copy_page_range
958 * dup_mmap
959 * dup_mm
960 * copy_mm
961 * copy_process
962 * do_fork
963 */
964 if ( pCpu->pCurInstr->opcode == OP_BTR
965 && !(offFault & 4)
966 /** @todo Validate that the bit index is X86_PTE_RW. */
967 )
968 {
969 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
970 return true;
971 }
972 return false;
973}
974
975
976/**
977 * Determine whether the page is likely to have been reused.
978 *
979 * @returns true if we consider the page as being reused for a different purpose.
980 * @returns false if we consider it to still be a paging page.
981 * @param pVM VM Handle.
982 * @param pPage The page in question.
983 * @param pRegFrame Trap register frame.
984 * @param pCpu The disassembly info for the faulting instruction.
985 * @param pvFault The fault address.
986 *
987 * @remark The REP prefix check is left to the caller because of STOSD/W.
988 */
989DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
990{
991#ifndef IN_RC
992 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
993 if ( HWACCMHasPendingIrq(pVM)
994 && (pRegFrame->rsp - pvFault) < 32)
995 {
996 /* Fault caused by stack writes while trying to inject an interrupt event. */
997 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
998 return true;
999 }
1000#else
1001 NOREF(pVM); NOREF(pvFault);
1002#endif
1003
1004 switch (pCpu->pCurInstr->opcode)
1005 {
1006 /* call implies the actual push of the return address faulted */
1007 case OP_CALL:
1008 Log4(("pgmPoolMonitorIsReused: CALL\n"));
1009 return true;
1010 case OP_PUSH:
1011 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
1012 return true;
1013 case OP_PUSHF:
1014 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
1015 return true;
1016 case OP_PUSHA:
1017 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
1018 return true;
1019 case OP_FXSAVE:
1020 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
1021 return true;
1022 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
1023 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
1024 return true;
1025 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
1026 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
1027 return true;
1028 case OP_MOVSWD:
1029 case OP_STOSWD:
1030 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
1031 && pRegFrame->rcx >= 0x40
1032 )
1033 {
1034 Assert(pCpu->mode == CPUMODE_64BIT);
1035
1036 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
1037 return true;
1038 }
1039 return false;
1040 }
1041 if ( (pCpu->param1.flags & USE_REG_GEN32)
1042 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
1043 {
1044 Log4(("pgmPoolMonitorIsReused: ESP\n"));
1045 return true;
1046 }
1047
1048 //if (pPage->fCR3Mix)
1049 // return false;
1050 return false;
1051}
1052
1053
1054/**
1055 * Flushes the page being accessed.
1056 *
1057 * @returns VBox status code suitable for scheduling.
1058 * @param pVM The VM handle.
1059 * @param pPool The pool.
1060 * @param pPage The pool page (head).
1061 * @param pCpu The disassembly of the write instruction.
1062 * @param pRegFrame The trap register frame.
1063 * @param GCPhysFault The fault address as guest physical address.
1064 * @param pvFault The fault address.
1065 */
1066static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1067 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1068{
1069 /*
1070 * First, do the flushing.
1071 */
1072 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
1073
1074 /*
1075 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
1076 */
1077 uint32_t cbWritten;
1078 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
1079 if (RT_SUCCESS(rc2))
1080 pRegFrame->rip += pCpu->opsize;
1081 else if (rc2 == VERR_EM_INTERPRETER)
1082 {
1083#ifdef IN_RC
1084 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
1085 {
1086 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
1087 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
1088 rc = VINF_SUCCESS;
1089 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
1090 }
1091 else
1092#endif
1093 {
1094 rc = VINF_EM_RAW_EMULATE_INSTR;
1095 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1096 }
1097 }
1098 else
1099 rc = rc2;
1100
1101 /* See use in pgmPoolAccessHandlerSimple(). */
1102 PGM_INVL_GUEST_TLBS();
1103
1104 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
1105 return rc;
1106
1107}
1108
1109
1110/**
1111 * Handles the STOSD write accesses.
1112 *
1113 * @returns VBox status code suitable for scheduling.
1114 * @param pVM The VM handle.
1115 * @param pPool The pool.
1116 * @param pPage The pool page (head).
1117 * @param pCpu The disassembly of the write instruction.
1118 * @param pRegFrame The trap register frame.
1119 * @param GCPhysFault The fault address as guest physical address.
1120 * @param pvFault The fault address.
1121 */
1122DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1123 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1124{
1125 Assert(pCpu->mode == CPUMODE_32BIT);
1126
1127 Log3(("pgmPoolAccessHandlerSTOSD\n"));
1128
1129 /*
1130 * Increment the modification counter and insert it into the list
1131 * of modified pages the first time.
1132 */
1133 if (!pPage->cModifications++)
1134 pgmPoolMonitorModifiedInsert(pPool, pPage);
1135
1136 /*
1137 * Execute REP STOSD.
1138 *
1139 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1140 * write situation, meaning that it's safe to write here.
1141 */
1142#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1143 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1144#endif
1145 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
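    /* Manually emulate REP STOSD: for each dword, first let the pool sync away the affected
       shadow entry (pgmPoolMonitorChainChanging), then perform the guest write (directly
       through the fault address in RC, via PGMPhysSimpleWriteGCPhys otherwise) and advance
       edi/ecx just as the instruction would. */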
1146 while (pRegFrame->ecx)
1147 {
1148#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1149 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1150 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1151 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1152#else
1153 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1154#endif
1155#ifdef IN_RC
1156 *(uint32_t *)pu32 = pRegFrame->eax;
1157#else
1158 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
1159#endif
1160 pu32 += 4;
1161 GCPhysFault += 4;
1162 pRegFrame->edi += 4;
1163 pRegFrame->ecx--;
1164 }
1165 pRegFrame->rip += pCpu->opsize;
1166
1167#ifdef IN_RC
1168 /* See use in pgmPoolAccessHandlerSimple(). */
1169 PGM_INVL_GUEST_TLBS();
1170#endif
1171
1172 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1173 return VINF_SUCCESS;
1174}
1175
1176
1177/**
1178 * Handles the simple write accesses.
1179 *
1180 * @returns VBox status code suitable for scheduling.
1181 * @param pVM The VM handle.
1182 * @param pPool The pool.
1183 * @param pPage The pool page (head).
1184 * @param pCpu The disassembly of the write instruction.
1185 * @param pRegFrame The trap register frame.
1186 * @param GCPhysFault The fault address as guest physical address.
1187 * @param pvFault The fault address.
1188 */
1189DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
1190 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1191{
1192 Log3(("pgmPoolAccessHandlerSimple\n"));
1193 /*
1194 * Increment the modification counter and insert it into the list
1195 * of modified pages the first time.
1196 */
1197 if (!pPage->cModifications++)
1198 pgmPoolMonitorModifiedInsert(pPool, pPage);
1199
1200 /*
1201 * Clear the affected shadow page entries. ASSUMES that pvFault is readable.
1202 */
1203#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1204 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1205 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1206 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1207 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1208#else
1209 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1210#endif
1211
1212 /*
1213 * Interpret the instruction.
1214 */
1215 uint32_t cb;
1216 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1217 if (RT_SUCCESS(rc))
1218 pRegFrame->rip += pCpu->opsize;
1219 else if (rc == VERR_EM_INTERPRETER)
1220 {
1221 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1222 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1223 rc = VINF_EM_RAW_EMULATE_INSTR;
1224 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1225 }
1226
1227#ifdef IN_RC
1228 /*
1229 * Quick hack, with logging enabled we're getting stale
1230 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1231 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1232 * have to be fixed to support this. But that'll have to wait till next week.
1233 *
1234 * An alternative is to keep track of the changed PTEs together with the
1235 * GCPhys from the guest PT. This may prove expensive though.
1236 *
1237 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1238 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1239 */
1240 PGM_INVL_GUEST_TLBS();
1241#endif
1242
1243 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1244 return rc;
1245}
1246
1247/**
1248 * \#PF Handler callback for PT write accesses.
1249 *
1250 * @returns VBox status code (appropriate for GC return).
1251 * @param pVM VM Handle.
1252 * @param uErrorCode CPU Error code.
1253 * @param pRegFrame Trap register frame.
1254 * NULL on DMA and other non CPU access.
1255 * @param pvFault The fault address (cr2).
1256 * @param GCPhysFault The GC physical address corresponding to pvFault.
1257 * @param pvUser User argument.
1258 */
1259DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1260{
1261 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1262 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1263 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1264 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1265
1266 /*
1267 * We should ALWAYS have the list head as user parameter. This
1268 * is because we use that page to record the changes.
1269 */
1270 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1271
1272 /*
1273 * Disassemble the faulting instruction.
1274 */
1275 DISCPUSTATE Cpu;
1276 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1277 AssertRCReturn(rc, rc);
1278
1279 /*
1280 * Check if it's worth dealing with.
1281 */
1282 bool fReused = false;
1283 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1284#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1285 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1286#else
1287 || pPage->fCR3Mix
1288#endif
1289 )
1290 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1291 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1292 {
1293 /*
1294 * Simple instructions, no REP prefix.
1295 */
1296 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1297 {
1298 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1299 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1300 return rc;
1301 }
1302
1303 /*
1304 * Windows is frequently doing small memset() operations (netio test 4k+).
1305 * We have to deal with these or we'll kill the cache and performance.
1306 */
1307 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1308 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1309 && pRegFrame->ecx <= 0x20
1310 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1311 && !((uintptr_t)pvFault & 3)
1312 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1313 && Cpu.mode == CPUMODE_32BIT
1314 && Cpu.opmode == CPUMODE_32BIT
1315 && Cpu.addrmode == CPUMODE_32BIT
1316 && Cpu.prefix == PREFIX_REP
1317 && !pRegFrame->eflags.Bits.u1DF
1318 )
1319 {
1320 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1321 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1322 return rc;
1323 }
1324
1325 /* REP prefix, don't bother. */
1326 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1327 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1328 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1329 }
1330
1331 /*
1332 * Not worth it, so flush it.
1333 *
1334 * If we considered it to be reused, don't go back to ring-3
1335 * to emulate failed instructions since we usually cannot
1336 * interpret them. This may be a bit risky, in which case
1337 * the reuse detection must be fixed.
1338 */
1339 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1340 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1341 rc = VINF_SUCCESS;
1342 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1343 return rc;
1344}
1345
1346# endif /* !IN_RING3 */
1347#endif /* PGMPOOL_WITH_MONITORING */
1348
1349#ifdef PGMPOOL_WITH_CACHE
1350
1351/**
1352 * Inserts a page into the GCPhys hash table.
1353 *
1354 * @param pPool The pool.
1355 * @param pPage The page.
1356 */
1357DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1358{
1359 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1360 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1361 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1362 pPage->iNext = pPool->aiHash[iHash];
1363 pPool->aiHash[iHash] = pPage->idx;
1364}
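
/* Note: the GCPhys hash is a simple chained table: pPool->aiHash[] holds the bucket heads
   and PGMPOOLPAGE::iNext links the pages within a bucket, which is why removal (below)
   may have to walk the chain to find the predecessor. */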
1365
1366
1367/**
1368 * Removes a page from the GCPhys hash table.
1369 *
1370 * @param pPool The pool.
1371 * @param pPage The page.
1372 */
1373DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1374{
1375 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1376 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1377 if (pPool->aiHash[iHash] == pPage->idx)
1378 pPool->aiHash[iHash] = pPage->iNext;
1379 else
1380 {
1381 uint16_t iPrev = pPool->aiHash[iHash];
1382 for (;;)
1383 {
1384 const int16_t i = pPool->aPages[iPrev].iNext;
1385 if (i == pPage->idx)
1386 {
1387 pPool->aPages[iPrev].iNext = pPage->iNext;
1388 break;
1389 }
1390 if (i == NIL_PGMPOOL_IDX)
1391 {
1392 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1393 break;
1394 }
1395 iPrev = i;
1396 }
1397 }
1398 pPage->iNext = NIL_PGMPOOL_IDX;
1399}
1400
1401
1402/**
1403 * Frees up one cache page.
1404 *
1405 * @returns VBox status code.
1406 * @retval VINF_SUCCESS on success.
1407 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1408 * @param pPool The pool.
1409 * @param iUser The user index.
1410 */
1411static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1412{
1413#ifndef IN_RC
1414 const PVM pVM = pPool->CTX_SUFF(pVM);
1415#endif
1416    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1417 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1418
1419 /*
1420 * Select one page from the tail of the age list.
1421 */
1422 uint16_t iToFree = pPool->iAgeTail;
1423 if (iToFree == iUser)
1424 iToFree = pPool->aPages[iToFree].iAgePrev;
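    /* (If the least recently used page happens to be the requesting user's own page,
       step one entry towards the head of the age list, i.e. a more recently used page.) */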
1425/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1426 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1427 {
1428 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1429 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1430 {
1431 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1432 continue;
1433 iToFree = i;
1434 break;
1435 }
1436 }
1437*/
1438
1439 Assert(iToFree != iUser);
1440 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1441
1442 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1443
1444 /*
1445 * Reject any attempts at flushing the currently active shadow CR3 mapping
1446 */
1447#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1448 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1449#else
1450 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1451#endif
1452 {
1453 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1454 LogFlow(("pgmPoolCacheFreeOne refuse CR3 mapping\n"));
1455 pgmPoolCacheUsed(pPool, pPage);
1456 return pgmPoolCacheFreeOne(pPool, iUser);
1457 }
1458
1459 int rc = pgmPoolFlushPage(pPool, pPage);
1460 if (rc == VINF_SUCCESS)
1461 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1462 return rc;
1463}
1464
1465
1466/**
1467 * Checks if a kind mismatch is really a page being reused
1468 * or if it's just a normal remapping.
1469 *
1470 * @returns true if reused and the cached page (enmKind1) should be flushed
1471 * @returns false if not reused.
1472 * @param enmKind1 The kind of the cached page.
1473 * @param enmKind2 The kind of the requested page.
1474 */
1475static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1476{
1477 switch (enmKind1)
1478 {
1479 /*
1480 * Never reuse them. There is no remapping in non-paging mode.
1481 */
1482 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1483 case PGMPOOLKIND_32BIT_PD_PHYS:
1484 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1485 case PGMPOOLKIND_PAE_PD_PHYS:
1486 case PGMPOOLKIND_PAE_PDPT_PHYS:
1487 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1488 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1489 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1490 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1491 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1492#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1493 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1494 return false;
1495#else
1496 return true;
1497#endif
1498
1499 /*
1500 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1501 */
1502 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1503 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1504 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1505 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1506 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1507 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1508 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1509 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1510 case PGMPOOLKIND_32BIT_PD:
1511 case PGMPOOLKIND_PAE_PDPT:
1512 switch (enmKind2)
1513 {
1514 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1515 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1516 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1517 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1518 case PGMPOOLKIND_64BIT_PML4:
1519 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1520 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1521 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1522 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1523 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1524 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1525 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1526 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1527 return true;
1528 default:
1529 return false;
1530 }
1531
1532 /*
1533 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1534 */
1535 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1536 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1537 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1538 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1539 case PGMPOOLKIND_64BIT_PML4:
1540 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1541 switch (enmKind2)
1542 {
1543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1544 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1545 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1546 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1547 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1548 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1549 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1550 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1551 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1552 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1553 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1554 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1555 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1556 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1557 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1558 return true;
1559 default:
1560 return false;
1561 }
1562
1563 /*
1564 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1565 */
1566#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1567 case PGMPOOLKIND_ROOT_32BIT_PD:
1568 case PGMPOOLKIND_ROOT_PAE_PD:
1569 case PGMPOOLKIND_ROOT_PDPT:
1570#endif
1571 case PGMPOOLKIND_ROOT_NESTED:
1572 return false;
1573
1574 default:
1575 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1576 }
1577}
1578
1579
1580/**
1581 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1582 *
1583 * @returns VBox status code.
1584 * @retval VINF_PGM_CACHED_PAGE on success.
1585 * @retval VERR_FILE_NOT_FOUND if not found.
1586 * @param pPool The pool.
1587 * @param GCPhys The GC physical address of the page we're going to shadow.
1588 * @param enmKind The kind of mapping.
1589 * @param iUser The shadow page pool index of the user table.
1590 * @param iUserTable The index into the user table (shadowed).
1591 * @param ppPage Where to store the pointer to the page.
1592 */
1593static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1594{
1595#ifndef IN_RC
1596 const PVM pVM = pPool->CTX_SUFF(pVM);
1597#endif
1598 /*
1599 * Look up the GCPhys in the hash.
1600 */
1601 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1602 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1603 if (i != NIL_PGMPOOL_IDX)
1604 {
1605 do
1606 {
1607 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1608 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1609 if (pPage->GCPhys == GCPhys)
1610 {
1611 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1612 {
1613 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1614 * doesn't flush it in case there are no more free use records.
1615 */
1616 pgmPoolCacheUsed(pPool, pPage);
1617
1618 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1619 if (RT_SUCCESS(rc))
1620 {
1621 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1622 *ppPage = pPage;
1623 STAM_COUNTER_INC(&pPool->StatCacheHits);
1624 return VINF_PGM_CACHED_PAGE;
1625 }
1626 return rc;
1627 }
1628
1629 /*
1630 * The kind is different. In some cases we should now flush the page
1631 * as it has been reused, but in most cases this is normal remapping
1632 * of PDs as PT or big pages using the GCPhys field in a slightly
1633 * different way than the other kinds.
1634 */
1635 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1636 {
1637 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1638 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1639 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1640 break;
1641 }
1642 }
1643
1644 /* next */
1645 i = pPage->iNext;
1646 } while (i != NIL_PGMPOOL_IDX);
1647 }
1648
1649 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1650 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1651 return VERR_FILE_NOT_FOUND;
1652}
1653
1654
1655/**
1656 * Inserts a page into the cache.
1657 *
1658 * @param pPool The pool.
1659 * @param pPage The cached page.
1660 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1661 */
1662static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1663{
1664 /*
1665 * Insert into the GCPhys hash if the page is fit for that.
1666 */
1667 Assert(!pPage->fCached);
1668 if (fCanBeCached)
1669 {
1670 pPage->fCached = true;
1671 pgmPoolHashInsert(pPool, pPage);
1672 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1673 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1674 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1675 }
1676 else
1677 {
1678 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1679 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1680 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1681 }
1682
1683 /*
1684 * Insert at the head of the age list.
1685 */
1686 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1687 pPage->iAgeNext = pPool->iAgeHead;
1688 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1689 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1690 else
1691 pPool->iAgeTail = pPage->idx;
1692 pPool->iAgeHead = pPage->idx;
1693}
1694
1695
1696/**
1697 * Flushes a cached page.
1698 *
1699 * @param pPool The pool.
1700 * @param pPage The cached page.
1701 */
1702static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1703{
1704 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1705
1706 /*
1707 * Remove the page from the hash.
1708 */
1709 if (pPage->fCached)
1710 {
1711 pPage->fCached = false;
1712 pgmPoolHashRemove(pPool, pPage);
1713 }
1714 else
1715 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1716
1717 /*
1718 * Remove it from the age list.
1719 */
1720 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1721 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1722 else
1723 pPool->iAgeTail = pPage->iAgePrev;
1724 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1725 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1726 else
1727 pPool->iAgeHead = pPage->iAgeNext;
1728 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1729 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1730}
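
/*
 * Illustrative sketch (kept out of the build): pgmPoolCacheInsert and
 * pgmPoolCacheFlushPage together maintain an LRU-ordered age list, with the
 * most recently used page at iAgeHead and the least recently used one at
 * iAgeTail.  A replacement policy can therefore pick its victim starting at
 * the tail; the hypothetical helper below shows the idea.
 */
#if 0
static PPGMPOOLPAGE pgmPoolCachePickVictimSketch(PPGMPOOL pPool)
{
    uint16_t i = pPool->iAgeTail;
    while (i != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (pPage->fCached)             /* only pages in the GCPhys hash are eligible */
            return pPage;
        i = pPage->iAgePrev;            /* walk towards more recently used pages */
    }
    return NULL;
}
#endif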
1731
1732#endif /* PGMPOOL_WITH_CACHE */
1733#ifdef PGMPOOL_WITH_MONITORING
1734
1735/**
1736 * Looks for pages sharing the monitor.
1737 *
1738 * @returns Pointer to the head page.
1739 * @returns NULL if not found.
1740 * @param pPool The Pool
1741 * @param pNewPage The page which is going to be monitored.
1742 */
1743static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1744{
1745#ifdef PGMPOOL_WITH_CACHE
1746 /*
1747 * Look up the GCPhys in the hash.
1748 */
1749 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1750 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1751 if (i == NIL_PGMPOOL_IDX)
1752 return NULL;
1753 do
1754 {
1755 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1756 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1757 && pPage != pNewPage)
1758 {
1759 switch (pPage->enmKind)
1760 {
1761 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1762 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1763 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1764 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1765 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1766 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1767 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1768 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1769 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1770 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1771 case PGMPOOLKIND_64BIT_PML4:
1772#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1773 case PGMPOOLKIND_32BIT_PD:
1774 case PGMPOOLKIND_PAE_PDPT:
1775#else
1776 case PGMPOOLKIND_ROOT_32BIT_PD:
1777 case PGMPOOLKIND_ROOT_PAE_PD:
1778 case PGMPOOLKIND_ROOT_PDPT:
1779#endif
1780 {
1781 /* find the head */
1782 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1783 {
1784 Assert(pPage->iMonitoredPrev != pPage->idx);
1785 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1786 }
1787 return pPage;
1788 }
1789
1790 /* ignore, no monitoring. */
1791 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1792 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1793 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1794 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1795 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1796 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1797 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1798 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1799 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1800 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1801 case PGMPOOLKIND_ROOT_NESTED:
1802 case PGMPOOLKIND_PAE_PD_PHYS:
1803 case PGMPOOLKIND_PAE_PDPT_PHYS:
1804 case PGMPOOLKIND_32BIT_PD_PHYS:
1805#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1806 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1807#endif
1808 break;
1809 default:
1810 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1811 }
1812 }
1813
1814 /* next */
1815 i = pPage->iNext;
1816 } while (i != NIL_PGMPOOL_IDX);
1817#endif
1818 return NULL;
1819}
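
/*
 * Illustrative sketch (kept out of the build): the lookup above masks the
 * GCPhys down to a page boundary and then uses an unsigned subtraction as a
 * range test.  The hypothetical predicate below isolates that trick; it is
 * equivalent to "GCPhysB lies within the same guest page as GCPhysA".
 */
#if 0
DECLINLINE(bool) pgmPoolSamePhysPageSketch(RTGCPHYS GCPhysA, RTGCPHYS GCPhysB)
{
    RTGCPHYS GCPhysPage = GCPhysA & ~(RTGCPHYS)(PAGE_SIZE - 1);
    /* If GCPhysB is below GCPhysPage the subtraction wraps around and the test fails. */
    return GCPhysB - GCPhysPage < PAGE_SIZE;
}
#endif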
1820
1821
1822/**
1823 * Enables write monitoring of a guest page.
1824 *
1825 * @returns VBox status code.
1826 * @retval VINF_SUCCESS on success.
1827 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1828 * @param pPool The pool.
1829 * @param pPage The cached page.
1830 */
1831static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1832{
1833 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1834
1835 /*
1836 * Filter out the relevant kinds.
1837 */
1838 switch (pPage->enmKind)
1839 {
1840 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1841 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1842 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1843 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1844 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1845 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1846 case PGMPOOLKIND_64BIT_PML4:
1847#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1848 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1849 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1850 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1851 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1852 case PGMPOOLKIND_32BIT_PD:
1853 case PGMPOOLKIND_PAE_PDPT:
1854#else
1855 case PGMPOOLKIND_ROOT_PDPT:
1856#endif
1857 break;
1858
1859 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1861 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1862 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1863 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1864 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1865 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1866 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1867 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1868 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1869 case PGMPOOLKIND_ROOT_NESTED:
1870 /* Nothing to monitor here. */
1871 return VINF_SUCCESS;
1872
1873#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1874 case PGMPOOLKIND_32BIT_PD_PHYS:
1875 case PGMPOOLKIND_PAE_PDPT_PHYS:
1876 case PGMPOOLKIND_PAE_PD_PHYS:
1877 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1878 /* Nothing to monitor here. */
1879 return VINF_SUCCESS;
1880#else
1881 case PGMPOOLKIND_ROOT_32BIT_PD:
1882 case PGMPOOLKIND_ROOT_PAE_PD:
1883#endif
1884#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1885 break;
1886#else
1887 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1888#endif
1889 default:
1890 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1891 }
1892
1893 /*
1894 * Install handler.
1895 */
1896 int rc;
1897 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1898 if (pPageHead)
1899 {
1900 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1901 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1902 pPage->iMonitoredPrev = pPageHead->idx;
1903 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1904 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1905 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1906 pPageHead->iMonitoredNext = pPage->idx;
1907 rc = VINF_SUCCESS;
1908 }
1909 else
1910 {
1911 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1912 PVM pVM = pPool->CTX_SUFF(pVM);
1913 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1914 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1915 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1916 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1917 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1918 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1919 pPool->pszAccessHandler);
1920 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1921 * the heap size should suffice. */
1922 AssertFatalRC(rc);
1923 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1924 rc = VERR_PGM_POOL_CLEARED;
1925 }
1926 pPage->fMonitored = true;
1927 return rc;
1928}
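
/*
 * Illustrative sketch (kept out of the build): pool pages monitoring the same
 * guest page are linked into a chain through iMonitoredPrev/iMonitoredNext,
 * and only the head of the chain owns the physical access handler.  The
 * hypothetical helper below walks such a chain from its head and counts the
 * members.
 */
#if 0
static unsigned pgmPoolMonitorChainLengthSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPageHead)
{
    Assert(pPageHead->iMonitoredPrev == NIL_PGMPOOL_IDX); /* must be the chain head */
    unsigned cPages = 0;
    uint16_t i = pPageHead->idx;
    while (i != NIL_PGMPOOL_IDX)
    {
        cPages++;
        i = pPool->aPages[i].iMonitoredNext;
    }
    return cPages;
}
#endif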
1929
1930
1931/**
1932 * Disables write monitoring of a guest page.
1933 *
1934 * @returns VBox status code.
1935 * @retval VINF_SUCCESS on success.
1936 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1937 * @param pPool The pool.
1938 * @param pPage The cached page.
1939 */
1940static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1941{
1942 /*
1943 * Filter out the relevant kinds.
1944 */
1945 switch (pPage->enmKind)
1946 {
1947 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1948 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1949 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1950 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1951 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1952 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1953 case PGMPOOLKIND_64BIT_PML4:
1954#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1955 case PGMPOOLKIND_32BIT_PD:
1956 case PGMPOOLKIND_PAE_PDPT:
1957 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1958 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1959 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1960 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1961#else
1962 case PGMPOOLKIND_ROOT_PDPT:
1963#endif
1964 break;
1965
1966 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1967 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1968 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1969 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1970 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1971 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1972 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1973 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1974 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1975 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1976 case PGMPOOLKIND_ROOT_NESTED:
1977 case PGMPOOLKIND_PAE_PD_PHYS:
1978 case PGMPOOLKIND_PAE_PDPT_PHYS:
1979 case PGMPOOLKIND_32BIT_PD_PHYS:
1980 /* Nothing to monitor here. */
1981 return VINF_SUCCESS;
1982
1983#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1984 case PGMPOOLKIND_ROOT_32BIT_PD:
1985 case PGMPOOLKIND_ROOT_PAE_PD:
1986#endif
1987#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1988 break;
1989#endif
1990#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1991 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1992#endif
1993 default:
1994 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1995 }
1996
1997 /*
1998 * Remove the page from the monitored list or uninstall it if last.
1999 */
2000 const PVM pVM = pPool->CTX_SUFF(pVM);
2001 int rc;
2002 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2003 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2004 {
2005 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2006 {
2007 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2008 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2009#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2010 pNewHead->fCR3Mix = pPage->fCR3Mix;
2011#endif
2012 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2013 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2014 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2015 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2016 pPool->pszAccessHandler);
2017 AssertFatalRCSuccess(rc);
2018 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2019 }
2020 else
2021 {
2022 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2023 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2024 {
2025 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2026 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2027 }
2028 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2029 rc = VINF_SUCCESS;
2030 }
2031 }
2032 else
2033 {
2034 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2035 AssertFatalRC(rc);
2036 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2037 rc = VERR_PGM_POOL_CLEARED;
2038 }
2039 pPage->fMonitored = false;
2040
2041 /*
2042 * Remove it from the list of modified pages (if in it).
2043 */
2044 pgmPoolMonitorModifiedRemove(pPool, pPage);
2045
2046 return rc;
2047}
2048
2049# if defined(PGMPOOL_WITH_MIXED_PT_CR3) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2050
2051/**
2052 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
2053 *
2054 * @param pPool The Pool.
2055 * @param pPage A page in the chain.
2056 * @param fCR3Mix The new fCR3Mix value.
2057 */
2058static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
2059{
2060 /* current */
2061 pPage->fCR3Mix = fCR3Mix;
2062
2063 /* before */
2064 int16_t idx = pPage->iMonitoredPrev;
2065 while (idx != NIL_PGMPOOL_IDX)
2066 {
2067 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2068 idx = pPool->aPages[idx].iMonitoredPrev;
2069 }
2070
2071 /* after */
2072 idx = pPage->iMonitoredNext;
2073 while (idx != NIL_PGMPOOL_IDX)
2074 {
2075 pPool->aPages[idx].fCR3Mix = fCR3Mix;
2076 idx = pPool->aPages[idx].iMonitoredNext;
2077 }
2078}
2079
2080
2081/**
2082 * Installs or modifies monitoring of a CR3 page (special).
2083 *
2084 * We're pretending the CR3 page is shadowed by the pool so we can use the
2085 * generic mechanisms for detecting chained monitoring. (This also gives us a
2086 * taste of what code changes are required to really pool CR3 shadow pages.)
2087 *
2088 * @returns VBox status code.
2089 * @param pPool The pool.
2090 * @param idxRoot The CR3 (root) page index.
2091 * @param GCPhysCR3 The (new) CR3 value.
2092 */
2093int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
2094{
2095 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2096 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2097 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
2098 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
2099
2100 /*
2101 * The unlikely case where it already matches.
2102 */
2103 if (pPage->GCPhys == GCPhysCR3)
2104 {
2105 Assert(pPage->fMonitored);
2106 return VINF_SUCCESS;
2107 }
2108
2109 /*
2110 * Flush the current monitoring and remove it from the hash.
2111 */
2112 int rc = VINF_SUCCESS;
2113 if (pPage->fMonitored)
2114 {
2115 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2116 rc = pgmPoolMonitorFlush(pPool, pPage);
2117 if (rc == VERR_PGM_POOL_CLEARED)
2118 rc = VINF_SUCCESS;
2119 else
2120 AssertFatalRC(rc);
2121 pgmPoolHashRemove(pPool, pPage);
2122 }
2123
2124 /*
2125 * Monitor the page at the new location and insert it into the hash.
2126 */
2127 pPage->GCPhys = GCPhysCR3;
2128 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
2129 if (rc2 != VERR_PGM_POOL_CLEARED)
2130 {
2131 AssertFatalRC(rc2);
2132 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
2133 rc = rc2;
2134 }
2135 pgmPoolHashInsert(pPool, pPage);
2136 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
2137 return rc;
2138}
2139
2140
2141/**
2142 * Removes the monitoring of a CR3 page (special).
2143 *
2144 * @returns VBox status code.
2145 * @param pPool The pool.
2146 * @param idxRoot The CR3 (root) page index.
2147 */
2148int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
2149{
2150 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
2151 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
2152 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
2153 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
2154
2155 if (!pPage->fMonitored)
2156 return VINF_SUCCESS;
2157
2158 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
2159 int rc = pgmPoolMonitorFlush(pPool, pPage);
2160 if (rc != VERR_PGM_POOL_CLEARED)
2161 AssertFatalRC(rc);
2162 else
2163 rc = VINF_SUCCESS;
2164 pgmPoolHashRemove(pPool, pPage);
2165 Assert(!pPage->fMonitored);
2166 pPage->GCPhys = NIL_RTGCPHYS;
2167 return rc;
2168}
2169
2170# endif /* PGMPOOL_WITH_MIXED_PT_CR3 && !VBOX_WITH_PGMPOOL_PAGING_ONLY*/
2171
2172/**
2173 * Inserts the page into the list of modified pages.
2174 *
2175 * @param pPool The pool.
2176 * @param pPage The page.
2177 */
2178void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2179{
2180 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2181 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2182 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2183 && pPool->iModifiedHead != pPage->idx,
2184 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2185 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2186 pPool->iModifiedHead, pPool->cModifiedPages));
2187
2188 pPage->iModifiedNext = pPool->iModifiedHead;
2189 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2190 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2191 pPool->iModifiedHead = pPage->idx;
2192 pPool->cModifiedPages++;
2193#ifdef VBOX_WITH_STATISTICS
2194 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2195 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2196#endif
2197}
2198
2199
2200/**
2201 * Removes the page from the list of modified pages and resets the
2202 * modification counter.
2203 *
2204 * @param pPool The pool.
2205 * @param pPage The page which is believed to be in the list of modified pages.
2206 */
2207static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2208{
2209 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2210 if (pPool->iModifiedHead == pPage->idx)
2211 {
2212 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2213 pPool->iModifiedHead = pPage->iModifiedNext;
2214 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2215 {
2216 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2217 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2218 }
2219 pPool->cModifiedPages--;
2220 }
2221 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2222 {
2223 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2224 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2225 {
2226 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2227 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2228 }
2229 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2230 pPool->cModifiedPages--;
2231 }
2232 else
2233 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2234 pPage->cModifications = 0;
2235}
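
/*
 * Illustrative sketch (kept out of the build): a page sits on the modified
 * list exactly when it is the list head or has a valid iModifiedPrev link,
 * which is the case split handled by the remove function above.  The
 * predicate below is hypothetical and only mirrors that invariant.
 */
#if 0
DECLINLINE(bool) pgmPoolIsOnModifiedListSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    return pPool->iModifiedHead == pPage->idx
        || pPage->iModifiedPrev != NIL_PGMPOOL_IDX;
}
#endif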
2236
2237
2238/**
2239 * Zaps the list of modified pages, resetting their modification counters in the process.
2240 *
2241 * @param pVM The VM handle.
2242 */
2243void pgmPoolMonitorModifiedClearAll(PVM pVM)
2244{
2245 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2246 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2247
2248 unsigned cPages = 0; NOREF(cPages);
2249 uint16_t idx = pPool->iModifiedHead;
2250 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2251 while (idx != NIL_PGMPOOL_IDX)
2252 {
2253 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2254 idx = pPage->iModifiedNext;
2255 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2256 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2257 pPage->cModifications = 0;
2258 Assert(++cPages);
2259 }
2260 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2261 pPool->cModifiedPages = 0;
2262}
2263
2264
2265#ifdef IN_RING3
2266/**
2267 * Clears all shadow pages and clears all modification counters.
2268 *
2269 * @param pVM The VM handle.
2270 * @remark Should only be used when monitoring is available, thus placed in
2271 * the PGMPOOL_WITH_MONITORING #ifdef.
2272 */
2273void pgmPoolClearAll(PVM pVM)
2274{
2275 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2276 STAM_PROFILE_START(&pPool->StatClearAll, c);
2277 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2278
2279 /*
2280     * Iterate all the pages until we've encountered all those in use.
2281     * This is a simple but not quite optimal solution.
2282 */
2283 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2284 unsigned cLeft = pPool->cUsedPages;
2285 unsigned iPage = pPool->cCurPages;
2286 while (--iPage >= PGMPOOL_IDX_FIRST)
2287 {
2288 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2289 if (pPage->GCPhys != NIL_RTGCPHYS)
2290 {
2291 switch (pPage->enmKind)
2292 {
2293 /*
2294 * We only care about shadow page tables.
2295 */
2296 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2297 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2298 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2299 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2300 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2301 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2302 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2303 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2304 {
2305#ifdef PGMPOOL_WITH_USER_TRACKING
2306 if (pPage->cPresent)
2307#endif
2308 {
2309 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2310 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2311 ASMMemZeroPage(pvShw);
2312 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2313#ifdef PGMPOOL_WITH_USER_TRACKING
2314 pPage->cPresent = 0;
2315 pPage->iFirstPresent = ~0;
2316#endif
2317 }
2318 }
2319 /* fall thru */
2320
2321 default:
2322 Assert(!pPage->cModifications || ++cModifiedPages);
2323 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2324 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2325 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2326 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2327 pPage->cModifications = 0;
2328 break;
2329
2330 }
2331 if (!--cLeft)
2332 break;
2333 }
2334 }
2335
2336    /* sweep the special pages too. */
2337 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2338 {
2339 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2340 if (pPage->GCPhys != NIL_RTGCPHYS)
2341 {
2342 Assert(!pPage->cModifications || ++cModifiedPages);
2343 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2344 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2345 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2346 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2347 pPage->cModifications = 0;
2348 }
2349 }
2350
2351#ifndef DEBUG_michael
2352 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2353#endif
2354 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2355 pPool->cModifiedPages = 0;
2356
2357#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2358 /*
2359 * Clear all the GCPhys links and rebuild the phys ext free list.
2360 */
2361 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2362 pRam;
2363 pRam = pRam->CTX_SUFF(pNext))
2364 {
2365 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2366 while (iPage-- > 0)
2367 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2368 }
2369
2370 pPool->iPhysExtFreeHead = 0;
2371 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2372 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2373 for (unsigned i = 0; i < cMaxPhysExts; i++)
2374 {
2375 paPhysExts[i].iNext = i + 1;
2376 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2377 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2378 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2379 }
2380 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2381#endif
2382
2383
2384 pPool->cPresent = 0;
2385 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2386}
2387#endif /* IN_RING3 */
2388
2389
2390/**
2391 * Handles SyncCR3 pool tasks.
2392 *
2393 * @returns VBox status code.
2394 * @retval VINF_SUCCESS on success.
2395 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2396 * @param pVM The VM handle.
2397 * @remark Should only be used when monitoring is available, thus placed in
2398 * the PGMPOOL_WITH_MONITORING #ifdef.
2399 */
2400int pgmPoolSyncCR3(PVM pVM)
2401{
2402 LogFlow(("pgmPoolSyncCR3\n"));
2403 /*
2404 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2405 * Occasionally we will have to clear all the shadow page tables because we wanted
2406 * to monitor a page which was mapped by too many shadowed page tables. This operation
2407 * is sometimes referred to as a 'lightweight flush'.
2408 */
2409 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2410 pgmPoolMonitorModifiedClearAll(pVM);
2411 else
2412 {
2413# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2414 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2415 pgmPoolClearAll(pVM);
2416# else /* !IN_RING3 */
2417 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2418 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2419 return VINF_PGM_SYNC_CR3;
2420# endif /* !IN_RING3 */
2421 }
2422 return VINF_SUCCESS;
2423}
2424
2425#endif /* PGMPOOL_WITH_MONITORING */
2426#ifdef PGMPOOL_WITH_USER_TRACKING
2427
2428/**
2429 * Frees up at least one user entry.
2430 *
2431 * @returns VBox status code.
2432 * @retval VINF_SUCCESS if at least one user entry was freed.
2433 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2434 * @param pPool The pool.
2435 * @param iUser The user index.
2436 */
2437static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2438{
2439 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2440#ifdef PGMPOOL_WITH_CACHE
2441 /*
2442 * Just free cached pages in a braindead fashion.
2443 */
2444 /** @todo walk the age list backwards and free the first with usage. */
2445 int rc = VINF_SUCCESS;
2446 do
2447 {
2448 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2449 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2450 rc = rc2;
2451 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2452 return rc;
2453#else
2454 /*
2455 * Lazy approach.
2456 */
2457    /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2458 Assert(!CPUMIsGuestInLongMode(pVM));
2459 pgmPoolFlushAllInt(pPool);
2460 return VERR_PGM_POOL_FLUSHED;
2461#endif
2462}
2463
2464
2465/**
2466 * Inserts a page into the cache.
2467 *
2468 * This will create a user node for the page, insert it into the GCPhys
2469 * hash, and insert it into the age list.
2470 *
2471 * @returns VBox status code.
2472 * @retval VINF_SUCCESS if successfully added.
2473 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2474 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
2475 * @param pPool The pool.
2476 * @param pPage The cached page.
2477 * @param GCPhys The GC physical address of the page we're gonna shadow.
2478 * @param iUser The user index.
2479 * @param iUserTable The user table index.
2480 */
2481DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2482{
2483 int rc = VINF_SUCCESS;
2484 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2485
2486 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2487
2488#ifdef VBOX_STRICT
2489 /*
2490     * Check that the entry doesn't already exist.
2491 */
2492 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2493 {
2494 uint16_t i = pPage->iUserHead;
2495 do
2496 {
2497 Assert(i < pPool->cMaxUsers);
2498 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2499 i = paUsers[i].iNext;
2500 } while (i != NIL_PGMPOOL_USER_INDEX);
2501 }
2502#endif
2503
2504 /*
2505     * Find a free user node.
2506 */
2507 uint16_t i = pPool->iUserFreeHead;
2508 if (i == NIL_PGMPOOL_USER_INDEX)
2509 {
2510 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2511 if (RT_FAILURE(rc))
2512 return rc;
2513 i = pPool->iUserFreeHead;
2514 }
2515
2516 /*
2517 * Unlink the user node from the free list,
2518 * initialize and insert it into the user list.
2519 */
2520 pPool->iUserFreeHead = paUsers[i].iNext;
2521 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2522 paUsers[i].iUser = iUser;
2523 paUsers[i].iUserTable = iUserTable;
2524 pPage->iUserHead = i;
2525
2526 /*
2527 * Insert into cache and enable monitoring of the guest page if enabled.
2528 *
2529 * Until we implement caching of all levels, including the CR3 one, we'll
2530     * have to make sure we don't try to monitor & cache any recursive reuse of
2531     * a monitored CR3 page. Because all Windows versions do this, we'll
2532 * have to be able to do combined access monitoring, CR3 + PT and
2533 * PD + PT (guest PAE).
2534 *
2535 * Update:
2536 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2537 */
2538#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2539# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2540 const bool fCanBeMonitored = true;
2541# else
2542 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2543 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2544 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2545# endif
2546# ifdef PGMPOOL_WITH_CACHE
2547 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2548# endif
2549 if (fCanBeMonitored)
2550 {
2551# ifdef PGMPOOL_WITH_MONITORING
2552 rc = pgmPoolMonitorInsert(pPool, pPage);
2553 if (rc == VERR_PGM_POOL_CLEARED)
2554 {
2555 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2556# ifndef PGMPOOL_WITH_CACHE
2557 pgmPoolMonitorFlush(pPool, pPage);
2558 rc = VERR_PGM_POOL_FLUSHED;
2559# endif
2560 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2561 paUsers[i].iNext = pPool->iUserFreeHead;
2562 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2563 pPool->iUserFreeHead = i;
2564 }
2565 }
2566# endif
2567#endif /* PGMPOOL_WITH_MONITORING */
2568 return rc;
2569}
2570
2571
2572# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2573/**
2574 * Adds a user reference to a page.
2575 *
2576 * This will move the page to the head of the age list.
2577 *
2578 * @returns VBox status code.
2579 * @retval VINF_SUCCESS if successfully added.
2580 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2581 * @param pPool The pool.
2582 * @param pPage The cached page.
2583 * @param iUser The user index.
2584 * @param iUserTable The user table.
2585 */
2586static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2587{
2588 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2589
2590 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2591# ifdef VBOX_STRICT
2592 /*
2593     * Check that the entry doesn't already exist.
2594 */
2595 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2596 {
2597 uint16_t i = pPage->iUserHead;
2598 do
2599 {
2600 Assert(i < pPool->cMaxUsers);
2601 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2602 i = paUsers[i].iNext;
2603 } while (i != NIL_PGMPOOL_USER_INDEX);
2604 }
2605# endif
2606
2607 /*
2608 * Allocate a user node.
2609 */
2610 uint16_t i = pPool->iUserFreeHead;
2611 if (i == NIL_PGMPOOL_USER_INDEX)
2612 {
2613 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2614 if (RT_FAILURE(rc))
2615 return rc;
2616 i = pPool->iUserFreeHead;
2617 }
2618 pPool->iUserFreeHead = paUsers[i].iNext;
2619
2620 /*
2621 * Initialize the user node and insert it.
2622 */
2623 paUsers[i].iNext = pPage->iUserHead;
2624 paUsers[i].iUser = iUser;
2625 paUsers[i].iUserTable = iUserTable;
2626 pPage->iUserHead = i;
2627
2628# ifdef PGMPOOL_WITH_CACHE
2629 /*
2630 * Tell the cache to update its replacement stats for this page.
2631 */
2632 pgmPoolCacheUsed(pPool, pPage);
2633# endif
2634 return VINF_SUCCESS;
2635}
2636# endif /* PGMPOOL_WITH_CACHE */
2637
2638
2639/**
2640 * Frees a user record associated with a page.
2641 *
2642 * This does not clear the entry in the user table, it simply returns the
2643 * user record to the chain of free records.
2644 *
2645 * @param pPool The pool.
2646 * @param pPage       The shadow page whose user record is being freed.
2647 * @param iUser The shadow page pool index of the user table.
2648 * @param iUserTable The index into the user table (shadowed).
2649 */
2650static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2651{
2652 /*
2653 * Unlink and free the specified user entry.
2654 */
2655 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2656
2657 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2658 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2659 uint16_t i = pPage->iUserHead;
2660 if ( i != NIL_PGMPOOL_USER_INDEX
2661 && paUsers[i].iUser == iUser
2662 && paUsers[i].iUserTable == iUserTable)
2663 {
2664 pPage->iUserHead = paUsers[i].iNext;
2665
2666 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2667 paUsers[i].iNext = pPool->iUserFreeHead;
2668 pPool->iUserFreeHead = i;
2669 return;
2670 }
2671
2672 /* General: Linear search. */
2673 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2674 while (i != NIL_PGMPOOL_USER_INDEX)
2675 {
2676 if ( paUsers[i].iUser == iUser
2677 && paUsers[i].iUserTable == iUserTable)
2678 {
2679 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2680 paUsers[iPrev].iNext = paUsers[i].iNext;
2681 else
2682 pPage->iUserHead = paUsers[i].iNext;
2683
2684 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2685 paUsers[i].iNext = pPool->iUserFreeHead;
2686 pPool->iUserFreeHead = i;
2687 return;
2688 }
2689 iPrev = i;
2690 i = paUsers[i].iNext;
2691 }
2692
2693 /* Fatal: didn't find it */
2694 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2695 iUser, iUserTable, pPage->GCPhys));
2696}
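
/*
 * Illustrative sketch (kept out of the build): the user records of a page
 * form a singly linked chain starting at iUserHead, which is what the fast
 * path and the linear search above traverse.  The hypothetical helper below
 * simply counts the users of a page by walking that chain.
 */
#if 0
static unsigned pgmPoolCountUsersSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
    unsigned     cUsers  = 0;
    uint16_t     i       = pPage->iUserHead;
    while (i != NIL_PGMPOOL_USER_INDEX)
    {
        cUsers++;
        i = paUsers[i].iNext;
    }
    return cUsers;
}
#endif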
2697
2698
2699/**
2700 * Gets the entry size of a shadow table.
2701 *
2702 * @param enmKind The kind of page.
2703 *
2704 * @returns The size of the entry in bytes. That is, 4 or 8.
2705 * @returns If the kind is not for a table, an assertion is raised and 0 is
2706 * returned.
2707 */
2708DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2709{
2710 switch (enmKind)
2711 {
2712 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2713 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2714 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2715#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2716 case PGMPOOLKIND_32BIT_PD:
2717 case PGMPOOLKIND_32BIT_PD_PHYS:
2718#else
2719 case PGMPOOLKIND_ROOT_32BIT_PD:
2720#endif
2721 return 4;
2722
2723 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2724 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2725 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2726 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2727 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2728 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2729 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2730 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2731 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2733 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2734 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2735 case PGMPOOLKIND_64BIT_PML4:
2736#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2737 case PGMPOOLKIND_ROOT_PAE_PD:
2738 case PGMPOOLKIND_ROOT_PDPT:
2739#endif
2740 case PGMPOOLKIND_PAE_PDPT:
2741 case PGMPOOLKIND_ROOT_NESTED:
2742 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2745 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2746 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2747 case PGMPOOLKIND_PAE_PD_PHYS:
2748 case PGMPOOLKIND_PAE_PDPT_PHYS:
2749 return 8;
2750
2751 default:
2752 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2753 }
2754}
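
/*
 * Illustrative sketch (kept out of the build): the entry size directly
 * determines how many entries a shadow table of a given kind holds, since
 * every shadow table occupies one page.  The helper below is hypothetical.
 */
#if 0
DECLINLINE(unsigned) pgmPoolShadowEntriesPerPageSketch(PGMPOOLKIND enmKind)
{
    /* 4096 / 4 = 1024 entries (32-bit), 4096 / 8 = 512 entries (PAE/AMD64/EPT). */
    return PAGE_SIZE / pgmPoolTrackGetShadowEntrySize(enmKind);
}
#endif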
2755
2756
2757/**
2758 * Gets the entry size of a guest table.
2759 *
2760 * @param enmKind The kind of page.
2761 *
2762 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2763 * @returns If the kind is not for a table, an assertion is raised and 0 is
2764 * returned.
2765 */
2766DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2767{
2768 switch (enmKind)
2769 {
2770 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2771 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2772#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2773 case PGMPOOLKIND_32BIT_PD:
2774#else
2775 case PGMPOOLKIND_ROOT_32BIT_PD:
2776#endif
2777 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2778 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2779 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2780 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2781 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2782 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2783 return 4;
2784
2785 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2786 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2787 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2788 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2789 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2790 case PGMPOOLKIND_64BIT_PML4:
2791#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2792 case PGMPOOLKIND_PAE_PDPT:
2793#else
2794 case PGMPOOLKIND_ROOT_PAE_PD:
2795 case PGMPOOLKIND_ROOT_PDPT:
2796#endif
2797 return 8;
2798
2799 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2800 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2801 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2802 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2803 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2804 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2805 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2806 case PGMPOOLKIND_ROOT_NESTED:
2807 case PGMPOOLKIND_PAE_PD_PHYS:
2808 case PGMPOOLKIND_PAE_PDPT_PHYS:
2809 case PGMPOOLKIND_32BIT_PD_PHYS:
2810 /** @todo can we return 0? (nobody is calling this...) */
2811 AssertFailed();
2812 return 0;
2813
2814 default:
2815 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2816 }
2817}
2818
2819#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2820
2821/**
2822 * Scans one shadow page table for mappings of a physical page.
2823 *
2824 * @param pVM The VM handle.
2825 * @param pPhysPage The guest page in question.
2826 * @param iShw The shadow page table.
2827 * @param cRefs The number of references made in that PT.
2828 */
2829static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2830{
2831 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2832 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2833
2834 /*
2835 * Assert sanity.
2836 */
2837 Assert(cRefs == 1);
2838 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2839 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2840
2841 /*
2842 * Then, clear the actual mappings to the page in the shadow PT.
2843 */
2844 switch (pPage->enmKind)
2845 {
2846 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2847 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2848 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2849 {
2850 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2851 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2852 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2853 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2854 {
2855 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2856 pPT->a[i].u = 0;
2857 cRefs--;
2858 if (!cRefs)
2859 return;
2860 }
2861#ifdef LOG_ENABLED
2862 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2863 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2864 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2865 {
2866 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2867 pPT->a[i].u = 0;
2868 }
2869#endif
2870 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2871 break;
2872 }
2873
2874 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2876 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2877 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2878 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2879 {
2880 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2881 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2882 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2883 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2884 {
2885 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2886 pPT->a[i].u = 0;
2887 cRefs--;
2888 if (!cRefs)
2889 return;
2890 }
2891#ifdef LOG_ENABLED
2892 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2893 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2894 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2895 {
2896 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2897 pPT->a[i].u = 0;
2898 }
2899#endif
2900 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2901 break;
2902 }
2903
2904 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2905 {
2906 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2907 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2908 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2909 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2910 {
2911 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2912 pPT->a[i].u = 0;
2913 cRefs--;
2914 if (!cRefs)
2915 return;
2916 }
2917#ifdef LOG_ENABLED
2918 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2919 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2920 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2921 {
2922 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2923 pPT->a[i].u = 0;
2924 }
2925#endif
2926 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2927 break;
2928 }
2929
2930 default:
2931 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2932 }
2933}
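
/*
 * Illustrative sketch (kept out of the build): each case above uses the same
 * match test -- a shadow PTE references the guest page exactly when its
 * present bit is set and its address field equals the page's host physical
 * address.  The hypothetical predicate below isolates the PAE/EPT variant.
 */
#if 0
DECLINLINE(bool) pgmPoolPteRefersToPageSketch(uint64_t uPte, RTHCPHYS HCPhysPage)
{
    const uint64_t u64 = HCPhysPage | X86_PTE_P;
    return (uPte & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64;
}
#endif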
2934
2935
2936/**
2937 * Scans one shadow page table for mappings of a physical page.
2938 *
2939 * @param pVM The VM handle.
2940 * @param pPhysPage The guest page in question.
2941 * @param iShw The shadow page table.
2942 * @param cRefs The number of references made in that PT.
2943 */
2944void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2945{
2946 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2947 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2948 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2949 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2950 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2951 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2952}
2953
2954
2955/**
2956 * Flushes a list of shadow page tables mapping the same physical page.
2957 *
2958 * @param pVM The VM handle.
2959 * @param pPhysPage The guest page in question.
2960 * @param iPhysExt The physical cross reference extent list to flush.
2961 */
2962void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2963{
2964 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2965 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2966    LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt=%d\n", pPhysPage, iPhysExt));
2967
2968 const uint16_t iPhysExtStart = iPhysExt;
2969 PPGMPOOLPHYSEXT pPhysExt;
2970 do
2971 {
2972 Assert(iPhysExt < pPool->cMaxPhysExts);
2973 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2974 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2975 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2976 {
2977 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2978 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2979 }
2980
2981 /* next */
2982 iPhysExt = pPhysExt->iNext;
2983 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2984
2985 /* insert the list into the free list and clear the ram range entry. */
2986 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2987 pPool->iPhysExtFreeHead = iPhysExtStart;
2988 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2989
2990 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2991}
2992
2993#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2994
2995/**
2996 * Scans all shadow page tables for mappings of a physical page.
2997 *
2998 * This may be slow, but it's most likely more efficient than cleaning
2999 * out the entire page pool / cache.
3000 *
3001 * @returns VBox status code.
3002 * @retval VINF_SUCCESS if all references have been successfully cleared.
3003 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3004 * a page pool cleaning.
3005 *
3006 * @param pVM The VM handle.
3007 * @param pPhysPage The guest page in question.
3008 */
3009int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3010{
3011 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3012 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3013 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3014 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3015
3016#if 1
3017 /*
3018 * There is a limit to what makes sense.
3019 */
3020 if (pPool->cPresent > 1024)
3021 {
3022 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3023 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3024 return VINF_PGM_GCPHYS_ALIASED;
3025 }
3026#endif
3027
3028 /*
3029     * Iterate all the pages until we've encountered all those in use.
3030     * This is a simple but not quite optimal solution.
3031 */
3032 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3033 const uint32_t u32 = u64;
3034 unsigned cLeft = pPool->cUsedPages;
3035 unsigned iPage = pPool->cCurPages;
3036 while (--iPage >= PGMPOOL_IDX_FIRST)
3037 {
3038 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3039 if (pPage->GCPhys != NIL_RTGCPHYS)
3040 {
3041 switch (pPage->enmKind)
3042 {
3043 /*
3044 * We only care about shadow page tables.
3045 */
3046 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3047 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3048 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3049 {
3050 unsigned cPresent = pPage->cPresent;
3051 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3052 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3053 if (pPT->a[i].n.u1Present)
3054 {
3055 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3056 {
3057 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3058 pPT->a[i].u = 0;
3059 }
3060 if (!--cPresent)
3061 break;
3062 }
3063 break;
3064 }
3065
3066 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3067 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3068 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3069 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3070 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3071 {
3072 unsigned cPresent = pPage->cPresent;
3073 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3074 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3075 if (pPT->a[i].n.u1Present)
3076 {
3077 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3078 {
3079 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3080 pPT->a[i].u = 0;
3081 }
3082 if (!--cPresent)
3083 break;
3084 }
3085 break;
3086 }
3087 }
3088 if (!--cLeft)
3089 break;
3090 }
3091 }
3092
3093 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3094 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3095 return VINF_SUCCESS;
3096}
3097
3098
3099/**
3100 * Clears the user entry in a user table.
3101 *
3102 * This is used to remove all references to a page when flushing it.
3103 */
3104static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3105{
3106 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3107 Assert(pUser->iUser < pPool->cCurPages);
3108 uint32_t iUserTable = pUser->iUserTable;
3109
3110 /*
3111 * Map the user page.
3112 */
3113 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3114#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3115 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
3116 {
3117        /* Must translate the fake 2048-entry PD to a 512-entry one since the R0 mapping is not linear. */
3118 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
3119 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
3120 iUserTable %= X86_PG_PAE_ENTRIES;
3121 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
3122 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
3123 }
3124#endif
3125 union
3126 {
3127 uint64_t *pau64;
3128 uint32_t *pau32;
3129 } u;
3130 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3131
3132 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3133
3134 /* Safety precaution in case we change the paging for other modes too in the future. */
3135#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3136 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3137#else
3138 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
3139#endif
3140
3141#ifdef VBOX_STRICT
3142 /*
3143 * Some sanity checks.
3144 */
3145 switch (pUserPage->enmKind)
3146 {
3147# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3148 case PGMPOOLKIND_32BIT_PD:
3149 case PGMPOOLKIND_32BIT_PD_PHYS:
3150 Assert(iUserTable < X86_PG_ENTRIES);
3151 break;
3152# else
3153 case PGMPOOLKIND_ROOT_32BIT_PD:
3154 Assert(iUserTable < X86_PG_ENTRIES);
3155 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
3156 break;
3157# endif
3158# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3159 case PGMPOOLKIND_ROOT_PAE_PD:
3160 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
3161 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
3162 break;
3163# endif
3164# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3165 case PGMPOOLKIND_PAE_PDPT:
3166 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3167 case PGMPOOLKIND_PAE_PDPT_PHYS:
3168# else
3169 case PGMPOOLKIND_ROOT_PDPT:
3170# endif
3171 Assert(iUserTable < 4);
3172 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3173 break;
3174 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3175 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3176 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3177 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3178 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3179 case PGMPOOLKIND_PAE_PD_PHYS:
3180 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3181 break;
3182 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3183 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3184 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3185 break;
3186 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3187 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3188 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3189 break;
3190 case PGMPOOLKIND_64BIT_PML4:
3191 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3192 /* GCPhys >> PAGE_SHIFT is the index here */
3193 break;
3194 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3195 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3196 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3197 break;
3198
3199 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3200 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3201 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3202 break;
3203
3204 case PGMPOOLKIND_ROOT_NESTED:
3205 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3206 break;
3207
3208 default:
3209 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3210 break;
3211 }
3212#endif /* VBOX_STRICT */
3213
3214 /*
3215 * Clear the entry in the user page.
3216 */
3217 switch (pUserPage->enmKind)
3218 {
3219 /* 32-bit entries */
3220#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3221 case PGMPOOLKIND_32BIT_PD:
3222 case PGMPOOLKIND_32BIT_PD_PHYS:
3223#else
3224 case PGMPOOLKIND_ROOT_32BIT_PD:
3225#endif
3226 u.pau32[iUserTable] = 0;
3227 break;
3228
3229 /* 64-bit entries */
3230 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3231 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3232 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3233 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3234 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3235#if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3236            /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3237 * non-present PDPT will continue to cause page faults.
3238 */
3239 ASMReloadCR3();
3240#endif
3241 /* no break */
3242 case PGMPOOLKIND_PAE_PD_PHYS:
3243 case PGMPOOLKIND_PAE_PDPT_PHYS:
3244 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3245 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3246 case PGMPOOLKIND_64BIT_PML4:
3247 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3248 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3249#if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
3250 case PGMPOOLKIND_ROOT_PAE_PD:
3251#endif
3252#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3253 case PGMPOOLKIND_PAE_PDPT:
3254 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3255#else
3256 case PGMPOOLKIND_ROOT_PDPT:
3257#endif
3258 case PGMPOOLKIND_ROOT_NESTED:
3259 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3260 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3261 u.pau64[iUserTable] = 0;
3262 break;
3263
3264 default:
3265 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3266 }
3267}
3268
3269
3270/**
3271 * Clears all users of a page.
3272 */
3273static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3274{
3275 /*
3276 * Free all the user records.
3277 */
3278 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3279
3280 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3281 uint16_t i = pPage->iUserHead;
3282 while (i != NIL_PGMPOOL_USER_INDEX)
3283 {
3284        /* Clear entry in user table. */
3285 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3286
3287 /* Free it. */
3288 const uint16_t iNext = paUsers[i].iNext;
3289 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3290 paUsers[i].iNext = pPool->iUserFreeHead;
3291 pPool->iUserFreeHead = i;
3292
3293 /* Next. */
3294 i = iNext;
3295 }
3296 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3297}
3298
3299#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3300
3301/**
3302 * Allocates a new physical cross reference extent.
3303 *
3304 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3305 * @param pVM The VM handle.
3306 * @param piPhysExt Where to store the phys ext index.
3307 */
3308PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3309{
3310 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3311 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3312 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3313 {
3314 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3315 return NULL;
3316 }
3317 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3318 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3319 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3320 *piPhysExt = iPhysExt;
3321 return pPhysExt;
3322}
3323
3324
3325/**
3326 * Frees a physical cross reference extent.
3327 *
3328 * @param pVM The VM handle.
3329 * @param iPhysExt The extent to free.
3330 */
3331void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3332{
3333 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3334 Assert(iPhysExt < pPool->cMaxPhysExts);
3335 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3336 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3337 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3338 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3339 pPool->iPhysExtFreeHead = iPhysExt;
3340}
3341
3342
3343/**
3344 * Frees a list of physical cross reference extents.
3345 *
3346 * @param pVM The VM handle.
3347 * @param iPhysExt The index of the list head extent to free.
3348 */
3349void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3350{
3351 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3352
3353 const uint16_t iPhysExtStart = iPhysExt;
3354 PPGMPOOLPHYSEXT pPhysExt;
3355 do
3356 {
3357 Assert(iPhysExt < pPool->cMaxPhysExts);
3358 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3359 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3360 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3361
3362 /* next */
3363 iPhysExt = pPhysExt->iNext;
3364 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3365
3366 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3367 pPool->iPhysExtFreeHead = iPhysExtStart;
3368}
3369
3370
3371/**
3372 * Insert a reference into a list of physical cross reference extents.
3373 *
3374 * @returns The new tracking data for PGMPAGE.
3375 *
3376 * @param pVM The VM handle.
3377 * @param iPhysExt The physical extent index of the list head.
3378 * @param iShwPT The shadow page table index.
3379 *
3380 */
3381static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3382{
3383 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3384 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3385
3386 /* special common case. */
3387 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3388 {
3389 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3390 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3391 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3392 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3393 }
3394
3395 /* general treatment. */
3396 const uint16_t iPhysExtStart = iPhysExt;
3397 unsigned cMax = 15;
3398 for (;;)
3399 {
3400 Assert(iPhysExt < pPool->cMaxPhysExts);
3401 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3402 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3403 {
3404 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3405 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3406 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3407 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3408 }
3409 if (!--cMax)
3410 {
3411 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3412 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3413 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3414 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3415        }

            /* Advance to the next extent in the chain; if we reach the end
               without finding a free slot, fall out of the loop and append a
               new extent below. */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
3416    }
3417
3418 /* add another extent to the list. */
3419 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3420 if (!pNew)
3421 {
3422 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3423 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3424 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3425 }
3426 pNew->iNext = iPhysExtStart;
3427 pNew->aidx[0] = iShwPT;
3428 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3429 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3430}
3431
3432
3433/**
3434 * Adds a reference to a guest physical page where extents are in use.
3435 *
3436 * @returns The new tracking data for PGMPAGE.
3437 *
3438 * @param pVM The VM handle.
3439 * @param u16 The ram range flags (top 16-bits).
3440 * @param iShwPT The shadow page table index.
3441 */
3442uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3443{
3444 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3445 {
3446 /*
3447 * Convert to extent list.
3448 */
3449 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
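        /* There is exactly one direct reference; move it into a freshly
           allocated extent together with the new reference (aidx[0] and aidx[1]). */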
3450 uint16_t iPhysExt;
3451 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3452 if (pPhysExt)
3453 {
3454 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3455 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3456 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3457 pPhysExt->aidx[1] = iShwPT;
3458 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3459 }
3460 else
3461 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3462 }
3463 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3464 {
3465 /*
3466 * Insert into the extent list.
3467 */
3468 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3469 }
3470 else
3471 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3472 return u16;
3473}
3474
3475
3476/**
3477 * Clear references to guest physical memory.
3478 *
3479 * @param pPool The pool.
3480 * @param pPage The page.
3481 * @param pPhysPage Pointer to the aPages entry in the ram range.
3482 */
3483void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3484{
3485 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3486 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3487
3488 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3489 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3490 {
3491 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3492 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
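        /* Walk the extent chain, remembering the previous extent so the current
           one can be unlinked (and freed) if it turns out to be empty after
           clearing this page's slot. */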
3493 do
3494 {
3495 Assert(iPhysExt < pPool->cMaxPhysExts);
3496
3497 /*
3498 * Look for the shadow page and check if it's all freed.
3499 */
3500 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3501 {
3502 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3503 {
3504 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3505
3506 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3507 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3508 {
3509 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3510 return;
3511 }
3512
3513 /* we can free the node. */
3514 PVM pVM = pPool->CTX_SUFF(pVM);
3515 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3516 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3517 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3518 {
3519 /* lonely node */
3520 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3521 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3522 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3523 }
3524 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3525 {
3526 /* head */
3527 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3528 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3529 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3530 }
3531 else
3532 {
3533 /* in list */
3534 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3535 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3536 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3537 }
3538 iPhysExt = iPhysExtNext;
3539 return;
3540 }
3541 }
3542
3543 /* next */
3544 iPhysExtPrev = iPhysExt;
3545 iPhysExt = paPhysExts[iPhysExt].iNext;
3546 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3547
3548 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3549 }
3550 else /* nothing to do */
3551 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3552}
3553
3554
3555/**
3556 * Clear references to guest physical memory.
3557 *
3558 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3559 * is assumed to be correct, so the linear search can be skipped and we can assert
3560 * at an earlier point.
3561 *
3562 * @param pPool The pool.
3563 * @param pPage The page.
3564 * @param HCPhys The host physical address corresponding to the guest page.
3565 * @param GCPhys The guest physical address corresponding to HCPhys.
3566 */
3567static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3568{
3569 /*
3570 * Walk range list.
3571 */
3572 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3573 while (pRam)
3574 {
3575 RTGCPHYS off = GCPhys - pRam->GCPhys;
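        /* Unsigned wrap-around turns this into a single range check: addresses
           below the range start become huge offsets and fail the comparison. */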
3576 if (off < pRam->cb)
3577 {
3578 /* does it match? */
3579 const unsigned iPage = off >> PAGE_SHIFT;
3580 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3581#ifdef LOG_ENABLED
3582            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3583            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3584#endif
3585 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3586 {
3587 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3588 return;
3589 }
3590 break;
3591 }
3592 pRam = pRam->CTX_SUFF(pNext);
3593 }
3594 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3595}
3596
3597
3598/**
3599 * Clear references to guest physical memory.
3600 *
3601 * @param pPool The pool.
3602 * @param pPage The page.
3603 * @param HCPhys The host physical address corresponding to the guest page.
3604 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3605 */
3606static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3607{
3608 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3609
3610 /*
3611 * Walk range list.
3612 */
3613 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3614 while (pRam)
3615 {
3616 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3617 if (off < pRam->cb)
3618 {
3619 /* does it match? */
3620 const unsigned iPage = off >> PAGE_SHIFT;
3621 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3622 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3623 {
3624 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3625 return;
3626 }
3627 break;
3628 }
3629 pRam = pRam->CTX_SUFF(pNext);
3630 }
3631
3632 /*
3633 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3634 */
3635 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3636 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3637 while (pRam)
3638 {
3639 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3640 while (iPage-- > 0)
3641 {
3642 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3643 {
3644 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3645 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3646 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3647 return;
3648 }
3649 }
3650 pRam = pRam->CTX_SUFF(pNext);
3651 }
3652
3653 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3654}
3655
3656
3657/**
3658 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3659 *
3660 * @param pPool The pool.
3661 * @param pPage The page.
3662 * @param pShwPT The shadow page table (mapping of the page).
3663 * @param pGstPT The guest page table.
3664 */
3665DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3666{
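    /* iFirstPresent and cPresent are bookkeeping fields on the pool page; they
       let us skip leading non-present entries and stop as soon as every known
       present entry has been dereferenced. */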
3667 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3668 if (pShwPT->a[i].n.u1Present)
3669 {
3670 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3671 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3672 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3673 if (!--pPage->cPresent)
3674 break;
3675 }
3676}
3677
3678
3679/**
3680 * Clear references to guest physical memory in a PAE / 32-bit page table.
3681 *
3682 * @param pPool The pool.
3683 * @param pPage The page.
3684 * @param pShwPT The shadow page table (mapping of the page).
3685 * @param pGstPT The guest page table (just a half one).
3686 */
3687DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3688{
3689 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3690 if (pShwPT->a[i].n.u1Present)
3691 {
3692 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3693 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3694 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3695 }
3696}
3697
3698
3699/**
3700 * Clear references to guest physical memory in a PAE / PAE page table.
3701 *
3702 * @param pPool The pool.
3703 * @param pPage The page.
3704 * @param pShwPT The shadow page table (mapping of the page).
3705 * @param pGstPT The guest page table.
3706 */
3707DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3708{
3709 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3710 if (pShwPT->a[i].n.u1Present)
3711 {
3712 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3713 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3714 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3715 }
3716}
3717
3718
3719/**
3720 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3721 *
3722 * @param pPool The pool.
3723 * @param pPage The page.
3724 * @param pShwPT The shadow page table (mapping of the page).
3725 */
3726DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3727{
3728 RTGCPHYS GCPhys = pPage->GCPhys;
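    /* A big (4MB) guest page maps a contiguous physical range, so the guest
       address of each shadow PTE is simply the page's base GCPhys stepped by
       PAGE_SIZE; no hint lookup is needed. */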
3729 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3730 if (pShwPT->a[i].n.u1Present)
3731 {
3732 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3733 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3734 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3735 }
3736}
3737
3738
3739/**
3740 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3741 *
3742 * @param pPool The pool.
3743 * @param pPage The page.
3744 * @param pShwPT The shadow page table (mapping of the page).
3745 */
3746DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3747{
3748 RTGCPHYS GCPhys = pPage->GCPhys;
3749 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3750 if (pShwPT->a[i].n.u1Present)
3751 {
3752 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3753 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3754 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3755 }
3756}
3757
3758#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3759
3760
3761#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3762/**
3763 * Clear references to shadowed pages in a 32-bit page directory.
3764 *
3765 * @param pPool The pool.
3766 * @param pPage The page.
3767 * @param pShwPD The shadow page directory (mapping of the page).
3768 */
3769DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3770{
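    /* Entries flagged with PGM_PDFLAGS_MAPPING belong to PGM's own mappings
       rather than to pool pages, so there is no pool user reference to release
       and they are skipped. */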
3771 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3772 {
3773 if ( pShwPD->a[i].n.u1Present
3774 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3775 )
3776 {
3777 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3778 if (pSubPage)
3779 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3780 else
3781 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3782 }
3783 }
3784}
3785#endif
3786
3787/**
3788 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3789 *
3790 * @param pPool The pool.
3791 * @param pPage The page.
3792 * @param pShwPD The shadow page directory (mapping of the page).
3793 */
3794DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3795{
3796 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3797 {
3798 if ( pShwPD->a[i].n.u1Present
3799#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3800 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3801#endif
3802 )
3803 {
3804 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3805 if (pSubPage)
3806 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3807 else
3808 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3809 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3810 }
3811 }
3812}
3813
3814
3815/**
3816 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3817 *
3818 * @param pPool The pool.
3819 * @param pPage The page.
3820 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3821 */
3822DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3823{
3824 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3825 {
3826 if ( pShwPDPT->a[i].n.u1Present
3827#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3828 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3829#endif
3830 )
3831 {
3832 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3833 if (pSubPage)
3834 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3835 else
3836 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3837 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3838 }
3839 }
3840}
3841
3842
3843/**
3844 * Clear references to shadowed pages in a 64-bit level 4 page table.
3845 *
3846 * @param pPool The pool.
3847 * @param pPage The page.
3848 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
3849 */
3850DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3851{
3852 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3853 {
3854 if (pShwPML4->a[i].n.u1Present)
3855 {
3856 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3857 if (pSubPage)
3858 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3859 else
3860 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3861 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3862 }
3863 }
3864}
3865
3866
3867/**
3868 * Clear references to shadowed pages in an EPT page table.
3869 *
3870 * @param pPool The pool.
3871 * @param pPage The page.
3872 * @param pShwPT The shadow page table (mapping of the page).
3873 */
3874DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3875{
3876 RTGCPHYS GCPhys = pPage->GCPhys;
3877 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3878 if (pShwPT->a[i].n.u1Present)
3879 {
3880 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3881 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3882 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3883 }
3884}
3885
3886
3887/**
3888 * Clear references to shadowed pages in an EPT page directory.
3889 *
3890 * @param pPool The pool.
3891 * @param pPage The page.
3892 * @param pShwPD The shadow page directory (mapping of the page).
3893 */
3894DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3895{
3896 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3897 {
3898 if (pShwPD->a[i].n.u1Present)
3899 {
3900 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3901 if (pSubPage)
3902 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3903 else
3904 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3905 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3906 }
3907 }
3908}
3909
3910
3911/**
3912 * Clear references to shadowed pages in an EPT page directory pointer table.
3913 *
3914 * @param pPool The pool.
3915 * @param pPage The page.
3916 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3917 */
3918DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3919{
3920 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3921 {
3922 if (pShwPDPT->a[i].n.u1Present)
3923 {
3924 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3925 if (pSubPage)
3926 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3927 else
3928 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3929 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3930 }
3931 }
3932}
3933
3934
3935/**
3936 * Clears all references made by this page.
3937 *
3938 * This includes other shadow pages and GC physical addresses.
3939 *
3940 * @param pPool The pool.
3941 * @param pPage The page.
3942 */
3943static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3944{
3945 /*
3946 * Map the shadow page and take action according to the page kind.
3947 */
3948 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
3949 switch (pPage->enmKind)
3950 {
3951#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3952 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3953 {
3954 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3955 void *pvGst;
3956 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3957 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3958 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3959 break;
3960 }
3961
3962 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3963 {
3964 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3965 void *pvGst;
3966 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3967 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3968 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3969 break;
3970 }
3971
3972 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3973 {
3974 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3975 void *pvGst;
3976 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3977 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3978 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3979 break;
3980 }
3981
3982 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3983 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3984 {
3985 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3986 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3987 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3988 break;
3989 }
3990
3991 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3992 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3993 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3994 {
3995 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3996 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3997 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3998 break;
3999 }
4000
4001#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4002 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4003 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4005 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4006 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4007 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4008 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4009 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4010 break;
4011#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4012
4013 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4014 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4015 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4016 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4017 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4018 case PGMPOOLKIND_PAE_PD_PHYS:
4019 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4020 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4021 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4022 break;
4023
4024#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4025 case PGMPOOLKIND_32BIT_PD_PHYS:
4026 case PGMPOOLKIND_32BIT_PD:
4027 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4028 break;
4029
4030 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4031 case PGMPOOLKIND_PAE_PDPT:
4032 case PGMPOOLKIND_PAE_PDPT_PHYS:
4033#endif
4034 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4035 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4036 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4037 break;
4038
4039 case PGMPOOLKIND_64BIT_PML4:
4040 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4041 break;
4042
4043 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4044 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4045 break;
4046
4047 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4048 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4049 break;
4050
4051 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4052 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4053 break;
4054
4055 default:
4056 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4057 }
4058
4059 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4060 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4061 ASMMemZeroPage(pvShw);
4062 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4063 pPage->fZeroed = true;
4064 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4065}
4066
4067#endif /* PGMPOOL_WITH_USER_TRACKING */
4068
4069/**
4070 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
4071 *
4072 * @param pPool The pool.
4073 */
4074static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
4075{
4076#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4077 /* Start a subset so we won't run out of mapping space. */
4078 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4079 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4080#endif
4081
4082 /*
4083 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
4084 */
4085 Assert(NIL_PGMPOOL_IDX == 0);
4086#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4087 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
4088 {
4089 /*
4090 * Get the page address.
4091 */
4092 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4093 union
4094 {
4095 uint64_t *pau64;
4096 uint32_t *pau32;
4097 } u;
4098
4099 /*
4100 * Mark stuff not present.
4101 */
4102 switch (pPage->enmKind)
4103 {
4104 case PGMPOOLKIND_ROOT_32BIT_PD:
4105 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4106 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
4107 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4108 u.pau32[iPage] = 0;
4109 break;
4110
4111 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4112 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4113 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
4114 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
4115 u.pau64[iPage] = 0;
4116 break;
4117
4118 case PGMPOOLKIND_ROOT_PDPT:
4119 /* Not root of shadowed pages currently, ignore it. */
4120 break;
4121
4122 case PGMPOOLKIND_ROOT_NESTED:
4123 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
4124 ASMMemZero32(u.pau64, PAGE_SIZE);
4125 break;
4126 }
4127 }
4128#endif
4129
4130 /*
4131 * Paranoia (to be removed), flag a global CR3 sync.
4132 */
4133 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4134
4135#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4136 /* Pop the subset. */
4137 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4138#endif
4139}
4140
4141
4142/**
4143 * Flushes the entire cache.
4144 *
4145 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4146 * and will execute the CR3 flush.
4147 *
4148 * @param pPool The pool.
4149 */
4150static void pgmPoolFlushAllInt(PPGMPOOL pPool)
4151{
4152 PVM pVM = pPool->CTX_SUFF(pVM);
4153
4154 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4155 LogFlow(("pgmPoolFlushAllInt:\n"));
4156
4157 /*
4158 * If there are no pages in the pool, there is nothing to do.
4159 */
4160 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4161 {
4162 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4163 return;
4164 }
4165
4166#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4167 /* Unmap the old CR3 value before flushing everything. */
4168 int rc = PGM_BTH_PFN(UnmapCR3, pVM)(pVM);
4169 AssertRC(rc);
4170#endif
4171
4172 /*
4173 * Nuke the free list and reinsert all pages into it.
4174 */
4175 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4176 {
4177 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4178
4179#ifdef IN_RING3
4180 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4181#endif
4182#ifdef PGMPOOL_WITH_MONITORING
4183 if (pPage->fMonitored)
4184 pgmPoolMonitorFlush(pPool, pPage);
4185 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4186 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4187 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4188 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4189 pPage->cModifications = 0;
4190#endif
4191 pPage->GCPhys = NIL_RTGCPHYS;
4192 pPage->enmKind = PGMPOOLKIND_FREE;
4193 Assert(pPage->idx == i);
4194 pPage->iNext = i + 1;
4195 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4196 pPage->fSeenNonGlobal = false;
4197 pPage->fMonitored = false;
4198 pPage->fCached = false;
4199 pPage->fReusedFlushPending = false;
4200#ifdef PGMPOOL_WITH_USER_TRACKING
4201 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4202#else
4203 pPage->fCR3Mix = false;
4204#endif
4205#ifdef PGMPOOL_WITH_CACHE
4206 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4207 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4208#endif
4209 }
4210 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4211 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4212 pPool->cUsedPages = 0;
4213
4214#ifdef PGMPOOL_WITH_USER_TRACKING
4215 /*
4216 * Zap and reinitialize the user records.
4217 */
4218 pPool->cPresent = 0;
4219 pPool->iUserFreeHead = 0;
4220 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4221 const unsigned cMaxUsers = pPool->cMaxUsers;
4222 for (unsigned i = 0; i < cMaxUsers; i++)
4223 {
4224 paUsers[i].iNext = i + 1;
4225 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4226 paUsers[i].iUserTable = 0xfffffffe;
4227 }
4228 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4229#endif
4230
4231#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4232 /*
4233 * Clear all the GCPhys links and rebuild the phys ext free list.
4234 */
4235 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4236 pRam;
4237 pRam = pRam->CTX_SUFF(pNext))
4238 {
4239 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4240 while (iPage-- > 0)
4241 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4242 }
4243
4244 pPool->iPhysExtFreeHead = 0;
4245 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4246 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4247 for (unsigned i = 0; i < cMaxPhysExts; i++)
4248 {
4249 paPhysExts[i].iNext = i + 1;
4250 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4251 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4252 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4253 }
4254 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4255#endif
4256
4257#ifdef PGMPOOL_WITH_MONITORING
4258 /*
4259 * Just zap the modified list.
4260 */
4261 pPool->cModifiedPages = 0;
4262 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4263#endif
4264
4265#ifdef PGMPOOL_WITH_CACHE
4266 /*
4267 * Clear the GCPhys hash and the age list.
4268 */
4269 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4270 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4271 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4272 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4273#endif
4274
4275 /*
4276 * Flush all the special root pages.
4277 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4278 */
4279 pgmPoolFlushAllSpecialRoots(pPool);
4280 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4281 {
4282 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4283 pPage->iNext = NIL_PGMPOOL_IDX;
4284#ifdef PGMPOOL_WITH_MONITORING
4285 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4286 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4287 pPage->cModifications = 0;
4288 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4289 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4290 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4291 if (pPage->fMonitored)
4292 {
4293 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4294 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4295 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4296 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4297 pPool->pszAccessHandler);
4298 AssertFatalRCSuccess(rc);
4299# ifdef PGMPOOL_WITH_CACHE
4300 pgmPoolHashInsert(pPool, pPage);
4301# endif
4302 }
4303#endif
4304#ifdef PGMPOOL_WITH_USER_TRACKING
4305 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4306#endif
4307#ifdef PGMPOOL_WITH_CACHE
4308 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4309 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4310#endif
4311 }
4312
4313 /*
4314 * Finally, assert the FF.
4315 */
4316 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4317
4318 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4319}
4320
4321
4322/**
4323 * Flushes a pool page.
4324 *
4325 * This moves the page to the free list after removing all user references to it.
4326 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4327 *
4328 * @returns VBox status code.
4329 * @retval VINF_SUCCESS on success.
4330 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4331 * @param pPool The pool.
4332 * @param pPage The shadow page to flush.
4333 */
4334int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4335{
4336 int rc = VINF_SUCCESS;
4337 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4338 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4339 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4340
4341 /*
4342 * Quietly reject any attempts at flushing any of the special root pages.
4343 */
4344 if (pPage->idx < PGMPOOL_IDX_FIRST)
4345 {
4346 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4347 return VINF_SUCCESS;
4348 }
4349
4350 /*
4351 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4352 */
4353#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4354 if (pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
4355 {
4356 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4357 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4358 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4359 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4360 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4361 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4362 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4363 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4364 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4365 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4366#else
4367 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4368 {
4369 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4370 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4371#endif
4372 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4373 return VINF_SUCCESS;
4374 }
4375
4376#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4377 /* Start a subset so we won't run out of mapping space. */
4378 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4379 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4380#endif
4381
4382 /*
4383 * Mark the page as being in need of an ASMMemZeroPage().
4384 */
4385 pPage->fZeroed = false;
4386
4387#ifdef PGMPOOL_WITH_USER_TRACKING
4388 /*
4389 * Clear the page.
4390 */
4391 pgmPoolTrackClearPageUsers(pPool, pPage);
4392 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4393 pgmPoolTrackDeref(pPool, pPage);
4394 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4395#endif
4396
4397#ifdef PGMPOOL_WITH_CACHE
4398 /*
4399 * Flush it from the cache.
4400 */
4401 pgmPoolCacheFlushPage(pPool, pPage);
4402#endif /* PGMPOOL_WITH_CACHE */
4403
4404#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4405 /* Heavy stuff done. */
4406 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4407#endif
4408
4409#ifdef PGMPOOL_WITH_MONITORING
4410 /*
4411 * Deregister the monitoring.
4412 */
4413 if (pPage->fMonitored)
4414 rc = pgmPoolMonitorFlush(pPool, pPage);
4415#endif
4416
4417 /*
4418 * Free the page.
4419 */
4420 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4421 pPage->iNext = pPool->iFreeHead;
4422 pPool->iFreeHead = pPage->idx;
4423 pPage->enmKind = PGMPOOLKIND_FREE;
4424 pPage->GCPhys = NIL_RTGCPHYS;
4425 pPage->fReusedFlushPending = false;
4426
4427 pPool->cUsedPages--;
4428 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4429 return rc;
4430}
4431
4432
4433/**
4434 * Frees a usage of a pool page.
4435 *
4436 * The caller is responsible for updating the user table so that it no longer
4437 * references the shadow page.
4438 *
4439 * @param pPool The pool.
4440 * @param pPage The shadow page.
4441 * @param iUser The shadow page pool index of the user table.
4442 * @param iUserTable The index into the user table (shadowed).
4443 */
4444void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4445{
4446 STAM_PROFILE_START(&pPool->StatFree, a);
4447 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4448 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4449 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4450#ifdef PGMPOOL_WITH_USER_TRACKING
4451 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4452#endif
4453#ifdef PGMPOOL_WITH_CACHE
4454 if (!pPage->fCached)
4455#endif
4456 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4457 STAM_PROFILE_STOP(&pPool->StatFree, a);
4458}
4459
4460
4461/**
4462 * Makes one or more free pages available.
4463 *
4464 * @returns VBox status code.
4465 * @retval VINF_SUCCESS on success.
4466 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4467 *
4468 * @param pPool The pool.
4469 * @param enmKind The page table kind.
4470 * @param iUser The user of the page.
4471 */
4472static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4473{
4474 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4475
4476 /*
4477 * If the pool isn't full grown yet, expand it.
4478 */
4479 if ( pPool->cCurPages < pPool->cMaxPages
4480#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(IN_RC)
4481 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4482 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4483 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4484#endif
4485 )
4486 {
4487 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
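        /* Growing the pool needs ring-3: in ring-3 we call PGMR3PoolGrow
           directly, in ring-0 / raw-mode context we bounce to ring-3 through
           the VMM call-host interface. */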
4488#ifdef IN_RING3
4489 int rc = PGMR3PoolGrow(pPool->pVMR3);
4490#else
4491 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4492#endif
4493 if (RT_FAILURE(rc))
4494 return rc;
4495 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4496 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4497 return VINF_SUCCESS;
4498 }
4499
4500#ifdef PGMPOOL_WITH_CACHE
4501 /*
4502 * Free one cached page.
4503 */
4504 return pgmPoolCacheFreeOne(pPool, iUser);
4505#else
4506 /*
4507 * Flush the pool.
4508 *
4509 * If we have tracking enabled, it should be possible to come up with
4510 * a cheap replacement strategy...
4511 */
4512 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4513 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4514 pgmPoolFlushAllInt(pPool);
4515 return VERR_PGM_POOL_FLUSHED;
4516#endif
4517}
4518
4519
4520/**
4521 * Allocates a page from the pool.
4522 *
4523 * This page may actually be a cached page and not in need of any processing
4524 * on the callers part.
4525 * on the caller's part.
4526 * @returns VBox status code.
4527 * @retval VINF_SUCCESS if a NEW page was allocated.
4528 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4529 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4530 * @param pVM The VM handle.
4531 * @param GCPhys The GC physical address of the page we're going to shadow.
4532 * For 4MB and 2MB PD entries, it's the first address the
4533 * shadow PT is covering.
4534 * @param enmKind The kind of mapping.
4535 * @param iUser The shadow page pool index of the user table.
4536 * @param iUserTable The index into the user table (shadowed).
4537 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4538 */
4539int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4540{
4541 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4542 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4543 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4544 *ppPage = NULL;
4545 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4546 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4547 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4548
4549#ifdef PGMPOOL_WITH_CACHE
4550 if (pPool->fCacheEnabled)
4551 {
4552 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4553 if (RT_SUCCESS(rc2))
4554 {
4555 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4556 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4557 return rc2;
4558 }
4559 }
4560#endif
4561
4562 /*
4563 * Allocate a new one.
4564 */
4565 int rc = VINF_SUCCESS;
4566 uint16_t iNew = pPool->iFreeHead;
4567 if (iNew == NIL_PGMPOOL_IDX)
4568 {
4569 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4570 if (RT_FAILURE(rc))
4571 {
4572 if (rc != VERR_PGM_POOL_CLEARED)
4573 {
4574 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4575 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4576 return rc;
4577 }
4578 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4579 rc = VERR_PGM_POOL_FLUSHED;
4580 }
4581 iNew = pPool->iFreeHead;
4582 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4583 }
4584
4585 /* unlink the free head */
4586 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4587 pPool->iFreeHead = pPage->iNext;
4588 pPage->iNext = NIL_PGMPOOL_IDX;
4589
4590 /*
4591 * Initialize it.
4592 */
4593 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4594 pPage->enmKind = enmKind;
4595 pPage->GCPhys = GCPhys;
4596 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4597 pPage->fMonitored = false;
4598 pPage->fCached = false;
4599 pPage->fReusedFlushPending = false;
4600#ifdef PGMPOOL_WITH_MONITORING
4601 pPage->cModifications = 0;
4602 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4603 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4604#else
4605 pPage->fCR3Mix = false;
4606#endif
4607#ifdef PGMPOOL_WITH_USER_TRACKING
4608 pPage->cPresent = 0;
4609 pPage->iFirstPresent = ~0;
4610
4611 /*
4612 * Insert into the tracking and cache. If this fails, free the page.
4613 */
4614 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4615 if (RT_FAILURE(rc3))
4616 {
4617 if (rc3 != VERR_PGM_POOL_CLEARED)
4618 {
4619 pPool->cUsedPages--;
4620 pPage->enmKind = PGMPOOLKIND_FREE;
4621 pPage->GCPhys = NIL_RTGCPHYS;
4622 pPage->iNext = pPool->iFreeHead;
4623 pPool->iFreeHead = pPage->idx;
4624 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4625 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4626 return rc3;
4627 }
4628 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4629 rc = VERR_PGM_POOL_FLUSHED;
4630 }
4631#endif /* PGMPOOL_WITH_USER_TRACKING */
4632
4633 /*
4634 * Commit the allocation, clear the page and return.
4635 */
4636#ifdef VBOX_WITH_STATISTICS
4637 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4638 pPool->cUsedPagesHigh = pPool->cUsedPages;
4639#endif
4640
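    /* Pages taken from the free list may already have been zeroed when they
       were flushed (fZeroed set); only clear the page here if that is not the case. */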
4641 if (!pPage->fZeroed)
4642 {
4643 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4644 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4645 ASMMemZeroPage(pv);
4646 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4647 }
4648
4649 *ppPage = pPage;
4650 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4651 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4652 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4653 return rc;
4654}
4655
4656
4657/**
4658 * Frees a usage of a pool page.
4659 *
4660 * @param pVM The VM handle.
4661 * @param HCPhys The HC physical address of the shadow page.
4662 * @param iUser The shadow page pool index of the user table.
4663 * @param iUserTable The index into the user table (shadowed).
4664 */
4665void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4666{
4667 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4668 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4669 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4670}
4671
4672
4673/**
4674 * Gets an in-use page in the pool by its physical address.
4675 *
4676 * @returns Pointer to the page.
4677 * @param pVM The VM handle.
4678 * @param HCPhys The HC physical address of the shadow page.
4679 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4680 */
4681PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4682{
4683 /** @todo profile this! */
4684 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4685 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4686 Log4(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%s}\n",
4687 HCPhys, pPage, pPage->idx, pPage->GCPhys, pgmPoolPoolKindToStr(pPage->enmKind)));
4688 return pPage;
4689}
4690
4691
4692/**
4693 * Flushes the entire cache.
4694 *
4695 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4696 * and will execute the CR3 flush.
4697 *
4698 * @param pVM The VM handle.
4699 */
4700void pgmPoolFlushAll(PVM pVM)
4701{
4702 LogFlow(("pgmPoolFlushAll:\n"));
4703 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4704}
4705
4706#ifdef LOG_ENABLED
4707static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4708{
4709 switch(enmKind)
4710 {
4711 case PGMPOOLKIND_INVALID:
4712 return "PGMPOOLKIND_INVALID";
4713 case PGMPOOLKIND_FREE:
4714 return "PGMPOOLKIND_FREE";
4715 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4716 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4717 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4718 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4719 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4720 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4721 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4722 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4723 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4724 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4725 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4726 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4727 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4728 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4729 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4730 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4731 case PGMPOOLKIND_32BIT_PD:
4732 return "PGMPOOLKIND_32BIT_PD";
4733 case PGMPOOLKIND_32BIT_PD_PHYS:
4734 return "PGMPOOLKIND_32BIT_PD_PHYS";
4735 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4736 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4737 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4738 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4739 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4740 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4741 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4742 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4743 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4744 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4745 case PGMPOOLKIND_PAE_PD_PHYS:
4746 return "PGMPOOLKIND_PAE_PD_PHYS";
4747 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4748 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4749 case PGMPOOLKIND_PAE_PDPT:
4750 return "PGMPOOLKIND_PAE_PDPT";
4751 case PGMPOOLKIND_PAE_PDPT_PHYS:
4752 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4753 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4754 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4755 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4756 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4757 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4758 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4759 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4760 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4761 case PGMPOOLKIND_64BIT_PML4:
4762 return "PGMPOOLKIND_64BIT_PML4";
4763 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4764 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4765 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4766 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4767 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4768 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4769#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4770 case PGMPOOLKIND_ROOT_32BIT_PD:
4771 return "PGMPOOLKIND_ROOT_32BIT_PD";
4772 case PGMPOOLKIND_ROOT_PAE_PD:
4773 return "PGMPOOLKIND_ROOT_PAE_PD";
4774 case PGMPOOLKIND_ROOT_PDPT:
4775 return "PGMPOOLKIND_ROOT_PDPT";
4776#endif
4777 case PGMPOOLKIND_ROOT_NESTED:
4778 return "PGMPOOLKIND_ROOT_NESTED";
4779 }
4780 return "Unknown kind!";
4781}
4782#endif