VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@101025

Last change on this file since 101025 was 100869, checked in by vboxsync, 22 months ago

VMM/IEM: Use a fixed maxed-out TB during threaded compilation and duplicate this into the resulting TB but with optimial table sizes. bugref:10369

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 71.8 KB
1/* $Id: IEMAllThrdRecompiler.cpp 100869 2023-08-14 12:37:33Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) :
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
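/*
 * Example (the log-string syntax below is an assumption, not taken from this
 * file; see the VirtualBox logging documentation): with a debug build the
 * levels above are typically enabled via the VBOX_LOG environment variable,
 * along the lines of
 *      VBOX_LOG=+iem_re_threaded.e.l8.l9
 *      VBOX_LOG_DEST=file=/tmp/iem-threaded.log
 * which would capture the TB compilation (Log8) and TB exec (Log9) output.
 */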
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.215389.xyz.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but its all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/x86.h>
86
87#ifndef TST_IEM_CHECK_MC
88# include "IEMInline.h"
89# include "IEMOpHlp.h"
90# include "IEMMc.h"
91#endif
92
93#include "IEMThreadedFunctions.h"
94
95
96/*
97 * Narrow down configs here to avoid wasting time on unused configs.
98 */
99
100#ifndef IEM_WITH_CODE_TLB
101# error The code TLB must be enabled for the recompiler.
102#endif
103
104#ifndef IEM_WITH_DATA_TLB
105# error The data TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_SETJMP
109# error The setjmp approach must be enabled for the recompiler.
110#endif
111
112
113/*********************************************************************************************************************************
114* Internal Functions *
115*********************************************************************************************************************************/
116static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);
117
118
119/**
120 * Calculates the effective address of a ModR/M memory operand, extended version
121 * for use in the recompilers.
122 *
123 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
124 *
125 * May longjmp on internal error.
126 *
127 * @return The effective address.
128 * @param pVCpu The cross context virtual CPU structure of the calling thread.
129 * @param bRm The ModRM byte.
130 * @param cbImmAndRspOffset - First byte: The size of any immediate
131 * following the effective address opcode bytes
132 * (only for RIP relative addressing).
133 * - Second byte: RSP displacement (for POP [ESP]).
134 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
135 * SIB byte (bits 39:32).
136 *
137 * @note This must be defined in a source file with matching
138 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
139 * or implemented differently...
140 */
141RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
142{
143 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
144# define SET_SS_DEF() \
145 do \
146 { \
147 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
148 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
149 } while (0)
150
151 if (!IEM_IS_64BIT_CODE(pVCpu))
152 {
153/** @todo Check the effective address size crap! */
154 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
155 {
156 uint16_t u16EffAddr;
157
158 /* Handle the disp16 form with no registers first. */
159 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
160 {
161 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
162 *puInfo = u16EffAddr;
163 }
164 else
165 {
166                /* Get the displacement. */
167 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
168 {
169 case 0: u16EffAddr = 0; break;
170 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
171 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
172 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
173 }
174 *puInfo = u16EffAddr;
175
176 /* Add the base and index registers to the disp. */
177 switch (bRm & X86_MODRM_RM_MASK)
178 {
179 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
180 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
181 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
182 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
183 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
184 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
185 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
186 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
187 }
188 }
189
190 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
191 return u16EffAddr;
192 }
193
194 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
195 uint32_t u32EffAddr;
196 uint64_t uInfo;
197
198 /* Handle the disp32 form with no registers first. */
199 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
200 {
201 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
202 uInfo = u32EffAddr;
203 }
204 else
205 {
206 /* Get the register (or SIB) value. */
207 uInfo = 0;
208 switch ((bRm & X86_MODRM_RM_MASK))
209 {
210 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
211 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
212 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
213 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
214 case 4: /* SIB */
215 {
216 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
217 uInfo = (uint64_t)bSib << 32;
218
219 /* Get the index and scale it. */
220 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
221 {
222 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
223 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
224 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
225 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
226 case 4: u32EffAddr = 0; /*none */ break;
227 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
228 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
229 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
230 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
231 }
232 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
233
234 /* add base */
235 switch (bSib & X86_SIB_BASE_MASK)
236 {
237 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
238 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
239 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
240 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
241 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
242 case 5:
243 if ((bRm & X86_MODRM_MOD_MASK) != 0)
244 {
245 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
246 SET_SS_DEF();
247 }
248 else
249 {
250 uint32_t u32Disp;
251 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
252 u32EffAddr += u32Disp;
253 uInfo |= u32Disp;
254 }
255 break;
256 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
257 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
258 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
259 }
260 break;
261 }
262 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
263 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
264 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
265 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
266 }
267
268 /* Get and add the displacement. */
269 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
270 {
271 case 0:
272 break;
273 case 1:
274 {
275 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
276 u32EffAddr += i8Disp;
277 uInfo |= (uint32_t)(int32_t)i8Disp;
278 break;
279 }
280 case 2:
281 {
282 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
283 u32EffAddr += u32Disp;
284 uInfo |= u32Disp;
285 break;
286 }
287 default:
288 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
289 }
290 }
291
292 *puInfo = uInfo;
293 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
294 return u32EffAddr;
295 }
296
297 uint64_t u64EffAddr;
298 uint64_t uInfo;
299
300 /* Handle the rip+disp32 form with no registers first. */
301 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
302 {
303 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
304 uInfo = (uint32_t)u64EffAddr;
305 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
306 }
307 else
308 {
309 /* Get the register (or SIB) value. */
310 uInfo = 0;
311 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
312 {
313 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
314 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
315 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
316 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
317 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
318 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
319 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
320 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
321 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
322 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
323 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
324 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
325 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
326 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
327 /* SIB */
328 case 4:
329 case 12:
330 {
331 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
332 uInfo = (uint64_t)bSib << 32;
333
334 /* Get the index and scale it. */
335 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
336 {
337 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
338 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
339 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
340 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
341 case 4: u64EffAddr = 0; /*none */ break;
342 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
343 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
344 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
345 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
346 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
347 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
348 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
349 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
350 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
351 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
352 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
353 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
354 }
355 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
356
357 /* add base */
358 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
359 {
360 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
361 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
362 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
363 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
364 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
365 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
366 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
367 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
368 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
369 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
370 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
371 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
372 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
373 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
374 /* complicated encodings */
375 case 5:
376 case 13:
377 if ((bRm & X86_MODRM_MOD_MASK) != 0)
378 {
379 if (!pVCpu->iem.s.uRexB)
380 {
381 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
382 SET_SS_DEF();
383 }
384 else
385 u64EffAddr += pVCpu->cpum.GstCtx.r13;
386 }
387 else
388 {
389 uint32_t u32Disp;
390 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
391 u64EffAddr += (int32_t)u32Disp;
392 uInfo |= u32Disp;
393 }
394 break;
395 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
396 }
397 break;
398 }
399 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
400 }
401
402 /* Get and add the displacement. */
403 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
404 {
405 case 0:
406 break;
407 case 1:
408 {
409 int8_t i8Disp;
410 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
411 u64EffAddr += i8Disp;
412 uInfo |= (uint32_t)(int32_t)i8Disp;
413 break;
414 }
415 case 2:
416 {
417 uint32_t u32Disp;
418 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
419 u64EffAddr += (int32_t)u32Disp;
420 uInfo |= u32Disp;
421 break;
422 }
423 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
424 }
425
426 }
427
428 *puInfo = uInfo;
429 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
430 {
431 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
432 return u64EffAddr;
433 }
434 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
435 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
436 return u64EffAddr & UINT32_MAX;
437}
438
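#if 0
/*
 * Purely illustrative sketch of how a caller might use the helper above; this
 * is not part of the original source and is never compiled. It only shows the
 * packing of cbImmAndRspOffset and the decoding of the returned info value as
 * described in the doxygen comment.
 */
static RTGCPTR iemExampleCalcEffAddr(PVMCPUCC pVCpu, uint8_t bRm) IEM_NOEXCEPT_MAY_LONGJMP
{
    /* Byte 0: size of an immediate following the effective address bytes (1 = imm8);
       byte 1: extra RSP displacement for POP [ESP] style decoding (0 = none). */
    uint32_t const cbImmAndRspOffset = UINT32_C(1) | (UINT32_C(0) << 8);
    uint64_t       uInfo             = 0;
    RTGCPTR const  GCPtrEff          = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);

    /* uInfo packs the 32-bit displacement into bits 31:0 and the SIB byte into bits 39:32. */
    uint32_t const u32Disp = (uint32_t)uInfo;
    uint8_t const  bSib    = (uint8_t)(uInfo >> 32);
    Log5(("example: GCPtrEff=%RGv u32Disp=%#RX32 bSib=%#x\n", GCPtrEff, u32Disp, bSib));
    return GCPtrEff;
}
#endif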
439
440/*
441 * Translation block management.
442 */
443
444typedef struct IEMTBCACHE
445{
446 uint32_t cHash;
447 uint32_t uHashMask;
448 PIEMTB apHash[_1M];
449} IEMTBCACHE;
450
451static IEMTBCACHE g_TbCache = { _1M, _1M - 1, }; /**< Quick and dirty. */
452
453#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
454 ( ((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
455
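/*
 * Hash example (illustrative only, not from the original source): with the
 * 1M entry table above uHashMask is 0xfffff, so a TB with GCPhysPc=0x123456
 * and fFlags=0x871 would land in bucket
 *      ((uint32_t)0x123456 ^ 0x871) & 0xfffff = 0x23c27.
 */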
456
457/**
458 * Allocate a translation block for threaded recompilation.
459 *
460 * This is allocated with maxed out call table and storage for opcode bytes,
461 * because it's only supposed to be called once per EMT to allocate the TB
462 * pointed to by IEMCPU::pThrdCompileTbR3.
463 *
464 * @returns Pointer to the translation block on success, NULL on failure.
465 * @param pVM The cross context virtual machine structure.
466 * @param pVCpu The cross context virtual CPU structure of the calling
467 * thread.
468 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
469 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
470 */
471static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
472{
473 PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
474 if (pTb)
475 {
476 unsigned const cCalls = 256;
477 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
478 if (pTb->Thrd.paCalls)
479 {
480 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
481 if (pTb->pabOpcodes)
482 {
483 pTb->Thrd.cAllocated = cCalls;
484 pTb->cbOpcodesAllocated = cCalls * 16;
485 pTb->Thrd.cCalls = 0;
486 pTb->cbOpcodes = 0;
487 pTb->pNext = NULL;
488 RTListInit(&pTb->LocalList);
489 pTb->GCPhysPc = GCPhysPc;
490 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
491 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
492 pTb->cInstructions = 0;
493
494 /* Init the first opcode range. */
495 pTb->cRanges = 1;
496 pTb->aRanges[0].cbOpcodes = 0;
497 pTb->aRanges[0].offOpcodes = 0;
498 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
499 pTb->aRanges[0].u2Unused = 0;
500 pTb->aRanges[0].idxPhysPage = 0;
501 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
502 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
503
504 pVCpu->iem.s.cTbAllocs++;
505 return pTb;
506 }
507 RTMemFree(pTb->Thrd.paCalls);
508 }
509 RTMemFree(pTb);
510 }
511 RT_NOREF(pVM);
512 return NULL;
513}
514
515
516/**
517 * Called on the TB that is dedicated to recompilation, before it's reused.
518 *
519 * @param pVCpu The cross context virtual CPU structure of the calling
520 * thread.
521 * @param pTb The translation block to reuse.
522 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
523 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
524 */
525static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
526{
527 pTb->GCPhysPc = GCPhysPc;
528 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
529 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
530 pTb->Thrd.cCalls = 0;
531 pTb->cbOpcodes = 0;
532 pTb->cInstructions = 0;
533
534 /* Init the first opcode range. */
535 pTb->cRanges = 1;
536 pTb->aRanges[0].cbOpcodes = 0;
537 pTb->aRanges[0].offOpcodes = 0;
538 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
539 pTb->aRanges[0].u2Unused = 0;
540 pTb->aRanges[0].idxPhysPage = 0;
541 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
542 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
543}
544
545
546/**
547 * Used to duplicate a threaded translation block after recompilation is done.
548 *
549 * @returns Pointer to the translation block on success, NULL on failure.
550 * @param pVM The cross context virtual machine structure.
551 * @param pVCpu The cross context virtual CPU structure of the calling
552 * thread.
553 * @param pTbSrc The TB to duplicate.
554 */
555static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
556{
557 /*
558 * Just using the heap for now. Will make this more efficient and
559 * complicated later, don't worry. :-)
560 */
561 PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
562 if (pTb)
563 {
564 memcpy(pTb, pTbSrc, sizeof(*pTb));
565
566 unsigned const cCalls = pTbSrc->Thrd.cCalls;
567 Assert(cCalls > 0);
568 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
569 if (pTb->Thrd.paCalls)
570 {
571 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
572 Assert(cbOpcodes > 0);
573 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
574 if (pTb->pabOpcodes)
575 {
576 pTb->Thrd.cAllocated = cCalls;
577 pTb->cbOpcodesAllocated = cbOpcodes;
578 pTb->pNext = NULL;
579 RTListInit(&pTb->LocalList);
580 pTb->fFlags = (pTbSrc->fFlags & ~IEMTB_F_STATE_MASK) | IEMTB_F_STATE_READY;
581
582 pVCpu->iem.s.cTbAllocs++;
583 return pTb;
584 }
585 RTMemFree(pTb->Thrd.paCalls);
586 }
587 RTMemFree(pTb);
588 }
589 RT_NOREF(pVM);
590 return NULL;
591
592}
593
594
595/**
596 * Adds the given TB to the hash table.
597 *
598 * @param pVM The cross context virtual machine structure.
599 * @param pVCpu The cross context virtual CPU structure of the calling
600 * thread.
601 * @param pTb The translation block to add.
602 */
603static void iemThreadedTbAdd(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
604{
605 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
606 pTb->pNext = g_TbCache.apHash[idxHash];
607 g_TbCache.apHash[idxHash] = pTb;
608 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
609 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
610 if (LogIs12Enabled())
611 {
612 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
613 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, idxHash, pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
614 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
615 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
616 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
617 pTb->aRanges[idxRange].idxPhysPage == 0
618 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
619 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
620 }
621 RT_NOREF(pVM);
622}
623
624
625/**
626 * Frees the given TB.
627 *
628 * @param pVM The cross context virtual machine structure.
629 * @param pVCpu The cross context virtual CPU structure of the calling
630 * thread.
631 * @param pTb The translation block to free.
632 */
633static void iemThreadedTbFree(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
634{
635 RT_NOREF(pVM);
636 AssertPtr(pTb);
637
638 AssertCompile(IEMTB_F_STATE_OBSOLETE == IEMTB_F_STATE_MASK);
639 pTb->fFlags |= IEMTB_F_STATE_OBSOLETE; /* works, both bits set */
640
641 /* Unlink it from the hash table: */
642 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
643 PIEMTB pTbCur = g_TbCache.apHash[idxHash];
644 if (pTbCur == pTb)
645 g_TbCache.apHash[idxHash] = pTb->pNext;
646 else
647 while (pTbCur)
648 {
649 PIEMTB const pNextTb = pTbCur->pNext;
650 if (pNextTb == pTb)
651 {
652 pTbCur->pNext = pTb->pNext;
653 break;
654 }
655 pTbCur = pNextTb;
656 }
657
658 /* Free it. */
659 RTMemFree(pTb->Thrd.paCalls);
660 pTb->Thrd.paCalls = NULL;
661
662 RTMemFree(pTb->pabOpcodes);
663 pTb->pabOpcodes = NULL;
664
665 RTMemFree(pTb);
666 pVCpu->iem.s.cTbFrees++;
667}
668
669
670/**
671 * Called by opcode verifier functions when they detect a problem.
672 */
673void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
674{
675 iemThreadedTbFree(pVCpu->CTX_SUFF(pVM), pVCpu, pTb);
676}
677
678
679static PIEMTB iemThreadedTbLookup(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
680{
681 uint32_t const fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY;
682 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, fFlags, GCPhysPc);
683 Log10(("TB lookup: idxHash=%#x fFlags=%#x GCPhysPc=%RGp\n", idxHash, fFlags, GCPhysPc));
684 PIEMTB pTb = g_TbCache.apHash[idxHash];
685 while (pTb)
686 {
687 if (pTb->GCPhysPc == GCPhysPc)
688 {
689 if (pTb->fFlags == fFlags)
690 {
691 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
692 {
693#ifdef VBOX_WITH_STATISTICS
694 pVCpu->iem.s.cTbLookupHits++;
695#endif
696 return pTb;
697 }
698 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
699 }
700 else
701 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
702 }
703 else
704 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
705
706 pTb = pTb->pNext;
707 }
708 RT_NOREF(pVM);
709 pVCpu->iem.s.cTbLookupMisses++;
710 return pTb;
711}
712
713
714/*
715 * Real code.
716 */
717
718#ifdef LOG_ENABLED
719/**
720 * Logs the current instruction.
721 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
722 * @param pszFunction The IEM function doing the execution.
723 */
724static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
725{
726# ifdef IN_RING3
727 if (LogIs2Enabled())
728 {
729 char szInstr[256];
730 uint32_t cbInstr = 0;
731 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
732 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
733 szInstr, sizeof(szInstr), &cbInstr);
734
735 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
736 Log2(("**** %s fExec=%x pTb=%p\n"
737 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
738 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
739 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
740 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
741 " %s\n"
742 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
743 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
744 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
745 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
746 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
747 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
748 szInstr));
749
750 if (LogIs3Enabled())
751 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
752 }
753 else
754# endif
755 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
756 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
757}
758#endif /* LOG_ENABLED */
759
760
761static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
762{
763 RT_NOREF(pVM, pVCpu);
764 return rcStrict;
765}
766
767
768/**
769 * Initializes the decoder state when compiling TBs.
770 *
771 * This presumes that fExec has already been initialized.
772 *
773 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
774 * made here may need to be applied to them as well.
775 *
776 * @param pVCpu The cross context virtual CPU structure of the calling
777 * thread.
778 * @param fReInit Clear for the first call for a TB, set for subsequent
779 * calls from inside the compile loop where we can skip a
780 * couple of things.
781 * @param fExtraFlags The extra translation block flags when @a fReInit is
782 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
783 * checked.
784 */
785DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
786{
787 /* ASSUMES: That iemInitExec was already called and that anyone changing
788 CPU state affecting the fExec bits since then will have updated fExec! */
789 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
790 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
791
792 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
793
794 /* Decoder state: */
795 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
796 pVCpu->iem.s.enmEffAddrMode = enmMode;
797 if (enmMode != IEMMODE_64BIT)
798 {
799 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
800 pVCpu->iem.s.enmEffOpSize = enmMode;
801 }
802 else
803 {
804 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
805 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
806 }
807 pVCpu->iem.s.fPrefixes = 0;
808 pVCpu->iem.s.uRexReg = 0;
809 pVCpu->iem.s.uRexB = 0;
810 pVCpu->iem.s.uRexIndex = 0;
811 pVCpu->iem.s.idxPrefix = 0;
812 pVCpu->iem.s.uVex3rdReg = 0;
813 pVCpu->iem.s.uVexLength = 0;
814 pVCpu->iem.s.fEvexStuff = 0;
815 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
816 pVCpu->iem.s.offModRm = 0;
817 pVCpu->iem.s.iNextMapping = 0;
818
819 if (!fReInit)
820 {
821 pVCpu->iem.s.cActiveMappings = 0;
822 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
823 pVCpu->iem.s.fEndTb = false;
824 pVCpu->iem.s.fTbCheckOpcodes = false;
825 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
826 pVCpu->iem.s.fTbCrossedPage = false;
827 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
828 pVCpu->iem.s.fTbCurInstrIsSti = false;
829 }
830 else
831 {
832 Assert(pVCpu->iem.s.cActiveMappings == 0);
833 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
834 Assert(pVCpu->iem.s.fEndTb == false);
835 Assert(pVCpu->iem.s.fTbCrossedPage == false);
836 }
837
838#ifdef DBGFTRACE_ENABLED
839 switch (IEM_GET_CPU_MODE(pVCpu))
840 {
841 case IEMMODE_64BIT:
842 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
843 break;
844 case IEMMODE_32BIT:
845 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
846 break;
847 case IEMMODE_16BIT:
848 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
849 break;
850 }
851#endif
852}
853
854
855/**
856 * Initializes the opcode fetcher when starting the compilation.
857 *
858 * @param pVCpu The cross context virtual CPU structure of the calling
859 * thread.
860 */
861DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
862{
863 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
864#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
865 pVCpu->iem.s.offOpcode = 0;
866#else
867 RT_NOREF(pVCpu);
868#endif
869}
870
871
872/**
873 * Re-initializes the opcode fetcher between instructions while compiling.
874 *
875 * @param pVCpu The cross context virtual CPU structure of the calling
876 * thread.
877 */
878DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
879{
880 if (pVCpu->iem.s.pbInstrBuf)
881 {
882 uint64_t off = pVCpu->cpum.GstCtx.rip;
883 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
884 off += pVCpu->cpum.GstCtx.cs.u64Base;
885 off -= pVCpu->iem.s.uInstrBufPc;
886 if (off < pVCpu->iem.s.cbInstrBufTotal)
887 {
888 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
889 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
890 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
891 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
892 else
893 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
894 }
895 else
896 {
897 pVCpu->iem.s.pbInstrBuf = NULL;
898 pVCpu->iem.s.offInstrNextByte = 0;
899 pVCpu->iem.s.offCurInstrStart = 0;
900 pVCpu->iem.s.cbInstrBuf = 0;
901 pVCpu->iem.s.cbInstrBufTotal = 0;
902 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
903 }
904 }
905 else
906 {
907 pVCpu->iem.s.offInstrNextByte = 0;
908 pVCpu->iem.s.offCurInstrStart = 0;
909 pVCpu->iem.s.cbInstrBuf = 0;
910 pVCpu->iem.s.cbInstrBufTotal = 0;
911#ifdef VBOX_STRICT
912 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
913#endif
914 }
915#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
916 pVCpu->iem.s.offOpcode = 0;
917#endif
918}
919
920
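/**
 * Copies the opcode bytes of the current instruction from the decoder's
 * abOpcode buffer into @a pbDst.
 *
 * The switch below is simply an unrolled, falling-through copy of at most 15
 * bytes (the maximum x86 instruction length).
 *
 * @param   pVCpu   The cross context virtual CPU structure of the calling thread.
 * @param   pbDst   Where to copy the opcode bytes to.
 * @param   cbInstr The number of opcode bytes to copy.
 */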
921DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
922{
923 switch (cbInstr)
924 {
925 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
926 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
927 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
928 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
929 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
930 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
931 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
932 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
933 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
934 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
935 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
936 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
937 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
938 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
939 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
940 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
941 }
942}
943
944
945/**
946 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
947 *
948 * - CS LIM check required.
949 * - Must recheck opcode bytes.
950 * - Previous instruction branched.
951 * - TLB load detected, probably due to page crossing.
952 *
953 * @returns true if everything went well, false if we're out of space in the TB
954 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
955 * @param pVCpu The cross context virtual CPU structure of the calling
956 * thread.
957 * @param pTb The translation block being compiled.
958 */
959bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
960{
961 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
962#if 0
963 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
964 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
965#endif
966
967 /*
968 * If we're not in 64-bit mode and not already checking CS.LIM we need to
969 * see if it's needed to start checking.
970 */
971 bool fConsiderCsLimChecking;
972 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
973 if ( fMode == IEM_F_MODE_X86_64BIT
974 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
975 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
976 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
977 fConsiderCsLimChecking = false; /* already enabled or not needed */
978 else
979 {
980 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
981 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
982 fConsiderCsLimChecking = true; /* likely */
983 else
984 {
985 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
986 return false;
987 }
988 }
989
990 /*
991     * Prepare the call now, even before we know if we can accept the instruction in this TB.
992     * This allows us to amend parameters w/o making every case suffer.
993 */
994 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
995 uint16_t const offOpcode = pTb->cbOpcodes;
996 uint8_t idxRange = pTb->cRanges - 1;
997
998 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
999 pCall->idxInstr = pTb->cInstructions;
1000 pCall->offOpcode = offOpcode;
1001 pCall->idxRange = idxRange;
1002 pCall->cbOpcode = cbInstr;
1003 pCall->auParams[0] = cbInstr;
1004 pCall->auParams[1] = idxRange;
1005 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1006
1007/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1008 * gotten onto. If we do, stop */
1009
1010 /*
1011 * Case 1: We've branched (RIP changed).
1012 *
1013 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1014 * Req: 1 extra range, no extra phys.
1015 *
1016     * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1017 * necessary (fTbCrossedPage is true).
1018 * Req: 1 extra range, probably 1 extra phys page entry.
1019 *
1020 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1021 * but in addition we cross into the following page and require
1022 * another TLB load.
1023 * Req: 2 extra ranges, probably 2 extra phys page entries.
1024 *
1025 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1026 * the following page (thus fTbCrossedPage is true).
1027 * Req: 2 extra ranges, probably 1 extra phys page entry.
1028 *
1029     * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1030     *       it may trigger "spuriously" from the CPU's point of view because of
1031     *       physical page changes that'll invalidate the physical TLB and trigger a
1032     *       call to the function. In theory this shouldn't be a big deal, just a bit
1033     *       of performance loss as we'll pick the LoadingTlb variants.
1034 *
1035 * Note! We do not currently optimize branching to the next instruction (sorry
1036 * 32-bit PIC code). We could maybe do that in the branching code that
1037 * sets (or not) fTbBranched.
1038 */
1039 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1040 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1041 * code. This'll require filtering out far jmps and calls, as they
1042 * load CS which should technically be considered indirect since the
1043 * GDT/LDT entry's base address can be modified independently from
1044 * the code. */
1045 if (pVCpu->iem.s.fTbBranched != 0)
1046 {
1047 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1048 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1049 {
1050 /* 1a + 1b - instruction fully within the branched to page. */
1051 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1052 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1053
1054 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1055 {
1056 /* Check that we've got a free range. */
1057 idxRange += 1;
1058 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1059 { /* likely */ }
1060 else
1061 {
1062 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1063 return false;
1064 }
1065 pCall->idxRange = idxRange;
1066 pCall->auParams[1] = idxRange;
1067 pCall->auParams[2] = 0;
1068
1069 /* Check that we've got a free page slot. */
1070 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1071 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1072 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1073 pTb->aRanges[idxRange].idxPhysPage = 0;
1074 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1075 || pTb->aGCPhysPages[0] == GCPhysNew)
1076 {
1077 pTb->aGCPhysPages[0] = GCPhysNew;
1078 pTb->aRanges[idxRange].idxPhysPage = 1;
1079 }
1080 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1081 || pTb->aGCPhysPages[1] == GCPhysNew)
1082 {
1083 pTb->aGCPhysPages[1] = GCPhysNew;
1084 pTb->aRanges[idxRange].idxPhysPage = 2;
1085 }
1086 else
1087 {
1088                    Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1089 return false;
1090 }
1091
1092 /* Finish setting up the new range. */
1093 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1094 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1095 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1096 pTb->aRanges[idxRange].u2Unused = 0;
1097 pTb->cRanges++;
1098 }
1099 else
1100 {
1101 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1102 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1103 }
1104
1105            /* Determine which function we need to load & check.
1106 Note! For jumps to a new page, we'll set both fTbBranched and
1107 fTbCrossedPage to avoid unnecessary TLB work for intra
1108 page branching */
1109 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1110 || pVCpu->iem.s.fTbCrossedPage)
1111 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1112 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1113 : !fConsiderCsLimChecking
1114 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1115 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1116 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1117 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1118 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1119 : !fConsiderCsLimChecking
1120 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1121 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1122 else
1123 {
1124 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1125 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1126 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1127 : !fConsiderCsLimChecking
1128 ? kIemThreadedFunc_BltIn_CheckOpcodes
1129 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1130 }
1131 }
1132 else
1133 {
1134 /* 1c + 1d - instruction crosses pages. */
1135 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1136 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1137
1138 /* Lazy bird: Check that this isn't case 1c, since we've already
1139               loaded the first physical address. End the TB and
1140 make it a case 2b instead.
1141
1142 Hmm. Too much bother to detect, so just do the same
1143 with case 1d as well. */
1144#if 0 /** @todo get back to this later when we've got the actual branch code in
1145 * place. */
1146 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1147
1148 /* Check that we've got two free ranges. */
1149 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1150 { /* likely */ }
1151 else
1152 return false;
1153 idxRange += 1;
1154 pCall->idxRange = idxRange;
1155 pCall->auParams[1] = idxRange;
1156 pCall->auParams[2] = 0;
1157
1158 /* ... */
1159
1160#else
1161 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1162 return false;
1163#endif
1164 }
1165 }
1166
1167 /*
1168 * Case 2: Page crossing.
1169 *
1170 * Sub-case 2a: The instruction starts on the first byte in the next page.
1171 *
1172 * Sub-case 2b: The instruction has opcode bytes in both the current and
1173 * following page.
1174 *
1175     * Both cases require a new range table entry and probably a new physical
1176 * page entry. The difference is in which functions to emit and whether to
1177 * add bytes to the current range.
1178 */
1179 else if (pVCpu->iem.s.fTbCrossedPage)
1180 {
1181 /* Check that we've got a free range. */
1182 idxRange += 1;
1183 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1184 { /* likely */ }
1185 else
1186 {
1187 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1188 return false;
1189 }
1190
1191 /* Check that we've got a free page slot. */
1192 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1193 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1194 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1195 pTb->aRanges[idxRange].idxPhysPage = 0;
1196 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1197 || pTb->aGCPhysPages[0] == GCPhysNew)
1198 {
1199 pTb->aGCPhysPages[0] = GCPhysNew;
1200 pTb->aRanges[idxRange].idxPhysPage = 1;
1201 }
1202 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1203 || pTb->aGCPhysPages[1] == GCPhysNew)
1204 {
1205 pTb->aGCPhysPages[1] = GCPhysNew;
1206 pTb->aRanges[idxRange].idxPhysPage = 2;
1207 }
1208 else
1209 {
1210            Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1211 return false;
1212 }
1213
1214 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1215 {
1216 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1217 pCall->idxRange = idxRange;
1218 pCall->auParams[1] = idxRange;
1219 pCall->auParams[2] = 0;
1220
1221 /* Finish setting up the new range. */
1222 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1223 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1224 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1225 pTb->aRanges[idxRange].u2Unused = 0;
1226 pTb->cRanges++;
1227
1228            /* Determine which function we need to load & check. */
1229 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1230 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1231 : !fConsiderCsLimChecking
1232 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1233 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1234 }
1235 else
1236 {
1237 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1238 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1239 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1240 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1241
1242            /* We're good. Split the instruction over the old and new range table entries. */
1243 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1244
1245 pTb->aRanges[idxRange].offPhysPage = 0;
1246 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1247 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1248 pTb->aRanges[idxRange].u2Unused = 0;
1249 pTb->cRanges++;
1250
1251            /* Determine which function we need to load & check. */
1252 if (pVCpu->iem.s.fTbCheckOpcodes)
1253 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1254 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1255 : !fConsiderCsLimChecking
1256 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
1257 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
1258 else
1259 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1260 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1261 : !fConsiderCsLimChecking
1262 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
1263 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
1264 }
1265 }
1266
1267 /*
1268 * Regular case: No new range required.
1269 */
1270 else
1271 {
1272 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1273 if (pVCpu->iem.s.fTbCheckOpcodes)
1274 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1275 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1276 : kIemThreadedFunc_BltIn_CheckOpcodes;
1277 else
1278 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
1279
1280 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1281 pTb->cbOpcodes = offOpcode + cbInstr;
1282 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1283 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1284 }
1285
1286 /*
1287 * Commit the call.
1288 */
1289 pTb->Thrd.cCalls++;
1290
1291 /*
1292 * Clear state.
1293 */
1294 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1295 pVCpu->iem.s.fTbCrossedPage = false;
1296 pVCpu->iem.s.fTbCheckOpcodes = false;
1297
1298 /*
1299 * Copy opcode bytes.
1300 */
1301 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1302 pTb->cbOpcodes = offOpcode + cbInstr;
1303 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1304
1305 return true;
1306}
1307
1308
1309/**
1310 * Worker for iemThreadedCompileBeginEmitCallsComplications and
1311 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
1312 *
1313 * @returns true if anything is pending, false if not.
1314 * @param pVCpu The cross context virtual CPU structure of the calling
1315 * thread.
1316 */
1317DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
1318{
1319 uint64_t fCpu = pVCpu->fLocalForcedActions;
1320 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
1321#if 1
1322 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1323 if (RT_LIKELY( !fCpu
1324 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1325 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1326 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
1327 return false;
1328 return true;
1329#else
1330 return false;
1331#endif
1332
1333}
1334
1335
1336/**
1337 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
1338 * set.
1339 *
1340 * @returns true if we should continue, false if an IRQ is deliverable or a
1341 * relevant force flag is pending.
1342 * @param pVCpu The cross context virtual CPU structure of the calling
1343 * thread.
1344 * @param pTb The translation block being compiled.
1345 * @sa iemThreadedCompileCheckIrq
1346 */
1347bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
1348{
1349 /*
1350     * Skip this if we've already emitted a call after the previous instruction
1351 * or if it's the first call, as we're always checking FFs between blocks.
1352 */
1353 uint32_t const idxCall = pTb->Thrd.cCalls;
1354 if ( idxCall > 0
1355 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
1356 {
1357 /* Emit the call. */
1358 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1359 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1360 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1361 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1362 pCall->idxInstr = pTb->cInstructions;
1363 pCall->uUnused0 = 0;
1364 pCall->offOpcode = 0;
1365 pCall->cbOpcode = 0;
1366 pCall->idxRange = 0;
1367 pCall->auParams[0] = 0;
1368 pCall->auParams[1] = 0;
1369 pCall->auParams[2] = 0;
1370 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1371
1372 /* Reset the IRQ check value. */
1373 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
1374
1375 /*
1376 * Check for deliverable IRQs and pending force flags.
1377 */
1378 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1379 }
1380 return true; /* continue */
1381}
1382
1383
1384/**
1385 * Emits an IRQ check call and checks for pending IRQs.
1386 *
1387 * @returns true if we should continue, false if an IRQ is deliverable or a
1388 * relevant force flag is pending.
1389 * @param pVCpu The cross context virtual CPU structure of the calling
1390 * thread.
1391 * @param pTb The translation block.
1392 * @sa iemThreadedCompileBeginEmitCallsComplications
1393 */
1394static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
1395{
1396 /* Check again in a little bit, unless it is immediately following an STI
1397 in which case we *must* check immediately after the next instruction
1398 as well in case it's executed with interrupt inhibition. We could
1399       otherwise miss the interrupt window. See the irq2 wait2 variant in
1400 bs3-timers-1 which is doing sti + sti + cli. */
1401 if (!pVCpu->iem.s.fTbCurInstrIsSti)
1402 pVCpu->iem.s.cInstrTillIrqCheck = 32;
1403 else
1404 {
1405 pVCpu->iem.s.fTbCurInstrIsSti = false;
1406 pVCpu->iem.s.cInstrTillIrqCheck = 0;
1407 }
1408 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1409
1410 /*
1411 * Emit the call.
1412 */
1413 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
1414 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
1415 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
1416 pCall->idxInstr = pTb->cInstructions;
1417 pCall->uUnused0 = 0;
1418 pCall->offOpcode = 0;
1419 pCall->cbOpcode = 0;
1420 pCall->idxRange = 0;
1421 pCall->auParams[0] = 0;
1422 pCall->auParams[1] = 0;
1423 pCall->auParams[2] = 0;
1424
1425 /*
1426 * Check for deliverable IRQs and pending force flags.
1427 */
1428 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
1429}
1430
1431
1432/**
1433 * Compiles a new TB and executes it.
1434 *
1435 * We combine compilation and execution here as it makes for simpler code flow
1436 * in the main loop and allows interpreting while compiling if we want to
1437 * explore that option.
1438 *
1439 * @returns Strict VBox status code.
1440 * @param pVM The cross context virtual machine structure.
1441 * @param pVCpu The cross context virtual CPU structure of the calling
1442 * thread.
1443 * @param GCPhysPc The physical address corresponding to the current
1444 * RIP+CS.BASE.
1445 * @param fExtraFlags Extra translation block flags: IEMTB_F_TYPE_THREADED and
1446 * maybe IEMTB_F_RIP_CHECKS.
1447 */
1448static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
1449{
1450 /*
1451     * Get the TB we use for the recompiling. This is a maxed-out TB, of which
1452     * we'll make a more efficient copy when we're done compiling.
1453 */
1454 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
1455 if (pTb)
1456 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags | IEMTB_F_STATE_COMPILING);
1457 else
1458 {
1459 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags | IEMTB_F_STATE_COMPILING);
1460 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
1461 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
1462 }
1463
1464 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
1465 functions may get at it. */
1466 pVCpu->iem.s.pCurTbR3 = pTb;
1467
1468#if 0
1469 /* Make sure the CheckIrq condition matches the one in EM. */
1470 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
1471 const uint32_t cZeroCalls = 1;
1472#else
1473 const uint32_t cZeroCalls = 0;
1474#endif
1475
1476 /*
1477     * Now for the recompilation. (This mimics IEMExecLots in many ways.)
1478 */
1479 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
1480 iemThreadedCompileInitOpcodeFetching(pVCpu);
1481 VBOXSTRICTRC rcStrict;
1482 for (;;)
1483 {
1484 /* Process the next instruction. */
1485#ifdef LOG_ENABLED
1486 iemThreadedLogCurInstr(pVCpu, "CC");
1487 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
1488 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
1489#endif
1490 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
1491 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
1492
1493 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
1494 if ( rcStrict == VINF_SUCCESS
1495 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
1496 && !pVCpu->iem.s.fEndTb)
1497 {
1498 Assert(pTb->Thrd.cCalls > cCallsPrev);
1499            Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
1500
1501 pVCpu->iem.s.cInstructions++;
1502 }
1503 else
1504 {
1505 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
1506 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
1507 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
1508 rcStrict = VINF_SUCCESS;
1509
1510 if (pTb->Thrd.cCalls > cZeroCalls)
1511 {
1512 if (cCallsPrev != pTb->Thrd.cCalls)
1513 pVCpu->iem.s.cInstructions++;
1514 break;
1515 }
1516
1517 pVCpu->iem.s.pCurTbR3 = NULL;
1518 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1519 }
1520
1521 /* Check for IRQs? */
1522 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
1523 pVCpu->iem.s.cInstrTillIrqCheck--;
1524 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
1525 break;
1526
1527 /* Still space in the TB? */
1528 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
1529 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
1530 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
1531 else
1532 {
1533 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
1534 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
1535 break;
1536 }
1537 iemThreadedCompileReInitOpcodeFetching(pVCpu);
1538 }
1539
1540 /*
1541 * Duplicate the TB into a completed one and link it.
1542 */
1543 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
1544 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
1545
1546 iemThreadedTbAdd(pVM, pVCpu, pTb);
1547
1548#ifdef IEM_COMPILE_ONLY_MODE
1549 /*
1550 * Execute the translation block.
1551 */
1552#endif
1553
1554 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1555}
1556
1557
1558/**
1559 * Executes a translation block.
1560 *
1561 * @returns Strict VBox status code.
1562 * @param pVCpu The cross context virtual CPU structure of the calling
1563 * thread.
1564 * @param pTb The translation block to execute.
1565 */
1566static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
1567{
1568 /* Check the opcodes in the first page before starting execution. */
1569 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
1570 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
1571 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
1572 { /* likely */ }
1573 else
1574 {
1575 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
1576 iemThreadedTbFree(pVCpu->pVMR3, pVCpu, pTb);
1577 return VINF_SUCCESS;
1578 }
1579
1580 /* Set the current TB so CIMPL functions may get at it. */
1581 pVCpu->iem.s.pCurTbR3 = pTb;
1582 pVCpu->iem.s.cTbExec++;
1583
1584 /* The execution loop. */
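 /* Each call entry holds a threaded function index and up to three parameters;
    dispatch through g_apfnIemThreadedFunctions until a call returns something
    other than VINF_SUCCESS (or passes up a status). */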
1585#ifdef LOG_ENABLED
1586 uint64_t uRipPrev = UINT64_MAX;
1587#endif
1588 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1589 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1590 while (cCallsLeft-- > 0)
1591 {
1592#ifdef LOG_ENABLED
1593 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
1594 {
1595 uRipPrev = pVCpu->cpum.GstCtx.rip;
1596 iemThreadedLogCurInstr(pVCpu, "EX");
1597 }
1598 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1599 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
1600 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
1601#endif
1602 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
1603 pCallEntry->auParams[0],
1604 pCallEntry->auParams[1],
1605 pCallEntry->auParams[2]);
1606 if (RT_LIKELY( rcStrict == VINF_SUCCESS
1607 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
1608 pCallEntry++;
1609 else
1610 {
1611 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
1612 pVCpu->iem.s.pCurTbR3 = NULL;
1613 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
1614
1615 /* Some status codes are just to get us out of this loop and
1616 continue in a different translation block. */
1617 if (rcStrict == VINF_IEM_REEXEC_BREAK)
1618 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
1619 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1620 }
1621 }
1622
1623 pVCpu->iem.s.cInstructions += pTb->cInstructions;
1624 pVCpu->iem.s.pCurTbR3 = NULL;
1625 return VINF_SUCCESS;
1626}
1627
1628
1629/**
1630 * This is called when the PC doesn't match the current pbInstrBuf.
1631 *
1632 * Upon return, we're ready for opcode fetching. But please note that
1633 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
1634 * MMIO or unassigned).
1635 */
1636static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
1637{
1638 pVCpu->iem.s.pbInstrBuf = NULL;
1639 pVCpu->iem.s.offCurInstrStart = 0;
1640 pVCpu->iem.s.offInstrNextByte = 0;
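 /* A zero-byte fetch with the buffer invalidated above (re)loads pbInstrBuf
    and GCPhysInstrBuf for the current PC. */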
1641 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
1642 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
1643}
1644
1645
1646/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
1647DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
1648{
1649 /*
1650 * Set uCurTbStartPc to RIP and calc the effective PC.
1651 */
1652 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
1653 pVCpu->iem.s.uCurTbStartPc = uPc;
1654 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1655 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
1656
1657 /*
1658 * Advance within the current buffer (PAGE) when possible.
1659 */
1660 if (pVCpu->iem.s.pbInstrBuf)
1661 {
1662 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
1663 if (off < pVCpu->iem.s.cbInstrBufTotal)
1664 {
1665 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1666 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
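 /* Limit the window to 15 bytes (the maximum x86 instruction length) past
    the instruction start whenever the buffer allows it. */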
1667 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1668 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1669 else
1670 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1671
1672 return pVCpu->iem.s.GCPhysInstrBuf + off;
1673 }
1674 }
1675 return iemGetPcWithPhysAndCodeMissed(pVCpu);
1676}
1677
1678
1679/**
1680 * Determines the extra IEMTB_F_XXX flags.
1681 *
1682 * @returns IEMTB_F_TYPE_THREADED, possibly ORed with IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and/or IEMTB_F_CS_LIM_CHECKS.
1683 * @param pVCpu The cross context virtual CPU structure of the calling
1684 * thread.
1685 */
1686DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
1687{
1688 uint32_t fRet = IEMTB_F_TYPE_THREADED;
1689
1690 /*
1691 * Determine the inhibit bits.
1692 */
1693 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
1694 { /* typical */ }
1695 else
1696 {
1697 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
1698 fRet |= IEMTB_F_INHIBIT_SHADOW;
1699 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
1700 fRet |= IEMTB_F_INHIBIT_NMI;
1701 }
1702
1703 /*
1704 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
1705 * likely to go invalid before the end of the translation block.
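 * (64-bit code has no limit to check; otherwise the checks are only skipped
 * when at least a page plus a maximum-length instruction of room remains
 * below the CS limit.)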
1706 */
1707 if (IEM_IS_64BIT_CODE(pVCpu))
1708 return fRet;
1709
1710 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1711 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1712 return fRet;
1713 return fRet | IEMTB_F_CS_LIM_CHECKS;
1714}
1715
1716
1717VMMDECL(VBOXSTRICTRC) IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu)
1718{
1719 /*
1720 * See if there is an interrupt pending in TRPM, inject it if we can.
1721 */
1722 if (!TRPMHasTrap(pVCpu))
1723 { /* likely */ }
1724 else
1725 {
1726 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
1727 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
1728 { /* likely */ }
1729 else
1730 return rcStrict;
1731 }
1732
1733 /*
1734 * Init the execution environment.
1735 */
1736 iemInitExec(pVCpu, 0 /*fExecOpts*/);
1737
1738 /*
1739 * Run-loop.
1740 *
1741 * If we're using setjmp/longjmp we combine all the catching here to avoid
1742 * having to call setjmp for each block we're executing.
1743 */
1744 for (;;)
1745 {
1746 PIEMTB pTb = NULL;
1747 VBOXSTRICTRC rcStrict;
1748 IEM_TRY_SETJMP(pVCpu, rcStrict)
1749 {
1750 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
1751 for (uint32_t iIterations = 0; ; iIterations++)
1752 {
1753 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
1754 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
1755 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
1756
1757 pTb = iemThreadedTbLookup(pVM, pVCpu, GCPhysPc, fExtraFlags);
1758 if (pTb)
1759 rcStrict = iemThreadedTbExec(pVCpu, pTb);
1760 else
1761 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
1762 if (rcStrict == VINF_SUCCESS)
1763 {
1764 Assert(pVCpu->iem.s.cActiveMappings == 0);
1765
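 /* Check the pending force-actions, masking out the ones that do not
    require us to leave the recompiler loop right away. */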
1766 uint64_t fCpu = pVCpu->fLocalForcedActions;
1767 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
1768 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
1769 | VMCPU_FF_TLB_FLUSH
1770 | VMCPU_FF_UNHALT );
1771 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
1772 if (RT_LIKELY( ( !fCpu
1773 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1774 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
1775 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
1776 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
1777 {
1778 if (RT_LIKELY( (iIterations & cPollRate) != 0
1779 || !TMTimerPollBool(pVM, pVCpu)))
1780 {
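 /* Looks good, continue with the next TB. */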
1781
1782 }
1783 else
1784 return VINF_SUCCESS;
1785 }
1786 else
1787 return VINF_SUCCESS;
1788 }
1789 else
1790 return rcStrict;
1791 }
1792 }
1793 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
1794 {
1795 pVCpu->iem.s.cLongJumps++;
1796 if (pVCpu->iem.s.cActiveMappings > 0)
1797 iemMemRollback(pVCpu);
1798
1799 /* If pTb isn't NULL we're in iemThreadedTbExec. */
1800 if (!pTb)
1801 {
1802 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode. */
1803 pTb = pVCpu->iem.s.pCurTbR3;
1804 if (pTb)
1805 {
1806 /* If the pCurTbR3 block is in compiling state, we're in iemThreadedCompile,
1807 otherwise it's iemThreadedTbExec inside iemThreadedCompile (compile option). */
1808 if ((pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_COMPILING)
1809 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
1810 }
1811 }
1812 return rcStrict;
1813 }
1814 IEM_CATCH_LONGJMP_END(pVCpu);
1815 }
1816}
1817