VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 47148

Last change on this file since 47148 was 47138, checked in by vboxsync, 12 years ago

IEM: idiv and div missing bits and fixes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 74.3 KB
Line 
1; $Id: IEMAllAImpl.asm 47138 2013-07-14 18:05:53Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.215389.xyz. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; Return from a fastcall function.
;
; Emits 'ret %1' on 32-bit Windows targets (RT_ARCH_X86 + RT_OS_WINDOWS) and
; a plain 'ret' in every other configuration.
;
; @param        1       The byte count for the 'ret XX' form.
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %else
  %ifndef RT_OS_WINDOWS
        ret
  %else
        ret     %1
  %endif
 %endif
%endmacro
44
;;
; Symbol name for fastcall functions.
;
; The default expansion is simply NAME(a_Name).  On 32-bit Windows the default
; is dropped and the symbol is built by pasting together a_Prefix, a_Name, '@'
; and a_cbArgs.
;
; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;       escaping (or whatever the dollar is good for here).  Thus the ugly
;       prefix argument.
;
%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix)   NAME(a_Name)
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %undef NAME_FASTCALL
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %endif
%endif
59
;;
; BEGINPROC for fastcall functions.
;
; Emits the export/global directives that apply to the output format and then
; the entry label itself.  The directives pass '$@' as the prefix argument of
; NAME_FASTCALL while the label passes a bare '@'.
;
; @param        1       The function name (C).
; @param        2       The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
80
81
;
; We employ some macro assembly here to hide the calling convention differences.
;
85%ifdef RT_ARCH_AMD64
; AMD64 prologues and epilogues.
;
; The prologues are empty and every epilogue is a plain 'ret'.  The _EX
; epilogues take the byte count that the 32-bit versions hand to 'ret XX' and
; ignore it here.
;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts zero or one parameter: the 32-bit version and the other AMD64 _EX
 ; epilogues take one, so 'EPILOGUE_1_ARGS_EX n' must assemble here as well,
 ; while the historical parameterless form keeps working.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro
121
; AMD64 register aliases.
;
; A0..A3 name the registers holding arguments 0..3 for the selected 64-bit
; calling convention; the _32/_16/_8 suffixes name the narrower views of the
; same register.  No A3_8 alias is defined.  T0 and T1 are the temporaries.
;
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Temporaries, identical for both 64-bit conventions.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

174%else
175 ; x86
; x86 (32-bit) prologues and epilogues.
;
; Every prologue saves edi (which is T1 on x86).  The three and four argument
; versions additionally save ebx (A2) and, for four arguments, esi (A3), and
; load those registers from the stack.  The epilogues restore the registers
; in reverse order and return with 'ret <bytes>'.
;
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; A2, read past the ebx pushed above and one more dword
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; A2, read past the three pushes above and one more dword
        mov     esi, [esp + 12 + 4 + 4] ; A3, the dword following A2
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro
236
; x86 (32-bit) register aliases.
;
; A0/A1 are ecx/edx; A2 (ebx) and A3 (esi) are the registers the
; PROLOGUE_3_ARGS / PROLOGUE_4_ARGS macros fill from the stack.  T0 and T1
; are the temporaries.  No 8-bit aliases are defined for A3 and T1.
;
 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
264%endif
265
266
;;
; Load the guest's modified and undefined flags from [%1] into the host EFLAGS.
;
; The host flags are pushed, the bits in (%2 | %3) are replaced by the guest's
; values and the result is popped back into EFLAGS.  Nothing is emitted when
; both flag sets are empty (same guard as IEM_SAVE_FLAGS), which avoids a
; pointless pushf/popf pair and leaves T0 alone in that case.
;
; @remarks      Clobbers T0, stack. Changes EFLAGS.
; @param        1       The parameter (A0..A3) pointing to the eflags.
; @param        2       The set of modified flags.
; @param        3       The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 %if (%2 | %3) != 0
        pushf                           ; store current flags
        mov     T0_32, [%1]             ; load the guest flags
        and     dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
        and     T0_32, (%2 | %3)        ; select the modified and undefined flags.
        or      [xSP], T0               ; merge guest flags with host flags.
        popf                            ; load the mixed flags.
 %endif
%endmacro
286
;;
; Write the host's modified and undefined flags back to the guest eflags.
;
; The bits in (%2 | %3) of the dword at [%1] are replaced by the current host
; values; all other bits of [%1] are kept.  Expands to nothing when both flag
; sets are empty.
;
; @remarks      Clobbers T0, T1, stack.
; @param        1       The register pointing to the EFLAGS.
; @param        2       The mask of modified flags to save.
; @param        3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                      ; T1 = host flags after the instruction
        mov     T0_32, [%1]             ; T0 = guest flags
        and     T1_32, (%2 | %3)        ; host side: keep only the modified/undefined bits
        and     T0_32, ~(%2 | %3)       ; guest side: drop exactly those bits
        or      T0_32, T1_32            ; merge
        mov     [%1], T0_32             ; store the updated guest flags
 %endif
%endmacro
306
307
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (int3 stubs are emitted there for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8                       ; same cleanup as the _u64_locked stub and EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
406
;            instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
417
418
;;
; Macro for implementing a bit operator (bt, btc, bts, btr).
;
; Emits the 16, 32 and 64-bit workers plus, on request, lock-prefixed
; variants.  On 32-bit hosts the 64-bit workers are int3 stubs until they get
; hand coded.
;
; Each worker receives a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
505
;;
; Macro for implementing a bit search operator (bsf, bsr).
;
; Shares the IEMIMPL_BIT_OP name with the four parameter bit operator macro;
; the two are told apart by their parameter count.
;
; Emits the 16, 32 and 64-bit workers; on 32-bit hosts the 64-bit worker is an
; int3 stub until it gets hand coded.
;
; Each worker receives a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.  The result is
; computed into T0 and stored to [A0] only when the instruction leaves ZF
; clear.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64
%endmacro
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
563
564
;
; Two operand IMUL: similar to the binary operators, yet different (no lock,
; no memory destination for the instruction itself).
; The rDX:rAX variant of imul is handled together with mul further down.
;
; Each worker multiplies the source value in A1 by the value at [A0], stores
; the product back to [A0] and updates the eflags pointed to by A2.
;
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL  iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32

BEGINPROC_FASTCALL  iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
%ifdef RT_ARCH_AMD64
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
%else
        int3 ;; @todo implement me
%endif
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
600
601
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Each function takes two arguments: A0 points to the memory operand and A1
; points to the register value.  The register value is loaded into T0,
; exchanged with [A0] and the old memory value is written back to [A1].
; They all return void.
;
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]
        xchg    [A0], T0_8
        mov     [A1], T0_8
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL  iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]
        xchg    [A0], T0_16
        mov     [A1], T0_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL  iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]
        xchg    [A0], T0_32
        mov     [A1], T0_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32

BEGINPROC_FASTCALL  iemAImpl_xchg_u64, 8
%ifdef RT_ARCH_AMD64
        PROLOGUE_2_ARGS
        mov     T0, [A1]
        xchg    [A0], T0
        mov     [A1], T0
        EPILOGUE_2_ARGS
%else
        ; Not implemented on 32-bit hosts: trap.
        int3
        ret     0
%endif
ENDPROC iemAImpl_xchg_u64
645
646
;
; XADD for memory operands.
;
; Each function takes three arguments: A0 points to the memory/register
; destination, A1 points to the register operand and A2 points to eflags.
; The register value is loaded into T0, xadd'ed with [A0] and the value xadd
; leaves in T0 is written back to [A1].  They all return void.
;
; The _locked workers differ only in the lock prefix.  On 32-bit hosts the
; 64-bit workers are int3 stubs.
;
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL  iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL  iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32

BEGINPROC_FASTCALL  iemAImpl_xadd_u64, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
%else
        int3
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64

BEGINPROC_FASTCALL  iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL  iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL  iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked

BEGINPROC_FASTCALL  iemAImpl_xadd_u64_locked, 12
%ifdef RT_ARCH_AMD64
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
%else
        int3
        ret     4
%endif
ENDPROC iemAImpl_xadd_u64_locked
744
745
;
; CMPXCHG8B.
;
; The instruction pins EDX:EAX and ECX:EBX, which collide with the argument
; registers, so the code is written out separately for each calling
; convention instead of going through the A0..A3 aliases.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
; In every variant the flags are loaded before EAX is filled in (the load
; clobbers T0/eax) and saved only after EDX:EAX has been written back (the
; save clobbers T0 and T1).
;
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        push    rbx

        mov     r11, rdx                ; pu64EaxEdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     ebx, [r8]
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64EbxEcx (is also T1)

        mov     ebx, [r11]
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0  ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b

BEGINPROC_FASTCALL  iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b, so just reuse the plain worker.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; The accumulator is passed by reference (A1): it is loaded before the
; instruction and written back afterwards.  On 32-bit hosts the 64-bit worker
; receives uReg by pointer as well and is built on top of cmpxchg8b.
;
; @param        1       Instruction prefix: empty or 'lock'.
; @param        2       Function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL  iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL  iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL  iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL  iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]               ; the 64-bit form compares all of rax, so load the full accumulator
        %1 cmpxchg [A0], A2
        mov     [A1], rax               ; and hand the full 64-bit accumulator back
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF clear means the compare failed and edx:eax now holds the
        ; destination value, while [esi] still holds the old accumulator.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; equal: just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        cmp     [esi + 4], edx          ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
938
;;
; Macro for implementing a unary operator.
;
; Emits plain and lock-prefixed workers for the 8, 16, 32 and 64-bit operand
; sizes.  On 32-bit hosts both 64-bit workers are int3 stubs for now.
;
; Each worker receives a pointer to the destination memory operand in A0 and
; a pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %else
        ; stub them for now.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64_locked, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif

%endmacro

IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1037
1038
1039
;;
; Macro for implementing a shift operation.
;
; Emits the 8, 16, 32 and 64-bit workers; on 32-bit hosts the 64-bit worker is
; an int3 stub until it gets hand coded.
;
; Each worker receives a pointer to the destination memory operand in A0, the
; shift count in A1 and a pointer to eflags in A2.
;
; The instruction takes its count in cl.  With ASM_CALL64_GCC the count is
; copied from A1 into cl; otherwise A0 is the rcx/ecx register, so A0 and A1
; are swapped, which puts the count in cl and the destination pointer in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL  iemAImpl_ %+ %1 %+ _u64, 12
        int3
        ret     4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1129
1130
;;
; Generator for the double precision shift helpers (shld/shrd).
;
; Expands to iemAImpl_<mnemonic>_u16, _u32 and _u64.  The 64-bit variant is
; only implemented for real on AMD64 hosts; on 32-bit hosts it is an int3 stub
; until somebody hand codes it.
;
; The generated functions take the destination operand (r/m) pointer in A0,
; the source (reg) value in A1, the shift count in A2 and a pointer to the
; eflags variable/register in A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments: the count has to be
; in CL.  With ASM_CALL64_GCC, A2 and A3 are exchanged around the instruction;
; otherwise A0 and A2 are exchanged once and the destination is addressed via
; A2 from then on.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1_16, cl
 %else
        xchg    A3, A2                  ; count -> cl, eflags pointer parked in A2
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; put the eflags pointer back into A3
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifndef ASM_CALL64_GCC
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %else
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifndef ASM_CALL64_GCC
        xchg    A0, A2
        %1      [A2], A1, cl
  %else
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only, traps if it is ever reached.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

; AF is passed as undefined, the other five status flags as modified.
IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_CF | X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_CF | X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF), (X86_EFL_AF)
1204
1205
;;
; Generator for the multiplication helpers (mul/imul with implicit rAX/rDX).
;
; Expands to iemAImpl_<mnemonic>_u8, _u16, _u32 and _u64.  The 64-bit variant
; is only implemented for real on AMD64 hosts; on 32-bit hosts it is an int3
; stub until somebody hand codes it.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the operand and A2 points to eflags.  The wider functions
; take a pointer to rAX in A0, a pointer to rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; All functions return 0 in eax, giving them the same return convention as the
; div/idiv helpers so callers can treat both families alike.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.  Unless
; ASM_CALL64_GCC is defined, the rDX pointer in A1 is copied to T1 before the
; instruction executes and the high result is stored through T1.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the 8-bit form leaves its 16-bit result in ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifndef ASM_CALL64_GCC
        mov     T1, A1                  ; keep the rDX pointer in T1 across the instruction
 %endif
        %1      A2_16
        mov     [A0], ax
 %ifdef ASM_CALL64_GCC
        mov     [A1], dx
 %else
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifndef ASM_CALL64_GCC
        mov     T1, A1
 %endif
        %1      A2_32
        mov     [A0], eax
 %ifdef ASM_CALL64_GCC
        mov     [A1], edx
 %else
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifndef ASM_CALL64_GCC
        mov     T1, A1
  %endif
        %1      A2
        mov     [A0], rax
  %ifdef ASM_CALL64_GCC
        mov     [A1], rdx
  %else
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; 32-bit host: placeholder only, traps if it is ever reached.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

; OF and CF are passed as modified, the remaining status flags as undefined.
IEMIMPL_MUL_OP mul,  (X86_EFL_CF | X86_EFL_OF), (X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF)
IEMIMPL_MUL_OP imul, (X86_EFL_CF | X86_EFL_OF), (X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF)
1306
1307
;;
; Macro for implementing the division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the divisor and A2 points to eflags.  The other functions
; take a pointer to rAX in A0, a pointer to rDX in A1, the divisor in A2 and a
; pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  To be able to do that, the divisor is checked for zero
; and the quotient for overflow before the real instruction is executed.
;
; For the signed variants the check works on magnitudes: dividend and divisor
; are negated as needed and then compared like in the unsigned case, shifted
; by the sign bit.  In the 32-bit and 64-bit variants the dividend is a
; double-width value kept in T1:T0, so it must be negated as a unit
; (neg low / adc high, 0 / neg high); negating the two halves independently
; yields a wrong high half whenever the low half is non-zero.
;
; NOTE(review): a signed dividend equal to the most negative representable
;               value keeps its bit pattern when negated and appears to slip
;               through the shifted compare below - confirm and handle it
;               separately if so.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; high byte of the dividend (AH) vs. divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; high word of the dividend (DX) vs. divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend (DX:AX in one 32-bit register)
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor is moved to T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; rDX pointer is moved to T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; high dword of the dividend (EDX) vs. divisor
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        neg     T0_32                   ; T1:T0 = -(T1:T0); CF is set if the low half was non-zero,
        adc     T1_32, 0                ;   which is the borrow that must go into the high half
        neg     T1_32                   ;   before it is negated.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        neg     T0_32                   ; T1:T0 = -(T1:T0), see above.
        adc     T1_32, 0
        neg     T1_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor is moved to T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; rDX pointer is moved to T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed overflow check
 %endif
.div_zero:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero

        ; Overflow check, same scheme as the 32-bit variant.
  %if %4 == 0
        cmp     [A1], A2                ; high qword of the dividend (RDX) vs. divisor
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        neg     T0                      ; T1:T0 = -(T1:T0); CF is set if the low half was non-zero,
        adc     T1, 0                   ;   which is the borrow that must go into the high half
        neg     T1                      ;   before it is negated.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0                      ; pop leaves the flags of the cmp intact
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        neg     T0                      ; T1:T0 = -(T1:T0), see above.
        adc     T1, 0
        neg     T1
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor is moved to T1 before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; rDX pointer is moved to T1 before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; balance the push done by the signed overflow check
  %endif
.div_zero:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12                      ; same stack clean-up as the mul and double shift stubs
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

; No flags are passed as modified; all six status flags are passed as undefined.
IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1641
1642
;
; BSWAP.  No flag changes.
;
; Each function takes a single argument in A0: a pointer to the value that is
; byte swapped in place (input/output).  They all return void.
;

;
; 16-bit variant: a bswap on the 32-bit register with a hand emitted 66h
; operand size prefix in front of it.  A full dword is loaded from and stored
; back to [A0], just in case any of the upper bits are used.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]
        db      66h                     ; operand size prefix for the bswap below
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1657
;
; 32-bit variant: load the dword at [A0], reverse its byte order, store it back.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; fetch
        bswap   T0_32                   ; swap
        mov     [A0], T0_32             ; write back
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1665
;
; 64-bit variant.  AMD64 hosts swap the qword at [A0] in one go; other hosts
; byte swap the two dwords separately and store them in exchanged positions.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifndef RT_ARCH_AMD64
        mov     T1, [A0 + 4]            ; T1 = upper dword
        mov     T0, [A0]                ; T0 = lower dword
        bswap   T1
        bswap   T0
        mov     [A0], T1                ; swapped upper dword becomes the lower one
        mov     [A0 + 4], T0            ; and vice versa
%else
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1684
1685
;;
; Prepares the host FPU for executing the instruction being emulated by
; loading parts of the guest's control word and status word into it.
;
; The current FPU environment is dumped at [xSP] with fnstenv, its FCW and FSW
; fields are patched and the result is loaded back with fldenv.  The caller
; must therefore have reserved scratch space at the top of the stack; the
; users in this file do 'sub xSP, 20h' first.
;
; NOTE(review): the original header said "@uses 24 bytes of stack"; the 32-bit
;               protected mode FNSTENV image is 28 bytes according to the
;               Intel SDM - confirm the figure.
;
; @uses         T0, T1, eflags and the scratch area at [xSP].
; @param        1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - guest exception mask, precision control and rounding control bits.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - guest C0, C1, C2 and C3 (for instructions leaving them undefined)
        ;       combined with the TOP field of the environment just stored.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1711
1712
;;
; Layout of the result structure filled in by the FPU helpers: an 80-bit
; value (five words) followed by the status word.
;
; @todo This probably belongs in a shared include file rather than here.
;
struc IEMFPURESULT
    .r80Result:         resw 5          ; 80-bit result value.
    .FSW:               resw 1          ; FPU status word.
endstruc
1720
1721
;;
; Layout of the result structure for FPU helpers producing two 80-bit values;
; the status word sits between the two values.
;
; @todo This probably belongs in a shared include file rather than here.
;
struc IEMFPURESULTTWO
    .r80Result1:        resw 5          ; First 80-bit result value.
    .FSW:               resw 1          ; FPU status word.
    .r80Result2:        resw 5          ; Second 80-bit result value.
endstruc
1730
1731
1732;
1733;---------------------- 16-bit signed integer operations ----------------------
1734;
1735
1736
;;
; Converts a 16-bit signed integer to an 80-bit floating point value (fild).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 16-bit signed integer to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1760
1761
;;
; Stores an 80-bit floating point value (register) as a 16-bit signed integer
; in memory (fistp).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 16-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1785
1786
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; The store must use the word form of fisttp: the destination is only 16 bits
; wide, and the dword form would both write two bytes past it and apply the
; 32-bit range when checking for overflow.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 16-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit truncating store

        fnstsw  word  [A1]              ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1811
1812
;;
; Generator for FPU helpers combining an 80-bit value with a 16-bit signed
; integer memory operand and returning both result value and FSW.
;
; Expands to iemAImpl_<instruction>_r80_by_i16.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1849
1850
;;
; Generator for FPU helpers combining an 80-bit value with a 16-bit signed
; integer memory operand where only the resulting FSW is returned.
;
; Expands to iemAImpl_<instruction>_r80_by_i16.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1881
1882
1883
1884;
1885;---------------------- 32-bit signed integer operations ----------------------
1886;
1887
1888
;;
; Converts a 32-bit signed integer to an 80-bit floating point value (fild).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 32-bit signed integer to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1912
1913
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; in memory (fistp).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1937
1938
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; in memory, with truncation (fisttp).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1963
1964
;;
; Generator for FPU helpers combining an 80-bit value with a 32-bit signed
; integer memory operand and returning both result value and FSW.
;
; Expands to iemAImpl_<instruction>_r80_by_i32.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2001
2002
;;
; Generator for FPU helpers combining an 80-bit value with a 32-bit signed
; integer memory operand where only the resulting FSW is returned.
;
; Expands to iemAImpl_<instruction>_r80_by_i32.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2033
2034
2035
2036;
2037;---------------------- 64-bit signed integer operations ----------------------
2038;
2039
2040
;;
; Converts a 64-bit signed integer to an 80-bit floating point value (fild).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 64-bit signed integer to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2064
2065
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; in memory (fistp).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2089
2090
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; in memory, with truncation (fisttp).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2115
2116
2117
2118;
2119;---------------------- 32-bit floating point operations ----------------------
2120;
2121
;;
; Loads a 32-bit floating point value from memory and returns it as an 80-bit
; one (fld).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2145
2146
;;
; Stores an 80-bit floating point value (register) as a 32-bit one in memory.
;
; The non-popping fst is used; the FPU is re-initialized afterwards anyway.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2170
2171
;;
; Generator for FPU helpers combining an 80-bit value with a 32-bit floating
; point memory operand and returning both result value and FSW.
;
; Expands to iemAImpl_<instruction>_r80_by_r32.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2208
2209
;;
; Generator for FPU helpers combining an 80-bit value with a 32-bit floating
; point memory operand where only the resulting FSW is returned.
;
; Expands to iemAImpl_<instruction>_r80_by_r32.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2240
2241
2242
2243;
2244;---------------------- 64-bit floating point operations ----------------------
2245;
2246
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; Like the other load helpers in this file, the FPU is put into its
; initialized state before the guest control/status bits are applied, so the
; load does not depend on the register stack and tag word it happens to find.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2269
2270
;;
; Stores an 80-bit floating point value (register) as a 64-bit one in memory.
;
; The non-popping fst is used; the FPU is re-initialized afterwards anyway.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A3]              ; the value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2294
2295
;;
; Generator for FPU helpers combining an 80-bit value with a 64-bit floating
; point memory operand and returning both result value and FSW.
;
; Expands to iemAImpl_<instruction>_r80_by_r64.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; drop recorded exceptions before the store below
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2332
;;
; Generator for FPU helpers combining an 80-bit value with a 64-bit floating
; point memory operand where only the resulting FSW is returned.
;
; Expands to iemAImpl_<instruction>_r80_by_r64.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 0x20               ; scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW

        fninit                          ; begin with an initialized FPU
        fld     tword [A2]              ; first operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1]               ; the status word is the only output

        fninit                          ; leave the FPU initialized again
        add     xSP, 0x20
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2363
2364
2365
2366;
2367;---------------------- 80-bit floating point operations ----------------------
2368;
2369
2370;;
2371; Loads an 80-bit floating point register value from memory.
2372; The value is pushed with fld and written back out together with the resulting FSW.
2373; @param A0 FPU context (fxsave).
2374; @param A1 Pointer to an IEMFPURESULT for the output.
2375; @param A2 Pointer to the 80-bit floating point value to load.
2376;
2377BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2378 PROLOGUE_3_ARGS
2379 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2380
2381 fninit                                     ; start from a freshly initialised x87 state
2382 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2383 fld tword [A2]                             ; the load itself is the instruction under test, so it runs after the FCW/FSW macro
2384
2385 fnstsw word [A1 + IEMFPURESULT.FSW]        ; capture the status word produced by the load
2386 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2387 fstp tword [A1 + IEMFPURESULT.r80Result]   ; store ST0 as the 80-bit result and pop it
2388
2389 fninit                                     ; reinitialise the x87 unit before returning
2390 add xSP, 20h                               ; undo the stack reservation made above
2391 EPILOGUE_3_ARGS
2392ENDPROC iemAImpl_fld_r80_from_r80
2393
2394
2395;;
2396; Stores an 80-bit floating point register to memory.
2397; The register value is first pushed onto the x87 stack and then written out with fstp.
2398; @param A0 FPU context (fxsave).
2399; @param A1 Where to return the output FSW.
2400; @param A2 Where to store the 80-bit value.
2401; @param A3 Pointer to the 80-bit register value.
2402;
2403BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2404 PROLOGUE_4_ARGS
2405 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2406
2407 fninit                                     ; start from a freshly initialised x87 state
2408 fld tword [A3]                             ; ST0 = the register value; pushed before the FCW/FSW macro below runs
2409 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2410 fstp tword [A2]                            ; the store under test: write ST0 to the destination and pop
2411
2412 fnstsw word [A1]                           ; return the status word produced by the store
2413
2414 fninit                                     ; reinitialise the x87 unit before returning
2415 add xSP, 20h                               ; undo the stack reservation made above
2416 EPILOGUE_4_ARGS
2417ENDPROC iemAImpl_fst_r80_to_r80
2418
2419
2420;;
2421; FPU instruction working on two 80-bit floating point values.
2422; Emits iemAImpl_<instruction>_r80_by_r80; the result is whatever the instruction leaves in ST0.
2423; @param 1 The instruction
2424; @param 2 The operand list passed to the instruction ({} when it takes no explicit operands).
2425; @param A0 FPU context (fxsave).
2426; @param A1 Pointer to an IEMFPURESULT for the output.
2427; @param A2 Pointer to the first 80-bit value (ST0)
2428; @param A3 Pointer to the second 80-bit value (STn).
2429;
2430%macro IEMIMPL_FPU_R80_BY_R80 2
2431BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2432 PROLOGUE_4_ARGS
2433 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2434
2435 fninit                                     ; start from a freshly initialised x87 state
2436 fld tword [A3]                             ; pushed first, so it ends up in ST1
2437 fld tword [A2]                             ; pushed last, so it is ST0
2438 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2439 %1 %2                                      ; the instruction followed by the operand list from macro parameter 2
2440
2441 fnstsw word [A1 + IEMFPURESULT.FSW]        ; capture the status word the instruction produced
2442 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2443 fstp tword [A1 + IEMFPURESULT.r80Result]   ; store ST0 as the 80-bit result and pop it
2444
2445 fninit                                     ; reinitialise the x87 unit (this also discards the remaining stack entry)
2446 add xSP, 20h                               ; undo the stack reservation made above
2447 EPILOGUE_4_ARGS
2448ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2449%endmacro
2450 ; Arithmetic forms name their operands explicitly; fprem, fprem1 and fscale pass an empty list.
2451IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2452IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2453IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2454IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2455IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2456IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2457IEMIMPL_FPU_R80_BY_R80 fprem, {}
2458IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2459IEMIMPL_FPU_R80_BY_R80 fscale, {}
2460
2461
2462;;
2463; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2464; storing the result in ST1 and popping the stack.
2465; Emits iemAImpl_<instruction>_r80_by_r80; after the pop the result is in ST0 and is stored from there.
2466; @param 1 The instruction
2467;
2468; @param A0 FPU context (fxsave).
2469; @param A1 Pointer to an IEMFPURESULT for the output.
2470; @param A2 Pointer to the first 80-bit value (ST1).
2471; @param A3 Pointer to the second 80-bit value (ST0).
2472;
2473%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2474BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2475 PROLOGUE_4_ARGS
2476 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2477
2478 fninit                                     ; start from a freshly initialised x87 state
2479 fld tword [A2]                             ; pushed first, so it ends up in ST1
2480 fld tword [A3]                             ; pushed last, so it is ST0
2481 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2482 %1                                         ; implicit-operand instruction: consumes ST0/ST1, result in ST1, pops
2483
2484 fnstsw word [A1 + IEMFPURESULT.FSW]        ; capture the status word the instruction produced
2485 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2486 fstp tword [A1 + IEMFPURESULT.r80Result]   ; the popped stack now has the result in ST0; store it
2487
2488 fninit                                     ; reinitialise the x87 unit before returning
2489 add xSP, 20h                               ; undo the stack reservation made above
2490 EPILOGUE_4_ARGS
2491ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2492%endmacro
2493
2494IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
; fyl2x computes ST1 * log2(ST0), writes ST1 and pops, i.e. it has exactly the
; operand shape this macro implements.  This provides iemAImpl_fyl2x_r80_by_r80;
; the decoder still has to be pointed at it (callers are outside this file).
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
2495IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2496
2497
2498;;
2499; FPU instruction working on two 80-bit floating point values, only
2500; returning FSW.
2501; Emits iemAImpl_<instruction>_r80_by_r80; no floating point value is written back.
2502; @param 1 The instruction
2503;
2504; @param A0 FPU context (fxsave).
2505; @param A1 Pointer to a uint16_t for the resulting FSW.
2506; @param A2 Pointer to the first 80-bit value.
2507; @param A3 Pointer to the second 80-bit value.
2508;
2509%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2510BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2511 PROLOGUE_4_ARGS
2512 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2513
2514 fninit                                     ; start from a freshly initialised x87 state
2515 fld tword [A3]                             ; second value, pushed first so it ends up in ST1
2516 fld tword [A2]                             ; first value, pushed last so it is ST0
2517 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2518 %1 st0, st1                                ; run the instruction on the first value (ST0) against the second (ST1)
2519
2520 fnstsw word [A1]                           ; the status word is the only output
2521
2522 fninit                                     ; reinitialise the x87 unit (this also discards both stack entries)
2523 add xSP, 20h                               ; undo the stack reservation made above
2524 EPILOGUE_4_ARGS
2525ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2526%endmacro
2527 ; Instantiations: iemAImpl_fcom_r80_by_r80 and iemAImpl_fucom_r80_by_r80.
2528IEMIMPL_FPU_R80_BY_R80_FSW fcom
2529IEMIMPL_FPU_R80_BY_R80_FSW fucom
2530
2531
2532;;
2533; FPU instruction working on two 80-bit floating point values,
2534; returning FSW and EFLAGS (eax).
2535; Emits iemAImpl_<instruction>_r80_by_r80; the flags register is read back via pushf/pop.
2536; @param 1 The instruction
2537;
2538; @returns EFLAGS in EAX.
2539; @param A0 FPU context (fxsave).
2540; @param A1 Pointer to a uint16_t for the resulting FSW.
2541; @param A2 Pointer to the first 80-bit value.
2542; @param A3 Pointer to the second 80-bit value.
2543;
2544%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2545BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2546 PROLOGUE_4_ARGS
2547 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2548
2549 fninit                                     ; start from a freshly initialised x87 state
2550 fld tword [A3]                             ; second value, pushed first so it ends up in ST1
2551 fld tword [A2]                             ; first value, pushed last so it is ST0
2552 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2553 %1 st1                                     ; run the instruction with ST1 as its explicit operand
2554
2555 fnstsw word [A1]                           ; store the resulting status word
2556 pushf                                      ; snapshot the flags before the add below modifies them
2557 pop xAX                                    ; return value = the flags image just pushed
2558
2559 fninit                                     ; reinitialise the x87 unit (this also discards both stack entries)
2560 add xSP, 20h                               ; undo the stack reservation made above
2561 EPILOGUE_4_ARGS
2562ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2563%endmacro
2564 ; Instantiations: iemAImpl_fcomi_r80_by_r80 and iemAImpl_fucomi_r80_by_r80.
2565IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2566IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2567
2568
2569;;
2570; FPU instruction working on one 80-bit floating point value.
2571; Emits iemAImpl_<instruction>_r80; only a single value is on the x87 stack when the instruction runs.
2572; @param 1 The instruction
2573;
2574; @param A0 FPU context (fxsave).
2575; @param A1 Pointer to an IEMFPURESULT for the output.
2576; @param A2 Pointer to the 80-bit value.
2577;
2578%macro IEMIMPL_FPU_R80 1
2579BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2580 PROLOGUE_3_ARGS
2581 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2582
2583 fninit                                     ; start from a freshly initialised x87 state
2584 fld tword [A2]                             ; ST0 = the operand; nothing else is pushed
2585 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2586 %1                                         ; implicit-operand instruction acting on ST0
2587
2588 fnstsw word [A1 + IEMFPURESULT.FSW]        ; capture the status word the instruction produced
2589 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2590 fstp tword [A1 + IEMFPURESULT.r80Result]   ; store ST0 as the 80-bit result and pop it
2591
2592 fninit                                     ; reinitialise the x87 unit before returning
2593 add xSP, 20h                               ; undo the stack reservation made above
2594 EPILOGUE_3_ARGS
2595ENDPROC iemAImpl_ %+ %1 %+ _r80
2596%endmacro
2597 ; NOTE(review): every instruction below except fyl2x is a pure ST0 -> ST0 operation.
2598IEMIMPL_FPU_R80 fchs
2599IEMIMPL_FPU_R80 fabs
2600IEMIMPL_FPU_R80 f2xm1
2601IEMIMPL_FPU_R80 fyl2x                       ; NOTE(review): fyl2x reads ST0 and ST1 and pops, but ST1 is empty here; looks like it belongs under IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP - confirm against the caller
2602IEMIMPL_FPU_R80 fsqrt
2603IEMIMPL_FPU_R80 frndint
2604IEMIMPL_FPU_R80 fsin
2605IEMIMPL_FPU_R80 fcos
2606
2607
2608;;
2609; FPU instruction working on one 80-bit floating point value, only
2610; returning FSW.
2611; The emitted name has the same iemAImpl_<instruction>_r80 form as IEMIMPL_FPU_R80, so use only one of the two macros per instruction.
2612; @param 1 The instruction
2613;
2614; @param A0 FPU context (fxsave).
2615; @param A1 Pointer to a uint16_t for the resulting FSW.
2616; @param A2 Pointer to the 80-bit value.
2617;
2618%macro IEMIMPL_FPU_R80_FSW 1
2619BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2620 PROLOGUE_3_ARGS
2621 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2622
2623 fninit                                     ; start from a freshly initialised x87 state
2624 fld tword [A2]                             ; ST0 = the value to examine
2625 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2626 %1                                         ; implicit-operand instruction acting on ST0
2627
2628 fnstsw word [A1]                           ; the status word is the only output
2629
2630 fninit                                     ; reinitialise the x87 unit (this also discards the value still on the stack)
2631 add xSP, 20h                               ; undo the stack reservation made above
2632 EPILOGUE_3_ARGS
2633ENDPROC iemAImpl_ %+ %1 %+ _r80
2634%endmacro
2635 ; Instantiations: iemAImpl_ftst_r80 and iemAImpl_fxam_r80.
2636IEMIMPL_FPU_R80_FSW ftst
2637IEMIMPL_FPU_R80_FSW fxam
2638
2639
2640
2641;;
2642; FPU instruction loading an 80-bit floating point constant.
2643; Emits iemAImpl_<instruction>; the helper takes no input value, only the FPU context.
2644; @param 1 The instruction
2645;
2646; @param A0 FPU context (fxsave).
2647; @param A1 Pointer to an IEMFPURESULT for the output.
2648;
2649%macro IEMIMPL_FPU_R80_CONST 1
2650BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2651 PROLOGUE_2_ARGS
2652 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2653
2654 fninit                                     ; start from a freshly initialised x87 state
2655 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2656 %1                                         ; push the constant onto the x87 stack
2657
2658 fnstsw word [A1 + IEMFPURESULT.FSW]        ; capture the status word produced by the load
2659 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2660 fstp tword [A1 + IEMFPURESULT.r80Result]   ; store the constant as the 80-bit result and pop it
2661
2662 fninit                                     ; reinitialise the x87 unit before returning
2663 add xSP, 20h                               ; undo the stack reservation made above
2664 EPILOGUE_2_ARGS
2665ENDPROC iemAImpl_ %+ %1
2666%endmacro
2667 ; Instantiations: iemAImpl_{fld1,fldl2t,fldl2e,fldpi,fldlg2,fldln2,fldz}.
2668IEMIMPL_FPU_R80_CONST fld1
2669IEMIMPL_FPU_R80_CONST fldl2t
2670IEMIMPL_FPU_R80_CONST fldl2e
2671IEMIMPL_FPU_R80_CONST fldpi
2672IEMIMPL_FPU_R80_CONST fldlg2
2673IEMIMPL_FPU_R80_CONST fldln2
2674IEMIMPL_FPU_R80_CONST fldz
2675
2676
2677;;
2678; FPU instruction working on one 80-bit floating point value, outputting two.
2679; Emits iemAImpl_<instruction>_r80_r80; ST0 goes to r80Result2 and the entry below it to r80Result1.
2680; @param 1 The instruction
2681;
2682; @param A0 FPU context (fxsave).
2683; @param A1 Pointer to an IEMFPURESULTTWO for the output.
2684; @param A2 Pointer to the 80-bit value.
2685;
2686%macro IEMIMPL_FPU_R80_R80 1
2687BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2688 PROLOGUE_3_ARGS
2689 sub xSP, 20h                               ; reserve 20h bytes of stack; released again before the epilogue
2690
2691 fninit                                     ; start from a freshly initialised x87 state
2692 fld tword [A2]                             ; ST0 = the input value
2693 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; macro defined outside this chunk; presumably applies FCW/FSW from the fxsave image at A0
2694 %1                                         ; implicit-operand instruction; expected to leave two values on the stack
2695
2696 fnstsw word [A1 + IEMFPURESULTTWO.FSW]     ; capture the status word the instruction produced
2697 fnclex                                     ; clear exception flags first; presumably so the waiting fstp below cannot raise a pending exception
2698 fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; top of stack is stored as the second result
2699 fnclex                                     ; clear again in case the first store set any exception flags
2700 fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; the entry that was below it is stored as the first result
2701
2702 fninit                                     ; reinitialise the x87 unit before returning
2703 add xSP, 20h                               ; undo the stack reservation made above
2704 EPILOGUE_3_ARGS
2705ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2706%endmacro
2707 ; Instantiations: iemAImpl_{fptan,fxtract,fsincos}_r80_r80.
2708IEMIMPL_FPU_R80_R80 fptan
2709IEMIMPL_FPU_R80_R80 fxtract
2710IEMIMPL_FPU_R80_R80 fsincos
2711
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette