IEMAllAImpl.asm@ 47148

Last change on this file since 47148 was 47138, checked in by vboxsync, 12 years ago
IEM: idiv and div missing bits and fixes.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 74.3 KB

Line
1	; $Id: IEMAllAImpl.asm 47138 2013-07-14 18:05:53Z vboxsync $
2	;; @file
3	; IEM - Instruction Implementation in Assembly.
4	;
5
6	; Copyright (C) 2011-2012 Oracle Corporation
7	;
8	; This file is part of VirtualBox Open Source Edition (OSE), as
9	; available from http://www.215389.xyz. This file is free software;
10	; you can redistribute it and/or modify it under the terms of the GNU
11	; General Public License (GPL) as published by the Free Software
12	; Foundation, in version 2 as it comes in the "COPYING" file of the
13	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	;
16
17
18	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19	; Header Files ;
20	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21	%include "VBox/asmdefs.mac"
22	%include "VBox/err.mac"
23	%include "iprt/x86.mac"
24
25
26	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27	; Defined Constants And Macros ;
28	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30	;;
31	; RET XX / RET wrapper for fastcall.
32	;
; Emits the return instruction for a fastcall function: 'ret <imm>' when both
; RT_ARCH_X86 and RT_OS_WINDOWS are defined, a plain 'ret' in every other case.
; @param 1 The immediate operand for 'ret' (stack bytes released on return).
33	%macro RET_FASTCALL 1
34	%ifdef RT_ARCH_X86
35	%ifdef RT_OS_WINDOWS
36	ret %1
37	%else ; 32-bit but not Windows: the argument is ignored
38	ret
39	%endif
40	%else ; not RT_ARCH_X86: the argument is ignored
41	ret
42	%endif
43	%endmacro
44
45	;;
46	; NAME for fastcall functions.
47	;
48	;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49	; escaping (or whatever the dollar is good for here). Thus the ugly
50	; prefix argument.
51	;
; Default mapping: the symbol is simply NAME(a_Name); a_cbArgs and a_Prefix are
; accepted but unused. (NAME is not defined in this file - presumably it comes
; from one of the included .mac files.)
52	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53	%ifdef RT_ARCH_X86
54	%ifdef RT_OS_WINDOWS
; 32-bit Windows only: replace the default with a decorated name built by
; pasting a_Prefix, a_Name, '@' and a_cbArgs together.
55	%undef NAME_FASTCALL
56	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57	%endif
58	%endif
59
60	;;
61	; BEGINPROC for fastcall functions.
62	;
63	; @param 1 The function name (C).
64	; @param 2 The argument size on x86.
65	;
; Emits the export/global declarations appropriate for the object format and
; then the entry label itself. The export and global directives pass '$@' as
; the name prefix while the label definition passes a bare '@' (see the todo
; note on NAME_FASTCALL about dollar escaping in yasm).
66	%macro BEGINPROC_FASTCALL 2
67	%ifdef ASM_FORMAT_PE
68	export %1=NAME_FASTCALL(%1,%2,$@)
69	%endif
70	%ifdef __NASM__
71	%ifdef ASM_FORMAT_OMF
72	export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73	%endif
74	%endif
75	%ifndef ASM_FORMAT_BIN ; no global directive for flat binary output
76	global NAME_FASTCALL(%1,%2,$@)
77	%endif
78	NAME_FASTCALL(%1,%2,@):
79	%endmacro
80
81
82	;
83	; We employ some macro assembly here to hide the calling convention differences.
84	;
85	%ifdef RT_ARCH_AMD64
; AMD64, one argument: the prologue emits nothing and the epilogue is a plain
; 'ret' (no registers are saved and no stack bytes are released).
86	%macro PROLOGUE_1_ARGS 0
87	%endmacro
88	%macro EPILOGUE_1_ARGS 0
89	ret
90	%endmacro
; AMD64, one argument, explicit byte count: the count is optional and ignored
; here. Accepting it (0-1 parameters) keeps invocations written for the 32-bit
; variant, which requires one parameter, assembling on AMD64 as well, and
; matches the other AMD64 EPILOGUE_n_ARGS_EX macros.
91	%macro EPILOGUE_1_ARGS_EX 0-1
92	ret
93	%endmacro
94
; AMD64, two to four arguments: every prologue is empty and every epilogue is
; a plain 'ret'. The _EX variants take one parameter (the byte count used by
; the 32-bit variants) and do not use it.
95	%macro PROLOGUE_2_ARGS 0
96	%endmacro
97	%macro EPILOGUE_2_ARGS 0
98	ret
99	%endmacro
100	%macro EPILOGUE_2_ARGS_EX 1
101	ret
102	%endmacro
103
104	%macro PROLOGUE_3_ARGS 0
105	%endmacro
106	%macro EPILOGUE_3_ARGS 0
107	ret
108	%endmacro
109	%macro EPILOGUE_3_ARGS_EX 1
110	ret
111	%endmacro
112
113	%macro PROLOGUE_4_ARGS 0
114	%endmacro
115	%macro EPILOGUE_4_ARGS 0
116	ret
117	%endmacro
118	%macro EPILOGUE_4_ARGS_EX 1
119	ret
120	%endmacro
121
122	%ifdef ASM_CALL64_GCC
123	%define A0 rdi
124	%define A0_32 edi
125	%define A0_16 di
126	%define A0_8 dil
127
128	%define A1 rsi
129	%define A1_32 esi
130	%define A1_16 si
131	%define A1_8 sil
132
133	%define A2 rdx
134	%define A2_32 edx
135	%define A2_16 dx
136	%define A2_8 dl
137
138	%define A3 rcx
139	%define A3_32 ecx
140	%define A3_16 cx
141	%endif
142
143	%ifdef ASM_CALL64_MSC
144	%define A0 rcx
145	%define A0_32 ecx
146	%define A0_16 cx
147	%define A0_8 cl
148
149	%define A1 rdx
150	%define A1_32 edx
151	%define A1_16 dx
152	%define A1_8 dl
153
154	%define A2 r8
155	%define A2_32 r8d
156	%define A2_16 r8w
157	%define A2_8 r8b
158
159	%define A3 r9
160	%define A3_32 r9d
161	%define A3_16 r9w
162	%endif
163
164	%define T0 rax
165	%define T0_32 eax
166	%define T0_16 ax
167	%define T0_8 al
168
169	%define T1 r11
170	%define T1_32 r11d
171	%define T1_16 r11w
172	%define T1_8 r11b
173
174	%else
175	; x86
; x86, one or two arguments: both arrive in registers (A0 = ecx, A1 = edx), so
; the prologue only saves edi, which this file uses as the temporary T1.
176	%macro PROLOGUE_1_ARGS 0
177	push edi
178	%endmacro
179	%macro EPILOGUE_1_ARGS 0
180	pop edi
181	ret 0
182	%endmacro
183	%macro EPILOGUE_1_ARGS_EX 1
184	pop edi
185	ret %1
186	%endmacro
187
188	%macro PROLOGUE_2_ARGS 0
189	push edi
190	%endmacro
191	%macro EPILOGUE_2_ARGS 0
192	pop edi
193	ret 0
194	%endmacro
195	%macro EPILOGUE_2_ARGS_EX 1
196	pop edi
197	ret %1
198	%endmacro
199
; x86, three arguments: the third argument lives on the stack and is loaded
; into ebx (A2). ebx and edi are saved and restored around the body.
200	%macro PROLOGUE_3_ARGS 0
201	push ebx
202	mov ebx, [esp + 4 + 4] ; skip saved ebx (4) and return address (4)
203	push edi
204	%endmacro
; The parameter is the number of stack bytes to release on return; anything
; below 4 is rejected at assembly time.
205	%macro EPILOGUE_3_ARGS_EX 1
206	%if (%1) < 4
207	%error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
208	%endif
209	pop edi
210	pop ebx
211	ret %1
212	%endmacro
213	%macro EPILOGUE_3_ARGS 0
214	EPILOGUE_3_ARGS_EX 4
215	%endmacro
216
; x86, four arguments: the third and fourth arguments are loaded from the
; stack into ebx (A2) and esi (A3). ebx, edi and esi are saved and restored.
217	%macro PROLOGUE_4_ARGS 0
218	push ebx
219	push edi
220	push esi
221	mov ebx, [esp + 12 + 4 + 0] ; 12 = three saved registers, 4 = return address
222	mov esi, [esp + 12 + 4 + 4]
223	%endmacro
; The parameter is the number of stack bytes to release on return; anything
; below 8 is rejected at assembly time.
224	%macro EPILOGUE_4_ARGS_EX 1
225	%if (%1) < 8
226	%error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
227	%endif
228	pop esi
229	pop edi
230	pop ebx
231	ret %1
232	%endmacro
233	%macro EPILOGUE_4_ARGS 0
234	EPILOGUE_4_ARGS_EX 8
235	%endmacro
236
237	%define A0 ecx
238	%define A0_32 ecx
239	%define A0_16 cx
240	%define A0_8 cl
241
242	%define A1 edx
243	%define A1_32 edx
244	%define A1_16 dx
245	%define A1_8 dl
246
247	%define A2 ebx
248	%define A2_32 ebx
249	%define A2_16 bx
250	%define A2_8 bl
251
252	%define A3 esi
253	%define A3_32 esi
254	%define A3_16 si
255
256	%define T0 eax
257	%define T0_32 eax
258	%define T0_16 ax
259	%define T0_8 al
260
261	%define T1 edi
262	%define T1_32 edi
263	%define T1_16 di
264	%endif
265
266
267	;;
268	; Load the modified (%2) and undefined (%3) flags from [%1] into the host EFLAGS.
269	;
; The '%if (%3) != 0' guard in the body is commented out, so the load sequence
; is always emitted, also when there are no undefined flags.
;
270	; @remarks Clobbers T0, stack. Changes EFLAGS.
271	; @remarks Some callers in this file pass a raw register (r9, r10, ebp) as parameter 1.
272	; @param 1 The parameter (A0..A3) pointing to the eflags.
273	; @param 2 The set of modified flags.
274	; @param 3 The set of undefined flags.
275	;
276	%macro IEM_MAYBE_LOAD_FLAGS 3
277	;%if (%3) != 0
278	pushf ; store current flags
279	mov T0_32, [%1] ; load the guest flags
280	and dword [xSP], ~(%2 \| %3) ; mask out the modified and undefined flags
281	and T0_32, (%2 \| %3) ; select the modified and undefined flags.
282	or [xSP], T0 ; merge guest flags with host flags.
283	popf ; load the mixed flags.
284	;%endif
285	%endmacro
286
287	;;
288	; Copy the modified and undefined host flags back into the EFLAGS value at [%1].
289	;
; All other bits of the value at [%1] are preserved. Nothing is emitted when
; both masks are zero.
;
290	; @remarks Clobbers T0, T1, stack.
; @remarks Parameter 1 must not be T0 or T1: T1 is overwritten before [%1] is
;          read and T0 is overwritten by that read.
291	; @param 1 The register pointing to the EFLAGS.
292	; @param 2 The mask of modified flags to save.
293	; @param 3 The mask of undefined flags to (maybe) save.
294	;
295	%macro IEM_SAVE_FLAGS 3
296	%if (%2 \| %3) != 0
297	pushf
298	pop T1
299	mov T0_32, [%1] ; flags
300	and T0_32, ~(%2 \| %3) ; clear the modified & undefined flags.
301	and T1_32, (%2 \| %3) ; select the modified and undefined flags.
302	or T0_32, T1_32 ; combine the flags.
303	mov [%1], T0_32 ; save the flags.
304	%endif
305	%endmacro
306
307
308	;;
309	; Macro for implementing a binary operator.
310	;
311	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312	; variants, except on 32-bit system where the 64-bit accesses requires hand
313	; coding.
314	;
315	; All the functions takes a pointer to the destination memory operand in A0,
316	; the source register operand in A1 and a pointer to eflags in A2.
317	;
318	; @param 1 The instruction mnemonic.
319	; @param 2 Non-zero if there should be a locked version.
320	; @param 3 The modified flags.
321	; @param 4 The undefined flags.
322	;
; Generates iemAImpl_<op>_u8/_u16/_u32/_u64 and, when the second parameter is
; non-zero, the matching _locked variants. Each generated function loads the
; guest flags, executes the instruction on [A0] with A1 as source and saves
; the resulting flags back through A2. On 32-bit hosts the 64-bit variants
; are int3 stubs that still release their 8 bytes of stack arguments.
323	%macro IEMIMPL_BIN_OP 4
324	BEGINCODE
325	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326	PROLOGUE_3_ARGS
327	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328	%1 byte [A0], A1_8
329	IEM_SAVE_FLAGS A2, %3, %4
330	EPILOGUE_3_ARGS
331	ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334	PROLOGUE_3_ARGS
335	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336	%1 word [A0], A1_16
337	IEM_SAVE_FLAGS A2, %3, %4
338	EPILOGUE_3_ARGS
339	ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342	PROLOGUE_3_ARGS
343	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344	%1 dword [A0], A1_32
345	IEM_SAVE_FLAGS A2, %3, %4
346	EPILOGUE_3_ARGS
347	ENDPROC iemAImpl_ %+ %1 %+ _u32
348
349	%ifdef RT_ARCH_AMD64
350	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351	PROLOGUE_3_ARGS
352	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353	%1 qword [A0], A1
354	IEM_SAVE_FLAGS A2, %3, %4
355	EPILOGUE_3_ARGS_EX 8
356	ENDPROC iemAImpl_ %+ %1 %+ _u64
357	%else ; stub it for now - later, replace with hand coded stuff.
358	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
359	int3
360	ret 8 ; 16 bytes of fastcall args = 8 in ecx/edx + 8 on the stack, same as the _u64_locked stub
361	ENDPROC iemAImpl_ %+ %1 %+ _u64
362	%endif ; !RT_ARCH_AMD64
363
364	%if %2 != 0 ; locked versions requested?
365
366	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
367	PROLOGUE_3_ARGS
368	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
369	lock %1 byte [A0], A1_8
370	IEM_SAVE_FLAGS A2, %3, %4
371	EPILOGUE_3_ARGS
372	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
373
374	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
375	PROLOGUE_3_ARGS
376	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
377	lock %1 word [A0], A1_16
378	IEM_SAVE_FLAGS A2, %3, %4
379	EPILOGUE_3_ARGS
380	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
381
382	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
383	PROLOGUE_3_ARGS
384	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
385	lock %1 dword [A0], A1_32
386	IEM_SAVE_FLAGS A2, %3, %4
387	EPILOGUE_3_ARGS
388	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
389
390	%ifdef RT_ARCH_AMD64
391	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
392	PROLOGUE_3_ARGS
393	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
394	lock %1 qword [A0], A1
395	IEM_SAVE_FLAGS A2, %3, %4
396	EPILOGUE_3_ARGS_EX 8
397	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
398	%else ; stub it for now - later, replace with hand coded stuff.
399	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
400	int3
401	ret 8
402	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
403	%endif ; !RT_ARCH_AMD64
404	%endif ; locked
405	%endmacro
406
407	; instr, lock, modified-flags, undefined-flags.
408	IEMIMPL_BIN_OP add, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
409	IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
410	IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
411	IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
412	IEMIMPL_BIN_OP or, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
413	IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
414	IEMIMPL_BIN_OP and, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
415	IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
416	IEMIMPL_BIN_OP test, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
417
418
419	;;
420	; Macro for implementing a bit operator.
421	;
422	; This will generate code for the 16, 32 and 64 bit accesses with locked
423	; variants, except on 32-bit system where the 64-bit accesses requires hand
424	; coding.
425	;
426	; All the functions takes a pointer to the destination memory operand in A0,
427	; the source register operand in A1 and a pointer to eflags in A2.
428	;
429	; @param 1 The instruction mnemonic.
430	; @param 2 Non-zero if there should be a locked version.
431	; @param 3 The modified flags.
432	; @param 4 The undefined flags.
433	;
; Generates iemAImpl_<op>_u16/_u32/_u64 and, when the second parameter is
; non-zero, the matching _locked variants (no 8-bit variant is generated).
; Each function loads the guest flags, executes the instruction on [A0] with
; A1 as the second operand and saves the resulting flags back through A2.
; On 32-bit hosts the 64-bit variants are int3 stubs.
434	%macro IEMIMPL_BIT_OP 4
435	BEGINCODE
436	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
437	PROLOGUE_3_ARGS
438	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
439	%1 word [A0], A1_16
440	IEM_SAVE_FLAGS A2, %3, %4
441	EPILOGUE_3_ARGS
442	ENDPROC iemAImpl_ %+ %1 %+ _u16
443
444	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
445	PROLOGUE_3_ARGS
446	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
447	%1 dword [A0], A1_32
448	IEM_SAVE_FLAGS A2, %3, %4
449	EPILOGUE_3_ARGS
450	ENDPROC iemAImpl_ %+ %1 %+ _u32
451
452	%ifdef RT_ARCH_AMD64
453	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
454	PROLOGUE_3_ARGS
455	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
456	%1 qword [A0], A1
457	IEM_SAVE_FLAGS A2, %3, %4
458	EPILOGUE_3_ARGS_EX 8
459	ENDPROC iemAImpl_ %+ %1 %+ _u64
460	%else ; stub it for now - later, replace with hand coded stuff.
461	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
462	int3
463	ret 8 ; release the 8 bytes of stack arguments
464	ENDPROC iemAImpl_ %+ %1 %+ _u64
465	%endif ; !RT_ARCH_AMD64
466
467	%if %2 != 0 ; locked versions requested?
468
469	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
470	PROLOGUE_3_ARGS
471	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
472	lock %1 word [A0], A1_16
473	IEM_SAVE_FLAGS A2, %3, %4
474	EPILOGUE_3_ARGS
475	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
476
477	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
478	PROLOGUE_3_ARGS
479	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
480	lock %1 dword [A0], A1_32
481	IEM_SAVE_FLAGS A2, %3, %4
482	EPILOGUE_3_ARGS
483	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
484
485	%ifdef RT_ARCH_AMD64
486	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
487	PROLOGUE_3_ARGS
488	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
489	lock %1 qword [A0], A1
490	IEM_SAVE_FLAGS A2, %3, %4
491	EPILOGUE_3_ARGS_EX 8
492	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
493	%else ; stub it for now - later, replace with hand coded stuff.
494	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
495	int3
496	ret 8
497	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
498	%endif ; !RT_ARCH_AMD64
499	%endif ; locked
500	%endmacro
501	IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
502	IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
503	IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
504	IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
505
506	;;
507	; Macro for implementing a bit search operator.
508	;
509	; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
510	; system where the 64-bit accesses requires hand coding.
511	;
512	; All the functions takes a pointer to the destination memory operand in A0,
513	; the source register operand in A1 and a pointer to eflags in A2.
514	;
515	; @param 1 The instruction mnemonic.
516	; @param 2 The modified flags.
517	; @param 3 The undefined flags.
518	;
; Three-parameter form used for bsf/bsr. It shares its name with the
; four-parameter IEMIMPL_BIT_OP; the two definitions differ only in parameter
; count. The instruction result is computed into T0 from the A1 value and is
; written to [A0] only when the instruction leaves ZF clear; with ZF set the
; destination memory is left untouched.
519	%macro IEMIMPL_BIT_OP 3
520	BEGINCODE
521	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
522	PROLOGUE_3_ARGS
523	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
524	%1 T0_16, A1_16
525	jz .unchanged_dst ; ZF set: skip the store
526	mov [A0], T0_16
527	.unchanged_dst:
528	IEM_SAVE_FLAGS A2, %2, %3
529	EPILOGUE_3_ARGS
530	ENDPROC iemAImpl_ %+ %1 %+ _u16
531
532	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
533	PROLOGUE_3_ARGS
534	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
535	%1 T0_32, A1_32
536	jz .unchanged_dst ; ZF set: skip the store
537	mov [A0], T0_32
538	.unchanged_dst:
539	IEM_SAVE_FLAGS A2, %2, %3
540	EPILOGUE_3_ARGS
541	ENDPROC iemAImpl_ %+ %1 %+ _u32
542
543	%ifdef RT_ARCH_AMD64
544	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
545	PROLOGUE_3_ARGS
546	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
547	%1 T0, A1
548	jz .unchanged_dst ; ZF set: skip the store
549	mov [A0], T0
550	.unchanged_dst:
551	IEM_SAVE_FLAGS A2, %2, %3
552	EPILOGUE_3_ARGS_EX 8
553	ENDPROC iemAImpl_ %+ %1 %+ _u64
554	%else ; stub it for now - later, replace with hand coded stuff.
555	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
556	int3
557	ret 8 ; release the 8 bytes of stack arguments
558	ENDPROC iemAImpl_ %+ %1 %+ _u64
559	%endif ; !RT_ARCH_AMD64
560	%endmacro
561	IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
562	IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
563
564
565	;
566	; IMUL is also a similar but yet different case (no lock, no mem dst).
567	; The rDX:rAX variant of imul is handled together with mul further down.
568	;
569	BEGINCODE
; 16-bit two-operand IMUL: multiplies the A1 value by the word at [A0], stores
; the product back to [A0] and saves OF/CF (plus the undefined flags) via A2.
570	BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
571	PROLOGUE_3_ARGS
572	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
573	imul A1_16, word [A0]
574	mov [A0], A1_16 ; mov leaves the flags from imul intact for the save below
575	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
576	EPILOGUE_3_ARGS
577	ENDPROC iemAImpl_imul_two_u16
578
; 32-bit two-operand IMUL: multiplies the A1 value by the dword at [A0], stores
; the product back to [A0] and saves OF/CF (plus the undefined flags) via A2.
579	BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
580	PROLOGUE_3_ARGS
581	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
582	imul A1_32, dword [A0]
583	mov [A0], A1_32 ; mov leaves the flags from imul intact for the save below
584	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
585	EPILOGUE_3_ARGS
586	ENDPROC iemAImpl_imul_two_u32
587
; 64-bit two-operand IMUL. Implemented on AMD64 only; on 32-bit hosts the body
; is a single int3 between the regular three-argument prologue and epilogue.
588	BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
589	PROLOGUE_3_ARGS
590	%ifdef RT_ARCH_AMD64
591	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
592	imul A1, qword [A0]
593	mov [A0], A1
594	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
595	%else
596	int3 ;; @todo implement me
597	%endif
598	EPILOGUE_3_ARGS_EX 8 ; the byte count only matters on 32-bit hosts
599	ENDPROC iemAImpl_imul_two_u64
600
601
602	;
603	; XCHG for memory operands. This implies locking. No flag changes.
604	;
605	; Each function takes two arguments, first the pointer to the memory,
606	; then the pointer to the register. They all return void.
607	;
608	BEGINCODE
; Swaps the byte at [A0] (memory operand) with the byte at [A1] (register
; value). The load and the store on [A1] are ordinary moves; only the access
; to [A0] goes through the xchg instruction.
609	BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
610	PROLOGUE_2_ARGS
611	mov T0_8, [A1]
612	xchg [A0], T0_8
613	mov [A1], T0_8 ; hand the old memory value back to the caller
614	EPILOGUE_2_ARGS
615	ENDPROC iemAImpl_xchg_u8
616
; Swaps the word at [A0] (memory operand) with the word at [A1] (register
; value). Only the access to [A0] goes through the xchg instruction.
617	BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
618	PROLOGUE_2_ARGS
619	mov T0_16, [A1]
620	xchg [A0], T0_16
621	mov [A1], T0_16 ; hand the old memory value back to the caller
622	EPILOGUE_2_ARGS
623	ENDPROC iemAImpl_xchg_u16
624
; Swaps the dword at [A0] (memory operand) with the dword at [A1] (register
; value). Only the access to [A0] goes through the xchg instruction.
625	BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
626	PROLOGUE_2_ARGS
627	mov T0_32, [A1]
628	xchg [A0], T0_32
629	mov [A1], T0_32 ; hand the old memory value back to the caller
630	EPILOGUE_2_ARGS
631	ENDPROC iemAImpl_xchg_u32
632
; Swaps the qword at [A0] with the qword at [A1]. Implemented on AMD64 only;
; on 32-bit hosts this is an int3 stub.
633	BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
634	%ifdef RT_ARCH_AMD64
635	PROLOGUE_2_ARGS
636	mov T0, [A1]
637	xchg [A0], T0
638	mov [A1], T0 ; hand the old memory value back to the caller
639	EPILOGUE_2_ARGS
640	%else
641	int3
642	ret 0 ; both arguments are in registers, nothing to release
643	%endif
644	ENDPROC iemAImpl_xchg_u64
645
646
647	;
648	; XADD for memory operands.
649	;
650	; Each function takes three arguments, first the pointer to the
651	; memory/register, then the pointer to the register, and finally a pointer to
652	; eflags. They all return void.
653	;
654	BEGINCODE
; 8-bit XADD without lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
655	BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
656	PROLOGUE_3_ARGS
657	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
658	mov T0_8, [A1] ; must follow the flag load, which clobbers T0
659	xadd [A0], T0_8
660	mov [A1], T0_8 ; must precede the flag save, which clobbers T0
661	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
662	EPILOGUE_3_ARGS
663	ENDPROC iemAImpl_xadd_u8
664
; 16-bit XADD without lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
665	BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
666	PROLOGUE_3_ARGS
667	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
668	mov T0_16, [A1] ; must follow the flag load, which clobbers T0
669	xadd [A0], T0_16
670	mov [A1], T0_16 ; must precede the flag save, which clobbers T0
671	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
672	EPILOGUE_3_ARGS
673	ENDPROC iemAImpl_xadd_u16
674
; 32-bit XADD without lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
675	BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
676	PROLOGUE_3_ARGS
677	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
678	mov T0_32, [A1] ; must follow the flag load, which clobbers T0
679	xadd [A0], T0_32
680	mov [A1], T0_32 ; must precede the flag save, which clobbers T0
681	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
682	EPILOGUE_3_ARGS
683	ENDPROC iemAImpl_xadd_u32
684
; 64-bit XADD without lock prefix. Implemented on AMD64 only; on 32-bit hosts
; this is an int3 stub that releases the single stack argument.
685	BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
686	%ifdef RT_ARCH_AMD64
687	PROLOGUE_3_ARGS
688	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
689	mov T0, [A1] ; must follow the flag load, which clobbers T0
690	xadd [A0], T0
691	mov [A1], T0 ; must precede the flag save, which clobbers T0
692	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
693	EPILOGUE_3_ARGS
694	%else
695	int3
696	ret 4
697	%endif
698	ENDPROC iemAImpl_xadd_u64
699
; 8-bit XADD with lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
700	BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
701	PROLOGUE_3_ARGS
702	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
703	mov T0_8, [A1] ; must follow the flag load, which clobbers T0
704	lock xadd [A0], T0_8
705	mov [A1], T0_8 ; must precede the flag save, which clobbers T0
706	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
707	EPILOGUE_3_ARGS
708	ENDPROC iemAImpl_xadd_u8_locked
709
; 16-bit XADD with lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
710	BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
711	PROLOGUE_3_ARGS
712	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
713	mov T0_16, [A1] ; must follow the flag load, which clobbers T0
714	lock xadd [A0], T0_16
715	mov [A1], T0_16 ; must precede the flag save, which clobbers T0
716	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
717	EPILOGUE_3_ARGS
718	ENDPROC iemAImpl_xadd_u16_locked
719
; 32-bit XADD with lock prefix: [A0] receives the sum, [A1] receives the
; previous value of [A0], and the arithmetic flags are saved via A2.
720	BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
721	PROLOGUE_3_ARGS
722	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
723	mov T0_32, [A1] ; must follow the flag load, which clobbers T0
724	lock xadd [A0], T0_32
725	mov [A1], T0_32 ; must precede the flag save, which clobbers T0
726	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
727	EPILOGUE_3_ARGS
728	ENDPROC iemAImpl_xadd_u32_locked
729
; 64-bit XADD with lock prefix. Implemented on AMD64 only; on 32-bit hosts
; this is an int3 stub that releases the single stack argument.
730	BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
731	%ifdef RT_ARCH_AMD64
732	PROLOGUE_3_ARGS
733	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
734	mov T0, [A1] ; must follow the flag load, which clobbers T0
735	lock xadd [A0], T0
736	mov [A1], T0 ; must precede the flag save, which clobbers T0
737	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
738	EPILOGUE_3_ARGS
739	%else
740	int3
741	ret 4
742	%endif
743	ENDPROC iemAImpl_xadd_u64_locked
744
745
746	;
747	; CMPXCHG8B.
748	;
749	; These are tricky register wise, so the code is duplicated for each calling
750	; convention.
751	;
752	; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
753	;
754	; C-proto:
755	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
756	; uint32_t *pEFlags));
757	;
758	BEGINCODE
; CMPXCHG8B on [pu64Dst]. The instruction needs edx:eax (compare value) and
; ecx:ebx (replacement value), which collide with the argument registers, so
; each calling convention first moves its pointer arguments out of the way.
; Only ZF is loaded from and saved to *pEFlags. The value left in edx:eax by
; the instruction is always written back to *pu64EaxEdx. The instruction is
; always issued with a lock prefix. rbx/ebx (and on x86 also esi, edi, ebp)
; are saved and restored.
759	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
760	%ifdef RT_ARCH_AMD64
761	%ifdef ASM_CALL64_MSC
762	push rbx
763
764	mov r11, rdx ; pu64EaxEdx (is also T1)
765	mov r10, rcx ; pu64Dst
766
767	mov ebx, [r8]
768	mov ecx, [r8 + 4]
769	IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
770	mov eax, [r11]
771	mov edx, [r11 + 4]
772
773	lock cmpxchg8b [r10]
774
775	mov [r11], eax
776	mov [r11 + 4], edx
777	IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
778
779	pop rbx
780	ret
781	%else
; AMD64 without ASM_CALL64_MSC: the code uses rdi = pu64Dst, rsi = pu64EaxEdx,
; rdx = pu64EbxEcx and rcx = pEFlags.
782	push rbx
783
784	mov r10, rcx ; pEFlags
785	mov r11, rdx ; pu64EbxEcx (is also T1)
786
787	mov ebx, [r11]
788	mov ecx, [r11 + 4]
789	IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
790	mov eax, [rsi]
791	mov edx, [rsi + 4]
792
793	lock cmpxchg8b [rdi]
794
795	mov [rsi], eax
796	mov [rsi + 4], edx
797	IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
798
799	pop rbx
800	ret
801
802	%endif
803	%else
; x86: the first two arguments are in ecx/edx, the last two are on the stack
; above the four saved registers (16 bytes) and the return address (4 bytes).
804	push esi
805	push edi
806	push ebx
807	push ebp
808
809	mov edi, ecx ; pu64Dst
810	mov esi, edx ; pu64EaxEdx
811	mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
812	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
813
814	mov ebx, [ecx]
815	mov ecx, [ecx + 4]
816	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
817	mov eax, [esi]
818	mov edx, [esi + 4]
819
820	lock cmpxchg8b [edi]
821
822	mov [esi], eax
823	mov [esi + 4], edx
824	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
825
826	pop ebp
827	pop ebx
828	pop edi
829	pop esi
830	ret 8 ; release the two stack arguments
831	%endif
832	ENDPROC iemAImpl_cmpxchg8b
833
; Locked CMPXCHG8B entry point: a tail jump to iemAImpl_cmpxchg8b, which
; already issues the instruction with a lock prefix. No registers or stack
; are touched before the jump, so the target sees the original arguments.
834	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
835	; Lazy bird always lock prefixes cmpxchg8b.
836	jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
837	ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
841	;
842	; CMPXCHG.
843	;
844	; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
845	;
846	; C-proto:
847	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t puXDst, uintX_t puEax, uintX_t uReg, uint32_t pEFlags));
848	;
849	BEGINCODE
; Generates iemAImpl_cmpxchg_u8/_u16/_u32/_u64 with the given name suffix.
; The first parameter is the instruction prefix (empty or 'lock'), the second
; the suffix appended to the function names (empty or '_locked').
;
; In every variant the accumulator is loaded from [A1] after the flag load
; (which clobbers eax) and written back to [A1] before the flag save (which
; clobbers eax again). The accumulator width must match the operand size of
; the cmpxchg instruction.
;
; On 32-bit hosts the 64-bit variant is built on cmpxchg8b, which only
; produces ZF; the remaining arithmetic flags are recreated with cmp.
850	%macro IEMIMPL_CMPXCHG 2
851	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
852	PROLOGUE_4_ARGS
853	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
854	mov al, [A1]
855	%1 cmpxchg [A0], A2_8
856	mov [A1], al
857	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
858	EPILOGUE_4_ARGS
859	ENDPROC iemAImpl_cmpxchg_u8 %+ %2
860
861	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
862	PROLOGUE_4_ARGS
863	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
864	mov ax, [A1]
865	%1 cmpxchg [A0], A2_16
866	mov [A1], ax
867	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
868	EPILOGUE_4_ARGS
869	ENDPROC iemAImpl_cmpxchg_u16 %+ %2
870
871	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
872	PROLOGUE_4_ARGS
873	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
874	mov eax, [A1]
875	%1 cmpxchg [A0], A2_32
876	mov [A1], eax
877	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
878	EPILOGUE_4_ARGS
879	ENDPROC iemAImpl_cmpxchg_u32 %+ %2
880
881	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
882	%ifdef RT_ARCH_AMD64
883	PROLOGUE_4_ARGS
884	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
885	mov rax, [A1] ; the 64-bit cmpxchg compares all of rax, so load the full qword
886	%1 cmpxchg [A0], A2
887	mov [A1], rax ; write back the full qword accumulator
888	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
889	EPILOGUE_4_ARGS
890	%else
891	;
892	; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
893	;
894	push esi
895	push edi
896	push ebx
897	push ebp
898
899	mov edi, ecx ; pu64Dst
900	mov esi, edx ; pu64Rax
901	mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
902	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
903
904	mov ebx, [ecx]
905	mov ecx, [ecx + 4]
906	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
907	mov eax, [esi]
908	mov edx, [esi + 4]
909
910	lock cmpxchg8b [edi]
911
912	; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
913	jnz .cmpxchg8b_not_equal ; ZF clear = values differed; ZF set = exchange done
914	cmp eax, eax ; just set the other flags.
915	.store:
916	mov [esi], eax
917	mov [esi + 4], edx
918	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
919
920	pop ebp
921	pop ebx
922	pop edi
923	pop esi
924	ret 8
925
; Mismatch path: [esi] still holds the caller's compare value, edx:eax the
; value read from memory. Compare high dwords first, then low dwords, so the
; saved flags come from the first pair that differs (ZF ends up clear).
926	.cmpxchg8b_not_equal:
927	cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
928	jne .store
929	cmp [esi], eax
930	jmp .store
931
932	%endif
933	ENDPROC iemAImpl_cmpxchg_u64 %+ %2
934	%endmacro ; IEMIMPL_CMPXCHG
935
936	IEMIMPL_CMPXCHG , ,
937	IEMIMPL_CMPXCHG lock, _locked
938
939	;;
940	; Macro for implementing a unary operator.
941	;
942	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
943	; variants, except on 32-bit system where the 64-bit accesses requires hand
944	; coding.
945	;
946	; All the functions take a pointer to the destination memory operand in A0
947	; and a pointer to eflags in A1.
948	;
949	; @param 1 The instruction mnemonic.
950	; @param 2 The modified flags.
951	; @param 3 The undefined flags.
952	;
; Generates iemAImpl_<op>_u8/_u16/_u32/_u64, each with a _locked twin. Every
; function takes two arguments: A0 points to the operand in memory and A1
; points to the eflags value. On 32-bit hosts both 64-bit variants are int3
; stubs.
953	%macro IEMIMPL_UNARY_OP 3
954	BEGINCODE
955	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
956	PROLOGUE_2_ARGS
957	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
958	%1 byte [A0]
959	IEM_SAVE_FLAGS A1, %2, %3
960	EPILOGUE_2_ARGS
961	ENDPROC iemAImpl_ %+ %1 %+ _u8
962
963	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
964	PROLOGUE_2_ARGS
965	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
966	lock %1 byte [A0]
967	IEM_SAVE_FLAGS A1, %2, %3
968	EPILOGUE_2_ARGS
969	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
970
971	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
972	PROLOGUE_2_ARGS
973	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
974	%1 word [A0]
975	IEM_SAVE_FLAGS A1, %2, %3
976	EPILOGUE_2_ARGS
977	ENDPROC iemAImpl_ %+ %1 %+ _u16
978
979	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
980	PROLOGUE_2_ARGS
981	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
982	lock %1 word [A0]
983	IEM_SAVE_FLAGS A1, %2, %3
984	EPILOGUE_2_ARGS
985	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
986
987	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
988	PROLOGUE_2_ARGS
989	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
990	%1 dword [A0]
991	IEM_SAVE_FLAGS A1, %2, %3
992	EPILOGUE_2_ARGS
993	ENDPROC iemAImpl_ %+ %1 %+ _u32
994
995	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
996	PROLOGUE_2_ARGS
997	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
998	lock %1 dword [A0]
999	IEM_SAVE_FLAGS A1, %2, %3
1000	EPILOGUE_2_ARGS
1001	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1002
1003	%ifdef RT_ARCH_AMD64
1004	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1005	PROLOGUE_2_ARGS
1006	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1007	%1 qword [A0]
1008	IEM_SAVE_FLAGS A1, %2, %3
1009	EPILOGUE_2_ARGS
1010	ENDPROC iemAImpl_ %+ %1 %+ _u64
1011
1012	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1013	PROLOGUE_2_ARGS
1014	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1015	lock %1 qword [A0]
1016	IEM_SAVE_FLAGS A1, %2, %3
1017	EPILOGUE_2_ARGS
1018	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1019	%else
1020	; stub them for now.
1021	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1022	int3
1023	ret 0 ; both arguments are in registers, nothing to release
1024	ENDPROC iemAImpl_ %+ %1 %+ _u64
1025	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1026	int3
1027	ret 0
1028	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1029	%endif
1030
1031	%endmacro
1032
1033	IEMIMPL_UNARY_OP inc, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
1034	IEMIMPL_UNARY_OP dec, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
1035	IEMIMPL_UNARY_OP neg, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
1036	IEMIMPL_UNARY_OP not, 0, 0
1037
1038
1039
1040	;;
1041	; Macro for implementing a shift operation.
1042	;
1043	; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1044	; 32-bit system where the 64-bit accesses requires hand coding.
1045	;
1046	; All the functions takes a pointer to the destination memory operand in A0,
1047	; the shift count in A1 and a pointer to eflags in A2.
1048	;
1049	; @param 1 The instruction mnemonic.
1050	; @param 2 The modified flags.
1051	; @param 3 The undefined flags.
1052	;
1053	; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1054	;
; Generates iemAImpl_<op>_u8/_u16/_u32/_u64 (no locked variants). The shift
; and rotate instructions take a variable count in cl only, so the count has
; to be moved there:
;  - ASM_CALL64_GCC: A0 is rdi and A1 is rsi, so the low byte of A1 is copied
;    into cl and the operand stays addressed through A0.
;  - otherwise (A0 is rcx/ecx): A0 and A1 are exchanged, which puts the count
;    in cl and the operand pointer in A1.
; On 32-bit hosts the 64-bit variant is an int3 stub.
1055	%macro IEMIMPL_SHIFT_OP 3
1056	BEGINCODE
1057	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1058	PROLOGUE_3_ARGS
1059	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1060	%ifdef ASM_CALL64_GCC
1061	mov cl, A1_8
1062	%1 byte [A0], cl
1063	%else
1064	xchg A1, A0 ; count -> cl, operand pointer -> A1
1065	%1 byte [A1], cl
1066	%endif
1067	IEM_SAVE_FLAGS A2, %2, %3
1068	EPILOGUE_3_ARGS
1069	ENDPROC iemAImpl_ %+ %1 %+ _u8
1070
1071	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1072	PROLOGUE_3_ARGS
1073	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1074	%ifdef ASM_CALL64_GCC
1075	mov cl, A1_8
1076	%1 word [A0], cl
1077	%else
1078	xchg A1, A0 ; count -> cl, operand pointer -> A1
1079	%1 word [A1], cl
1080	%endif
1081	IEM_SAVE_FLAGS A2, %2, %3
1082	EPILOGUE_3_ARGS
1083	ENDPROC iemAImpl_ %+ %1 %+ _u16
1084
1085	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1086	PROLOGUE_3_ARGS
1087	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1088	%ifdef ASM_CALL64_GCC
1089	mov cl, A1_8
1090	%1 dword [A0], cl
1091	%else
1092	xchg A1, A0 ; count -> cl, operand pointer -> A1
1093	%1 dword [A1], cl
1094	%endif
1095	IEM_SAVE_FLAGS A2, %2, %3
1096	EPILOGUE_3_ARGS
1097	ENDPROC iemAImpl_ %+ %1 %+ _u32
1098
1099	%ifdef RT_ARCH_AMD64
1100	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1101	PROLOGUE_3_ARGS
1102	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1103	%ifdef ASM_CALL64_GCC
1104	mov cl, A1_8
1105	%1 qword [A0], cl
1106	%else
1107	xchg A1, A0 ; count -> cl, operand pointer -> A1
1108	%1 qword [A1], cl
1109	%endif
1110	IEM_SAVE_FLAGS A2, %2, %3
1111	EPILOGUE_3_ARGS
1112	ENDPROC iemAImpl_ %+ %1 %+ _u64
1113	%else ; stub it for now - later, replace with hand coded stuff.
1114	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1115	int3
1116	ret 4 ; release the single stack argument
1117	ENDPROC iemAImpl_ %+ %1 %+ _u64
1118	%endif ; !RT_ARCH_AMD64
1119
1120	%endmacro
1121
1122	IEMIMPL_SHIFT_OP rol, (X86_EFL_OF \| X86_EFL_CF), 0
1123	IEMIMPL_SHIFT_OP ror, (X86_EFL_OF \| X86_EFL_CF), 0
1124	IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF \| X86_EFL_CF), 0
1125	IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF \| X86_EFL_CF), 0
1126	IEMIMPL_SHIFT_OP shl, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1127	IEMIMPL_SHIFT_OP shr, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1128	IEMIMPL_SHIFT_OP sar, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1129
1130
;;
; Generates the double precision shift (by CL) workers for one instruction.
;
; A function is emitted for each of the 16, 32 and 64 bit operand sizes.  On
; 32-bit hosts the 64-bit variant is only a breakpoint stub, as it would need
; hand coding.
;
; Every generated function takes the destination operand (r/m) pointer in A0,
; the source (reg) value in A1, the shift count in A2 and a pointer to the
; eflags variable/register in A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; The count has to end up in CL, so this makes ASSUMPTIONS about the A0, A1,
; A2 and A3 register assignments of each calling convention.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
; --- 16-bit operands ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> CL for the duration of the shift
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; swap back so A3 is the eflags pointer again
 %else
        xchg    A0, A2                  ; count -> CL, operand pointer -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; --- 32-bit operands ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; --- 64-bit operands ---
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %else
        xchg    A0, A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1204
1205
;;
; Generates the widening multiplication workers for one instruction.
;
; A function is emitted for each of the 8, 16, 32 and 64 bit operand sizes.
; On 32-bit hosts the 64-bit variant is only a breakpoint stub, as it would
; need hand coding.
;
; The 8-bit function works on AX alone: A0 points to AX, A1 holds the operand
; and A2 points to eflags.  The wider functions take a pointer to rAX in A0,
; a pointer to rDX in A1, the operand in A2 and a pointer to eflags in A3.
;
; All functions return 0, which lets the caller share one code path between
; mul/imul and div/idiv.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
; --- 8-bit: AX = AL * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the full 16-bit product goes back to *A0
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

; --- 16-bit: DX:AX = AX * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer in T1 across the multiply
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; --- 32-bit: EDX:EAX = EAX * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; --- 64-bit: RDX:RAX = RAX * operand ---
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1306
1307
;;
; Macro for implementing a division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer.  The other
; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  The host instruction is only executed after the
; divisor and the quotient range have been validated, so it never faults.
;
; Signed range check: the magnitudes of dividend and divisor are formed first.
;   - If negating the dividend leaves it negative, it was the most negative
;     value.  Its magnitude divided by any divisor magnitude exceeds the
;     quotient range, so that is always an overflow.
;   - Same signs: the quotient must be below 2^(width-1), i.e. the dividend
;     shifted right by (width - 1) must be below the divisor.
;   - Different signs: the quotient may reach -2^(width-1), so a shifted value
;     equal to the divisor is still fine as long as the low (width - 1) bits
;     of the dividend are below the divisor.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify (the high
        ; half of the dividend must be below the divisor), signed division
        ; takes the sign juggling described in the macro header.
 %if %4 == 0
        cmp     [A0 + 1], A1_8
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        js      .div_overflow           ; Most negative dividend: always out of range.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0                      ; (pop leaves the flags of the cmp alone)
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        js      .div_overflow           ; Most negative dividend: always out of range.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check, see the macro header for the signed variant.
 %if %4 == 0
        cmp     [A1], A2_16
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend (DX:AX packed into 32 bits)
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        js      .div_overflow           ; Most negative dividend: always out of range.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        js      .div_overflow           ; Most negative dividend: always out of range.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor out of the way before DX is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer in T1 across the division
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check, see the macro header for the signed variant.
 %if %4 == 0
        cmp     [A1], A2_32
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        neg     T1_32                   ; Negate T1:T0 as one 64-bit value: the borrow
        neg     T0_32                   ; out of the low half (CF) must be taken from
        sbb     T1_32, 0                ; the high half.
        js      .div_overflow           ; Most negative dividend: always out of range.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        neg     T1_32                   ; 64-bit negate of T1:T0, see above.
        neg     T0_32
        sbb     T1_32, 0
        js      .div_overflow           ; Most negative dividend: always out of range.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor out of the way before EDX is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; keep the rDX pointer in T1 across the division
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed range check
 %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero

        ; Overflow check, see the macro header for the signed variant.
  %if %4 == 0
        cmp     [A1], A2
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we modify it (we out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        neg     T1                      ; Negate T1:T0 as one 128-bit value: the borrow
        neg     T0                      ; out of the low half (CF) must be taken from
        sbb     T1, 0                   ; the high half.
        js      .div_overflow           ; Most negative dividend: always out of range.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        neg     T1                      ; 128-bit negate of T1:T0, see above.
        neg     T0
        sbb     T1, 0
        js      .div_overflow           ; Most negative dividend: always out of range.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor out of the way before RDX is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; keep the rDX pointer in T1 across the division
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; balance the push done by the signed range check
  %endif
.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12                      ; same stack cleanup as the other 20 byte stubs in this file
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1641
1642
1643	;
1644	; BSWAP. No flag changes.
1645	;
1646	; Each function takes one argument, pointer to the value to bswap
1647	; (input/output). They all return void.
1648	;
; 16-bit variant: executes BSWAP with an operand-size prefix on the host.  The
; architecture leaves the result of that form undefined.
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; whole dword, just in case any of the upper bits are used.
        db      66h                     ; operand-size prefix for the bswap that follows
        bswap   T0_32
        mov     [A0], T0_32             ; the full 32 bits are written back
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1657
; 32-bit variant: byte-reverses the dword at [A0] in place.
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1665
; 64-bit variant: byte-reverses the qword at [A0] in place.  32-bit hosts do it
; as two dword swaps and exchange the halves while storing.
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                ; T0 = low dword
        mov     T1, [A0 + 4]            ; T1 = high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low dword becomes the new high dword
        mov     [A0], T1                ; swapped high dword becomes the new low dword
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1684
1685
;;
; Prepares the host FPU for the instruction being emulated by loading parts of
; the guest's control word and status word.
;
; The current environment is stored at [xSP], patched and loaded back:
;   - FCW is replaced by the guest FCW, restricted to the exception mask,
;     precision control and rounding control bits.
;   - FSW keeps the current TOP field and takes C0..C3 from the guest.
;
; @uses     The FNSTENV image (X86FSTENV32P layout) at [xSP]; the caller must
;           have reserved that stack space (callers in this file reserve 20h
;           bytes).  T0 and T1 are used as scratch registers.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK              ; guest condition code bits only
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK            ; current TOP only
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1711
1712
;;
; Output of the FPU workers that produce one 80-bit value plus a status word.
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result  resw 5                  ; 80-bit result value (5 words).
    .FSW        resw 1                  ; FPU status word.
endstruc
1720
1721
;;
; Like IEMFPURESULT, but with room for two 80-bit values around the status word.
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; first 80-bit result value (5 words).
    .FSW        resw 1                  ; FPU status word.
    .r80Result2 resw 5                  ; second 80-bit result value (5 words).
endstruc
1730
1731
1732	;
1733	;---------------------- 16-bit signed integer operations ----------------------
1734	;
1735
1736
;;
; Converts a 16-bit signed integer value to a 80-bit floating point one (fpu
; register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1760
1761
;;
; Stores a 80-bit floating point value (register) as a 16-bit signed integer
; (memory), converting according to the guest control word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1785
1786
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit store: the destination is only two bytes wide,
                                        ; and range/invalid detection must use the int16 limits.

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1811
1812
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 16-bit signed integer memory operand, returning value and status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1849
1850
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 16-bit signed integer memory operand, returning only the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1881
1882
1883
1884	;
1885	;---------------------- 32-bit signed integer operations ----------------------
1886	;
1887
1888
;;
; Converts a 32-bit signed integer value to a 80-bit floating point one (fpu
; register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1912
1913
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory), converting according to the guest control word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1937
1938
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory), truncating instead of rounding.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1963
1964
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 32-bit signed integer memory operand, returning value and status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2001
2002
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 32-bit signed integer memory operand, returning only the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2033
2034
2035
2036	;
2037	;---------------------- 64-bit signed integer operations ----------------------
2038	;
2039
2040
;;
; Converts a 64-bit signed integer value to a 80-bit floating point one (fpu
; register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2064
2065
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory), converting according to the guest control word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2089
2090
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory), truncating instead of rounding.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2115
2116
2117
2118	;
2119	;---------------------- 32-bit floating point operations ----------------------
2120	;
2121
;;
; Loads a 32-bit floating point value and widens it to a 80-bit one (fpu
; register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2145
2146
;;
; Stores a 80-bit floating point value (register) as a 32-bit floating point
; one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2170
2171
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 32-bit floating point memory operand, returning value and status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2208
2209
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 32-bit floating point memory operand, returning only the status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2240
2241
2242
2243	;
2244	;---------------------- 64-bit floating point operations ----------------------
2245	;
2246
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit                          ; start from the initial FPU state like the other
                                        ; load workers, so the result does not depend on
                                        ; whatever register stack / status the host left.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2269
2270
;;
; Stores a 80-bit floating point value (register) as a 64-bit floating point
; one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A3]              ; value goes in before the guest FCW/FSW are applied
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2294
2295
;;
; Generates a worker for an FPU instruction combining one 80-bit value with a
; 64-bit floating point memory operand, returning value and status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch for the FPU environment image

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then clear the exceptions
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU in its initial state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2332
;;
; Worker template for FPU instructions taking an 80-bit value (in ST0) and a
; 64-bit floating point memory operand, where only the FSW is returned.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A2]                      ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1      qword [A3]                      ; Apply the instruction with the 64-bit memory operand.

        fnstsw  word  [A1]                      ; The status word is the only output.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2363
2364
2365
2366	;
2367	;---------------------- 80-bit floating point operations ----------------------
2368	;
2369
;;
; Loads an 80-bit floating point value from memory into an FPU register result.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW.

        fninit                                  ; Reset the FPU before touching it.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW from the fxsave image.
        fld     tword [A2]                      ; ST0 = the 80-bit input.

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; Report the status word left by the load.
        fnclex                                  ; Drop exception flags so the store below does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2393
2394
;;
; Stores an 80-bit floating point register value to memory.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by FPU_LD_FXSTATE_FCW_AND_SAFE_FSW.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A3]                      ; ST0 = the 80-bit register value.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW from the fxsave image.
        fstp    tword [A2]                      ; Store ST0 to the destination and pop.

        fnstsw  word  [A1]                      ; Report the status word left by the store.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2418
2419
;;
; Worker template for FPU instructions operating on two 80-bit floating point
; values, leaving the result in ST0.
;
; @param    1       The instruction
; @param    2       The operand list to put after the instruction (may be empty).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A3]                      ; Pushed first, so it ends up in ST1.
        fld     tword [A2]                      ; Pushed last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; Report the resulting status word.
        fnclex                                  ; Drop exception flags so the store below does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2460
2461
;;
; Worker template for FPU instructions operating on two 80-bit floating point
; values in ST1 and ST0, which store the result in ST1 and pop the stack.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A2]                      ; Pushed first, so it ends up in ST1.
        fld     tword [A3]                      ; Pushed last, so it ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; Report the resulting status word.
        fnclex                                  ; Drop exception flags so the store below does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; The value on top of the stack after the instruction.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2496
2497
;;
; Worker template for FPU instructions operating on two 80-bit floating point
; values where the FSW is the only output.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A3]                      ; Second value: pushed first, ends up in ST1.
        fld     tword [A2]                      ; First value: pushed last, ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1      st0, st1

        fnstsw  word  [A1]                      ; The status word is the only output.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2530
2531
;;
; Worker template for FPU instructions operating on two 80-bit floating point
; values where the outputs are the FSW and EFLAGS (returned in eax).
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A3]                      ; Second value: pushed first, ends up in ST1.
        fld     tword [A2]                      ; First value: pushed last, ends up in ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1      st1

        fnstsw  word  [A1]                      ; Report the resulting status word.
        pushf                                   ; Fetch the flags register as left by the instruction above ...
        pop     xAX                             ; ... into the return register.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2567
2568
;;
; Worker template for FPU instructions operating on a single 80-bit floating
; point value (ST0) and producing an 80-bit result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A2]                      ; ST0 = the operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; Report the resulting status word.
        fnclex                                  ; Drop exception flags so the store below does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
; NOTE(review): fyl2x reads both ST0 and ST1 and pops, but this worker only
;               loads a single value, so ST1 is empty when it executes.  It
;               looks like it belongs with the two operand ST1/ST0 pop workers
;               (fpatan, fyl2xp1); confirm against the callers before moving
;               it, since that would rename the exported symbol.
IEMIMPL_FPU_R80 fyl2x
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2606
2607
;;
; Worker template for FPU instructions operating on a single 80-bit floating
; point value (ST0) where the FSW is the only output.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A2]                      ; ST0 = the operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1

        fnstsw  word  [A1]                      ; The status word is the only output.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2638
2639
2640
;;
; Worker template for FPU instructions that load an 80-bit floating point
; constant.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1                                      ; Pushes the constant, so it ends up in ST0.

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; Report the resulting status word.
        fnclex                                  ; Drop exception flags so the store below does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2675
2676
;;
; Worker template for FPU instructions operating on a single 80-bit floating
; point value and outputting two 80-bit values.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                        ; 32 bytes of stack scratch; presumably consumed by the FCW/FSW load macro.

        fninit                                  ; Reset the FPU before touching it.
        fld     tword [A2]                      ; ST0 = the operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0      ; Presumably installs the guest FCW and a sanitized FSW.
        %1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW] ; Report the resulting status word.
        fnclex                                  ; Drop exception flags before each store so it does not trip over a pending one.
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2] ; Top of stack goes to the second result ...
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1] ; ... and the value below it to the first.

        fninit                                  ; Hand back a reset FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2711

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 47148

Download in other formats: