VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94540

Last change on this file since 94540 was 94540, checked in by vboxsync, 3 years ago

tstIEMAImpl: f2xm1 testing. Added enabling/disabling of individual tests. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 188.2 KB
Line 
1/* $Id: tstIEMAImpl.cpp 94540 2022-04-10 14:20:28Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** Subtest table entry using the native EFLAGS behaviour (same on all CPUs). */
#define ENTRY(a_Name)       ENTRY_EX(a_Name, 0)
/** Same as ENTRY, but also supplies a value for the uExtra field. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Subtest table entry for the Intel EFLAGS flavour of a_Name; the plain
 *  worker is stored in the pfnNative slot as the reference implementation. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Subtest table entry for the AMD EFLAGS flavour of a_Name. */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Declares a subtest descriptor struct type, binding a test-data record
 *  type to the worker function pointer type used for that operand width. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char             *pszName; \
        a_FunctionPtrType       pfn; \
        a_FunctionPtrType       pfnNative; \
        a_TestType const       *paTests; \
        uint32_t const         *pcTests; \
        uint32_t                uExtra; \
        uint8_t                 idxCpuEflFlavour; \
    } a_TypeName

/** Number of variations to run for a subtest: 1, or 2 when a native worker
 *  exists and its EFLAGS flavour matches the host CPU's (g_idxCpuEflFlavour). */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
75
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
/** The test instance handle. */
static RTTEST       g_hTest;
/** The host CPU's EFLAGS behaviour flavour; defaults to Intel. */
static uint8_t      g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** How many of the first generated tests get a forced-zero destination. */
static uint32_t     g_cZeroDstTests = 2;
/** How many of the first generated tests get a forced-zero source. */
static uint32_t     g_cZeroSrcTests = 4;
#endif
/** Scratch operand buffers used by the test workers (two of each size,
 *  plus one for EFLAGS). */
static uint8_t     *g_pu8,   *g_pu8Two;
static uint16_t    *g_pu16,  *g_pu16Two;
static uint32_t    *g_pu32,  *g_pu32Two, *g_pfEfl;
static uint64_t    *g_pu64,  *g_pu64Two;
static RTUINT128U  *g_pu128, *g_pu128Two;

/** Ring of 16 formatting buffers (cycled via g_idxBuf) so that several
 *  Format/GenFormat results can be alive within a single printf call. */
static char         g_aszBuf[16][256];
/** Index of the next buffer in g_aszBuf to hand out (wraps). */
static unsigned     g_idxBuf = 0;

/** Number of entries used in g_apszIncludeTestPatterns. */
static uint32_t     g_cIncludeTestPatterns;
/** Number of entries used in g_apszExcludeTestPatterns. */
static uint32_t     g_cExcludeTestPatterns;
/** Simple patterns selecting which sub-tests to run (empty means all). */
static const char  *g_apszIncludeTestPatterns[64];
/** Simple patterns selecting which sub-tests to skip (checked first). */
static const char  *g_apszExcludeTestPatterns[64];


/*********************************************************************************************************************************
*   Internal Functions                                                                                                           *
*********************************************************************************************************************************/
static const char *FormatR80(PCRTFLOAT80U pr80);
static const char *FormatR64(PCRTFLOAT64U pr64);
static const char *FormatR32(PCRTFLOAT32U pr32);
107
108
109/*
110 * Random helpers.
111 */
112
113static uint32_t RandEFlags(void)
114{
115 uint32_t fEfl = RTRandU32();
116 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
117}
118
#ifdef TSTIEMAIMPL_WITH_GENERATOR

/** Returns a uniformly random unsigned 8-bit value. */
static uint8_t RandU8(void)
{
    return (uint8_t)RTRandU32Ex(0, 0xff);
}


/** Returns a uniformly random unsigned 16-bit value. */
static uint16_t RandU16(void)
{
    return (uint16_t)RTRandU32Ex(0, 0xffff);
}


/** Returns a uniformly random unsigned 32-bit value. */
static uint32_t RandU32(void)
{
    return RTRandU32();
}

#endif
139
140static uint64_t RandU64(void)
141{
142 return RTRandU64();
143}
144
145
146static RTUINT128U RandU128(void)
147{
148 RTUINT128U Ret;
149 Ret.s.Hi = RTRandU64();
150 Ret.s.Lo = RTRandU64();
151 return Ret;
152}
153
154#ifdef TSTIEMAIMPL_WITH_GENERATOR
155
156static uint8_t RandU8Dst(uint32_t iTest)
157{
158 if (iTest < g_cZeroDstTests)
159 return 0;
160 return RandU8();
161}
162
163
164static uint8_t RandU8Src(uint32_t iTest)
165{
166 if (iTest < g_cZeroSrcTests)
167 return 0;
168 return RandU8();
169}
170
171
172static uint16_t RandU16Dst(uint32_t iTest)
173{
174 if (iTest < g_cZeroDstTests)
175 return 0;
176 return RandU16();
177}
178
179
180static uint16_t RandU16Src(uint32_t iTest)
181{
182 if (iTest < g_cZeroSrcTests)
183 return 0;
184 return RandU16();
185}
186
187
188static uint32_t RandU32Dst(uint32_t iTest)
189{
190 if (iTest < g_cZeroDstTests)
191 return 0;
192 return RandU32();
193}
194
195
196static uint32_t RandU32Src(uint32_t iTest)
197{
198 if (iTest < g_cZeroSrcTests)
199 return 0;
200 return RandU32();
201}
202
203
204static uint64_t RandU64Dst(uint32_t iTest)
205{
206 if (iTest < g_cZeroDstTests)
207 return 0;
208 return RandU64();
209}
210
211
212static uint64_t RandU64Src(uint32_t iTest)
213{
214 if (iTest < g_cZeroSrcTests)
215 return 0;
216 return RandU64();
217}
218
219
220static int16_t RandI16Src(uint32_t iTest)
221{
222 RT_NOREF(iTest);
223 return (int16_t)RandU16();
224}
225
226
227static int32_t RandI32Src(uint32_t iTest)
228{
229 RT_NOREF(iTest);
230 return (int32_t)RandU32();
231}
232
233
234#if 0
235static int64_t RandI64Src(uint32_t iTest)
236{
237 RT_NOREF(iTest);
238 return (int64_t)RandU64();
239}
240#endif
241
242
243static uint16_t RandFcw(void)
244{
245 return RandU16() & ~X86_FCW_ZERO_MASK;
246}
247
248
249static uint16_t RandFsw(void)
250{
251 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
252 return RandU16();
253}
254
255
256static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
257{
258 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
259 pr80->sj64.uFraction >>= cShift;
260 else
261 pr80->sj64.uFraction = (cShift % 19) + 1;
262}
263
264
/**
 * Generates a random 80-bit floating point value, heavily biased towards
 * special encodings (zero, infinities, NaNs, denormals, unnormals, ...).
 *
 * @returns The random value.
 * @param   cTarget     Width the value is destined to be converted to;
 *                      80 (default), 64 or 32 for FP targets, or the integer
 *                      widths 16/32/59/64 when @a fIntTarget is true (the
 *                      Assert below spells out the legal combinations).
 * @param   fIntTarget  Set when the value will be converted to an integer,
 *                      which changes the exponent clamping for normals.
 */
static RTFLOAT80U RandR80Ex(unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the type-specific code below then
       reshapes exponent/fraction/integer-bit as needed. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Make it more likely that we get a good selection of special values.
     */
    uint8_t bType = RandU8() & 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80),       ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80),        ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            /* A denormal needs a non-zero fraction; draw one. */
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6;  /* integer bit set => pseudo denormal */
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80),        ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType <  6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Keep the fraction non-zero if the integer bit will be cleared. */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;  /* cleared integer bit with max exponent => pseudo NaN */
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80),        ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 10 || bType == 11)
    {
        /* Quiet and signalling NaNs (using fInteger to pick which). */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Bit 62 selects quiet (set) vs signalling (clear). */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 1;
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 12 || bType == 13)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        /* Avoid the exponent values reserved for other classes. */
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType < 24)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value survives conversion to the
               target FP width without becoming zero/inf. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 14)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 14)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* bType 24..31: keep the raw random bits unmodified. */
    return r80;
}
398
399
400static RTFLOAT80U RandR80Src(uint32_t iTest)
401{
402 RT_NOREF(iTest);
403 return RandR80Ex();
404}
405
406
407static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
408{
409 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
410 pr64->s64.uFraction >>= cShift;
411 else
412 pr64->s64.uFraction = (cShift % 19) + 1;
413}
414
415
/**
 * Generates a random 64-bit floating point value, biased towards special
 * encodings (zero, infinity, subnormals, NaNs).
 *
 * @returns The random value.
 * @param   iTest   The test number (currently unused).
 */
static RTFLOAT64U RandR64Src(uint32_t iTest)
{
    RT_NOREF(iTest);

    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    uint8_t bType = RandU8() & 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64),  ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            /* Subnormal requires a non-zero fraction; draw one. */
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5)
    {
        /* NaNs */
        if (bType == 5)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: keep the raw random bits unmodified. */
    return r64;
}
468
469
470static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
471{
472 if (pr32->s.uFraction >= RT_BIT_32(cShift))
473 pr32->s.uFraction >>= cShift;
474 else
475 pr32->s.uFraction = (cShift % 19) + 1;
476}
477
478
/**
 * Generates a random 32-bit floating point value, biased towards special
 * encodings (zero, infinity, subnormals, NaNs).
 *
 * @returns The random value.
 * @param   iTest   The test number (currently unused).
 */
static RTFLOAT32U RandR32Src(uint32_t iTest)
{
    RT_NOREF(iTest);

    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    uint8_t bType = RandU8() & 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32),  ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            /* Subnormal requires a non-zero fraction; draw one. */
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5)
    {
        /* NaNs */
        if (bType == 5)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: keep the raw random bits unmodified. */
    return r32;
}
530
531
/**
 * Generates a random 80-bit packed BCD value.
 *
 * The first few tests get fixed special values (zeros of both signs, then
 * the indefinite encoding), after that a mix of valid and deliberately
 * illegal encodings is produced.
 *
 * @returns The random value.
 * @param   iTest   The test number; selects the special-value phase and the
 *                  valid/illegal mix.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    /* Tests 0..2: zero, alternating sign. */
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    /* Tests 3..4: the indefinite encoding. */
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;  /* sometimes non-zero pad too */
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();  /* nibbles may exceed 9 */
    }
    else
    {
        /* Normal */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
569
570
/**
 * Formats an 80-bit FP value as C source (an RTFLOAT80U_INIT* invocation)
 * for emission into generated test-data files.
 *
 * @returns Pointer to a static string (rotating g_aszBuf entry for the
 *          generic case, literal for recognized special values).
 * @param   plrd    The value to format.
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    /* Prefer the dedicated initializer macros for recognizable specials. */
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* Only the canonical QNAN/SNAN payloads (low bits == 1) map to the macros. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    /* Everything else is spelled out via RTFLOAT80U_INIT_C. */
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
589
590const char *GenFormatR64(PCRTFLOAT64U prd)
591{
592 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
593 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
594 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
595 return pszBuf;
596}
597
598
599const char *GenFormatR32(PCRTFLOAT32U pr)
600{
601 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
602 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
603 return pszBuf;
604}
605
606
/**
 * Formats a packed BCD value as an RTPBCD80U_INIT*_C invocation for the
 * generated test data.
 *
 * @returns Pointer to a rotating static buffer (g_aszBuf).
 * @param   pd80    The value to format.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    /* The _EX_ variant is only needed when the pad byte carries bits. */
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    /* Emit digit pairs most-significant first (highest array index first). */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
624
625
626const char *GenFormatI64(int64_t i64)
627{
628 if (i64 == INT64_MIN) /* This one is problematic */
629 return "INT64_MIN";
630 if (i64 == INT64_MAX)
631 return "INT64_MAX";
632 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
633 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
634 return pszBuf;
635}
636
637
638const char *GenFormatI64(int64_t const *pi64)
639{
640 return GenFormatI64(*pi64);
641}
642
643
644const char *GenFormatI32(int32_t i32)
645{
646 if (i32 == INT32_MIN) /* This one is problematic */
647 return "INT32_MIN";
648 if (i32 == INT32_MAX)
649 return "INT32_MAX";
650 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
651 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
652 return pszBuf;
653}
654
655
656const char *GenFormatI32(int32_t const *pi32)
657{
658 return GenFormatI32(*pi32);
659}
660
661
662const char *GenFormatI16(int16_t i16)
663{
664 if (i16 == INT16_MIN) /* This one is problematic */
665 return "INT16_MIN";
666 if (i16 == INT16_MAX)
667 return "INT16_MAX";
668 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
669 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
670 return pszBuf;
671}
672
673
674const char *GenFormatI16(int16_t const *pi16)
675{
676 return GenFormatI16(*pi16);
677}
678
679
/**
 * Writes the standard file header (Id line, doc comment, license, include)
 * to a generated test-data file.
 *
 * @param   pOut        The output stream.
 * @param   pszCpuDesc  CPU description string included in the doc comment.
 * @param   pszCpuType  Optional CPU type string (e.g. flavour); NULL omits it.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 94540 $";  /* expanded by svn:keywords */
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t      cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev]))  /* length of just the digits */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 94540 2022-04-10 14:20:28Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.215389.xyz. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
712
713
714static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
715{
716 PRTSTREAM pOut = NULL;
717 int rc = RTStrmOpen(pszFilename, "w", &pOut);
718 if (RT_SUCCESS(rc))
719 {
720 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
721 return pOut;
722 }
723 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
724 return NULL;
725}
726
727
728static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
729{
730 RTStrmPrintf(pOut,
731 "\n"
732 "/* end of file */\n");
733 int rc = RTStrmClose(pOut);
734 if (RT_SUCCESS(rc))
735 return rcExit;
736 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
737}
738
739
/** Emits the opening of a test-data array definition:
 *  "<type> const g_aTests_<name>[] = {". */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}


/** Emits the closing brace of a test-data array plus the companion
 *  g_cTests_<name> element-count variable. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
754
755#endif /* TSTIEMAIMPL_WITH_GENERATOR */
756
757
758/*
759 * Test helpers.
760 */
761static bool IsTestEnabled(const char *pszName)
762{
763 /* Process excludes first: */
764 uint32_t i = g_cExcludeTestPatterns;
765 while (i-- > 0)
766 if (RTStrSimplePatternMatch(g_apszExcludeTestPatterns[i], pszName))
767 return false;
768
769 /* If no include patterns, everything is included: */
770 i = g_cIncludeTestPatterns;
771 if (!i)
772 return true;
773
774 /* Otherwise only tests in the include patters gets tested: */
775 while (i-- > 0)
776 if (RTStrSimplePatternMatch(g_apszIncludeTestPatterns[i], pszName))
777 return true;
778
779 return false;
780}
781
782
783static bool SubTestAndCheckIfEnabled(const char *pszName)
784{
785 RTTestSub(g_hTest, pszName);
786 if (IsTestEnabled(pszName))
787 return true;
788 RTTestSkipped(g_hTest, "excluded");
789 return false;
790}
791
792
/**
 * Formats the difference between two EFLAGS values for error messages.
 *
 * @returns Empty string when equal, otherwise " - <xor-mask>" followed by
 *          "/FLAG" (set in actual) or "/!FLAG" (clear in actual) for each
 *          differing flag (rotating static buffer).
 * @param   fActual     The flags produced by the code under test.
 * @param   fExpected   The expected flags.
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/mask table for the individual EFLAGS bits. */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    /* Ensure termination at the current offset. */
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
834
835
/**
 * Formats the difference between two x87 status word (FSW) values for
 * error messages; same output convention as EFlagsDiff, plus a
 * "/TOP<actual>!<expected>" part when the TOP field differs.
 *
 * @returns Empty string when equal, otherwise the diff description
 *          (rotating static buffer).
 * @param   fActual     The FSW produced by the code under test.
 * @param   fExpected   The expected FSW.
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/mask table for the single-bit FSW fields (TOP handled below). */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
    /* Ensure termination at the current offset. */
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
876
877
/**
 * Formats an x87 control word (FCW) for display: precision control,
 * rounding control and the exception mask bits.
 *
 * @returns Pointer to a rotating static buffer (g_aszBuf).
 * @param   fFcw    The control word.
 */
static const char *FormatFcw(uint16_t fFcw)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    const char *pszPC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_PC_MASK)
    {
        case X86_FCW_PC_24:     pszPC = "PC24"; break;
        case X86_FCW_PC_RSVD:   pszPC = "PCRSVD!"; break;
        case X86_FCW_PC_53:     pszPC = "PC53"; break;
        case X86_FCW_PC_64:     pszPC = "PC64"; break;
    }

    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_RC_MASK)
    {
        case X86_FCW_RC_NEAREST:    pszRC = "NEAR"; break;
        case X86_FCW_RC_DOWN:       pszRC = "DOWN"; break;
        case X86_FCW_RC_UP:         pszRC = "UP"; break;
        case X86_FCW_RC_ZERO:       pszRC = "ZERO"; break;
    }
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);

    /* The exception mask bits; "6M" is bit 6 (value 64), which has no
       X86_FCW_* name here. */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
        FCW_ENTRY(IM),
        FCW_ENTRY(DM),
        FCW_ENTRY(ZM),
        FCW_ENTRY(OM),
        FCW_ENTRY(UM),
        FCW_ENTRY(PM),
        { "6M", 64 },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fFcw & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    /* Ensure termination at the current offset. */
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
923
924
925static const char *FormatR80(PCRTFLOAT80U pr80)
926{
927 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
928 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
929 return pszBuf;
930}
931
932
933static const char *FormatR64(PCRTFLOAT64U pr64)
934{
935 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
936 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
937 return pszBuf;
938}
939
940
941static const char *FormatR32(PCRTFLOAT32U pr32)
942{
943 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
944 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
945 return pszBuf;
946}
947
948
/**
 * Formats a packed BCD value for display: sign, 18 digits (bad digits a-f
 * shown in hex), and a trailing "[cBadDigits,pad]" note for invalid
 * encodings.
 *
 * @returns Pointer to a rotating static buffer (or a literal for the
 *          indefinite encoding).
 * @param   pd80    The value to format.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite endcoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    /* Most significant digit pair first. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
    {
        static const char    s_szDigits[]   = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    /* Flag invalid encodings (nibbles > 9 or non-zero pad byte). */
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
975
976
#if 0
/** Formats a signed 64-bit value in hex for display (currently unused). */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif


/** Formats a signed 32-bit value in hex for display (rotating static buffer). */
static const char *FormatI32(int32_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}


/** Formats a signed 16-bit value in hex for display (rotating static buffer). */
static const char *FormatI16(int16_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
1001
1002
/*
 * Binary operations.
 */
/* Subtest descriptor types for the 8/16/32/64-bit binary operation workers. */
TYPEDEF_SUBTEST_TYPE(BINU8_T,  BINU8_TEST_T,  PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1010
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Expands to a BinU<bits>Generate() function that writes test-data arrays
 * for every entry in g_aBinU<bits>.  Entries with a CPU-specific EFLAGS
 * flavour are only generated when the flavour matches the host's and go to
 * pOutCpu; native/common entries go to pOut.  Preferring pfnNative means
 * the expected values come from the real CPU where available.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
/** No-op stub when the test-data generator is not compiled in. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1049
/**
 * Instantiates the optional test data generator and the BinU<N>Test() driver
 * for one operand width.
 *
 * The driver replays the pre-generated records against each enabled worker:
 * first the default implementation, then (when COUNT_VARIATIONS yields a
 * second pass) the native one.  On success the call is repeated through the
 * global g_pu<N>/g_pfEfl pointers as an extra check (presumably specially
 * placed memory - allocation not visible in this chunk).
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1090
1091
1092/*
1093 * 8-bit binary operations.
1094 */
/** Worker table for the 8-bit binary operations (plain + locked variants). */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1115
1116
1117/*
1118 * 16-bit binary operations.
1119 */
/** Worker table for the 16-bit binary operations.
 * ENTRY_EX(..., 1) sets uExtra, making the generator mask the source down to
 * a valid bit index (see GEN_BINARY_TESTS).  ENTRY_AMD/ENTRY_INTEL register
 * vendor-specific EFLAGS behaviour with the given flag mask (macro defined
 * earlier in the file; presumably the flags that may diverge). */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1154
1155
1156/*
1157 * 32-bit binary operations.
1158 */
/** Worker table for the 32-bit binary operations.
 * Same conventions as g_aBinU16: ENTRY_EX(..., 1) marks bit-test workers,
 * ENTRY_AMD/ENTRY_INTEL carry vendor-specific EFLAGS masks. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1192
1193
1194/*
1195 * 64-bit binary operations.
1196 */
/** Worker table for the 64-bit binary operations.
 * Same conventions as g_aBinU16: ENTRY_EX(..., 1) marks bit-test workers,
 * ENTRY_AMD/ENTRY_INTEL carry vendor-specific EFLAGS masks. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1230
1231
1232/*
1233 * XCHG
1234 */
/**
 * Tests the xchg_uNN workers (locked and unlocked) of all four widths.
 *
 * Each worker is invoked twice per iteration: once on the global g_puNN
 * buffer pair (result not checked here; presumably just to touch that
 * specially allocated memory - TODO confirm) and once on local values whose
 * outcome is verified (memory and register operands must swap).
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    /* Worker table: cb selects which union member is valid; the entries are
       initialized via uintptr_t because the function types differ. */
    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t           pfn;
            FNIEMAIMPLXCHGU8   *pfnU8;
            FNIEMAIMPLXCHGU16  *pfnU16;
            FNIEMAIMPLXCHGU32  *pfnU32;
            FNIEMAIMPLXCHGU64  *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_locked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        if (uIn1.u == uIn2.u) /* make sure a no-op swap can't mask a broken worker */
            uDst.u = uIn2.u = ~uIn2.u;

        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        /* The two operands must have swapped. */
        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1300
1301
1302/*
1303 * XADD
1304 */
1305static void XaddTest(void)
1306{
1307#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1308 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1309 static struct \
1310 { \
1311 const char *pszName; \
1312 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1313 BINU ## a_cBits ## _TEST_T const *paTests; \
1314 uint32_t const *pcTests; \
1315 } const s_aFuncs[] = \
1316 { \
1317 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1318 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1319 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1320 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1321 }; \
1322 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1323 { \
1324 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1325 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1326 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1327 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1328 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1329 { \
1330 uint32_t fEfl = paTests[iTest].fEflIn; \
1331 a_Type uSrc = paTests[iTest].uSrcIn; \
1332 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1333 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1334 if ( fEfl != paTests[iTest].fEflOut \
1335 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1336 || uSrc != paTests[iTest].uDstIn) \
1337 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1338 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1339 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1340 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1341 } \
1342 } \
1343 } while(0)
1344 TEST_XADD(8, uint8_t, "%#04x");
1345 TEST_XADD(16, uint16_t, "%#06x");
1346 TEST_XADD(32, uint32_t, "%#010RX32");
1347 TEST_XADD(64, uint64_t, "%#010RX64");
1348}
1349
1350
1351/*
1352 * CMPXCHG
1353 */
1354
1355static void CmpXchgTest(void)
1356{
1357#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1358 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1359 static struct \
1360 { \
1361 const char *pszName; \
1362 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1363 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1364 BINU ## a_cBits ## _TEST_T const *paTests; \
1365 uint32_t const *pcTests; \
1366 } const s_aFuncs[] = \
1367 { \
1368 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1369 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1370 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1371 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1372 }; \
1373 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1374 { \
1375 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1376 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1377 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1378 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1379 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1380 { \
1381 /* as is (99% likely to be negative). */ \
1382 uint32_t fEfl = paTests[iTest].fEflIn; \
1383 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1384 a_Type uA = paTests[iTest].uDstIn; \
1385 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1386 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1387 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1388 if ( fEfl != paTests[iTest].fEflOut \
1389 || *g_pu ## a_cBits != uExpect \
1390 || uA != paTests[iTest].uSrcIn) \
1391 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1392 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1393 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1394 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1395 /* positive */ \
1396 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1397 uA = paTests[iTest].uDstIn; \
1398 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1399 fEfl = paTests[iTest].fEflIn; \
1400 uA = paTests[iTest].uDstIn; \
1401 *g_pu ## a_cBits = uA; \
1402 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1403 if ( fEfl != fEflExpect \
1404 || *g_pu ## a_cBits != uNew \
1405 || uA != paTests[iTest].uDstIn) \
1406 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1407 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1408 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1409 EFlagsDiff(fEfl, fEflExpect)); \
1410 } \
1411 } \
1412 } while(0)
1413 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1414 TEST_CMPXCHG(16, uint16_t, "%#06x");
1415 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1416#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1417 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1418#endif
1419}
1420
/**
 * Tests the cmpxchg8b workers (plain and locked).
 *
 * Uses random values rather than pre-generated data.  Each iteration runs a
 * positive round (comparand equals memory: ZF set, memory takes the new
 * value) and a negative round (comparand differs: ZF clear, memory kept and
 * copied into the comparand).
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG8B  *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b",        iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB; /* uA = comparand, uB = replacement value */
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* replacement operand must be untouched */

            /* negative */
            uint64_t const uExpect = ~uOldValue; /* guaranteed to differ from the comparand */
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1477
/**
 * Tests the cmpxchg16b workers (plain, locked, and on non-ARM64 hosts the C
 * fallback), mirroring CmpXchg8bTest but with 128-bit operands.
 *
 * On AMD64 with assembly workers the whole subtest is skipped when the host
 * CPU lacks the CMPXCHG16B feature.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char            *pszName;
        FNIEMAIMPLCMPXCHG16B  *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b",          iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked",   iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB; /* uA = comparand, uB = replacement value */
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo); /* differs from comparand */
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1554
1555
1556/*
1557 * Double shifts.
1558 *
1559 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1560 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftDblU<N>Generate() which emits test data for the shld/shrd
 * workers, skipping entries whose vendor EFLAGS flavour doesn't match the
 * host.  The shift count (uMisc) deliberately ranges up to 4x the operand
 * width to exercise out-of-range counts.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1590
/**
 * Defines the subtest descriptor type, the shld/shrd worker table (AMD and
 * Intel EFLAGS flavours, OF/CF diverging), the optional data generator, and
 * the ShiftDblU<N>Test() driver for one operand width.
 *
 * The driver mirrors TEST_BINARY_OPS: check against the recorded result,
 * then re-run through the global buffer/eflags pointers on success, and run
 * a second variation pass with the native worker when available.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars  = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1644
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates shld/shrd test data for all three operand widths. */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
#endif
1653
/** Runs the shld/shrd tests for all three operand widths. */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
1660
1661
1662/*
1663 * Unary operators.
1664 *
1665 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1666 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines UnaryU<N>Generate() which emits test data for the unary workers
 * (inc/dec/not/neg).  The BINUxx record layout is reused with uSrcIn and
 * uMisc fixed at zero.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits(); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1693
/**
 * Defines the subtest descriptor type, the worker table g_aUnaryU<N>
 * (inc/dec/not/neg, plain + locked), the optional generator, and the
 * UnaryU<N>Test() driver for one operand width.
 *
 * The driver replays the recorded inputs and, on success, repeats the call
 * through the global g_pu<N>/g_pfEfl pointers as an extra check.  No native
 * variation pass here, unlike TEST_BINARY_OPS.
 */
#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType const g_aUnaryU ## a_cBits [] = \
{ \
    ENTRY(inc_u ## a_cBits), \
    ENTRY(inc_u ## a_cBits ## _locked), \
    ENTRY(dec_u ## a_cBits), \
    ENTRY(dec_u ## a_cBits ## _locked), \
    ENTRY(not_u ## a_cBits), \
    ENTRY(not_u ## a_cBits ## _locked), \
    ENTRY(neg_u ## a_cBits), \
    ENTRY(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
        uint32_t const           cTests  = *g_aUnaryU ## a_cBits[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type   uDst = paTests[iTest].uDstIn; \
            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
            if (   uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl         = paTests[iTest].fEflIn; \
                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
    } \
}
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1744
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates unary operation (inc/dec/not/neg) test data for all widths. */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
#endif
1754
/** Runs the unary operation (inc/dec/not/neg) tests for all widths. */
static void UnaryTest(void)
{
    UnaryU8Test();
    UnaryU16Test();
    UnaryU32Test();
    UnaryU64Test();
}
1762
1763
1764/*
1765 * Shifts.
1766 *
1767 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
1768 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftU<N>Generate() which emits test data for the shift/rotate
 * workers, skipping entries whose vendor EFLAGS flavour doesn't match the
 * host.  Each iteration produces two records: one with random input EFLAGS
 * and a second with those flags inverted (within the live mask), so both
 * polarities of every input flag get covered.  Shift counts range up to 4x
 * the operand width to exercise out-of-range counts.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
            \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1805
/**
 * Declares the subtest table and the test driver for the shift and rotate
 * workers of one operand width (rol, ror, rcl, rcr, shl, shr, sar), each
 * with AMD and Intel EFLAGS-flavour entries.  The flags passed to
 * ENTRY_AMD/ENTRY_INTEL (OF, plus AF for plain shifts) mark where the two
 * vendors' behaviours differ.
 *
 * The driver replays the pre-generated test data (uMisc = shift count)
 * against the selected worker, and re-runs each passing case through the
 * g_puNN / g_pfEfl buffers.  A second variation pass (when available per
 * COUNT_VARIATIONS) repeats everything with the pfnNative worker.
 */
#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(  rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    /* Passing case: re-run it via the dedicated global buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass (if cVars > 1) uses the native worker. */ \
        } \
    } \
}
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
1869
1870#ifdef TSTIEMAIMPL_WITH_GENERATOR
1871static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
1872{
1873 ShiftU8Generate(pOut, cTests);
1874 ShiftU16Generate(pOut, cTests);
1875 ShiftU32Generate(pOut, cTests);
1876 ShiftU64Generate(pOut, cTests);
1877}
1878#endif
1879
1880static void ShiftTest(void)
1881{
1882 ShiftU8Test();
1883 ShiftU16Test();
1884 ShiftU32Test();
1885 ShiftU64Test();
1886}
1887
1888
1889/*
1890 * Multiplication and division.
1891 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
1893 * Note! Currently ignoring undefined bits.
1894 */
1895
1896/* U8 */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);

/** The 8-bit multiply and divide workers, in AMD and Intel EFLAGS flavours.
 * The 2nd ENTRY_*_EX argument lands in uExtra and is used by MulDivU8Test()
 * as the mask of (undefined) EFLAGS bits to ignore when comparing results. */
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                         X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                          X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
1911
1912#ifdef TSTIEMAIMPL_WITH_GENERATOR
1913static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
1914{
1915 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
1916 {
1917 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
1918 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
1919 continue;
1920 GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T"); \
1921 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
1922 {
1923 MULDIVU8_TEST_T Test;
1924 Test.fEflIn = RandEFlags();
1925 Test.fEflOut = Test.fEflIn;
1926 Test.uDstIn = RandU16Dst(iTest);
1927 Test.uDstOut = Test.uDstIn;
1928 Test.uSrcIn = RandU8Src(iTest);
1929 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
1930 RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
1931 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
1932 }
1933 GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
1934 }
1935}
1936#endif
1937
1938static void MulDivU8Test(void)
1939{
1940 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
1941 {
1942 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
1943 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
1944 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
1945 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
1946 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
1947 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
1948 if (!cTests) RTTestSkipped(g_hTest, "no tests");
1949 for (uint32_t iVar = 0; iVar < cVars; iVar++)
1950 {
1951 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
1952 {
1953 uint32_t fEfl = paTests[iTest].fEflIn;
1954 uint16_t uDst = paTests[iTest].uDstIn;
1955 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
1956 if ( uDst != paTests[iTest].uDstOut
1957 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
1958 || rc != paTests[iTest].rc)
1959 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
1960 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
1961 "%sexpected %#08x %#06RX16 %d%s\n",
1962 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
1963 iVar ? " " : "", fEfl, uDst, rc,
1964 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
1965 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
1966 else
1967 {
1968 *g_pu16 = paTests[iTest].uDstIn;
1969 *g_pfEfl = paTests[iTest].fEflIn;
1970 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
1971 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
1972 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
1973 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
1974 }
1975 }
1976 pfn = g_aMulDivU8[iFn].pfnNative;
1977 }
1978 }
1979}
1980
1981#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits MulDivU<a_cBits>Generate(), which writes test vector arrays for the
 * 16/32/64-bit multiply and divide workers in @a a_aSubTests to @a pOut,
 * using the native worker to compute the expected outputs.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        /* Only the native flavour and the one matching the host CPU. */ \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
2009#else
2010# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2011#endif
2012
/**
 * Declares the subtest table and test driver for the 16/32/64-bit multiply
 * and divide workers.  These take two destination operands (e.g. DX:AX) and
 * return a status code (for \#DE signalling on divide).  EFLAGS bits listed
 * in uExtra are ignored when comparing (currently all undefined bits).
 */
#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType const a_aSubTests [] = \
{ \
    ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst1 = paTests[iTest].uDst1In; \
                a_Type uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    /* Passing case: re-run it via the dedicated global buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass (if cVars > 1) uses the native worker. */ \
        } \
    } \
}
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2081
2082#ifdef TSTIEMAIMPL_WITH_GENERATOR
2083static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
2084{
2085 MulDivU8Generate(pOut, cTests);
2086 MulDivU16Generate(pOut, cTests);
2087 MulDivU32Generate(pOut, cTests);
2088 MulDivU64Generate(pOut, cTests);
2089}
2090#endif
2091
2092static void MulDivTest(void)
2093{
2094 MulDivU8Test();
2095 MulDivU16Test();
2096 MulDivU32Test();
2097 MulDivU64Test();
2098}
2099
2100
2101/*
2102 * BSWAP
2103 */
/**
 * Tests the BSWAP helpers for 16, 32 and 64-bit operand sizes.
 *
 * Unlike the other tests these use fixed patterns rather than generated data.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        /* BSWAP with a 16-bit operand: the disabled checks below correspond to
           a byte-swapped low word, while the active ones expect the helper to
           zero the low 16 bits.  NOTE(review): the 16-bit BSWAP result is
           undefined per the architecture docs; the active expectation
           presumably matches the implementation/real-CPU behaviour - confirm
           against iemAImpl_bswap_u16. */
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2138
2139
2140
2141/*********************************************************************************************************************************
2142* Floating point (x87 style) *
2143*********************************************************************************************************************************/
2144
2145/*
2146 * FPU constant loading.
2147 */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** The FPU constant-loading workers under test (fld1, fldl2t, fldl2e, fldpi,
 *  fldlg2, fldln2 and fldz). */
static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2160
2161#ifdef TSTIEMAIMPL_WITH_GENERATOR
2162static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2163{
2164 X86FXSTATE State;
2165 RT_ZERO(State);
2166 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2167 {
2168 GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2169 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2170 {
2171 State.FCW = RandFcw();
2172 State.FSW = RandFsw();
2173
2174 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2175 {
2176 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2177 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2178 g_aFpuLdConst[iFn].pfn(&State, &Res);
2179 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2180 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2181 }
2182 }
2183 GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2184 }
2185}
2186#endif
2187
2188static void FpuLoadConstTest(void)
2189{
2190 /*
2191 * Inputs:
2192 * - FSW: C0, C1, C2, C3
2193 * - FCW: Exception masks, Precision control, Rounding control.
2194 *
2195 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2196 */
2197 X86FXSTATE State;
2198 RT_ZERO(State);
2199 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2200 {
2201 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2202 continue;
2203
2204 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2205 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2206 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2207 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2208 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2209 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2210 {
2211 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2212 {
2213 State.FCW = paTests[iTest].fFcw;
2214 State.FSW = paTests[iTest].fFswIn;
2215 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2216 pfn(&State, &Res);
2217 if ( Res.FSW != paTests[iTest].fFswOut
2218 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2219 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2220 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2221 Res.FSW, FormatR80(&Res.r80Result),
2222 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2223 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2224 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2225 FormatFcw(paTests[iTest].fFcw) );
2226 }
2227 pfn = g_aFpuLdConst[iFn].pfnNative;
2228 }
2229 }
2230}
2231
2232
2233/*
2234 * Load floating point values from memory.
2235 */
2236#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdR<a_cBits>Generate(), which produces test vectors for loading a
 * 32/64/80-bit floating point value onto the FPU stack, trying all four
 * rounding modes for each random input/FCW/FSW combination.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
2264#else
2265# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
2266#endif
2267
/**
 * Declares the worker function type, subtest table and test driver for
 * loading a floating point value of the given width (fld_r80_from_rNN),
 * comparing the resulting FSW and 80-bit register value against the
 * pre-generated expectations.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass (if cVars > 1) uses the native worker. */ \
        } \
    } \
}

TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2322
2323#ifdef TSTIEMAIMPL_WITH_GENERATOR
2324static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2325{
2326 FpuLdR80Generate(pOut, cTests);
2327 FpuLdR64Generate(pOut, cTests);
2328 FpuLdR32Generate(pOut, cTests);
2329}
2330#endif
2331
2332static void FpuLdMemTest(void)
2333{
2334 FpuLdR80Test();
2335 FpuLdR64Test();
2336 FpuLdR32Test();
2337}
2338
2339
2340/*
2341 * Load integer values from memory.
2342 */
2343#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdI<a_cBits>Generate(), which produces test vectors for loading a
 * 16/32/64-bit signed integer onto the FPU stack (fild), trying all four
 * rounding modes for each random input/FCW/FSW combination.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
2370#else
2371# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
2372#endif
2373
/**
 * Declares the worker function type, subtest table and test driver for the
 * integer-to-FPU load workers (fild_r80_from_iNN), comparing the resulting
 * FSW and 80-bit register value against the pre-generated expectations.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass (if cVars > 1) uses the native worker. */ \
        } \
    } \
}

TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2427
2428#ifdef TSTIEMAIMPL_WITH_GENERATOR
2429static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
2430{
2431 FpuLdI64Generate(pOut, cTests);
2432 FpuLdI32Generate(pOut, cTests);
2433 FpuLdI16Generate(pOut, cTests);
2434}
2435#endif
2436
2437static void FpuLdIntTest(void)
2438{
2439 FpuLdI64Test();
2440 FpuLdI32Test();
2441 FpuLdI16Test();
2442}
2443
2444
2445/*
2446 * Load binary coded decimal values from memory.
2447 */
/** Worker function type for loading an 80-bit packed BCD value onto the FPU
 *  stack as an 80-bit float. */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The BCD load worker under test. */
static const FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY(fld_r80_from_d80)
};
2456
2457#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test vectors for the packed-BCD load worker, trying all four
 * rounding modes for each random input/FCW/FSW combination.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
2484#endif
2485
/**
 * Tests the packed-BCD load worker against the pre-generated data, comparing
 * the resulting FSW and 80-bit register value with the expectations.
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuLdD80[iFn].pfnNative; /* 2nd pass (if cVars > 1) uses the native worker. */
        }
    }
}
2526
2527
2528/*
2529 * Store values floating point values to memory.
2530 */
2531#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Special 80-bit inputs for the r80->r32 store generator: mantissa patterns
 *  sitting right at the narrower format's rounding boundary. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Special 80-bit inputs for the r80->r64 store generator (see above). */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Special inputs for the r80->r80 store generator; no rounding happens here,
 *  so just a single placeholder entry. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/**
 * Emits FpuStR<a_cBits>Generate(), producing test vectors for storing an
 * 80-bit register value to memory as a 32/64/80-bit float.  In addition to
 * @a cTests random inputs it appends the g_aFpuStR<a_cBits>Specials table,
 * and for each input iterates all four rounding modes and the OM/UM/PM
 * exception mask combinations (stepping by 2, i.e. 8 combos).
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
2590#else
2591# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2592#endif
2593
/**
 * Defines the function pointer type, sub-test table and test function for the
 * fst_r80_to_rNN (store 80-bit real as NN-bit real) worker of a given width.
 *
 * @param a_cBits       Destination width in bits (32, 64 or 80).
 * @param a_rdType      Destination IPRT float type (RTFLOAT32U/64U/80U).
 * @param a_SubTestType Name of the sub-test descriptor type to define.
 * @param a_aSubTests   Name of the sub-test table to define.
 * @param a_TestType    The pre-generated test data entry type.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
/* Runs each enabled sub-test against its pre-generated data, comparing both \
   the output FSW and the stored value; retries with the native worker (if \
   any) on the second variation pass. */ \
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                /* Poison the output buffer so a partially written result is \
                   detected; the RT_ZERO() that used to precede this memset \
                   was a dead store (immediately overwritten) and is gone. */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2648
/* Instantiate the real-store sub-tests, widest destination first. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2652
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all the real-store (fst) workers.
 *  The 80/64/32 order is kept so the generated file layout stays stable. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        FpuStR80Generate,
        FpuStR64Generate,
        FpuStR32Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
2661
2662static void FpuStMemTest(void)
2663{
2664 FpuStR80Test();
2665 FpuStR64Test();
2666 FpuStR32Test();
2667}
2668
2669
/*
 * Store integer values to memory or register.
 */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/** fist/fistt r80 -> i16; fistt has separate AMD and Intel entries. */
static const FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY(fist_r80_to_i16),
    ENTRY_AMD(  fistt_r80_to_i16, 0),
    ENTRY_INTEL(fistt_r80_to_i16, 0),
};
/** fist/fistt r80 -> i32. */
static const FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY(fist_r80_to_i32),
    ENTRY(fistt_r80_to_i32),
};
/** fist/fistt r80 -> i64. */
static const FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY(fist_r80_to_i64),
    ENTRY(fistt_r80_to_i64),
};
2693
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Boundary inputs for the i16 store generator: values around 2^13..2^17 and
 *  up to 2^32, probing rounding and overflow at the INT16 limits. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Boundary inputs for the i32 store generator, around the INT32 limits. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Boundary inputs for the i64 store generator, around the INT64 limits. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
2784
/**
 * Emits the test-data generator for the NN-bit integer store workers,
 * writing vendor-neutral data to pOut and CPU-flavour specific data (see
 * idxCpuEflFlavour) to pOutCpu.  Only defined in generator builds.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            /* Vendor-specific workers can only be generated on a matching host CPU. */ \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
2836
/**
 * Defines the generator (in generator builds) and test function for the
 * fist/fistt r80 -> iNN store workers of a given width.
 *
 * @param a_cBits       Destination integer width in bits (16, 32 or 64).
 * @param a_iType       Destination C integer type (int16_t/int32_t/int64_t).
 * @param a_szFmt       IPRT format specifier for a_iType (e.g. "%RI32").
 * @param a_SubTestType Sub-test descriptor type (already defined above).
 * @param a_aSubTests   Sub-test table (already defined above).
 * @param a_TestType    The pre-generated test data entry type.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                /* ~2 sentinel: any value the worker fails to overwrite stands out. */ \
                a_iType iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2879
// Note: fistt_r80_to_i16 results differ between AMD and Intel CPUs, hence the
//       vendor-specific ENTRY_AMD/ENTRY_INTEL entries in g_aFpuStI16 above.
2881
/* Instantiate the integer-store sub-tests, widest destination first. */
TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
2885
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all the integer-store (fist/fistt) workers.
 *  The 64/32/16 order is kept so the generated file layout stays stable. */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    void (* const s_apfnGen[])(PRTSTREAM, PRTSTREAM, uint32_t) =
    {
        FpuStI64Generate,
        FpuStI32Generate,
        FpuStI16Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, pOutCpu, cTests);
}
#endif
2894
2895static void FpuStIntTest(void)
2896{
2897 FpuStI64Test();
2898 FpuStI32Test();
2899 FpuStI16Test();
2900}
2901
2902
/*
 * Store as packed BCD value (memory).
 */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);

/** The r80 -> packed-BCD (d80) store worker under test. */
static const FPU_ST_D80_T g_aFpuStD80[] =
{
    ENTRY(fst_r80_to_d80),
};
2914
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the r80 -> packed-BCD store worker, sweeping all
 * four rounding modes and a subset of the OM/UM/PM exception-mask combos for
 * random inputs plus boundary values around the 18-digit BCD maximum.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* 0xde0b6b3a763ffff0 * 2^(59-63) = 999999999999999999 = the BCD maximum. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
2968
2969
2970static void FpuStD80Test(void)
2971{
2972 X86FXSTATE State;
2973 RT_ZERO(State);
2974 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
2975 {
2976 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
2977 continue;
2978
2979 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
2980 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
2981 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
2982 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
2983 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2984 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2985 {
2986 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2987 {
2988 RTFLOAT80U const InVal = paTests[iTest].InVal;
2989 uint16_t uFswOut = 0;
2990 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
2991 State.FCW = paTests[iTest].fFcw;
2992 State.FSW = paTests[iTest].fFswIn;
2993 pfn(&State, &uFswOut, &OutVal, &InVal);
2994 if ( uFswOut != paTests[iTest].fFswOut
2995 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
2996 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
2997 "%s -> fsw=%#06x %s\n"
2998 "%s expected %#06x %s%s%s (%s)\n",
2999 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3000 FormatR80(&paTests[iTest].InVal),
3001 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3002 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3003 FswDiff(uFswOut, paTests[iTest].fFswOut),
3004 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3005 FormatFcw(paTests[iTest].fFcw) );
3006 }
3007 pfn = g_aFpuStD80[iFn].pfnNative;
3008 }
3009 }
3010}
3011
3012
3013
3014/*********************************************************************************************************************************
3015* x87 FPU Binary Operations *
3016*********************************************************************************************************************************/
3017
/*
 * Binary FPU operations on two 80-bit floating point values.
 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);

/** r80-by-r80 binary workers; the transcendental ones get vendor-specific
 *  entries because C1 and rounding differ between AMD and Intel. */
static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
{
    ENTRY(fadd_r80_by_r80),
    ENTRY(fsub_r80_by_r80),
    ENTRY(fsubr_r80_by_r80),
    ENTRY(fmul_r80_by_r80),
    ENTRY(fdiv_r80_by_r80),
    ENTRY(fdivr_r80_by_r80),
    ENTRY(fprem_r80_by_r80),
    ENTRY(fprem1_r80_by_r80),
    ENTRY(fscale_r80_by_r80),
    ENTRY_AMD(  fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
};
3041
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the r80-by-r80 binary workers, sweeping rounding
 * mode, precision control and masked/unmasked exceptions for each input pair.
 * Vendor-specific workers go to pOutCpu and are only generated when the host
 * CPU flavour matches.
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
          RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            /* Towards the end of the random range, re-roll non-normal pairs
               until at least a quarter of the inputs are normal+normal. */
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                    {
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                  | (iRounding << X86_FCW_RC_SHIFT)
                                  | (iPrecision << X86_FCW_PC_SHIFT)
                                  | iMask;
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n",
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                    }
                }
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
#endif
3105
3106
/**
 * Tests the r80-by-r80 binary workers against the pre-generated data,
 * checking both the resulting FSW and the 80-bit result; the native
 * worker (if any) is exercised on the second variation pass.
 */
static void FpuBinaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
        FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal1, &InVal2);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuBinaryR80[iFn].pfnNative;
        }
    }
}
3148
3149
/*
 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
 */
/* Dummy <type>_IS_NORMAL checks for the integer operand types so the shared
   generator template below can treat integer inputs as always "normal". */
#define int64_t_IS_NORMAL(a) 1
#define int32_t_IS_NORMAL(a) 1
#define int16_t_IS_NORMAL(a) 1
3156
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special input pairs for the r80-op-<small type> binary generators. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/**
 * Emits the test-data generator for r80-op-<small type> binary workers,
 * sweeping rounding, precision and masked/unmasked exceptions per input pair.
 */
# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = cTests / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src(a_cBits) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            /* Re-roll non-normal pairs near the end so at least a quarter are normal. */ \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3229
/**
 * Defines the sub-test table, generator and test function for the six
 * arithmetic r80-op-<small type> binary workers (add/mul/sub/subr/div/divr).
 *
 * @param a_cBits       Width of the second operand in bits.
 * @param a_LoBits      Lowercase operand tag used in the worker names (r64/r32/i32/i16).
 * @param a_UpBits      Uppercase operand tag used in type/function names.
 * @param a_I           'i' for the integer (fiadd & friends) variants, RT_NOTHING otherwise.
 * @param a_Type2       C type of the second operand.
 * @param a_SubTestType Name of the sub-test descriptor type to define.
 * @param a_aSubTests   Name of the sub-test table to define.
 * @param a_TestType    The pre-generated test data entry type.
 */
#define TEST_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate for r64/r32 (fadd etc.) and i32/i16 (fiadd etc.) second operands. */
TEST_FPU_BINARY_SMALL(64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3290
3291
/*
 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special input pairs for the FSW-only (compare-style) binary generators. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3319
/**
 * Emits a FpuBinaryFsw\<UpBits\>Generate() function producing reference data
 * for an FPU binary operation that only modifies FSW (fcom/ficom family).
 *
 * For each sub-test it writes a C array of a_TestType initializers to pOut:
 * cTests random input pairs (at least a quarter of them normal/normal, see
 * the retry logic below) plus the matching s_aFpuBinaryFsw*Specials entries,
 * each run once with all exceptions masked and once unmasked.
 */
# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = cTests / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src(a_cBits) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Too few normal pairs near the end of the run - redo this iteration with new randoms. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3361#else
3362# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3363#endif
3364
/**
 * Emits the sub-test type, worker table, generator (when enabled) and the
 * FpuBinaryFsw\<UpBits\>Test() runner for FSW-only FPU binary operations.
 *
 * The runner replays every recorded test against the C implementation and,
 * when present, the native assembly variant (second iVar pass), failing with
 * a detailed FSW diff on mismatch.
 */
#define TEST_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second pass (if any) runs the native assembly implementation. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3412
/* Instantiate the FSW-only binary op tests: fcom/fucom against r80/r64/r32
   and ficom against i32/i16 second operands. */
TEST_FPU_BINARY_FSW(80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3418
3419
3420/*
3421 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3422 */
/** Sub-test descriptor type for 80-bit FPU compares producing EFLAGS (and FSW). */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** The fcomi/fucomi workers under test. */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3430
3431#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed special input pair appended after the random fcomi/fucomi inputs. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3437
/**
 * Generates reference test data for the fcomi/fucomi workers.
 *
 * Writes one C array initializer per entry in g_aFpuBinaryEflR80 to @a pOut,
 * recording FCW/FSW-in, FSW-out, both inputs and the resulting EFLAGS for
 * @a cTests random pairs plus the s_aFpuBinaryEflR80Specials entries.
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = cTests / 4; /* ensure at least a quarter of the pairs are normal+normal */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal pairs near the end of the run - redo this iteration with new randoms. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
3476#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3477
/**
 * Replays the recorded fcomi/fucomi test data against the C implementation
 * and (second variation pass) the native assembly one, checking both the
 * resulting FSW and EFLAGS values.
 */
static void FpuBinaryEflR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
        FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
        PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
                if (   uFswOut != paTests[iTest].fFswOut
                    || fEflOut != paTests[iTest].fEflOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                          "%s -> fsw=%#06x efl=%#08x\n"
                                          "%s expected %#06x %#08x %s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? " " : "", uFswOut, fEflOut,
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
                                 EFlagsDiff(fEflOut, paTests[iTest].fEflOut), FormatFcw(paTests[iTest].fFcw));
            }
            /* Second pass (if any) runs the native assembly implementation. */
            pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
        }
    }
}
3517
3518
3519/*********************************************************************************************************************************
3520* x87 FPU Unary Operations *
3521*********************************************************************************************************************************/
3522
3523/*
3524 * Unary FPU operations on one 80-bit floating point value.
3525 *
3526 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3527 * a rounding error or not.
3528 */
/** Sub-test descriptor type for one-operand 80-bit FPU operations. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* uExtra values: whether an operation's results may legitimately differ by a
   rounding error (currently only f2xm1 on Intel) - see FCW bit 7 usage. */
enum { kUnary_Accurate = 0, kUnary_Rounding_F2xm1 };
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX( fabs_r80, kUnary_Accurate),
    ENTRY_EX( fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX( fsqrt_r80, kUnary_Accurate),
    ENTRY_EX( frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate),
    ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate),
};
3545
3546#ifdef TSTIEMAIMPL_WITH_GENERATOR
3547
3548static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3549{
3550 if ( enmKind == kUnary_Rounding_F2xm1
3551 && RTFLOAT80U_IS_NORMAL(pr80Val)
3552 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3553 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3554 return true;
3555 return false;
3556}
3557
/**
 * Generates reference test data for the unary 80-bit FPU workers.
 *
 * Common results go to @a pOut; CPU-flavour specific workers (AMD/Intel
 * entries) go to @a pOutCpu and only when the host CPU matches.  Each input
 * is exercised with all 4 rounding modes x 4 precision settings, first with
 * all exceptions masked, then unmasked, then with the raised exceptions
 * selectively masked/unmasked.
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
#if 1
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
#endif
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
#if 1
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
#endif
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker (if any) for generating reference results. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific entry: skip unless the host matches, and write to the CPU file. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* For f2xm1, steer a share of the inputs into the interesting [2^-69, 1) range. */
                    unsigned uTargetExp = RTFLOAT80U_EXP_BIAS;
                    unsigned cTargetExp = 69;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals near the end of the run - redo this iteration with a new random. */
                iTest -= 1;
                continue;
            }

            /* Reserved FCW bit 7 is (ab)used to flag tests that may have rounding errors. */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* First with all exceptions masked ('m')... */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision);

                    /* ...then with all exceptions unmasked ('u')... */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision);

                    /* ...and finally with only the exceptions that were actually raised masked. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* Masking changed which exceptions got raised; extend the mask and redo. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt);
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            /* Several exceptions raised: unmask one of them at a time. */
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked);
                                }
                    }
                }
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
3681#endif
3682
3683static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
3684{
3685 if (fFcw1 == fFcw2)
3686 return true;
3687 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
3688 {
3689 *pfRndErr = true;
3690 return true;
3691 }
3692 return false;
3693}
3694
3695static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
3696{
3697 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
3698 return true;
3699 if ( fRndErrOk
3700 && pr80Val1->s.fSign == pr80Val2->s.fSign)
3701 {
3702 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
3703 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
3704 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
3705 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
3706 ||
3707 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
3708 && pr80Val1->s.uMantissa == UINT64_MAX
3709 && pr80Val2->s.uMantissa == RT_BIT_64(63))
3710 ||
3711 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
3712 && pr80Val2->s.uMantissa == UINT64_MAX
3713 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
3714 {
3715 *pfRndErr = true;
3716 return true;
3717 }
3718 }
3719 return false;
3720}
3721
3722
/**
 * Replays the recorded unary 80-bit FPU tests against the C implementation
 * and (second variation pass) the native one.
 *
 * FCW bit 7 in the recorded data flags tests where a one-ULP / C1 rounding
 * difference is acceptable; those are counted and reported separately.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80); /* bit 7 = rounding slack flag */
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* strip the flag before use */
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            /* Second pass (if any) runs the native assembly implementation. */
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
3771
3772
3773/*
3774 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
3775 */
/** Sub-test descriptor type for unary 80-bit FPU operations affecting only FSW. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/** The ftst/fxam workers; uExtra == 1 marks fxam (needs FTW handling). */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1),
};
3783
3784#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates reference data for the ftst/fxam workers.
 *
 * ftst is run across all rounding/precision/mask combinations; fxam is run
 * once per input since it ignores rounding/precision, with a random chance
 * of marking the register empty (flagged via reserved FCW bit 7).
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        /* Prefer the native worker (if any) for generating reference results. */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific entry: skip unless the host matches, and write to the CPU file. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals near the end of the run - redo this iteration with a new random. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
3858#endif
3859
3860
/**
 * Replays the recorded ftst/fxam tests, reconstructing FTW from the empty
 * flag stored in reserved FCW bit 7 by the generator.
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", fFswOut,
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            /* Second pass (if any) runs the native assembly implementation. */
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
        }
    }
}
3900
3901/*
3902 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
3903 */
/** Sub-test descriptor type for unary 80-bit FPU operations with two result values. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/** The fptan/fxtract/fsincos workers under test. */
static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
3914
3915#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates reference data for the two-result unary workers
 * (fptan/fxtract/fsincos), covering all rounding/precision combinations with
 * exceptions both masked and unmasked.
 */
static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        /* Prefer the native worker (if any) for generating reference results. */
        PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific entry: skip unless the host matches, and write to the CPU file. */
            if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals near the end of the run - redo this iteration with a new random. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                    {
                        IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                  | (iRounding << X86_FCW_RC_SHIFT)
                                  | (iPrecision << X86_FCW_PC_SHIFT)
                                  | iMask;
                        pfn(&State, &Res, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n",
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res.r80Result1), GenFormatR80(&Res.r80Result2),
                                     iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                    }
                }
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
    }
}
3976#endif
3977
3978
/**
 * Replays the recorded fptan/fxtract/fsincos tests, checking FSW and both
 * result values against the reference data.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second pass (if any) runs the native assembly implementation. */
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
        }
    }
}
4021
4022
4023
4024int main(int argc, char **argv)
4025{
4026 int rc = RTR3InitExe(argc, &argv, 0);
4027 if (RT_FAILURE(rc))
4028 return RTMsgInitFailure(rc);
4029
4030 /*
     * Determine the host CPU.
4032 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4033 */
4034#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4035 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4036 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4037 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4038#else
4039 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4040#endif
4041
4042 /*
4043 * Parse arguments.
4044 */
4045 enum { kModeNotSet, kModeTest, kModeGenerate }
4046 enmMode = kModeNotSet;
4047 bool fInt = true;
4048 bool fFpuLdSt = true;
4049 bool fFpuBinary1 = true;
4050 bool fFpuBinary2 = true;
4051 bool fFpuOther = true;
4052 bool fCpuData = true;
4053 bool fCommonData = true;
4054 uint32_t const cDefaultTests = 96;
4055 uint32_t cTests = cDefaultTests;
4056 RTGETOPTDEF const s_aOptions[] =
4057 {
4058 // mode:
4059 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4060 { "--test", 't', RTGETOPT_REQ_NOTHING },
4061 // test selection (both)
4062 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4063 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4064 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4065 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4066 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4067 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4068 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4069 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4070 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4071 { "--include", 'I', RTGETOPT_REQ_STRING },
4072 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4073 // generation parameters
4074 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4075 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4076 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4077 };
4078
4079 RTGETOPTSTATE State;
4080 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4081 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4082
4083 RTGETOPTUNION ValueUnion;
4084 while ((rc = RTGetOpt(&State, &ValueUnion)))
4085 {
4086 switch (rc)
4087 {
4088 case 'g':
4089 enmMode = kModeGenerate;
4090 break;
4091 case 't':
4092 enmMode = kModeTest;
4093 break;
4094
4095 case 'a':
4096 fCpuData = true;
4097 fCommonData = true;
4098 fInt = true;
4099 fFpuLdSt = true;
4100 fFpuBinary1 = true;
4101 fFpuBinary2 = true;
4102 fFpuOther = true;
4103 break;
4104 case 'z':
4105 fCpuData = false;
4106 fCommonData = false;
4107 fInt = false;
4108 fFpuLdSt = false;
4109 fFpuBinary1 = false;
4110 fFpuBinary2 = false;
4111 fFpuOther = false;
4112 break;
4113
4114 case 'F':
4115 fFpuLdSt = true;
4116 break;
4117 case 'O':
4118 fFpuOther = true;
4119 break;
4120 case 'B':
4121 fFpuBinary1 = true;
4122 break;
4123 case 'P':
4124 fFpuBinary2 = true;
4125 break;
4126 case 'i':
4127 fInt = true;
4128 break;
4129
4130 case 'I':
4131 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4132 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4133 RT_ELEMENTS(g_apszIncludeTestPatterns));
4134 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4135 break;
4136 case 'X':
4137 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4138 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4139 RT_ELEMENTS(g_apszExcludeTestPatterns));
4140 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4141 break;
4142
4143 case 'm':
4144 fCommonData = true;
4145 break;
4146 case 'c':
4147 fCpuData = true;
4148 break;
4149 case 'n':
4150 cTests = ValueUnion.u32;
4151 break;
4152
4153 case 'h':
4154 RTPrintf("usage: %s <-g|-t> [options]\n"
4155 "\n"
4156 "Mode:\n"
4157 " -g, --generate\n"
4158 " Generate test data.\n"
4159 " -t, --test\n"
4160 " Execute tests.\n"
4161 "\n"
4162 "Test selection (both modes):\n"
4163 " -a, --all\n"
4164 " Enable all tests and generated test data. (default)\n"
4165 " -z, --zap, --none\n"
4166 " Disable all tests and test data types.\n"
4167 " -i, --int\n"
4168 " Enable non-FPU tests.\n"
4169 " -F, --fpu-ld-st\n"
4170 " Enable FPU load and store tests.\n"
4171 " -B, --fpu-binary-1\n"
4172 " Enable FPU binary 80-bit FP tests.\n"
4173 " -P, --fpu-binary-2\n"
4174 " Enable FPU binary 64- and 32-bit FP tests.\n"
4175 " -O, --fpu-other\n"
4176 " Enable other FPU tests.\n"
4177 " -I,--include=<test-patter>\n"
4178 " Enable tests matching the given pattern.\n"
4179 " -X,--exclude=<test-patter>\n"
4180 " Skip tests matching the given pattern (overrides --include).\n"
4181 "\n"
4182 "Generation:\n"
4183 " -m, --common\n"
4184 " Enable generating common test data.\n"
4185 " -c, --only-cpu\n"
4186 " Enable generating CPU specific test data.\n"
4187 " -n, --number-of-test <count>\n"
4188 " Number of tests to generate. Default: %u\n"
4189 , argv[0], cDefaultTests);
4190 return RTEXITCODE_SUCCESS;
4191 default:
4192 return RTGetOptPrintError(rc, &ValueUnion);
4193 }
4194 }
4195
    /*
     * Generate data?
     *
     * Each enabled test group writes two .cpp data files: a common one and a
     * CPU specific (AMD vs Intel) one.  Disabled outputs are redirected to the
     * bit bucket so the generator functions can run unconditionally.
     */
    if (enmMode == kModeGenerate)
    {
#ifdef TSTIEMAIMPL_WITH_GENERATOR
        /* Host CPU description goes into the generated file headers. */
        char szCpuDesc[256] = {0};
        RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
        const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
        const char * const pszBitBucket = "NUL";
# else
        const char * const pszBitBucket = "/dev/null";
# endif

        /* Scale the zero-destination/zero-source test counts from the requested
           test count (-n 0 falls back to the default). */
        if (cTests == 0)
            cTests = cDefaultTests;
        g_cZeroDstTests = RT_MIN(cTests / 16, 32);
        g_cZeroSrcTests = g_cZeroDstTests * 2;

        /* Integer (non-FPU) instruction test data. */
        if (fInt)
        {
            const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
            PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
            const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
                                       ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
            PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
            if (!pStrmData || !pStrmDataCpu)
                return RTEXITCODE_FAILURE;

            BinU8Generate( pStrmData, pStrmDataCpu, cTests);
            BinU16Generate(pStrmData, pStrmDataCpu, cTests);
            BinU32Generate(pStrmData, pStrmDataCpu, cTests);
            BinU64Generate(pStrmData, pStrmDataCpu, cTests);
            ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128)); /* floor of 128 for better coverage */
            UnaryGenerate(pStrmData, cTests);
            ShiftGenerate(pStrmDataCpu, cTests);
            MulDivGenerate(pStrmDataCpu, cTests);

            /* Close both streams; the inner call's status is threaded into the outer one. */
            RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
                                                       GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
            if (rcExit != RTEXITCODE_SUCCESS)
                return rcExit;
        }

        /* FPU load/store test data. */
        if (fFpuLdSt)
        {
            const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
            PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
            const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
                                       ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
            PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
            if (!pStrmData || !pStrmDataCpu)
                return RTEXITCODE_FAILURE;

            FpuLdConstGenerate(pStrmData, cTests);
            FpuLdIntGenerate(pStrmData, cTests);
            FpuLdD80Generate(pStrmData, cTests);
            FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
            FpuStD80Generate(pStrmData, cTests);
            uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
            FpuLdMemGenerate(pStrmData, cTests2);
            FpuStMemGenerate(pStrmData, cTests2);

            RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
                                                       GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
            if (rcExit != RTEXITCODE_SUCCESS)
                return rcExit;
        }

        /* FPU binary operations on 80-bit FP test data. */
        if (fFpuBinary1)
        {
            const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
            PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
            const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
                                       ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
            PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
            if (!pStrmData || !pStrmDataCpu)
                return RTEXITCODE_FAILURE;

            FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
            FpuBinaryFswR80Generate(pStrmData, cTests);
            FpuBinaryEflR80Generate(pStrmData, cTests);

            RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
                                                       GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
            if (rcExit != RTEXITCODE_SUCCESS)
                return rcExit;
        }

        /* FPU binary operations on 64- and 32-bit FP and integer test data.
           Note: no CPU specific data for this group yet, so the CPU stream is
           deliberately pointed at the bit bucket (see commented-out code). */
        if (fFpuBinary2)
        {
            const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
            PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
            const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
                                       ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
            PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
            if (!pStrmData || !pStrmDataCpu)
                return RTEXITCODE_FAILURE;

            FpuBinaryR64Generate(pStrmData, cTests);
            FpuBinaryR32Generate(pStrmData, cTests);
            FpuBinaryI32Generate(pStrmData, cTests);
            FpuBinaryI16Generate(pStrmData, cTests);
            FpuBinaryFswR64Generate(pStrmData, cTests);
            FpuBinaryFswR32Generate(pStrmData, cTests);
            FpuBinaryFswI32Generate(pStrmData, cTests);
            FpuBinaryFswI16Generate(pStrmData, cTests);

            RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
                                                       GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
            if (rcExit != RTEXITCODE_SUCCESS)
                return rcExit;
        }

        /* Other FPU operations (unary & two-operand R80) test data. */
        if (fFpuOther)
        {
            const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
            PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
            const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
                                       ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
            PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
            if (!pStrmData || !pStrmDataCpu)
                return RTEXITCODE_FAILURE;

            FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
            FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
            FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);

            RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
                                                       GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
            if (rcExit != RTEXITCODE_SUCCESS)
                return rcExit;
        }

        return RTEXITCODE_SUCCESS;
#else
        return RTMsgErrorExitFailure("Test data generator not compiled in!");
#endif
    }
4336
    /*
     * Do testing.  Currently disabled by default as data needs to be checked
     * on both intel and AMD systems first.
     */
    rc = RTTestCreate("tstIEMAimpl", &g_hTest);
    AssertRCReturn(rc, RTEXITCODE_FAILURE);
    if (enmMode == kModeTest)
    {
        RTTestBanner(g_hTest);

        /* Allocate guarded memory for use in the tests.  On allocation failure
           a test failure is recorded rather than bailing out immediately; the
           error count is checked below before running anything. */
#define ALLOC_GUARDED_VAR(a_puVar) do { \
            rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
            if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
        } while (0)
        ALLOC_GUARDED_VAR(g_pu8);
        ALLOC_GUARDED_VAR(g_pu16);
        ALLOC_GUARDED_VAR(g_pu32);
        ALLOC_GUARDED_VAR(g_pu64);
        ALLOC_GUARDED_VAR(g_pu128);
        ALLOC_GUARDED_VAR(g_pu8Two);
        ALLOC_GUARDED_VAR(g_pu16Two);
        ALLOC_GUARDED_VAR(g_pu32Two);
        ALLOC_GUARDED_VAR(g_pu64Two);
        ALLOC_GUARDED_VAR(g_pu128Two);
        ALLOC_GUARDED_VAR(g_pfEfl);
        if (RTTestErrorCount(g_hTest) == 0)
        {
            /* Non-FPU (integer/GPR) instruction tests. */
            if (fInt)
            {
                BinU8Test();
                BinU16Test();
                BinU32Test();
                BinU64Test();
                XchgTest();
                XaddTest();
                CmpXchgTest();
                CmpXchg8bTest();
                CmpXchg16bTest();
                ShiftDblTest();
                UnaryTest();
                ShiftTest();
                MulDivTest();
                BswapTest();
            }

            /* FPU load and store tests. */
            if (fFpuLdSt)
            {
                FpuLoadConstTest();
                FpuLdMemTest();
                FpuLdIntTest();
                FpuLdD80Test();
                FpuStMemTest();
                FpuStIntTest();
                FpuStD80Test();
            }

            /* FPU binary 80-bit FP tests. */
            if (fFpuBinary1)
            {
                FpuBinaryR80Test();
                FpuBinaryFswR80Test();
                FpuBinaryEflR80Test();
            }

            /* FPU binary 64- and 32-bit FP and integer tests. */
            if (fFpuBinary2)
            {
                FpuBinaryR64Test();
                FpuBinaryR32Test();
                FpuBinaryI32Test();
                FpuBinaryI16Test();
                FpuBinaryFswR64Test();
                FpuBinaryFswR32Test();
                FpuBinaryFswI32Test();
                FpuBinaryFswI16Test();
            }

            /* Other FPU tests. */
            if (fFpuOther)
            {
                FpuUnaryR80Test();
                FpuUnaryFswR80Test();
                FpuUnaryTwoR80Test();
            }
        }
        return RTTestSummaryAndDestroy(g_hTest);
    }
    /* Neither -g nor -t given: report the testcase as skipped. */
    return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
}
4424
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette