VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94609

Last change on this file since 94609 was 94606, checked in by vboxsync, 3 years ago

VMM/IEM,libs/softfloat,tstIEMAImpl: C implementation of fadd helper and related rounding tweaks for SoftFloat. Improved floating point test value generation to make sure we cover all types of input before we go full random. This means increasing the minimum number of tests to 160 for the binary floating point instructions, to cover all basic combinations. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 201.1 KB
Line 
1/* $Id: tstIEMAImpl.cpp 94606 2022-04-14 13:48:52Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** Subtest table entry: native-only implementation, no EFLAGS flavour variants. */
#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
/** Like ENTRY, but carries an extra info value in the entry. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Subtest table entry for the Intel EFLAGS flavour of an instruction. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
/** Extended Intel flavour entry.
 * @note a_fEflUndef does not appear in the expansion — presumably documentation
 *       only; TODO confirm against the table consumers. */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Subtest table entry for the AMD EFLAGS flavour of an instruction. */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
/** Extended AMD flavour entry.
 * @note a_fEflUndef does not appear in the expansion (same as the Intel variant). */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Declares a subtest descriptor struct type pairing a test-value table with
 *  the worker function(s) that get exercised against it. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char *pszName; \
        a_FunctionPtrType pfn; \
        a_FunctionPtrType pfnNative; \
        a_TestType const *paTests; \
        uint32_t const *pcTests; \
        uint32_t uExtra; \
        uint8_t idxCpuEflFlavour; \
    } a_TypeName

/** Number of implementation variations to run for a subtest: 1, or 2 when a
 *  native worker matching the host CPU's EFLAGS flavour is present. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
/** The test instance handle. */
static RTTEST g_hTest;
/** Host CPU EFLAGS behaviour flavour (Intel vs AMD), detected at runtime. */
static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** How many leading generated tests force the destination operand to zero. */
static uint32_t g_cZeroDstTests = 2;
/** How many leading generated tests force the source operand to zero. */
static uint32_t g_cZeroSrcTests = 4;
#endif
/* Scratch operand buffers shared by the test workers (allocated elsewhere). */
static uint8_t *g_pu8, *g_pu8Two;
static uint16_t *g_pu16, *g_pu16Two;
static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
static uint64_t *g_pu64, *g_pu64Two;
static RTUINT128U *g_pu128, *g_pu128Two;

/** Round-robin formatting buffers used by the Format*/GenFormat* helpers. */
static char g_aszBuf[16][256];
/** Next buffer index in g_aszBuf (wraps modulo RT_ELEMENTS(g_aszBuf)). */
static unsigned g_idxBuf = 0;

/** Number of entries used in g_apszIncludeTestPatterns. */
static uint32_t g_cIncludeTestPatterns;
/** Number of entries used in g_apszExcludeTestPatterns. */
static uint32_t g_cExcludeTestPatterns;
/** Simple patterns selecting which subtests to run (empty = all). */
static const char *g_apszIncludeTestPatterns[64];
/** Simple patterns selecting which subtests to skip (checked first). */
static const char *g_apszExcludeTestPatterns[64];
99
100
101/*********************************************************************************************************************************
102* Internal Functions *
103*********************************************************************************************************************************/
104static const char *FormatR80(PCRTFLOAT80U pr80);
105static const char *FormatR64(PCRTFLOAT64U pr64);
106static const char *FormatR32(PCRTFLOAT32U pr32);
107
108
109/*
110 * Random helpers.
111 */
112
113static uint32_t RandEFlags(void)
114{
115 uint32_t fEfl = RTRandU32();
116 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
117}
118
119#ifdef TSTIEMAIMPL_WITH_GENERATOR
120
121static uint8_t RandU8(void)
122{
123 return RTRandU32Ex(0, 0xff);
124}
125
126
127static uint16_t RandU16(void)
128{
129 return RTRandU32Ex(0, 0xffff);
130}
131
132
133static uint32_t RandU32(void)
134{
135 return RTRandU32();
136}
137
138#endif
139
140static uint64_t RandU64(void)
141{
142 return RTRandU64();
143}
144
145
146static RTUINT128U RandU128(void)
147{
148 RTUINT128U Ret;
149 Ret.s.Hi = RTRandU64();
150 Ret.s.Lo = RTRandU64();
151 return Ret;
152}
153
154#ifdef TSTIEMAIMPL_WITH_GENERATOR
155
156static uint8_t RandU8Dst(uint32_t iTest)
157{
158 if (iTest < g_cZeroDstTests)
159 return 0;
160 return RandU8();
161}
162
163
164static uint8_t RandU8Src(uint32_t iTest)
165{
166 if (iTest < g_cZeroSrcTests)
167 return 0;
168 return RandU8();
169}
170
171
172static uint16_t RandU16Dst(uint32_t iTest)
173{
174 if (iTest < g_cZeroDstTests)
175 return 0;
176 return RandU16();
177}
178
179
180static uint16_t RandU16Src(uint32_t iTest)
181{
182 if (iTest < g_cZeroSrcTests)
183 return 0;
184 return RandU16();
185}
186
187
188static uint32_t RandU32Dst(uint32_t iTest)
189{
190 if (iTest < g_cZeroDstTests)
191 return 0;
192 return RandU32();
193}
194
195
196static uint32_t RandU32Src(uint32_t iTest)
197{
198 if (iTest < g_cZeroSrcTests)
199 return 0;
200 return RandU32();
201}
202
203
204static uint64_t RandU64Dst(uint32_t iTest)
205{
206 if (iTest < g_cZeroDstTests)
207 return 0;
208 return RandU64();
209}
210
211
212static uint64_t RandU64Src(uint32_t iTest)
213{
214 if (iTest < g_cZeroSrcTests)
215 return 0;
216 return RandU64();
217}
218
219
220/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
221static int16_t RandI16Src2(uint32_t iTest)
222{
223 if (iTest < 18 * 4)
224 switch (iTest % 4)
225 {
226 case 0: return 0;
227 case 1: return INT16_MAX;
228 case 2: return INT16_MIN;
229 case 3: break;
230 }
231 return (int16_t)RandU16();
232}
233
234
235/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
236static int32_t RandI32Src2(uint32_t iTest)
237{
238 if (iTest < 18 * 4)
239 switch (iTest % 4)
240 {
241 case 0: return 0;
242 case 1: return INT32_MAX;
243 case 2: return INT32_MIN;
244 case 3: break;
245 }
246 return (int32_t)RandU32();
247}
248
249
250#if 0
251static int64_t RandI64Src(uint32_t iTest)
252{
253 RT_NOREF(iTest);
254 return (int64_t)RandU64();
255}
256#endif
257
258
259static uint16_t RandFcw(void)
260{
261 return RandU16() & ~X86_FCW_ZERO_MASK;
262}
263
264
/** Returns a random FSW value; any 16-bit pattern is a valid FSW. */
static uint16_t RandFsw(void)
{
    /* The listed masks cover every FSW bit, so no masking is required. */
    AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
    return RandU16();
}
270
271
272static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
273{
274 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
275 pr80->sj64.uFraction >>= cShift;
276 else
277 pr80->sj64.uFraction = (cShift % 19) + 1;
278}
279
280
281
/**
 * Generates a random 80-bit floating point value of a given basic type.
 *
 * @returns The value.
 * @param   bType       Value type selector, only the low 5 bits are used:
 *                      0..3   zero / pseudo-infinity / infinity / indefinite,
 *                      4..7   denormals & pseudo denormals,
 *                      8..9   pseudo NaNs,
 *                      10..13 quiet & signalling NaNs,
 *                      14..15 unnormals,
 *                      16..25 normals (16 targets rounding extremes),
 *                      26..31 leave the raw random bits unadjusted.
 * @param   cTarget     Width of the value the result will be converted to /
 *                      paired with (80, 64, 32; or 16/32/59 for integers).
 * @param   fIntTarget  Whether cTarget denotes an integer target.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the type-specific code below only fixes
       up the fields needed to hit the requested encoding class. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7).
           Odd types vary the fraction via a shift, even ones just make sure it
           is non-zero for the proper denormals. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN: max exponent with the integer bit clear. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet (10,11) and signalling (12,13) NaNs, selected via bit 62. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals: integer bit clear with an in-range non-zero exponent. */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value stays representable (as a normal
               or at least finite value) in the target floating point format. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: keep the magnitude within the target's range. */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    return r80;
}
420
421
422static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
423{
424 /*
425 * Make it more likely that we get a good selection of special values.
426 */
427 return RandR80Ex(RandU8(), cTarget, fIntTarget);
428
429}
430
431
432static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
433{
434 /* Make sure we cover all the basic types first before going for random selection: */
435 if (iTest <= 18)
436 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
437 return RandR80(cTarget, fIntTarget);
438}
439
440
441/**
442 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
443 * to a 0..17, covering all basic value types.
444 */
445static uint8_t RandR80Src12RemapType(uint8_t bType)
446{
447 switch (bType)
448 {
449 case 0: return 18; /* normal */
450 case 1: return 16; /* normal extreme rounding */
451 case 2: return 14; /* unnormal */
452 case 3: return 12; /* Signalling NaN */
453 case 4: return 10; /* Quiet NaN */
454 case 5: return 8; /* PseudoNaN */
455 case 6: return 6; /* Pseudo Denormal */
456 case 7: return 4; /* Denormal */
457 case 8: return 3; /* Indefinite */
458 case 9: return 2; /* Infinity */
459 case 10: return 1; /* Pseudo-Infinity */
460 case 11: return 0; /* Zero */
461 default: AssertFailedReturn(18);
462 }
463}
464
465
466/**
467 * This works in tandem with RandR80Src2 to make sure we cover all operand
468 * type mixes first before we venture into regular random testing.
469 *
470 * There are 11 basic variations, when we leave out the five odd ones using
471 * SafeR80FractionShift. Because of the special normalized value targetting at
472 * rounding, we make it an even 12. So 144 combinations for two operands.
473 */
474static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
475{
476 if (cPartnerBits == 80)
477 {
478 Assert(!fPartnerInt);
479 if (iTest < 12 * 12)
480 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
481 }
482 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
483 {
484 if (iTest < 12 * 10)
485 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
486 }
487 else if (iTest < 18 * 4 && fPartnerInt)
488 return RandR80Ex(iTest / 4);
489 return RandR80();
490}
491
492
493/** Partner to RandR80Src1. */
494static RTFLOAT80U RandR80Src2(uint32_t iTest)
495{
496 if (iTest < 12 * 12)
497 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
498 return RandR80();
499}
500
501
502static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
503{
504 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
505 pr64->s64.uFraction >>= cShift;
506 else
507 pr64->s64.uFraction = (cShift % 19) + 1;
508}
509
510
/**
 * Generates a random 64-bit float of the given basic value type.
 *
 * @returns The value.
 * @param   bType   Value type selector; only the low 4 bits are used:
 *                  0/1 zero & infinity, 2/3 subnormals, 4..7 NaNs,
 *                  8..11 normals, 12..15 keep the raw random bits.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals; type 3 varies the fraction via a shift, type 2 only
           makes sure it is non-zero. */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: 4/5 quiet, 6/7 signalling (top fraction bit selects). */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values: clamp the exponent away
           from the subnormal (0) and NaN/Inf (0x7ff) encodings. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    return r64;
}
567
568
569static RTFLOAT64U RandR64Src(uint32_t iTest)
570{
571 if (iTest < 16)
572 return RandR64Ex(iTest);
573 return RandR64Ex(RandU8());
574}
575
576
577/** Pairing with a 80-bit floating point arg. */
578static RTFLOAT64U RandR64Src2(uint32_t iTest)
579{
580 if (iTest < 12 * 10)
581 return RandR64Ex(9 - iTest % 10); /* start with normal values */
582 return RandR64Ex(RandU8());
583}
584
585
586static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
587{
588 if (pr32->s.uFraction >= RT_BIT_32(cShift))
589 pr32->s.uFraction >>= cShift;
590 else
591 pr32->s.uFraction = (cShift % 19) + 1;
592}
593
594
/**
 * Generates a random 32-bit float of the given basic value type.
 *
 * @returns The value.
 * @param   bType   Value type selector; only the low 4 bits are used:
 *                  0/1 zero & infinity, 2/3 subnormals, 4..7 NaNs,
 *                  8..11 normals, 12..15 keep the raw random bits.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals; type 3 varies the fraction via a shift, type 2 only
           makes sure it is non-zero. */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: 4/5 quiet, 6/7 signalling (top fraction bit selects). */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values: clamp the exponent away
           from the subnormal (0) and NaN/Inf (0xff) encodings. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    return r32;
}
650
651
652static RTFLOAT32U RandR32Src(uint32_t iTest)
653{
654 if (iTest < 16)
655 return RandR32Ex(iTest);
656 return RandR32Ex(RandU8());
657}
658
659
660/** Pairing with a 80-bit floating point arg. */
661static RTFLOAT32U RandR32Src2(uint32_t iTest)
662{
663 if (iTest < 12 * 10)
664 return RandR32Ex(9 - iTest % 10); /* start with normal values */
665 return RandR32Ex(RandU8());
666}
667
668
/**
 * Generates a random packed BCD (80-bit) value.
 *
 * Tests 0..2 return zeros (alternating sign), tests 3..4 return indefinites,
 * and thereafter every 8-test cycle produces two illegally encoded values
 * (iTest & 7 == 6 or 7) amongst otherwise valid random BCD numbers.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: raw random pair bytes (nibbles may exceed 9), and every
           other one also gets a non-zero pad field. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: each nibble is a proper decimal digit 0..9. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
706
707
/**
 * Formats an 80-bit float as C source (an RTFLOAT80U_INIT_* invocation) for
 * the generated test data files, using symbolic initializers for the common
 * special values and falling back to a raw INIT_C for everything else.
 *
 * @returns Pointer into the static round-robin buffer pool (g_aszBuf), valid
 *          until the pool wraps around.
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* The symbolic QNAN/SNAN initializers only match payload value 1. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
726
727const char *GenFormatR64(PCRTFLOAT64U prd)
728{
729 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
730 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
731 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
732 return pszBuf;
733}
734
735
736const char *GenFormatR32(PCRTFLOAT32U pr)
737{
738 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
739 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
740 return pszBuf;
741}
742
743
/**
 * Formats a packed BCD value as an RTPBCD80U_INIT_C / RTPBCD80U_INIT_EX_C
 * invocation for generated test data.
 *
 * @returns Pointer into the static round-robin buffer pool (g_aszBuf).
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    /* Digit pairs are emitted most-significant first. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* NOTE(review): the trailing ')' and '\0' are appended without a bounds
       check — relies on the 256 byte buffer always being large enough for the
       fixed-size output; confirm if the buffer size ever changes. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
761
762
763const char *GenFormatI64(int64_t i64)
764{
765 if (i64 == INT64_MIN) /* This one is problematic */
766 return "INT64_MIN";
767 if (i64 == INT64_MAX)
768 return "INT64_MAX";
769 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
770 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
771 return pszBuf;
772}
773
774
/** Pointer overload of GenFormatI64, formatting the pointed-to value. */
const char *GenFormatI64(int64_t const *pi64)
{
    return GenFormatI64(*pi64);
}
779
780
781const char *GenFormatI32(int32_t i32)
782{
783 if (i32 == INT32_MIN) /* This one is problematic */
784 return "INT32_MIN";
785 if (i32 == INT32_MAX)
786 return "INT32_MAX";
787 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
788 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
789 return pszBuf;
790}
791
792
/** Pointer overload of GenFormatI32, formatting the pointed-to value. */
const char *GenFormatI32(int32_t const *pi32)
{
    return GenFormatI32(*pi32);
}
797
798
799const char *GenFormatI16(int16_t i16)
800{
801 if (i16 == INT16_MIN) /* This one is problematic */
802 return "INT16_MIN";
803 if (i16 == INT16_MAX)
804 return "INT16_MAX";
805 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
806 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
807 return pszBuf;
808}
809
810
/** Pointer overload of GenFormatI16, formatting the pointed-to value. */
const char *GenFormatI16(int16_t const *pi16)
{
    return GenFormatI16(*pi16);
}
815
816
/**
 * Writes the standard file header (Id line, doc comment and license) to a
 * generated test data file.
 *
 * @param   pOut        The output stream.
 * @param   pszCpuDesc  CPU description to embed in the file comment.
 * @param   pszCpuType  Optional CPU type string (NULL allowed).
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 94606 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev]))
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 94606 2022-04-14 13:48:52Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.215389.xyz. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
849
850
851static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
852{
853 PRTSTREAM pOut = NULL;
854 int rc = RTStrmOpen(pszFilename, "w", &pOut);
855 if (RT_SUCCESS(rc))
856 {
857 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
858 return pOut;
859 }
860 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
861 return NULL;
862}
863
864
865static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
866{
867 RTStrmPrintf(pOut,
868 "\n"
869 "/* end of file */\n");
870 int rc = RTStrmClose(pOut);
871 if (RT_SUCCESS(rc))
872 return rcExit;
873 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
874}
875
876
/** Emits the opening of a g_aTests_&lt;name&gt; array definition. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
881
882
/** Emits the closing brace of a g_aTests_&lt;name&gt; array and the matching
 *  g_cTests_&lt;name&gt; element count definition. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
891
892#endif /* TSTIEMAIMPL_WITH_GENERATOR */
893
894
895/*
896 * Test helpers.
897 */
898static bool IsTestEnabled(const char *pszName)
899{
900 /* Process excludes first: */
901 uint32_t i = g_cExcludeTestPatterns;
902 while (i-- > 0)
903 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
904 return false;
905
906 /* If no include patterns, everything is included: */
907 i = g_cIncludeTestPatterns;
908 if (!i)
909 return true;
910
911 /* Otherwise only tests in the include patters gets tested: */
912 while (i-- > 0)
913 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
914 return true;
915
916 return false;
917}
918
919
920static bool SubTestAndCheckIfEnabled(const char *pszName)
921{
922 RTTestSub(g_hTest, pszName);
923 if (IsTestEnabled(pszName))
924 return true;
925 RTTestSkipped(g_hTest, "excluded");
926 return false;
927}
928
929
/**
 * Formats the difference between two EFLAGS values for error messages.
 *
 * @returns Empty string if equal, otherwise " - <xor-mask>" followed by a
 *          "/NAME" (set in fActual) or "/!NAME" (clear in fActual) item for
 *          each differing flag.  Pointer into the round-robin buffer pool.
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/mask table for the individual EFLAGS bits. */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    /* Make sure the buffer is terminated even if nothing matched above. */
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
971
972
/**
 * Formats the difference between two FPU status word values for error messages.
 *
 * @returns Empty string if equal, otherwise " - <xor-mask>" followed by a
 *          "/NAME" or "/!NAME" item per differing flag and a "/TOPa!e" item
 *          when the TOP field differs.  Pointer into the round-robin pool.
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/mask table for the individual FSW bits. */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    /* The TOP field is multi-bit, so report actual!expected values. */
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
    /* Make sure the buffer is terminated even if nothing matched above. */
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
1013
1014
/**
 * Formats an x87 control word (FCW) as a human readable string
 * (precision control, rounding control and exception masks).
 * Uses a round-robin scratch buffer for the result.
 */
static const char *FormatFcw(uint16_t fFcw)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    /* Precision control field (2 bits). */
    const char *pszPC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_PC_MASK)
    {
        case X86_FCW_PC_24:     pszPC = "PC24";     break;
        case X86_FCW_PC_RSVD:   pszPC = "PCRSVD!";  break;
        case X86_FCW_PC_53:     pszPC = "PC53";     break;
        case X86_FCW_PC_64:     pszPC = "PC64";     break;
    }

    /* Rounding control field (2 bits). */
    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_RC_MASK)
    {
        case X86_FCW_RC_NEAREST:    pszRC = "NEAR"; break;
        case X86_FCW_RC_DOWN:       pszRC = "DOWN"; break;
        case X86_FCW_RC_UP:         pszRC = "UP";   break;
        case X86_FCW_RC_ZERO:       pszRC = "ZERO"; break;
    }
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);

    /* Exception mask bits; appended by name when set. */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
        FCW_ENTRY(IM),
        FCW_ENTRY(DM),
        FCW_ENTRY(ZM),
        FCW_ENTRY(OM),
        FCW_ENTRY(UM),
        FCW_ENTRY(PM),
        { "6M", 64 },   /* bit 6 is reserved in the FCW; flagged if ever set */
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fFcw & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
    return pszBuf;
}
1060
1061
1062static const char *FormatR80(PCRTFLOAT80U pr80)
1063{
1064 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1065 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1066 return pszBuf;
1067}
1068
1069
1070static const char *FormatR64(PCRTFLOAT64U pr64)
1071{
1072 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1073 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1074 return pszBuf;
1075}
1076
1077
1078static const char *FormatR32(PCRTFLOAT32U pr32)
1079{
1080 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1081 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1082 return pszBuf;
1083}
1084
1085
/**
 * Formats an 80-bit packed BCD value ("+/-" sign followed by 18 digits,
 * most significant first) into a round-robin scratch buffer.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    /* Pairs are stored little endian, so walk them backwards to print the
       most significant digit first.  Nibbles above 9 are invalid BCD; they
       are rendered as hex and counted. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
    {
        static const char    s_szDigits[]   = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    /* Annotate invalid digits and non-zero padding bits. */
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
1112
1113
#if 0 /* currently unused */
/** Formats a signed 64-bit value as hex into a round-robin scratch buffer. */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1122
1123
1124static const char *FormatI32(int32_t const *piVal)
1125{
1126 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1127 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1128 return pszBuf;
1129}
1130
1131
1132static const char *FormatI16(int16_t const *piVal)
1133{
1134 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1135 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1136 return pszBuf;
1137}
1138
1139
1140/*
1141 * Binary operations.
1142 */
/* Subtest descriptor types for the 8/16/32/64-bit binary operations,
   pairing a test-data type with its worker function pointer type. */
TYPEDEF_SUBTEST_TYPE(BINU8_T,  BINU8_TEST_T,  PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1147
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a BinU<N>Generate() function that produces test vectors for all
 * binary operations of the given width.  CPU-flavour specific workers go to
 * @a pOutCpu and are skipped unless the host matches g_idxCpuEflFlavour;
 * vendor-neutral ones go to @a pOut.  The reference results come from the
 * native (assembly) worker when available.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1186
/**
 * Emits the generator (when enabled) and a BinU<N>Test() function that runs
 * the pre-generated test vectors against each worker in @a a_aSubTests, for
 * every available variation (C fallback and native assembly).  On a match the
 * call is repeated on the special guard pages (g_puNN/g_pfEfl) to catch
 * out-of-bounds accesses.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const   paTests = a_aSubTests[iFn].paTests; \
        uint32_t const             cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const             cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits  = paTests[iTest].uDstIn; \
                    *g_pfEfl          = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1227
1228
1229/*
1230 * 8-bit binary operations.
1231 */
/** Subtest table for the 8-bit binary operations (plain and locked). */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1252
1253
1254/*
1255 * 16-bit binary operations.
1256 */
/** Subtest table for the 16-bit binary operations.  Bit-test entries pass
 *  uExtra=1 so the generator restricts the bit index; bsf/bsr and imul have
 *  AMD/Intel specific undefined-flag behavior, hence the per-vendor entries. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1291
1292
1293/*
1294 * 32-bit binary operations.
1295 */
/** Subtest table for the 32-bit binary operations (see g_aBinU16 notes). */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1329
1330
1331/*
1332 * 64-bit binary operations.
1333 */
/** Subtest table for the 64-bit binary operations (see g_aBinU16 notes). */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1367
1368
1369/*
1370 * XCHG
1371 */
/**
 * Tests the locked and unlocked XCHG workers for all four operand sizes.
 *
 * XCHG takes no test vectors: each worker is called with random values and
 * the result is simply checked to be a perfect swap.  Each worker is also
 * exercised once on the guard-page globals (g_puNN/g_puNNTwo) to catch
 * out-of-bounds accesses; those results are not value-checked.
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    /* Workers are stored via a union so one table covers all widths; cb
       selects which member to call. */
    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t           pfn;
            FNIEMAIMPLXCHGU8   *pfnU8;
            FNIEMAIMPLXCHGU16  *pfnU16;
            FNIEMAIMPLXCHGU32  *pfnU32;
            FNIEMAIMPLXCHGU64  *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_locked  } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_unlocked  } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        /* make sure the two values differ so the swap is observable */
        if (uIn1.u == uIn2.u)
            uDst.u = uIn2.u = ~uIn2.u;

        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1437
1438
1439/*
1440 * XADD
1441 */
1442static void XaddTest(void)
1443{
1444#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1445 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1446 static struct \
1447 { \
1448 const char *pszName; \
1449 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1450 BINU ## a_cBits ## _TEST_T const *paTests; \
1451 uint32_t const *pcTests; \
1452 } const s_aFuncs[] = \
1453 { \
1454 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1455 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1456 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1457 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1458 }; \
1459 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1460 { \
1461 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1462 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1463 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1464 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1465 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1466 { \
1467 uint32_t fEfl = paTests[iTest].fEflIn; \
1468 a_Type uSrc = paTests[iTest].uSrcIn; \
1469 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1470 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1471 if ( fEfl != paTests[iTest].fEflOut \
1472 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1473 || uSrc != paTests[iTest].uDstIn) \
1474 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1475 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1476 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1477 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1478 } \
1479 } \
1480 } while(0)
1481 TEST_XADD(8, uint8_t, "%#04x");
1482 TEST_XADD(16, uint16_t, "%#06x");
1483 TEST_XADD(32, uint32_t, "%#010RX32");
1484 TEST_XADD(64, uint64_t, "%#010RX64");
1485}
1486
1487
1488/*
1489 * CMPXCHG
1490 */
1491
/**
 * Tests the cmpxchg_uXX workers by reusing the 'cmp' test vectors.
 *
 * For each vector two scenarios are run: (1) comparand vs the source value
 * (almost always a mismatch, so no store happens and the flags must equal
 * the recorded CMP flags), and (2) a forced match, where the flags must
 * equal those of a SUB against itself and the new value must be stored.
 *
 * NOTE(review): both failure messages use the "#%ua" suffix; the second one
 * presumably was meant to be "#%ub" -- harmless, diagnostics only.
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
    static struct \
    { \
        const char                         *pszName; \
        FNIEMAIMPLCMPXCHGU ## a_cBits      *pfn; \
        PFNIEMAIMPLBINU ## a_cBits          pfnSub; \
        BINU ## a_cBits ## _TEST_T const   *paTests; \
        uint32_t const                     *pcTests; \
    } const s_aFuncs[] = \
    { \
        { "cmpxchg_u" # a_cBits,           iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
          g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
        { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
          g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
    }; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
        BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
        uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            /* as is (99% likely to be negative). */ \
            uint32_t fEfl        = paTests[iTest].fEflIn; \
            a_Type const uNew    = paTests[iTest].uSrcIn + 0x42; \
            a_Type uA            = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits     = paTests[iTest].uSrcIn; \
            a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl             != paTests[iTest].fEflOut \
                || *g_pu ## a_cBits != uExpect \
                || uA               != paTests[iTest].uSrcIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            /* positive */ \
            uint32_t fEflExpect = paTests[iTest].fEflIn; \
            uA                  = paTests[iTest].uDstIn; \
            s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
            fEfl                = paTests[iTest].fEflIn; \
            uA                  = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits    = uA; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl             != fEflExpect \
                || *g_pu ## a_cBits != uNew \
                || uA               != paTests[iTest].uDstIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                             EFlagsDiff(fEfl, fEflExpect)); \
        } \
    } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
1557
/**
 * Tests the cmpxchg8b workers (plain and locked) with random values.
 *
 * For each iteration a positive case (match: memory must take the new value,
 * ZF must be set) and a negative case (mismatch: memory unchanged, the old
 * memory value returned in EDX:EAX, ZF cleared) are checked.
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG8B  *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b",        iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u    != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* ECX:EBX must not be touched */

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u    != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1614
/**
 * Tests the cmpxchg16b workers (plain, locked and - on non-ARM hosts - the
 * C fallback) with random 128-bit values, mirroring CmpXchg8bTest.
 * Skipped when the AMD64 host lacks the CX16 CPUID feature.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b",          iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked",   iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test: comparand matches memory, so the new value must
               be stored and ZF set. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo       != uOldValue.s.Lo
                || uA.s.Hi       != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative: memory differs from the comparand, so it must stay
               unchanged, come back in uA, and ZF must be cleared. */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo       != uExpect.s.Lo
                || uA.s.Hi       != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1691
1692
1693/*
1694 * Double shifts.
1695 *
1696 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1697 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a ShiftDblU<N>Generate() function producing shld/shrd test vectors.
 * The shift count lives in uMisc and deliberately exceeds the operand width
 * (masked to a_cBits*4-1) to cover the modulo behavior of the instruction.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1727
/**
 * Emits the subtest table (AMD and Intel flavours of shld/shrd, which differ
 * in the undefined OF/CF handling), the generator (when enabled) and the
 * ShiftDblU<N>Test() runner for the given operand width.  Matching results
 * are re-verified on the guard-page globals.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16",  BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1781
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates shld/shrd test vectors for all three operand widths. */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
#endif
1790
/** Runs the shld/shrd subtests for all three operand widths. */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
1797
1798
1799/*
1800 * Unary operators.
1801 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
1803 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a UnaryU<N>Generate() function producing test vectors for the unary
 * operations (inc/dec/not/neg); uSrcIn and uMisc are unused and zeroed.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits(); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1830
/**
 * Emits the unary-op subtest table (plain and locked inc/dec/not/neg), the
 * generator (when enabled) and the UnaryU<N>Test() runner.  Matching results
 * are re-verified on the guard-page globals.
 */
#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType const g_aUnaryU ## a_cBits [] = \
{ \
    ENTRY(inc_u ## a_cBits), \
    ENTRY(inc_u ## a_cBits ## _locked), \
    ENTRY(dec_u ## a_cBits), \
    ENTRY(dec_u ## a_cBits ## _locked), \
    ENTRY(not_u ## a_cBits), \
    ENTRY(not_u ## a_cBits ## _locked), \
    ENTRY(neg_u ## a_cBits), \
    ENTRY(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
        uint32_t const           cTests  = *g_aUnaryU ## a_cBits[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type   uDst = paTests[iTest].uDstIn; \
            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
            if (   uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl         = paTests[iTest].fEflIn; \
                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
    } \
}
TEST_UNARY(8,  uint8_t,  "%#04RX8",   BINU8_TEST_T,  INT_UNARY_U8_T)
TEST_UNARY(16, uint16_t, "%#06RX16",  BINU16_TEST_T, INT_UNARY_U16_T)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1881
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all unary operator operand sizes (8/16/32/64-bit). */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        UnaryU8Generate, UnaryU16Generate, UnaryU32Generate, UnaryU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
1890#endif
1891
1892static void UnaryTest(void)
1893{
1894 UnaryU8Test();
1895 UnaryU16Test();
1896 UnaryU32Test();
1897 UnaryU64Test();
1898}
1899
1900
/*
 * Shifts.
 *
 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the ShiftU<cBits>Generate function.  Two data rows are produced per
 * iteration: one with random EFLAGS and one with those EFLAGS inverted
 * (within X86_EFL_LIVE_MASK), so flag-in dependencies (e.g. CF for rcl/rcr)
 * get covered.  Workers for a non-native EFLAGS flavour are skipped.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
            \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* No generator build: expand to nothing. */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1942
/**
 * Emits the shift/rotate sub-test table (rol/ror/rcl/rcr/shl/shr/sar, in AMD
 * and Intel EFLAGS flavours), the generator (via GEN_SHIFT) and the
 * ShiftU<cBits>Test driver.  The driver runs each test through the flavoured
 * worker and, on success, once more via the guarded g_pu<cBits>/g_pfEfl
 * buffers; a second variation pass (iVar==1) repeats everything with the
 * native implementation when available.
 */
#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(  rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
/* Instantiate the shift/rotate sub-tests for all four operand sizes. */
TEST_SHIFT(8,  uint8_t,  "%#04RX8",   BINU8_TEST_T,  INT_BINARY_U8_T,  g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16",  BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2006
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all shift/rotate operand sizes (8/16/32/64-bit). */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        ShiftU8Generate, ShiftU16Generate, ShiftU32Generate, ShiftU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
2016
2017static void ShiftTest(void)
2018{
2019 ShiftU8Test();
2020 ShiftU16Test();
2021 ShiftU32Test();
2022 ShiftU64Test();
2023}
2024
2025
/*
 * Multiplication and division.
 *
 * Note! The 8-bit functions has a different format, so we need to duplicate things.
 * Note! Currently ignoring undefined bits.
 */

/* U8 */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/**
 * The 8-bit mul/imul/div/idiv sub-tests, one AMD and one Intel EFLAGS
 * flavour each.  The uExtra field (2nd mask argument) holds the EFLAGS bits
 * to ignore when comparing results — the architecturally undefined ones.
 */
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                            X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                            X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2048
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit multiplication/division helpers.
 *
 * One row per test: random EFLAGS, a random 16-bit destination (AX acts as
 * the implicit 16-bit operand for the 8-bit forms) and a random 8-bit source.
 * Workers of a non-native EFLAGS flavour are skipped, matching GEN_MULDIV.
 *
 * @param   pOut    Stream to write the generated arrays to.
 * @param   cTests  Number of test rows per worker.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        /* Note: dropped a stray line-continuation backslash left over from the macro template. */
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn    = RandEFlags();
            Test.fEflOut   = Test.fEflIn;
            Test.uDstIn    = RandU16Dst(iTest);
            Test.uDstOut   = Test.uDstIn;
            Test.uSrcIn    = RandU8Src(iTest);
            Test.rc        = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, "    { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
2074
2075static void MulDivU8Test(void)
2076{
2077 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2078 {
2079 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2080 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2081 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2082 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2083 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2084 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2085 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2086 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2087 {
2088 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2089 {
2090 uint32_t fEfl = paTests[iTest].fEflIn;
2091 uint16_t uDst = paTests[iTest].uDstIn;
2092 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2093 if ( uDst != paTests[iTest].uDstOut
2094 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2095 || rc != paTests[iTest].rc)
2096 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2097 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2098 "%sexpected %#08x %#06RX16 %d%s\n",
2099 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2100 iVar ? " " : "", fEfl, uDst, rc,
2101 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2102 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2103 else
2104 {
2105 *g_pu16 = paTests[iTest].uDstIn;
2106 *g_pfEfl = paTests[iTest].fEflIn;
2107 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2108 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2109 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2110 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2111 }
2112 }
2113 pfn = g_aMulDivU8[iFn].pfnNative;
2114 }
2115 }
2116}
2117
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the MulDivU<cBits>Generate function for the wide (16/32/64-bit)
 * mul/div helpers, which take two destination operands (DX:AX style pairs)
 * plus a source.  Non-native EFLAGS flavours are skipped.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDst1In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out  = Test.uDst1In; \
            Test.uDst2In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out  = Test.uDst2In; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.rc        = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* No generator build: expand to nothing. */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2149
/**
 * Emits the mul/imul/div/idiv sub-test table (AMD and Intel flavours, with
 * the architecturally undefined EFLAGS bits to ignore in uExtra), the
 * generator (via GEN_MULDIV) and the MulDivU<cBits>Test driver.  The driver
 * compares both destination operands, EFLAGS (modulo ignored bits) and the
 * return code, re-running successful cases through the guarded buffers, and
 * does a second variation pass with the native implementation.
 */
#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType const a_aSubTests [] = \
{ \
    ENTRY_AMD_EX(mul_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const           fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl  = paTests[iTest].fEflIn; \
                a_Type   uDst1 = paTests[iTest].uDst1In; \
                a_Type   uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc    != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          "  -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          "expected %#08x      " a_Fmt "      " a_Fmt "    %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    *g_pu ## a_cBits        = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl                = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits        == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn)    == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc                      == paTests[iTest].rc); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
/* Instantiate the wide mul/div sub-tests (8-bit is hand-written above). */
TEST_MULDIV(16, uint16_t, "%#06RX16",  MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2218
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all mul/div operand sizes (8/16/32/64-bit). */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        MulDivU8Generate, MulDivU16Generate, MulDivU32Generate, MulDivU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
2228
2229static void MulDivTest(void)
2230{
2231 MulDivU8Test();
2232 MulDivU16Test();
2233 MulDivU32Test();
2234 MulDivU64Test();
2235}
2236
2237
/*
 * BSWAP
 */
/**
 * Tests the bswap helpers with fixed input/output pairs.
 *
 * For bswap_u16 the Intel SDM leaves the result undefined; the #if 0 arms
 * keep the "swap the low word" expectation around while the active arms
 * check the behaviour the implementation actually produces (low word
 * cleared to zero).
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2275
2276
2277
/*********************************************************************************************************************************
*   Floating point (x87 style)                                                                                                   *
*********************************************************************************************************************************/

/*
 * FPU constant loading.
 */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** The x87 constant-loading workers (FLD1/FLDL2T/FLDL2E/FLDPI/FLDLG2/FLDLN2/FLDZ). */
static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2297
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FPU constant-loading workers.
 * For each random FCW/FSW pair, all four rounding modes are emitted, which
 * is why iTest advances by 4 per outer iteration.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2324
2325static void FpuLoadConstTest(void)
2326{
2327 /*
2328 * Inputs:
2329 * - FSW: C0, C1, C2, C3
2330 * - FCW: Exception masks, Precision control, Rounding control.
2331 *
2332 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2333 */
2334 X86FXSTATE State;
2335 RT_ZERO(State);
2336 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2337 {
2338 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2339 continue;
2340
2341 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2342 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2343 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2344 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2345 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2346 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2347 {
2348 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2349 {
2350 State.FCW = paTests[iTest].fFcw;
2351 State.FSW = paTests[iTest].fFswIn;
2352 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2353 pfn(&State, &Res);
2354 if ( Res.FSW != paTests[iTest].fFswOut
2355 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2356 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2357 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2358 Res.FSW, FormatR80(&Res.r80Result),
2359 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2360 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2361 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2362 FormatFcw(paTests[iTest].fFcw) );
2363 }
2364 pfn = g_aFpuLdConst[iFn].pfnNative;
2365 }
2366 }
2367}
2368
2369
/*
 * Load floating point values from memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the FpuLdR<cBits>Generate function: for each random FCW/FSW and
 * random r<cBits> input, runs the fld worker under all four rounding modes
 * and records the resulting FSW and 80-bit value.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* No generator build: expand to nothing. */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2404
/**
 * Emits the function-pointer typedefs, the one-entry sub-test table for
 * fld_r80_from_r<cBits>, the generator (via GEN_FPU_LOAD) and the
 * FpuLdR<cBits>Test driver that replays the pre-generated data and compares
 * FSW plus the 80-bit result bit-for-bit.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits  *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                     cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const           paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                     cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x    %s\n" \
                                          "%s expected %#06x    %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate the floating-point load sub-tests for r80/r64/r32 inputs. */
TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2459
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for the r80/r64/r32 floating-point memory loads. */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        FpuLdR80Generate, FpuLdR64Generate, FpuLdR32Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
2468
2469static void FpuLdMemTest(void)
2470{
2471 FpuLdR80Test();
2472 FpuLdR64Test();
2473 FpuLdR32Test();
2474}
2475
2476
/*
 * Load integer values from memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the FpuLdI<cBits>Generate function: for each random FCW/FSW and
 * random i<cBits> input, runs the fild worker under all four rounding modes
 * and records the resulting FSW and 80-bit value.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/* No generator build: expand to nothing. */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2510
/**
 * Emits the function-pointer typedefs, the one-entry sub-test table for
 * fild_r80_from_i<cBits>, the generator (via GEN_FPU_LOAD_INT) and the
 * FpuLdI<cBits>Test driver that replays the pre-generated data and compares
 * FSW plus the 80-bit result bit-for-bit.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits  *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                      cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const            paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                      cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x    %s\n" \
                                          "%s expected %#06x    %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate the integer load (fild) sub-tests for i64/i32/i16 inputs. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2564
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for the i64/i32/i16 integer loads (fild). */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    {
        FpuLdI64Generate, FpuLdI32Generate, FpuLdI16Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
2573
2574static void FpuLdIntTest(void)
2575{
2576 FpuLdI64Test();
2577 FpuLdI32Test();
2578 FpuLdI16Test();
2579}
2580
2581
/*
 * Load binary coded decimal values from memory.
 */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The packed-BCD load (FBLD) worker. */
static const FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY(fld_r80_from_d80)
};
2593
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the packed-BCD (FBLD) load worker: random FCW/FSW
 * and a random 80-bit BCD value, run under all four rounding modes.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2622
/**
 * Runs the recorded test values against fld_r80_from_d80, comparing the output
 * FSW and 80-bit result against the recorded expectations (plus a second pass
 * with the native worker when a variation exists).
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (if any) runs the native/assembly worker. */
            pfn = g_aFpuLdD80[iFn].pfnNative;
        }
    }
}
2663
2664
/*
 * Store floating point values to memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Special 80-bit inputs for the 32-bit store generator (rounding edge cases). */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Special 80-bit inputs for the 64-bit store generator (rounding edge cases). */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Special 80-bit inputs for the 80-bit store generator. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/** Emits the generator function FpuStR&lt;N&gt;Generate() for one FP store width,
 *  combining @a cTests random inputs with the g_aFpuStR&lt;N&gt;Specials table and
 *  iterating all rounding modes and the OM/UM/PM mask combinations. */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
/** Empty expansion when the generator is not compiled in. */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2731
/** Emits the typedefs, sub-test table, generator (via GEN_FPU_STORE) and test
 *  function FpuStR&lt;N&gt;Test() for one floating point store width. */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x    %s\n" \
                                          "%s expected %#06x    %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2786
/* Instantiate the FP store test workers for the 80-, 64- and 32-bit destination formats. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2790
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Writes the test value arrays for all three FP store widths to @a pOut. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2799
/** Runs the recorded tests for all three FP store widths. */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
2806
2807
/*
 * Store integer values to memory or register.
 */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/** 16-bit integer store workers; fistt has AMD/Intel specific variants. */
static const FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY(fist_r80_to_i16),
    ENTRY_AMD(  fistt_r80_to_i16, 0),
    ENTRY_INTEL(fistt_r80_to_i16, 0),
};
/** 32-bit integer store workers. */
static const FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY(fist_r80_to_i32),
    ENTRY(fistt_r80_to_i32),
};
/** 64-bit integer store workers. */
static const FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY(fist_r80_to_i64),
    ENTRY(fistt_r80_to_i64),
};
2831
#ifdef TSTIEMAIMPL_WITH_GENERATOR
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int32 value range boundaries (2^30..2^31). */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int64 value range boundaries (2^61..2^63). */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
2922
/** Emits the generator function FpuStI&lt;N&gt;Generate() for one integer store
 *  width; workers with a CPU-vendor specific EFLAGS flavour go to @a pOutCpu
 *  instead of @a pOut (and only when the flavour matches the host). */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType  iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
/** Empty expansion when the generator is not compiled in. */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
2974
/** Emits the generator (via GEN_FPU_STORE_INT) and test function
 *  FpuStI&lt;N&gt;Test() for one integer store width. */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_iType  iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, iOutVal, \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3017
//fistt_r80_to_i16 diffs for AMD, of course :-)

/* Instantiate the integer store test workers for the three destination widths. */
TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3023
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Writes the test value arrays for all three integer store widths;
 *  vendor-specific flavours go to @a pOutCpu. */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3032
/** Runs the recorded tests for all three integer store widths. */
static void FpuStIntTest(void)
{
    FpuStI64Test();
    FpuStI32Test();
    FpuStI16Test();
}
3039
3040
/*
 * Store as packed BCD value (memory).
 */
/** Worker signature: converts an 80-bit float to a packed-BCD (80-bit) value. */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);

/** The BCD-store workers under test. */
static const FPU_ST_D80_T g_aFpuStD80[] =
{
    ENTRY(fst_r80_to_d80),
};
3052
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test value array for fst_r80_to_d80: @a cTests random inputs
 * plus specials around the 18-digit BCD limit, for all rounding modes and the
 * OM/UM/PM mask combinations.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* 0xde0b6b3a763ffff0 * 2^-63 * 2^59 = 999999999999999999 (max BCD magnitude). */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3106
3107
3108static void FpuStD80Test(void)
3109{
3110 X86FXSTATE State;
3111 RT_ZERO(State);
3112 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3113 {
3114 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3115 continue;
3116
3117 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3118 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3119 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3120 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3121 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3122 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3123 {
3124 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3125 {
3126 RTFLOAT80U const InVal = paTests[iTest].InVal;
3127 uint16_t uFswOut = 0;
3128 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3129 State.FCW = paTests[iTest].fFcw;
3130 State.FSW = paTests[iTest].fFswIn;
3131 pfn(&State, &uFswOut, &OutVal, &InVal);
3132 if ( uFswOut != paTests[iTest].fFswOut
3133 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3134 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3135 "%s -> fsw=%#06x %s\n"
3136 "%s expected %#06x %s%s%s (%s)\n",
3137 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3138 FormatR80(&paTests[iTest].InVal),
3139 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3140 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3141 FswDiff(uFswOut, paTests[iTest].fFswOut),
3142 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3143 FormatFcw(paTests[iTest].fFcw) );
3144 }
3145 pfn = g_aFpuStD80[iFn].pfnNative;
3146 }
3147 }
3148}
3149
3150
3151
/*********************************************************************************************************************************
*   x87 FPU Binary Operations                                                                                                    *
*********************************************************************************************************************************/

/*
 * Binary FPU operations on two 80-bit floating point values.
 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);

/** The r80-by-r80 binary workers under test. */
static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
{
    ENTRY(fadd_r80_by_r80),
    ENTRY(fsub_r80_by_r80),
    ENTRY(fsubr_r80_by_r80),
    ENTRY(fmul_r80_by_r80),
    ENTRY(fdiv_r80_by_r80),
    ENTRY(fdivr_r80_by_r80),
    ENTRY(fprem_r80_by_r80),
    ENTRY(fprem1_r80_by_r80),
    ENTRY(fscale_r80_by_r80),
    ENTRY_AMD(  fpatan_r80_by_r80, 0),  // C1 and rounding differs on AMD
    ENTRY_INTEL(fpatan_r80_by_r80, 0),  // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2x_r80_by_r80, 0),   // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2x_r80_by_r80, 0),   // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
};
3179
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test value arrays for the r80-by-r80 binary workers.
 *
 * For each input pair, all 4 rounding modes x 4 precision settings are
 * exercised, first with all exceptions masked, then all unmasked, then with
 * the raised exceptions selectively re-masked/unmasked to record the trap
 * behavior.
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        {     RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
              RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        {     RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
              RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        {     RTFLOAT80U_INIT_ZERO(0), /* minus variant */
              RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Ensure at least a quarter of the random tests beyond the 144 standard
       variations use normal/normal input pairs. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor-specific workers only get data when the host matches, and to the CPU file. */
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t iTestOutput        = 0;
        uint32_t cNormalInputPairs  = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough normal pairs yet and running out of slots: redo this iteration. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0;
            uint16_t const fFcw      = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* First pass: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Second pass: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Follow-up passes: mask exactly the exceptions that were raised (SF excluded). */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* Masking changed which exceptions fire: record once more with the union masked. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal1, &InVal2);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            /* Several exceptions raised: try each one unmasked individually. */
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal1, &InVal2);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
#endif
3285
3286
/**
 * Runs the recorded test values against all r80-by-r80 binary workers,
 * comparing the output FSW and 80-bit result against the recorded
 * expectations (plus a second pass with the native worker when a variation
 * exists).
 */
static void FpuBinaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
        FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal1, &InVal2);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (if any) runs the native/assembly worker. */
            pfn = g_aFpuBinaryR80[iFn].pfnNative;
        }
    }
}
3328
3329
3330/*
3331 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3332 */
3333#define int64_t_IS_NORMAL(a) 1
3334#define int32_t_IS_NORMAL(a) 1
3335#define int16_t_IS_NORMAL(a) 1
3336
3337#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hardcoded special input pairs that GEN_FPU_BINARY_SMALL appends after the
   cTests randomly generated inputs (indexed as iTest - cTests). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3356
/**
 * Generator worker macro instantiated by TEST_FPU_BINARY_SMALL.
 *
 * Expands to FpuBinary<a_UpBits>Generate which emits one test-value array per
 * subtest.  At least 160 inputs are produced (144 standard r80-by-r80 input
 * variations plus headroom), and a minimum quota of normal/normal input pairs
 * is enforced by re-rolling non-normal inputs towards the end of the run.
 * Every input pair is then run through all 4 rounding modes x 4 precision
 * modes, once with all FCW exceptions masked and once with all unmasked
 * ('c' vs 'u' in the emitted comment).
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3411
/**
 * Instantiates the subtest typedef, the subtest table (the six arithmetic
 * r80-by-<a_LoBits> workers: add/mul/sub/subr/div/divr), the generator (when
 * TSTIEMAIMPL_WITH_GENERATOR is defined) and the FpuBinary<a_UpBits>Test
 * driver which replays the pre-generated vectors and verifies FSW and the
 * resulting 80-bit value.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3467
/* Instantiate table, generator and test driver for each of the four
   "r80 by smaller second operand" variants (r64, r32, i32, i16). */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3472
3473
3474/*
3475 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3476 */
3477#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hardcoded special input pairs appended after the random inputs by
   GEN_FPU_BINARY_FSW (indexed as iTest - cTests). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3501
/**
 * Generator worker macro instantiated by TEST_FPU_BINARY_FSW.
 *
 * Expands to FpuBinaryFsw<a_UpBits>Generate which emits test rows for the
 * compare-style operations that only modify FSW.  Input selection and the
 * normal-pair quota work as in GEN_FPU_BINARY_SMALL, but since compares are
 * assumed insensitive to rounding/precision only the FCW exception mask is
 * varied (all masked vs all unmasked).
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3548
/**
 * Instantiates the subtest typedef, the subtest table (entries supplied via
 * __VA_ARGS__), the generator (when TSTIEMAIMPL_WITH_GENERATOR is defined)
 * and the FpuBinaryFsw<a_UpBits>Test driver, which replays the pre-generated
 * vectors for the FSW-only compare operations and verifies the output FSW.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3596
/* Instantiate the FSW-only compare subtests: fcom/fucom (r80), fcom (r64/r32)
   and ficom (i32/i16). */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3602
3603
3604/*
3605 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3606 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/* The r80-by-r80 compares that return their result in EFLAGS (and may
   modify FSW). */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3614
3615#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hardcoded special input pair appended after the random inputs by
   FpuBinaryEflR80Generate (indexed as iTest - cTests). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3621
/**
 * Generates test vectors for fcomi/fucomi.
 *
 * Each row records input FCW/FSW, the two 80-bit operands, the output FSW
 * and the returned EFLAGS.  A minimum quota of normal/normal input pairs is
 * enforced by re-rolling non-normal inputs towards the end of the run, and
 * each pair is run with all exceptions masked and all unmasked.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of random input pairs per function (raised to a
 *                  minimum of 160).
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* required number of normal,normal pairs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            /* Random inputs first, then the hardcoded specials. */
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Re-roll this iteration to meet the normal-pair quota. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
3662#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3663
3664static void FpuBinaryEflR80Test(void)
3665{
3666 X86FXSTATE State;
3667 RT_ZERO(State);
3668 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3669 {
3670 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3671 continue;
3672
3673 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3674 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3675 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3676 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3677 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3678 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3679 {
3680 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3681 {
3682 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3683 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3684 State.FCW = paTests[iTest].fFcw;
3685 State.FSW = paTests[iTest].fFswIn;
3686 uint16_t uFswOut = 0;
3687 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3688 if ( uFswOut != paTests[iTest].fFswOut
3689 || fEflOut != paTests[iTest].fEflOut)
3690 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3691 "%s -> fsw=%#06x efl=%#08x\n"
3692 "%s expected %#06x %#08x %s (%s)\n",
3693 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3694 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3695 iVar ? " " : "", uFswOut, fEflOut,
3696 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3697 EFlagsDiff(fEflOut, paTests[iTest].fEflOut), FormatFcw(paTests[iTest].fFcw));
3698 }
3699 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3700 }
3701 }
3702}
3703
3704
3705/*********************************************************************************************************************************
3706* x87 FPU Unary Operations *
3707*********************************************************************************************************************************/
3708
3709/*
3710 * Unary FPU operations on one 80-bit floating point value.
3711 *
3712 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3713 * a rounding error or not.
3714 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Accuracy classification stored in uExtra; kUnary_Rounding_F2xm1 marks
   entries whose inputs may legitimately produce rounding differences (see
   FpuUnaryR80MayHaveRoundingError and the generator's FCW bit 7 usage). */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX( fabs_r80, kUnary_Accurate),
    ENTRY_EX( fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX( fsqrt_r80, kUnary_Accurate),
    ENTRY_EX( frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
3731
3732#ifdef TSTIEMAIMPL_WITH_GENERATOR
3733
3734static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3735{
3736 if ( enmKind == kUnary_Rounding_F2xm1
3737 && RTFLOAT80U_IS_NORMAL(pr80Val)
3738 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3739 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3740 return true;
3741 return false;
3742}
3743
/**
 * Generates test vectors for the unary r80 FPU helpers in g_aFpuUnaryR80
 * (fabs, fchs, f2xm1, fsqrt, frndint, fsin, fcos).
 *
 * Each input is run through all 4 rounding x 4 precision combinations: first
 * with all exceptions masked, then all unmasked, and finally with the raised
 * exception bits selectively re-masked/unmasked.  The reserved FCW bit 7 is
 * set in the emitted FCW to flag inputs that may produce rounding
 * differences (see FpuUnaryR80MayHaveRoundingError).
 *
 * @param   pOut    Output stream for generic tests.
 * @param   pOutCpu Output stream for CPU vendor specific tests.
 * @param   cTests  Number of (random) input values per function.
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* quota of normal inputs to enforce */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Use the native worker as the reference when one is available. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor-specific entry: only generate it on the matching host CPU. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput = 0;
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* For f2xm1 make sure enough normals fall into the interesting
                       exponent range (2^0..2^-69) by rewriting the exponent of late
                       inputs while the quota hasn't been met. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Re-roll to meet the minimum normal-input quota. */
                iTest -= 1;
                continue;
            }

            /* FCW bit 7 flags a possible rounding error for the test code. */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Pass 1: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Pass 2: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Pass 3: mask exactly the exceptions that were raised above. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        /* If additional exception bits showed up, widen the mask and redo. */
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* For multi-bit exception sets, additionally unmask one bit at a time. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
3863#endif
3864
3865static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
3866{
3867 if (fFcw1 == fFcw2)
3868 return true;
3869 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
3870 {
3871 *pfRndErr = true;
3872 return true;
3873 }
3874 return false;
3875}
3876
3877static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
3878{
3879 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
3880 return true;
3881 if ( fRndErrOk
3882 && pr80Val1->s.fSign == pr80Val2->s.fSign)
3883 {
3884 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
3885 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
3886 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
3887 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
3888 ||
3889 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
3890 && pr80Val1->s.uMantissa == UINT64_MAX
3891 && pr80Val2->s.uMantissa == RT_BIT_64(63))
3892 ||
3893 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
3894 && pr80Val2->s.uMantissa == UINT64_MAX
3895 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
3896 {
3897 *pfRndErr = true;
3898 return true;
3899 }
3900 }
3901 return false;
3902}
3903
3904
/**
 * Runs the pre-generated tests for the unary r80 helpers.
 *
 * FCW bit 7 in the test data marks entries where a one-ULP value difference
 * and/or a C1-only FSW difference is acceptable (see generator); forgiven
 * near-misses are counted and reported instead of failed.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80); /* bit 7: rounding error acceptable */
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* strip the marker bit before use */
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
3953
3954
3955/*
3956 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
3957 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/* uExtra == 1 flags fxam, which gets special FTW/empty-register handling in
   the generator and test code. */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1),
};
3965
3966#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test vectors for the unary FSW-only helpers (ftst, fxam).
 *
 * ftst gets the full 4 rounding x 4 precision x masked/unmasked matrix.
 * fxam is run once per input; a randomly chosen quarter of the inputs mark
 * the register empty via FTW, recording that fact in the otherwise unused
 * FCW bit 7 of the emitted test row (see FpuUnaryFswR80Test).
 *
 * @param   pOut    Output stream for generic tests.
 * @param   pOutCpu Output stream for CPU vendor specific tests.
 * @param   cTests  Number of (random) input values per function.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* quota of normal inputs to enforce */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        /* Use the native worker as the reference when one is available. */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor-specific entry: only generate it on the matching host CPU. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Re-roll to meet the minimum normal-input quota. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4040#endif
4041
4042
4043static void FpuUnaryFswR80Test(void)
4044{
4045 X86FXSTATE State;
4046 RT_ZERO(State);
4047 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4048 {
4049 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4050 continue;
4051
4052 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4053 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4054 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4055 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4056 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4057 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4058 {
4059 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4060 {
4061 RTFLOAT80U const InVal = paTests[iTest].InVal;
4062 uint16_t fFswOut = 0;
4063 State.FSW = paTests[iTest].fFswIn;
4064 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4065 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4066 pfn(&State, &fFswOut, &InVal);
4067 if (fFswOut != paTests[iTest].fFswOut)
4068 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4069 "%s -> fsw=%#06x\n"
4070 "%s expected %#06x %s (%s%s)\n",
4071 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4072 FormatR80(&paTests[iTest].InVal),
4073 iVar ? " " : "", fFswOut,
4074 iVar ? " " : "", paTests[iTest].fFswOut,
4075 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4076 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4077 }
4078 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4079 }
4080 }
4081}
4082
4083/*
4084 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4085 */
4086TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4087
4088static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4089{
4090 ENTRY(fxtract_r80_r80),
4091 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4092 ENTRY_INTEL(fptan_r80_r80, 0),
4093 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4094 ENTRY_INTEL(fsincos_r80_r80, 0),
4095};
4096
4097#ifdef TSTIEMAIMPL_WITH_GENERATOR
4098static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4099{
4100 static RTFLOAT80U const s_aSpecials[] =
4101 {
4102 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4103 };
4104
4105 X86FXSTATE State;
4106 RT_ZERO(State);
4107 uint32_t cMinNormals = cTests / 4;
4108 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4109 {
4110 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4111 PRTSTREAM pOutFn = pOut;
4112 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4113 {
4114 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4115 continue;
4116 pOutFn = pOutCpu;
4117 }
4118
4119 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4120 uint32_t iTestOutput = 0;
4121 uint32_t cNormalInputs = 0;
4122 uint32_t cTargetRangeInputs = 0;
4123 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4124 {
4125 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4126 if (RTFLOAT80U_IS_NORMAL(&InVal))
4127 {
4128 if (iFn != 0)
4129 {
4130 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4131 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4132 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4133 cTargetRangeInputs++;
4134 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4135 {
4136 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4137 cTargetRangeInputs++;
4138 }
4139 }
4140 cNormalInputs++;
4141 }
4142 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4143 {
4144 iTest -= 1;
4145 continue;
4146 }
4147
4148 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4149 uint16_t const fFcw = RandFcw();
4150 State.FSW = RandFsw();
4151
4152 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4153 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4154 {
4155 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4156 | (iRounding << X86_FCW_RC_SHIFT)
4157 | (iPrecision << X86_FCW_PC_SHIFT)
4158 | X86_FCW_MASK_ALL;
4159 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4160 pfn(&State, &ResM, &InVal);
4161 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4162 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4163 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4164
4165 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4166 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4167 pfn(&State, &ResU, &InVal);
4168 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4169 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4170 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4171
4172 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4173 if (fXcpt)
4174 {
4175 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4176 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4177 pfn(&State, &Res1, &InVal);
4178 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4179 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4180 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4181 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4182 {
4183 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4184 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4185 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4186 pfn(&State, &Res2, &InVal);
4187 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4188 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4189 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4190 }
4191 if (!RT_IS_POWER_OF_TWO(fXcpt))
4192 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4193 if (fUnmasked & fXcpt)
4194 {
4195 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4196 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4197 pfn(&State, &Res3, &InVal);
4198 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4199 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4200 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4201 }
4202 }
4203 }
4204 }
4205 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4206 }
4207}
4208#endif
4209
4210
/**
 * Runs the recorded test data for the unary FPU operations that produce two
 * 80-bit results (fxtract, fptan, fsincos), checking both result values and
 * the resulting FSW against the expectations.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        /* Honor the --include/--exclude test selection. */
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                /* Both output values and the status word must match exactly (bit-identical compare,
                   so NaN payloads and signed zeros are checked too). */
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (when present) runs the native assembly worker. */
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
        }
    }
}
4253
4254
4255
4256int main(int argc, char **argv)
4257{
4258 int rc = RTR3InitExe(argc, &argv, 0);
4259 if (RT_FAILURE(rc))
4260 return RTMsgInitFailure(rc);
4261
4262 /*
4263 * Determin the host CPU.
4264 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4265 */
4266#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4267 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4268 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4269 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4270#else
4271 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4272#endif
4273
4274 /*
4275 * Parse arguments.
4276 */
4277 enum { kModeNotSet, kModeTest, kModeGenerate }
4278 enmMode = kModeNotSet;
4279 bool fInt = true;
4280 bool fFpuLdSt = true;
4281 bool fFpuBinary1 = true;
4282 bool fFpuBinary2 = true;
4283 bool fFpuOther = true;
4284 bool fCpuData = true;
4285 bool fCommonData = true;
4286 uint32_t const cDefaultTests = 96;
4287 uint32_t cTests = cDefaultTests;
4288 RTGETOPTDEF const s_aOptions[] =
4289 {
4290 // mode:
4291 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4292 { "--test", 't', RTGETOPT_REQ_NOTHING },
4293 // test selection (both)
4294 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4295 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4296 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4297 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4298 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4299 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4300 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4301 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4302 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4303 { "--include", 'I', RTGETOPT_REQ_STRING },
4304 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4305 // generation parameters
4306 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4307 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4308 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4309 };
4310
4311 RTGETOPTSTATE State;
4312 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4313 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4314
4315 RTGETOPTUNION ValueUnion;
4316 while ((rc = RTGetOpt(&State, &ValueUnion)))
4317 {
4318 switch (rc)
4319 {
4320 case 'g':
4321 enmMode = kModeGenerate;
4322 break;
4323 case 't':
4324 enmMode = kModeTest;
4325 break;
4326
4327 case 'a':
4328 fCpuData = true;
4329 fCommonData = true;
4330 fInt = true;
4331 fFpuLdSt = true;
4332 fFpuBinary1 = true;
4333 fFpuBinary2 = true;
4334 fFpuOther = true;
4335 break;
4336 case 'z':
4337 fCpuData = false;
4338 fCommonData = false;
4339 fInt = false;
4340 fFpuLdSt = false;
4341 fFpuBinary1 = false;
4342 fFpuBinary2 = false;
4343 fFpuOther = false;
4344 break;
4345
4346 case 'F':
4347 fFpuLdSt = true;
4348 break;
4349 case 'O':
4350 fFpuOther = true;
4351 break;
4352 case 'B':
4353 fFpuBinary1 = true;
4354 break;
4355 case 'P':
4356 fFpuBinary2 = true;
4357 break;
4358 case 'i':
4359 fInt = true;
4360 break;
4361
4362 case 'I':
4363 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4364 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4365 RT_ELEMENTS(g_apszIncludeTestPatterns));
4366 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4367 break;
4368 case 'X':
4369 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4370 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4371 RT_ELEMENTS(g_apszExcludeTestPatterns));
4372 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4373 break;
4374
4375 case 'm':
4376 fCommonData = true;
4377 break;
4378 case 'c':
4379 fCpuData = true;
4380 break;
4381 case 'n':
4382 cTests = ValueUnion.u32;
4383 break;
4384
4385 case 'h':
4386 RTPrintf("usage: %s <-g|-t> [options]\n"
4387 "\n"
4388 "Mode:\n"
4389 " -g, --generate\n"
4390 " Generate test data.\n"
4391 " -t, --test\n"
4392 " Execute tests.\n"
4393 "\n"
4394 "Test selection (both modes):\n"
4395 " -a, --all\n"
4396 " Enable all tests and generated test data. (default)\n"
4397 " -z, --zap, --none\n"
4398 " Disable all tests and test data types.\n"
4399 " -i, --int\n"
4400 " Enable non-FPU tests.\n"
4401 " -F, --fpu-ld-st\n"
4402 " Enable FPU load and store tests.\n"
4403 " -B, --fpu-binary-1\n"
4404 " Enable FPU binary 80-bit FP tests.\n"
4405 " -P, --fpu-binary-2\n"
4406 " Enable FPU binary 64- and 32-bit FP tests.\n"
4407 " -O, --fpu-other\n"
4408 " Enable other FPU tests.\n"
4409 " -I,--include=<test-patter>\n"
4410 " Enable tests matching the given pattern.\n"
4411 " -X,--exclude=<test-patter>\n"
4412 " Skip tests matching the given pattern (overrides --include).\n"
4413 "\n"
4414 "Generation:\n"
4415 " -m, --common\n"
4416 " Enable generating common test data.\n"
4417 " -c, --only-cpu\n"
4418 " Enable generating CPU specific test data.\n"
4419 " -n, --number-of-test <count>\n"
4420 " Number of tests to generate. Default: %u\n"
4421 , argv[0], cDefaultTests);
4422 return RTEXITCODE_SUCCESS;
4423 default:
4424 return RTGetOptPrintError(rc, &ValueUnion);
4425 }
4426 }
4427
4428 /*
4429 * Generate data?
4430 */
4431 if (enmMode == kModeGenerate)
4432 {
4433#ifdef TSTIEMAIMPL_WITH_GENERATOR
4434 char szCpuDesc[256] = {0};
4435 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
4436 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
4437# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
4438 const char * const pszBitBucket = "NUL";
4439# else
4440 const char * const pszBitBucket = "/dev/null";
4441# endif
4442
4443 if (cTests == 0)
4444 cTests = cDefaultTests;
4445 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
4446 g_cZeroSrcTests = g_cZeroDstTests * 2;
4447
4448 if (fInt)
4449 {
4450 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
4451 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4452 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4453 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
4454 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4455 if (!pStrmData || !pStrmDataCpu)
4456 return RTEXITCODE_FAILURE;
4457
4458 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
4459 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
4460 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
4461 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
4462 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
4463 UnaryGenerate(pStrmData, cTests);
4464 ShiftGenerate(pStrmDataCpu, cTests);
4465 MulDivGenerate(pStrmDataCpu, cTests);
4466
4467 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4468 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4469 if (rcExit != RTEXITCODE_SUCCESS)
4470 return rcExit;
4471 }
4472
4473 if (fFpuLdSt)
4474 {
4475 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4476 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4477 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4478 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4479 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4480 if (!pStrmData || !pStrmDataCpu)
4481 return RTEXITCODE_FAILURE;
4482
4483 FpuLdConstGenerate(pStrmData, cTests);
4484 FpuLdIntGenerate(pStrmData, cTests);
4485 FpuLdD80Generate(pStrmData, cTests);
4486 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4487 FpuStD80Generate(pStrmData, cTests);
4488 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4489 FpuLdMemGenerate(pStrmData, cTests2);
4490 FpuStMemGenerate(pStrmData, cTests2);
4491
4492 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4493 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4494 if (rcExit != RTEXITCODE_SUCCESS)
4495 return rcExit;
4496 }
4497
4498 if (fFpuBinary1)
4499 {
4500 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4501 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4502 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4503 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4504 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4505 if (!pStrmData || !pStrmDataCpu)
4506 return RTEXITCODE_FAILURE;
4507
4508 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4509 FpuBinaryFswR80Generate(pStrmData, cTests);
4510 FpuBinaryEflR80Generate(pStrmData, cTests);
4511
4512 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4513 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4514 if (rcExit != RTEXITCODE_SUCCESS)
4515 return rcExit;
4516 }
4517
4518 if (fFpuBinary2)
4519 {
4520 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4521 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4522 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4523 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4524 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4525 if (!pStrmData || !pStrmDataCpu)
4526 return RTEXITCODE_FAILURE;
4527
4528 FpuBinaryR64Generate(pStrmData, cTests);
4529 FpuBinaryR32Generate(pStrmData, cTests);
4530 FpuBinaryI32Generate(pStrmData, cTests);
4531 FpuBinaryI16Generate(pStrmData, cTests);
4532 FpuBinaryFswR64Generate(pStrmData, cTests);
4533 FpuBinaryFswR32Generate(pStrmData, cTests);
4534 FpuBinaryFswI32Generate(pStrmData, cTests);
4535 FpuBinaryFswI16Generate(pStrmData, cTests);
4536
4537 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4538 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4539 if (rcExit != RTEXITCODE_SUCCESS)
4540 return rcExit;
4541 }
4542
4543 if (fFpuOther)
4544 {
4545 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4546 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4547 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4548 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4549 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4550 if (!pStrmData || !pStrmDataCpu)
4551 return RTEXITCODE_FAILURE;
4552
4553 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4554 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4555 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4556
4557 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4558 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4559 if (rcExit != RTEXITCODE_SUCCESS)
4560 return rcExit;
4561 }
4562
4563 return RTEXITCODE_SUCCESS;
4564#else
4565 return RTMsgErrorExitFailure("Test data generator not compiled in!");
4566#endif
4567 }
4568
4569 /*
4570 * Do testing. Currrently disabled by default as data needs to be checked
4571 * on both intel and AMD systems first.
4572 */
4573 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4574 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4575 if (enmMode == kModeTest)
4576 {
4577 RTTestBanner(g_hTest);
4578
4579 /* Allocate guarded memory for use in the tests. */
4580#define ALLOC_GUARDED_VAR(a_puVar) do { \
4581 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
4582 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
4583 } while (0)
4584 ALLOC_GUARDED_VAR(g_pu8);
4585 ALLOC_GUARDED_VAR(g_pu16);
4586 ALLOC_GUARDED_VAR(g_pu32);
4587 ALLOC_GUARDED_VAR(g_pu64);
4588 ALLOC_GUARDED_VAR(g_pu128);
4589 ALLOC_GUARDED_VAR(g_pu8Two);
4590 ALLOC_GUARDED_VAR(g_pu16Two);
4591 ALLOC_GUARDED_VAR(g_pu32Two);
4592 ALLOC_GUARDED_VAR(g_pu64Two);
4593 ALLOC_GUARDED_VAR(g_pu128Two);
4594 ALLOC_GUARDED_VAR(g_pfEfl);
4595 if (RTTestErrorCount(g_hTest) == 0)
4596 {
4597 if (fInt)
4598 {
4599 BinU8Test();
4600 BinU16Test();
4601 BinU32Test();
4602 BinU64Test();
4603 XchgTest();
4604 XaddTest();
4605 CmpXchgTest();
4606 CmpXchg8bTest();
4607 CmpXchg16bTest();
4608 ShiftDblTest();
4609 UnaryTest();
4610 ShiftTest();
4611 MulDivTest();
4612 BswapTest();
4613 }
4614
4615 if (fFpuLdSt)
4616 {
4617 FpuLoadConstTest();
4618 FpuLdMemTest();
4619 FpuLdIntTest();
4620 FpuLdD80Test();
4621 FpuStMemTest();
4622 FpuStIntTest();
4623 FpuStD80Test();
4624 }
4625
4626 if (fFpuBinary1)
4627 {
4628 FpuBinaryR80Test();
4629 FpuBinaryFswR80Test();
4630 FpuBinaryEflR80Test();
4631 }
4632
4633 if (fFpuBinary2)
4634 {
4635 FpuBinaryR64Test();
4636 FpuBinaryR32Test();
4637 FpuBinaryI32Test();
4638 FpuBinaryI16Test();
4639 FpuBinaryFswR64Test();
4640 FpuBinaryFswR32Test();
4641 FpuBinaryFswI32Test();
4642 FpuBinaryFswI16Test();
4643 }
4644
4645 if (fFpuOther)
4646 {
4647 FpuUnaryR80Test();
4648 FpuUnaryFswR80Test();
4649 FpuUnaryTwoR80Test();
4650 }
4651 }
4652 return RTTestSummaryAndDestroy(g_hTest);
4653 }
4654 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
4655}
4656
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette