VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94680

Last change on this file since 94680 was 94680, checked in by vboxsync, 3 years ago

tstIEMAImpl: fprem, fprem1 & fscale adjustments. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 210.5 KB
Line 
1/* $Id: tstIEMAImpl.cpp 94680 2022-04-22 07:37:55Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** Subtest table entry for the native/common implementation, no extra info. */
#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
/** Subtest table entry for the native/common implementation with extra info.
 * No reference (pfnNative) implementation; the EFLAGS flavour field means
 * "same results on all CPUs" here. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Subtest table entry for the Intel EFLAGS flavour, no extra info. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
/** Subtest table entry for the Intel EFLAGS flavour; the common
 * implementation doubles as the native reference. */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Subtest table entry for the AMD EFLAGS flavour, no extra info. */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
/** Subtest table entry for the AMD EFLAGS flavour; the common
 * implementation doubles as the native reference. */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Declares a subtest descriptor struct type binding a test-value type to a
 * function pointer type; instances are filled in with the ENTRY macros above. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char           *pszName;          /* subtest name */ \
        a_FunctionPtrType     pfn;              /* implementation under test */ \
        a_FunctionPtrType     pfnNative;        /* optional reference implementation */ \
        a_TestType const     *paTests;          /* test value array */ \
        uint32_t const       *pcTests;          /* number of entries in paTests */ \
        uint32_t              uExtra;           /* extra info (meaning is per subtest) */ \
        uint8_t               idxCpuEflFlavour; /* IEMTARGETCPU_EFL_BEHAVIOR_XXX */ \
    } a_TypeName

/** Number of variations to run for a subtest: 1, or 2 when the entry's
 * EFLAGS flavour matches the host CPU and a native reference is present. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
75
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
static RTTEST g_hTest;                                              /**< The test instance handle. */
static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL; /**< Host CPU EFLAGS behaviour flavour. */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
static uint32_t g_cZeroDstTests = 2; /**< Number of leading generated tests that use zero destination values. */
static uint32_t g_cZeroSrcTests = 4; /**< Number of leading generated tests that use zero source values. */
#endif
/* Scratch variables used as operand storage by the tests: */
static uint8_t *g_pu8, *g_pu8Two;
static uint16_t *g_pu16, *g_pu16Two;
static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
static uint64_t *g_pu64, *g_pu64Two;
static RTUINT128U *g_pu128, *g_pu128Two;

static char g_aszBuf[16][256]; /**< Ring of formatting buffers handed out by the Format/Gen helpers. */
static unsigned g_idxBuf = 0;  /**< Next formatting buffer to hand out (index wraps modulo 16). */

static uint32_t g_cIncludeTestPatterns;           /**< Number of entries in g_apszIncludeTestPatterns. */
static uint32_t g_cExcludeTestPatterns;           /**< Number of entries in g_apszExcludeTestPatterns. */
static const char *g_apszIncludeTestPatterns[64]; /**< Simple patterns selecting tests to include. */
static const char *g_apszExcludeTestPatterns[64]; /**< Simple patterns selecting tests to exclude. */
99
100
101/*********************************************************************************************************************************
102* Internal Functions *
103*********************************************************************************************************************************/
/* Forward declarations of the value formatters (defined further down). */
static const char *FormatR80(PCRTFLOAT80U pr80);
static const char *FormatR64(PCRTFLOAT64U pr64);
static const char *FormatR32(PCRTFLOAT32U pr32);
107
108
109/*
110 * Random helpers.
111 */
112
113static uint32_t RandEFlags(void)
114{
115 uint32_t fEfl = RTRandU32();
116 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
117}
118
119#ifdef TSTIEMAIMPL_WITH_GENERATOR
120
121static uint8_t RandU8(void)
122{
123 return RTRandU32Ex(0, 0xff);
124}
125
126
127static uint16_t RandU16(void)
128{
129 return RTRandU32Ex(0, 0xffff);
130}
131
132
133static uint32_t RandU32(void)
134{
135 return RTRandU32();
136}
137
138#endif
139
140static uint64_t RandU64(void)
141{
142 return RTRandU64();
143}
144
145
146static RTUINT128U RandU128(void)
147{
148 RTUINT128U Ret;
149 Ret.s.Hi = RTRandU64();
150 Ret.s.Lo = RTRandU64();
151 return Ret;
152}
153
154#ifdef TSTIEMAIMPL_WITH_GENERATOR
155
156static uint8_t RandU8Dst(uint32_t iTest)
157{
158 if (iTest < g_cZeroDstTests)
159 return 0;
160 return RandU8();
161}
162
163
164static uint8_t RandU8Src(uint32_t iTest)
165{
166 if (iTest < g_cZeroSrcTests)
167 return 0;
168 return RandU8();
169}
170
171
172static uint16_t RandU16Dst(uint32_t iTest)
173{
174 if (iTest < g_cZeroDstTests)
175 return 0;
176 return RandU16();
177}
178
179
180static uint16_t RandU16Src(uint32_t iTest)
181{
182 if (iTest < g_cZeroSrcTests)
183 return 0;
184 return RandU16();
185}
186
187
188static uint32_t RandU32Dst(uint32_t iTest)
189{
190 if (iTest < g_cZeroDstTests)
191 return 0;
192 return RandU32();
193}
194
195
196static uint32_t RandU32Src(uint32_t iTest)
197{
198 if (iTest < g_cZeroSrcTests)
199 return 0;
200 return RandU32();
201}
202
203
204static uint64_t RandU64Dst(uint32_t iTest)
205{
206 if (iTest < g_cZeroDstTests)
207 return 0;
208 return RandU64();
209}
210
211
212static uint64_t RandU64Src(uint32_t iTest)
213{
214 if (iTest < g_cZeroSrcTests)
215 return 0;
216 return RandU64();
217}
218
219
220/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
221static int16_t RandI16Src2(uint32_t iTest)
222{
223 if (iTest < 18 * 4)
224 switch (iTest % 4)
225 {
226 case 0: return 0;
227 case 1: return INT16_MAX;
228 case 2: return INT16_MIN;
229 case 3: break;
230 }
231 return (int16_t)RandU16();
232}
233
234
235/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
236static int32_t RandI32Src2(uint32_t iTest)
237{
238 if (iTest < 18 * 4)
239 switch (iTest % 4)
240 {
241 case 0: return 0;
242 case 1: return INT32_MAX;
243 case 2: return INT32_MIN;
244 case 3: break;
245 }
246 return (int32_t)RandU32();
247}
248
249
#if 0 /* currently unused; kept for future int64 test value generation */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int64_t)RandU64();
}
#endif
257
258
259static uint16_t RandFcw(void)
260{
261 return RandU16() & ~X86_FCW_ZERO_MASK;
262}
263
264
265static uint16_t RandFsw(void)
266{
267 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
268 return RandU16();
269}
270
271
272static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
273{
274 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
275 pr80->sj64.uFraction >>= cShift;
276 else
277 pr80->sj64.uFraction = (cShift % 19) + 1;
278}
279
280
281
/**
 * Generates a random 80-bit floating point value of a caller chosen basic type.
 *
 * @returns The generated value.
 * @param   bType       Value type selector; only the low 5 bits are used:
 *                      0..3  zero / pseudo-infinity / infinity / indefinite,
 *                      4..7  denormals (4,5) and pseudo denormals (6,7),
 *                      8..9  pseudo NaNs,
 *                      10..13 quiet (10,11) and signalling (12,13) NaNs,
 *                      14..15 unnormals,
 *                      16..25 normals (16 targets rounding edge cases),
 *                      26..31 raw random bits (no adjustment at all).
 * @param   cTarget     Width of the eventual conversion target (80, 64 or 32
 *                      for floats; 16, 32/59 or 64 for integer targets) -
 *                      used to keep normal values in a convertible range.
 * @param   fIntTarget  Whether the conversion target is an integer.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the branches below only fix up what the
       chosen type requires, so sign (and often fraction bits) stay random. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert(   bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7).  Odd types shift the
           fraction for variation; even ones just make sure it is non-zero
           where required. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6  || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Preserve NaN-ness when clearing the integer bit: keep the fraction
           non-zero by moving the information into bit 62 first. */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals: integer bit clear with an in-range, non-boundary exponent. */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value survives conversion to the
               target float width without over/underflowing. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: keep the value in [1, 2^(cTarget-1)) roughly. */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* bType 26..31: plain random bits, returned unmodified. */
    return r80;
}
420
421
422static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
423{
424 /*
425 * Make it more likely that we get a good selection of special values.
426 */
427 return RandR80Ex(RandU8(), cTarget, fIntTarget);
428
429}
430
431
432static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
433{
434 /* Make sure we cover all the basic types first before going for random selection: */
435 if (iTest <= 18)
436 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
437 return RandR80(cTarget, fIntTarget);
438}
439
440
441/**
442 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
443 * to a 0..17, covering all basic value types.
444 */
445static uint8_t RandR80Src12RemapType(uint8_t bType)
446{
447 switch (bType)
448 {
449 case 0: return 18; /* normal */
450 case 1: return 16; /* normal extreme rounding */
451 case 2: return 14; /* unnormal */
452 case 3: return 12; /* Signalling NaN */
453 case 4: return 10; /* Quiet NaN */
454 case 5: return 8; /* PseudoNaN */
455 case 6: return 6; /* Pseudo Denormal */
456 case 7: return 4; /* Denormal */
457 case 8: return 3; /* Indefinite */
458 case 9: return 2; /* Infinity */
459 case 10: return 1; /* Pseudo-Infinity */
460 case 11: return 0; /* Zero */
461 default: AssertFailedReturn(18);
462 }
463}
464
465
466/**
467 * This works in tandem with RandR80Src2 to make sure we cover all operand
468 * type mixes first before we venture into regular random testing.
469 *
470 * There are 11 basic variations, when we leave out the five odd ones using
471 * SafeR80FractionShift. Because of the special normalized value targetting at
472 * rounding, we make it an even 12. So 144 combinations for two operands.
473 */
474static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
475{
476 if (cPartnerBits == 80)
477 {
478 Assert(!fPartnerInt);
479 if (iTest < 12 * 12)
480 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
481 }
482 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
483 {
484 if (iTest < 12 * 10)
485 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
486 }
487 else if (iTest < 18 * 4 && fPartnerInt)
488 return RandR80Ex(iTest / 4);
489 return RandR80();
490}
491
492
493/** Partner to RandR80Src1. */
494static RTFLOAT80U RandR80Src2(uint32_t iTest)
495{
496 if (iTest < 12 * 12)
497 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
498 return RandR80();
499}
500
501
502static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
503{
504 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
505 pr64->s64.uFraction >>= cShift;
506 else
507 pr64->s64.uFraction = (cShift % 19) + 1;
508}
509
510
/**
 * Generates a random 64-bit floating point value of a caller chosen basic type.
 *
 * @returns The generated value.
 * @param   bType   Value type selector; only the low 4 bits are used:
 *                  0 zero, 1 infinity, 2..3 subnormals, 4..5 quiet NaNs,
 *                  6..7 signalling NaNs, 8..11 normals, 12..15 raw random.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: make sure the fraction ends up non-zero. */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: top fraction bit selects quiet (set) vs signalling (clear). */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: plain random bits, returned unmodified. */
    return r64;
}
567
568
569static RTFLOAT64U RandR64Src(uint32_t iTest)
570{
571 if (iTest < 16)
572 return RandR64Ex(iTest);
573 return RandR64Ex(RandU8());
574}
575
576
577/** Pairing with a 80-bit floating point arg. */
578static RTFLOAT64U RandR64Src2(uint32_t iTest)
579{
580 if (iTest < 12 * 10)
581 return RandR64Ex(9 - iTest % 10); /* start with normal values */
582 return RandR64Ex(RandU8());
583}
584
585
586static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
587{
588 if (pr32->s.uFraction >= RT_BIT_32(cShift))
589 pr32->s.uFraction >>= cShift;
590 else
591 pr32->s.uFraction = (cShift % 19) + 1;
592}
593
594
/**
 * Generates a random 32-bit floating point value of a caller chosen basic type.
 *
 * @returns The generated value.
 * @param   bType   Value type selector; only the low 4 bits are used:
 *                  0 zero, 1 infinity, 2..3 subnormals, 4..5 quiet NaNs,
 *                  6..7 signalling NaNs, 8..11 normals, 12..15 raw random.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: make sure the fraction ends up non-zero. */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: top fraction bit selects quiet (set) vs signalling (clear). */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: plain random bits, returned unmodified. */
    return r32;
}
650
651
652static RTFLOAT32U RandR32Src(uint32_t iTest)
653{
654 if (iTest < 16)
655 return RandR32Ex(iTest);
656 return RandR32Ex(RandU8());
657}
658
659
660/** Pairing with a 80-bit floating point arg. */
661static RTFLOAT32U RandR32Src2(uint32_t iTest)
662{
663 if (iTest < 12 * 10)
664 return RandR32Ex(9 - iTest % 10); /* start with normal values */
665 return RandR32Ex(RandU8());
666}
667
668
/**
 * Generates a random packed BCD (RTPBCD80U) test value.
 *
 * Tests 0..2 are zeros of alternating sign, 3..4 the indefinite encoding;
 * after that every 8-test cycle includes two deliberately illegal encodings
 * (random digit pairs, sometimes with non-zero padding bits).
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        /* Zeros; the sign alternates with the test index. */
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        /* The indefinite value. */
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: raw random pairs (digits may exceed 9); variant 7 also
           sets random padding bits. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: each nibble is a valid decimal digit 0..9. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
706
707
708const char *GenFormatR80(PCRTFLOAT80U plrd)
709{
710 if (RTFLOAT80U_IS_ZERO(plrd))
711 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
712 if (RTFLOAT80U_IS_INF(plrd))
713 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
714 if (RTFLOAT80U_IS_INDEFINITE(plrd))
715 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
716 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
717 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
718 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
719 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
720
721 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
722 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
723 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
724 return pszBuf;
725}
726
727const char *GenFormatR64(PCRTFLOAT64U prd)
728{
729 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
730 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
731 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
732 return pszBuf;
733}
734
735
736const char *GenFormatR32(PCRTFLOAT32U pr)
737{
738 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
739 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
740 return pszBuf;
741}
742
743
/**
 * Formats a packed BCD value as an RTPBCD80U_INIT_C (or, with non-zero pad
 * bits, RTPBCD80U_INIT_EX_C) initializer expression for generated test data.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0) /* most significant digit pair first */
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* NOTE(review): the two trailing writes below are unchecked - presumably
       the 256 byte buffer always fits the worst case; verify the maximum
       formatted length to be sure. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
761
762
763const char *GenFormatI64(int64_t i64)
764{
765 if (i64 == INT64_MIN) /* This one is problematic */
766 return "INT64_MIN";
767 if (i64 == INT64_MAX)
768 return "INT64_MAX";
769 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
770 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
771 return pszBuf;
772}
773
774
775const char *GenFormatI64(int64_t const *pi64)
776{
777 return GenFormatI64(*pi64);
778}
779
780
781const char *GenFormatI32(int32_t i32)
782{
783 if (i32 == INT32_MIN) /* This one is problematic */
784 return "INT32_MIN";
785 if (i32 == INT32_MAX)
786 return "INT32_MAX";
787 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
788 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
789 return pszBuf;
790}
791
792
793const char *GenFormatI32(int32_t const *pi32)
794{
795 return GenFormatI32(*pi32);
796}
797
798
799const char *GenFormatI16(int16_t i16)
800{
801 if (i16 == INT16_MIN) /* This one is problematic */
802 return "INT16_MIN";
803 if (i16 == INT16_MAX)
804 return "INT16_MAX";
805 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
806 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
807 return pszBuf;
808}
809
810
811const char *GenFormatI16(int16_t const *pi16)
812{
813 return GenFormatI16(*pi16);
814}
815
816
/**
 * Writes the standard file header to a generated test data file, embedding
 * the generator's SVN revision and a description of the CPU it ran on.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 94680 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev])) /* take only the digits, dropping the trailing " $" */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 94680 2022-04-22 07:37:55Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.215389.xyz. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
849
850
851static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
852{
853 PRTSTREAM pOut = NULL;
854 int rc = RTStrmOpen(pszFilename, "w", &pOut);
855 if (RT_SUCCESS(rc))
856 {
857 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
858 return pOut;
859 }
860 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
861 return NULL;
862}
863
864
865static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
866{
867 RTStrmPrintf(pOut,
868 "\n"
869 "/* end of file */\n");
870 int rc = RTStrmClose(pOut);
871 if (RT_SUCCESS(rc))
872 return rcExit;
873 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
874}
875
876
/** Emits the opening of a generated test value array:
 *  "<type> const g_aTests_<name>[] = {". */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
881
882
/** Emits the closing brace of a generated test value array together with the
 *  matching g_cTests_<name> element count variable. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
891
892#endif /* TSTIEMAIMPL_WITH_GENERATOR */
893
894
895/*
896 * Test helpers.
897 */
898static bool IsTestEnabled(const char *pszName)
899{
900 /* Process excludes first: */
901 uint32_t i = g_cExcludeTestPatterns;
902 while (i-- > 0)
903 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
904 return false;
905
906 /* If no include patterns, everything is included: */
907 i = g_cIncludeTestPatterns;
908 if (!i)
909 return true;
910
911 /* Otherwise only tests in the include patters gets tested: */
912 while (i-- > 0)
913 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
914 return true;
915
916 return false;
917}
918
919
920static bool SubTestAndCheckIfEnabled(const char *pszName)
921{
922 RTTestSub(g_hTest, pszName);
923 if (IsTestEnabled(pszName))
924 return true;
925 RTTestSkipped(g_hTest, "excluded");
926 return false;
927}
928
929
/**
 * Formats the difference between two EFLAGS values for failure messages.
 *
 * @returns Empty string when equal, otherwise " - <xor>/FLAG/!FLAG..." where
 *          a plain name means the flag is set in @a fActual and "!" means it
 *          is clear there (i.e. set only in @a fExpected).
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* (re-terminates; the buffer is already terminated) */
    return pszBuf;
}
971
972
/**
 * Formats the difference between two FPU status words for failure messages.
 *
 * @returns Empty string when equal, otherwise " - <xor>/FLAG/!FLAG..." in the
 *          same notation as EFlagsDiff, plus "/TOPa!e" when the TOP-of-stack
 *          fields differ.
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
#if 0 /* For debugging fprem & fprem1 */
    cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
                       X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
#endif
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* (re-terminates; the buffer is already terminated) */
    return pszBuf;
}
1017
1018
/**
 * Formats an FPU control word (FCW) as a human readable string.
 *
 * Output is "<precision> <rounding>" followed by " NAME" for each set
 * exception mask bit (IM/DM/ZM/OM/UM/PM) and "6M" for the reserved bit 6
 * (mask value 64).
 *
 * @returns Pointer into the rotating g_aszBuf static buffer set; only valid
 *          until the buffers are recycled.
 * @param   fFcw    The FPU control word to format.
 */
static const char *FormatFcw(uint16_t fFcw)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    const char *pszPC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_PC_MASK) /* 2-bit field, so all four cases are covered. */
    {
        case X86_FCW_PC_24:     pszPC = "PC24";     break;
        case X86_FCW_PC_RSVD:   pszPC = "PCRSVD!";  break;
        case X86_FCW_PC_53:     pszPC = "PC53";     break;
        case X86_FCW_PC_64:     pszPC = "PC64";     break;
    }

    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_RC_MASK) /* 2-bit field, so all four cases are covered. */
    {
        case X86_FCW_RC_NEAREST:    pszRC = "NEAR"; break;
        case X86_FCW_RC_DOWN:       pszRC = "DOWN"; break;
        case X86_FCW_RC_UP:         pszRC = "UP";   break;
        case X86_FCW_RC_ZERO:       pszRC = "ZERO"; break;
    }
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);

    /* Exception mask bits, plus the reserved bit 6 ("6M", mask 64). */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
        FCW_ENTRY(IM),
        FCW_ENTRY(DM),
        FCW_ENTRY(ZM),
        FCW_ENTRY(OM),
        FCW_ENTRY(UM),
        FCW_ENTRY(PM),
        { "6M", 64 },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fFcw & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* (re)terminate; appends nothing */
    return pszBuf;
}
1064
1065
1066static const char *FormatR80(PCRTFLOAT80U pr80)
1067{
1068 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1069 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1070 return pszBuf;
1071}
1072
1073
1074static const char *FormatR64(PCRTFLOAT64U pr64)
1075{
1076 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1077 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1078 return pszBuf;
1079}
1080
1081
1082static const char *FormatR32(PCRTFLOAT32U pr32)
1083{
1084 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1085 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1086 return pszBuf;
1087}
1088
1089
/**
 * Formats an 80-bit packed BCD value as "<sign><18 digits>", most significant
 * digit pair first.
 *
 * Nibbles outside the 0-9 BCD range are rendered as hex digits and counted;
 * when any such bad digits occur, or the pad bits are non-zero, a trailing
 * "[cBadDigits,uPad]" note is appended.
 *
 * @returns Pointer into the rotating g_aszBuf static buffer set (or the
 *          literal "Ind" for the indefinite encoding); only valid until the
 *          buffers are recycled.
 * @param   pd80    The packed BCD value to format.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0) /* walk pairs from most to least significant */
    {
        static const char    s_szDigits[]   = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
1116
1117
#if 0 /* currently unused; kept for future signed 64-bit result formatting */
/** Formats a signed 64-bit integer as hex (RTSTR_F_VALSIGNED) into one of
 *  the rotating static buffers. */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1126
1127
1128static const char *FormatI32(int32_t const *piVal)
1129{
1130 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1131 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1132 return pszBuf;
1133}
1134
1135
1136static const char *FormatI16(int16_t const *piVal)
1137{
1138 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1139 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1140 return pszBuf;
1141}
1142
1143
1144/*
1145 * Binary operations.
1146 */
1147TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1148TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1149TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1150TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1151
/**
 * GEN_BINARY_TESTS - emits BinU<bits>Generate() which writes test vector
 * arrays for every g_aBinU<bits> subtest to @a pOut (or @a pOutCpu for
 * subtests whose eflags behaviour is CPU vendor specific).  Only defined
 * when the testcase is built with TSTIEMAIMPL_WITH_GENERATOR; expands to
 * nothing otherwise.
 *
 * Each vector records input/output eflags, input/output destination and the
 * source operand.  For subtests with uExtra set (the bt/btc/btr/bts family)
 * the source is masked down to a valid bit index for the operand width.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1190
/**
 * TEST_BINARY_OPS - instantiates the generator (via GEN_BINARY_TESTS) and the
 * BinU<bits>Test() driver for one operand width.
 *
 * The driver replays every recorded test vector against each enabled subtest.
 * On a mismatch it reports which of dst/eflags (or both) differed; on a match
 * it re-runs the worker on the specially placed g_puNN/g_pfEfl globals as a
 * second check.  The iVar loop runs the test set a second time against the
 * native worker variant (pfnNative) when one is present (cVars comes from
 * COUNT_VARIATIONS).
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits  = paTests[iTest].uDstIn; \
                    *g_pfEfl          = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1231
1232
1233/*
1234 * 8-bit binary operations.
1235 */
/** Subtests for the 8-bit binary ALU workers; plain and locked variants are
 *  expected to produce identical results and thus share test vectors. */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
/* Instantiate generator + test driver for the 8-bit table above. */
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1256
1257
1258/*
1259 * 16-bit binary operations.
1260 */
/** Subtests for the 16-bit binary ALU workers.
 *  ENTRY_EX(..., 1) sets uExtra=1 so the generator masks the source down to a
 *  valid bit index (see GEN_BINARY_TESTS).  ENTRY_AMD/ENTRY_INTEL register
 *  vendor-specific variants; the eflags mask passed along presumably marks
 *  the flags whose behaviour differs between vendors - TODO confirm against
 *  the ENTRY_AMD/ENTRY_INTEL macro definitions. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
/* Instantiate generator + test driver for the 16-bit table above. */
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1295
1296
1297/*
1298 * 32-bit binary operations.
1299 */
/** Subtests for the 32-bit binary ALU workers; see g_aBinU16 for the meaning
 *  of ENTRY_EX and the vendor-specific ENTRY_AMD/ENTRY_INTEL entries. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
/* Instantiate generator + test driver for the 32-bit table above. */
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1333
1334
1335/*
1336 * 64-bit binary operations.
1337 */
/** Subtests for the 64-bit binary ALU workers; see g_aBinU16 for the meaning
 *  of ENTRY_EX and the vendor-specific ENTRY_AMD/ENTRY_INTEL entries. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
/* Instantiate generator + test driver for the 64-bit table above. */
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1371
1372
1373/*
1374 * XCHG
1375 */
/**
 * Tests the locked and unlocked xchg workers for all four operand widths.
 *
 * For each worker two calls are made: one on the specially placed g_puNN /
 * g_puNNTwo globals (result not checked here - presumably just to exercise
 * the worker on that memory; TODO confirm intent), and one on random local
 * values where the swap result is verified.
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t  *pu8Mem,  uint8_t  *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    /* Worker table: operand size in bytes, value mask, and the function
       pointer stored via uintptr_t so one initializer form fits all widths. */
    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t           pfn;
            FNIEMAIMPLXCHGU8   *pfnU8;
            FNIEMAIMPLXCHGU16  *pfnU16;
            FNIEMAIMPLXCHGU32  *pfnU32;
            FNIEMAIMPLXCHGU64  *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_locked    } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked   } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked   } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked   } },
        { 1, UINT8_MAX,  { (uintptr_t)iemAImpl_xchg_u8_unlocked  } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        if (uIn1.u == uIn2.u) /* make sure the two values differ so the swap is observable */
            uDst.u = uIn2.u = ~uIn2.u;

        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        /* After the swap, the memory operand must hold the register input and
           vice versa. */
        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1441
1442
1443/*
1444 * XADD
1445 */
1446static void XaddTest(void)
1447{
1448#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1449 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1450 static struct \
1451 { \
1452 const char *pszName; \
1453 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1454 BINU ## a_cBits ## _TEST_T const *paTests; \
1455 uint32_t const *pcTests; \
1456 } const s_aFuncs[] = \
1457 { \
1458 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1459 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1460 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1461 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1462 }; \
1463 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1464 { \
1465 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1466 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1467 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1468 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1469 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1470 { \
1471 uint32_t fEfl = paTests[iTest].fEflIn; \
1472 a_Type uSrc = paTests[iTest].uSrcIn; \
1473 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1474 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1475 if ( fEfl != paTests[iTest].fEflOut \
1476 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1477 || uSrc != paTests[iTest].uDstIn) \
1478 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1479 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1480 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1481 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1482 } \
1483 } \
1484 } while(0)
1485 TEST_XADD(8, uint8_t, "%#04x");
1486 TEST_XADD(16, uint16_t, "%#06x");
1487 TEST_XADD(32, uint32_t, "%#010RX32");
1488 TEST_XADD(64, uint64_t, "%#010RX64");
1489}
1490
1491
1492/*
1493 * CMPXCHG
1494 */
1495
1496static void CmpXchgTest(void)
1497{
1498#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1499 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1500 static struct \
1501 { \
1502 const char *pszName; \
1503 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1504 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1505 BINU ## a_cBits ## _TEST_T const *paTests; \
1506 uint32_t const *pcTests; \
1507 } const s_aFuncs[] = \
1508 { \
1509 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1510 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1511 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1512 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1513 }; \
1514 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1515 { \
1516 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1517 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1518 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1519 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1520 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1521 { \
1522 /* as is (99% likely to be negative). */ \
1523 uint32_t fEfl = paTests[iTest].fEflIn; \
1524 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1525 a_Type uA = paTests[iTest].uDstIn; \
1526 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1527 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1528 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1529 if ( fEfl != paTests[iTest].fEflOut \
1530 || *g_pu ## a_cBits != uExpect \
1531 || uA != paTests[iTest].uSrcIn) \
1532 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1533 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1534 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1535 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1536 /* positive */ \
1537 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1538 uA = paTests[iTest].uDstIn; \
1539 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1540 fEfl = paTests[iTest].fEflIn; \
1541 uA = paTests[iTest].uDstIn; \
1542 *g_pu ## a_cBits = uA; \
1543 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1544 if ( fEfl != fEflExpect \
1545 || *g_pu ## a_cBits != uNew \
1546 || uA != paTests[iTest].uDstIn) \
1547 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1548 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1549 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1550 EFlagsDiff(fEfl, fEflExpect)); \
1551 } \
1552 } \
1553 } while(0)
1554 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1555 TEST_CMPXCHG(16, uint16_t, "%#06x");
1556 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1557#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1558 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1559#endif
1560}
1561
/**
 * Tests the cmpxchg8b workers (plain and locked) with random values.
 *
 * Two iterations per worker (iTest = 0 and 2; the negative half reports as
 * iTest + 1).  The positive case expects the new value stored, ZF set and the
 * comparand left untouched; the negative case expects memory unchanged, ZF
 * cleared and the old memory value returned in the comparand.
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG8B  *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b",        iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u    != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value operand must not be clobbered */

            /* negative */
            uint64_t const uExpect = ~uOldValue; /* memory deliberately differs from the comparand */
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u    != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value operand must not be clobbered */
        }
    }
}
1618
/**
 * Tests the cmpxchg16b workers (plain, locked, and on non-ARM64 hosts the
 * fallback) with random 128-bit values; mirrors CmpXchg8bTest.
 *
 * On AMD64 hosts using the assembly implementation, the subtest is skipped
 * when the CPU lacks the CMPXCHG16B feature (CPUID.01h:ECX.CX16).
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char           *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b",          iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked",   iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo       != uOldValue.s.Lo
                || uA.s.Hi       != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* new-value operand untouched */

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo); /* memory differs from comparand */
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo       != uExpect.s.Lo
                || uA.s.Hi       != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* new-value operand untouched */
        }
    }
}
1695
1696
1697/*
1698 * Double shifts.
1699 *
1700 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1701 */
1702#ifdef TSTIEMAIMPL_WITH_GENERATOR
1703# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1704void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1705{ \
1706 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1707 { \
1708 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1709 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1710 continue; \
1711 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
1712 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1713 { \
1714 a_TestType Test; \
1715 Test.fEflIn = RandEFlags(); \
1716 Test.fEflOut = Test.fEflIn; \
1717 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1718 Test.uDstOut = Test.uDstIn; \
1719 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1720 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
1721 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
1722 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
1723 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
1724 } \
1725 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
1726 } \
1727}
1728#else
1729# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
1730#endif
1731
/**
 * TEST_SHIFT_DBL - declares the subtest table (shld/shrd with separate AMD
 * and Intel flavours), instantiates the generator, and emits the
 * ShiftDblU<bits>Test() driver.
 *
 * The driver mirrors TEST_BINARY_OPS: replay each vector, report mismatches
 * (with a " dst!" marker when the destination differed), re-check matches on
 * the g_puNN/g_pfEfl globals, and run a second pass with pfnNative when
 * present.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const        paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                  cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const                  cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits  = paTests[iTest].uDstIn; \
                    *g_pfEfl          = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
/* Instantiate double-shift subtests for the three valid operand widths. */
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16",  BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1785
1786#ifdef TSTIEMAIMPL_WITH_GENERATOR
1787static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
1788{
1789 ShiftDblU16Generate(pOut, cTests);
1790 ShiftDblU32Generate(pOut, cTests);
1791 ShiftDblU64Generate(pOut, cTests);
1792}
1793#endif
1794
1795static void ShiftDblTest(void)
1796{
1797 ShiftDblU16Test();
1798 ShiftDblU32Test();
1799 ShiftDblU64Test();
1800}
1801
1802
1803/*
1804 * Unary operators.
1805 *
1806 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1807 */
1808#ifdef TSTIEMAIMPL_WITH_GENERATOR
1809# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1810void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1811{ \
1812 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1813 { \
1814 GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1815 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1816 { \
1817 a_TestType Test; \
1818 Test.fEflIn = RandEFlags(); \
1819 Test.fEflOut = Test.fEflIn; \
1820 Test.uDstIn = RandU ## a_cBits(); \
1821 Test.uDstOut = Test.uDstIn; \
1822 Test.uSrcIn = 0; \
1823 Test.uMisc = 0; \
1824 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1825 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1826 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1827 } \
1828 GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1829 } \
1830}
1831#else
1832# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1833#endif
1834
/**
 * TEST_UNARY - declares the unary subtest table (inc/dec/not/neg, plain and
 * locked), instantiates the generator, and emits the UnaryU<bits>Test()
 * driver.  The driver replays each vector and, on a match, re-checks against
 * the g_puNN/g_pfEfl globals like the binary-op driver does.
 */
#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType const g_aUnaryU ## a_cBits [] = \
{ \
    ENTRY(inc_u ## a_cBits), \
    ENTRY(inc_u ## a_cBits ## _locked), \
    ENTRY(dec_u ## a_cBits), \
    ENTRY(dec_u ## a_cBits ## _locked), \
    ENTRY(not_u ## a_cBits), \
    ENTRY(not_u ## a_cBits ## _locked), \
    ENTRY(neg_u ## a_cBits), \
    ENTRY(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
        uint32_t const           cTests  = *g_aUnaryU ## a_cBits[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type   uDst = paTests[iTest].uDstIn; \
            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
            if (   uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits  = paTests[iTest].uDstIn; \
                *g_pfEfl          = paTests[iTest].fEflIn; \
                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
    } \
}
/* Instantiate unary subtests for all four operand widths. */
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1885
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all unary operator widths to @a pOut. */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
#endif
1894#endif
1895
/** Runs the unary operator tests (inc/dec/not/neg) for all widths. */
static void UnaryTest(void)
{
    UnaryU8Test();
    UnaryU16Test();
    UnaryU32Test();
    UnaryU64Test();
}
1903
1904
1905/*
1906 * Shifts.
1907 *
1908 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
1909 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * GEN_SHIFT - Emits ShiftUxxGenerate(), producing test data for the shift
 * and rotate helpers.
 *
 * Entries whose EFLAGS flavour is neither 'native' nor the flavour of the
 * host CPU are skipped (their reference output cannot be produced here).
 * Two records are emitted per iteration: the second re-runs the same
 * operation with all live input EFLAGS inverted to widen flag coverage.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
            \
            Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1946
/**
 * TEST_SHIFT - Emits the subtest table and test function for the shift and
 * rotate helpers (rol/ror/rcl/rcr/shl/shr/sar) at the given bit width.
 *
 * Each instruction has AMD and Intel EFLAGS-flavour entries; the uExtra mask
 * lists flags whose behaviour differs between vendors (OF, plus AF for the
 * plain shifts).  The test function runs the recorded tests once per
 * variation: first with the selected implementation (pfn), then - when
 * COUNT_VARIATIONS yields 2 - with the native one (pfnNative).
 */
#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2010
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all shift/rotate widths to @a pOut. */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif
2020
/** Runs the shift/rotate tests for all operand widths. */
static void ShiftTest(void)
{
    ShiftU8Test();
    ShiftU16Test();
    ShiftU32Test();
    ShiftU64Test();
}
2028
2029
2030/*
2031 * Multiplication and division.
2032 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2034 * Note! Currently ignoring undefined bits.
2035 */
2036
2037/* U8 */
/* U8 */
/** Subtest table for the 8-bit multiply/divide helpers.
 * The first extra mask lists EFLAGS to ignore when comparing results
 * (read as uExtra/fEflIgn by MulDivU8Test); the second mask's exact
 * meaning is defined by ENTRY_*_EX elsewhere in this file -
 * NOTE(review): presumably vendor-undefined flags, confirm against the
 * ENTRY_AMD_EX/ENTRY_INTEL_EX definitions. */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                         X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                          X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2052
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test data for the 8-bit mul/div helpers to @a pOut.
 *
 * Skips entries whose EFLAGS flavour is neither 'native' nor the host CPU's
 * flavour, since their reference output cannot be produced on this host.
 * The destination is a 16-bit value (AX) while the source is 8 bits, hence
 * the dedicated non-macro implementation.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        /* Note: removed a stray line-continuation backslash left over from a
           macro variant of this code; it merely spliced two lines together. */
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn = RandU8Src(iTest);
            Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
2078
2079static void MulDivU8Test(void)
2080{
2081 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2082 {
2083 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2084 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2085 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2086 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2087 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2088 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2089 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2090 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2091 {
2092 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2093 {
2094 uint32_t fEfl = paTests[iTest].fEflIn;
2095 uint16_t uDst = paTests[iTest].uDstIn;
2096 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2097 if ( uDst != paTests[iTest].uDstOut
2098 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2099 || rc != paTests[iTest].rc)
2100 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2101 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2102 "%sexpected %#08x %#06RX16 %d%s\n",
2103 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2104 iVar ? " " : "", fEfl, uDst, rc,
2105 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2106 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2107 else
2108 {
2109 *g_pu16 = paTests[iTest].uDstIn;
2110 *g_pfEfl = paTests[iTest].fEflIn;
2111 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2112 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2113 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2114 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2115 }
2116 }
2117 pfn = g_aMulDivU8[iFn].pfnNative;
2118 }
2119 }
2120}
2121
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * GEN_MULDIV - Emits MulDivUxxGenerate() for the 16/32/64-bit mul/div
 * helpers (two destination operands, e.g. DX:AX for 16-bit).
 *
 * Entries whose EFLAGS flavour is neither 'native' nor the host CPU's
 * flavour are skipped.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2153
/**
 * TEST_MULDIV - Emits the subtest table and test function for the
 * 16/32/64-bit mul/imul/div/idiv helpers.
 *
 * The uExtra mask (fEflIgn) lists EFLAGS that are architecturally undefined
 * for the instruction and hence excluded from the comparison.  Each recorded
 * test is run per variation (selected implementation first, then the native
 * one), and on success repeated against the global buffers.
 */
#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType const a_aSubTests [] = \
{ \
    ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst1 = paTests[iTest].uDst1In; \
                a_Type uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2222
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all mul/div widths to @a pOut. */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif
2232
/** Runs the mul/imul/div/idiv tests for all operand widths. */
static void MulDivTest(void)
{
    MulDivU8Test();
    MulDivU16Test();
    MulDivU32Test();
    MulDivU64Test();
}
2240
2241
2242/*
2243 * BSWAP
2244 */
/**
 * Tests the bswap helpers with fixed input/output pairs.
 *
 * For the 16-bit case the #if 0 branches hold the byte-swapped expectation,
 * while the enabled branches expect the low word to be zeroed instead -
 * NOTE(review): this documents the current implementation's behaviour for
 * the 16-bit operand form (architecturally undefined); confirm against the
 * iemAImpl_bswap_u16 implementation.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2279
2280
2281
2282/*********************************************************************************************************************************
2283* Floating point (x87 style) *
2284*********************************************************************************************************************************/
2285
2286/*
2287 * FPU constant loading.
2288 */
/** Subtest table for the x87 constant-loading instructions
 * (fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz). */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2301
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test data for the x87 constant loaders to @a pOut.
 *
 * For each random FCW/FSW pair, all four rounding-control modes are
 * exercised (hence iTest advancing by 4).
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2328
2329static void FpuLoadConstTest(void)
2330{
2331 /*
2332 * Inputs:
2333 * - FSW: C0, C1, C2, C3
2334 * - FCW: Exception masks, Precision control, Rounding control.
2335 *
2336 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2337 */
2338 X86FXSTATE State;
2339 RT_ZERO(State);
2340 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2341 {
2342 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2343 continue;
2344
2345 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2346 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2347 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2348 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2349 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2350 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2351 {
2352 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2353 {
2354 State.FCW = paTests[iTest].fFcw;
2355 State.FSW = paTests[iTest].fFswIn;
2356 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2357 pfn(&State, &Res);
2358 if ( Res.FSW != paTests[iTest].fFswOut
2359 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2360 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2361 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2362 Res.FSW, FormatR80(&Res.r80Result),
2363 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2364 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2365 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2366 FormatFcw(paTests[iTest].fFcw) );
2367 }
2368 pfn = g_aFpuLdConst[iFn].pfnNative;
2369 }
2370 }
2371}
2372
2373
2374/*
2375 * Load floating point values from memory.
2376 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * GEN_FPU_LOAD - Emits FpuLdRxxGenerate(), producing test data for loading
 * an 80-bit register value from a 32/64/80-bit floating point memory
 * operand.  Each random input is run under all four rounding-control modes.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2408
/**
 * TEST_FPU_LOAD - Emits the function pointer typedefs, the one-entry subtest
 * table and the test function for fld of a 32/64/80-bit memory operand.
 *
 * The result (FSW + 80-bit value) is compared bit-for-bit against the
 * recorded output; runs once per variation, switching to pfnNative.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if ( Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2463
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all fld-from-memory widths to @a pOut. */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
#endif
2472
/** Runs the fld-from-memory tests for 80-, 64- and 32-bit operands. */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
2479
2480
2481/*
2482 * Load integer values from memory.
2483 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * GEN_FPU_LOAD_INT - Emits FpuLdIxxGenerate(), producing test data for fild
 * of a 16/32/64-bit signed integer memory operand.  Each random input is run
 * under all four rounding-control modes.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2514
/**
 * TEST_FPU_LOAD_INT - Emits the typedefs, subtest table and test function
 * for fild of a 16/32/64-bit signed integer memory operand.
 *
 * Mirrors TEST_FPU_LOAD, but with an integer input formatted via a_szFmtIn.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if ( Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2568
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all fild widths to @a pOut. */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2577
/** Runs the fild tests for 64-, 32- and 16-bit integer operands. */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
2584
2585
2586/*
2587 * Load binary coded decimal values from memory.
2588 */
/** Function pointer type and subtest table for fbld (load packed BCD). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

static const FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY(fld_r80_from_d80)
};
2597
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test data for fbld (80-bit packed BCD load) to @a pOut.
 * Each random input is run under all four rounding-control modes.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2626
/**
 * Runs the fbld (packed BCD load) tests.
 *
 * Replays each recorded input and compares FSW and the 80-bit result
 * bit-for-bit; runs once per variation, switching to pfnNative.
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if ( Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuLdD80[iFn].pfnNative;
        }
    }
}
2667
2668
2669/*
2670 * Store values floating point values to memory.
2671 */
2672#ifdef TSTIEMAIMPL_WITH_GENERATOR
2673static const RTFLOAT80U g_aFpuStR32Specials[] =
2674{
2675 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2676 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2677 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2678 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2679};
2680static const RTFLOAT80U g_aFpuStR64Specials[] =
2681{
2682 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2683 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2684 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2685 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2686 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2687};
2688static const RTFLOAT80U g_aFpuStR80Specials[] =
2689{
2690 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2691};
2692# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2693static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2694{ \
2695 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
2696 X86FXSTATE State; \
2697 RT_ZERO(State); \
2698 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2699 { \
2700 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2701 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
2702 { \
2703 uint16_t const fFcw = RandFcw(); \
2704 State.FSW = RandFsw(); \
2705 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
2706 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
2707 \
2708 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2709 { \
2710 /* PC doesn't influence these, so leave as is. */ \
2711 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
2712 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
2713 { \
2714 uint16_t uFswOut = 0; \
2715 a_rdType OutVal; \
2716 RT_ZERO(OutVal); \
2717 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2718 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
2719 | (iRounding << X86_FCW_RC_SHIFT); \
2720 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
2721 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
2722 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
2723 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
2724 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
2725 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
2726 } \
2727 } \
2728 } \
2729 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2730 } \
2731}
2732#else
2733# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2734#endif
2735
/* Instantiates the worker typedef, subtest table, generator (when enabled)
   and test function for one fst/fstp destination width.  The test function
   replays the pre-generated data and verifies FSW and the stored value. */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if ( uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate for the three destination widths. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2794
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all the fst/fstp store-to-memory widths.
 * @note Emission order (80, 64, 32) defines the generated file layout. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2803
2804static void FpuStMemTest(void)
2805{
2806 FpuStR80Test();
2807 FpuStR64Test();
2808 FpuStR32Test();
2809}
2810
2811
2812/*
2813 * Store integer values to memory or register.
2814 */
2815TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2816TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2817TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2818
2819static const FPU_ST_I16_T g_aFpuStI16[] =
2820{
2821 ENTRY(fist_r80_to_i16),
2822 ENTRY_AMD( fistt_r80_to_i16, 0),
2823 ENTRY_INTEL(fistt_r80_to_i16, 0),
2824};
2825static const FPU_ST_I32_T g_aFpuStI32[] =
2826{
2827 ENTRY(fist_r80_to_i32),
2828 ENTRY(fistt_r80_to_i32),
2829};
2830static const FPU_ST_I64_T g_aFpuStI64[] =
2831{
2832 ENTRY(fist_r80_to_i64),
2833 ENTRY(fistt_r80_to_i64),
2834};
2835
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Boundary inputs around the destination integer range for each width:
   exact min/max, one-off values, and mantissas with rounded-off bits set. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};

/* Generator worker for fist/fistt: like GEN_FPU_STORE, but CPU-flavour
   specific subtests (AMD/Intel) go to pOutCpu and are only generated when
   the host CPU matches that flavour.  The sentinel ~(a_iType)2 marks output
   bytes the worker did not write. */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
2978
/* Instantiates generator (when enabled) and test function for one fist/fistt
   destination width; replays the pre-generated data, comparing FSW and the
   stored integer. */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_iType iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if ( uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x " a_szFmt "\n" \
                                 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

//fistt_r80_to_i16 diffs for AMD, of course :-)

TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3027
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test data for all the fist/fistt widths; CPU-flavour specific
 *  subtests land in pOutCpu (see GEN_FPU_STORE_INT). */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3036
3037static void FpuStIntTest(void)
3038{
3039 FpuStI64Test();
3040 FpuStI32Test();
3041 FpuStI16Test();
3042}
3043
3044
3045/*
3046 * Store as packed BCD value (memory).
3047 */
3048typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3049typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3050TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3051
3052static const FPU_ST_D80_T g_aFpuStD80[] =
3053{
3054 ENTRY(fst_r80_to_d80),
3055};
3056
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates fbstp test data: random inputs plus boundary values around the
 * 18-digit packed BCD range, over all rounding modes and OM/UM/PM mask
 * combinations.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Values around the packed BCD limit (999...999 = 0xde0b6b3a763ffff0 * 2^59 scale). */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3110
3111
3112static void FpuStD80Test(void)
3113{
3114 X86FXSTATE State;
3115 RT_ZERO(State);
3116 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3117 {
3118 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3119 continue;
3120
3121 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3122 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3123 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3124 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3125 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3126 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3127 {
3128 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3129 {
3130 RTFLOAT80U const InVal = paTests[iTest].InVal;
3131 uint16_t uFswOut = 0;
3132 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3133 State.FCW = paTests[iTest].fFcw;
3134 State.FSW = paTests[iTest].fFswIn;
3135 pfn(&State, &uFswOut, &OutVal, &InVal);
3136 if ( uFswOut != paTests[iTest].fFswOut
3137 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3138 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3139 "%s -> fsw=%#06x %s\n"
3140 "%s expected %#06x %s%s%s (%s)\n",
3141 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3142 FormatR80(&paTests[iTest].InVal),
3143 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3144 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3145 FswDiff(uFswOut, paTests[iTest].fFswOut),
3146 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3147 FormatFcw(paTests[iTest].fFcw) );
3148 }
3149 pfn = g_aFpuStD80[iFn].pfnNative;
3150 }
3151 }
3152}
3153
3154
3155
3156/*********************************************************************************************************************************
3157* x87 FPU Binary Operations *
3158*********************************************************************************************************************************/
3159
3160/*
3161 * Binary FPU operations on two 80-bit floating point values.
3162 */
3163TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3164enum { kFpuBinaryHint_fprem = 1, };
3165
3166static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3167{
3168 ENTRY(fadd_r80_by_r80),
3169 ENTRY(fsub_r80_by_r80),
3170 ENTRY(fsubr_r80_by_r80),
3171 ENTRY(fmul_r80_by_r80),
3172 ENTRY(fdiv_r80_by_r80),
3173 ENTRY(fdivr_r80_by_r80),
3174 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3175 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3176 ENTRY(fscale_r80_by_r80),
3177 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3178 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3179 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3180 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3181 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3182 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3183};
3184
3185#ifdef TSTIEMAIMPL_WITH_GENERATOR
3186static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3187{
3188 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3189
3190 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3191 {
3192 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3193 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3194 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3195 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3196 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3197 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3198 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3199 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3200 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3201 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3202 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3203 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3204 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3205 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3206 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3207 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3208 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3209 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3210 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3211 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3212 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3213 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3214 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3215 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3216 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3217 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3218 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3219 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3220 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3221 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3222 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3223 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3224 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3225 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3226 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3227 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3228 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3229 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3230 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3231 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3232 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3233 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3234 /* fscale: Negative variants for the essentials of the above. */
3235 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3236 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3237 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3238 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3239 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3240 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3241 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3242 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3243 /* fscale: Some fun with denormals and pseudo-denormals. */
3244 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3245 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3246 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3247 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3248 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3249 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3250 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3251 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3252 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3253 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3254 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3255 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3256 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3257 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3258 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3259 };
3260
3261 X86FXSTATE State;
3262 RT_ZERO(State);
3263 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3264 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3265 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3266 {
3267 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3268 PRTSTREAM pOutFn = pOut;
3269 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3270 {
3271 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3272 continue;
3273 pOutFn = pOutCpu;
3274 }
3275
3276 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3277 uint32_t iTestOutput = 0;
3278 uint32_t cNormalInputPairs = 0;
3279 uint32_t cTargetRangeInputs = 0;
3280 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3281 {
3282 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3283 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3284 bool fTargetRange = false;
3285 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3286 {
3287 cNormalInputPairs++;
3288 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3289 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3290 cTargetRangeInputs += fTargetRange = true;
3291 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3292 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3293 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3294 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3295 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3296 cTargetRangeInputs += fTargetRange = true;
3297 }
3298 }
3299 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3300 {
3301 iTest -= 1;
3302 continue;
3303 }
3304
3305 uint16_t const fFcwExtra = 0;
3306 uint16_t const fFcw = RandFcw();
3307 State.FSW = RandFsw();
3308
3309 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3310 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3311 {
3312 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3313 | (iRounding << X86_FCW_RC_SHIFT)
3314 | (iPrecision << X86_FCW_PC_SHIFT)
3315 | X86_FCW_MASK_ALL;
3316 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3317 pfn(&State, &ResM, &InVal1, &InVal2);
3318 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3319 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3320 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3321
3322 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3323 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3324 pfn(&State, &ResU, &InVal1, &InVal2);
3325 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3326 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3327 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3328
3329 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3330 if (fXcpt)
3331 {
3332 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3333 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3334 pfn(&State, &Res1, &InVal1, &InVal2);
3335 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3336 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3337 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3338 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3339 {
3340 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3341 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3342 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3343 pfn(&State, &Res2, &InVal1, &InVal2);
3344 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3345 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3346 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3347 }
3348 if (!RT_IS_POWER_OF_TWO(fXcpt))
3349 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3350 if (fUnmasked & fXcpt)
3351 {
3352 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3353 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3354 pfn(&State, &Res3, &InVal1, &InVal2);
3355 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3356 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3357 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3358 }
3359 }
3360
3361 /* If the values are in range and caused no exceptions, do the whole series of
3362 partial reminders till we get the non-partial one or run into an exception. */
3363 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3364 {
3365 IEMFPURESULT ResPrev = ResM;
3366 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3367 {
3368 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3369 State.FSW = ResPrev.FSW;
3370 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3371 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3372 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3373 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3374 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3375 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3376 ResPrev = ResSeq;
3377 }
3378 }
3379 }
3380 }
3381 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3382 }
3383}
3384#endif
3385
3386
3387static void FpuBinaryR80Test(void)
3388{
3389 X86FXSTATE State;
3390 RT_ZERO(State);
3391 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3392 {
3393 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3394 continue;
3395
3396 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3397 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3398 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3399 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3400 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3401 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3402 {
3403 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3404 {
3405 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3406 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3407 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3408 State.FCW = paTests[iTest].fFcw;
3409 State.FSW = paTests[iTest].fFswIn;
3410 pfn(&State, &Res, &InVal1, &InVal2);
3411 if ( Res.FSW != paTests[iTest].fFswOut
3412 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3413 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3414 "%s -> fsw=%#06x %s\n"
3415 "%s expected %#06x %s%s%s (%s)\n",
3416 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3417 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3418 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3419 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3420 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3421 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3422 FormatFcw(paTests[iTest].fFcw) );
3423 }
3424 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3425 }
3426 }
3427}
3428
3429
3430/*
3431 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3432 */
/* Stubs so the a_Type2##_IS_NORMAL token-paste in the generator macros below
   expands to a constant true for the integer second-operand types (integers
   have no denormal/NaN classes to screen out; the argument is ignored). */
#define int64_t_IS_NORMAL(a)    1
#define int32_t_IS_NORMAL(a)    1
#define int16_t_IS_NORMAL(a)    1
3436
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs that the generators append after the
   cTests random inputs (the iTest >= cTests path in GEN_FPU_BINARY_SMALL). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3456
/**
 * Defines FpuBinary<a_UpBits>Generate(): writes a FPU_BINARY_*_TEST_T vector
 * array per worker in @a a_aSubTests to @a pOut.
 *
 * Random inputs come first (retrying near the end until a quota of
 * normal/normal pairs is met), followed by the matching
 * s_aFpuBinary<a_UpBits>Specials entries.  Each input pair is run through all
 * four rounding modes, all four precision settings, and with exceptions both
 * unmasked and fully masked.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3511
/**
 * Defines the sub-test table (add/mul/sub/subr/div/divr), the generator (via
 * GEN_FPU_BINARY_SMALL) and the FpuBinary<a_UpBits>Test() verifier for one
 * r80-by-<small type> operand family.  The verifier replays the pre-generated
 * vectors and compares both the output FSW and the 80-bit result.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate generator + verifier for r64, r32, i32 and i16 second operands. */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i,          int32_t,    FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i,          int16_t,    FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3572
3573
3574/*
3575 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3576 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs appended after the random inputs by
   GEN_FPU_BINARY_FSW (the iTest >= cTests path). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3601
/**
 * Defines FpuBinaryFsw<a_UpBits>Generate(): like GEN_FPU_BINARY_SMALL but for
 * workers that only produce an FSW value (fcom/ficom family).  Rounding and
 * precision controls are left as randomized; only the exception-mask bits of
 * the FCW are varied (all clear vs all set).
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3648
/**
 * Defines the sub-test table (entries given via the variadic arguments), the
 * generator (via GEN_FPU_BINARY_FSW) and the FpuBinaryFsw<a_UpBits>Test()
 * verifier for one FSW-only comparison worker family.  The verifier replays
 * the pre-generated vectors and compares only the output FSW.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

/* Instantiate the fcom/fucom/ficom comparison workers for all operand types. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t,    FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t,    FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3702
3703
3704/*
3705 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3706 */
/** Sub-test entry type for workers returning both FSW and EFLAGS. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** The fcomi/fucomi workers under test. */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};

#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pair appended after the random inputs by the generator. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3721
/**
 * Generates test vectors for the fcomi/fucomi workers in g_aFpuBinaryEflR80.
 *
 * Emits one FPU_BINARY_EFL_R80_TEST_T array per worker to @a pOut: random
 * input pairs (topped up to a quota of normal/normal pairs) followed by the
 * s_aFpuBinaryEflR80Specials entries, each run with exceptions unmasked and
 * then fully masked.
 *
 * @param   pOut    Output stream for the generated arrays.
 * @param   cTests  Number of random input pairs per worker (floored at 160).
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* quota of normal/normal input pairs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Near the end of the random range: redo this iteration with
                   fresh random values until the normal-pair quota is met. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
3762#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3763
3764static void FpuBinaryEflR80Test(void)
3765{
3766 X86FXSTATE State;
3767 RT_ZERO(State);
3768 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3769 {
3770 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3771 continue;
3772
3773 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3774 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3775 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3776 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3777 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3778 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3779 {
3780 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3781 {
3782 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3783 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3784 State.FCW = paTests[iTest].fFcw;
3785 State.FSW = paTests[iTest].fFswIn;
3786 uint16_t uFswOut = 0;
3787 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3788 if ( uFswOut != paTests[iTest].fFswOut
3789 || fEflOut != paTests[iTest].fEflOut)
3790 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3791 "%s -> fsw=%#06x efl=%#08x\n"
3792 "%s expected %#06x %#08x %s (%s)\n",
3793 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3794 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3795 iVar ? " " : "", uFswOut, fEflOut,
3796 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3797 EFlagsDiff(fEflOut, paTests[iTest].fEflOut), FormatFcw(paTests[iTest].fFcw));
3798 }
3799 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3800 }
3801 }
3802}
3803
3804
3805/*********************************************************************************************************************************
3806* x87 FPU Unary Operations *
3807*********************************************************************************************************************************/
3808
3809/*
3810 * Unary FPU operations on one 80-bit floating point value.
3811 *
3812 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3813 * a rounding error or not.
3814 */
/** Sub-test entry type for the unary r80 workers. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* uExtra classification: used by the generator to pick interesting input
   ranges and to flag tests that may legitimately differ by a rounding error
   (see FpuUnaryR80MayHaveRoundingError). */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };

/** The unary r80 workers under test; AMD/Intel split where results differ. */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX(      fabs_r80,     kUnary_Accurate),
    ENTRY_EX(      fchs_r80,     kUnary_Accurate),
    ENTRY_AMD_EX(  f2xm1_r80, 0, kUnary_Accurate),    // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX(      fsqrt_r80,    kUnary_Accurate),
    ENTRY_EX(      frndint_r80,  kUnary_Accurate),
    ENTRY_AMD_EX(  fsin_r80, 0,  kUnary_Accurate_Trigonometry),  // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0,  kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX(  fcos_r80, 0,  kUnary_Accurate_Trigonometry),  // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0,  kUnary_Accurate_Trigonometry),
};
3831
3832#ifdef TSTIEMAIMPL_WITH_GENERATOR
3833
3834static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3835{
3836 if ( enmKind == kUnary_Rounding_F2xm1
3837 && RTFLOAT80U_IS_NORMAL(pr80Val)
3838 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3839 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3840 return true;
3841 return false;
3842}
3843
/**
 * Generates test vectors for the unary r80 workers in g_aFpuUnaryR80.
 *
 * Mixes random inputs with the hand-picked specials below, steering part of
 * the normal inputs into the f2xm1-interesting exponent range for the
 * kUnary_Rounding_F2xm1 entry.  Each input is exercised across all rounding
 * and precision control combinations: first with all exceptions masked, then
 * unmasked, then with the raised exceptions selectively masked/unmasked to
 * capture the different FSW outcomes.
 *
 * @param   pOut    Stream for vectors with vendor-independent results.
 * @param   pOutCpu Stream for vectors whose results depend on the configured
 *                  EFL behaviour flavour (AMD vs Intel table entries).
 * @param   cTests  Number of random inputs to generate per worker.
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs appended after the random ones: f2xm1 corner cases
       and values at the very bottom of the exponent range. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),     /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),     /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0),                       /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0),                       /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0),                       /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0),                       /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0),                       /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0),                       /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* quota of normal inputs per worker */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker when present; vendor-specific entries go
           to the CPU-specific output stream (or are skipped if they do not
           match the configured flavour). */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput        = 0;
        uint32_t cNormalInputs      = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* NOTE(review): uExtra == kUnary_Rounding_F2xm1 is already
                       established by the enclosing if, so the false arms of
                       the two ternaries below are dead; presumably left in to
                       document the alternative (2^64..2^-64) range. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        /* Force the exponent into the interesting range to
                           meet the target-range quota. */
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Near the end of the random range: redo this iteration with
                   a fresh random value until the normal quota is met. */
                iTest -= 1;
                continue;
            }

            /* FCW bit 7 (reserved) marks tests that may differ by a rounding
               error, see the note above g_aFpuUnaryR80. */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Pass 1: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Pass 2: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Pass 3+: mask exactly the exceptions that were raised,
                       expanding the set if new ones appear, then try each
                       raised exception unmasked individually. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
3963#endif
3964
3965static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
3966{
3967 if (fFcw1 == fFcw2)
3968 return true;
3969 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
3970 {
3971 *pfRndErr = true;
3972 return true;
3973 }
3974 return false;
3975}
3976
3977static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
3978{
3979 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
3980 return true;
3981 if ( fRndErrOk
3982 && pr80Val1->s.fSign == pr80Val2->s.fSign)
3983 {
3984 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
3985 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
3986 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
3987 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
3988 ||
3989 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
3990 && pr80Val1->s.uMantissa == UINT64_MAX
3991 && pr80Val2->s.uMantissa == RT_BIT_64(63))
3992 ||
3993 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
3994 && pr80Val2->s.uMantissa == UINT64_MAX
3995 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
3996 {
3997 *pfRndErr = true;
3998 return true;
3999 }
4000 }
4001 return false;
4002}
4003
4004
/**
 * Runs the unary 80-bit FPU operation tests in g_aFpuUnaryR80 against the
 * pre-generated test data, tallying and reporting tolerated rounding errors.
 *
 * Bit 7 of the recorded FCW (an MBZ bit) is used by the generator as a marker
 * for tests where a 1 ULP / C1-only rounding difference is acceptable; it is
 * stripped before loading the FCW into the FPU state.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;          /* rounding differences actually ignored */
        uint32_t cPossibleRndErrs = 0;  /* tests flagged as allowed to differ */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80); /* MBZ bit 7 = rounding error tolerated */
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;          /* strip the marker before use */
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative; /* subsequent variation uses the native worker */
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
4053
4054
4055/*
4056 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4057 */
/** Subtest descriptor type for unary 80-bit FPU ops that only modify FSW. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/** FSW-only unary 80-bit workers under test. */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1), /* uExtra=1 marks fxam: needs FTW/empty-register handling in the generator. */
};
4065
4066#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary 80-bit FPU ops (g_aFpuUnaryFswR80).
 *
 * For ordinary workers all rounding/precision/mask combinations are emitted;
 * for fxam (uExtra == 1) a single run per input is emitted, randomly marking
 * the register as empty via MBZ bit 7 of the stored FCW.
 *
 * @param   pOut    Output stream for common (CPU-independent) data.
 * @param   pOutCpu Output stream for CPU-flavour specific data.
 * @param   cTests  Number of random inputs to generate per worker.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* ensure at least a quarter of the inputs are normal values */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-flavour specific worker: only generate on matching hosts. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals near the end of the run: redo this iteration. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                /* Exercise every rounding mode, precision, and both all-masked
                   and all-unmasked exception configurations. */
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4140#endif
4141
4142
4143static void FpuUnaryFswR80Test(void)
4144{
4145 X86FXSTATE State;
4146 RT_ZERO(State);
4147 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4148 {
4149 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4150 continue;
4151
4152 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4153 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4154 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4155 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4156 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4157 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4158 {
4159 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4160 {
4161 RTFLOAT80U const InVal = paTests[iTest].InVal;
4162 uint16_t fFswOut = 0;
4163 State.FSW = paTests[iTest].fFswIn;
4164 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4165 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4166 pfn(&State, &fFswOut, &InVal);
4167 if (fFswOut != paTests[iTest].fFswOut)
4168 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4169 "%s -> fsw=%#06x\n"
4170 "%s expected %#06x %s (%s%s)\n",
4171 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4172 FormatR80(&paTests[iTest].InVal),
4173 iVar ? " " : "", fFswOut,
4174 iVar ? " " : "", paTests[iTest].fFswOut,
4175 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4176 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4177 }
4178 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4179 }
4180 }
4181}
4182
4183/*
4184 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4185 */
/** Subtest descriptor type for unary 80-bit FPU ops producing two results. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/** Two-result unary 80-bit workers under test (AMD/Intel split where host behavior differs). */
static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
4196
4197#ifdef TSTIEMAIMPL_WITH_GENERATOR
4198static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4199{
4200 static RTFLOAT80U const s_aSpecials[] =
4201 {
4202 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4203 };
4204
4205 X86FXSTATE State;
4206 RT_ZERO(State);
4207 uint32_t cMinNormals = cTests / 4;
4208 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4209 {
4210 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4211 PRTSTREAM pOutFn = pOut;
4212 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4213 {
4214 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4215 continue;
4216 pOutFn = pOutCpu;
4217 }
4218
4219 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4220 uint32_t iTestOutput = 0;
4221 uint32_t cNormalInputs = 0;
4222 uint32_t cTargetRangeInputs = 0;
4223 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4224 {
4225 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4226 if (RTFLOAT80U_IS_NORMAL(&InVal))
4227 {
4228 if (iFn != 0)
4229 {
4230 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4231 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4232 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4233 cTargetRangeInputs++;
4234 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4235 {
4236 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4237 cTargetRangeInputs++;
4238 }
4239 }
4240 cNormalInputs++;
4241 }
4242 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4243 {
4244 iTest -= 1;
4245 continue;
4246 }
4247
4248 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4249 uint16_t const fFcw = RandFcw();
4250 State.FSW = RandFsw();
4251
4252 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4253 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4254 {
4255 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4256 | (iRounding << X86_FCW_RC_SHIFT)
4257 | (iPrecision << X86_FCW_PC_SHIFT)
4258 | X86_FCW_MASK_ALL;
4259 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4260 pfn(&State, &ResM, &InVal);
4261 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4262 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4263 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4264
4265 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4266 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4267 pfn(&State, &ResU, &InVal);
4268 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4269 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4270 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4271
4272 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4273 if (fXcpt)
4274 {
4275 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4276 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4277 pfn(&State, &Res1, &InVal);
4278 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4279 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4280 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4281 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4282 {
4283 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4284 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4285 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4286 pfn(&State, &Res2, &InVal);
4287 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4288 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4289 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4290 }
4291 if (!RT_IS_POWER_OF_TWO(fXcpt))
4292 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4293 if (fUnmasked & fXcpt)
4294 {
4295 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4296 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4297 pfn(&State, &Res3, &InVal);
4298 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4299 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4300 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4301 }
4302 }
4303 }
4304 }
4305 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4306 }
4307}
4308#endif
4309
4310
/**
 * Runs the two-result unary 80-bit FPU op tests (g_aFpuUnaryTwoR80) against
 * the pre-generated data, requiring bit-exact FSW and both result values.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if ( Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative; /* subsequent variation uses the native worker */
        }
    }
}
4353
4354
4355
4356int main(int argc, char **argv)
4357{
4358 int rc = RTR3InitExe(argc, &argv, 0);
4359 if (RT_FAILURE(rc))
4360 return RTMsgInitFailure(rc);
4361
4362 /*
4363 * Determin the host CPU.
4364 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4365 */
4366#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4367 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4368 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4369 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4370#else
4371 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4372#endif
4373
4374 /*
4375 * Parse arguments.
4376 */
4377 enum { kModeNotSet, kModeTest, kModeGenerate }
4378 enmMode = kModeNotSet;
4379 bool fInt = true;
4380 bool fFpuLdSt = true;
4381 bool fFpuBinary1 = true;
4382 bool fFpuBinary2 = true;
4383 bool fFpuOther = true;
4384 bool fCpuData = true;
4385 bool fCommonData = true;
4386 uint32_t const cDefaultTests = 96;
4387 uint32_t cTests = cDefaultTests;
4388 RTGETOPTDEF const s_aOptions[] =
4389 {
4390 // mode:
4391 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4392 { "--test", 't', RTGETOPT_REQ_NOTHING },
4393 // test selection (both)
4394 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4395 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4396 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4397 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4398 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4399 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4400 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4401 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4402 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4403 { "--include", 'I', RTGETOPT_REQ_STRING },
4404 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4405 // generation parameters
4406 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4407 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4408 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4409 };
4410
4411 RTGETOPTSTATE State;
4412 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4413 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4414
4415 RTGETOPTUNION ValueUnion;
4416 while ((rc = RTGetOpt(&State, &ValueUnion)))
4417 {
4418 switch (rc)
4419 {
4420 case 'g':
4421 enmMode = kModeGenerate;
4422 break;
4423 case 't':
4424 enmMode = kModeTest;
4425 break;
4426
4427 case 'a':
4428 fCpuData = true;
4429 fCommonData = true;
4430 fInt = true;
4431 fFpuLdSt = true;
4432 fFpuBinary1 = true;
4433 fFpuBinary2 = true;
4434 fFpuOther = true;
4435 break;
4436 case 'z':
4437 fCpuData = false;
4438 fCommonData = false;
4439 fInt = false;
4440 fFpuLdSt = false;
4441 fFpuBinary1 = false;
4442 fFpuBinary2 = false;
4443 fFpuOther = false;
4444 break;
4445
4446 case 'F':
4447 fFpuLdSt = true;
4448 break;
4449 case 'O':
4450 fFpuOther = true;
4451 break;
4452 case 'B':
4453 fFpuBinary1 = true;
4454 break;
4455 case 'P':
4456 fFpuBinary2 = true;
4457 break;
4458 case 'i':
4459 fInt = true;
4460 break;
4461
4462 case 'I':
4463 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4464 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4465 RT_ELEMENTS(g_apszIncludeTestPatterns));
4466 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4467 break;
4468 case 'X':
4469 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4470 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4471 RT_ELEMENTS(g_apszExcludeTestPatterns));
4472 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4473 break;
4474
4475 case 'm':
4476 fCommonData = true;
4477 break;
4478 case 'c':
4479 fCpuData = true;
4480 break;
4481 case 'n':
4482 cTests = ValueUnion.u32;
4483 break;
4484
4485 case 'h':
4486 RTPrintf("usage: %s <-g|-t> [options]\n"
4487 "\n"
4488 "Mode:\n"
4489 " -g, --generate\n"
4490 " Generate test data.\n"
4491 " -t, --test\n"
4492 " Execute tests.\n"
4493 "\n"
4494 "Test selection (both modes):\n"
4495 " -a, --all\n"
4496 " Enable all tests and generated test data. (default)\n"
4497 " -z, --zap, --none\n"
4498 " Disable all tests and test data types.\n"
4499 " -i, --int\n"
4500 " Enable non-FPU tests.\n"
4501 " -F, --fpu-ld-st\n"
4502 " Enable FPU load and store tests.\n"
4503 " -B, --fpu-binary-1\n"
4504 " Enable FPU binary 80-bit FP tests.\n"
4505 " -P, --fpu-binary-2\n"
4506 " Enable FPU binary 64- and 32-bit FP tests.\n"
4507 " -O, --fpu-other\n"
4508 " Enable other FPU tests.\n"
4509 " -I,--include=<test-patter>\n"
4510 " Enable tests matching the given pattern.\n"
4511 " -X,--exclude=<test-patter>\n"
4512 " Skip tests matching the given pattern (overrides --include).\n"
4513 "\n"
4514 "Generation:\n"
4515 " -m, --common\n"
4516 " Enable generating common test data.\n"
4517 " -c, --only-cpu\n"
4518 " Enable generating CPU specific test data.\n"
4519 " -n, --number-of-test <count>\n"
4520 " Number of tests to generate. Default: %u\n"
4521 , argv[0], cDefaultTests);
4522 return RTEXITCODE_SUCCESS;
4523 default:
4524 return RTGetOptPrintError(rc, &ValueUnion);
4525 }
4526 }
4527
4528 /*
4529 * Generate data?
4530 */
4531 if (enmMode == kModeGenerate)
4532 {
4533#ifdef TSTIEMAIMPL_WITH_GENERATOR
4534 char szCpuDesc[256] = {0};
4535 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
4536 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
4537# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
4538 const char * const pszBitBucket = "NUL";
4539# else
4540 const char * const pszBitBucket = "/dev/null";
4541# endif
4542
4543 if (cTests == 0)
4544 cTests = cDefaultTests;
4545 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
4546 g_cZeroSrcTests = g_cZeroDstTests * 2;
4547
4548 if (fInt)
4549 {
4550 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
4551 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4552 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4553 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
4554 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4555 if (!pStrmData || !pStrmDataCpu)
4556 return RTEXITCODE_FAILURE;
4557
4558 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
4559 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
4560 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
4561 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
4562 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
4563 UnaryGenerate(pStrmData, cTests);
4564 ShiftGenerate(pStrmDataCpu, cTests);
4565 MulDivGenerate(pStrmDataCpu, cTests);
4566
4567 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4568 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4569 if (rcExit != RTEXITCODE_SUCCESS)
4570 return rcExit;
4571 }
4572
4573 if (fFpuLdSt)
4574 {
4575 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4576 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4577 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4578 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4579 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4580 if (!pStrmData || !pStrmDataCpu)
4581 return RTEXITCODE_FAILURE;
4582
4583 FpuLdConstGenerate(pStrmData, cTests);
4584 FpuLdIntGenerate(pStrmData, cTests);
4585 FpuLdD80Generate(pStrmData, cTests);
4586 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4587 FpuStD80Generate(pStrmData, cTests);
4588 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4589 FpuLdMemGenerate(pStrmData, cTests2);
4590 FpuStMemGenerate(pStrmData, cTests2);
4591
4592 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4593 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4594 if (rcExit != RTEXITCODE_SUCCESS)
4595 return rcExit;
4596 }
4597
4598 if (fFpuBinary1)
4599 {
4600 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4601 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4602 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4603 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4604 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4605 if (!pStrmData || !pStrmDataCpu)
4606 return RTEXITCODE_FAILURE;
4607
4608 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4609 FpuBinaryFswR80Generate(pStrmData, cTests);
4610 FpuBinaryEflR80Generate(pStrmData, cTests);
4611
4612 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4613 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4614 if (rcExit != RTEXITCODE_SUCCESS)
4615 return rcExit;
4616 }
4617
4618 if (fFpuBinary2)
4619 {
4620 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4621 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4622 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4623 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4624 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4625 if (!pStrmData || !pStrmDataCpu)
4626 return RTEXITCODE_FAILURE;
4627
4628 FpuBinaryR64Generate(pStrmData, cTests);
4629 FpuBinaryR32Generate(pStrmData, cTests);
4630 FpuBinaryI32Generate(pStrmData, cTests);
4631 FpuBinaryI16Generate(pStrmData, cTests);
4632 FpuBinaryFswR64Generate(pStrmData, cTests);
4633 FpuBinaryFswR32Generate(pStrmData, cTests);
4634 FpuBinaryFswI32Generate(pStrmData, cTests);
4635 FpuBinaryFswI16Generate(pStrmData, cTests);
4636
4637 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4638 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4639 if (rcExit != RTEXITCODE_SUCCESS)
4640 return rcExit;
4641 }
4642
4643 if (fFpuOther)
4644 {
4645 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4646 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4647 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4648 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4649 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4650 if (!pStrmData || !pStrmDataCpu)
4651 return RTEXITCODE_FAILURE;
4652
4653 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4654 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4655 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4656
4657 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4658 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4659 if (rcExit != RTEXITCODE_SUCCESS)
4660 return rcExit;
4661 }
4662
4663 return RTEXITCODE_SUCCESS;
4664#else
4665 return RTMsgErrorExitFailure("Test data generator not compiled in!");
4666#endif
4667 }
4668
4669 /*
4670 * Do testing. Currrently disabled by default as data needs to be checked
4671 * on both intel and AMD systems first.
4672 */
4673 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4674 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4675 if (enmMode == kModeTest)
4676 {
4677 RTTestBanner(g_hTest);
4678
4679 /* Allocate guarded memory for use in the tests. */
4680#define ALLOC_GUARDED_VAR(a_puVar) do { \
4681 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
4682 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
4683 } while (0)
4684 ALLOC_GUARDED_VAR(g_pu8);
4685 ALLOC_GUARDED_VAR(g_pu16);
4686 ALLOC_GUARDED_VAR(g_pu32);
4687 ALLOC_GUARDED_VAR(g_pu64);
4688 ALLOC_GUARDED_VAR(g_pu128);
4689 ALLOC_GUARDED_VAR(g_pu8Two);
4690 ALLOC_GUARDED_VAR(g_pu16Two);
4691 ALLOC_GUARDED_VAR(g_pu32Two);
4692 ALLOC_GUARDED_VAR(g_pu64Two);
4693 ALLOC_GUARDED_VAR(g_pu128Two);
4694 ALLOC_GUARDED_VAR(g_pfEfl);
4695 if (RTTestErrorCount(g_hTest) == 0)
4696 {
4697 if (fInt)
4698 {
4699 BinU8Test();
4700 BinU16Test();
4701 BinU32Test();
4702 BinU64Test();
4703 XchgTest();
4704 XaddTest();
4705 CmpXchgTest();
4706 CmpXchg8bTest();
4707 CmpXchg16bTest();
4708 ShiftDblTest();
4709 UnaryTest();
4710 ShiftTest();
4711 MulDivTest();
4712 BswapTest();
4713 }
4714
4715 if (fFpuLdSt)
4716 {
4717 FpuLoadConstTest();
4718 FpuLdMemTest();
4719 FpuLdIntTest();
4720 FpuLdD80Test();
4721 FpuStMemTest();
4722 FpuStIntTest();
4723 FpuStD80Test();
4724 }
4725
4726 if (fFpuBinary1)
4727 {
4728 FpuBinaryR80Test();
4729 FpuBinaryFswR80Test();
4730 FpuBinaryEflR80Test();
4731 }
4732
4733 if (fFpuBinary2)
4734 {
4735 FpuBinaryR64Test();
4736 FpuBinaryR32Test();
4737 FpuBinaryI32Test();
4738 FpuBinaryI16Test();
4739 FpuBinaryFswR64Test();
4740 FpuBinaryFswR32Test();
4741 FpuBinaryFswI32Test();
4742 FpuBinaryFswI16Test();
4743 }
4744
4745 if (fFpuOther)
4746 {
4747 FpuUnaryR80Test();
4748 FpuUnaryFswR80Test();
4749 FpuUnaryTwoR80Test();
4750 }
4751 }
4752 return RTTestSummaryAndDestroy(g_hTest);
4753 }
4754 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
4755}
4756
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette