VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94571

Last change on this file since 94571 was 94571, checked in by vboxsync, 3 years ago

tstIEMAImpl: Allow multiple simple pattern expressions for the --include and --exclude options (separated by |). bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 192.5 KB
Line 
1/* $Id: tstIEMAImpl.cpp 94571 2022-04-12 09:43:59Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** Subtest table entry for an implementation whose EFLAGS behaviour is the
 *  same on all CPU vendors (the "native" flavour). */
#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Subtest table entry for the Intel EFLAGS-behaviour variant of an
 *  instruction; pairs the _intel worker with the generic one as native. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Subtest table entry for the AMD EFLAGS-behaviour variant of an
 *  instruction; pairs the _amd worker with the generic one as native. */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Declares a subtest descriptor struct type binding a worker function
 *  pointer type to its test-data array, test count, extra info and the
 *  EFLAGS flavour it implements. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char *pszName; \
        a_FunctionPtrType pfn; \
        a_FunctionPtrType pfnNative; \
        a_TestType const *paTests; \
        uint32_t const *pcTests; \
        uint32_t uExtra; \
        uint8_t idxCpuEflFlavour; \
    } a_TypeName

/** Number of variations to run for a subtest: the flavoured worker, plus the
 *  native one when this subtest's flavour matches the host CPU's. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
75
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
/** The test instance handle. */
static RTTEST g_hTest;
/** Which EFLAGS behaviour flavour the host CPU exhibits (Intel/AMD). */
static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** How many of the first generated tests get a zeroed destination operand. */
static uint32_t g_cZeroDstTests = 2;
/** How many of the first generated tests get a zeroed source operand. */
static uint32_t g_cZeroSrcTests = 4;
#endif
/* Scratch operand buffers of the various operand sizes (allocated in main,
   presumably — allocation not visible in this chunk). */
static uint8_t *g_pu8, *g_pu8Two;
static uint16_t *g_pu16, *g_pu16Two;
static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
static uint64_t *g_pu64, *g_pu64Two;
static RTUINT128U *g_pu128, *g_pu128Two;

/** Ring of static formatting buffers used by the Format*/GenFormat*/…Diff
 *  helpers so several results can coexist within one printf statement. */
static char g_aszBuf[16][256];
/** Next buffer in g_aszBuf to hand out (wraps via modulo). */
static unsigned g_idxBuf = 0;

/** Number of entries used in g_apszIncludeTestPatterns. */
static uint32_t g_cIncludeTestPatterns;
/** Number of entries used in g_apszExcludeTestPatterns. */
static uint32_t g_cExcludeTestPatterns;
/** Simple patterns (--include) selecting which subtests to run. */
static const char *g_apszIncludeTestPatterns[64];
/** Simple patterns (--exclude) selecting which subtests to skip. */
static const char *g_apszExcludeTestPatterns[64];
99
100
101/*********************************************************************************************************************************
102* Internal Functions *
103*********************************************************************************************************************************/
104static const char *FormatR80(PCRTFLOAT80U pr80);
105static const char *FormatR64(PCRTFLOAT64U pr64);
106static const char *FormatR32(PCRTFLOAT32U pr32);
107
108
109/*
110 * Random helpers.
111 */
112
113static uint32_t RandEFlags(void)
114{
115 uint32_t fEfl = RTRandU32();
116 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
117}
118
119#ifdef TSTIEMAIMPL_WITH_GENERATOR
120
/** Returns a random byte value in the range [0, 0xff]. */
static uint8_t RandU8(void)
{
    return RTRandU32Ex(0, 0xff);
}
125
126
/** Returns a random 16-bit value in the range [0, 0xffff]. */
static uint16_t RandU16(void)
{
    return RTRandU32Ex(0, 0xffff);
}
131
132
/** Returns a random 32-bit value. */
static uint32_t RandU32(void)
{
    return RTRandU32();
}
137
138#endif
139
/** Returns a random 64-bit value. */
static uint64_t RandU64(void)
{
    return RTRandU64();
}
144
145
146static RTUINT128U RandU128(void)
147{
148 RTUINT128U Ret;
149 Ret.s.Hi = RTRandU64();
150 Ret.s.Lo = RTRandU64();
151 return Ret;
152}
153
154#ifdef TSTIEMAIMPL_WITH_GENERATOR
155
156static uint8_t RandU8Dst(uint32_t iTest)
157{
158 if (iTest < g_cZeroDstTests)
159 return 0;
160 return RandU8();
161}
162
163
164static uint8_t RandU8Src(uint32_t iTest)
165{
166 if (iTest < g_cZeroSrcTests)
167 return 0;
168 return RandU8();
169}
170
171
172static uint16_t RandU16Dst(uint32_t iTest)
173{
174 if (iTest < g_cZeroDstTests)
175 return 0;
176 return RandU16();
177}
178
179
180static uint16_t RandU16Src(uint32_t iTest)
181{
182 if (iTest < g_cZeroSrcTests)
183 return 0;
184 return RandU16();
185}
186
187
188static uint32_t RandU32Dst(uint32_t iTest)
189{
190 if (iTest < g_cZeroDstTests)
191 return 0;
192 return RandU32();
193}
194
195
196static uint32_t RandU32Src(uint32_t iTest)
197{
198 if (iTest < g_cZeroSrcTests)
199 return 0;
200 return RandU32();
201}
202
203
204static uint64_t RandU64Dst(uint32_t iTest)
205{
206 if (iTest < g_cZeroDstTests)
207 return 0;
208 return RandU64();
209}
210
211
212static uint64_t RandU64Src(uint32_t iTest)
213{
214 if (iTest < g_cZeroSrcTests)
215 return 0;
216 return RandU64();
217}
218
219
/** Random signed 16-bit source (no zero seeding for signed sources). */
static int16_t RandI16Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int16_t)RandU16();
}
225
226
/** Random signed 32-bit source (no zero seeding for signed sources). */
static int32_t RandI32Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int32_t)RandU32();
}
232
233
#if 0 /* currently unused */
/** Random signed 64-bit source (no zero seeding for signed sources). */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int64_t)RandU64();
}
#endif
241
242
/** Returns a random x87 control word with the must-be-zero bits cleared. */
static uint16_t RandFcw(void)
{
    return RandU16() & ~X86_FCW_ZERO_MASK;
}
247
248
/** Returns a random x87 status word.  The compile-time assertion proves that
 *  every bit of the 16-bit word is a defined FSW field, so no masking is
 *  needed. */
static uint16_t RandFsw(void)
{
    AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
    return RandU16();
}
254
255
/** Shifts the 80-bit fraction right by @a cShift, substituting a small
 *  non-zero value when the shift would produce zero (the exact substitute,
 *  (cShift % 19) + 1, appears arbitrary — presumably just "something
 *  non-zero"). */
static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
{
    if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
        pr80->sj64.uFraction >>= cShift;
    else
        pr80->sj64.uFraction = (cShift % 19) + 1;
}
263
264
/**
 * Produces a random 80-bit floating point value, heavily biased towards
 * special encodings (zeros, infinities, NaNs, denormals, unnormals, ...).
 *
 * @returns The random value.
 * @param   cTarget     Width of the format the value will later be converted
 *                      to: 80/64/32 for FP targets, or 16/32/59 when
 *                      @a fIntTarget is true.  Used to clamp the exponent of
 *                      normal values into the target's representable range.
 * @param   fIntTarget  Set if the conversion target is an integer format.
 *
 * Note! The exact sequence of Rand*() calls matters for reproducible test
 *       data generation — do not reorder.
 */
static RTFLOAT80U RandR80Ex(unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Make it more likely that we get a good selection of special values.
     */
    uint8_t bType = RandU8() & 0x1f; /* 0..31; types >= 24 keep the raw random value. */
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 10 || bType == 11)
    {
        /* Quiet and signalling NaNs (using fInteger to pick which). */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 1;
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 12 || bType == 13)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType < 24)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* FP target: keep the exponent strictly inside the range the
               target format can represent after rebiasing. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 14)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 14)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    return r80;
}
398
399
400static RTFLOAT80U RandR80Src(uint32_t iTest)
401{
402 RT_NOREF(iTest);
403 return RandR80Ex();
404}
405
406
/** Shifts the 64-bit fraction right by @a cShift, substituting a small
 *  non-zero value when the shift would produce zero. */
static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
{
    if (pr64->s64.uFraction >= RT_BIT_64(cShift))
        pr64->s64.uFraction >>= cShift;
    else
        pr64->s64.uFraction = (cShift % 19) + 1;
}
414
415
/**
 * Produces a random double precision value, biased towards special encodings.
 * Note! The exact Rand*() call sequence matters for reproducible generation.
 */
static RTFLOAT64U RandR64Src(uint32_t iTest)
{
    RT_NOREF(iTest);

    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    uint8_t bType = RandU8() & 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5)
    {
        /* NaNs */
        if (bType == 5)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: keep the raw random value as-is. */
    return r64;
}
468
469
/** Shifts the 32-bit fraction right by @a cShift, substituting a small
 *  non-zero value when the shift would produce zero. */
static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
{
    if (pr32->s.uFraction >= RT_BIT_32(cShift))
        pr32->s.uFraction >>= cShift;
    else
        pr32->s.uFraction = (cShift % 19) + 1;
}
477
478
/**
 * Produces a random single precision value, biased towards special encodings.
 * Note! The exact Rand*() call sequence matters for reproducible generation.
 */
static RTFLOAT32U RandR32Src(uint32_t iTest)
{
    RT_NOREF(iTest);

    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    uint8_t bType = RandU8() & 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5)
    {
        /* NaNs */
        if (bType == 5)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: keep the raw random value as-is. */
    return r32;
}
530
531
/**
 * Produces a random packed BCD value.
 *
 * The first tests are deterministic edge cases: iTest 0..2 yield +/-zero,
 * iTest 3..4 yield the indefinite encoding.  Thereafter, tests with
 * (iTest & 7) >= 6 get deliberately illegal digit encodings (and sometimes a
 * non-zero pad), the rest get valid decimal digit pairs.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
569
570
/** Formats an 80-bit FP value as C source (an RTFLOAT80U_INIT_* expression)
 *  for the generated test-data files.  Returns a string literal for common
 *  special values, otherwise a string in the rotating g_aszBuf ring. */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
589
/** Formats a double value as an RTFLOAT64U_INIT_C() C source expression. */
const char *GenFormatR64(PCRTFLOAT64U prd)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
                prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
    return pszBuf;
}
597
598
/** Formats a single precision value as an RTFLOAT32U_INIT_C() expression. */
const char *GenFormatR32(PCRTFLOAT32U pr)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
    return pszBuf;
}
605
606
/** Formats a packed BCD value as an RTPBCD80U_INIT_C()/_EX_C() C source
 *  expression, emitting digit pairs most-significant first. */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* NOTE(review): these two stores are not bounds checked; the 256 byte
       buffer presumably always has room for the worst case — verify. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
624
625
/** Formats a signed 64-bit value as C source; INT64_MIN/MAX get their macro
 *  names (INT64_MIN cannot be written as a plain decimal literal). */
const char *GenFormatI64(int64_t i64)
{
    if (i64 == INT64_MIN) /* This one is problematic */
        return "INT64_MIN";
    if (i64 == INT64_MAX)
        return "INT64_MAX";
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
    return pszBuf;
}
636
637
/** Pointer overload of GenFormatI64. */
const char *GenFormatI64(int64_t const *pi64)
{
    return GenFormatI64(*pi64);
}
642
643
/** Formats a signed 32-bit value as C source; INT32_MIN/MAX get their macro
 *  names. */
const char *GenFormatI32(int32_t i32)
{
    if (i32 == INT32_MIN) /* This one is problematic */
        return "INT32_MIN";
    if (i32 == INT32_MAX)
        return "INT32_MAX";
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
    return pszBuf;
}
654
655
/** Pointer overload of GenFormatI32. */
const char *GenFormatI32(int32_t const *pi32)
{
    return GenFormatI32(*pi32);
}
660
661
/** Formats a signed 16-bit value as C source; INT16_MIN/MAX get their macro
 *  names. */
const char *GenFormatI16(int16_t i16)
{
    if (i16 == INT16_MIN) /* This one is problematic */
        return "INT16_MIN";
    if (i16 == INT16_MAX)
        return "INT16_MAX";
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
    return pszBuf;
}
672
673
/** Pointer overload of GenFormatI16. */
const char *GenFormatI16(int16_t const *pi16)
{
    return GenFormatI16(*pi16);
}
678
679
/**
 * Writes the standard file header (Id line, doc comment, license) to a
 * generated test-data file.
 *
 * @param   pOut        The output stream.
 * @param   pszCpuDesc  Description of the CPU the data was generated on.
 * @param   pszCpuType  Optional CPU vendor/type tag for the title, or NULL.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 94571 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev]))
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 94571 2022-04-12 09:43:59Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.215389.xyz. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
712
713
714static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
715{
716 PRTSTREAM pOut = NULL;
717 int rc = RTStrmOpen(pszFilename, "w", &pOut);
718 if (RT_SUCCESS(rc))
719 {
720 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
721 return pOut;
722 }
723 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
724 return NULL;
725}
726
727
728static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
729{
730 RTStrmPrintf(pOut,
731 "\n"
732 "/* end of file */\n");
733 int rc = RTStrmClose(pOut);
734 if (RT_SUCCESS(rc))
735 return rcExit;
736 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
737}
738
739
/** Emits the opening of a g_aTests_<name> array definition. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
744
745
/** Emits the closing brace of a test array plus its g_cTests_<name> count. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
754
755#endif /* TSTIEMAIMPL_WITH_GENERATOR */
756
757
758/*
759 * Test helpers.
760 */
761static bool IsTestEnabled(const char *pszName)
762{
763 /* Process excludes first: */
764 uint32_t i = g_cExcludeTestPatterns;
765 while (i-- > 0)
766 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
767 return false;
768
769 /* If no include patterns, everything is included: */
770 i = g_cIncludeTestPatterns;
771 if (!i)
772 return true;
773
774 /* Otherwise only tests in the include patters gets tested: */
775 while (i-- > 0)
776 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
777 return true;
778
779 return false;
780}
781
782
783static bool SubTestAndCheckIfEnabled(const char *pszName)
784{
785 RTTestSub(g_hTest, pszName);
786 if (IsTestEnabled(pszName))
787 return true;
788 RTTestSkipped(g_hTest, "excluded");
789 return false;
790}
791
792
/**
 * Formats the difference between two EFLAGS values for error messages.
 *
 * @returns Empty string when equal, otherwise " - <xor>/FLAG/!FLAG..." where
 *          "/FLAG" means the bit is set in @a fActual but not @a fExpected
 *          and "/!FLAG" the reverse.  Uses the rotating g_aszBuf ring.
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name table for the individual flag bits. */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
834
835
/**
 * Formats the difference between two x87 status words for error messages.
 *
 * Same output convention as EFlagsDiff; additionally reports a differing
 * TOP field as "/TOP<actual>!<expected>".
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name table for the individual status word bits. */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
876
877
/**
 * Formats an x87 control word as "<precision> <rounding> <masked exceptions>"
 * for human consumption.  Uses the rotating g_aszBuf ring.
 */
static const char *FormatFcw(uint16_t fFcw)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];

    const char *pszPC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_PC_MASK)
    {
        case X86_FCW_PC_24: pszPC = "PC24"; break;
        case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
        case X86_FCW_PC_53: pszPC = "PC53"; break;
        case X86_FCW_PC_64: pszPC = "PC64"; break;
    }

    const char *pszRC = NULL; /* (msc+gcc are too stupid) */
    switch (fFcw & X86_FCW_RC_MASK)
    {
        case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
        case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
        case X86_FCW_RC_UP: pszRC = "UP"; break;
        case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
    }
    size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);

    /* Exception mask bits; "6M" is bit 6 (0x40), presumably the reserved
       mask bit without an official name. */
    static struct
    {
        const char *pszName;
        uint32_t fFlag;
    } const s_aFlags[] =
    {
#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
        FCW_ENTRY(IM),
        FCW_ENTRY(DM),
        FCW_ENTRY(ZM),
        FCW_ENTRY(OM),
        FCW_ENTRY(UM),
        FCW_ENTRY(PM),
        { "6M", 64 },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (fFcw & s_aFlags[i].fFlag)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);

    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* ensure termination */
    return pszBuf;
}
923
924
/** Formats an 80-bit FP value for display (special values spelled out). */
static const char *FormatR80(PCRTFLOAT80U pr80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
    return pszBuf;
}
931
932
/** Formats a double value for display (special values spelled out). */
static const char *FormatR64(PCRTFLOAT64U pr64)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
    return pszBuf;
}
939
940
/** Formats a single precision value for display (special values spelled out). */
static const char *FormatR32(PCRTFLOAT32U pr32)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
    return pszBuf;
}
947
948
/**
 * Formats a packed BCD value for display as "+/-<18 digits>", appending
 * "[<bad-digit-count>,<pad>]" when the encoding contains illegal digits or a
 * non-zero pad field.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0) /* most significant pair first */
    {
        static const char s_szDigits[] = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
975
976
#if 0 /* currently unused */
/** Formats a signed 64-bit value in hex for display. */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
985
986
/** Formats a signed 32-bit value in hex for display. */
static const char *FormatI32(int32_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
993
994
/** Formats a signed 16-bit value in hex for display. */
static const char *FormatI16(int16_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
1001
1002
1003/*
1004 * Binary operations.
1005 */
/* Subtest descriptor types for the 8/16/32/64-bit binary operators. */
TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1010
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Expands to the BinU<cBits>Generate() test-data generator for one binary
 * operator width.  For each subtest it emits a g_aTests_* array; vendor
 * flavoured subtests matching the host go to @a pOutCpu, the rest to
 * @a pOut (non-matching vendor flavours are skipped entirely).
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
/** No-op when the generator is compiled out. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1049
/**
 * Expands to the generator (when enabled) plus BinU<a_cBits>Test(), which
 * replays the recorded test data against each worker in a_aSubTests: first
 * via local variables, and on success once more via the g_puXX / g_pfEfl
 * globals.  When COUNT_VARIATIONS reports two variations, a second pass runs
 * the pfnNative variant against the same data.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1090
1091
1092/*
1093 * 8-bit binary operations.
1094 */
1095static const BINU8_T g_aBinU8[] =
1096{
1097 ENTRY(add_u8),
1098 ENTRY(add_u8_locked),
1099 ENTRY(adc_u8),
1100 ENTRY(adc_u8_locked),
1101 ENTRY(sub_u8),
1102 ENTRY(sub_u8_locked),
1103 ENTRY(sbb_u8),
1104 ENTRY(sbb_u8_locked),
1105 ENTRY(or_u8),
1106 ENTRY(or_u8_locked),
1107 ENTRY(xor_u8),
1108 ENTRY(xor_u8_locked),
1109 ENTRY(and_u8),
1110 ENTRY(and_u8_locked),
1111 ENTRY(cmp_u8),
1112 ENTRY(test_u8),
1113};
1114TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1115
1116
1117/*
1118 * 16-bit binary operations.
1119 */
1120static const BINU16_T g_aBinU16[] =
1121{
1122 ENTRY(add_u16),
1123 ENTRY(add_u16_locked),
1124 ENTRY(adc_u16),
1125 ENTRY(adc_u16_locked),
1126 ENTRY(sub_u16),
1127 ENTRY(sub_u16_locked),
1128 ENTRY(sbb_u16),
1129 ENTRY(sbb_u16_locked),
1130 ENTRY(or_u16),
1131 ENTRY(or_u16_locked),
1132 ENTRY(xor_u16),
1133 ENTRY(xor_u16_locked),
1134 ENTRY(and_u16),
1135 ENTRY(and_u16_locked),
1136 ENTRY(cmp_u16),
1137 ENTRY(test_u16),
1138 ENTRY_EX(bt_u16, 1),
1139 ENTRY_EX(btc_u16, 1),
1140 ENTRY_EX(btc_u16_locked, 1),
1141 ENTRY_EX(btr_u16, 1),
1142 ENTRY_EX(btr_u16_locked, 1),
1143 ENTRY_EX(bts_u16, 1),
1144 ENTRY_EX(bts_u16_locked, 1),
1145 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1146 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1147 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1148 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1149 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1150 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1151 ENTRY(arpl),
1152};
1153TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1154
1155
1156/*
1157 * 32-bit binary operations.
1158 */
1159static const BINU32_T g_aBinU32[] =
1160{
1161 ENTRY(add_u32),
1162 ENTRY(add_u32_locked),
1163 ENTRY(adc_u32),
1164 ENTRY(adc_u32_locked),
1165 ENTRY(sub_u32),
1166 ENTRY(sub_u32_locked),
1167 ENTRY(sbb_u32),
1168 ENTRY(sbb_u32_locked),
1169 ENTRY(or_u32),
1170 ENTRY(or_u32_locked),
1171 ENTRY(xor_u32),
1172 ENTRY(xor_u32_locked),
1173 ENTRY(and_u32),
1174 ENTRY(and_u32_locked),
1175 ENTRY(cmp_u32),
1176 ENTRY(test_u32),
1177 ENTRY_EX(bt_u32, 1),
1178 ENTRY_EX(btc_u32, 1),
1179 ENTRY_EX(btc_u32_locked, 1),
1180 ENTRY_EX(btr_u32, 1),
1181 ENTRY_EX(btr_u32_locked, 1),
1182 ENTRY_EX(bts_u32, 1),
1183 ENTRY_EX(bts_u32_locked, 1),
1184 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1185 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1186 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1187 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1188 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1189 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1190};
1191TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1192
1193
1194/*
1195 * 64-bit binary operations.
1196 */
1197static const BINU64_T g_aBinU64[] =
1198{
1199 ENTRY(add_u64),
1200 ENTRY(add_u64_locked),
1201 ENTRY(adc_u64),
1202 ENTRY(adc_u64_locked),
1203 ENTRY(sub_u64),
1204 ENTRY(sub_u64_locked),
1205 ENTRY(sbb_u64),
1206 ENTRY(sbb_u64_locked),
1207 ENTRY(or_u64),
1208 ENTRY(or_u64_locked),
1209 ENTRY(xor_u64),
1210 ENTRY(xor_u64_locked),
1211 ENTRY(and_u64),
1212 ENTRY(and_u64_locked),
1213 ENTRY(cmp_u64),
1214 ENTRY(test_u64),
1215 ENTRY_EX(bt_u64, 1),
1216 ENTRY_EX(btc_u64, 1),
1217 ENTRY_EX(btc_u64_locked, 1),
1218 ENTRY_EX(btr_u64, 1),
1219 ENTRY_EX(btr_u64_locked, 1),
1220 ENTRY_EX(bts_u64, 1),
1221 ENTRY_EX(bts_u64_locked, 1),
1222 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1223 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1224 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1225 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1226 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1227 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1228};
1229TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1230
1231
1232/*
1233 * XCHG
1234 */
1235static void XchgTest(void)
1236{
1237 if (!SubTestAndCheckIfEnabled("xchg"))
1238 return;
1239 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1240 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1241 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1242 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1243
1244 static struct
1245 {
1246 uint8_t cb; uint64_t fMask;
1247 union
1248 {
1249 uintptr_t pfn;
1250 FNIEMAIMPLXCHGU8 *pfnU8;
1251 FNIEMAIMPLXCHGU16 *pfnU16;
1252 FNIEMAIMPLXCHGU32 *pfnU32;
1253 FNIEMAIMPLXCHGU64 *pfnU64;
1254 } u;
1255 }
1256 s_aXchgWorkers[] =
1257 {
1258 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1259 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1260 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1261 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1262 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1263 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1264 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1265 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1266 };
1267 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1268 {
1269 RTUINT64U uIn1, uIn2, uMem, uDst;
1270 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1271 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1272 if (uIn1.u == uIn2.u)
1273 uDst.u = uIn2.u = ~uIn2.u;
1274
1275 switch (s_aXchgWorkers[i].cb)
1276 {
1277 case 1:
1278 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1279 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1280 break;
1281 case 2:
1282 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1283 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1284 break;
1285 case 4:
1286 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1287 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1288 break;
1289 case 8:
1290 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1291 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1292 break;
1293 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1294 }
1295
1296 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1297 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1298 }
1299}
1300
1301
1302/*
1303 * XADD
1304 */
1305static void XaddTest(void)
1306{
1307#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1308 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1309 static struct \
1310 { \
1311 const char *pszName; \
1312 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1313 BINU ## a_cBits ## _TEST_T const *paTests; \
1314 uint32_t const *pcTests; \
1315 } const s_aFuncs[] = \
1316 { \
1317 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1318 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1319 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1320 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1321 }; \
1322 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1323 { \
1324 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1325 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1326 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1327 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1328 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1329 { \
1330 uint32_t fEfl = paTests[iTest].fEflIn; \
1331 a_Type uSrc = paTests[iTest].uSrcIn; \
1332 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1333 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1334 if ( fEfl != paTests[iTest].fEflOut \
1335 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1336 || uSrc != paTests[iTest].uDstIn) \
1337 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1338 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1339 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1340 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1341 } \
1342 } \
1343 } while(0)
1344 TEST_XADD(8, uint8_t, "%#04x");
1345 TEST_XADD(16, uint16_t, "%#06x");
1346 TEST_XADD(32, uint32_t, "%#010RX32");
1347 TEST_XADD(64, uint64_t, "%#010RX64");
1348}
1349
1350
1351/*
1352 * CMPXCHG
1353 */
1354
1355static void CmpXchgTest(void)
1356{
1357#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1358 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1359 static struct \
1360 { \
1361 const char *pszName; \
1362 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1363 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1364 BINU ## a_cBits ## _TEST_T const *paTests; \
1365 uint32_t const *pcTests; \
1366 } const s_aFuncs[] = \
1367 { \
1368 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1369 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1370 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1371 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1372 }; \
1373 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1374 { \
1375 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1376 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1377 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1378 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1379 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1380 { \
1381 /* as is (99% likely to be negative). */ \
1382 uint32_t fEfl = paTests[iTest].fEflIn; \
1383 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1384 a_Type uA = paTests[iTest].uDstIn; \
1385 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1386 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1387 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1388 if ( fEfl != paTests[iTest].fEflOut \
1389 || *g_pu ## a_cBits != uExpect \
1390 || uA != paTests[iTest].uSrcIn) \
1391 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1392 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1393 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1394 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1395 /* positive */ \
1396 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1397 uA = paTests[iTest].uDstIn; \
1398 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1399 fEfl = paTests[iTest].fEflIn; \
1400 uA = paTests[iTest].uDstIn; \
1401 *g_pu ## a_cBits = uA; \
1402 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1403 if ( fEfl != fEflExpect \
1404 || *g_pu ## a_cBits != uNew \
1405 || uA != paTests[iTest].uDstIn) \
1406 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1407 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1408 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1409 EFlagsDiff(fEfl, fEflExpect)); \
1410 } \
1411 } \
1412 } while(0)
1413 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1414 TEST_CMPXCHG(16, uint16_t, "%#06x");
1415 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1416#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1417 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1418#endif
1419}
1420
/**
 * Tests iemAImpl_cmpxchg8b and iemAImpl_cmpxchg8b_locked with random values,
 * covering both the matching case (ZF set, new value stored) and the
 * mismatching case (ZF clear, memory value loaded into the accumulator).
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test: memory equals the compare value, so the new value
               must be stored and ZF set; the B operand must be left untouched. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u    != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);

            /* negative: memory holds the complement, so the compare fails,
               ZF is cleared, memory is unchanged and uA receives its value. */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl    != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u    != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1477
/**
 * Tests the cmpxchg16b workers (native, locked and, on non-ARM64 hosts, the
 * C fallback) with random 128-bit values, covering both the matching (ZF set,
 * store) and mismatching (ZF clear, load) cases.  Skipped when the host CPU
 * lacks the CX16 feature and the assembly variants would be used.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b", iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        /* The assembly workers need the real instruction; check CPUID. */
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test: memory equals the compare value, so the new value
               must be stored and ZF set; the B operand must be left untouched. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo       != uOldValue.s.Lo
                || uA.s.Hi       != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative: memory holds the complement, so the compare fails,
               ZF is cleared, memory is unchanged and uA receives its value. */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl          != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo       != uExpect.s.Lo
                || uA.s.Hi       != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1554
1555
1556/*
1557 * Double shifts.
1558 *
1559 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1560 */
1561#ifdef TSTIEMAIMPL_WITH_GENERATOR
1562# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1563void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1564{ \
1565 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1566 { \
1567 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1568 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1569 continue; \
1570 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
1571 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1572 { \
1573 a_TestType Test; \
1574 Test.fEflIn = RandEFlags(); \
1575 Test.fEflOut = Test.fEflIn; \
1576 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1577 Test.uDstOut = Test.uDstIn; \
1578 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1579 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
1580 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
1581 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
1582 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
1583 } \
1584 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
1585 } \
1586}
1587#else
1588# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
1589#endif
1590
/**
 * Expands to the subtest descriptor type, the AMD/Intel shld+shrd subtest
 * table, the generator (when enabled), and ShiftDblU<a_cBits>Test(), which
 * replays the recorded data (shift count in uMisc) against each worker and,
 * on success, once more via the g_puXX / g_pfEfl globals.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1644
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Runs the 16-, 32- and 64-bit double-shift test data generators in order. */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    { ShiftDblU16Generate, ShiftDblU32Generate, ShiftDblU64Generate };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
1653
1654static void ShiftDblTest(void)
1655{
1656 ShiftDblU16Test();
1657 ShiftDblU32Test();
1658 ShiftDblU64Test();
1659}
1660
1661
1662/*
1663 * Unary operators.
1664 *
1665 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1666 */
1667#ifdef TSTIEMAIMPL_WITH_GENERATOR
1668# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1669void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1670{ \
1671 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1672 { \
1673 GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1674 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1675 { \
1676 a_TestType Test; \
1677 Test.fEflIn = RandEFlags(); \
1678 Test.fEflOut = Test.fEflIn; \
1679 Test.uDstIn = RandU ## a_cBits(); \
1680 Test.uDstOut = Test.uDstIn; \
1681 Test.uSrcIn = 0; \
1682 Test.uMisc = 0; \
1683 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1684 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1685 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1686 } \
1687 GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1688 } \
1689}
1690#else
1691# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1692#endif
1693
/**
 * Expands to the subtest descriptor type, the inc/dec/not/neg (plain and
 * locked) subtest table, the generator (when enabled), and
 * UnaryU<a_cBits>Test(), which replays the recorded data against each worker,
 * on success once more via the g_puXX / g_pfEfl globals.
 */
#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType const g_aUnaryU ## a_cBits [] = \
{ \
    ENTRY(inc_u ## a_cBits), \
    ENTRY(inc_u ## a_cBits ## _locked), \
    ENTRY(dec_u ## a_cBits), \
    ENTRY(dec_u ## a_cBits ## _locked), \
    ENTRY(not_u ## a_cBits), \
    ENTRY(not_u ## a_cBits ## _locked), \
    ENTRY(neg_u ## a_cBits), \
    ENTRY(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
        uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type uDst = paTests[iTest].uDstIn; \
            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
            if ( uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl = paTests[iTest].fEflIn; \
                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
    } \
}
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1744
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Runs the 8-, 16-, 32- and 64-bit unary operator test data generators in order. */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    static void (* const s_apfnGen[])(PRTSTREAM, uint32_t) =
    { UnaryU8Generate, UnaryU16Generate, UnaryU32Generate, UnaryU64Generate };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGen); i++)
        s_apfnGen[i](pOut, cTests);
}
#endif
1754
1755static void UnaryTest(void)
1756{
1757 UnaryU8Test();
1758 UnaryU16Test();
1759 UnaryU32Test();
1760 UnaryU64Test();
1761}
1762
1763
1764/*
1765 * Shifts.
1766 *
1767 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
1768 */
1769#ifdef TSTIEMAIMPL_WITH_GENERATOR
1770# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1771void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1772{ \
1773 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1774 { \
1775 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1776 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1777 continue; \
1778 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
1779 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1780 { \
1781 a_TestType Test; \
1782 Test.fEflIn = RandEFlags(); \
1783 Test.fEflOut = Test.fEflIn; \
1784 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1785 Test.uDstOut = Test.uDstIn; \
1786 Test.uSrcIn = 0; \
1787 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
1788 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
1789 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
1790 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
1791 \
1792 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
1793 Test.fEflOut = Test.fEflIn; \
1794 Test.uDstOut = Test.uDstIn; \
1795 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
1796 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
1797 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
1798 } \
1799 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
1800 } \
1801}
1802#else
1803# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
1804#endif
1805
/**
 * Instantiates the shift/rotate sub-test table and test driver for one
 * operand width (a_cBits).
 *
 * The table has AMD and Intel EFLAGS-flavour entries for rol/ror/rcl/rcr and
 * shl/shr/sar; the flag mask passed to ENTRY_AMD/ENTRY_INTEL is OF for the
 * rotates and OF|AF for the plain shifts (presumably the vendor-specific
 * flags - confirm against the ENTRY_* definitions earlier in the file).
 * ShiftU<a_cBits>Test() replays the pre-generated test values, first against
 * the default worker and, when COUNT_VARIATIONS yields two, once more
 * against the native one.
 */
#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(  rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD(  sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type   uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    /* Passed; repeat the call via the global buffer pointers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            /* Second variation (if any) runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
1869
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the shift/rotate test value tables for all operand widths. */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif

/** Runs the shift/rotate tests for all operand widths. */
static void ShiftTest(void)
{
    ShiftU8Test();
    ShiftU16Test();
    ShiftU32Test();
    ShiftU64Test();
}
1887
1888
1889/*
1890 * Multiplication and division.
1891 *
1892 * Note! The 8-bit functions has a different format, so we need to duplicate things.
1893 * Note! Currently ignoring undefined bits.
1894 */
1895
1896/* U8 */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/**
 * The 8-bit multiplication/division sub-tests.
 *
 * The last ENTRY_*_EX argument lands in uExtra, which MulDivU8Test() uses as
 * the set of EFLAGS to ignore when comparing results.  The other mask is
 * presumably the instruction's undefined flags - confirm against the
 * ENTRY_*_EX definitions earlier in the file.
 */
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                         X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                          X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
1911
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a test value table for each 8-bit mul/div sub-test whose EFLAGS
 * flavour matches what the host CPU produces.
 *
 * Each entry records input/output EFLAGS, the 16-bit AX in/out values, the
 * 8-bit source operand and the worker's return code.
 *
 * Fix: dropped a stray '\' line continuation after GenerateArrayStart()
 * left over from macro-ified code (harmless line splicing, but misleading
 * in a plain function).
 *
 * @param   pOut    Stream to write the generated arrays to.
 * @param   cTests  Number of test entries to generate per sub-test.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        /* Skip flavours the host cannot produce natively. */
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn  = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn  = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn  = RandU8Src(iTest);
            Test.rc      = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
1937
1938static void MulDivU8Test(void)
1939{
1940 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
1941 {
1942 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
1943 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
1944 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
1945 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
1946 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
1947 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
1948 if (!cTests) RTTestSkipped(g_hTest, "no tests");
1949 for (uint32_t iVar = 0; iVar < cVars; iVar++)
1950 {
1951 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
1952 {
1953 uint32_t fEfl = paTests[iTest].fEflIn;
1954 uint16_t uDst = paTests[iTest].uDstIn;
1955 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
1956 if ( uDst != paTests[iTest].uDstOut
1957 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
1958 || rc != paTests[iTest].rc)
1959 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
1960 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
1961 "%sexpected %#08x %#06RX16 %d%s\n",
1962 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
1963 iVar ? " " : "", fEfl, uDst, rc,
1964 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
1965 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
1966 else
1967 {
1968 *g_pu16 = paTests[iTest].uDstIn;
1969 *g_pfEfl = paTests[iTest].fEflIn;
1970 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
1971 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
1972 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
1973 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
1974 }
1975 }
1976 pfn = g_aMulDivU8[iFn].pfnNative;
1977 }
1978 }
1979}
1980
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Expands to MulDivU<a_cBits>Generate(), which emits a test value table for
 * each sub-test whose EFLAGS flavour matches the host CPU.  Entries record
 * EFLAGS in/out, both destination registers in/out (e.g. AX/DX pairs),
 * the source operand and the worker's return code.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        /* Skip flavours the host cannot produce natively. */ \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn   = RandEFlags(); \
            Test.fEflOut  = Test.fEflIn; \
            Test.uDst1In  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn   = RandU ## a_cBits ## Src(iTest); \
            Test.rc       = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2012
/**
 * Instantiates the mul/imul/div/idiv sub-test table and test driver for one
 * operand width (a_cBits, 16/32/64; the 8-bit variant is hand-written above
 * because its function signature differs).
 *
 * uExtra (the last ENTRY_*_EX argument) is the set of EFLAGS the driver
 * ignores when comparing results.  MulDivU<a_cBits>Test() replays the
 * recorded values against the default worker, then against the native one
 * when COUNT_VARIATIONS yields two.
 */
#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType const a_aSubTests [] = \
{ \
    ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        uint32_t const           fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl  = paTests[iTest].fEflIn; \
                a_Type   uDst1 = paTests[iTest].uDst1In; \
                a_Type   uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    /* Passed; repeat the call via the global buffer pointers. */ \
                    *g_pu ## a_cBits        = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl                = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            /* Second variation (if any) runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2081
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the mul/div test value tables for all operand widths. */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif

/** Runs the mul/div tests for all operand widths. */
static void MulDivTest(void)
{
    MulDivU8Test();
    MulDivU16Test();
    MulDivU32Test();
    MulDivU64Test();
}
2099
2100
2101/*
2102 * BSWAP
2103 */
/**
 * Tests the BSWAP helpers for 16, 32 and 64-bit operands.
 *
 * Expected results are hardcoded here rather than generated.  The 16-bit
 * cases operate on a 32-bit buffer so that the bytes surrounding the
 * operand can be checked too.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        /* Disabled alternative: expects the low word's bytes swapped. */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        /* Active expectation: the low word is zeroed.  NOTE(review): 16-bit
           BSWAP results are undefined on real CPUs - confirm this matches
           the targeted behaviour. */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2138
2139
2140
2141/*********************************************************************************************************************************
2142* Floating point (x87 style) *
2143*********************************************************************************************************************************/
2144
2145/*
2146 * FPU constant loading.
2147 */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** The FPU load-constant sub-tests (FLD1, FLDL2T, FLDL2E, FLDPI, FLDLG2,
 *  FLDLN2 and FLDZ). */
static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2160
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test value tables for the FPU load-constant workers.
 *
 * For each random FCW/FSW pair all four rounding modes are exercised, hence
 * the iTest += 4 stepping; the entry index written to the table is
 * iTest + iRounding.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Replace only the rounding-control bits of the random FCW. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2187
2188static void FpuLoadConstTest(void)
2189{
2190 /*
2191 * Inputs:
2192 * - FSW: C0, C1, C2, C3
2193 * - FCW: Exception masks, Precision control, Rounding control.
2194 *
2195 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2196 */
2197 X86FXSTATE State;
2198 RT_ZERO(State);
2199 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2200 {
2201 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2202 continue;
2203
2204 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2205 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2206 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2207 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2208 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2209 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2210 {
2211 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2212 {
2213 State.FCW = paTests[iTest].fFcw;
2214 State.FSW = paTests[iTest].fFswIn;
2215 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2216 pfn(&State, &Res);
2217 if ( Res.FSW != paTests[iTest].fFswOut
2218 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2219 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2220 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2221 Res.FSW, FormatR80(&Res.r80Result),
2222 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2223 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2224 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2225 FormatFcw(paTests[iTest].fFcw) );
2226 }
2227 pfn = g_aFpuLdConst[iFn].pfnNative;
2228 }
2229 }
2230}
2231
2232
2233/*
2234 * Load floating point values from memory.
2235 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Expands to FpuLdR<a_cBits>Generate(), emitting test value tables for
 * loading an a_cBits-bit floating point value into an 80-bit register, with
 * all four rounding modes exercised per random input.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                /* Replace only the rounding-control bits of the random FCW. */ \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2267
/**
 * Instantiates the worker typedef, sub-test table, generator and test driver
 * for loading an a_cBits-bit floating point value (fld_r80_from_r<bits>).
 *
 * The driver replays the recorded FCW/FSW/input triples and compares the
 * resulting FSW and 80-bit value bit-for-bit (RTFLOAT80U_ARE_IDENTICAL).
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second variation (if any) runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2322
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the FPU memory-load test value tables for all float widths. */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
#endif

/** Runs the FPU memory-load tests for all float widths. */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
2338
2339
2340/*
2341 * Load integer values from memory.
2342 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Expands to FpuLdI<a_cBits>Generate(), emitting test value tables for FILD
 * of an a_cBits-bit signed integer, with all four rounding modes exercised
 * per random input.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                /* Replace only the rounding-control bits of the random FCW. */ \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2373
/**
 * Instantiates the worker typedef, sub-test table, generator and test driver
 * for FILD of an a_cBits-bit signed integer (fild_r80_from_i<bits>).
 *
 * The driver replays the recorded FCW/FSW/integer triples and compares the
 * resulting FSW and 80-bit value bit-for-bit.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second variation (if any) runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2427
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the FILD test value tables for all integer widths. */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif

/** Runs the FILD tests for all integer widths. */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
2443
2444
2445/*
2446 * Load binary coded decimal values from memory.
2447 */
/** Worker signature for loading a packed BCD (d80) value into an 80-bit
 *  FPU register (FBLD). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The packed BCD load sub-tests. */
static const FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY(fld_r80_from_d80)
};
2456
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test value tables for loading packed BCD (d80) values, exercising
 * all four rounding modes per random input.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Replace only the rounding-control bits of the random FCW. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2485
2486static void FpuLdD80Test(void)
2487{
2488 X86FXSTATE State;
2489 RT_ZERO(State);
2490 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2491 {
2492 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2493 continue;
2494
2495 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2496 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2497 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2498 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2499 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2500 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2501 {
2502 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2503 {
2504 RTPBCD80U const InVal = paTests[iTest].InVal;
2505 State.FCW = paTests[iTest].fFcw;
2506 State.FSW = paTests[iTest].fFswIn;
2507 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2508 pfn(&State, &Res, &InVal);
2509 if ( Res.FSW != paTests[iTest].fFswOut
2510 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2511 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2512 "%s -> fsw=%#06x %s\n"
2513 "%s expected %#06x %s%s%s (%s)\n",
2514 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2515 FormatD80(&paTests[iTest].InVal),
2516 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2517 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2518 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2519 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2520 FormatFcw(paTests[iTest].fFcw) );
2521 }
2522 pfn = g_aFpuLdD80[iFn].pfnNative;
2523 }
2524 }
2525}
2526
2527
2528/*
2529 * Store values floating point values to memory.
2530 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs appended to the random fst_r80_to_r32 test set;
 *  mantissas chosen to sit right at the r32 rounding boundary. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Hand-picked 80-bit inputs appended to the random fst_r80_to_r64 test set. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Hand-picked 80-bit inputs appended to the random fst_r80_to_r80 test set. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/**
 * Expands to FpuStR<a_cBits>Generate(), emitting test value tables for
 * storing an 80-bit register value to an a_cBits-bit memory format.
 *
 * Besides cTests random inputs, the g_aFpuStR<bits>Specials entries are
 * appended.  For each input, all four rounding modes are combined with the
 * OM/UM/PM exception-mask patterns (iMask steps by 2, so only the upper
 * three of the four mask bits are varied; the commented-out code hints at a
 * previously considered full FCW mask flip).
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    /* Note: 0xfe fill overrides the RT_ZERO; makes untouched output bytes visible. */ \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2593
/**
 * Instantiates the worker typedef, subtest table and test driver for storing
 * st0 as an a_cBits-bit floating point value (fst_r80_to_rNN).
 *
 * Fix: dropped the dead `RT_ZERO(OutVal);` — it was immediately and fully
 * overwritten by the 0xfe memset fill (the fill is what detects bytes the
 * worker failed to write), so the zeroing had no effect.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                /* Poison the output so unwritten bytes show up in the compare. */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2648
/* Instantiate the FPU store-real subtests for 80-, 64- and 32-bit destinations. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2652
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all three store-real widths; the call order fixes the
 *  layout of the generated data file, so do not reorder. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2661
/** Runs all three store-real (fst r80 -> memory) test drivers. */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
2668
2669
2670/*
2671 * Store integer values to memory or register.
2672 */
/* Subtest descriptor types for the signed-integer store workers (fist/fistt). */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/* fistt_r80_to_i16 produces different results on AMD vs Intel, hence the two
   CPU-flavoured entries instead of a single ENTRY. */
static const FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY(fist_r80_to_i16),
    ENTRY_AMD(  fistt_r80_to_i16, 0),
    ENTRY_INTEL(fistt_r80_to_i16, 0),
};
static const FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY(fist_r80_to_i32),
    ENTRY(fistt_r80_to_i32),
};
static const FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY(fist_r80_to_i64),
    ENTRY(fistt_r80_to_i64),
};
2693
2694#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked boundary inputs around the int16 range (exponents 13..32 above
   bias) to exercise rounding and overflow-to-indefinite behaviour. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/* Boundary inputs around the int32 range (exponents 30/31 above bias). */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/* Boundary inputs around the int64 range (exponents 61..63 above bias). */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
2784
/**
 * Generator for the integer store tests: runs the native (or IEM) worker over
 * random and special inputs, sweeping all four rounding modes and flipping the
 * OM/UM/PM exception masks, and prints one test-table row per combination.
 * CPU-flavoured subtests go to pOutCpu; flavour-neutral ones to pOut.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            /* Flavoured entry: only generate on the matching host CPU. */ \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
\
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType  iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding  << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
2836
/**
 * Instantiates the generator (when enabled) and test driver for storing st0 as
 * an a_cBits-bit signed integer, replaying recorded FCW/FSW/input rows and
 * comparing FSW output and the stored value.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                /* Poison value so a worker that writes nothing is detected. */ \
                a_iType  iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x " a_szFmt "\n" \
                                 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, iOutVal, \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2879
/* Note: fistt_r80_to_i16 results differ between AMD and Intel, which is why
   g_aFpuStI16 carries CPU-flavoured entries. */

TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
2885
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all three integer store widths; call order fixes the
 *  generated data file layout, so do not reorder. */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
2894
/** Runs all three integer store (fist/fistt) test drivers. */
static void FpuStIntTest(void)
{
    FpuStI64Test();
    FpuStI32Test();
    FpuStI16Test();
}
2901
2902
2903/*
2904 * Store as packed BCD value (memory).
2905 */
/* Worker signature and subtest table for storing st0 as an 18-digit packed
   BCD value (fbstp). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);

static const FPU_ST_D80_T g_aFpuStD80[] =
{
    ENTRY(fst_r80_to_d80),
};
2914
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the packed BCD store worker.  Specials bracket the
 * +/-999'999'999'999'999'999 packed BCD limit (0xde0b6b3a763ffff0 * 2^59);
 * each input is swept over the four rounding modes with the OM/UM/PM masks
 * flipped (PC has no effect on these workers, see comment below).
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding  << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
2968
2969
2970static void FpuStD80Test(void)
2971{
2972 X86FXSTATE State;
2973 RT_ZERO(State);
2974 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
2975 {
2976 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
2977 continue;
2978
2979 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
2980 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
2981 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
2982 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
2983 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2984 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2985 {
2986 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2987 {
2988 RTFLOAT80U const InVal = paTests[iTest].InVal;
2989 uint16_t uFswOut = 0;
2990 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
2991 State.FCW = paTests[iTest].fFcw;
2992 State.FSW = paTests[iTest].fFswIn;
2993 pfn(&State, &uFswOut, &OutVal, &InVal);
2994 if ( uFswOut != paTests[iTest].fFswOut
2995 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
2996 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
2997 "%s -> fsw=%#06x %s\n"
2998 "%s expected %#06x %s%s%s (%s)\n",
2999 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3000 FormatR80(&paTests[iTest].InVal),
3001 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3002 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3003 FswDiff(uFswOut, paTests[iTest].fFswOut),
3004 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3005 FormatFcw(paTests[iTest].fFcw) );
3006 }
3007 pfn = g_aFpuStD80[iFn].pfnNative;
3008 }
3009 }
3010}
3011
3012
3013
3014/*********************************************************************************************************************************
3015* x87 FPU Binary Operations *
3016*********************************************************************************************************************************/
3017
3018/*
3019 * Binary FPU operations on two 80-bit floating point values.
3020 */
/* Subtest table for binary r80 x r80 operations.  The transcendental helpers
   (fpatan/fyl2x/fyl2xp1) differ in C1 and rounding between AMD and Intel, so
   they get per-CPU flavoured entries. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);

static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
{
    ENTRY(fadd_r80_by_r80),
    ENTRY(fsub_r80_by_r80),
    ENTRY(fsubr_r80_by_r80),
    ENTRY(fmul_r80_by_r80),
    ENTRY(fdiv_r80_by_r80),
    ENTRY(fdivr_r80_by_r80),
    ENTRY(fprem_r80_by_r80),
    ENTRY(fprem1_r80_by_r80),
    ENTRY(fscale_r80_by_r80),
    ENTRY_AMD(  fpatan_r80_by_r80, 0),  // C1 and rounding differs on AMD
    ENTRY_INTEL(fpatan_r80_by_r80, 0),  // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2x_r80_by_r80, 0),   // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2x_r80_by_r80, 0),   // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
};
3041
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the binary r80 x r80 workers.  At least a quarter of
 * the random input pairs are forced to be normal/normal (the retry logic below
 * re-rolls non-normal pairs near the end of the run); each pair is swept over
 * rounding mode, precision control, and all-masked vs all-unmasked exceptions.
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
            RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        /* Prefer the native worker for generating reference results. */
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Flavoured entry: only generate on the matching host CPU. */
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Re-roll to guarantee the normal/normal quota. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                    {
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                  | (iRounding  << X86_FCW_RC_SHIFT)
                                  | (iPrecision << X86_FCW_PC_SHIFT)
                                  | iMask;
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n",
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                    }
                }
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
#endif
3105
3106
/**
 * Replays the recorded binary r80 x r80 test data against the IEM workers
 * (and, on a second variation pass, the native workers) and checks the
 * resulting FSW and 80-bit value.
 */
static void FpuBinaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
            continue;

        uint32_t const                       cTests  = *g_aFpuBinaryR80[iFn].pcTests;
        FPU_BINARY_R80_TEST_T const * const  paTests = g_aFpuBinaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80                    pfn     = g_aFpuBinaryR80[iFn].pfn;
        uint32_t const                       cVars   = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal1, &InVal2);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                 "%s -> fsw=%#06x %s\n"
                                 "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second pass (if any) runs the native worker. */
            pfn = g_aFpuBinaryR80[iFn].pfnNative;
        }
    }
}
3148
3149
3150/*
3151 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3152 */
/* Stand-ins so the generic a_Type2##_IS_NORMAL(...) check in the generator
   macros compiles for integer operand types (integers are always "normal"). */
#define int64_t_IS_NORMAL(a) 1
#define int32_t_IS_NORMAL(a) 1
#define int16_t_IS_NORMAL(a) 1

#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special operand pairs for the r80-by-smaller-type binary generators. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3176
/**
 * Generator for r80-by-a_Type2 binary operations: random + special operand
 * pairs, quota of normal/normal pairs, swept over rounding, precision and the
 * all-masked/all-unmasked exception settings.
 */
# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = cTests / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src(a_cBits) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Re-roll to guarantee the normal/normal quota. */ \
                iTest -= 1; \
                continue; \
            } \
\
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding  << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3229
/**
 * Instantiates subtest table, generator and test driver for the six
 * add/mul/sub/subr/div/divr r80-by-a_LoBits binary operations (a_I is 'i' for
 * the fiXXX integer forms, empty otherwise).
 */
#define TEST_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const    InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_SMALL(64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3290
3291
3292/*
3293 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3294 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Special operand pairs for the FSW-only binary generators (compares etc.). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3319
3320# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3321static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3322{ \
3323 X86FXSTATE State; \
3324 RT_ZERO(State); \
3325 uint32_t cMinNormalPairs = cTests / 4; \
3326 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3327 { \
3328 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3329 uint32_t cNormalInputPairs = 0; \
3330 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
3331 { \
3332 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() \
3333 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
3334 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src(a_cBits) \
3335 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
3336 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3337 cNormalInputPairs++; \
3338 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3339 { \
3340 iTest -= 1; \
3341 continue; \
3342 } \
3343 \
3344 uint16_t const fFcw = RandFcw(); \
3345 State.FSW = RandFsw(); \
3346 \
3347 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
3348 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3349 { \
3350 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
3351 uint16_t fFswOut = 0; \
3352 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
3353 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
3354 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3355 iTest, iMask ? 'c' : 'u'); \
3356 } \
3357 } \
3358 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3359 } \
3360}
3361#else
3362# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3363#endif
3364
/*
 * Instantiates the whole test apparatus for an FSW-only FPU binary operation:
 * the subtest descriptor type, the a_aSubTests worker table (from __VA_ARGS__
 * ENTRY* initializers), the data generator (via GEN_FPU_BINARY_FSW, no-op
 * unless TSTIEMAIMPL_WITH_GENERATOR), and the FpuBinaryFsw<a_UpBits>Test()
 * driver which replays the pre-generated FCW/FSW/value rows against the
 * worker(s) and reports any FSW-out mismatch.  A second variation pass (cVars)
 * reruns everything with the native worker, pfnNative.
 */
#define TEST_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3412
/* Instantiate the FSW-only binary-op tests: fcom/fucom against 80-bit, and
   fcom/ficom against 64-/32-bit floats and 32-/16-bit integers. */
TEST_FPU_BINARY_FSW(80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3418
3419
3420/*
3421 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3422 */
/* Subtest descriptor type for the 80-bit compares that return EFLAGS. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/* The workers under test: fcomi and fucomi (ST0 vs ST1 compares -> EFLAGS). */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3430
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pairs appended after the random inputs (see generator below). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for the EFLAGS-returning 80-bit compare workers
 * (fcomi/fucomi), writing one FPU_BINARY_EFL_R80_TEST_T array per worker
 * to @a pOut.  cTests random pairs plus the specials above; at least a
 * quarter of the pairs are forced to be normal values.
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = cTests / 4; /* quota of normal/normal input pairs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            /* Random inputs first, then the special values appended at the end. */
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough random iterations left to fill the quota; redo this one. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3477
3478static void FpuBinaryEflR80Test(void)
3479{
3480 X86FXSTATE State;
3481 RT_ZERO(State);
3482 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3483 {
3484 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3485 continue;
3486
3487 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3488 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3489 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3490 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3491 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3492 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3493 {
3494 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3495 {
3496 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3497 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3498 State.FCW = paTests[iTest].fFcw;
3499 State.FSW = paTests[iTest].fFswIn;
3500 uint16_t uFswOut = 0;
3501 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3502 if ( uFswOut != paTests[iTest].fFswOut
3503 || fEflOut != paTests[iTest].fEflOut)
3504 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3505 "%s -> fsw=%#06x efl=%#08x\n"
3506 "%s expected %#06x %#08x %s (%s)\n",
3507 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3508 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3509 iVar ? " " : "", uFswOut, fEflOut,
3510 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3511 EFlagsDiff(fEflOut, paTests[iTest].fEflOut), FormatFcw(paTests[iTest].fFcw));
3512 }
3513 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3514 }
3515 }
3516}
3517
3518
3519/*********************************************************************************************************************************
3520* x87 FPU Unary Operations *
3521*********************************************************************************************************************************/
3522
3523/*
3524 * Unary FPU operations on one 80-bit floating point value.
3525 *
3526 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3527 * a rounding error or not.
3528 */
/* Subtest descriptor type for unary ops taking and producing one 80-bit value. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* uExtra classification used by the generator to pick input ranges and decide
   whether a one-ULP rounding difference is tolerable (f2xm1 only). */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
/* The unary workers under test; AMD/Intel split entries are used where the
   two CPU vendors produce observably different results. */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX( fabs_r80, kUnary_Accurate),
    ENTRY_EX( fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX( fsqrt_r80, kUnary_Accurate),
    ENTRY_EX( frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
3545
3546#ifdef TSTIEMAIMPL_WITH_GENERATOR
3547
3548static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3549{
3550 if ( enmKind == kUnary_Rounding_F2xm1
3551 && RTFLOAT80U_IS_NORMAL(pr80Val)
3552 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3553 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3554 return true;
3555 return false;
3556}
3557
3558static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3559{
3560 static RTFLOAT80U const s_aSpecials[] =
3561 {
3562 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
3563 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
3564 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
3565 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
3566 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
3567 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
3568 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
3569 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
3570 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
3571 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
3572 };
3573 X86FXSTATE State;
3574 RT_ZERO(State);
3575 uint32_t cMinNormals = cTests / 4;
3576 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3577 {
3578 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
3579 PRTSTREAM pOutFn = pOut;
3580 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3581 {
3582 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3583 continue;
3584 pOutFn = pOutCpu;
3585 }
3586
3587 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
3588 uint32_t iTestOutput = 0;
3589 uint32_t cNormalInputs = 0;
3590 uint32_t cTargetRangeInputs = 0;
3591 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3592 {
3593 RTFLOAT80U InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
3594 if (RTFLOAT80U_IS_NORMAL(&InVal))
3595 {
3596 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
3597 {
3598 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
3599 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
3600 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
3601 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
3602 cTargetRangeInputs++;
3603 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
3604 {
3605 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
3606 cTargetRangeInputs++;
3607 }
3608 }
3609 cNormalInputs++;
3610 }
3611 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3612 {
3613 iTest -= 1;
3614 continue;
3615 }
3616
3617 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
3618 uint16_t const fFcw = RandFcw();
3619 State.FSW = RandFsw();
3620
3621 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3622 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3623 {
3624 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3625 | (iRounding << X86_FCW_RC_SHIFT)
3626 | (iPrecision << X86_FCW_PC_SHIFT)
3627 | X86_FCW_MASK_ALL;
3628 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3629 pfn(&State, &ResM, &InVal);
3630 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3631 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
3632 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3633
3634 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3635 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3636 pfn(&State, &ResU, &InVal);
3637 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3638 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
3639 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3640
3641 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3642 if (fXcpt)
3643 {
3644 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3645 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3646 pfn(&State, &Res1, &InVal);
3647 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3648 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
3649 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3650 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3651 {
3652 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3653 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3654 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3655 pfn(&State, &Res2, &InVal);
3656 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3657 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
3658 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3659 }
3660 if (!RT_IS_POWER_OF_TWO(fXcpt))
3661 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3662 if (fUnmasked & fXcpt)
3663 {
3664 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3665 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3666 pfn(&State, &Res3, &InVal);
3667 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3668 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
3669 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3670 }
3671 }
3672 }
3673 }
3674 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
3675 }
3676}
3677#endif
3678
3679static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
3680{
3681 if (fFcw1 == fFcw2)
3682 return true;
3683 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
3684 {
3685 *pfRndErr = true;
3686 return true;
3687 }
3688 return false;
3689}
3690
3691static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
3692{
3693 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
3694 return true;
3695 if ( fRndErrOk
3696 && pr80Val1->s.fSign == pr80Val2->s.fSign)
3697 {
3698 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
3699 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
3700 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
3701 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
3702 ||
3703 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
3704 && pr80Val1->s.uMantissa == UINT64_MAX
3705 && pr80Val2->s.uMantissa == RT_BIT_64(63))
3706 ||
3707 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
3708 && pr80Val2->s.uMantissa == UINT64_MAX
3709 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
3710 {
3711 *pfRndErr = true;
3712 return true;
3713 }
3714 }
3715 return false;
3716}
3717
3718
/**
 * Replays the pre-generated unary 80-bit test data (fabs, fchs, f2xm1, fsqrt,
 * frndint, fsin, fcos) and verifies FSW and the result value, tolerating a
 * one-ULP rounding difference for tests the generator flagged via FCW bit 7.
 * Prints how many of the flagged tests actually differed.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;         /* tests that passed only thanks to rounding slack */
        uint32_t cPossibleRndErrs = 0; /* tests flagged as allowed to have rounding errors */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* FCW bit 7 is reserved by hardware; the generator borrows it to flag
                   tests where a one-ULP rounding difference is acceptable. */
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
3767
3768
3769/*
3770 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
3771 */
/* Subtest descriptor type for unary ops that only modify FSW. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/* The workers under test; fxam gets uExtra=1 so the generator/test also
   exercise the empty-register (FTW) handling. */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1),
};
3779
3780#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary workers (ftst, fxam), writing one
 * FPU_UNARY_R80_TEST_T array per worker.  ftst is swept over all rounding and
 * precision settings with the exception mask flipped both ways; fxam is run
 * once per input, randomly marking the ST slot empty via the FTW register and
 * recording that in the (hardware-reserved) FCW bit 7 of the test row.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* quota of normal inputs */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        /* Prefer the native (assembly) worker when generating reference data. */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU-specific entry: only generate on a matching host, into the CPU stream. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Redo this iteration with a fresh random value to fill the quota. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
3854#endif
3855
3856
3857static void FpuUnaryFswR80Test(void)
3858{
3859 X86FXSTATE State;
3860 RT_ZERO(State);
3861 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
3862 {
3863 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
3864 continue;
3865
3866 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
3867 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
3868 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
3869 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
3870 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3871 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3872 {
3873 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3874 {
3875 RTFLOAT80U const InVal = paTests[iTest].InVal;
3876 uint16_t fFswOut = 0;
3877 State.FSW = paTests[iTest].fFswIn;
3878 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
3879 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
3880 pfn(&State, &fFswOut, &InVal);
3881 if (fFswOut != paTests[iTest].fFswOut)
3882 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3883 "%s -> fsw=%#06x\n"
3884 "%s expected %#06x %s (%s%s)\n",
3885 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3886 FormatR80(&paTests[iTest].InVal),
3887 iVar ? " " : "", fFswOut,
3888 iVar ? " " : "", paTests[iTest].fFswOut,
3889 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
3890 paTests[iTest].fFcw & 0x80 ? " empty" : "");
3891 }
3892 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
3893 }
3894 }
3895}
3896
3897/*
3898 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
3899 */
/* Subtest descriptor type for unary ops producing two 80-bit results. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/* The workers under test; fptan and fsincos are split per CPU vendor because
   AMD and Intel produce observably different results (see entry comments). */
static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
3910
3911#ifdef TSTIEMAIMPL_WITH_GENERATOR
3912static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3913{
3914 static RTFLOAT80U const s_aSpecials[] =
3915 {
3916 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
3917 };
3918
3919 X86FXSTATE State;
3920 RT_ZERO(State);
3921 uint32_t cMinNormals = cTests / 4;
3922 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
3923 {
3924 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
3925 PRTSTREAM pOutFn = pOut;
3926 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3927 {
3928 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3929 continue;
3930 pOutFn = pOutCpu;
3931 }
3932
3933 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
3934 uint32_t iTestOutput = 0;
3935 uint32_t cNormalInputs = 0;
3936 uint32_t cTargetRangeInputs = 0;
3937 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3938 {
3939 RTFLOAT80U InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
3940 if (RTFLOAT80U_IS_NORMAL(&InVal))
3941 {
3942 if (iFn != 0)
3943 {
3944 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
3945 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
3946 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
3947 cTargetRangeInputs++;
3948 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
3949 {
3950 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
3951 cTargetRangeInputs++;
3952 }
3953 }
3954 cNormalInputs++;
3955 }
3956 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3957 {
3958 iTest -= 1;
3959 continue;
3960 }
3961
3962 uint16_t const fFcwExtra = 0; /* for rounding error indication */
3963 uint16_t const fFcw = RandFcw();
3964 State.FSW = RandFsw();
3965
3966 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3967 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3968 {
3969 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3970 | (iRounding << X86_FCW_RC_SHIFT)
3971 | (iPrecision << X86_FCW_PC_SHIFT)
3972 | X86_FCW_MASK_ALL;
3973 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
3974 pfn(&State, &ResM, &InVal);
3975 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3976 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
3977 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
3978
3979 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3980 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
3981 pfn(&State, &ResU, &InVal);
3982 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3983 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
3984 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
3985
3986 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3987 if (fXcpt)
3988 {
3989 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3990 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
3991 pfn(&State, &Res1, &InVal);
3992 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3993 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
3994 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3995 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3996 {
3997 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3998 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3999 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4000 pfn(&State, &Res2, &InVal);
4001 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4002 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4003 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4004 }
4005 if (!RT_IS_POWER_OF_TWO(fXcpt))
4006 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4007 if (fUnmasked & fXcpt)
4008 {
4009 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4010 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4011 pfn(&State, &Res3, &InVal);
4012 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4013 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4014 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4015 }
4016 }
4017 }
4018 }
4019 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4020 }
4021}
4022#endif
4023
4024
4025static void FpuUnaryTwoR80Test(void)
4026{
4027 X86FXSTATE State;
4028 RT_ZERO(State);
4029 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4030 {
4031 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4032 continue;
4033
4034 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4035 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4036 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4037 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4038 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4039 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4040 {
4041 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4042 {
4043 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4044 RTFLOAT80U const InVal = paTests[iTest].InVal;
4045 State.FCW = paTests[iTest].fFcw;
4046 State.FSW = paTests[iTest].fFswIn;
4047 pfn(&State, &Res, &InVal);
4048 if ( Res.FSW != paTests[iTest].fFswOut
4049 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4050 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4051 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4052 "%s -> fsw=%#06x %s %s\n"
4053 "%s expected %#06x %s %s %s%s%s (%s)\n",
4054 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4055 FormatR80(&paTests[iTest].InVal),
4056 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4057 iVar ? " " : "", paTests[iTest].fFswOut,
4058 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4059 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4060 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4061 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4062 }
4063 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4064 }
4065 }
4066}
4067
4068
4069
4070int main(int argc, char **argv)
4071{
4072 int rc = RTR3InitExe(argc, &argv, 0);
4073 if (RT_FAILURE(rc))
4074 return RTMsgInitFailure(rc);
4075
4076 /*
4077 * Determin the host CPU.
4078 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4079 */
4080#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4081 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4082 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4083 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4084#else
4085 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4086#endif
4087
4088 /*
4089 * Parse arguments.
4090 */
4091 enum { kModeNotSet, kModeTest, kModeGenerate }
4092 enmMode = kModeNotSet;
4093 bool fInt = true;
4094 bool fFpuLdSt = true;
4095 bool fFpuBinary1 = true;
4096 bool fFpuBinary2 = true;
4097 bool fFpuOther = true;
4098 bool fCpuData = true;
4099 bool fCommonData = true;
4100 uint32_t const cDefaultTests = 96;
4101 uint32_t cTests = cDefaultTests;
4102 RTGETOPTDEF const s_aOptions[] =
4103 {
4104 // mode:
4105 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4106 { "--test", 't', RTGETOPT_REQ_NOTHING },
4107 // test selection (both)
4108 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4109 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4110 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4111 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4112 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4113 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4114 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4115 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4116 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4117 { "--include", 'I', RTGETOPT_REQ_STRING },
4118 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4119 // generation parameters
4120 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4121 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4122 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4123 };
4124
4125 RTGETOPTSTATE State;
4126 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4127 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4128
4129 RTGETOPTUNION ValueUnion;
4130 while ((rc = RTGetOpt(&State, &ValueUnion)))
4131 {
4132 switch (rc)
4133 {
4134 case 'g':
4135 enmMode = kModeGenerate;
4136 break;
4137 case 't':
4138 enmMode = kModeTest;
4139 break;
4140
4141 case 'a':
4142 fCpuData = true;
4143 fCommonData = true;
4144 fInt = true;
4145 fFpuLdSt = true;
4146 fFpuBinary1 = true;
4147 fFpuBinary2 = true;
4148 fFpuOther = true;
4149 break;
4150 case 'z':
4151 fCpuData = false;
4152 fCommonData = false;
4153 fInt = false;
4154 fFpuLdSt = false;
4155 fFpuBinary1 = false;
4156 fFpuBinary2 = false;
4157 fFpuOther = false;
4158 break;
4159
4160 case 'F':
4161 fFpuLdSt = true;
4162 break;
4163 case 'O':
4164 fFpuOther = true;
4165 break;
4166 case 'B':
4167 fFpuBinary1 = true;
4168 break;
4169 case 'P':
4170 fFpuBinary2 = true;
4171 break;
4172 case 'i':
4173 fInt = true;
4174 break;
4175
4176 case 'I':
4177 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4178 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4179 RT_ELEMENTS(g_apszIncludeTestPatterns));
4180 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4181 break;
4182 case 'X':
4183 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4184 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4185 RT_ELEMENTS(g_apszExcludeTestPatterns));
4186 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4187 break;
4188
4189 case 'm':
4190 fCommonData = true;
4191 break;
4192 case 'c':
4193 fCpuData = true;
4194 break;
4195 case 'n':
4196 cTests = ValueUnion.u32;
4197 break;
4198
4199 case 'h':
4200 RTPrintf("usage: %s <-g|-t> [options]\n"
4201 "\n"
4202 "Mode:\n"
4203 " -g, --generate\n"
4204 " Generate test data.\n"
4205 " -t, --test\n"
4206 " Execute tests.\n"
4207 "\n"
4208 "Test selection (both modes):\n"
4209 " -a, --all\n"
4210 " Enable all tests and generated test data. (default)\n"
4211 " -z, --zap, --none\n"
4212 " Disable all tests and test data types.\n"
4213 " -i, --int\n"
4214 " Enable non-FPU tests.\n"
4215 " -F, --fpu-ld-st\n"
4216 " Enable FPU load and store tests.\n"
4217 " -B, --fpu-binary-1\n"
4218 " Enable FPU binary 80-bit FP tests.\n"
4219 " -P, --fpu-binary-2\n"
4220 " Enable FPU binary 64- and 32-bit FP tests.\n"
4221 " -O, --fpu-other\n"
4222 " Enable other FPU tests.\n"
4223 " -I,--include=<test-patter>\n"
4224 " Enable tests matching the given pattern.\n"
4225 " -X,--exclude=<test-patter>\n"
4226 " Skip tests matching the given pattern (overrides --include).\n"
4227 "\n"
4228 "Generation:\n"
4229 " -m, --common\n"
4230 " Enable generating common test data.\n"
4231 " -c, --only-cpu\n"
4232 " Enable generating CPU specific test data.\n"
4233 " -n, --number-of-test <count>\n"
4234 " Number of tests to generate. Default: %u\n"
4235 , argv[0], cDefaultTests);
4236 return RTEXITCODE_SUCCESS;
4237 default:
4238 return RTGetOptPrintError(rc, &ValueUnion);
4239 }
4240 }
4241
4242 /*
4243 * Generate data?
4244 */
4245 if (enmMode == kModeGenerate)
4246 {
4247#ifdef TSTIEMAIMPL_WITH_GENERATOR
4248 char szCpuDesc[256] = {0};
4249 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
4250 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
4251# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
4252 const char * const pszBitBucket = "NUL";
4253# else
4254 const char * const pszBitBucket = "/dev/null";
4255# endif
4256
4257 if (cTests == 0)
4258 cTests = cDefaultTests;
4259 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
4260 g_cZeroSrcTests = g_cZeroDstTests * 2;
4261
4262 if (fInt)
4263 {
4264 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
4265 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4266 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4267 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
4268 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4269 if (!pStrmData || !pStrmDataCpu)
4270 return RTEXITCODE_FAILURE;
4271
4272 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
4273 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
4274 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
4275 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
4276 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
4277 UnaryGenerate(pStrmData, cTests);
4278 ShiftGenerate(pStrmDataCpu, cTests);
4279 MulDivGenerate(pStrmDataCpu, cTests);
4280
4281 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4282 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4283 if (rcExit != RTEXITCODE_SUCCESS)
4284 return rcExit;
4285 }
4286
4287 if (fFpuLdSt)
4288 {
4289 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4290 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4291 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4292 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4293 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4294 if (!pStrmData || !pStrmDataCpu)
4295 return RTEXITCODE_FAILURE;
4296
4297 FpuLdConstGenerate(pStrmData, cTests);
4298 FpuLdIntGenerate(pStrmData, cTests);
4299 FpuLdD80Generate(pStrmData, cTests);
4300 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4301 FpuStD80Generate(pStrmData, cTests);
4302 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4303 FpuLdMemGenerate(pStrmData, cTests2);
4304 FpuStMemGenerate(pStrmData, cTests2);
4305
4306 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4307 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4308 if (rcExit != RTEXITCODE_SUCCESS)
4309 return rcExit;
4310 }
4311
4312 if (fFpuBinary1)
4313 {
4314 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4315 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4316 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4317 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4318 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4319 if (!pStrmData || !pStrmDataCpu)
4320 return RTEXITCODE_FAILURE;
4321
4322 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4323 FpuBinaryFswR80Generate(pStrmData, cTests);
4324 FpuBinaryEflR80Generate(pStrmData, cTests);
4325
4326 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4327 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4328 if (rcExit != RTEXITCODE_SUCCESS)
4329 return rcExit;
4330 }
4331
4332 if (fFpuBinary2)
4333 {
4334 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4335 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4336 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4337 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4338 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4339 if (!pStrmData || !pStrmDataCpu)
4340 return RTEXITCODE_FAILURE;
4341
4342 FpuBinaryR64Generate(pStrmData, cTests);
4343 FpuBinaryR32Generate(pStrmData, cTests);
4344 FpuBinaryI32Generate(pStrmData, cTests);
4345 FpuBinaryI16Generate(pStrmData, cTests);
4346 FpuBinaryFswR64Generate(pStrmData, cTests);
4347 FpuBinaryFswR32Generate(pStrmData, cTests);
4348 FpuBinaryFswI32Generate(pStrmData, cTests);
4349 FpuBinaryFswI16Generate(pStrmData, cTests);
4350
4351 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4352 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4353 if (rcExit != RTEXITCODE_SUCCESS)
4354 return rcExit;
4355 }
4356
4357 if (fFpuOther)
4358 {
4359 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4360 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4361 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4362 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4363 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4364 if (!pStrmData || !pStrmDataCpu)
4365 return RTEXITCODE_FAILURE;
4366
4367 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4368 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4369 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4370
4371 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4372 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4373 if (rcExit != RTEXITCODE_SUCCESS)
4374 return rcExit;
4375 }
4376
4377 return RTEXITCODE_SUCCESS;
4378#else
4379 return RTMsgErrorExitFailure("Test data generator not compiled in!");
4380#endif
4381 }
4382
4383 /*
4384 * Do testing. Currrently disabled by default as data needs to be checked
4385 * on both intel and AMD systems first.
4386 */
4387 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4388 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4389 if (enmMode == kModeTest)
4390 {
4391 RTTestBanner(g_hTest);
4392
4393 /* Allocate guarded memory for use in the tests. */
4394#define ALLOC_GUARDED_VAR(a_puVar) do { \
4395 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
4396 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
4397 } while (0)
4398 ALLOC_GUARDED_VAR(g_pu8);
4399 ALLOC_GUARDED_VAR(g_pu16);
4400 ALLOC_GUARDED_VAR(g_pu32);
4401 ALLOC_GUARDED_VAR(g_pu64);
4402 ALLOC_GUARDED_VAR(g_pu128);
4403 ALLOC_GUARDED_VAR(g_pu8Two);
4404 ALLOC_GUARDED_VAR(g_pu16Two);
4405 ALLOC_GUARDED_VAR(g_pu32Two);
4406 ALLOC_GUARDED_VAR(g_pu64Two);
4407 ALLOC_GUARDED_VAR(g_pu128Two);
4408 ALLOC_GUARDED_VAR(g_pfEfl);
4409 if (RTTestErrorCount(g_hTest) == 0)
4410 {
4411 if (fInt)
4412 {
4413 BinU8Test();
4414 BinU16Test();
4415 BinU32Test();
4416 BinU64Test();
4417 XchgTest();
4418 XaddTest();
4419 CmpXchgTest();
4420 CmpXchg8bTest();
4421 CmpXchg16bTest();
4422 ShiftDblTest();
4423 UnaryTest();
4424 ShiftTest();
4425 MulDivTest();
4426 BswapTest();
4427 }
4428
4429 if (fFpuLdSt)
4430 {
4431 FpuLoadConstTest();
4432 FpuLdMemTest();
4433 FpuLdIntTest();
4434 FpuLdD80Test();
4435 FpuStMemTest();
4436 FpuStIntTest();
4437 FpuStD80Test();
4438 }
4439
4440 if (fFpuBinary1)
4441 {
4442 FpuBinaryR80Test();
4443 FpuBinaryFswR80Test();
4444 FpuBinaryEflR80Test();
4445 }
4446
4447 if (fFpuBinary2)
4448 {
4449 FpuBinaryR64Test();
4450 FpuBinaryR32Test();
4451 FpuBinaryI32Test();
4452 FpuBinaryI16Test();
4453 FpuBinaryFswR64Test();
4454 FpuBinaryFswR32Test();
4455 FpuBinaryFswI32Test();
4456 FpuBinaryFswI16Test();
4457 }
4458
4459 if (fFpuOther)
4460 {
4461 FpuUnaryR80Test();
4462 FpuUnaryFswR80Test();
4463 FpuUnaryTwoR80Test();
4464 }
4465 }
4466 return RTTestSummaryAndDestroy(g_hTest);
4467 }
4468 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
4469}
4470
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette