VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp@ 53131

Last change on this file since 53131 was 53131, checked in by vboxsync, 11 years ago

DIS: 6251: Added AVX/VEX 3-byte instructions support.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 64.9 KB
Line 
1/* $Id: DisasmFormatYasm.cpp 53131 2014-10-23 17:13:53Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include <VBox/dis.h>
23#include "DisasmInternal.h"
24#include <iprt/string.h>
25#include <iprt/assert.h>
26#include <iprt/ctype.h>
27
28
29/*******************************************************************************
30* Global Variables *
31*******************************************************************************/
/**
 * Run of blanks used as the copy source when padding output columns, i.e. by
 * PUT_STR(g_szSpaces, cch).
 *
 * The string must be at least as long as the largest padding count handed to
 * PUT_STR, otherwise the memcpy in that macro reads past the end of the array.
 */
static const char g_szSpaces[] =
"                                                                                ";
/*
 * Register name tables, indexed by register number.
 *
 * Every row is a fixed-size char array.  Names shorter than the row are
 * zero-filled by the compiler, and the lookup helpers rely on that: they
 * derive the name length by testing the trailing bytes of a row for NUL.
 */

/** 8-bit general registers; 16..19 are the REX-only spl/bpl/sil/dil. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al",   "cl",   "dl",   "bl",   "ah",   "ch",   "dh",   "bh",
    "r8b",  "r9b",  "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
    "spl",  "bpl",  "sil",  "dil"
};

/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",   "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};

/** 16-bit addressing mode base/index combinations. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
};

/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",  "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax",  "rcx",  "rdx",  "rbx",  "rsp",  "rbp",  "rsi",  "rdi",
    "r8",   "r9",   "r10",  "r11",  "r12",  "r13",  "r14",  "r15"
};

/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** x87 stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

/** YMM registers. */
static const char g_aszYasmRegYMM[16][6] =
{
    "ymm0",  "ymm1",  "ymm2",  "ymm3",  "ymm4",  "ymm5",  "ymm6",  "ymm7",
    "ymm8",  "ymm9",  "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
};

/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",  "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11", "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",  "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11", "dr12", "dr13", "dr14", "dr15"
};

/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",  "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11", "tr12", "tr13", "tr14", "tr15"
};
86
87
88
89/**
90 * Gets the base register name for the given parameter.
91 *
92 * @returns Pointer to the register name.
93 * @param pDis The disassembler state.
94 * @param pParam The parameter.
95 * @param pcchReg Where to store the length of the name.
96 */
97static const char *disasmFormatYasmBaseReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
98{
99 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
100 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_YMM
101 | DISUSE_REG_CR | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
102
103 {
104 case DISUSE_REG_GEN8:
105 {
106 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen8));
107 const char *psz = g_aszYasmRegGen8[pParam->Base.idxGenReg];
108 *pcchReg = 2 + !!psz[2] + !!psz[3];
109 return psz;
110 }
111
112 case DISUSE_REG_GEN16:
113 {
114 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
115 const char *psz = g_aszYasmRegGen16[pParam->Base.idxGenReg];
116 *pcchReg = 2 + !!psz[2] + !!psz[3];
117 return psz;
118 }
119
120 case DISUSE_REG_GEN32:
121 {
122 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
123 const char *psz = g_aszYasmRegGen32[pParam->Base.idxGenReg];
124 *pcchReg = 2 + !!psz[2] + !!psz[3];
125 return psz;
126 }
127
128 case DISUSE_REG_GEN64:
129 {
130 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
131 const char *psz = g_aszYasmRegGen64[pParam->Base.idxGenReg];
132 *pcchReg = 2 + !!psz[2] + !!psz[3];
133 return psz;
134 }
135
136 case DISUSE_REG_FP:
137 {
138 Assert(pParam->Base.idxFpuReg < RT_ELEMENTS(g_aszYasmRegFP));
139 const char *psz = g_aszYasmRegFP[pParam->Base.idxFpuReg];
140 *pcchReg = 3;
141 return psz;
142 }
143
144 case DISUSE_REG_MMX:
145 {
146 Assert(pParam->Base.idxMmxReg < RT_ELEMENTS(g_aszYasmRegMMX));
147 const char *psz = g_aszYasmRegMMX[pParam->Base.idxMmxReg];
148 *pcchReg = 3;
149 return psz;
150 }
151
152 case DISUSE_REG_XMM:
153 {
154 Assert(pParam->Base.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
155 const char *psz = g_aszYasmRegXMM[pParam->Base.idxXmmReg];
156 *pcchReg = 4 + !!psz[4];
157 return psz;
158 }
159
160 case DISUSE_REG_YMM:
161 {
162 Assert(pParam->Base.idxYmmReg < RT_ELEMENTS(g_aszYasmRegYMM));
163 const char *psz = g_aszYasmRegYMM[pParam->Base.idxYmmReg];
164 *pcchReg = 4 + !!psz[4];
165 return psz;
166 }
167
168 case DISUSE_REG_CR:
169 {
170 Assert(pParam->Base.idxCtrlReg < RT_ELEMENTS(g_aszYasmRegCRx));
171 const char *psz = g_aszYasmRegCRx[pParam->Base.idxCtrlReg];
172 *pcchReg = 3;
173 return psz;
174 }
175
176 case DISUSE_REG_DBG:
177 {
178 Assert(pParam->Base.idxDbgReg < RT_ELEMENTS(g_aszYasmRegDRx));
179 const char *psz = g_aszYasmRegDRx[pParam->Base.idxDbgReg];
180 *pcchReg = 3;
181 return psz;
182 }
183
184 case DISUSE_REG_SEG:
185 {
186 Assert(pParam->Base.idxSegReg < RT_ELEMENTS(g_aszYasmRegCRx));
187 const char *psz = g_aszYasmRegSeg[pParam->Base.idxSegReg];
188 *pcchReg = 2;
189 return psz;
190 }
191
192 case DISUSE_REG_TEST:
193 {
194 Assert(pParam->Base.idxTestReg < RT_ELEMENTS(g_aszYasmRegTRx));
195 const char *psz = g_aszYasmRegTRx[pParam->Base.idxTestReg];
196 *pcchReg = 3;
197 return psz;
198 }
199
200 default:
201 AssertMsgFailed(("%#x\n", pParam->fUse));
202 *pcchReg = 3;
203 return "r??";
204 }
205}
206
207
208/**
209 * Gets the index register name for the given parameter.
210 *
211 * @returns The index register name.
212 * @param pDis The disassembler state.
213 * @param pParam The parameter.
214 * @param pcchReg Where to store the length of the name.
215 */
216static const char *disasmFormatYasmIndexReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
217{
218 switch (pDis->uAddrMode)
219 {
220 case DISCPUMODE_16BIT:
221 {
222 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
223 const char *psz = g_aszYasmRegGen16[pParam->Index.idxGenReg];
224 *pcchReg = 2 + !!psz[2] + !!psz[3];
225 return psz;
226 }
227
228 case DISCPUMODE_32BIT:
229 {
230 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
231 const char *psz = g_aszYasmRegGen32[pParam->Index.idxGenReg];
232 *pcchReg = 2 + !!psz[2] + !!psz[3];
233 return psz;
234 }
235
236 case DISCPUMODE_64BIT:
237 {
238 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
239 const char *psz = g_aszYasmRegGen64[pParam->Index.idxGenReg];
240 *pcchReg = 2 + !!psz[2] + !!psz[3];
241 return psz;
242 }
243
244 default:
245 AssertMsgFailed(("%#x %#x\n", pParam->fUse, pDis->uAddrMode));
246 *pcchReg = 3;
247 return "r??";
248 }
249}
250
251
252/**
253 * Formats the current instruction in Yasm (/ Nasm) style.
254 *
255 *
256 * @returns The number of output characters. If this is >= cchBuf, then the content
257 * of pszBuf will be truncated.
258 * @param pDis Pointer to the disassembler state.
259 * @param pszBuf The output buffer.
260 * @param cchBuf The size of the output buffer.
261 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
262 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
263 * @param pvUser User argument for pfnGetSymbol.
264 */
265DISDECL(size_t) DISFormatYasmEx(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
266 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
267{
268/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
269 /*
270 * Input validation and massaging.
271 */
272 AssertPtr(pDis);
273 AssertPtrNull(pszBuf);
274 Assert(pszBuf || !cchBuf);
275 AssertPtrNull(pfnGetSymbol);
276 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
277 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
278 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
279 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
280 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
281
282 PCDISOPCODE const pOp = pDis->pCurInstr;
283
284 /*
285 * Output macros
286 */
287 char *pszDst = pszBuf;
288 size_t cchDst = cchBuf;
289 size_t cchOutput = 0;
290#define PUT_C(ch) \
291 do { \
292 cchOutput++; \
293 if (cchDst > 1) \
294 { \
295 cchDst--; \
296 *pszDst++ = (ch); \
297 } \
298 } while (0)
299#define PUT_STR(pszSrc, cchSrc) \
300 do { \
301 cchOutput += (cchSrc); \
302 if (cchDst > (cchSrc)) \
303 { \
304 memcpy(pszDst, (pszSrc), (cchSrc)); \
305 pszDst += (cchSrc); \
306 cchDst -= (cchSrc); \
307 } \
308 else if (cchDst > 1) \
309 { \
310 memcpy(pszDst, (pszSrc), cchDst - 1); \
311 pszDst += cchDst - 1; \
312 cchDst = 1; \
313 } \
314 } while (0)
315#define PUT_SZ(sz) \
316 PUT_STR((sz), sizeof(sz) - 1)
317#define PUT_SZ_STRICT(szStrict, szRelaxed) \
318 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
319#define PUT_PSZ(psz) \
320 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
321#define PUT_NUM(cch, fmt, num) \
322 do { \
323 cchOutput += (cch); \
324 if (cchDst > 1) \
325 { \
326 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
327 pszDst += cchTmp; \
328 cchDst -= cchTmp; \
329 Assert(cchTmp == (cch) || cchDst == 1); \
330 } \
331 } while (0)
332/** @todo add two flags for choosing between %X / %x and h / 0x. */
333#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
334#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
335#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
336#define PUT_NUM_64(num) PUT_NUM(18, "0%016RX64h", (uint64_t)(num))
337
338#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
339 do { \
340 if ((stype)(num) >= 0) \
341 { \
342 PUT_C('+'); \
343 PUT_NUM(cch, fmt, (utype)(num)); \
344 } \
345 else \
346 { \
347 PUT_C('-'); \
348 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
349 } \
350 } while (0)
351#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0%02xh", num, int8_t, uint8_t)
352#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0%04xh", num, int16_t, uint16_t)
353#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0%08xh", num, int32_t, uint32_t)
354#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0%016RX64h", num, int64_t, uint64_t)
355
356#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
357 do { \
358 if (RT_SUCCESS(a_rcSym)) \
359 { \
360 PUT_SZ(a_szStart); \
361 PUT_PSZ(szSymbol); \
362 if (off != 0) \
363 { \
364 if ((int8_t)off == off) \
365 PUT_NUM_S8(off); \
366 else if ((int16_t)off == off) \
367 PUT_NUM_S16(off); \
368 else if ((int32_t)off == off) \
369 PUT_NUM_S32(off); \
370 else \
371 PUT_NUM_S64(off); \
372 } \
373 PUT_C(a_chEnd); \
374 } \
375 } while (0)
376
377#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
378 do { \
379 if (pfnGetSymbol) \
380 { \
381 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
382 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
383 } \
384 } while (0)
385
386
387 /*
388 * The address?
389 */
390 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
391 {
392#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
393 if (pDis->uInstrAddr >= _4G)
394 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
395#endif
396 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
397 PUT_C(' ');
398 }
399
400 /*
401 * The opcode bytes?
402 */
403 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
404 {
405 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
406 cchOutput += cchTmp;
407 if (cchDst > 1)
408 {
409 if (cchTmp <= cchDst)
410 {
411 cchDst -= cchTmp;
412 pszDst += cchTmp;
413 }
414 else
415 {
416 pszDst += cchDst - 1;
417 cchDst = 1;
418 }
419 }
420
421 /* Some padding to align the instruction. */
422 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
423 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
424 + 2;
425 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
426 PUT_STR(g_szSpaces, cchPadding);
427 }
428
429
430 /*
431 * Filter out invalid opcodes first as they need special
432 * treatment. UD2 is an exception and should be handled normally.
433 */
434 size_t const offInstruction = cchOutput;
435 if ( pOp->uOpcode == OP_INVALID
436 || ( pOp->uOpcode == OP_ILLUD2
437 && (pDis->fPrefix & DISPREFIX_LOCK)))
438 PUT_SZ("Illegal opcode");
439 else
440 {
441 /*
442 * Prefixes
443 */
444 if (pDis->fPrefix & DISPREFIX_LOCK)
445 PUT_SZ("lock ");
446 if(pDis->fPrefix & DISPREFIX_REP)
447 PUT_SZ("rep ");
448 else if(pDis->fPrefix & DISPREFIX_REPNE)
449 PUT_SZ("repne ");
450
451 /*
452 * Adjust the format string to the correct mnemonic
453 * or to avoid things the assembler cannot handle correctly.
454 */
455 char szTmpFmt[48];
456 const char *pszFmt = pOp->pszOpcode;
457 switch (pOp->uOpcode)
458 {
459 case OP_JECXZ:
460 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "jcxz %Jb" : pDis->uOpMode == DISCPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
461 break;
462 case OP_PUSHF:
463 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "pushfd" : "pushfq";
464 break;
465 case OP_POPF:
466 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "popfd" : "popfq";
467 break;
468 case OP_PUSHA:
469 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushaw" : "pushad";
470 break;
471 case OP_POPA:
472 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popaw" : "popad";
473 break;
474 case OP_INSB:
475 pszFmt = "insb";
476 break;
477 case OP_INSWD:
478 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "insw" : pDis->uOpMode == DISCPUMODE_32BIT ? "insd" : "insq";
479 break;
480 case OP_OUTSB:
481 pszFmt = "outsb";
482 break;
483 case OP_OUTSWD:
484 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "outsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "outsd" : "outsq";
485 break;
486 case OP_MOVSB:
487 pszFmt = "movsb";
488 break;
489 case OP_MOVSWD:
490 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "movsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "movsd" : "movsq";
491 break;
492 case OP_CMPSB:
493 pszFmt = "cmpsb";
494 break;
495 case OP_CMPWD:
496 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cmpsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cmpsd" : "cmpsq";
497 break;
498 case OP_SCASB:
499 pszFmt = "scasb";
500 break;
501 case OP_SCASWD:
502 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "scasw" : pDis->uOpMode == DISCPUMODE_32BIT ? "scasd" : "scasq";
503 break;
504 case OP_LODSB:
505 pszFmt = "lodsb";
506 break;
507 case OP_LODSWD:
508 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "lodsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "lodsd" : "lodsq";
509 break;
510 case OP_STOSB:
511 pszFmt = "stosb";
512 break;
513 case OP_STOSWD:
514 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "stosw" : pDis->uOpMode == DISCPUMODE_32BIT ? "stosd" : "stosq";
515 break;
516 case OP_CBW:
517 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cbw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cwde" : "cdqe";
518 break;
519 case OP_CWD:
520 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cwd" : pDis->uOpMode == DISCPUMODE_32BIT ? "cdq" : "cqo";
521 break;
522 case OP_SHL:
523 Assert(pszFmt[3] == '/');
524 pszFmt += 4;
525 break;
526 case OP_XLAT:
527 pszFmt = "xlatb";
528 break;
529 case OP_INT3:
530 pszFmt = "int3";
531 break;
532
533 /*
534 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
535 */
536 case OP_NOP:
537 if (pDis->bOpCode == 0x90)
538 /* fine, fine */;
539 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
540 pszFmt = "prefetch %Eb";
541 else if (pDis->bOpCode == 0x1f)
542 {
543 Assert(pDis->cbInstr >= 3);
544 PUT_SZ("db 00fh, 01fh,");
545 PUT_NUM_8(MAKE_MODRM(pDis->ModRM.Bits.Mod, pDis->ModRM.Bits.Reg, pDis->ModRM.Bits.Rm));
546 for (unsigned i = 3; i < pDis->cbInstr; i++)
547 {
548 PUT_C(',');
549 PUT_NUM_8(0x90); ///@todo fixme.
550 }
551 pszFmt = "";
552 }
553 break;
554
555 default:
556 /* ST(X) -> stX (floating point) */
557 if (*pszFmt == 'f' && strchr(pszFmt, '('))
558 {
559 char *pszFmtDst = szTmpFmt;
560 char ch;
561 do
562 {
563 ch = *pszFmt++;
564 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
565 {
566 *pszFmtDst++ = 's';
567 *pszFmtDst++ = 't';
568 pszFmt += 2;
569 ch = *pszFmt;
570 Assert(pszFmt[1] == ')');
571 pszFmt += 2;
572 *pszFmtDst++ = ch;
573 }
574 else
575 *pszFmtDst++ = ch;
576 } while (ch != '\0');
577 pszFmt = szTmpFmt;
578 }
579 break;
580
581 /*
582 * Horrible hacks.
583 */
584 case OP_FLD:
585 if (pDis->bOpCode == 0xdb) /* m80fp workaround. */
586 *(int *)&pDis->Param1.fParam &= ~0x1f; /* make it pure OP_PARM_M */
587 break;
588 case OP_LAR: /* hack w -> v, probably not correct. */
589 *(int *)&pDis->Param2.fParam &= ~0x1f;
590 *(int *)&pDis->Param2.fParam |= OP_PARM_v;
591 break;
592 }
593
594 /*
595 * Formatting context and associated macros.
596 */
597 PCDISOPPARAM pParam = &pDis->Param1;
598 int iParam = 1;
599
600#define PUT_FAR() \
601 do { \
602 if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_p \
603 && pOp->uOpcode != OP_LDS /* table bugs? */ \
604 && pOp->uOpcode != OP_LES \
605 && pOp->uOpcode != OP_LFS \
606 && pOp->uOpcode != OP_LGS \
607 && pOp->uOpcode != OP_LSS ) \
608 PUT_SZ("far "); \
609 } while (0)
610 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
611 /** @todo drop the word/dword/qword override when the src/dst is a register (except for movsx/movzx). */
612#define PUT_SIZE_OVERRIDE() \
613 do { \
614 switch (OP_PARM_VSUBTYPE(pParam->fParam)) \
615 { \
616 case OP_PARM_v: \
617 case OP_PARM_y: \
618 switch (pDis->uOpMode) \
619 { \
620 case DISCPUMODE_16BIT: PUT_SZ("word "); break; \
621 case DISCPUMODE_32BIT: PUT_SZ("dword "); break; \
622 case DISCPUMODE_64BIT: PUT_SZ("qword "); break; \
623 default: break; \
624 } \
625 break; \
626 case OP_PARM_b: PUT_SZ("byte "); break; \
627 case OP_PARM_w: \
628 if (OP_PARM_VTYPE(pParam->fParam) == OP_PARM_W || \
629 OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M) \
630 { \
631 if (VEXREG_IS256B(pDis->bVexDestReg)) PUT_SZ("dword "); \
632 else PUT_SZ("word "); \
633 } \
634 break; \
635 case OP_PARM_d: \
636 if (OP_PARM_VTYPE(pParam->fParam) == OP_PARM_W || \
637 OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M) \
638 { \
639 if (VEXREG_IS256B(pDis->bVexDestReg)) PUT_SZ("qword "); \
640 else PUT_SZ("dword "); \
641 } \
642 break; \
643 case OP_PARM_q: \
644 if (OP_PARM_VTYPE(pParam->fParam) == OP_PARM_W || \
645 OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M) \
646 { \
647 if (VEXREG_IS256B(pDis->bVexDestReg)) PUT_SZ("oword "); \
648 else PUT_SZ("qword "); \
649 } \
650 break; \
651 case OP_PARM_ps: \
652 case OP_PARM_pd: \
653 case OP_PARM_x: if (VEXREG_IS256B(pDis->bVexDestReg)) { PUT_SZ("yword "); break; } \
654 case OP_PARM_ss: \
655 case OP_PARM_sd: \
656 case OP_PARM_dq: PUT_SZ("oword "); break; \
657 case OP_PARM_qq: PUT_SZ("yword "); break; \
658 case OP_PARM_p: break; /* see PUT_FAR */ \
659 case OP_PARM_s: if (pParam->fUse & DISUSE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
660 case OP_PARM_z: break; \
661 case OP_PARM_NONE: \
662 if ( OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M \
663 && ((pParam->fUse & DISUSE_REG_FP) || pOp->uOpcode == OP_FLD)) \
664 PUT_SZ("tword "); \
665 break; \
666 default: break; /*no pointer type specified/necessary*/ \
667 } \
668 } while (0)
669 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
670#define PUT_SEGMENT_OVERRIDE() \
671 do { \
672 if (pDis->fPrefix & DISPREFIX_SEG) \
673 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 3); \
674 } while (0)
675
676
677 /*
678 * Segment prefixing for instructions that don't do memory access.
679 */
680 if ( (pDis->fPrefix & DISPREFIX_SEG)
681 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
682 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
683 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
684 {
685 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 2);
686 PUT_C(' ');
687 }
688
689
690 /*
691 * The formatting loop.
692 */
693 RTINTPTR off;
694 char szSymbol[128];
695 char ch;
696 while ((ch = *pszFmt++) != '\0')
697 {
698 if (ch == '%')
699 {
700 ch = *pszFmt++;
701 switch (ch)
702 {
703 /*
704 * ModRM - Register only.
705 */
706 case 'C': /* Control register (ParseModRM / UseModRM). */
707 case 'D': /* Debug register (ParseModRM / UseModRM). */
708 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
709 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
710 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
711 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
712 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
713 case 'H': /* The VEX.vvvv field of the VEX prefix selects a XMM/YMM register. */
714 case 'L': /* The upper 4 bits of the 8-bit immediate selects a XMM/YMM register. */
715 {
716 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
717 Assert(!(pParam->fUse & (DISUSE_INDEX | DISUSE_SCALE) /* No SIB here... */));
718 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
719
720 size_t cchReg;
721 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
722 PUT_STR(pszReg, cchReg);
723 break;
724 }
725
726 /*
727 * ModRM - Register or memory.
728 */
729 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
730 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
731 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
732 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
733 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
734 {
735 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
736
737 PUT_FAR();
738 uint32_t const fUse = pParam->fUse;
739 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
740 {
741 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
742 while the register variants deals with 16, 32 & 64 in the normal fashion. */
743 if ( pParam->fParam != OP_PARM_Ev
744 || pOp->uOpcode != OP_MOV
745 || ( pOp->fParam1 != OP_PARM_Sw
746 && pOp->fParam2 != OP_PARM_Sw))
747 PUT_SIZE_OVERRIDE();
748 PUT_C('[');
749 }
750 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
751 && (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)))
752 {
753 if ( (fUse & DISUSE_DISPLACEMENT8)
754 && !pParam->uDisp.i8)
755 PUT_SZ("byte ");
756 else if ( (fUse & DISUSE_DISPLACEMENT16)
757 && (int8_t)pParam->uDisp.i16 == (int16_t)pParam->uDisp.i16)
758 PUT_SZ("word ");
759 else if ( (fUse & DISUSE_DISPLACEMENT32)
760 && (int16_t)pParam->uDisp.i32 == (int32_t)pParam->uDisp.i32) //??
761 PUT_SZ("dword ");
762 else if ( (fUse & DISUSE_DISPLACEMENT64)
763 && (pDis->SIB.Bits.Base != 5 || pDis->ModRM.Bits.Mod != 0)
764 && (int32_t)pParam->uDisp.i64 == (int64_t)pParam->uDisp.i64) //??
765 PUT_SZ("qword ");
766 }
767 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
768 PUT_SEGMENT_OVERRIDE();
769
770 bool fBase = (fUse & DISUSE_BASE) /* When exactly is DISUSE_BASE supposed to be set? disasmModRMReg doesn't set it. */
771 || ( (fUse & ( DISUSE_REG_GEN8
772 | DISUSE_REG_GEN16
773 | DISUSE_REG_GEN32
774 | DISUSE_REG_GEN64
775 | DISUSE_REG_FP
776 | DISUSE_REG_MMX
777 | DISUSE_REG_XMM
778 | DISUSE_REG_YMM
779 | DISUSE_REG_CR
780 | DISUSE_REG_DBG
781 | DISUSE_REG_SEG
782 | DISUSE_REG_TEST ))
783 && !DISUSE_IS_EFFECTIVE_ADDR(fUse));
784 if (fBase)
785 {
786 size_t cchReg;
787 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
788 PUT_STR(pszReg, cchReg);
789 }
790
791 if (fUse & DISUSE_INDEX)
792 {
793 if (fBase)
794 PUT_C('+');
795
796 size_t cchReg;
797 const char *pszReg = disasmFormatYasmIndexReg(pDis, pParam, &cchReg);
798 PUT_STR(pszReg, cchReg);
799
800 if (fUse & DISUSE_SCALE)
801 {
802 PUT_C('*');
803 PUT_C('0' + pParam->uScale);
804 }
805 }
806 else
807 Assert(!(fUse & DISUSE_SCALE));
808
809 int64_t off2 = 0;
810 if (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32))
811 {
812 if (fUse & DISUSE_DISPLACEMENT8)
813 off2 = pParam->uDisp.i8;
814 else if (fUse & DISUSE_DISPLACEMENT16)
815 off2 = pParam->uDisp.i16;
816 else if (fUse & (DISUSE_DISPLACEMENT32 | DISUSE_RIPDISPLACEMENT32))
817 off2 = pParam->uDisp.i32;
818 else if (fUse & DISUSE_DISPLACEMENT64)
819 off2 = pParam->uDisp.i64;
820 else
821 {
822 AssertFailed();
823 off2 = 0;
824 }
825
826 if (fBase || (fUse & DISUSE_INDEX))
827 {
828 PUT_C(off2 >= 0 ? '+' : '-');
829 if (off2 < 0)
830 off2 = -off2;
831 }
832 if (fUse & DISUSE_DISPLACEMENT8)
833 PUT_NUM_8( off2);
834 else if (fUse & DISUSE_DISPLACEMENT16)
835 PUT_NUM_16(off2);
836 else if (fUse & DISUSE_DISPLACEMENT32)
837 PUT_NUM_32(off2);
838 else if (fUse & DISUSE_DISPLACEMENT64)
839 PUT_NUM_64(off2);
840 else
841 {
842 PUT_NUM_32(off2);
843 PUT_SZ(" wrt rip"); //??
844 }
845 }
846
847 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
848 {
849 if (pfnGetSymbol && !fBase && !(fUse & DISUSE_INDEX) && off2 != 0)
850 PUT_SYMBOL((pDis->fPrefix & DISPREFIX_SEG)
851 ? DIS_FMT_SEL_FROM_REG(pDis->idxSegPrefix)
852 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
853 pDis->uAddrMode == DISCPUMODE_64BIT
854 ? (uint64_t)off2
855 : pDis->uAddrMode == DISCPUMODE_32BIT
856 ? (uint32_t)off2
857 : (uint16_t)off2,
858 " (=", ')');
859 PUT_C(']');
860 }
861 break;
862 }
863
864 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
865 AssertFailed();
866 break;
867
868 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
869 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
870 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
871 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
872 {
873 case DISUSE_IMMEDIATE8:
874 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
875 && ( (pOp->fParam1 >= OP_PARM_REG_GEN8_START && pOp->fParam1 <= OP_PARM_REG_GEN8_END)
876 || (pOp->fParam2 >= OP_PARM_REG_GEN8_START && pOp->fParam2 <= OP_PARM_REG_GEN8_END))
877 )
878 PUT_SZ("strict byte ");
879 PUT_NUM_8(pParam->uValue);
880 break;
881
882 case DISUSE_IMMEDIATE16:
883 if ( pDis->uCpuMode != pDis->uOpMode
884 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
885 && ( (int8_t)pParam->uValue == (int16_t)pParam->uValue
886 || (pOp->fParam1 >= OP_PARM_REG_GEN16_START && pOp->fParam1 <= OP_PARM_REG_GEN16_END)
887 || (pOp->fParam2 >= OP_PARM_REG_GEN16_START && pOp->fParam2 <= OP_PARM_REG_GEN16_END))
888 )
889 )
890 {
891 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
892 PUT_SZ_STRICT("strict byte ", "byte ");
893 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
894 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
895 PUT_SZ_STRICT("strict word ", "word ");
896 }
897 PUT_NUM_16(pParam->uValue);
898 break;
899
900 case DISUSE_IMMEDIATE16_SX8:
901 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
902 || pDis->pCurInstr->uOpcode != OP_PUSH)
903 PUT_SZ_STRICT("strict byte ", "byte ");
904 else
905 PUT_SZ("word ");
906 PUT_NUM_16(pParam->uValue);
907 break;
908
909 case DISUSE_IMMEDIATE32:
910 if ( pDis->uOpMode != (pDis->uCpuMode == DISCPUMODE_16BIT ? DISCPUMODE_16BIT : DISCPUMODE_32BIT) /* not perfect */
911 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
912 && ( (int8_t)pParam->uValue == (int32_t)pParam->uValue
913 || (pOp->fParam1 >= OP_PARM_REG_GEN32_START && pOp->fParam1 <= OP_PARM_REG_GEN32_END)
914 || (pOp->fParam2 >= OP_PARM_REG_GEN32_START && pOp->fParam2 <= OP_PARM_REG_GEN32_END))
915 )
916 )
917 {
918 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
919 PUT_SZ_STRICT("strict byte ", "byte ");
920 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
921 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
922 PUT_SZ_STRICT("strict dword ", "dword ");
923 }
924 PUT_NUM_32(pParam->uValue);
925 if (pDis->uCpuMode == DISCPUMODE_32BIT)
926 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uValue, " (=", ')');
927 break;
928
929 case DISUSE_IMMEDIATE32_SX8:
930 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
931 || pDis->pCurInstr->uOpcode != OP_PUSH)
932 PUT_SZ_STRICT("strict byte ", "byte ");
933 else
934 PUT_SZ("dword ");
935 PUT_NUM_32(pParam->uValue);
936 break;
937
938 case DISUSE_IMMEDIATE64_SX8:
939 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
940 || pDis->pCurInstr->uOpcode != OP_PUSH)
941 PUT_SZ_STRICT("strict byte ", "byte ");
942 else
943 PUT_SZ("qword ");
944 PUT_NUM_64(pParam->uValue);
945 break;
946
947 case DISUSE_IMMEDIATE64:
948 PUT_NUM_64(pParam->uValue);
949 break;
950
951 default:
952 AssertFailed();
953 break;
954 }
955 break;
956
957 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
958 {
959 int32_t offDisplacement;
960 Assert(iParam == 1);
961 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
962 && pOp->uOpcode != OP_CALL
963 && pOp->uOpcode != OP_LOOP
964 && pOp->uOpcode != OP_LOOPE
965 && pOp->uOpcode != OP_LOOPNE
966 && pOp->uOpcode != OP_JECXZ;
967 if (pOp->uOpcode == OP_CALL)
968 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
969
970 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
971 {
972 if (fPrefix)
973 PUT_SZ("short ");
974 offDisplacement = (int8_t)pParam->uValue;
975 Assert(*pszFmt == 'b'); pszFmt++;
976
977 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
978 PUT_NUM_S8(offDisplacement);
979 }
980 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
981 {
982 if (fPrefix)
983 PUT_SZ("near ");
984 offDisplacement = (int16_t)pParam->uValue;
985 Assert(*pszFmt == 'v'); pszFmt++;
986
987 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
988 PUT_NUM_S16(offDisplacement);
989 }
990 else
991 {
992 if (fPrefix)
993 PUT_SZ("near ");
994 offDisplacement = (int32_t)pParam->uValue;
995 Assert(pParam->fUse & (DISUSE_IMMEDIATE32_REL | DISUSE_IMMEDIATE64_REL));
996 Assert(*pszFmt == 'v'); pszFmt++;
997
998 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
999 PUT_NUM_S32(offDisplacement);
1000 }
1001 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1002 PUT_SZ(" (");
1003
1004 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
1005 if (pDis->uCpuMode == DISCPUMODE_16BIT)
1006 PUT_NUM_16(uTrgAddr);
1007 else if (pDis->uCpuMode == DISCPUMODE_32BIT)
1008 PUT_NUM_32(uTrgAddr);
1009 else
1010 PUT_NUM_64(uTrgAddr);
1011
1012 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1013 {
1014 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
1015 PUT_C(')');
1016 }
1017 else
1018 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
1019 break;
1020 }
1021
1022 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
1023 {
1024 Assert(*pszFmt == 'p'); pszFmt++;
1025 PUT_FAR();
1026 PUT_SIZE_OVERRIDE();
1027 PUT_SEGMENT_OVERRIDE();
1028 int rc = VERR_SYMBOL_NOT_FOUND;
1029 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1030 {
1031 case DISUSE_IMMEDIATE_ADDR_16_16:
1032 PUT_NUM_16(pParam->uValue >> 16);
1033 PUT_C(':');
1034 PUT_NUM_16(pParam->uValue);
1035 if (pfnGetSymbol)
1036 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1037 break;
1038 case DISUSE_IMMEDIATE_ADDR_16_32:
1039 PUT_NUM_16(pParam->uValue >> 32);
1040 PUT_C(':');
1041 PUT_NUM_32(pParam->uValue);
1042 if (pfnGetSymbol)
1043 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1044 break;
1045 case DISUSE_DISPLACEMENT16:
1046 PUT_NUM_16(pParam->uValue);
1047 if (pfnGetSymbol)
1048 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1049 break;
1050 case DISUSE_DISPLACEMENT32:
1051 PUT_NUM_32(pParam->uValue);
1052 if (pfnGetSymbol)
1053 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1054 break;
1055 case DISUSE_DISPLACEMENT64:
1056 PUT_NUM_64(pParam->uValue);
1057 if (pfnGetSymbol)
1058 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint64_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1059 break;
1060 default:
1061 AssertFailed();
1062 break;
1063 }
1064
1065 PUT_SYMBOL_TWO(rc, " [", ']');
1066 break;
1067 }
1068
1069 case 'O': /* No ModRM byte (ParseImmAddr). */
1070 {
1071 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1072 PUT_FAR();
1073 PUT_SIZE_OVERRIDE();
1074 PUT_C('[');
1075 PUT_SEGMENT_OVERRIDE();
1076 int rc = VERR_SYMBOL_NOT_FOUND;
1077 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1078 {
1079 case DISUSE_IMMEDIATE_ADDR_16_16:
1080 PUT_NUM_16(pParam->uValue >> 16);
1081 PUT_C(':');
1082 PUT_NUM_16(pParam->uValue);
1083 if (pfnGetSymbol)
1084 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1085 break;
1086 case DISUSE_IMMEDIATE_ADDR_16_32:
1087 PUT_NUM_16(pParam->uValue >> 32);
1088 PUT_C(':');
1089 PUT_NUM_32(pParam->uValue);
1090 if (pfnGetSymbol)
1091 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1092 break;
1093 case DISUSE_DISPLACEMENT16:
1094 PUT_NUM_16(pParam->uDisp.i16);
1095 if (pfnGetSymbol)
1096 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u16, szSymbol, sizeof(szSymbol), &off, pvUser);
1097 break;
1098 case DISUSE_DISPLACEMENT32:
1099 PUT_NUM_32(pParam->uDisp.i32);
1100 if (pfnGetSymbol)
1101 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u32, szSymbol, sizeof(szSymbol), &off, pvUser);
1102 break;
1103 case DISUSE_DISPLACEMENT64:
1104 PUT_NUM_64(pParam->uDisp.i64);
1105 if (pfnGetSymbol)
1106 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u64, szSymbol, sizeof(szSymbol), &off, pvUser);
1107 break;
1108 default:
1109 AssertFailed();
1110 break;
1111 }
1112 PUT_C(']');
1113
1114 PUT_SYMBOL_TWO(rc, " (", ')');
1115 break;
1116 }
1117
1118 case 'X': /* DS:SI (ParseXb, ParseXv). */
1119 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1120 {
1121 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1122 PUT_FAR();
1123 PUT_SIZE_OVERRIDE();
1124 PUT_C('[');
1125 if (pParam->fUse & DISUSE_POINTER_DS_BASED)
1126 PUT_SZ("ds:");
1127 else
1128 PUT_SZ("es:");
1129
1130 size_t cchReg;
1131 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1132 PUT_STR(pszReg, cchReg);
1133 PUT_C(']');
1134 break;
1135 }
1136
1137 case 'e': /* Register based on operand size (e.g. %eAX, %eAH) (ParseFixedReg). */
1138 {
1139 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2]));
1140 pszFmt += 2;
1141 size_t cchReg;
1142 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1143 PUT_STR(pszReg, cchReg);
1144 break;
1145 }
1146
1147 default:
1148 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1149 break;
1150 }
1151 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1152 }
1153 else
1154 {
1155 PUT_C(ch);
1156 if (ch == ',')
1157 {
1158 Assert(*pszFmt != ' ');
1159 PUT_C(' ');
1160 switch (++iParam)
1161 {
1162 case 2: pParam = &pDis->Param2; break;
1163 case 3: pParam = &pDis->Param3; break;
1164 case 4: pParam = &pDis->Param4; break;
1165 default: pParam = NULL; break;
1166 }
1167 }
1168 }
1169 } /* while more to format */
1170 }
1171
1172 /*
1173 * Any additional output to the right of the instruction?
1174 */
1175 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1176 {
1177 /* some up front padding. */
1178 size_t cchPadding = cchOutput - offInstruction;
1179 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1180 PUT_STR(g_szSpaces, cchPadding);
1181
1182 /* comment? */
1183 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1184 PUT_SZ(";");
1185
1186 /*
1187 * The address?
1188 */
1189 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1190 {
1191 PUT_C(' ');
1192#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
1193 if (pDis->uInstrAddr >= _4G)
1194 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
1195#endif
1196 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
1197 }
1198
1199 /*
1200 * Opcode bytes?
1201 */
1202 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1203 {
1204 PUT_C(' ');
1205 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
1206 cchOutput += cchTmp;
1207 if (cchTmp >= cchDst)
1208 cchTmp = cchDst - (cchDst != 0);
1209 cchDst -= cchTmp;
1210 pszDst += cchTmp;
1211 }
1212 }
1213
1214 /*
1215 * Terminate it - on overflow we'll have reserved one byte for this.
1216 */
1217 if (cchDst > 0)
1218 *pszDst = '\0';
1219 else
1220 Assert(!cchBuf);
1221
1222 /* clean up macros */
1223#undef PUT_PSZ
1224#undef PUT_SZ
1225#undef PUT_STR
1226#undef PUT_C
1227 return cchOutput;
1228}
1229
1230
1231/**
1232 * Formats the current instruction in Yasm (/ Nasm) style.
1233 *
1234 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1235 *
1236 *
1237 * @returns The number of output characters. If this is >= cchBuf, then the content
1238 * of pszBuf will be truncated.
1239 * @param pDis Pointer to the disassembler state.
1240 * @param pszBuf The output buffer.
1241 * @param cchBuf The size of the output buffer.
1242 */
1243DISDECL(size_t) DISFormatYasm(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
1244{
1245 return DISFormatYasmEx(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
1246}
1247
1248
1249/**
1250 * Checks if the encoding of the given disassembled instruction is something we
1251 * can never get YASM to produce.
1252 *
1253 * @returns true if it's odd, false if it isn't.
1254 * @param pDis The disassembler output. The byte fetcher callback will
1255 * be used if present as we might need to fetch opcode
1256 * bytes.
1257 */
1258DISDECL(bool) DISFormatYasmIsOddEncoding(PDISSTATE pDis)
1259{
1260 /*
1261 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1262 */
1263 if ( pDis->uAddrMode != DISCPUMODE_16BIT ///@todo correct?
1264 && pDis->ModRM.Bits.Rm == 4
1265 && pDis->ModRM.Bits.Mod != 3)
1266 {
1267 /* No scaled index SIB (index=4), except for ESP. */
1268 if ( pDis->SIB.Bits.Index == 4
1269 && pDis->SIB.Bits.Base != 4)
1270 return true;
1271
1272 /* EBP + displacement */
1273 if ( pDis->ModRM.Bits.Mod != 0
1274 && pDis->SIB.Bits.Base == 5
1275 && pDis->SIB.Bits.Scale == 0)
1276 return true;
1277 }
1278
1279 /*
1280 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1281 */
1282 if ( pDis->pCurInstr->uOpcode == OP_SHL
1283 && pDis->ModRM.Bits.Reg == 6)
1284 return true;
1285
1286 /*
1287 * Check for multiple prefixes of the same kind.
1288 */
1289 uint8_t off1stSeg = UINT8_MAX;
1290 uint8_t offOpSize = UINT8_MAX;
1291 uint8_t offAddrSize = UINT8_MAX;
1292 uint32_t fPrefixes = 0;
1293 for (uint32_t offOpcode = 0; offOpcode < RT_ELEMENTS(pDis->abInstr); offOpcode++)
1294 {
1295 uint32_t f;
1296 switch (pDis->abInstr[offOpcode])
1297 {
1298 case 0xf0:
1299 f = DISPREFIX_LOCK;
1300 break;
1301
1302 case 0xf2:
1303 case 0xf3:
1304 f = DISPREFIX_REP; /* yes, both */
1305 break;
1306
1307 case 0x2e:
1308 case 0x3e:
1309 case 0x26:
1310 case 0x36:
1311 case 0x64:
1312 case 0x65:
1313 if (off1stSeg == UINT8_MAX)
1314 off1stSeg = offOpcode;
1315 f = DISPREFIX_SEG;
1316 break;
1317
1318 case 0x66:
1319 if (offOpSize == UINT8_MAX)
1320 offOpSize = offOpcode;
1321 f = DISPREFIX_OPSIZE;
1322 break;
1323
1324 case 0x67:
1325 if (offAddrSize == UINT8_MAX)
1326 offAddrSize = offOpcode;
1327 f = DISPREFIX_ADDRSIZE;
1328 break;
1329
1330 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1331 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1332 f = pDis->uCpuMode == DISCPUMODE_64BIT ? DISPREFIX_REX : 0;
1333 break;
1334
1335 default:
1336 f = 0;
1337 break;
1338 }
1339 if (!f)
1340 break; /* done */
1341 if (fPrefixes & f)
1342 return true;
1343 fPrefixes |= f;
1344 }
1345
1346 /* segment overrides are fun */
1347 if (fPrefixes & DISPREFIX_SEG)
1348 {
1349 /* no effective address which it may apply to. */
1350 Assert((pDis->fPrefix & DISPREFIX_SEG) || pDis->uCpuMode == DISCPUMODE_64BIT);
1351 if ( !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
1352 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
1353 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
1354 return true;
1355
1356 /* Yasm puts the segment prefixes before the operand prefix with no
1357 way of overriding it. */
1358 if (offOpSize < off1stSeg)
1359 return true;
1360 }
1361
1362 /* fixed register + addr override doesn't go down all that well. */
1363 if (fPrefixes & DISPREFIX_ADDRSIZE)
1364 {
1365 Assert(pDis->fPrefix & DISPREFIX_ADDRSIZE);
1366 if ( pDis->pCurInstr->fParam3 == OP_PARM_NONE
1367 && pDis->pCurInstr->fParam2 == OP_PARM_NONE
1368 && ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1369 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END))
1370 return true;
1371 }
1372
1373 /* Almost all prefixes are bad for jumps. */
1374 if (fPrefixes)
1375 {
1376 switch (pDis->pCurInstr->uOpcode)
1377 {
1378 /* nop w/ prefix(es). */
1379 case OP_NOP:
1380 return true;
1381
1382 case OP_JMP:
1383 if ( pDis->pCurInstr->fParam1 != OP_PARM_Jb
1384 && pDis->pCurInstr->fParam1 != OP_PARM_Jv)
1385 break;
1386 /* fall thru */
1387 case OP_JO:
1388 case OP_JNO:
1389 case OP_JC:
1390 case OP_JNC:
1391 case OP_JE:
1392 case OP_JNE:
1393 case OP_JBE:
1394 case OP_JNBE:
1395 case OP_JS:
1396 case OP_JNS:
1397 case OP_JP:
1398 case OP_JNP:
1399 case OP_JL:
1400 case OP_JNL:
1401 case OP_JLE:
1402 case OP_JNLE:
1403 /** @todo branch hinting 0x2e/0x3e... */
1404 return true;
1405 }
1406
1407 }
1408
1409 /* All but the segment prefix is bad news for push/pop. */
1410 if (fPrefixes & ~DISPREFIX_SEG)
1411 {
1412 switch (pDis->pCurInstr->uOpcode)
1413 {
1414 case OP_POP:
1415 case OP_PUSH:
1416 if ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_SEG_START
1417 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_SEG_END)
1418 return true;
1419 if ( (fPrefixes & ~DISPREFIX_OPSIZE)
1420 && pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1421 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END)
1422 return true;
1423 break;
1424
1425 case OP_POPA:
1426 case OP_POPF:
1427 case OP_PUSHA:
1428 case OP_PUSHF:
1429 if (fPrefixes & ~DISPREFIX_OPSIZE)
1430 return true;
1431 break;
1432 }
1433 }
1434
1435 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1436 if ( (fPrefixes & DISPREFIX_OPSIZE)
1437 && ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1438 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1439 || ( pDis->pCurInstr->fParam2 == OP_PARM_Gb /* r8 */
1440 && pDis->pCurInstr->fParam1 == OP_PARM_Eb /* r8/mem8 */))
1441 )
1442 {
1443 switch (pDis->pCurInstr->uOpcode)
1444 {
1445 case OP_ADD:
1446 case OP_OR:
1447 case OP_ADC:
1448 case OP_SBB:
1449 case OP_AND:
1450 case OP_SUB:
1451 case OP_XOR:
1452 case OP_CMP:
1453 return true;
1454 default:
1455 break;
1456 }
1457 }
1458
1459 /* Instructions taking no address or operand which thus may be annoyingly
1460 difficult to format for yasm. */
1461 if (fPrefixes)
1462 {
1463 switch (pDis->pCurInstr->uOpcode)
1464 {
1465 case OP_STI:
1466 case OP_STC:
1467 case OP_CLI:
1468 case OP_CLD:
1469 case OP_CLC:
1470 case OP_INT:
1471 case OP_INT3:
1472 case OP_INTO:
1473 case OP_HLT:
1474 /** @todo Many more to can be added here. */
1475 return true;
1476 default:
1477 break;
1478 }
1479 }
1480
1481 /* FPU and other instructions that ignores operand size override. */
1482 if (fPrefixes & DISPREFIX_OPSIZE)
1483 {
1484 switch (pDis->pCurInstr->uOpcode)
1485 {
1486 /* FPU: */
1487 case OP_FIADD:
1488 case OP_FIMUL:
1489 case OP_FISUB:
1490 case OP_FISUBR:
1491 case OP_FIDIV:
1492 case OP_FIDIVR:
1493 /** @todo there are many more. */
1494 return true;
1495
1496 case OP_MOV:
1497 /** @todo could be that we're not disassembling these correctly. */
1498 if (pDis->pCurInstr->fParam1 == OP_PARM_Sw)
1499 return true;
1500 /** @todo what about the other way? */
1501 break;
1502
1503 default:
1504 break;
1505 }
1506 }
1507
1508
1509 /*
1510 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1511 *
1512 * For example:
1513 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1514 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1515 */
1516 if (pDis->ModRM.Bits.Mod == 3 /* reg,reg */)
1517 {
1518 switch (pDis->pCurInstr->uOpcode)
1519 {
1520 case OP_ADD:
1521 case OP_OR:
1522 case OP_ADC:
1523 case OP_SBB:
1524 case OP_AND:
1525 case OP_SUB:
1526 case OP_XOR:
1527 case OP_CMP:
1528 if ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1529 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1530 || ( pDis->pCurInstr->fParam1 == OP_PARM_Gv /* rX */
1531 && pDis->pCurInstr->fParam2 == OP_PARM_Ev /* rX/memX */))
1532 return true;
1533
1534 /* 82 (see table A-6). */
1535 if (pDis->bOpCode == 0x82)
1536 return true;
1537 break;
1538
1539 /* ff /0, fe /0, ff /1, fe /0 */
1540 case OP_DEC:
1541 case OP_INC:
1542 return true;
1543
1544 case OP_POP:
1545 case OP_PUSH:
1546 Assert(pDis->bOpCode == 0x8f);
1547 return true;
1548
1549 case OP_MOV:
1550 if ( pDis->bOpCode == 0x8a
1551 || pDis->bOpCode == 0x8b)
1552 return true;
1553 break;
1554
1555 default:
1556 break;
1557 }
1558 }
1559
1560 /* shl eax,1 will be assembled to the form without the immediate byte. */
1561 if ( pDis->pCurInstr->fParam2 == OP_PARM_Ib
1562 && (uint8_t)pDis->Param2.uValue == 1)
1563 {
1564 switch (pDis->pCurInstr->uOpcode)
1565 {
1566 case OP_SHL:
1567 case OP_SHR:
1568 case OP_SAR:
1569 case OP_RCL:
1570 case OP_RCR:
1571 case OP_ROL:
1572 case OP_ROR:
1573 return true;
1574 }
1575 }
1576
1577 /* And some more - see table A-6. */
1578 if (pDis->bOpCode == 0x82)
1579 {
1580 switch (pDis->pCurInstr->uOpcode)
1581 {
1582 case OP_ADD:
1583 case OP_OR:
1584 case OP_ADC:
1585 case OP_SBB:
1586 case OP_AND:
1587 case OP_SUB:
1588 case OP_XOR:
1589 case OP_CMP:
1590 return true;
1591 break;
1592 }
1593 }
1594
1595
1596 /* check for REX.X = 1 without SIB. */
1597
1598 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1599 says (intel doesn't appear to care). */
1600 switch (pDis->pCurInstr->uOpcode)
1601 {
1602 case OP_SETO:
1603 case OP_SETNO:
1604 case OP_SETC:
1605 case OP_SETNC:
1606 case OP_SETE:
1607 case OP_SETNE:
1608 case OP_SETBE:
1609 case OP_SETNBE:
1610 case OP_SETS:
1611 case OP_SETNS:
1612 case OP_SETP:
1613 case OP_SETNP:
1614 case OP_SETL:
1615 case OP_SETNL:
1616 case OP_SETLE:
1617 case OP_SETNLE:
1618 AssertMsg(pDis->bOpCode >= 0x90 && pDis->bOpCode <= 0x9f, ("%#x\n", pDis->bOpCode));
1619 if (pDis->ModRM.Bits.Reg != 2)
1620 return true;
1621 break;
1622 }
1623
1624 /*
1625 * The MOVZX reg32,mem16 instruction without an operand size prefix
1626 * doesn't quite make sense...
1627 */
1628 if ( pDis->pCurInstr->uOpcode == OP_MOVZX
1629 && pDis->bOpCode == 0xB7
1630 && (pDis->uCpuMode == DISCPUMODE_16BIT) != !!(fPrefixes & DISPREFIX_OPSIZE))
1631 return true;
1632
1633 return false;
1634}
1635
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette