VirtualBox

source: vbox/trunk/src/VBox/Disassembler/testcase/tstDisasm-2.cpp@ 9098

Last change on this file since 9098 was 9098, checked in by vboxsync, 17 years ago

More or less working yasm formatter. Added some disassembler testcases.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 58.7 KB
Line 
1/* $Id: tstDisasm-2.cpp 9098 2008-05-25 22:16:58Z vboxsync $ */
2/** @file
3 * Testcase - Generic Disassembler Tool.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.215389.xyz. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/dis.h>
26#include <iprt/stream.h>
27#include <iprt/getopt.h>
28#include <iprt/file.h>
29#include <iprt/string.h>
30#include <iprt/runtime.h>
31#include <VBox/err.h>
32#include <iprt/ctype.h>
33
34
35/*******************************************************************************
36* Structures and Typedefs *
37*******************************************************************************/
38typedef enum { kAsmStyle_Default, kAsmStyle_yasm, kAsmStyle_masm, kAsmStyle_gas, kAsmStyle_invalid } ASMSTYLE;
39typedef enum { kUndefOp_Fail, kUndefOp_All, kUndefOp_DefineByte, kUndefOp_End } UNDEFOPHANDLING;
40
41typedef struct MYDISSTATE
42{
43 DISCPUSTATE Cpu;
44 uint64_t uAddress; /**< The current instruction address. */
45 uint8_t *pbInstr; /**< The current instruction (pointer). */
46 uint32_t cbInstr; /**< The size of the current instruction. */
47 bool fUndefOp; /**< Whether the current instruction is really an undefined opcode.*/
48 UNDEFOPHANDLING enmUndefOp; /**< How to treat undefined opcodes. */
49 int rc; /**< Set if we hit EOF. */
50 size_t cbLeft; /**< The number of bytes left. (read) */
51 uint8_t *pbNext; /**< The next byte. (read) */
52 uint64_t uNextAddr; /**< The address of the next byte. (read) */
53 char szLine[256]; /**< The disassembler text output. */
54} MYDISSTATE;
55typedef MYDISSTATE *PMYDISSTATE;
56
57
/*
 * Non-logging builds don't do full formatting, so we must do it on our own.
 * This should probably be moved into the disassembler later, as it's needed
 * for the vbox debugger as well.
 *
 * Define USE_MY_FORMATTER (comment the line below in) to enable it.
 */
65#define USE_MY_FORMATTER
66
67#ifdef USE_MY_FORMATTER
/** 8-bit general register names for 16-bit and 32-bit code, indexed by
 *  register number.  Unused tail bytes of each entry are zero. */
static const char g_aszYasmRegGen8x86[8][4] =
{
    "al", "cl", "dl", "bl",
    "ah", "ch", "dh", "bh"
};
/**
 * 8-bit general register names for 64-bit code, indexed by register number.
 *
 * Registers 4 thru 7 are the low bytes of rsp, rbp, rsi and rdi, which the
 * assembler knows as spl, bpl, sil and dil.  Unused tail bytes of each entry
 * are zero; the length calculation in MyDisasYasmFormatBaseReg relies on that.
 */
static const char g_aszYasmRegGen8Amd64[16][5] =
{
    "al",   "cl",   "dl",   "bl",   "spl",  "bpl",  "sil",  "dil",
    "r8b",  "r9b",  "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
};
/*
 * Register name tables, each indexed by register number.  Entries shorter
 * than the row width are zero padded, which the length calculations in the
 * formatter code depend on.
 */

/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",   "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};

/** 16-bit addressing mode register combinations. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
};

/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",  "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15"
};

/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",  "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11", "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",  "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11", "dr12", "dr13", "dr14", "dr15"
};

/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",  "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11", "tr12", "tr13", "tr14", "tr15"
};

120
121
122DECLINLINE(const char *) MyDisasYasmFormatBaseReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg, bool fReg1616)
123{
124 switch (pParam->flags & ( USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64
125 | USE_REG_FP | USE_REG_MMX | USE_REG_XMM | USE_REG_CR
126 | USE_REG_DBG | USE_REG_SEG | USE_REG_TEST))
127
128 {
129 case USE_REG_GEN8:
130 if (pCpu->opmode == CPUMODE_64BIT)
131 {
132 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8Amd64));
133 const char *psz = g_aszYasmRegGen8Amd64[pParam->base.reg_gen];
134 *pcchReg = 2 + !!psz[2] + !!psz[3];
135 return psz;
136 }
137 *pcchReg = 2;
138 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8x86));
139 return g_aszYasmRegGen8x86[pParam->base.reg_gen];
140
141 case USE_REG_GEN16:
142 {
143 if (fReg1616)
144 {
145 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen1616));
146 const char *psz = g_aszYasmRegGen1616[pParam->base.reg_gen];
147 *pcchReg = psz[2] ? 5 : 2;
148 return psz;
149 }
150
151 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
152 const char *psz = g_aszYasmRegGen16[pParam->base.reg_gen];
153 *pcchReg = 2 + !!psz[2] + !!psz[3];
154 return psz;
155 }
156
157 case USE_REG_GEN32:
158 {
159 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
160 const char *psz = g_aszYasmRegGen32[pParam->base.reg_gen];
161 *pcchReg = 2 + !!psz[2] + !!psz[3];
162 return psz;
163 }
164
165 case USE_REG_GEN64:
166 {
167 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
168 const char *psz = g_aszYasmRegGen64[pParam->base.reg_gen];
169 *pcchReg = 2 + !!psz[2] + !!psz[3];
170 return psz;
171 }
172
173 case USE_REG_FP:
174 {
175 Assert(pParam->base.reg_fp < RT_ELEMENTS(g_aszYasmRegFP));
176 const char *psz = g_aszYasmRegFP[pParam->base.reg_fp];
177 *pcchReg = 3;
178 return psz;
179 }
180
181 case USE_REG_MMX:
182 {
183 Assert(pParam->base.reg_mmx < RT_ELEMENTS(g_aszYasmRegMMX));
184 const char *psz = g_aszYasmRegMMX[pParam->base.reg_mmx];
185 *pcchReg = 3;
186 return psz;
187 }
188
189 case USE_REG_XMM:
190 {
191 Assert(pParam->base.reg_xmm < RT_ELEMENTS(g_aszYasmRegXMM));
192 const char *psz = g_aszYasmRegXMM[pParam->base.reg_mmx];
193 *pcchReg = 4 + !!psz[4];
194 return psz;
195 }
196
197 case USE_REG_CR:
198 {
199 Assert(pParam->base.reg_ctrl < RT_ELEMENTS(g_aszYasmRegCRx));
200 const char *psz = g_aszYasmRegCRx[pParam->base.reg_ctrl];
201 *pcchReg = 3;
202 return psz;
203 }
204
205 case USE_REG_DBG:
206 {
207 Assert(pParam->base.reg_dbg < RT_ELEMENTS(g_aszYasmRegDRx));
208 const char *psz = g_aszYasmRegDRx[pParam->base.reg_dbg];
209 *pcchReg = 3;
210 return psz;
211 }
212
213 case USE_REG_SEG:
214 {
215 Assert(pParam->base.reg_seg < RT_ELEMENTS(g_aszYasmRegCRx));
216 const char *psz = g_aszYasmRegSeg[pParam->base.reg_seg];
217 *pcchReg = 2;
218 return psz;
219 }
220
221 case USE_REG_TEST:
222 {
223 Assert(pParam->base.reg_test < RT_ELEMENTS(g_aszYasmRegTRx));
224 const char *psz = g_aszYasmRegTRx[pParam->base.reg_test];
225 *pcchReg = 3;
226 return psz;
227 }
228
229 default:
230 AssertMsgFailed(("%#x\n", pParam->flags));
231 *pcchReg = 3;
232 return "r??";
233 }
234}
235
236DECLINLINE(const char *) MyDisasYasmFormatIndexReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg)
237{
238 switch (pCpu->addrmode)
239 {
240 case CPUMODE_16BIT:
241 {
242 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
243 const char *psz = g_aszYasmRegGen16[pParam->index.reg_gen];
244 *pcchReg = 2 + !!psz[2] + !!psz[3];
245 return psz;
246 }
247
248 case CPUMODE_32BIT:
249 {
250 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
251 const char *psz = g_aszYasmRegGen32[pParam->index.reg_gen];
252 *pcchReg = 2 + !!psz[2] + !!psz[3];
253 return psz;
254 }
255
256 case CPUMODE_64BIT:
257 {
258 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
259 const char *psz = g_aszYasmRegGen64[pParam->index.reg_gen];
260 *pcchReg = 2 + !!psz[2] + !!psz[3];
261 return psz;
262 }
263
264 default:
265 AssertMsgFailed(("%#x %#x\n", pParam->flags, pCpu->addrmode));
266 *pcchReg = 3;
267 return "r??";
268 }
269}
270
271static size_t MyDisasYasmFormat(DISCPUSTATE const *pCpu, char *pszBuf, size_t cchBuf)
272{
273 PCOPCODE const pOp = pCpu->pCurInstr;
274 size_t cchOutput = 0;
275 char *pszDst = pszBuf;
276 size_t cchDst = cchBuf;
277
278 /* output macros */
279#define PUT_C(ch) \
280 do { \
281 cchOutput++; \
282 if (cchDst > 1) \
283 { \
284 cchDst--; \
285 *pszDst++ = (ch); \
286 } \
287 } while (0)
288#define PUT_STR(pszSrc, cchSrc) \
289 do { \
290 cchOutput += (cchSrc); \
291 if (cchDst > (cchSrc)) \
292 { \
293 memcpy(pszDst, (pszSrc), (cchSrc)); \
294 pszDst += (cchSrc); \
295 cchDst -= (cchSrc); \
296 } \
297 else if (cchDst > 1) \
298 { \
299 memcpy(pszDst, (pszSrc), cchDst - 1); \
300 pszDst += cchDst - 1; \
301 cchDst = 1; \
302 } \
303 } while (0)
304#define PUT_SZ(sz) \
305 PUT_STR((sz), sizeof(sz) - 1)
306#define PUT_PSZ(psz) \
307 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
308#define PUT_NUM(cch, fmt, num) \
309 do { \
310 cchOutput += (cch); \
311 if (cchBuf > 1) \
312 { \
313 const size_t cchTmp = RTStrPrintf(pszDst, cchBuf, fmt, (num)); \
314 pszDst += cchTmp; \
315 cchBuf -= cchTmp; \
316 Assert(cchTmp == (cch) || cchBuf == 1); \
317 } \
318 } while (0)
319#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
320#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
321#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
322#define PUT_NUM_64(num) PUT_NUM(18, "0%08xh", (uint64_t)(num))
323
324 /*
325 * Filter out invalid opcodes first as they need special
326 * treatment. UD2 is an exception and should be handled normally.
327 */
328 if ( pOp->opcode == OP_INVALID
329 || ( pOp->opcode == OP_ILLUD2
330 && (pCpu->prefix & PREFIX_LOCK)))
331 {
332
333 }
334 else
335 {
336 /*
337 * Prefixes
338 */
339 if (pCpu->prefix & PREFIX_LOCK)
340 PUT_SZ("lock ");
341 if(pCpu->prefix & PREFIX_REP)
342 PUT_SZ("rep ");
343 else if(pCpu->prefix & PREFIX_REPNE)
344 PUT_SZ("repne ");
345
346 /*
347 * Adjust the format string to avoid stuff the assembler cannot handle.
348 */
349 char szTmpFmt[48];
350 const char *pszFmt = pOp->pszOpcode;
351 switch (pOp->opcode)
352 {
353 case OP_JECXZ:
354 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "jcxz %Jb" : pCpu->opmode == CPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
355 break;
356 case OP_PUSHF:
357 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushfw" : pCpu->opmode == CPUMODE_32BIT ? "pushfd" : "pushfq";
358 break;
359 case OP_POPF:
360 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popfw" : pCpu->opmode == CPUMODE_32BIT ? "popfd" : "popfq";
361 break;
362 case OP_INSB:
363 pszFmt = "insb";
364 break;
365 case OP_INSWD:
366 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "insw" : pCpu->opmode == CPUMODE_32BIT ? "insd" : "insq";
367 break;
368 case OP_OUTSB:
369 pszFmt = "outsb";
370 break;
371 case OP_OUTSWD:
372 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "outsw" : pCpu->opmode == CPUMODE_32BIT ? "outsd" : "outsq";
373 break;
374 case OP_MOVSB:
375 pszFmt = "movsb";
376 break;
377 case OP_MOVSWD:
378 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "movsw" : pCpu->opmode == CPUMODE_32BIT ? "movsd" : "movsq";
379 break;
380 case OP_CMPSB:
381 pszFmt = "cmpsb";
382 break;
383 case OP_CMPWD:
384 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cmpsw" : pCpu->opmode == CPUMODE_32BIT ? "cmpsd" : "cmpsq";
385 break;
386 case OP_SCASB:
387 pszFmt = "scasb";
388 break;
389 case OP_SCASWD:
390 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "scasw" : pCpu->opmode == CPUMODE_32BIT ? "scasd" : "scasq";
391 break;
392 case OP_LODSB:
393 pszFmt = "lodsb";
394 break;
395 case OP_LODSWD:
396 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "lodsw" : pCpu->opmode == CPUMODE_32BIT ? "lodsd" : "lodsq";
397 break;
398 case OP_STOSB:
399 pszFmt = "stosb";
400 break;
401 case OP_STOSWD:
402 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "stosw" : pCpu->opmode == CPUMODE_32BIT ? "stosd" : "stosq";
403 break;
404 case OP_SHL:
405 Assert(pszFmt[3] == '/');
406 pszFmt += 4;
407 break;
408 case OP_XLAT:
409 pszFmt = "xlatb";
410 break;
411 case OP_INT3:
412 pszFmt = "int3";
413 break;
414
415 /*
416 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
417 */
418 case OP_NOP:
419 if (pCpu->opcode == 0x90)
420 /* fine, fine */;
421 else if (pszFmt[sizeof("nop %Ev")] == '/' && pszFmt[sizeof("nop %Ev") + 1] == 'p')
422 pszFmt = "prefetch %Eb";
423 else if (pCpu->opcode == 0x1f)
424 {
425 PUT_SZ("db 01fh,");
426 PUT_NUM_8(pCpu->ModRM.u);
427 for (unsigned i = 2; i < pCpu->opsize; i++)
428 {
429 PUT_C(',');
430 PUT_NUM_8(0x90); ///@todo fixme.
431 }
432 pszFmt = "";
433 }
434 break;
435
436 default:
437 /* ST(X) -> stX (floating point) */
438 if (*pszFmt == 'f' && strchr(pszFmt, '('))
439 {
440 char *pszFmtDst = szTmpFmt;
441 char ch;
442 do
443 {
444 ch = *pszFmt++;
445 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
446 {
447 *pszFmtDst++ = 's';
448 *pszFmtDst++ = 't';
449 pszFmt += 2;
450 ch = *pszFmt;
451 Assert(pszFmt[1] == ')');
452 pszFmt += 2;
453 *pszFmtDst++ = ch;
454 }
455 else
456 *pszFmtDst++ = ch;
457 } while (ch != '\0');
458 pszFmt = szTmpFmt;
459 }
460 break;
461
462 /*
463 * Horrible hacks.
464 */
465 case OP_FLD:
466 if (pCpu->opcode == 0xdb) /* m80fp workaround. */
467 *(int *)&pCpu->param1.param &= ~0x1f; /* make it pure OP_PARM_M */
468 break;
469 case OP_LAR: /* hack w -> v, probably not correct. */
470 *(int *)&pCpu->param2.param &= ~0x1f;
471 *(int *)&pCpu->param2.param |= OP_PARM_v;
472 break;
473 }
474
475 /*
476 * Formatting context and associated macros.
477 */
478 PCOP_PARAMETER pParam = &pCpu->param1;
479 int iParam = 1;
480
481#define PUT_FAR() \
482 do { \
483 if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_p \
484 && pOp->opcode != OP_LDS /* table bugs? */ \
485 && pOp->opcode != OP_LES \
486 && pOp->opcode != OP_LFS \
487 && pOp->opcode != OP_LGS \
488 && pOp->opcode != OP_LSS ) \
489 PUT_SZ("far "); \
490 } while (0)
491 /** @todo mov ah,ch ends up with a byte 'override'... */
492#define PUT_SIZE_OVERRIDE() \
493 do { \
494 switch (OP_PARM_VSUBTYPE(pParam->param)) \
495 { \
496 case OP_PARM_v: \
497 switch (pCpu->opmode) \
498 { \
499 case CPUMODE_16BIT: PUT_SZ("word "); break; \
500 case CPUMODE_32BIT: PUT_SZ("dword "); break; \
501 case CPUMODE_64BIT: PUT_SZ("qword "); break; \
502 default: break; \
503 } \
504 break; \
505 case OP_PARM_b: PUT_SZ("byte "); break; \
506 case OP_PARM_w: PUT_SZ("word "); break; \
507 case OP_PARM_d: PUT_SZ("dword "); break; \
508 case OP_PARM_q: PUT_SZ("qword "); break; \
509 case OP_PARM_dq: \
510 if (OP_PARM_VTYPE(pParam->param) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
511 PUT_SZ("qword "); \
512 break; \
513 case OP_PARM_p: break; /* see PUT_FAR */ \
514 case OP_PARM_s: if (pParam->flags & USE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
515 case OP_PARM_z: break; \
516 case OP_PARM_NONE: \
517 if ( OP_PARM_VTYPE(pParam->param) == OP_PARM_M \
518 && ((pParam->flags & USE_REG_FP) || pOp->opcode == OP_FLD)) \
519 PUT_SZ("tword "); \
520 break; \
521 default: break; /*no pointer type specified/necessary*/ \
522 } \
523 } while (0)
524 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
525#define PUT_SEGMENT_OVERRIDE() \
526 do { \
527 if (pCpu->prefix & PREFIX_SEG) \
528 PUT_STR(s_szSegPrefix[pCpu->prefix_seg], 3); \
529 } while (0)
530
531
532 /*
533 * The formatting loop.
534 */
535 char ch;
536 while ((ch = *pszFmt++) != '\0')
537 {
538 if (ch == '%')
539 {
540 ch = *pszFmt++;
541 switch (ch)
542 {
543 /*
544 * ModRM - Register only.
545 */
546 case 'C': /* Control register (ParseModRM / UseModRM). */
547 case 'D': /* Debug register (ParseModRM / UseModRM). */
548 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
549 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
550 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
551 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
552 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
553 {
554 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
555 Assert(!(pParam->flags & (USE_INDEX | USE_SCALE) /* No SIB here... */));
556 Assert(!(pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32)));
557
558 size_t cchReg;
559 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /* pCpu->addrmode == CPUMODE_16BIT */);
560 PUT_STR(pszReg, cchReg);
561 break;
562 }
563
564 /*
565 * ModRM - Register or memory.
566 */
567 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
568 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
569 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
570 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
571 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
572 {
573 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
574
575 PUT_FAR();
576 if (pParam->flags & USE_EFFICIENT_ADDRESS)
577 {
578 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
579 while the register variants deals with 16, 32 & 64 in the normal fashion. */
580 if ( pParam->param != OP_PARM_Ev
581 || pOp->opcode != OP_MOV
582 || ( pOp->param1 != OP_PARM_Sw
583 && pOp->param2 != OP_PARM_Sw))
584 PUT_SIZE_OVERRIDE();
585 PUT_C('[');
586 }
587 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
588 {
589 if ( (pParam->flags & USE_DISPLACEMENT8)
590 && !pParam->disp8)
591 PUT_SZ("byte ");
592 else if ( (pParam->flags & USE_DISPLACEMENT16)
593 && (int8_t)pParam->disp16 == (int16_t)pParam->disp16)
594 PUT_SZ("word ");
595 else if ( (pParam->flags & USE_DISPLACEMENT32)
596 && (int8_t)pParam->disp32 == (int32_t)pParam->disp32)
597 PUT_SZ("dword ");
598 }
599 if (pParam->flags & USE_EFFICIENT_ADDRESS)
600 PUT_SEGMENT_OVERRIDE();
601
602 bool fBase = (pParam->flags & USE_BASE) /* When exactly is USE_BASE supposed to be set? disasmModRMReg doesn't set it. */
603 || ( (pParam->flags & (USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64))
604 && !(pParam->flags & USE_EFFICIENT_ADDRESS));
605 if (fBase)
606 {
607 size_t cchReg;
608 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /*pCpu->addrmode == CPUMODE_16BIT*/);
609 PUT_STR(pszReg, cchReg);
610 }
611
612 if (pParam->flags & USE_INDEX)
613 {
614 if (fBase)
615 PUT_C('+');
616
617 size_t cchReg;
618 const char *pszReg = MyDisasYasmFormatIndexReg(pCpu, pParam, &cchReg);
619 PUT_STR(pszReg, cchReg);
620
621 if (pParam->flags & USE_SCALE)
622 {
623 PUT_C('*');
624 PUT_C('0' + pParam->scale);
625 }
626 }
627 else
628 Assert(!(pParam->flags & USE_SCALE));
629
630 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
631 {
632 Assert(!(pParam->flags & USE_DISPLACEMENT64));
633 int32_t off;
634 if (pParam->flags & USE_DISPLACEMENT8)
635 off = pParam->disp8;
636 else if (pParam->flags & USE_DISPLACEMENT16)
637 off = pParam->disp16;
638 else if (pParam->flags & (USE_DISPLACEMENT32 | USE_RIPDISPLACEMENT32))
639 off = pParam->disp32;
640
641 if (fBase || (pParam->flags & USE_INDEX))
642 PUT_C(off >= 0 ? '+' : '-');
643
644 if (off < 0)
645 off = -off;
646 if (pParam->flags & USE_DISPLACEMENT8)
647 PUT_NUM_8( off);
648 else if (pParam->flags & USE_DISPLACEMENT16)
649 PUT_NUM_16(off);
650 else if (pParam->flags & USE_DISPLACEMENT32)
651 PUT_NUM_32(off);
652 else
653 {
654 PUT_NUM_32(off);
655 PUT_SZ(" wrt rip"); //??
656 }
657 }
658
659 if (pParam->flags & USE_EFFICIENT_ADDRESS)
660 PUT_C(']');
661 break;
662 }
663
664 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
665 AssertFailed();
666 break;
667
668 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
669 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
670 switch (pParam->flags & ( USE_IMMEDIATE8 | USE_IMMEDIATE16 | USE_IMMEDIATE32 | USE_IMMEDIATE64
671 | USE_IMMEDIATE16_SX8 | USE_IMMEDIATE32_SX8))
672 {
673 case USE_IMMEDIATE8:
674 if ( (pOp->param1 >= OP_PARM_REG_GEN8_START && pOp->param1 <= OP_PARM_REG_GEN8_END)
675 || (pOp->param2 >= OP_PARM_REG_GEN8_START && pOp->param2 <= OP_PARM_REG_GEN8_END)
676 )
677 PUT_SZ("strict byte ");
678 PUT_NUM_8(pParam->parval);
679 break;
680
681 case USE_IMMEDIATE16:
682 if ( (int8_t)pParam->parval == (int16_t)pParam->parval
683 || (pOp->param1 >= OP_PARM_REG_GEN16_START && pOp->param1 <= OP_PARM_REG_GEN16_END)
684 || (pOp->param2 >= OP_PARM_REG_GEN16_START && pOp->param2 <= OP_PARM_REG_GEN16_END)
685 )
686 {
687 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
688 PUT_SZ("strict byte ");
689 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
690 PUT_SZ("strict word ");
691 }
692 PUT_NUM_16(pParam->parval);
693 break;
694
695 case USE_IMMEDIATE16_SX8:
696 PUT_SZ("strict byte ");
697 PUT_NUM_16(pParam->parval);
698 break;
699
700 case USE_IMMEDIATE32:
701 if ( (int8_t)pParam->parval == (int32_t)pParam->parval
702 || (pOp->param1 >= OP_PARM_REG_GEN32_START && pOp->param1 <= OP_PARM_REG_GEN32_END)
703 || (pOp->param2 >= OP_PARM_REG_GEN32_START && pOp->param2 <= OP_PARM_REG_GEN32_END)
704 )
705 {
706 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
707 PUT_SZ("strict byte ");
708 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
709 PUT_SZ("strict dword ");
710 }
711 PUT_NUM_32(pParam->parval);
712 break;
713
714 case USE_IMMEDIATE32_SX8:
715 PUT_SZ("strict byte ");
716 PUT_NUM_32(pParam->parval);
717 break;
718
719 case USE_IMMEDIATE64:
720 PUT_NUM_64(pParam->parval);
721 break;
722
723 default:
724 AssertFailed();
725 break;
726 }
727 break;
728
729 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
730 {
731 int32_t offDisplacement;
732 Assert(iParam == 1);
733 bool fPrefix = pOp->opcode != OP_CALL
734 && pOp->opcode != OP_LOOP
735 && pOp->opcode != OP_LOOPE
736 && pOp->opcode != OP_LOOPNE
737 && pOp->opcode != OP_JECXZ;
738
739 if (pParam->flags & USE_IMMEDIATE8_REL)
740 {
741 if (fPrefix)
742 PUT_SZ("short ");
743 offDisplacement = (int8_t)pParam->parval;
744 Assert(*pszFmt == 'b'); pszFmt++;
745 }
746 else if (pParam->flags & USE_IMMEDIATE16_REL)
747 {
748 if (fPrefix)
749 PUT_SZ("near ");
750 offDisplacement = (int16_t)pParam->parval;
751 Assert(*pszFmt == 'v'); pszFmt++;
752 }
753 else
754 {
755 if (fPrefix)
756 PUT_SZ("near ");
757 offDisplacement = (int32_t)pParam->parval;
758 Assert(pParam->flags & USE_IMMEDIATE32_REL);
759 Assert(*pszFmt == 'v'); pszFmt++;
760 }
761
762 RTUINTPTR uTrgAddr = pCpu->opaddr + pCpu->opsize + offDisplacement;
763 if (pCpu->mode == CPUMODE_16BIT)
764 PUT_NUM_16(uTrgAddr);
765 else if (pCpu->mode == CPUMODE_32BIT)
766 PUT_NUM_32(uTrgAddr);
767 else
768 PUT_NUM_64(uTrgAddr);
769 break;
770 }
771
772 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
773 Assert(*pszFmt == 'p'); pszFmt++;
774 PUT_FAR();
775 PUT_SIZE_OVERRIDE();
776 PUT_SEGMENT_OVERRIDE();
777 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
778 {
779 case USE_IMMEDIATE_ADDR_16_16:
780 PUT_NUM_16(pParam->parval >> 16);
781 PUT_C(':');
782 PUT_NUM_16(pParam->parval);
783 break;
784 case USE_IMMEDIATE_ADDR_16_32:
785 PUT_NUM_16(pParam->parval >> 32);
786 PUT_C(':');
787 PUT_NUM_32(pParam->parval);
788 break;
789 case USE_DISPLACEMENT16:
790 PUT_NUM_16(pParam->parval);
791 break;
792 case USE_DISPLACEMENT32:
793 PUT_NUM_32(pParam->parval);
794 break;
795 case USE_DISPLACEMENT64:
796 PUT_NUM_64(pParam->parval);
797 break;
798 default:
799 AssertFailed();
800 break;
801 }
802 break;
803
804 case 'O': /* No ModRM byte (ParseImmAddr). */
805 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
806 PUT_FAR();
807 PUT_SIZE_OVERRIDE();
808 PUT_C('[');
809 PUT_SEGMENT_OVERRIDE();
810 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
811 {
812 case USE_IMMEDIATE_ADDR_16_16:
813 PUT_NUM_16(pParam->parval >> 16);
814 PUT_C(':');
815 PUT_NUM_16(pParam->parval);
816 break;
817 case USE_IMMEDIATE_ADDR_16_32:
818 PUT_NUM_16(pParam->parval >> 32);
819 PUT_C(':');
820 PUT_NUM_32(pParam->parval);
821 break;
822 case USE_DISPLACEMENT16:
823 PUT_NUM_16(pParam->disp16);
824 break;
825 case USE_DISPLACEMENT32:
826 PUT_NUM_32(pParam->disp32);
827 break;
828 case USE_DISPLACEMENT64:
829 PUT_NUM_64(pParam->disp64);
830 break;
831 default:
832 AssertFailed();
833 break;
834 }
835 PUT_C(']');
836 break;
837
838 case 'X': /* DS:SI (ParseXb, ParseXv). */
839 case 'Y': /* ES:DI (ParseYb, ParseYv). */
840 {
841 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
842 PUT_FAR();
843 PUT_SIZE_OVERRIDE();
844 PUT_C('[');
845 if (pParam->flags & USE_POINTER_DS_BASED)
846 PUT_SZ("ds:");
847 else
848 PUT_SZ("es:");
849
850 size_t cchReg;
851 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
852 PUT_STR(pszReg, cchReg);
853 PUT_C(']');
854 break;
855 }
856
857 case 'e': /* Register based on operand size (e.g. %eAX) (ParseFixedReg). */
858 {
859 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2])); pszFmt += 2;
860 size_t cchReg;
861 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
862 PUT_STR(pszReg, cchReg);
863 break;
864 }
865
866 default:
867 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
868 break;
869 }
870 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
871 }
872 else
873 {
874 PUT_C(ch);
875 if (ch == ',')
876 {
877 Assert(*pszFmt != ' ');
878 PUT_C(' ');
879 switch (++iParam)
880 {
881 case 2: pParam = &pCpu->param2; break;
882 case 3: pParam = &pCpu->param3; break;
883 default: pParam = NULL; break;
884 }
885 }
886 }
887 } /* while more to format */
888 }
889
890
891 /* Terminate it - on overflow we'll have reserved one byte for this. */
892 if (cchDst > 0)
893 *pszDst = '\0';
894
895 /* clean up macros */
896#undef PUT_PSZ
897#undef PUT_SZ
898#undef PUT_STR
899#undef PUT_C
900 return cchOutput;
901}
902#endif
903
904
905/**
906 * Default style.
907 *
908 * @param pState The disassembler state.
909 */
910static void MyDisasDefaultFormatter(PMYDISSTATE pState)
911{
912 RTPrintf("%s", pState->szLine);
913}
914
915
916/**
917 * Yasm style.
918 *
919 * @param pState The disassembler state.
920 */
921static void MyDisasYasmFormatter(PMYDISSTATE pState)
922{
923 char szTmp[256];
924#ifndef USE_MY_FORMATTER
925 /* a very quick hack. */
926 strcpy(szTmp, RTStrStripL(strchr(pState->szLine, ':') + 1));
927
928 char *psz = strrchr(szTmp, '[');
929 *psz = '\0';
930 RTStrStripR(szTmp);
931
932 psz = strstr(szTmp, " ptr ");
933 if (psz)
934 memset(psz, ' ', 5);
935
936 char *pszEnd = strchr(szTmp, '\0');
937 while (pszEnd - &szTmp[0] < 71)
938 *pszEnd++ = ' ';
939 *pszEnd = '\0';
940
941#else /* USE_MY_FORMATTER */
942 size_t cch = MyDisasYasmFormat(&pState->Cpu, szTmp, sizeof(szTmp));
943 Assert(cch < sizeof(szTmp));
944 while (cch < 71)
945 szTmp[cch++] = ' ';
946 szTmp[cch] = '\0';
947#endif /* USE_MY_FORMATTER */
948
949 RTPrintf(" %s ; %08llu %s", szTmp, pState->uAddress, pState->szLine);
950}
951
952
953/**
954 * Checks if the encoding of the current instruction is something
955 * we can never get the assembler to produce.
956 *
957 * @returns true if it's odd, false if it isn't.
958 * @param pCpu The disassembler output.
959 */
960static bool MyDisasYasmFormatterIsOddEncoding(PMYDISSTATE pState)
961{
962 /*
963 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
964 */
965 if ( pState->Cpu.addrmode != CPUMODE_16BIT ///@todo correct?
966 && pState->Cpu.ModRM.Bits.Rm == 4
967 && pState->Cpu.ModRM.Bits.Mod != 3)
968 {
969 /* No scaled index SIB (index=4), except for ESP. */
970 if ( pState->Cpu.SIB.Bits.Index == 4
971 && pState->Cpu.SIB.Bits.Base != 4)
972 return true;
973
974 /* EBP + displacement */
975 if ( pState->Cpu.ModRM.Bits.Mod != 0
976 && pState->Cpu.SIB.Bits.Base == 5
977 && pState->Cpu.SIB.Bits.Scale == 0)
978 return true;
979 }
980
981 /*
982 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
983 */
984 if ( pState->Cpu.pCurInstr->opcode == OP_SHL
985 && pState->Cpu.ModRM.Bits.Reg == 6)
986 return true;
987
988 /*
989 * Check for multiple prefixes of the same kind.
990 */
991 bool fSegmentPrefix = false;
992 bool fLockPrefix = false;
993 bool fAddressSize = false;
994 bool fOperandSize = false;
995 bool fRepPrefix = false;
996 bool fRex = false;
997 for (uint8_t const *pu8 = pState->pbInstr;; pu8++)
998 {
999 bool *pf;
1000 switch (*pu8)
1001 {
1002 case 0xf0:
1003 pf = &fLockPrefix;
1004 break;
1005
1006 case 0xf2:
1007 case 0xf3:
1008 pf = &fRepPrefix;
1009 break;
1010
1011 case 0x2e:
1012 case 0x3e:
1013 case 0x26:
1014 case 0x36:
1015 case 0x64:
1016 case 0x65:
1017 pf = &fSegmentPrefix;
1018 break;
1019
1020 case 0x66:
1021 pf = &fOperandSize;
1022 break;
1023
1024 case 0x67:
1025 pf = &fAddressSize;
1026 break;
1027
1028 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1029 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1030 pf = pState->Cpu.mode == CPUMODE_64BIT ? &fRex : NULL;
1031 break;
1032
1033 default:
1034 pf = NULL;
1035 break;
1036 }
1037 if (!pf)
1038 break; /* done */
1039 if (*pf)
1040 return true;
1041 *pf = true;
1042 }
1043
1044 /* segment overrides are fun */
1045 if (fSegmentPrefix)
1046 {
1047 /* no efficient address which it may apply to. */
1048 Assert((pState->Cpu.prefix & PREFIX_SEG) || pState->Cpu.mode == CPUMODE_64BIT);
1049 if ( !(pState->Cpu.param1.flags & USE_EFFICIENT_ADDRESS)
1050 && !(pState->Cpu.param2.flags & USE_EFFICIENT_ADDRESS)
1051 && !(pState->Cpu.param3.flags & USE_EFFICIENT_ADDRESS))
1052 return true;
1053 }
1054
1055 /* fixed register + addr override doesn't go down all that well. */
1056 if (fAddressSize)
1057 {
1058 Assert(pState->Cpu.prefix & PREFIX_ADDRSIZE);
1059 if ( pState->Cpu.pCurInstr->param3 == OP_PARM_NONE
1060 && pState->Cpu.pCurInstr->param2 == OP_PARM_NONE
1061 && ( pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1062 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_GEN32_END))
1063 return true;
1064 }
1065
1066
1067 /* check for the version of xyz reg,reg instruction that the assembler doesn't use.
1068 expected: 1aee sbb ch, dh ; SBB r8, r/m8
1069 yasm: 18F5 sbb ch, dh ; SBB r/m8, r8 */
1070 if (pState->Cpu.ModRM.Bits.Mod == 3 /* reg,reg */)
1071 {
1072 switch (pState->Cpu.pCurInstr->opcode)
1073 {
1074 case OP_ADC:
1075 case OP_ADD:
1076 case OP_AND:
1077 case OP_OR:
1078 case OP_SUB:
1079 case OP_SBB:
1080 case OP_XOR:
1081 if ( ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gb /* r8 */
1082 && pState->Cpu.pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1083 || ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gv /* rX */
1084 && pState->Cpu.pCurInstr->param2 == OP_PARM_Ev /* rX/memX */))
1085 return true;
1086 break;
1087
1088 /* ff /0, fe /0, ff /1, fe /0 */
1089 case OP_DEC:
1090 case OP_INC:
1091 return true;
1092
1093 default:
1094 break;
1095 }
1096 }
1097
1098 /* check for REX.X = 1 without SIB. */
1099
1100 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1101 says (intel doesn't appear to care). */
1102 switch (pState->Cpu.pCurInstr->opcode)
1103 {
1104 case OP_SETO:
1105 case OP_SETNO:
1106 case OP_SETC:
1107 case OP_SETNC:
1108 case OP_SETE:
1109 case OP_SETNE:
1110 case OP_SETBE:
1111 case OP_SETNBE:
1112 case OP_SETS:
1113 case OP_SETNS:
1114 case OP_SETP:
1115 case OP_SETNP:
1116 case OP_SETL:
1117 case OP_SETNL:
1118 case OP_SETLE:
1119 case OP_SETNLE:
1120 AssertMsg(pState->Cpu.opcode >= 0x90 && pState->Cpu.opcode <= 0x9f, ("%#x\n", pState->Cpu.opcode));
1121 if (pState->Cpu.ModRM.Bits.Reg != 2)
1122 return true;
1123 break;
1124 }
1125
1126 return false;
1127}
1128
1129
1130/**
1131 * Masm style.
1132 *
1133 * @param pState The disassembler state.
1134 */
1135static void MyDisasMasmFormatter(PMYDISSTATE pState)
1136{
1137 RTPrintf("masm not implemented: %s", pState->szLine);
1138}
1139
1140
1141/**
1142 * This is a temporary workaround for catching a few illegal opcodes
1143 * that the disassembler is currently letting thru, just enough to make
1144 * the assemblers happy.
1145 *
1146 * We're too close to a release to dare mess with these things now as
1147 * they may consequences for performance and let alone introduce bugs.
1148 *
1149 * @returns true if it's valid. false if it isn't.
1150 *
1151 * @param pCpu The disassembler output.
1152 */
1153static bool MyDisasIsValidInstruction(DISCPUSTATE const *pCpu)
1154{
1155 switch (pCpu->pCurInstr->opcode)
1156 {
1157 /* These doesn't take memory operands. */
1158 case OP_MOV_CR:
1159 case OP_MOV_DR:
1160 case OP_MOV_TR:
1161 if (pCpu->ModRM.Bits.Mod != 3)
1162 return false;
1163 break;
1164
1165 /* The 0x8f /0 variant of this instruction doesn't get its /r value verified. */
1166 case OP_POP:
1167 if ( pCpu->opcode == 0x8f
1168 && pCpu->ModRM.Bits.Reg != 0)
1169 return false;
1170 break;
1171
1172 /* The 0xc6 /0 and 0xc7 /0 variants of this instruction don't get their /r values verified. */
1173 case OP_MOV:
1174 if ( ( pCpu->opcode == 0xc6
1175 || pCpu->opcode == 0xc7)
1176 && pCpu->ModRM.Bits.Reg != 0)
1177 return false;
1178 break;
1179
1180 default:
1181 break;
1182 }
1183
1184 return true;
1185}
1186
1187
1188/**
1189 * Callback for reading bytes.
1190 *
1191 * @todo This should check that the disassembler doesn't do unnecessary reads,
1192 * however the current doesn't do this and is just complicated...
1193 */
1194static DECLCALLBACK(int) MyDisasInstrRead(RTUINTPTR uSrcAddr, uint8_t *pbDst, uint32_t cbRead, void *pvDisCpu)
1195{
1196 PMYDISSTATE pState = (PMYDISSTATE)pvDisCpu;
1197 if (RT_LIKELY( pState->uNextAddr == uSrcAddr
1198 && pState->cbLeft >= cbRead))
1199 {
1200 /*
1201 * Straight forward reading.
1202 */
1203 if (cbRead == 1)
1204 {
1205 pState->cbLeft--;
1206 *pbDst = *pState->pbNext++;
1207 pState->uNextAddr++;
1208 }
1209 else
1210 {
1211 memcpy(pbDst, pState->pbNext, cbRead);
1212 pState->pbNext += cbRead;
1213 pState->cbLeft -= cbRead;
1214 pState->uNextAddr += cbRead;
1215 }
1216 }
1217 else
1218 {
1219 /*
1220 * Jumping up the stream.
1221 * This occures when the byte sequence is added to the output string.
1222 */
1223 uint64_t offReq64 = uSrcAddr - pState->uAddress;
1224 if (offReq64 < 32)
1225 {
1226 uint32_t offReq = offReq64;
1227 uintptr_t off = pState->pbNext - pState->pbInstr;
1228 if (off + pState->cbLeft <= offReq)
1229 {
1230 pState->pbNext += pState->cbLeft;
1231 pState->uNextAddr += pState->cbLeft;
1232 pState->cbLeft = 0;
1233
1234 memset(pbDst, 0xcc, cbRead);
1235 pState->rc = VERR_EOF;
1236 return VERR_EOF;
1237 }
1238
1239 /* reset the stream. */
1240 pState->cbLeft += off;
1241 pState->pbNext = pState->pbInstr;
1242 pState->uNextAddr = pState->uAddress;
1243
1244 /* skip ahead. */
1245 pState->cbLeft -= offReq;
1246 pState->pbNext += offReq;
1247 pState->uNextAddr += offReq;
1248
1249 /* do the reading. */
1250 if (pState->cbLeft >= cbRead)
1251 {
1252 memcpy(pbDst, pState->pbNext, cbRead);
1253 pState->cbLeft -= cbRead;
1254 pState->pbNext += cbRead;
1255 pState->uNextAddr += cbRead;
1256 }
1257 else
1258 {
1259 if (pState->cbLeft > 0)
1260 {
1261 memcpy(pbDst, pState->pbNext, pState->cbLeft);
1262 pbDst += pState->cbLeft;
1263 cbRead -= pState->cbLeft;
1264 pState->pbNext += pState->cbLeft;
1265 pState->uNextAddr += pState->cbLeft;
1266 pState->cbLeft = 0;
1267 }
1268 memset(pbDst, 0xcc, cbRead);
1269 pState->rc = VERR_EOF;
1270 return VERR_EOF;
1271 }
1272 }
1273 else
1274 {
1275 RTStrmPrintf(g_pStdErr, "Reading before current instruction!\n");
1276 memset(pbDst, 0x90, cbRead);
1277 pState->rc = VERR_INTERNAL_ERROR;
1278 return VERR_INTERNAL_ERROR;
1279 }
1280 }
1281
1282 return VINF_SUCCESS;
1283}
1284
1285
1286/**
1287 * Disassembles a block of memory.
1288 *
1289 * @returns VBox status code.
1290 * @param argv0 Program name (for errors and warnings).
1291 * @param enmCpuMode The cpu mode to disassemble in.
1292 * @param uAddress The address we're starting to disassemble at.
1293 * @param pbFile Where to start disassemble.
1294 * @param cbFile How much to disassemble.
1295 * @param enmStyle The assembly output style.
1296 * @param fListing Whether to print in a listing like mode.
1297 * @param enmUndefOp How to deal with undefined opcodes.
1298 */
1299static int MyDisasmBlock(const char *argv0, DISCPUMODE enmCpuMode, uint64_t uAddress, uint8_t *pbFile, size_t cbFile,
1300 ASMSTYLE enmStyle, bool fListing, UNDEFOPHANDLING enmUndefOp)
1301{
1302 /*
1303 * Initialize the CPU context.
1304 */
1305 MYDISSTATE State;
1306 State.Cpu.mode = enmCpuMode;
1307 State.Cpu.pfnReadBytes = MyDisasInstrRead;
1308 State.uAddress = uAddress;
1309 State.pbInstr = pbFile;
1310 State.cbInstr = 0;
1311 State.enmUndefOp = enmUndefOp;
1312 State.rc = VINF_SUCCESS;
1313 State.cbLeft = cbFile;
1314 State.pbNext = pbFile;
1315 State.uNextAddr = uAddress;
1316
1317 void (*pfnFormatter)(PMYDISSTATE pState);
1318 switch (enmStyle)
1319 {
1320 case kAsmStyle_Default:
1321 pfnFormatter = MyDisasDefaultFormatter;
1322 break;
1323
1324 case kAsmStyle_yasm:
1325 RTPrintf(" BITS %d\n", enmCpuMode == CPUMODE_16BIT ? 16 : enmCpuMode == CPUMODE_32BIT ? 32 : 64);
1326 pfnFormatter = MyDisasYasmFormatter;
1327 break;
1328
1329 case kAsmStyle_masm:
1330 pfnFormatter = MyDisasMasmFormatter;
1331 break;
1332
1333 default:
1334 AssertFailedReturn(VERR_INTERNAL_ERROR);
1335 }
1336
1337 /*
1338 * The loop.
1339 */
1340 int rcRet = VINF_SUCCESS;
1341 while (State.cbLeft > 0)
1342 {
1343 /*
1344 * Disassemble it.
1345 */
1346 State.cbInstr = 0;
1347 State.cbLeft += State.pbNext - State.pbInstr;
1348 State.uNextAddr = State.uAddress;
1349 State.pbNext = State.pbInstr;
1350
1351 int rc = DISInstr(&State.Cpu, State.uAddress, 0, &State.cbInstr, State.szLine);
1352 if ( RT_SUCCESS(rc)
1353 || ( ( rc == VERR_DIS_INVALID_OPCODE
1354 || rc == VERR_DIS_GEN_FAILURE)
1355 && State.enmUndefOp == kUndefOp_DefineByte))
1356 {
1357 State.fUndefOp = rc == VERR_DIS_INVALID_OPCODE
1358 || rc == VERR_DIS_GEN_FAILURE
1359 || State.Cpu.pCurInstr->opcode == OP_INVALID
1360 || State.Cpu.pCurInstr->opcode == OP_ILLUD2
1361 || ( State.enmUndefOp == kUndefOp_DefineByte
1362 && !MyDisasIsValidInstruction(&State.Cpu));
1363 if (State.fUndefOp && State.enmUndefOp == kUndefOp_DefineByte)
1364 {
1365 RTPrintf(" db");
1366 if (!State.cbInstr)
1367 State.cbInstr = 1;
1368 for (unsigned off = 0; off < State.cbInstr; off++)
1369 {
1370 uint8_t b;
1371 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1372 RTPrintf(off ? ", %03xh" : " %03xh", b);
1373 }
1374 RTPrintf(" ; %s\n", State.szLine);
1375 }
1376 else if (!State.fUndefOp && State.enmUndefOp == kUndefOp_All)
1377 {
1378 RTPrintf("%s: error at %#RX64: unexpected valid instruction (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1379 pfnFormatter(&State);
1380 rcRet = VERR_GENERAL_FAILURE;
1381 }
1382 else if (State.fUndefOp && State.enmUndefOp == kUndefOp_Fail)
1383 {
1384 RTPrintf("%s: error at %#RX64: undefined opcode (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1385 pfnFormatter(&State);
1386 rcRet = VERR_GENERAL_FAILURE;
1387 }
1388 else
1389 {
1390 /* Use db for odd encodings that we can't make the assembler use. */
1391 if ( State.enmUndefOp == kUndefOp_DefineByte
1392 && MyDisasYasmFormatterIsOddEncoding(&State))
1393 {
1394 RTPrintf(" db");
1395 for (unsigned off = 0; off < State.cbInstr; off++)
1396 {
1397 uint8_t b;
1398 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1399 RTPrintf(off ? ", %03xh" : " %03xh", b);
1400 }
1401 RTPrintf(" ; ");
1402 }
1403
1404 pfnFormatter(&State);
1405 }
1406 }
1407 else
1408 {
1409 State.cbInstr = State.pbNext - State.pbInstr;
1410 if (!State.cbLeft)
1411 RTPrintf("%s: error at %#RX64: read beyond the end (%Rrc)\n", argv0, State.uAddress, rc);
1412 else if (State.cbInstr)
1413 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d\n", argv0, State.uAddress, rc, State.cbInstr);
1414 else
1415 {
1416 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d!\n", argv0, State.uAddress, rc, State.cbInstr);
1417 if (rcRet == VINF_SUCCESS)
1418 rcRet = rc;
1419 break;
1420 }
1421 }
1422
1423
1424 /* next */
1425 State.uAddress += State.cbInstr;
1426 State.pbInstr += State.cbInstr;
1427 }
1428
1429 return rcRet;
1430}
1431
1432
1433/**
1434 * Prints usage info.
1435 *
1436 * @returns 1.
1437 * @param argv0 The program name.
1438 */
1439static int Usage(const char *argv0)
1440{
1441 RTStrmPrintf(g_pStdErr,
1442"usage: %s [options] <file1> [file2..fileN]\n"
1443" or: %s <--help|-h>\n"
1444"\n"
1445"Options:\n"
1446" --address|-a <address>\n"
1447" The base address. Default: 0\n"
1448" --max-bytes|-b <bytes>\n"
1449" The maximum number of bytes to disassemble. Default: 1GB\n"
1450" --cpumode|-c <16|32|64>\n"
1451" The cpu mode. Default: 32\n"
1452" --listing|-l, --no-listing|-L\n"
1453" Enables or disables listing mode. Default: --no-listing\n"
1454" --offset|-o <offset>\n"
1455" The file offset at which to start disassembling. Default: 0\n"
1456" --style|-s <default|yasm|masm>\n"
1457" The assembly output style. Default: default\n"
1458" --undef-op|-u <fail|all|db>\n"
1459" How to treat undefined opcodes. Default: fail\n"
1460 , argv0, argv0);
1461 return 1;
1462}
1463
1464
1465int main(int argc, char **argv)
1466{
1467 RTR3Init();
1468 const char * const argv0 = RTPathFilename(argv[0]);
1469
1470 /* options */
1471 uint64_t uAddress = 0;
1472 ASMSTYLE enmStyle = kAsmStyle_Default;
1473 UNDEFOPHANDLING enmUndefOp = kUndefOp_Fail;
1474 bool fListing = true;
1475 DISCPUMODE enmCpuMode = CPUMODE_32BIT;
1476 RTFOFF off = 0;
1477 RTFOFF cbMax = _1G;
1478
1479 /*
1480 * Parse arguments.
1481 */
1482 static const RTOPTIONDEF g_aOptions[] =
1483 {
1484 { "--address", 'a', RTGETOPT_REQ_UINT64 },
1485 { "--cpumode", 'c', RTGETOPT_REQ_UINT32 },
1486 { "--help", 'h', 0 },
1487 { "--bytes", 'b', RTGETOPT_REQ_INT64 },
1488 { "--listing", 'l', 0 },
1489 { "--no-listing", 'L', 0 },
1490 { "--offset", 'o', RTGETOPT_REQ_INT64 },
1491 { "--style", 's', RTGETOPT_REQ_STRING },
1492 { "--undef-op", 'u', RTGETOPT_REQ_STRING },
1493 };
1494
1495 int ch;
1496 int iArg = 1;
1497 RTOPTIONUNION ValueUnion;
1498 while ((ch = RTGetOpt(argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), &iArg, &ValueUnion)))
1499 {
1500 switch (ch)
1501 {
1502 case 'a':
1503 uAddress = ValueUnion.u64;
1504 break;
1505
1506 case 'b':
1507 cbMax = ValueUnion.i;
1508 break;
1509
1510 case 'c':
1511 if (ValueUnion.u32 == 16)
1512 enmCpuMode = CPUMODE_16BIT;
1513 else if (ValueUnion.u32 == 32)
1514 enmCpuMode = CPUMODE_32BIT;
1515 else if (ValueUnion.u32 == 64)
1516 enmCpuMode = CPUMODE_64BIT;
1517 else
1518 {
1519 RTStrmPrintf(g_pStdErr, "%s: Invalid CPU mode value %RU32\n", argv0, ValueUnion.u32);
1520 return 1;
1521 }
1522 break;
1523
1524 case 'h':
1525 return Usage(argv0);
1526
1527 case 'l':
1528 fListing = true;
1529 break;
1530
1531 case 'L':
1532 fListing = false;
1533 break;
1534
1535 case 'o':
1536 off = ValueUnion.i;
1537 break;
1538
1539 case 's':
1540 if (!strcmp(ValueUnion.psz, "default"))
1541 enmStyle = kAsmStyle_Default;
1542 else if (!strcmp(ValueUnion.psz, "yasm"))
1543 enmStyle = kAsmStyle_yasm;
1544 else if (!strcmp(ValueUnion.psz, "masm"))
1545 {
1546 enmStyle = kAsmStyle_masm;
1547 RTStrmPrintf(g_pStdErr, "%s: masm style isn't implemented yet\n", argv0);
1548 return 1;
1549 }
1550 else
1551 {
1552 RTStrmPrintf(g_pStdErr, "%s: unknown assembly style: %s\n", argv0, ValueUnion.psz);
1553 return 1;
1554 }
1555 break;
1556
1557 case 'u':
1558 if (!strcmp(ValueUnion.psz, "fail"))
1559 enmUndefOp = kUndefOp_Fail;
1560 else if (!strcmp(ValueUnion.psz, "all"))
1561 enmUndefOp = kUndefOp_All;
1562 else if (!strcmp(ValueUnion.psz, "db"))
1563 enmUndefOp = kUndefOp_DefineByte;
1564 else
1565 {
1566 RTStrmPrintf(g_pStdErr, "%s: unknown undefined opcode handling method: %s\n", argv0, ValueUnion.psz);
1567 return 1;
1568 }
1569 break;
1570
1571 default:
1572 RTStrmPrintf(g_pStdErr, "%s: syntax error: %Rrc\n", argv0, ch);
1573 return 1;
1574 }
1575 }
1576 if (iArg >= argc)
1577 return Usage(argv0);
1578
1579 /*
1580 * Process the files.
1581 */
1582 int rc = VINF_SUCCESS;
1583 for ( ; iArg < argc; iArg++)
1584 {
1585 /*
1586 * Read the file into memory.
1587 */
1588 void *pvFile;
1589 size_t cbFile;
1590 rc = RTFileReadAllEx(argv[iArg], off, cbMax, 0, &pvFile, &cbFile);
1591 if (RT_FAILURE(rc))
1592 {
1593 RTStrmPrintf(g_pStdErr, "%s: %s: %Rrc\n", argv0, argv[iArg], rc);
1594 break;
1595 }
1596
1597 /*
1598 * Disassemble it.
1599 */
1600 rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, (uint8_t *)pvFile, cbFile, enmStyle, fListing, enmUndefOp);
1601 if (RT_FAILURE(rc))
1602 break;
1603 }
1604
1605 return RT_SUCCESS(rc) ? 0 : 1;
1606}
1607
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette