VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 95308

Last change on this file since 95308 was 95308, checked in by vboxsync, 3 years ago

VMM/IEM: Implemented ANDN, BEXTR, SHLX, SARX, SHRX, RORX, TZCNT, and LZCNT. Fixed long-mod bug in 32-bit version of BSR and BSF (would clear the upper 32 bits of the destination register when ZF=1). bugref:9898

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 166.6 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 95308 2022-06-19 20:40:26Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017-2022 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.215389.xyz. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 95308 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
# Python 3 compatibility: the 'long' builtin is gone there, so alias it to
# 'int' to let the rest of the module keep calling long().
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## X86_EFL_XXX constant names mapped to their numeric values.
## The trailing comment on each entry gives the bit expression the value
## corresponds to.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the name of an X86_EFL_XXX constant (see
## g_kdX86EFlagsConstants).  A leading '!' on the constant name marks the
## mnemonic that denotes the flag being clear.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): on x86 PF is *set* for even parity, so 'po' and 'pe' look
    # swapped here - confirm against the users of these mnemonics before changing.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
124
## Constants and values for CR0.
## Maps X86_CR0_XXX names to their numeric values; the trailing comments give
## the corresponding bit expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
139
## Constants and values for CR4.
## Maps X86_CR4_XXX names to their numeric values; the trailing comments give
## the corresponding bit expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
161
## XSAVE components (XCR0).
## Maps XSAVE_C_XXX names to their numeric values.  The last two entries are
## combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## The keys are the valid operand location names; every value is an empty list
## here.  The second field of each g_kdOpTypes entry is one of these keys.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
200
## \@op[1-4] types
##
## Keys are the operand type names, each value is a 5-tuple.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # rFLAGS
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
342
343# IDX_ParseFixedReg
344# IDX_ParseVexDest
345
346
## IEMFORM_XXX mappings.
##
## Each value is a 3-tuple:
## - 0: the encoding name (sEncoding).
## - 1: list of operand locations in operand order, or None for 'FIXED'.
## - 2: the opcodesub string ('' when there is none).
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
385
## \@oppfx values.
## The keys are the accepted prefix names; the values are empty lists here.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
393
## Special \@opcode tag values.
## The keys are the accepted special values; the values are empty lists here.
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
403
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Entries that are not aliases map to themselves in the first field.
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    'vex.l=1': [ 'vex.l=1', 'L1', ], # Was mapped to 'vex.l=0', losing the L=1 distinction.
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
};
424
## Valid values for \@openc
## Each value is a one element list holding the BS3CG1ENC_XXX constant name
## for the encoding, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
433
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys are the accepted style names; the values are empty lists here.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit (imm8) immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit (imm8) immediate.
};
443
## CPU names.
## The keys are the known names; every value is an empty tuple here.
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
451
## \@opcpuid
## Maps the feature name used with the tag to the name of a X86_CPUID_XXX
## feature constant.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): 'pclmul', 'monitor' and 'smx' below map to constants named
    # after different features (DTES64, CPLDS, TM2) - presumably they should be
    # the matching ..._ECX_PCLMUL / _MONITOR / _SMX names; verify against x86.h.
    'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
497
## \@ophints values.
## Maps the hint name to the name of a DISOPTYPE_XXX constant.  The hints at
## the end of the table map to an empty string instead of a constant name.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Presumably shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
537
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted exception type names; the values are empty lists here.
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '5LZ': [], # LZ = VEX.L must be zero.
    '6': [],
    '7': [],
    '7LZ': [],
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
573
574
575def _isValidOpcodeByte(sOpcode):
576 """
577 Checks if sOpcode is a valid lower case opcode byte.
578 Returns true/false.
579 """
580 if len(sOpcode) == 4:
581 if sOpcode[:2] == '0x':
582 if sOpcode[2] in '0123456789abcdef':
583 if sOpcode[3] in '0123456789abcdef':
584 return True;
585 return False;
586
587
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    ## NOTE(review): the comments say 'vvvvv'; presumably the map select field
    ## of the prefix is meant - confirm.
    kdEncodings = {
        'legacy': [],
        'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8': [], ##< XOP prefix with vvvvv = 8
        'xop9': [], ##< XOP prefix with vvvvv = 9
        'xop10': [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod == 0y11.
        '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sName is the map name, sIemName an optional second name that is only
        stored here.  asLeadOpcodes is a list of lead opcode bytes as lower
        case hex strings (like '0x0f'); None means no lead bytes.  sSelector
        must be a kdSelectors key and sEncoding a kdEncodings key.  sDisParse
        is None or a string starting with 'IDX_Parse'.

        Invalid arguments trigger assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName = sName;
        self.sIemName = sIemName;
        self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector = sSelector; ##< The member selector, see kdSelectors.
        self.sEncoding = sEncoding; ##< The encoding, see kdEncodings.
        self.aoInstructions = []; ##< List of the instruction objects belonging to this map.
        self.sDisParse = sDisParse; ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering a 'byte+pfx' selected map, the copy gets the plain
        'byte' selector since only a single prefix remains.  The instruction
        list is a new list referencing the same instruction objects, while the
        lead opcode list object is shared with the original.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        oInstr must provide getOpcodeByte() as well as the sOpcode and sPrefix
        attributes.  For the selectors other than 'byte' and 'byte+pfx' the
        opcode byte value is picked apart like a ModR/M byte: bits 3 thru 5
        are taken as the reg field and bits 6 and 7 as the mod field.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
            return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note: The mod field check must compare the masked value with 0xc0.
        #       Comparing with 0xc can never match a value masked by 0xc0.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.

        Instructions with an empty sOpcode are left out.
        """
        # Start with empty table.
        cTable = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, switch the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm' and 'r' words as well as two digit lower case hex words are
        appended with a leading underscore, other words get 'grp' / 'map'
        turned into 'Grp' / 'Map' and the first character upper cased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm': # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r': # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord = sWord.replace('grp', 'Grp');
                sWord = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.

        This is the table name with the 'g_aDisas' prefix replaced by
        'g_Disas' and 'Range' appended.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
780
781
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        # Normal sizes in bytes.  get() picks the first one that fits and uses
        # the last one as the maximum, so the list must be sorted ascending.
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.

        The bytearray holds the value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Convert it to an integer.  int() is used instead of long() as the
        # latter is not a builtin in Python 3, while int() yields arbitrary
        # precision integers on both Python 2 and 3.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (ValueError, TypeError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string ('%x' gives neither '0x' prefix nor
        # Python 2 'L' suffix).  Negative values need to be manually converted
        # to something non-negative (~-n + 1), which is done by inverting the
        # digits of (-n - 1).
        if iValue >= 0:
            # NOTE(review): a non-negative value with fSignExtend set is not
            # widened here when its top hex digit is >= 8 (e.g. '+0x80'), so it
            # would presumably read back as negative if the consumer sign
            # extends it - confirm whether that is intended.
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in ('8', '9', 'a', 'b', 'c', 'd', 'e', 'f'):
                sHex = 'f' + sHex;

        # Figure out how many digits the result should have.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Too big for any normal size, so round up to a multiple of the
            # largest one.  Floor division keeps this exact integer math.
            cNaturalDigits = (cDigits + cMaxDigits - 1) // cMaxDigits * cMaxDigits;

        # Pad with zeros or, for negative values, with sign digits.
        if cNaturalDigits != cDigits:
            sPadDigit = '0' if iValue >= 0 else 'f';
            sHex = (sPadDigit * (cNaturalDigits - cDigits)) + sHex;

        # Convert to bytearray, walking the digit pairs from the end so the
        # least significant byte comes first.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class, get() only returns single entries.
        """
        _ = sValue;
        return False;
906
907
class TestTypeEflags(TestType):
    """
    Value parser for EFLAGS/RFLAGS/FLAGS, taking comma separated flag mnemonics.
    """

    ## Flag mnemonics that make isAndOrPair() return True when found in a value.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the comma separated flag mnemonics in sValue into masks.

        Each mnemonic is looked up in g_kdEFlagsMnemonics.  Constants prefixed
        by '!' are accumulated into a clear mask, the rest into a set mask.

        Returns the (clear, set) entry pair when the clear mask is non-zero,
        otherwise the single entry tuple for the set mask.
        Raises BadValue on unknown mnemonics.
        """
        fSetMask   = 0;
        fClearMask = 0;
        for sMnemonic in sValue.split(','):
            sConstName = g_kdEFlagsMnemonics.get(sMnemonic);
            if sConstName is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue));
            if sConstName[0] != '!':
                fSetMask |= g_kdX86EFlagsConstants[sConstName];
            else:
                fClearMask |= g_kdX86EFlagsConstants[sConstName[1:]];

        aoSetEntries = TestType.get(self, '0x%x' % (fSetMask,));
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSetEntries;

        aoClearEntries = TestType.get(self, '0x%x' % (fClearMask,));
        assert self.isAndOrPair(sValue) is True;
        return (aoClearEntries[0], aoSetEntries[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags mnemonics occurs as a
        substring of sValue, otherwise False.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
943
class TestTypeFromDict(TestType):
    """
    Value parser for comma separated flag names that are looked up in a
    dictionary of constants (given to the constructor together with the
    prefix the constant names share).
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of the flags named in sValue.

        Each name is uppercased and prefixed with sConstantPrefix before
        being looked up in kdConstantsAndValues.

        Returns what TestType.get() returns for the combined value.
        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlagName in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlagName.upper();
            fFlagMask = self.kdConstantsAndValues.get(sConstant);
            if fFlagMask is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlagName, sValue));
            fCombined |= fFlagMask;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
964
965
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance records the field (a kdFields key), the assignment operator
    (a kasOperators entry), the value string and the type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # NOTE(review): 'int' is created with the TestType default of
        # fUnsigned=True, making it behave like 'uint' - confirm whether
        # fUnsigned=False was intended.
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        # Misspelled variant of 'xmm15.dw0' (underscore instead of dot), kept
        # so anything already using the old spelling still validates.
        'xmm15_dw0':    ( 'uint', 'both',   ),
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp a kasOperators entry; all four
        arguments must be strings (checked by assertions).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1231
1232
class TestSelector(object):
    """
    A single selector of an instruction test, that is a variable compared
    against one of the values valid for it.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue one of the values valid for sVariable (checked by assertions).
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1329
1330
1331class InstructionTest(object):
1332 """
1333 Instruction test.
1334 """
1335
1336 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1337 self.oInstr = oInstr # type: InstructionTest
1338 self.aoInputs = [] # type: list(TestInOut)
1339 self.aoOutputs = [] # type: list(TestInOut)
1340 self.aoSelectors = [] # type: list(TestSelector)
1341
1342 def toString(self, fRepr = False):
1343 """
1344 Converts it to string representation.
1345 """
1346 asWords = [];
1347 if self.aoSelectors:
1348 for oSelector in self.aoSelectors:
1349 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1350 asWords.append('/');
1351
1352 for oModifier in self.aoInputs:
1353 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1354
1355 asWords.append('->');
1356
1357 for oModifier in self.aoOutputs:
1358 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1359
1360 if fRepr:
1361 return '<' + ' '.join(asWords) + '>';
1362 return ' '.join(asWords);
1363
1364 def __str__(self):
1365 """ Provide string represenation. """
1366 return self.toString(False);
1367
1368 def __repr__(self):
1369 """ Provide unambigious string representation. """
1370 return self.toString(True);
1371
class Operand(object):
    """
    An operand of an instruction, described by where it is found and its type.
    """

    def __init__(self, sWhere, sType):
        # Both values must be known to the respective global dictionary.
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        ## The operand location, a g_kdOpLocations key.
        self.sWhere = sWhere;
        ## The operand type, a g_kdOpTypes key.
        self.sType  = sType;

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. uses some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1386
1387
1388
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects the core attributes of an instruction (mnemonic, maps, operands,
    opcode, flags, tests and so on), implementation attributes, info on where
    it was decoded from, and intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, leaving
        out fields that are None.  It is enclosed in '<' and '>' when fRepr
        is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '<' if fRepr else '';
        for sField, sValue in aasFields:
            if sValue is not None:
                # Separator needed once something besides the '<' is there.
                if len(sRet) > 1:
                    sRet += '; ';
                sRet += '%s=%s' % (sField, sValue,);
        if fRepr:
            sRet += '>';

        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this object as parent (oParent).  The oMap, sOpcode,
        sSubOpcode and sPrefix arguments replace the corresponding attributes
        in the copy when given (when they evaluate as true).  The list and
        dictionary attributes that are wrapped in list() or dict() below are
        shallow copies, everything else is shared with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.

        Supported forms (r being a single digit):
            - 0xNN:  The byte value itself.
            - /r:    r << 3.
            - 11/r:  (r << 3) | 0xc0.
            - !11/r: (r << 3) | 0x80.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  This is two characters long ('/' + digit).  The check
        # used to require a length of four, which '/r' never has, so the form
        # was always rejected.  Four character strings are still accepted in
        # order not to reject anything that used to be accepted.
        if len(sOpcode) in (2, 4) and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and the value of the
        resulting constant is taken from g_kdX86EFlagsConstants.  Returns 0
        if asFlags is None or empty.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1627
1628
1629
1630## All the instructions.
1631g_aoAllInstructions = [] # type: list(Instruction)
1632
1633## All the instructions indexed by statistics name (opstat).
1634g_dAllInstructionsByStat = {} # type: dict(Instruction)
1635
1636## All the instructions indexed by function name (opfunction).
1637g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1638
1639## Instructions tagged by oponlytest
1640g_aoOnlyTestInstructions = [] # type: list(Instruction)
1641
1642## Instruction maps.
1643g_aoInstructionMaps = [
1644 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1645 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1646 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1647 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1648 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1649 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1650 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1651 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1652 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1653 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1654 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1655 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1656 ## @todo g_apfnEscF1_E0toFF
1657 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1658 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1659 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1660 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1661 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1662 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1663 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1664 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1665
1666 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1667 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1668 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1669 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1670 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1671 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1672 ## @todo What about g_apfnGroup9MemReg?
1673 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1674 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1675 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1676 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1677 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1678 ## @todo What about g_apfnGroup15RegReg?
1679 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1680 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1681 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1682
1683 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1684 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1685
1686 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1687 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1688 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1689 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1690 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1691 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1692
1693 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1694 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1695
1696 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1697 InstructionMap('xopmap8', sEncoding = 'xop8'),
1698 InstructionMap('xopmap9', sEncoding = 'xop9'),
1699 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1700 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1701 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1702 InstructionMap('xopmap10', sEncoding = 'xop10'),
1703 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1704];
# Lookup dictionaries over g_aoInstructionMaps: the first is keyed by the map
# name (sName), the second by the sIemName attribute of each map.
g_dInstructionMaps = dict((oMap.sName, oMap) for oMap in g_aoInstructionMaps);
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps);
1707
1708
1709
class ParserException(Exception):
    """ Exception raised by the parser; carries a preformatted error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1714
1715
1716class SimpleParser(object):
1717 """
1718 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1719 """
1720
1721 ## @name Parser state.
1722 ## @{
1723 kiCode = 0;
1724 kiCommentMulti = 1;
1725 ## @}
1726
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Sets up the parser state for one source file.

    sSrcFile is the file name used when formatting error messages, asLines
    holds the source lines to parse, and sDefaultMap is the name of the
    instruction map (must be a key in g_dInstructionMaps) that is applied to
    instructions which do not name any map themselves.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = [];

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;

    # Precompiled regular expressions.  Raw strings are used throughout so the
    # backslashes reach the regex engine untouched instead of relying on
    # Python passing unknown string escapes (like '\[' and '\d') through.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.fDebug         = True;

    # Tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags come in two spellings: @optestN and @optest[N].
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    # Error messages collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
1795
def raiseError(self, sMessage):
    """
    Raises a ParserException whose text is sMessage prefixed with the source
    file name and the current line number (self.iLine).
    """
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMessage);
1801
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iLine = self.iCommentLine + iLineInComment;
    sFullMessage = "%s:%d: error: %s" % (self.sSrcFile, iLine, sMessage,);
    raise ParserException(sFullMessage);
1807
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.
    Always returns False, so callers can do 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1815
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number iLine in
    self.asErrors.  Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1823
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.  Always returns False.
    """
    iLine = self.iCommentLine + iLineInComment;
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1831
def printErrors(self):
    """
    Writes all collected error messages to stderr (nothing is written when
    there are none).
    Returns the number of errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1840
def debug(self, sMessage):
    """
    Prints sMessage with a 'debug: ' prefix when self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,));
1847
def stripComments(self, sLine):
    """
    Returns sLine with each complete comment (as matched by self.oReComment)
    replaced by a single space.

    An error is recorded if a comment start or end marker remains afterwards,
    as that indicates a multi-line comment this method cannot handle.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
1858
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line starting the table definition.  The table entries are
    matched up with the instructions registered in
    g_dAllInstructionsByFunction; opcode and prefix values that are missing
    are filled in from the table position, and an instruction whose function
    appears at a position not matching any existing instruction is copied.

    Returns True on success, False (after recording an error) on failure.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    oNameMatch = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine);
    if not oNameMatch:
        return self.error('Failed to extract the PFNIEMOP table name from: %s' % (sLine.strip(),));
    sName = oNameMatch.group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match(r'^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.  Walking
        # backwards keeps the indexes valid while inserting and deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # No existing instruction fits this table slot, so clone the first one.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
1964
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it both in
    g_aoAllInstructions and in the list of current instructions.

    iLine is the line number to create it with; self.iLine is used when it is
    None.  Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
1973
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string
    (underscore separated: mnemonic first, then operand types).

    Nothing is done if the instruction already has a mnemonic, and operands
    are only derived when the instruction has none.  Returns False if an
    operand type is not found in g_kdOpTypes, otherwise True.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asWords = sStats.split('_');
    oInstr.sMnemonic = asWords[0].lower();
    if len(asWords) <= 1 or oInstr.aoOperands:
        return True;

    for sType in asWords[1:]:
        if sType not in g_kdOpTypes:
            # Unknown operand type; operands added so far are left in place.
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
1989
def doneInstructionOne(self, oInstr, iLine):
    """
    Completes the parsing of one instruction: records the completion line,
    fills in defaults derived from what was specified (mnemonic, disassembler
    enum, stats name, function name, maps, encoding) and registers the
    instruction in its maps and in the by-stats and by-function dictionaries.

    Always returns True; a duplicate stats name is reported via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    # Note: Instructions specified with @op tags (cOpTags > 0) and unspecified
    #       legacy ones ('/** Opcode 0x0f 0x00 /0. */' + FNIEMOPRM_DEF) do not
    #       get any separate treatment here; all of the below applies to both.

    def operandSuffix():
        """ Returns '_<type>' for each operand with a type, concatenated. """
        return ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from the stats name, or failing that the
    # function name, if the mnemonic is missing.
    if oInstr.sMnemonic is None:
        sGuessFrom = oInstr.sStats;
        if sGuessFrom is None and oInstr.sFunction is not None:
            sGuessFrom = oInstr.sFunction.replace('iemOp_', '');
        if sGuessFrom is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sGuessFrom);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or from
    # mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic + operandSuffix();

    # Derive the IEM function name from mnemonic and operand types, or from
    # the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic + operandSuffix();
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if oInstr.aoOperands:
            if oInstr.aoOperands[0].usesModRM():
                fVex =    (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                       or oInstr.onlyInVexMaps();
                oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';
        else:
            if oInstr.fUnused and oInstr.sSubOpcode:
                (sPlain, sVex) = ('ModR/M', 'VEX.ModR/M');
            else:
                (sPlain, sVex) = ('fixed', 'VEX.fixed');
            oInstr.sEncoding = sVex if oInstr.onlyInVexMaps() else sPlain;

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    if oInstr.sStats:
        if oInstr.sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));
        else:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
2095
def doneInstructions(self, iLineInComment = None):
    """
    Completes all the current instructions, updates the instruction and stub
    totals, and resets the current comment and instruction list.

    The completion line is self.iLine, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  Always returns True.
    """
    if iLineInComment is None:
        iLineDone = self.iLine;
    else:
        iLineDone = self.iCommentLine + iLineInComment;

    for oInstr in self.aoCurInstrs:
        self.doneInstructionOne(oInstr, iLineDone);
        if oInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh.
    self.aoCurInstrs = [];
    self.sComment    = '';
    return True;
2110
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True (the bool, not merely something truthy), only
    attributes that are currently None are replaced.
    """
    for oInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oInstr, sAttrib) is None:
            setattr(oInstr, sAttrib, oValue);
2122
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the list attribute sAttrib of all current
    instructions to oValue, padding the list with None as needed.

    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oInstr in self.aoCurInstrs:
        aoArray = getattr(oInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None,] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
2134
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first line of the comment starts with the word 'Opcode' followed by
    a hex value, the words following 'Opcode' are stored space separated in
    the sRawOldOpcodes attribute of the current instructions (where not
    already set), with any '0X' prefix normalized to '0x'.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes and
        # anything following them (like '/4').
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
2150
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensures there is an instruction for the op-tag being parsed, adding one
    at line self.iCommentLine + iTagLine if there are no current instructions.

    The tag count of every current instruction is incremented, and
    self.cTotalTagged is bumped for each instruction getting its first tag.
    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);
    for oCurInstr in self.aoCurInstrs:
        cTags = oCurInstr.cOpTags + 1;
        oCurInstr.cOpTags = cTags;
        if cTags == 1:
            self.cTotalTagged += 1;
    return self.aoCurInstrs[-1];
2160
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings, joining the stripped
    lines of each section with a space.  Empty sections are dropped.
    Returns list of strings, one section per string.
    """
    return [' '.join([sLine.strip() for sLine in asLines]) for asLines in aasSections if asLines];
2172
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens the sections into one string: the stripped lines of a section
    are joined using sLineSep, and each non-empty section other than the very
    first one in the input is preceded by sSectionSep.  Empty sections
    contribute nothing.  There is no trailing separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1:
        asOnly = aasSections[0];
        if len(asOnly) == 1:
            return asOnly[0].strip();

    asPieces = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asPieces.append(sSectionSep);
        asPieces.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asPieces);
2190
2191
2192
2193 ## @name Tag parsers
2194 ## @{
2195
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opbrief
    Value:      Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating dot is added if missing.  Returns True on success, False
    (after recording a comment error) otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
    # Find the first dot followed by a space (or the final dot); dots inside
    # words (like in 'ST0.x') do not end the sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
2225
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdesc
    Value:      Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is flattened
    to one string and appended to the instruction's asDescSections list.
    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
2240
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmnemonic
    Value:      mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, False (after recording a comment error) if the
    value is invalid or the instruction already has a mnemonic.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, refusing to overwrite an existing one.
    sMnemonic = self.flattenAllSections(aasSections);
    sError = None;
    if not self.oReMnemonic.match(sMnemonic):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sMnemonic,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    oInstr.sMnemonic = sMnemonic;
    return True;
2263
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:       \@op1, \@op2, \@op3, \@op4
    Value:      [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations; when
    omitted, the default location of the type (g_kdOpTypes[type][1]) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    Returns True on success, False (after recording a comment error) if the
    value is malformed or the operand has already been specified.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;          # The operand number is the last character of the tag.
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2313
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmaps
    Value:      map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key in g_dInstructionMaps.  Whitespace around the
    names is ignored.  Returns True on success, False (after recording a
    comment error) if the value is missing, names an unknown map, or repeats
    a map already assigned to the instruction.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.  Note that str.split(',')
    # yields [''] for an empty string, so empty entries are dropped before
    # checking that there is a value at all.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
2348
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oppfx
    Value:      n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive and a two character value gets '0x'
    prepended.  'n/a' is stored as None.  Returns True on success, False
    (after recording a comment error) otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
2386
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcode
    Value:      0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are values passing _isValidOpcodeByte() as well as '/<reg>',
    '11/<reg>' and '!11/<reg>' with <reg> being a single digit in the range
    0 thru 7.  Returns True on success, False (after recording a comment
    error) if the value is invalid or the opcode has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The ModR/M reg field is three bits wide, so only /0 thru /7 exist.
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
2416
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcodesub
    Value:      none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
                | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored in the
    instruction is the first element of the corresponding entry.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value, then translate it.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it, refusing to overwrite an existing value.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;
    return True;
2443
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@openc
    Value:      ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted if it is a key in g_kdEncodings, a key in
    g_dInstructionMaps, or passes _isValidOpcodeByte().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    if     sEncoding not in g_kdEncodings \
       and sEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, refusing to overwrite an existing value.
    if oInstr.sEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;
    return True;
2470
## Maps each EFLAGS tag to the name of the Instruction attribute it sets.
kdOpFlagToAttr = dict((
    ('@opfltest',   'asFlTest'),
    ('@opflmodify', 'asFlModify'),
    ('@opflundef',  'asFlUndefined'),
    ('@opflset',    'asFlSet'),
    ('@opflclear',  'asFlClear'),
));
2479
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of flags, each of which must be a key
    in g_kdEFlagsMnemonics, or the single word 'none' for an empty list.  The
    resulting list is stored in the instruction attribute that
    kdOpFlagToAttr associates with the tag, unless one is already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, refusing to overwrite an existing list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
2511
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@ophints
    Value:      Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key in g_kdHints; the single word 'none' gives an
    empty list.  Valid hints are added to the instruction's dHints
    dictionary.  Returns False if any hint is invalid, otherwise True (a
    duplicate hint is reported as an error but does not fail the tag).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2545
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdisenum
    Value:      OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.

    The value must match self.oReDisEnum.  If more than one word is given an
    error is recorded and the first word is used.  Returns True on success,
    False if there is no value, it is invalid, or already set.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    sDisEnum = asWords[0];
    if self.oReDisEnum.match(sDisEnum) is None:
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # Set it, refusing to overwrite an existing value.
    if oInstr.sDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
    oInstr.sDisEnum = sDisEnum;
    return True;
2574
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a key in g_kdCpuNames.  Returns False (after recording
    a comment error) if the value is missing or not a valid CPU name.  An
    attempt to change an already set value to a different one is reported as
    an error and the existing value is kept; the method still returns True in
    that case.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  (Must not be assigned before this check, or a conflicting
    # value would never be detected.)
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2604
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a space and/or comma separated list of g_kdCpuIdFlags keys.
    The single word 'none' (any case) means there is nothing to add.  Valid
    flags are appended to oInstr.asCpuIds; duplicates are reported.

    Returns False if any flag is invalid (nothing is appended then),
    otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                sStripped = sCpuId.strip();
                if sStripped in g_kdCpuIdFlags:
                    # Store the normalized name back into the list.  The string
                    # itself is immutable and cannot be assigned into.
                    asCpuIds[iCpuId] = sStripped;
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2638
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Expects exactly one word matching self.oReGroupName and stores it in
    oInstr.sGroup, which must still be unset.  Returns True on success and
    the self.errorComment() result on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be one and only one group name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    sNewGroup = asNames[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # An earlier group assignment is never replaced.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));

    oInstr.sGroup = sNewGroup;
    return True;
2664
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    \@opunused marks the specification as being for a currently unused
    instruction encoding, \@opinvalid as being for an invalid encoding
    (like UD2), and \@opinvlstyle only states how CPUs decode the
    instruction when it is not supported (\@opcpuid, \@opmincpu) or disabled.

    The single-word value must be a key in g_kdInvalidStyles and is stored
    in oInstr.sInvalidStyle (once only).  \@opunused additionally sets
    oInstr.fUnused and \@opinvalid sets oInstr.fInvalid.

    Returns True on success, otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word, and it has to be a known style.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used per instruction.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags also flag the instruction itself.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
2702
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the tag value is one test.  A section is split into
    words which are sorted into selectors, inputs and outputs using the
    '/' and '->' separator words.  Selectors are validated against
    TestSelector, inputs and outputs against TestInOut.  Tests that parse
    cleanly are appended to oInstr.aoTests; problems are reported via
    self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # Walk the words back to front: outputs come last, so we start out
        # collecting those and switch list whenever a separator is found.
        for sWord in reversed(asWords):
            # Check for array switchers.  These must be identity checks, as
            # the lists may well compare equal (e.g. when both are empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicate shorthands are expanded to a full condition first.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                        % ( sTag, sValue, sCond,
                                                            TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                    % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                    % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                        % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                    % ( sTag, sDesc, sField, sItem,
                                                        ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                   if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2847
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number embedded in the tag is compared with the number of tests the
    instruction already has and a mismatch is reported.  The tag is then
    handed to parseTagOpTest either way, and its result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # '@optest[' is eight characters and '@optest' is seven.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2863
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    A simple trick for ignoring a test while debugging another one: the
    tag and its value are accepted and discarded.

    See also \@oponlytest.

    Always returns True.
    """
    # Reference the arguments so they don't show up as unused.
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
2875
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each word of the value must match self.oReStatsName or
    self.oReFunctionName and is appended to oInstr.asCopyTests.  The copy
    jobs are only recorded here, which allows for forward references.

    Returns the self.errorComment() result if the value is empty, otherwise
    True (invalid and duplicate references are reported and skipped).
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asRefs = self.flattenAllSections(aasSections).split();
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sRef in asRefs:
        # Duplicates are checked before validity.
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,));
            continue;
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
2904
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is in
    there already.

    See also \@optestignore.

    Returns True on success, or the self.errorComment() result if the tag
    was given a value.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value at all.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
2927
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The single-word value must be a key in g_kdXcptTypes and is stored in
    oInstr.sXcptType, which may only be set once.  Returns True on success,
    otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # One word, and it must be a known exception type.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Only one @opxcpttype per instruction.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,));

    oInstr.sXcptType = sNewType;
    return True;
2954
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally it is picked up from
    the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought that setting it may be necessary when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReFunctionName and is stored in
    oInstr.sFunction, which may only be set once.  Returns True on success,
    otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name; it is not split into words.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    sOldName = oInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sOldName, sNewName,));

    oInstr.sFunction = sNewName;
    return True;
2982
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally it is picked up from the
    IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought that setting it may be necessary when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReStatsName and is stored in
    oInstr.sStats, which may only be set once.  Returns True on success,
    otherwise the self.errorComment() result.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The whole flattened value is the name; it is not split into words.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReStatsName.pattern));

    sOldName = oInstr.sStats;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sOldName, sNewName,));

    oInstr.sStats = sNewName;
    return True;
3010
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the self.doneInstructions() result, or the self.errorComment()
    result if the tag was given a value.
    """
    _ = iEndLine;
    sUnexpected = self.flattenAllSections(aasSections);
    if sUnexpected == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sUnexpected,));
3023
3024 ## @}
3025
3026
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines with the leading asterisks and
    whitespace removed.  An old style 'Opcode ...' first line is passed to
    self.parseCommentOldOpcode().  The lines are then grouped by '@' tag,
    with text preceding the first tag belonging to a pseudo tag '@default',
    and each tag is dispatched via self.dTagHandlers.

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = [sLine.lstrip().lstrip('*').lstrip() for sLine in self.sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    def processTag(sTag, aasTagSections, iTagLine, iEndLine):
        """
        Processes one complete tag.  Shared by the loop below and the final
        tag, so both get the same diagnostics.

        Returns (cHandled, sDefaultText), where cHandled is 1 if a tag
        handler was invoked and sDefaultText is the flattened text when the
        tag is the '@default' pseudo tag (None otherwise).
        """
        # Drop the trailing empty section a blank line may have started.
        if len(aasTagSections) > 1 and not aasTagSections[-1]:
            aasTagSections.pop(-1);
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine);
            return (1, None);
        if sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag));
        elif sTag == '@default':
            return (0, self.flattenAllSections(aasTagSections));
        elif '@op' + sTag[1:] in self.dTagHandlers:
            self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag));
        elif sTag in ['@encoding', '@opencoding']:
            self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,));
        return (0, None);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Text line: add it to the current section.  A blank line
            # starts a new section unless the current one is still empty.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            cHandled, sDefaultText = processTag(sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += cHandled;
            if sDefaultText is not None:
                sFlatDefault = sDefaultText;

            #
            # New tag.
            #
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    cHandled, sDefaultText = processTag(sCurTag, aasSections, iCurTagLine, iLine);
    cOpTags += cHandled;
    if sDefaultText is not None:
        sFlatDefault = sDefaultText;

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
3127
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  When the closing
    parenthesis is not within sInvocation, further lines are taken from
    self.asLines, starting at index self.iLine.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.

    NOTE(review): on an invalid macro name the self.error() result is
    returned instead of a tuple - confirm what callers expect.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;
    while cDepth > 0:
        # Pull in more lines till there is something to look at.  This must
        # be a loop, since a continuation line may be empty.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a string or character literal: skip escaped characters
            # and look for the closing quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the outermost level delimit arguments.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
3178
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed by an opening parenthesis (after
    optional whitespace).

    Returns (len(sCode), None) if not found, parseMacroInvocation result if found.
    """
    offHit = sCode.find(sMacro);
    # Note! Don't index the remainder, it is empty when sCode ends with sMacro.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
3188
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx that drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
3194
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no more names are tried after the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asArgs for asArgs in oHits if asArgs is not None), None);
3204
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are checked for consistency with each other and
    with what the current instruction (last entry in self.aoCurInstrs,
    added if there is none) already has from its tags.  Unset instruction
    attributes (mnemonic, operands, encoding, stats name, hints) are filled
    in from the macro.  Problems are reported via self.error().

    sAsm is not used.  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Note! These must be membership tests.  str.find() returns -1
                #       (which is true) on a miss and 0 (false) for a hit at
                #       the very start of the form name.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint parameters are '|' separated lists of prefixed names, or '0'.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'), (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
3336
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the a IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the a_Stats and a_szMnemonic parameters, so both are
    composed from the mnemonic and the operands before handing the lot to
    workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        # Without operands both are just the mnemonic.
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3346
3347 def checkCodeForMacro(self, sCode):
3348 """
3349 Checks code for relevant macro invocation.
3350 """
3351 #
3352 # Scan macro invocations.
3353 #
3354 if sCode.find('(') > 0:
3355 # Look for instruction decoder function definitions. ASSUME single line.
3356 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3357 [ 'FNIEMOP_DEF',
3358 'FNIEMOP_STUB',
3359 'FNIEMOP_STUB_1',
3360 'FNIEMOP_UD_STUB',
3361 'FNIEMOP_UD_STUB_1' ]);
3362 if asArgs is not None:
3363 sFunction = asArgs[1];
3364
3365 if not self.aoCurInstrs:
3366 self.addInstruction();
3367 for oInstr in self.aoCurInstrs:
3368 if oInstr.iLineFnIemOpMacro == -1:
3369 oInstr.iLineFnIemOpMacro = self.iLine;
3370 else:
3371 self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
3372 self.setInstrunctionAttrib('sFunction', sFunction);
3373 self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
3374 self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
3375 if asArgs[0].find('STUB') > 0:
3376 self.doneInstructions();
3377 return True;
3378
3379 # IEMOP_HLP_DONE_VEX_DECODING_*
3380 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3381 [ 'IEMOP_HLP_DONE_VEX_DECODING',
3382 'IEMOP_HLP_DONE_VEX_DECODING_L0',
3383 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
3384 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
3385 ]);
3386 if asArgs is not None:
3387 sMacro = asArgs[0];
3388 if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
3389 for oInstr in self.aoCurInstrs:
3390 if 'vex_l_zero' not in oInstr.dHints:
3391 if oInstr.iLineMnemonicMacro >= 0:
3392 self.errorOnLine(oInstr.iLineMnemonicMacro,
3393 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
3394 oInstr.dHints['vex_l_zero'] = True;
3395 return True;
3396
3397 #
3398 # IEMOP_MNEMONIC*
3399 #
3400
3401 # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
3402 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
3403 if asArgs is not None:
3404 if len(self.aoCurInstrs) == 1:
3405 oInstr = self.aoCurInstrs[0];
3406 if oInstr.sStats is None:
3407 oInstr.sStats = asArgs[1];
3408 self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
3409
3410 # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3411 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
3412 if asArgs is not None:
3413 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
3414 []);
3415 # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3416 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
3417 if asArgs is not None:
3418 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
3419 [asArgs[6],]);
3420 # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3421 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
3422 if asArgs is not None:
3423 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
3424 [asArgs[6], asArgs[7]]);
3425 # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3426 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
3427 if asArgs is not None:
3428 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
3429 asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
3430 # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
3431 # a_fIemHints)
3432 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
3433 if asArgs is not None:
3434 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
3435 asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
3436
3437 # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3438 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
3439 if asArgs is not None:
3440 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
3441 # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3442 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
3443 if asArgs is not None:
3444 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
3445 # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3446 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
3447 if asArgs is not None:
3448 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
3449 [asArgs[4], asArgs[5],]);
3450 # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3451 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
3452 if asArgs is not None:
3453 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
3454 [asArgs[4], asArgs[5], asArgs[6],]);
3455 # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
3456 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
3457 if asArgs is not None:
3458 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
3459 [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
3460
3461 return False;
3462
3463
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine and keeps track of whether the
    current position is in code (self.kiCode) or inside a C multi-line
    comment (self.kiCommentMulti).  Comment text is collected in
    self.sComment and passed on via parseComment() when the comment is
    closed, while code fragments are handed to checkCodeForMacro().

    Returns number of errors (the return value of printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Scan the line piece by piece, switching between code and comment state.
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            # Code up to the comment start, then begin a new comment.
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # The comment is complete, process it and go back to code.
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! startswith() rather than sLine[0] so that an empty line entry
        #       cannot raise an IndexError here.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions();

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # Complete whatever is still pending at the end of the file.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3535
3536
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    The file is read into a list of lines which is handed to SimpleParser
    together with sSrcFile and sDefaultMap.

    Returns the error count from SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed and then re-raised.
    """
    #
    # Slurp the whole file into a line array.
    #
    try:
        oSrc = open(sSrcFile, "r"); # pylint: disable=consider-using-with
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrc: # closes the file whether or not reading succeeds.
        try:
            asSrcLines = oSrc.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asSrcLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3565
3566
def __doTestCopying():
    """
    Carries out the asCopyTests instructions.

    For every instruction in g_aoAllInstructions with a non-empty asCopyTests
    list, each reference is looked up in g_dAllInstructionsByStat first and
    in g_dAllInstructionsByFunction second, and the tests (aoTests) of the
    instructions found are appended to the referencing instruction.

    References that cannot be resolved, or that resolve to the referencing
    instruction itself, are reported on stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;

        for sReference in oTarget.asCopyTests:
            # A statistics name yields a single instruction, a function name a list of them.
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            if oByStat:
                aoSources = [oByStat,];
            else:
                aoSources = g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3594
3595
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests list of every instruction in
    g_aoAllInstructions that is not on it gets replaced by an empty list.
    Nothing is touched when the list is empty.

    Returns 0 (the error count, there are no failure paths).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3607
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are expected in the same directory as this script.  After
    parsing, the test copying and the only-test filtering are applied.

    Returns True on success.  Terminates via sys.exit(1) if any of the steps
    reported errors.
    """
    # (default map name, file name) pairs, processed in this order.
    atSources = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    cProblems = 0;
    for sMapName, sFileName in atSources:
        cProblems += __parseFileByName(os.path.join(sScriptDir, sFileName), sMapName);
    cProblems += __doTestCopying();
    cProblems += __applyOnlyTest();

    # The process exits rather than raising an exception when there were errors.
    if cProblems != 0:
        sys.exit(1);
    return True;
3634
3635
3636
# Module load side effect: runs the parser over all the instruction files
# as soon as this file is imported or executed.
__parseAll();
3638
3639
3640#
3641# Generators (may perhaps move later).
3642#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for one instruction.

    The entry is an OP() macro invocation, or OPVEX() if the instruction has
    more than three operands, with these columns: the opcode format string,
    one parser index per operand slot, the opcode enum value, one OP_PARM_xxx
    per operand slot and finally the DISOPTYPE_xxx flags.  Unused operand
    slots are filled with '0,' and 'OP_PARM_NONE,' respectively.

    The kind of line produced and the macro consuming it:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),
        define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \\
            { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line.
    """
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operand formats.
    #
    sOpcode = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    asOperandFmts = [];
    for oOperand in aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] == '%':
            asOperandFmts.append(sFmt);
        else:
            asOperandFmts.append(sFmt.upper()); ## @todo remove upper() later.
    if asOperandFmts:
        sOpcode += ' ' + ','.join(asOperandFmts);
    asColumns = [ sOpcode + '",', ];

    #
    # Decoders.
    #
    dSimpleParsers = {
        'VEX.ModR/M':   'IDX_ParseModRM,',
        'vex2':         'IDX_ParseVex2b,',
        'vex3':         'IDX_ParseVex3b,',
    };
    sEncoding = oInstr.sEncoding;
    asParsers = [];
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass; # Nothing encoding specific, the operands decide.
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asParsers.extend(['0,'] * len(aoOperands));
    elif sEncoding in dSimpleParsers:
        asParsers.append(dSimpleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Not handled yet:
        #   IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2, IDX_ParseGrp3, IDX_ParseGrp4, IDX_ParseGrp5,
        #   IDX_Parse3DNow, IDX_ParseGrp6, IDX_ParseGrp7, IDX_ParseGrp8, IDX_ParseGrp9, IDX_ParseGrp10,
        #   IDX_ParseGrp12, IDX_ParseGrp13, IDX_ParseGrp14, IDX_ParseGrp15, IDX_ParseGrp16,
        #   IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #   IDX_ParseNopPause, IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad the unused slots.
    asParsers += [g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in aoOperands[len(asParsers):]];
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));
    asColumns.extend(asParsers);

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: only the hints that map to DISOPTYPE_xxx defines, in sorted hint order.
    #
    asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asFlags = [sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')];
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its fixed offset,
    # or one space after the previous column if that one ran too long.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;
    return sLine;
3767
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether a table is worth emitting in the short form, i.e. with
    the leading and trailing unused (None) entries trimmed off.

    aoTableOrdered is the table in opcode order, None marking unused entries.
    oMap is currently unused and only there so the decision can be overridden
    for specific maps.

    Returns (iInstr, cInstructions, fShortTable) where iInstr is the index of
    the first entry to emit, cInstructions is the index following the last
    entry to emit, and fShortTable tells whether trimming should be done.  For
    the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Determine how much we can trim off, first at the end and then at the start.
    cInstructions = cEntries;
    while cInstructions > 0 and aoTableOrdered[cInstructions - 1] is None:
        cInstructions -= 1;

    iInstr = 0;
    while iInstr < cInstructions and aoTableOrdered[iInstr] is None:
        iInstr += 1;

    # If we can save at least 30%, we go for the short table version.  Integer
    # math keeps this exact; empty and all-unused tables end up short too.
    cTrimmed = iInstr + cEntries - cInstructions;
    if cTrimmed * 100 >= cEntries * 30:
        return (iInstr, cInstructions, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
3789
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Collects the instruction maps from g_dInstructionMaps, splitting the
    'byte+pfx' maps into one map per prefix, and writes a DISOPCODE table, an
    AssertCompile on its size and a DISOPMAPDESC range record to oDstFile for
    each map whose name starts with 'vex'.

    oDstFile is any object with a write() method and defaults to sys.stdout.
    Debug output goes to sys.stderr so it cannot get mixed into the generated
    code.
    """

    #
    # The disassembler uses a slightly different table layout to save space:
    # the maps selected by both opcode byte and prefix are split up into one
    # map per prefix variation.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oEntry: oEntry.sEncoding + ''.join(oEntry.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # The extern declarations; collected, but not written anywhere by this function.
    sys.stderr.write("debug: maps=%s\n\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),));
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        sTableName = oMap.getDisasTableName();
        sRangeName = oMap.getDisasRangeName();
        cRowEntries = 0x10 * cEntriesPerByte;   # Table entries per 16 opcode bytes.
        # An empty range gets a single dummy entry below, so there is always at least one element.
        cTableEntries = max(iInstrEnd - iInstrStart, 1);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, cTableEntries,));
            asLines.append(             'const DISOPCODE %s[%d] =' % (sTableName, cTableEntries,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cRowEntries - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cRowEntries - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cRowEntries - 1)) == 0 and cInvalidInstrs >= cRowEntries:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cRowEntries,));
                    iInstr += cRowEntries - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the entries covered by the block macro.
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if oInstr:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # An empty list holds no instruction, so the slot is invalid.
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr,));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration must use the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
3904
# When run as a script, generate the disassembler tables using the default
# destination of generateDisassemblerTables, i.e. standard output.
if __name__ == '__main__':
    generateDisassemblerTables();
3907
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette