VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 95167

Last change on this file since 95167 was 95167, checked in by vboxsync, 3 years ago

VMM/IEM,DIS: Refreshed my memory on generating disassembler tables from IEM, making some related VEX table+code optimizations (saves a few Ks of data and some code bytes too). bugref:6251

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 166.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 95167 2022-06-01 19:38:29Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017-2022 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.215389.xyz. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 95167 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes doesn't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
## EFLAGS bit values keyed by their X86_EFL_XXX constant name.
## Single bits are written as shifts so the bit number is visible at a glance.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,
    'X86_EFL_1':            1 << 1,
    'X86_EFL_PF':           1 << 2,
    'X86_EFL_AF':           1 << 4,
    'X86_EFL_ZF':           1 << 6,
    'X86_EFL_SF':           1 << 7,
    'X86_EFL_TF':           1 << 8,
    'X86_EFL_IF':           1 << 9,
    'X86_EFL_DF':           1 << 10,
    'X86_EFL_OF':           1 << 11,
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # two-bit field
    'X86_EFL_NT':           1 << 14,
    'X86_EFL_RF':           1 << 16,
    'X86_EFL_VM':           1 << 17,
    'X86_EFL_AC':           1 << 18,
    'X86_EFL_VIF':          1 << 19,
    'X86_EFL_VIP':          1 << 20,
    'X86_EFL_ID':           1 << 21,
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # multi-bit mask, kept as hex
    'X86_EFL_RA1_MASK':     1 << 1,
};
77
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' on the
## value marks the mnemonic as meaning "flag clear" rather than "flag set".
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # PF is set when the result has even parity, so 'pe' is the "set" mnemonic.
    'pe':   'X86_EFL_PF',   ##< Parity Even.
    'po':   '!X86_EFL_PF',  ##< Parity Odd.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
## CR0 bit values keyed by their X86_CR0_XXX constant name.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':           1 << 0,
    'X86_CR0_MP':           1 << 1,
    'X86_CR0_EM':           1 << 2,
    'X86_CR0_TS':           1 << 3,
    'X86_CR0_ET':           1 << 4,
    'X86_CR0_NE':           1 << 5,
    'X86_CR0_WP':           1 << 16,
    'X86_CR0_AM':           1 << 18,
    'X86_CR0_NW':           1 << 29,
    'X86_CR0_CD':           1 << 30,
    'X86_CR0_PG':           1 << 31,
};
139
## CR4 bit values keyed by their X86_CR4_XXX constant name.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,
    'X86_CR4_PVI':          1 << 1,
    'X86_CR4_TSD':          1 << 2,
    'X86_CR4_DE':           1 << 3,
    'X86_CR4_PSE':          1 << 4,
    'X86_CR4_PAE':          1 << 5,
    'X86_CR4_MCE':          1 << 6,
    'X86_CR4_PGE':          1 << 7,
    'X86_CR4_PCE':          1 << 8,
    'X86_CR4_OSFXSR':       1 << 9,
    'X86_CR4_OSXMMEEXCPT':  1 << 10,
    'X86_CR4_VMXE':         1 << 13,
    'X86_CR4_SMXE':         1 << 14,
    'X86_CR4_PCIDE':        1 << 17,
    'X86_CR4_OSXSAVE':      1 << 18,
    'X86_CR4_SMEP':         1 << 20,
    'X86_CR4_SMAP':         1 << 21,
    'X86_CR4_PKE':          1 << 22,
};
161
## XSAVE component bits (XCR0) keyed by their XSAVE_C_XXX constant name.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,
    'XSAVE_C_SSE':          1 << 1,
    'XSAVE_C_YMM':          1 << 2,
    'XSAVE_C_BNDREGS':      1 << 3,
    'XSAVE_C_BNDCSR':       1 << 4,
    'XSAVE_C_OPMASK':       1 << 5,
    'XSAVE_C_ZMM_HI256':    1 << 6,
    'XSAVE_C_ZMM_16HI':     1 << 7,
    'XSAVE_C_PKRU':         1 << 9,
    'XSAVE_C_LWP':          1 << 62,
    'XSAVE_C_X':            1 << 63,
    'XSAVE_C_ALL_AVX':      0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  0x000000c6, # For clearing all AVX and SSE bits.
};
178
179
## \@op[1-4] locations
## Every location name maps to its own (initially empty) list.
g_kdOpLocations = {
    sLocation: []
    for sLocation in (
        'reg',      ## modrm.reg
        'rm',       ## modrm.rm
        'imm',      ## immediate instruction data
        'vvvv',     ## VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
200
## \@op[1-4] types
##
## Maps an operand type name to a 5-tuple.
##
## Value fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (a key of g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction ('' when there is none).
##
## Note! See section A.2.1 in SDM vol 2 for the type names.
## Note! The _RO / _WO / _RW suffixed names share the tuple of their base type;
##       presumably they are read-only / write-only / read-write access
##       variants - TODO confirm against the code consuming this table.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # The flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
342
343# IDX_ParseFixedReg
344# IDX_ParseVexDest
345
346
## IEMFORM_XXX mappings.
##
## Maps the form name (the part after IEMFORM_) to a 3-tuple:
##    - 0: the encoding name (sEncoding).
##    - 1: the list of operand locations in operand order, or None for FIXED.
##    - 2: the opcodesub value implied by the form ('' when there is none).
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    # NOTE(review): unlike the RM/MR forms, the M_REG / M_MEM (and VEX_M_REG /
    # VEX_M_MEM) entries have an empty opcodesub - confirm this is intended.
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
382
## \@oppfx values.
## Each recognised prefix maps to its own (initially empty) list.
g_kdPrefixes = {
    sPrefix: []
    for sPrefix in ('none', '0x66', '0xf3', '0xf2')
};
390
## Special \@opcode tag values.
## Each special value maps to its own (initially empty) list.
g_kdSpecialOpcodes = {
    sSpecial: []
    for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
400
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    # Not an alias: must resolve to itself, not to 'vex.l=0'.
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
421
## Valid values for \@openc
## Each encoding maps to a one-element list holding a BS3CG1ENC_XXX constant
## name, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
430
## \@opunused, \@opinvalid, \@opinvlstyle
## Each style name maps to an empty list.
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
440
## Recognised CPU names; each one maps to an empty tuple.
g_kdCpuNames = {
    sCpuName: ()
    for sCpuName in ('8086', '80186', '80286', '80386', '80486')
};
448
## \@opcpuid
## Maps a CPUID feature mnemonic to the name of its X86_CPUID_XXX feature constant.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next three used to name the neighbouring ECX feature bit (DTES64,
    # CPLDS and TM2 respectively) instead of the feature the mnemonic stands for.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
494
## \@ophints values.
## Maps a hint name to a DISOPTYPE_XXX constant name, or to '' when the hint
## has no constant name associated with it in this table.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (NOTE(review): confirm).
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'lock_allowed': '', ##< Lock prefix allowed.
};
533
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Each exception type name maps to its own (initially empty) list.
g_kdXcptTypes = {
    sXcptType: []
    for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
569
570
571def _isValidOpcodeByte(sOpcode):
572 """
573 Checks if sOpcode is a valid lower case opcode byte.
574 Returns true/false.
575 """
576 if len(sOpcode) == 4:
577 if sOpcode[:2] == '0x':
578 if sOpcode[2] in '0123456789abcdef':
579 if sOpcode[3] in '0123456789abcdef':
580 return True;
581 return False;
582
583
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid values for the sEncoding constructor argument.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
        'vex2':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
        'vex3':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
        'xop8':     [], ##< XOP prefix with map select (mmmmm) = 8
        'xop9':     [], ##< XOP prefix with map select (mmmmm) = 9
        'xop10':    [], ##< XOP prefix with map select (mmmmm) = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':     [  256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':       [    8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Initializes the map.

        sSelector must be a key of kdSelectors and sEncoding a key of
        kdEncodings.  Each asLeadOpcodes entry must be a lower case hex byte
        string like '0x0f'.  sDisParse, when given, must start with
        'IDX_Parse'.  All of this is checked by assertions only.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table, keeping only the instructions whose sPrefix equals
        sPrefixFilter when that is not None.

        A filtered copy of a 'byte+pfx' map gets the 'byte' selector since the
        prefix no longer takes part in the selection.  The lead opcode list is
        shared with the original, the instruction list is a new list.
        """
        fFiltering = sPrefixFilter is not None;
        sSelector  = 'byte' if fFiltering and self.sSelector == 'byte+pfx' else self.sSelector;
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = sSelector, sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if fFiltering:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        else:
            oCopy.aoInstructions = list(self.aoInstructions);
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map; for the 'byte+pfx' selector oInstr.sPrefix takes
        part too.  Inconsistencies between instruction and selector are
        reported via assertions.
        """
        bOpcode   = oInstr.getOpcodeByte();
        sSelector = self.sSelector;

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
            assert iPrefix >= 0;
            return bOpcode * 4 + iPrefix;

        # The other selectors need masking and shifting of the ModR/M style byte:
        # mod is bits 7:6, reg is bits 5:3 and rm is bits 2:0.
        if sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # For the selectors below the mod field (mask 0xc0) must (not) be 0y11,
        # so the masked value is compared against 0xc0.
        if sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words are appended with a
        leading underscore, all other words are capitalized ('grp' and 'map'
        becoming 'Grp' and 'Map') and appended directly.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':    # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':  # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
776
777
class TestType(object):
    """
    Test value type.

    This base class handles integer like values.  The fUnsigned constructor
    parameter sets the default stance on zero vs. sign extension.  A value
    prefixed with '+' or '-' always requests sign extension, which is how
    fUnsigned=True can be overridden for a single value.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        # Normal value sizes in bytes; get() treats the last entry as the largest.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception; the text is also available as sMessage. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its bitwise inverse.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.  The bytearray
        holds the value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # A leading sign forces sign extension and moves the spot where the
        # hexadecimal '0x' prefix is expected by one character.
        fSignExtend = not self.fUnsigned;
        offDigits   = 0;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            offDigits   = 1;
        fHex = len(sValue) > offDigits + 2 and sValue[offDigits : offDigits + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are converted manually to
        # two's complement: take the digits of (-n - 1) and invert each one.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';             # Python 2 long suffix.
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so sign extension restores the value.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits; when even the largest one
        # is too small, round up to a multiple of the largest size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad up to the natural size: 'f' for negative values, '0' otherwise.
        cNeeded = cNaturalDigits - cDigits;
        if cNeeded != 0:
            sHex = ('f' if iValue < 0 else '0') * cNeeded + sHex;

        # Walk the digit pairs from the end, giving the least significant byte first.
        abValue = bytearray(int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2));

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
902
903
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated lists of flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic whose constant starts with '!' names a
    bit to clear, all others name a bit to set.
    """

    ## Mnemonics denoting a cleared flag; their presence makes the value an AND+OR pair.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated flag mnemonic list into value byte arrays.

        Returns a single ((fSignExtend, bytearray), ) entry holding the bits
        to set when no flag is cleared, otherwise a (clear, set) pair where
        the first entry holds the bits to clear and the second the bits to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fClear = 0;
        fSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                fSet |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear != 0:
            aoClear = TestType.get(self, '%#x' % (fClear,))
            assert self.isAndOrPair(sValue) is True;
            return (aoClear[0], aoSet[0]);
        assert self.isAndOrPair(sValue) is False;
        return aoSet;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue contains any flag-clearing mnemonic.

        Whole comma separated tokens are compared, the same way get() splits
        the value.  A plain substring search would misfire on mnemonics that
        merely contain a zero-flag name (such as 'iopl' containing 'pl').
        """
        return any(sFlag in self.kdZeroValueFlags for sFlag in sValue.split(','));
939
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a constant dictionary.

    Values are comma separated flag names.  Each name is upper-cased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues; the resulting
    values are OR'ed together.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts a comma separated flag name list to the TestType.get() format.

        Raises BadValue on flag names missing from the constant dictionary.
        """
        dConstants = self.kdConstantsAndValues;
        fCombined  = 0;
        for sFlag in sValue.split(','):
            fBits = dConstants.get(self.sConstantPrefix + sFlag.upper(), None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
960
961
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance records which field to touch (a kdFields key), the operator
    to apply (one of kasOperators), the value string and the value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' is constructed with the default fUnsigned=True, so it
    # currently behaves exactly like 'uint' - confirm whether that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', breaking the xmmN.dw0 series.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1227
1228
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet where the
    variable and value come from kdVariables and the operator from
    kasCompareOps.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16':   'size_o16',
            'o32':   'size_o32',
            'o64':   'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0':     'vexl_0',
            '1':     'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':     'ring_0',
            '1':     'ring_1',
            '2':     'ring_2',
            '3':     'ring_3',
            '0..2':  'ring_0_thru_2',
            '1..3':  'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64':    'code_64bit',
            '32':    'code_32bit',
            '16':    'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real':  'mode_real',
            'prot':  'mode_prot',
            'long':  'mode_long',
            'v86':   'mode_v86',
            'smm':   'mode_smm',
            'vmx':   'mode_vmx',
            'svm':   'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':    'paging_on',
            'off':   'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a kdVariables key, sOp one of kasCompareOps and
        sValue a valid value for that variable (checked by assertions only).
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1325
1326
1327class InstructionTest(object):
1328 """
1329 Instruction test.
1330 """
1331
1332 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1333 self.oInstr = oInstr # type: InstructionTest
1334 self.aoInputs = [] # type: list(TestInOut)
1335 self.aoOutputs = [] # type: list(TestInOut)
1336 self.aoSelectors = [] # type: list(TestSelector)
1337
1338 def toString(self, fRepr = False):
1339 """
1340 Converts it to string representation.
1341 """
1342 asWords = [];
1343 if self.aoSelectors:
1344 for oSelector in self.aoSelectors:
1345 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1346 asWords.append('/');
1347
1348 for oModifier in self.aoInputs:
1349 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1350
1351 asWords.append('->');
1352
1353 for oModifier in self.aoOutputs:
1354 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1355
1356 if fRepr:
1357 return '<' + ' '.join(asWords) + '>';
1358 return ' '.join(asWords);
1359
1360 def __str__(self):
1361 """ Provide string represenation. """
1362 return self.toString(False);
1363
1364 def __repr__(self):
1365 """ Provide unambigious string representation. """
1366 return self.toString(True);
1367
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (a g_kdOpLocations key) with an operand type
    (a g_kdOpTypes key); both are checked by assertions only.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;  ##< g_kdOpLocations
        self.sType  = sType;   ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');
1382
1383
1384
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes
    (mnemonic, maps, operands, flags and so on), implementation details,
    source location bookkeeping and some raw intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []   # type: list(str)
        self.aoMaps         = []   # type: list(InstructionMap)
        self.aoOperands     = []   # type: list(Operand)
        self.sPrefix        = None; ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None # type: str
        self.sSubOpcode     = None # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};   ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None; ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];   ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];   ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []   # type: list(InstructionTest)
        self.sMinCpu        = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None; ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False; ##< Unused instruction.
        self.fInvalid       = False; ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;  ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;  ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces "name=value" pairs separated by "; ", skipping fields whose
        value is None.  fRepr=True wraps the result in angle brackets.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            return '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        Any of oMap, sOpcode, sSubOpcode and sPrefix that is given (truthy)
        replaces the corresponding attribute in the copy.  The copy's oParent
        is set to this instance.  List and dictionary attributes are copied
        shallowly; the flag lists (asFlXxx) are shared with the original.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms: '0xNN' (returned as is), '/N' (N << 3), '11/N'
        ((N << 3) | 0xc0) and '!11/N' ((N << 3) | 0x80), N being one digit.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form.  It is two characters long ('/' + digit); the length
        # was previously checked against 4, which a '/N' spec can never match.
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (returns mod=1):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics into a constant name
        and then via g_kdX86EFlagsConstants into bits.  None or an empty list
        gives 0.  Negated ('!') mnemonics are not accepted (asserted).
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);
1623
1624
1625
## All the instructions (starts out empty).
g_aoAllInstructions = [] # type: list(Instruction)

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: dict(Instruction)

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: list(Instruction)
1637
## Instruction maps.
## The arguments are passed to InstructionMap as is: the map name first,
## optionally followed by the IEM table name, then keyword settings.
g_aoInstructionMaps = [
    InstructionMap('one',        'g_apfnOneByteMap',        sSelector = 'byte'),
    InstructionMap('grp1_80',    asLeadOpcodes = ['0x80',], sSelector = '/r'),
    InstructionMap('grp1_81',    asLeadOpcodes = ['0x81',], sSelector = '/r'),
    InstructionMap('grp1_82',    asLeadOpcodes = ['0x82',], sSelector = '/r'),
    InstructionMap('grp1_83',    asLeadOpcodes = ['0x83',], sSelector = '/r'),
    InstructionMap('grp1a',      asLeadOpcodes = ['0x8f',], sSelector = '/r'),
    InstructionMap('grp2_c0',    asLeadOpcodes = ['0xc0',], sSelector = '/r'),
    InstructionMap('grp2_c1',    asLeadOpcodes = ['0xc1',], sSelector = '/r'),
    InstructionMap('grp2_d0',    asLeadOpcodes = ['0xd0',], sSelector = '/r'),
    InstructionMap('grp2_d1',    asLeadOpcodes = ['0xd1',], sSelector = '/r'),
    InstructionMap('grp2_d2',    asLeadOpcodes = ['0xd2',], sSelector = '/r'),
    InstructionMap('grp2_d3',    asLeadOpcodes = ['0xd3',], sSelector = '/r'),
    ## @todo g_apfnEscF1_E0toFF
    InstructionMap('grp3_f6',    asLeadOpcodes = ['0xf6',], sSelector = '/r'),
    InstructionMap('grp3_f7',    asLeadOpcodes = ['0xf7',], sSelector = '/r'),
    InstructionMap('grp4',       asLeadOpcodes = ['0xfe',], sSelector = '/r'),
    InstructionMap('grp5',       asLeadOpcodes = ['0xff',], sSelector = '/r'),
    InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
    InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'),    # xabort
    InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
    InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'),    # xbegin

    InstructionMap('two0f',      'g_apfnTwoByteMap',    asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
    InstructionMap('grp6',       'g_apfnGroup6',        asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
    InstructionMap('grp7_m',     'g_apfnGroup7Mem',     asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
    InstructionMap('grp7_r',                            asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
    InstructionMap('grp8',                              asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
    InstructionMap('grp9',       'g_apfnGroup9RegReg',  asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
    ## @todo What about g_apfnGroup9MemReg?
    InstructionMap('grp10',      None,                  asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
    InstructionMap('grp12',      'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
    InstructionMap('grp13',      'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
    InstructionMap('grp14',      'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
    InstructionMap('grp15',      'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
    ## @todo What about g_apfnGroup15RegReg?
    InstructionMap('grp16',                             asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
    InstructionMap('grpA17',                            asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
    InstructionMap('grpP',                              asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch

    InstructionMap('three0f38',  'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
    InstructionMap('three0f3a',  'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),

    InstructionMap('vexmap1',    'g_apfnVexMap1',          sEncoding = 'vex1'),
    InstructionMap('vexgrp12',   'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
    InstructionMap('vexgrp13',   'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
    InstructionMap('vexgrp14',   'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
    InstructionMap('vexgrp15',   'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
    InstructionMap('vexgrp17',   'g_apfnVexGroup17_f3',    sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),

    InstructionMap('vexmap2',    'g_apfnVexMap2',          sEncoding = 'vex2'),
    InstructionMap('vexmap3',    'g_apfnVexMap3',          sEncoding = 'vex3'),

    InstructionMap('3dnow',      asLeadOpcodes = ['0x0f', '0x0f',]),
    InstructionMap('xopmap8',    sEncoding = 'xop8'),
    InstructionMap('xopmap9',    sEncoding = 'xop9'),
    InstructionMap('xopgrp1',    sEncoding = 'xop9',  asLeadOpcodes = ['0x01'], sSelector = '/r'),
    InstructionMap('xopgrp2',    sEncoding = 'xop9',  asLeadOpcodes = ['0x02'], sSelector = '/r'),
    InstructionMap('xopgrp3',    sEncoding = 'xop9',  asLeadOpcodes = ['0x12'], sSelector = '/r'),
    InstructionMap('xopmap10',   sEncoding = 'xop10'),
    InstructionMap('xopgrp4',    sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
];
## The instruction maps indexed by map name (sName).
g_dInstructionMaps          = dict((oMap.sName, oMap) for oMap in g_aoInstructionMaps);
## The instruction maps indexed by IEM table name (sIemName).
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps);
1703
1704
1705
class ParserException(Exception):
    """ Exception type carrying a parser error message. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1710
1711
1712class SimpleParser(object):
1713 """
1714 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1715 """
1716
    ## @name Parser state.
    ## Constants naming the parser states (integer values 0 and 1).
    ## @{
    kiCode = 0;
    kiCommentMulti = 1;
    ## @}
1722
1723 def __init__(self, sSrcFile, asLines, sDefaultMap):
1724 self.sSrcFile = sSrcFile;
1725 self.asLines = asLines;
1726 self.iLine = 0;
1727 self.iState = self.kiCode;
1728 self.sComment = '';
1729 self.iCommentLine = 0;
1730 self.aoCurInstrs = [];
1731
1732 assert sDefaultMap in g_dInstructionMaps;
1733 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1734
1735 self.cTotalInstr = 0;
1736 self.cTotalStubs = 0;
1737 self.cTotalTagged = 0;
1738
1739 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1740 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1741 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1742 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1743 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1744 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1745 self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
1746 self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments.
1747 self.fDebug = True;
1748
1749 self.dTagHandlers = {
1750 '@opbrief': self.parseTagOpBrief,
1751 '@opdesc': self.parseTagOpDesc,
1752 '@opmnemonic': self.parseTagOpMnemonic,
1753 '@op1': self.parseTagOpOperandN,
1754 '@op2': self.parseTagOpOperandN,
1755 '@op3': self.parseTagOpOperandN,
1756 '@op4': self.parseTagOpOperandN,
1757 '@oppfx': self.parseTagOpPfx,
1758 '@opmaps': self.parseTagOpMaps,
1759 '@opcode': self.parseTagOpcode,
1760 '@opcodesub': self.parseTagOpcodeSub,
1761 '@openc': self.parseTagOpEnc,
1762 '@opfltest': self.parseTagOpEFlags,
1763 '@opflmodify': self.parseTagOpEFlags,
1764 '@opflundef': self.parseTagOpEFlags,
1765 '@opflset': self.parseTagOpEFlags,
1766 '@opflclear': self.parseTagOpEFlags,
1767 '@ophints': self.parseTagOpHints,
1768 '@opdisenum': self.parseTagOpDisEnum,
1769 '@opmincpu': self.parseTagOpMinCpu,
1770 '@opcpuid': self.parseTagOpCpuId,
1771 '@opgroup': self.parseTagOpGroup,
1772 '@opunused': self.parseTagOpUnusedInvalid,
1773 '@opinvalid': self.parseTagOpUnusedInvalid,
1774 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1775 '@optest': self.parseTagOpTest,
1776 '@optestign': self.parseTagOpTestIgnore,
1777 '@optestignore': self.parseTagOpTestIgnore,
1778 '@opcopytests': self.parseTagOpCopyTests,
1779 '@oponly': self.parseTagOpOnlyTest,
1780 '@oponlytest': self.parseTagOpOnlyTest,
1781 '@opxcpttype': self.parseTagOpXcptType,
1782 '@opstats': self.parseTagOpStats,
1783 '@opfunction': self.parseTagOpFunction,
1784 '@opdone': self.parseTagOpDone,
1785 };
1786 for i in range(48):
1787 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1788 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1789
1790 self.asErrors = [];
1791
1792 def raiseError(self, sMessage):
1793 """
1794 Raise error prefixed with the source and line number.
1795 """
1796 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1797
1798 def raiseCommentError(self, iLineInComment, sMessage):
1799 """
1800 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1801 """
1802 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1803
1804 def error(self, sMessage):
1805 """
1806 Adds an error.
1807 returns False;
1808 """
1809 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1810 return False;
1811
1812 def errorOnLine(self, iLine, sMessage):
1813 """
1814 Adds an error.
1815 returns False;
1816 """
1817 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,));
1818 return False;
1819
1820 def errorComment(self, iLineInComment, sMessage):
1821 """
1822 Adds a comment error.
1823 returns False;
1824 """
1825 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1826 return False;
1827
1828 def printErrors(self):
1829 """
1830 Print the errors to stderr.
1831 Returns number of errors.
1832 """
1833 if self.asErrors:
1834 sys.stderr.write(u''.join(self.asErrors));
1835 return len(self.asErrors);
1836
1837 def debug(self, sMessage):
1838 """
1839 For debugging.
1840 """
1841 if self.fDebug:
1842 print('debug: %s' % (sMessage,));
1843
1844 def stripComments(self, sLine):
1845 """
1846 Returns sLine with comments stripped.
1847
1848 Complains if traces of incomplete multi-line comments are encountered.
1849 """
1850 sLine = self.oReComment.sub(" ", sLine);
1851 if sLine.find('/*') >= 0 or sLine.find('*/') >= 0:
1852 self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
1853 return sLine;
1854
    def parseFunctionTable(self, sLine):
        """
        Parses a PFNIEMOP table, updating/checking the @oppfx value.

        sLine is the line declaring the table; the table name is taken from
        the identifier preceding the '[' on it.  The following lines are read
        from self.asLines starting at index self.iLine.

        For each table entry that does not start with 'iemOp_Invalid', the
        instructions registered under that function name are looked up in
        g_dAllInstructionsByFunction and the opcode and prefix implied by the
        table position are filled in where they are still None.  If no
        registered instruction fits, a copy of the first one is created for
        this position and added to the global lists and to the map.

        Returns True when the closing brace is found and the entry count
        matches the expected table length, otherwise False (after recording
        an error).

        Note! Updates iLine as it consumes the whole table.
        """

        #
        # Extract the table name.
        #
        sName = re.search(' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
        oMap = g_dInstructionMapsByIemName.get(sName);
        if not oMap:
            self.debug('No map for PFNIEMOP table: %s' % (sName,));
            oMap = self.oDefaultMap; # This is wrong wrong wrong.

        #
        # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
        # entries per byte:
        # no prefix, 066h prefix, f3h prefix, f2h prefix
        # Those tables has 256 & 32 entries respectively.
        #
        cEntriesPerByte = 4;
        cValidTableLength = 1024;
        asPrefixes = ('none', '0x66', '0xf3', '0xf2');

        # An explicit [256] or [32] dimension selects the one-entry-per-byte
        # layout, in which case the prefix is None for every entry.
        oEntriesMatch = re.search('\[ *(256|32) *\]', sLine);
        if oEntriesMatch:
            cEntriesPerByte = 1;
            cValidTableLength = int(oEntriesMatch.group(1));
            asPrefixes = (None,);

        #
        # The next line should be '{' and nothing else.
        #
        if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
            return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
        self.iLine += 1;

        #
        # Parse till we find the end of the table.
        #
        iEntry = 0;
        while self.iLine < len(self.asLines):
            # Get the next line and strip comments and spaces (assumes no
            # multi-line comments).
            sLine = self.asLines[self.iLine];
            self.iLine += 1;
            sLine = self.stripComments(sLine).strip();

            # Split the line up into entries, expanding IEMOP_X4 usage.
            # The list is walked backwards so that inserting and deleting
            # does not disturb the indexes still to be visited.
            asEntries = sLine.split(',');
            for i in range(len(asEntries) - 1, -1, -1):
                sEntry = asEntries[i].strip();
                if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                    sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                    # Three extra copies plus the one stored at index i below.
                    asEntries.insert(i + 1, sEntry);
                    asEntries.insert(i + 1, sEntry);
                    asEntries.insert(i + 1, sEntry);
                if sEntry:
                    asEntries[i] = sEntry;
                else:
                    del asEntries[i];

            # Process the entries.
            for sEntry in asEntries:
                if sEntry in ('};', '}'):
                    if iEntry != cValidTableLength:
                        return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                    return True;
                if sEntry.startswith('iemOp_Invalid'):
                    pass; # skip
                else:
                    # Look up matching instruction by function.
                    sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                    sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                    aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                    if aoInstr:
                        if not isinstance(aoInstr, list):
                            aoInstr = [aoInstr,];
                        oInstr = None;
                        # Take the first instruction that either matches the
                        # position already or has unset values to fill in.
                        for oCurInstr in aoInstr:
                            if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                                pass;
                            elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                                oCurInstr.sPrefix = sPrefix;
                            elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                                oCurInstr.sOpcode = sOpcode;
                                oCurInstr.sPrefix = sPrefix;
                            else:
                                continue;
                            oInstr = oCurInstr;
                            break;
                        # Nothing fitting: create a copy for this position.
                        if not oInstr:
                            oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                            aoInstr.append(oInstr);
                            g_dAllInstructionsByFunction[sEntry] = aoInstr;
                            g_aoAllInstructions.append(oInstr);
                            oMap.aoInstructions.append(oInstr);
                    else:
                        self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                                   % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
                # Invalid entries count towards the table position too.
                iEntry += 1;

        return self.error('Unexpected end of file in PFNIEMOP table');
1960
1961 def addInstruction(self, iLine = None):
1962 """
1963 Adds an instruction.
1964 """
1965 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1966 g_aoAllInstructions.append(oInstr);
1967 self.aoCurInstrs.append(oInstr);
1968 return oInstr;
1969
1970 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1971 """
1972 Derives the mnemonic and operands from a IEM stats base name like string.
1973 """
1974 if oInstr.sMnemonic is None:
1975 asWords = sStats.split('_');
1976 oInstr.sMnemonic = asWords[0].lower();
1977 if len(asWords) > 1 and not oInstr.aoOperands:
1978 for sType in asWords[1:]:
1979 if sType in g_kdOpTypes:
1980 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1981 else:
1982 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1983 return False;
1984 return True;
1985
    def doneInstructionOne(self, oInstr, iLine):
        """
        Complete the parsing by processing, validating and expanding raw inputs.

        Fills in the attributes of oInstr that were not given explicitly
        (mnemonic, disassembler enum, stats name, function name, maps and
        encoding), adds the instruction to its maps and registers it in the
        g_dAllInstructionsByStat and g_dAllInstructionsByFunction dictionaries.

        iLine is recorded as oInstr.iLineCompleted; the instruction must not
        have been completed before (asserted).

        Always returns True.  A duplicate stats name is recorded as an error.
        """
        assert oInstr.iLineCompleted is None;
        oInstr.iLineCompleted = iLine;

        #
        # Specified instructions.
        #
        # Note: Both branches below are currently placeholders doing nothing.
        if oInstr.cOpTags > 0:
            if oInstr.sStats is None:
                pass;

        #
        # Unspecified legacy stuff. We generally only got a few things to go on here.
        # /** Opcode 0x0f 0x00 /0. */
        # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        else:
            #if oInstr.sRawOldOpcodes:
            #
            #if oInstr.sMnemonic:
            pass;

        #
        # Common defaults.
        #

        # Guess mnemonic and operands from stats if the former is missing.
        # (Falls back on the function name with the 'iemOp_' part removed.)
        if oInstr.sMnemonic is None:
            if oInstr.sStats is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
            elif oInstr.sFunction is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

        # Derive the disassembler op enum constant from the mnemonic.
        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

        # Derive the IEM statistics base name from mnemonic and operand types.
        # (The function name takes precedence when there is one.)
        if oInstr.sStats is None:
            if oInstr.sFunction is not None:
                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
            elif oInstr.sMnemonic is not None:
                oInstr.sStats = oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sStats += '_' + oOperand.sType;

        # Derive the IEM function name from mnemonic and operand types.
        # (Falls back on the stats name when there is no mnemonic.)
        if oInstr.sFunction is None:
            if oInstr.sMnemonic is not None:
                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sFunction += '_' + oOperand.sType;
            elif oInstr.sStats:
                oInstr.sFunction = 'iemOp_' + oInstr.sStats;

        #
        # Apply default map and then add the instruction to all it's groups.
        #
        if not oInstr.aoMaps:
            oInstr.aoMaps = [ self.oDefaultMap, ];
        for oMap in oInstr.aoMaps:
            oMap.aoInstructions.append(oInstr);

        #
        # Derive encoding from operands and maps.
        #
        # Note: The encoding stays None when there are operands but the first
        #       one does not use ModR/M.
        if oInstr.sEncoding is None:
            if not oInstr.aoOperands:
                if oInstr.fUnused and oInstr.sSubOpcode:
                    oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
                else:
                    oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
            elif oInstr.aoOperands[0].usesModRM():
                if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                  or oInstr.onlyInVexMaps():
                    oInstr.sEncoding = 'VEX.ModR/M';
                else:
                    oInstr.sEncoding = 'ModR/M';

        #
        # Check the opstat value and add it to the opstat indexed dictionary.
        #
        if oInstr.sStats:
            if oInstr.sStats not in g_dAllInstructionsByStat:
                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
            else:
                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

        #
        # Add to function indexed dictionary. We allow multiple instructions per function.
        #
        if oInstr.sFunction:
            if oInstr.sFunction not in g_dAllInstructionsByFunction:
                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
            else:
                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
        return True;
2091
2092 def doneInstructions(self, iLineInComment = None):
2093 """
2094 Done with current instruction.
2095 """
2096 for oInstr in self.aoCurInstrs:
2097 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
2098 if oInstr.fStub:
2099 self.cTotalStubs += 1;
2100
2101 self.cTotalInstr += len(self.aoCurInstrs);
2102
2103 self.sComment = '';
2104 self.aoCurInstrs = [];
2105 return True;
2106
2107 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
2108 """
2109 Sets the sAttrib of all current instruction to oValue. If fOverwrite
2110 is False, only None values and empty strings are replaced.
2111 """
2112 for oInstr in self.aoCurInstrs:
2113 if fOverwrite is not True:
2114 oOldValue = getattr(oInstr, sAttrib);
2115 if oOldValue is not None:
2116 continue;
2117 setattr(oInstr, sAttrib, oValue);
2118
2119 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
2120 """
2121 Sets the iEntry of the array sAttrib of all current instruction to oValue.
2122 If fOverwrite is False, only None values and empty strings are replaced.
2123 """
2124 for oInstr in self.aoCurInstrs:
2125 aoArray = getattr(oInstr, sAttrib);
2126 while len(aoArray) <= iEntry:
2127 aoArray.append(None);
2128 if fOverwrite is True or aoArray[iEntry] is None:
2129 aoArray[iEntry] = oValue;
2130
2131 def parseCommentOldOpcode(self, asLines):
2132 """ Deals with 'Opcode 0xff /4' like comments """
2133 asWords = asLines[0].split();
2134 if len(asWords) >= 2 \
2135 and asWords[0] == 'Opcode' \
2136 and ( asWords[1].startswith('0x')
2137 or asWords[1].startswith('0X')):
2138 asWords = asWords[:1];
2139 for iWord, sWord in enumerate(asWords):
2140 if sWord.startswith('0X'):
2141 sWord = '0x' + sWord[:2];
2142 asWords[iWord] = asWords;
2143 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
2144
2145 return False;
2146
2147 def ensureInstructionForOpTag(self, iTagLine):
2148 """ Ensure there is an instruction for the op-tag being parsed. """
2149 if not self.aoCurInstrs:
2150 self.addInstruction(self.iCommentLine + iTagLine);
2151 for oInstr in self.aoCurInstrs:
2152 oInstr.cOpTags += 1;
2153 if oInstr.cOpTags == 1:
2154 self.cTotalTagged += 1;
2155 return self.aoCurInstrs[-1];
2156
2157 @staticmethod
2158 def flattenSections(aasSections):
2159 """
2160 Flattens multiline sections into stripped single strings.
2161 Returns list of strings, on section per string.
2162 """
2163 asRet = [];
2164 for asLines in aasSections:
2165 if asLines:
2166 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
2167 return asRet;
2168
2169 @staticmethod
2170 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
2171 """
2172 Flattens sections into a simple stripped string with newlines as
2173 section breaks. The final section does not sport a trailing newline.
2174 """
2175 # Typical: One section with a single line.
2176 if len(aasSections) == 1 and len(aasSections[0]) == 1:
2177 return aasSections[0][0].strip();
2178
2179 sRet = '';
2180 for iSection, asLines in enumerate(aasSections):
2181 if asLines:
2182 if iSection > 0:
2183 sRet += sSectionSep;
2184 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
2185 return sRet;
2186
2187
2188
2189 ## @name Tag parsers
2190 ## @{
2191
2192 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
2193 """
2194 Tag: \@opbrief
2195 Value: Text description, multiple sections, appended.
2196
2197 Brief description. If not given, it's the first sentence from @opdesc.
2198 """
2199 oInstr = self.ensureInstructionForOpTag(iTagLine);
2200
2201 # Flatten and validate the value.
2202 sBrief = self.flattenAllSections(aasSections);
2203 if not sBrief:
2204 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
2205 if sBrief[-1] != '.':
2206 sBrief = sBrief + '.';
2207 if len(sBrief) > 180:
2208 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
2209 offDot = sBrief.find('.');
2210 while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
2211 offDot = sBrief.find('.', offDot + 1);
2212 if offDot >= 0 and offDot != len(sBrief) - 1:
2213 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
2214
2215 # Update the instruction.
2216 if oInstr.sBrief is not None:
2217 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
2218 % (sTag, oInstr.sBrief, sBrief,));
2219 _ = iEndLine;
2220 return True;
2221
2222 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
2223 """
2224 Tag: \@opdesc
2225 Value: Text description, multiple sections, appended.
2226
2227 It is used to describe instructions.
2228 """
2229 oInstr = self.ensureInstructionForOpTag(iTagLine);
2230 if aasSections:
2231 oInstr.asDescSections.extend(self.flattenSections(aasSections));
2232 return True;
2233
2234 _ = sTag; _ = iEndLine;
2235 return True;
2236
2237 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
2238 """
2239 Tag: @opmenmonic
2240 Value: mnemonic
2241
2242 The 'mnemonic' value must be a valid C identifier string. Because of
2243 prefixes, groups and whatnot, there times when the mnemonic isn't that
2244 of an actual assembler mnemonic.
2245 """
2246 oInstr = self.ensureInstructionForOpTag(iTagLine);
2247
2248 # Flatten and validate the value.
2249 sMnemonic = self.flattenAllSections(aasSections);
2250 if not self.oReMnemonic.match(sMnemonic):
2251 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
2252 if oInstr.sMnemonic is not None:
2253 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
2254 % (sTag, oInstr.sMnemonic, sMnemonic,));
2255 oInstr.sMnemonic = sMnemonic
2256
2257 _ = iEndLine;
2258 return True;
2259
2260 def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
2261 """
2262 Tags: \@op1, \@op2, \@op3, \@op4
2263 Value: [where:]type
2264
2265 The 'where' value indicates where the operand is found, like the 'reg'
2266 part of the ModR/M encoding. See Instruction.kdOperandLocations for
2267 a list.
2268
2269 The 'type' value indicates the operand type. These follow the types
2270 given in the opcode tables in the CPU reference manuals.
2271 See Instruction.kdOperandTypes for a list.
2272
2273 """
2274 oInstr = self.ensureInstructionForOpTag(iTagLine);
2275 idxOp = int(sTag[-1]) - 1;
2276 assert 0 <= idxOp < 4;
2277
2278 # flatten, split up, and validate the "where:type" value.
2279 sFlattened = self.flattenAllSections(aasSections);
2280 asSplit = sFlattened.split(':');
2281 if len(asSplit) == 1:
2282 sType = asSplit[0];
2283 sWhere = None;
2284 elif len(asSplit) == 2:
2285 (sWhere, sType) = asSplit;
2286 else:
2287 return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));
2288
2289 if sType not in g_kdOpTypes:
2290 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
2291 % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
2292 if sWhere is None:
2293 sWhere = g_kdOpTypes[sType][1];
2294 elif sWhere not in g_kdOpLocations:
2295 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
2296 % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));
2297
2298 # Insert the operand, refusing to overwrite an existing one.
2299 while idxOp >= len(oInstr.aoOperands):
2300 oInstr.aoOperands.append(None);
2301 if oInstr.aoOperands[idxOp] is not None:
2302 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
2303 % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
2304 sWhere, sType,));
2305 oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
2306
2307 _ = iEndLine;
2308 return True;
2309
2310 def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
2311 """
2312 Tag: \@opmaps
2313 Value: map[,map2]
2314
2315 Indicates which maps the instruction is in. There is a default map
2316 associated with each input file.
2317 """
2318 oInstr = self.ensureInstructionForOpTag(iTagLine);
2319
2320 # Flatten, split up and validate the value.
2321 sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
2322 asMaps = sFlattened.split(',');
2323 if not asMaps:
2324 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
2325 for sMap in asMaps:
2326 if sMap not in g_dInstructionMaps:
2327 return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
2328 % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));
2329
2330 # Add the maps to the current list. Throw errors on duplicates.
2331 for oMap in oInstr.aoMaps:
2332 if oMap.sName in asMaps:
2333 return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));
2334
2335 for sMap in asMaps:
2336 oMap = g_dInstructionMaps[sMap];
2337 if oMap not in oInstr.aoMaps:
2338 oInstr.aoMaps.append(oMap);
2339 else:
2340 self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
2341
2342 _ = iEndLine;
2343 return True;
2344
2345 def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
2346 """
2347 Tag: \@oppfx
2348 Value: n/a|none|0x66|0xf3|0xf2
2349
2350 Required prefix for the instruction. (In a (E)VEX context this is the
2351 value of the 'pp' field rather than an actual prefix.)
2352 """
2353 oInstr = self.ensureInstructionForOpTag(iTagLine);
2354
2355 # Flatten and validate the value.
2356 sFlattened = self.flattenAllSections(aasSections);
2357 asPrefixes = sFlattened.split();
2358 if len(asPrefixes) > 1:
2359 return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
2360
2361 sPrefix = asPrefixes[0].lower();
2362 if sPrefix == 'none':
2363 sPrefix = 'none';
2364 elif sPrefix == 'n/a':
2365 sPrefix = None;
2366 else:
2367 if len(sPrefix) == 2:
2368 sPrefix = '0x' + sPrefix;
2369 if not _isValidOpcodeByte(sPrefix):
2370 return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
2371
2372 if sPrefix is not None and sPrefix not in g_kdPrefixes:
2373 return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));
2374
2375 # Set it.
2376 if oInstr.sPrefix is not None:
2377 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
2378 oInstr.sPrefix = sPrefix;
2379
2380 _ = iEndLine;
2381 return True;
2382
2383 def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
2384 """
2385 Tag: \@opcode
2386 Value: 0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)
2387
2388 The opcode byte or sub-byte for the instruction in the context of a map.
2389 """
2390 oInstr = self.ensureInstructionForOpTag(iTagLine);
2391
2392 # Flatten and validate the value.
2393 sOpcode = self.flattenAllSections(aasSections);
2394 if _isValidOpcodeByte(sOpcode):
2395 pass;
2396 elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '012345678':
2397 pass;
2398 elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '012345678':
2399 pass;
2400 elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '012345678':
2401 pass;
2402 else:
2403 return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));
2404
2405 # Set it.
2406 if oInstr.sOpcode is not None:
2407 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
2408 oInstr.sOpcode = sOpcode;
2409
2410 _ = iEndLine;
2411 return True;
2412
2413 def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
2414 """
2415 Tag: \@opcodesub
2416 Value: none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
2417 | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
2418
2419 This is a simple way of dealing with encodings where the mod=3 and mod!=3
2420 represents exactly two different instructions. The more proper way would
2421 be to go via maps with two members, but this is faster.
2422 """
2423 oInstr = self.ensureInstructionForOpTag(iTagLine);
2424
2425 # Flatten and validate the value.
2426 sSubOpcode = self.flattenAllSections(aasSections);
2427 if sSubOpcode not in g_kdSubOpcodes:
2428 return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sSubOpcode,));
2429 sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];
2430
2431 # Set it.
2432 if oInstr.sSubOpcode is not None:
2433 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
2434 % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2435 oInstr.sSubOpcode = sSubOpcode;
2436
2437 _ = iEndLine;
2438 return True;
2439
2440 def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
2441 """
2442 Tag: \@openc
2443 Value: ModR/M|fixed|prefix|<map name>
2444
2445 The instruction operand encoding style.
2446 """
2447 oInstr = self.ensureInstructionForOpTag(iTagLine);
2448
2449 # Flatten and validate the value.
2450 sEncoding = self.flattenAllSections(aasSections);
2451 if sEncoding in g_kdEncodings:
2452 pass;
2453 elif sEncoding in g_dInstructionMaps:
2454 pass;
2455 elif not _isValidOpcodeByte(sEncoding):
2456 return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));
2457
2458 # Set it.
2459 if oInstr.sEncoding is not None:
2460 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
2461 % ( sTag, oInstr.sEncoding, sEncoding,));
2462 oInstr.sEncoding = sEncoding;
2463
2464 _ = iEndLine;
2465 return True;
2466
    ## EFlags tag to Instruction attribute name.
    ## Used by parseTagOpEFlags to find the attribute to get and set for a tag.
    kdOpFlagToAttr = {
        '@opfltest': 'asFlTest',
        '@opflmodify': 'asFlModify',
        '@opflundef': 'asFlUndefined',
        '@opflset': 'asFlSet',
        '@opflclear': 'asFlClear',
    };
2475
2476 def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
2477 """
2478 Tags: \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
2479 Value: <eflags specifier>
2480
2481 """
2482 oInstr = self.ensureInstructionForOpTag(iTagLine);
2483
2484 # Flatten, split up and validate the values.
2485 asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
2486 if len(asFlags) == 1 and asFlags[0].lower() == 'none':
2487 asFlags = [];
2488 else:
2489 fRc = True;
2490 for iFlag, sFlag in enumerate(asFlags):
2491 if sFlag not in g_kdEFlagsMnemonics:
2492 if sFlag.strip() in g_kdEFlagsMnemonics:
2493 asFlags[iFlag] = sFlag.strip();
2494 else:
2495 fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
2496 if not fRc:
2497 return False;
2498
2499 # Set them.
2500 asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
2501 if asOld is not None:
2502 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
2503 setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
2504
2505 _ = iEndLine;
2506 return True;
2507
2508 def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
2509 """
2510 Tag: \@ophints
2511 Value: Comma or space separated list of flags and hints.
2512
2513 This covers the disassembler flags table and more.
2514 """
2515 oInstr = self.ensureInstructionForOpTag(iTagLine);
2516
2517 # Flatten as a space separated list, split it up and validate the values.
2518 asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
2519 if len(asHints) == 1 and asHints[0].lower() == 'none':
2520 asHints = [];
2521 else:
2522 fRc = True;
2523 for iHint, sHint in enumerate(asHints):
2524 if sHint not in g_kdHints:
2525 if sHint.strip() in g_kdHints:
2526 sHint[iHint] = sHint.strip();
2527 else:
2528 fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
2529 if not fRc:
2530 return False;
2531
2532 # Append them.
2533 for sHint in asHints:
2534 if sHint not in oInstr.dHints:
2535 oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
2536 else:
2537 self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));
2538
2539 _ = iEndLine;
2540 return True;
2541
2542 def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
2543 """
2544 Tag: \@opdisenum
2545 Value: OP_XXXX
2546
2547 This is for select a specific (legacy) disassembler enum value for the
2548 instruction.
2549 """
2550 oInstr = self.ensureInstructionForOpTag(iTagLine);
2551
2552 # Flatten and split.
2553 asWords = self.flattenAllSections(aasSections).split();
2554 if len(asWords) != 1:
2555 self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
2556 if not asWords:
2557 return False;
2558 sDisEnum = asWords[0];
2559 if not self.oReDisEnum.match(sDisEnum):
2560 return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
2561 % (sTag, sDisEnum, self.oReDisEnum.pattern));
2562
2563 # Set it.
2564 if oInstr.sDisEnum is not None:
2565 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2566 oInstr.sDisEnum = sDisEnum;
2567
2568 _ = iEndLine;
2569 return True;
2570
2571 def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
2572 """
2573 Tag: \@opmincpu
2574 Value: <simple CPU name>
2575
2576 Indicates when this instruction was introduced.
2577 """
2578 oInstr = self.ensureInstructionForOpTag(iTagLine);
2579
2580 # Flatten the value, split into words, make sure there's just one, valid it.
2581 asCpus = self.flattenAllSections(aasSections).split();
2582 if len(asCpus) > 1:
2583 self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
2584
2585 sMinCpu = asCpus[0];
2586 if sMinCpu in g_kdCpuNames:
2587 oInstr.sMinCpu = sMinCpu;
2588 else:
2589 return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
2590 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));
2591
2592 # Set it.
2593 if oInstr.sMinCpu is None:
2594 oInstr.sMinCpu = sMinCpu;
2595 elif oInstr.sMinCpu != sMinCpu:
2596 self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));
2597
2598 _ = iEndLine;
2599 return True;
2600
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    Values may be separated by whitespace and/or commas.  Each value must be
    a key of g_kdCpuIdFlags; the single word 'none' (any case) means no
    CPUID requirement.  Returns False when validation failed, True otherwise.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Flatten as a space separated list, split it up and validate the values.
    sFlat = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ')
    asCpuIds = sFlat.replace(',', ' ').split()
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = []
    else:
        # Note: split() never yields words with surrounding whitespace, so no
        #       per-word strip() retry is needed (the old retry wrote into the
        #       immutable string instead of the list and could never succeed).
        fRc = True
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,))
        if not fRc:
            return False

    # Append them, complaining about (and skipping) duplicates.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId)
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,))

    _ = iEndLine
    return True
2634
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Exactly one word matching self.oReGroupName is expected, and the group
    may only be set once per instruction.  Returns True on success, else the
    result of self.errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # There must be one and only one word in the flattened value.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asWords,))

    # It must look like a group name.
    sNewGroup = asWords[0]
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sNewGroup, self.oReGroupName.pattern))

    # Refuse to replace a group that was already set.
    sOldGroup = oInstr.sGroup
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,))

    oInstr.sGroup = sNewGroup
    return True
2660
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The value must be a single key of g_kdInvalidStyles and only one of the
    three tags may be given per instruction.  Returns True on success, else
    the result of self.errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # One word only.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asWords,))

    # It must be a known style.
    sNewStyle = asWords[0]
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sNewStyle, g_kdInvalidStyles.keys(),))

    # The three tags share a single slot, so the first one wins.
    sOldStyle = oInstr.sInvalidStyle
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sNewStyle,))
    oInstr.sInvalidStyle = sNewStyle

    # @opunused and @opinvalid additionally flag the instruction; @opinvlstyle does not.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag)
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True)
    return True
2698
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as one test.  Tests that parse
    cleanly are appended to the instruction's aoTests list; problems are
    reported via self.errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,])
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections))
            continue
        oTest = InstructionTest(oInstr)

        asSelectors = []
        asInputs    = []
        asOutputs   = []
        asCur       = asOutputs
        fRc         = True
        asWords     = sFlatSection.split()
        # The words are walked back to front: outputs come last, then "->",
        # then the inputs, then "/" and finally the selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  The state checks must compare list
            # identity; comparing by value would treat two lists with equal
            # content (e.g. both still empty) as the same state and let a
            # repeated "->" or a misplaced "/" slip through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asInputs
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,))
                    break
                asCur = asSelectors
            else:
                asCur.insert(0, sWord)

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full '<variable><op><value>' expression.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond)
            oSelector = None
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp)
                if off >= 0:
                    sVariable = sCondExp[:off]
                    sValue    = sCondExp[off + len(sOp):]
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue)
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),))
                    break   # Only the first operator found in the expression is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,))
                oTest.aoSelectors.append(oSelector)
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,))

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ]
            for sItem in asItems:
                oItem = None
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp)
                    if off < 0:
                        continue
                    sField     = sItem[:off]
                    sValueType = sItem[off + len(sOp):]
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':<type>' suffix, otherwise
                        # the type comes from the field table entry.
                        asSplit = sValueType.split(':', 1)
                        sValue  = asSplit[0]
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0]
                        if sType in TestInOut.kdTypes:
                            # validate() is expected to return True or something describing the problem.
                            oValid = TestInOut.kdTypes[sType].validate(sValue)
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType)
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ))
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ))
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),))
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),))
                    break   # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,))
                    aoDst.append(oItem)
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,))

        #
        # Only keep the test if everything parsed; otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest)
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),))
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,))

    _ = iEndLine
    return True
2843
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    Complains if the number does not equal the count of tests collected so
    far for the instruction, then hands the tag to parseTagOpTest and returns
    its result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The number follows '@optest' (7 chars) directly, or sits between
    # '@optest[' (8 chars) and a closing bracket.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:]
    iTest   = int(sNumber)

    cTests = len(oInstr.aoTests)
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,))
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine)
2859
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.

    See also \@oponlytest.

    Does nothing with its arguments and always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine)     # All deliberately unused.
    return True
2871
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each referenced name is only recorded in the instruction's asCopyTests
    list here; nothing is copied by this method.  Returns True unless no
    reference was given at all.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # Recording the references (rather than resolving them now) lets the copy
    # jobs run after all input is parsed, so forward references can be handled.
    asRefs = self.flattenAllSections(aasSections).split()
    if not asRefs:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,))

    for sRef in asRefs:
        if sRef in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sRef,))
            continue
        # A reference is either a statistics name or a decoder function name.
        if self.oReStatsName.match(sRef) or self.oReFunctionName.match(sRef):
            oInstr.asCopyTests.append(sRef)
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                              % (sTag, sRef, self.oReStatsName.pattern, self.oReFunctionName.pattern))
    return True
2900
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Adds the instruction to g_aoOnlyTestInstructions unless it is already
    there.  Returns True on success, or the result of self.errorComment() if
    a value was given.
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # The tag is a pure marker, so any value is a mistake.
    sValue = self.flattenAllSections(aasSections).strip()
    if not sValue:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr)
        return True
    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue))
2923
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The value must be a single key of g_kdXcptTypes and may only be set once
    per instruction.  Returns True on success, else the result of
    self.errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    # One word only.
    asWords = self.flattenAllSections(aasSections).split()
    if len(asWords) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asWords,))

    # It must be a known exception type.
    sNewType = asWords[0]
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),))

    # Refuse to replace a type that was already set.
    sOldType = oInstr.sXcptType
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sNewType,))

    oInstr.sXcptType = sNewType
    return True
2950
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReFunctionName and the name may
    only be set once per instruction.  Returns True on success, else the
    result of self.errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewFunction = self.flattenAllSections(aasSections)
    if self.oReFunctionName.match(sNewFunction):
        if oInstr.sFunction is None:
            oInstr.sFunction = sNewFunction
            return True
        # Already got a name, keep it.
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, oInstr.sFunction, sNewFunction,))

    # Doesn't look like a function name.
    return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                             % (sTag, sNewFunction, self.oReFunctionName.pattern))
2978
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The flattened value must match self.oReStatsName and the name may only
    be set once per instruction.  Returns True on success, else the result
    of self.errorComment().
    """
    _ = iEndLine
    oInstr = self.ensureInstructionForOpTag(iTagLine)

    sNewStats = self.flattenAllSections(aasSections)
    if self.oReStatsName.match(sNewStats):
        if oInstr.sStats is None:
            oInstr.sStats = sNewStats
            return True
        # Already got a name, keep it.
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, oInstr.sStats, sNewStats,))

    # Doesn't look like a statistics name.
    return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                             % (sTag, sNewStats, self.oReStatsName.pattern))
3006
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or of self.errorComment()
    if a value was given.
    """
    _ = iEndLine
    sValue = self.flattenAllSections(aasSections)
    if sValue == '':
        return self.doneInstructions()
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,))
3019
3020 ## @}
3021
3022
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is cut into tags (lines starting with '@') with their
    values, where blank lines split a value into sections.  Text before
    the first tag is filed under the pseudo tag '@default'.  Each tag
    found in self.dTagHandlers is passed to its handler as
    (sTag, aasSections, iTagLine, iEndLine).

    Returns False if the comment contains neither 'Opcode' nor '@',
    otherwise True.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment
    if sComment.find('Opcode') < 0 and sComment.find('@') < 0:
        return False

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines, advancing the comment
    # line number by the count of leading ones dropped.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')]

    cLeadingBlanks = 0
    while cLeadingBlanks < len(asLines) and not asLines[cLeadingBlanks]:
        cLeadingBlanks += 1
    self.iCommentLine += cLeadingBlanks
    del asLines[:cLeadingBlanks]

    while asLines and not asLines[-1]:
        asLines.pop()

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines)

    #
    # Pass 1: Cut the lines up into (tag, sections, tag line, end line, fHints)
    #         records.  Only tags that are followed by another tag get the
    #         "did you mean" hints in pass 2, the last tag of the comment
    #         does not.
    #
    aoTags       = []
    sCurTag      = '@default'
    iCurTagLine  = 0
    asCurSection = []
    aasSections  = [ asCurSection, ]
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            # Close the previous tag, dropping a trailing empty section.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1)
            aoTags.append((sCurTag, aasSections, iCurTagLine, iLine, True))

            # Start the new tag; anything after the tag name opens its first section.
            asSplit      = sLine.split(None, 1)
            sCurTag      = asSplit[0].lower()
            asCurSection = asSplit[1:]
            aasSections  = [ asCurSection, ]
            iCurTagLine  = iLine
        elif sLine:
            asCurSection.append(sLine)
        elif asCurSection:
            # Blank line after some text: start a new section.
            asCurSection = []
            aasSections.append(asCurSection)

    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1)
    aoTags.append((sCurTag, aasSections, iCurTagLine, len(asLines) - 1, False))

    #
    # Pass 2: Dispatch the tags in the order they were found.
    #
    cOpTags      = 0
    sFlatDefault = None
    for sTag, aasTagSections, iTagLine, iEndLine, fHints in aoTags:
        if sTag in self.dTagHandlers:
            self.dTagHandlers[sTag](sTag, aasTagSections, iTagLine, iEndLine)
            cOpTags += 1
        elif sTag.startswith('@op'):
            self.errorComment(iTagLine, 'Unknown tag: %s' % (sTag))
        elif sTag == '@default':
            sFlatDefault = self.flattenAllSections(aasTagSections)
        elif fHints:
            if '@op' + sTag[1:] in self.dTagHandlers:
                self.errorComment(iTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sTag[1:], sTag))
            elif sTag in ['@encoding', '@opencoding']:
                self.errorComment(iTagLine, 'Did you mean "@openc" rather than "%s"?' % (sTag,))

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,))

    return True
3123
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, following lines from self.asLines (starting at
    index self.iLine) are appended until it is.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  If the name does not match
    self.oReMacroName, the result of self.error() is returned instead.
    """
    # First the name.
    offOpen = sInvocation.find('(')
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found")
    sName = sInvocation[:offOpen].strip()
    if not self.oReMacroName.match(sName):
        return self.error("invalid macro name '%s'" % (sName,))

    # Then the arguments.  Commas only separate arguments at depth one and
    # outside of string/character literals.
    asArgs    = [ sName, ]
    iNextLine = self.iLine
    off       = offOpen + 1
    offArg    = off             # Start of the argument currently being scanned.
    chQuote   = None            # The quote character while inside a literal.
    cDepth    = 1               # Parenthesis nesting depth.
    while cDepth > 0:
        # Pull in the next source line when running out of text.
        if off >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file')
                return (off, asArgs)
            sInvocation += self.asLines[iNextLine]
            iNextLine += 1
        ch = sInvocation[off]

        if chQuote:
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1        # Skip the escaped character.
            elif ch == chQuote:
                chQuote = None
        elif ch == '"' or ch == '\'':
            chQuote = ch
        elif ch == '(':
            cDepth += 1
        elif ch == ',' or ch == ')':
            if cDepth == 1:
                asArgs.append(sInvocation[offArg:off].strip())
                offArg = off + 1
            if ch == ')':
                cDepth -= 1
        off += 1

    return (off, asArgs)
3174
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed by an opening parenthesis
    (whitespace in between is allowed).

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset made relative to the start of sCode).
    """
    offHit = sCode.find(sMacro)
    # startswith() rather than indexing [0]: the macro name may be the very
    # last thing in sCode, leaving nothing to index.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:])
        return (offHit + offAfter, asRet)
    return (len(sCode), None)
3184
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Same as findAndParseMacroInvocationEx, but without the offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro)
    return asArgs
3190
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order and stops at the first one
    that findAndParseMacroInvocation finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further macros are tried after the first hit.
    oHits = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro)
    return next((asArgs for asArgs in oHits if asArgs is not None), None)
3200
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against each other and against what the
    tags have already set on the last current instruction (mnemonic,
    operands, encoding, sub-opcode, statistics name), filling in whatever
    is still unset.  Problems are reported via self.error().

    sAsm is currently unused.  Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,))
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,))
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,))
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,))

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction()
    oInstr = self.aoCurInstrs[-1]
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,))

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,))

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),))
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1]
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType))
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere
                sType  = oInstr.aoOperands[iOperand].sType
            else:
                sWhere = 'reg'
                sType  = 'Gb'
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,))

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,))
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0]
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm))

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ))
            else:
                # Whether the form name mentions VEX or XOP at all.  Must be a
                # containment test: str.find() returns -1 (truthy) on a miss
                # and 0 (falsy) for a hit at the very start of the name.
                fVexOrXop = 'VEX' in sForm or 'XOP' in sForm
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,))
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4]
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not fVexOrXop
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ))

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,))

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,))
    elif oInstr.sStats is None:
        oInstr.sStats = sStats
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,))

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are '|' separated lists of prefixed constants or '0'.
    for sHints, sPrefix, sParam in [ (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ]:
        for sHint in sHints.split('|'):
            sHint = sHint.strip()
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower()
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,))
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,))

    _ = sAsm
    return True
3332
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic arguments of the EX variants, so
    both are derived from the lower case mnemonic and the operands before
    handing over to workerIemOpMnemonicEx, whose result is returned.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands)
        sAsm   = sLower + ' ' + ','.join(asOperands)
    else:
        sStats = sLower
        sAsm   = sLower
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands)
3342
def checkCodeForMacro(self, sCode):
    """
    Checks code for relevant macro invocation.

    Returns True if a FNIEMOP_XXX function definition or an
    IEMOP_HLP_DONE_VEX_DECODING* invocation was found and processed, False
    otherwise (IEMOP_MNEMONIC* invocations are processed but still yield
    False).
    """
    # Nothing to scan unless there is an opening parenthesis past the first column.
    if sCode.find('(') <= 0:
        return False

    #
    # Look for instruction decoder function definitions. ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ])
    if asArgs is not None:
        sFunction = asArgs[1]
        sMacro    = asArgs[0]
        fStub     = sMacro.find('STUB') > 0

        if not self.aoCurInstrs:
            self.addInstruction()
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro != -1:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sMacro, oInstr,) )
            else:
                oInstr.iLineFnIemOpMacro = self.iLine
        self.setInstrunctionAttrib('sFunction', sFunction)
        self.setInstrunctionAttrib('fStub', fStub, fOverwrite = True)
        self.setInstrunctionAttrib('fUdStub', sMacro.find('UD_STUB') > 0, fOverwrite = True)
        if fStub:
            # A stub has no body to look at, so the instruction(s) are complete now.
            self.doneInstructions()
        return True

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ])
    if asArgs is not None:
        sMacro = asArgs[0]
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            # The L0 variants imply the vex_l_zero hint; add it where missing and
            # complain if a mnemonic macro has been seen without it.
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,))
                oInstr.dHints['vex_l_zero'] = True
        return True

    #
    # IEMOP_MNEMONIC*
    #

    # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
    asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC')
    if asArgs is not None and len(self.aoCurInstrs) == 1:
        oInstr = self.aoCurInstrs[0]
        if oInstr.sStats is None:
            oInstr.sStats = asArgs[1]
        self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1])

    # The numbered variants, all EX ones first and then the plain ones:
    #   IEMOP_MNEMONICnEX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1 .. a_OpN,] a_fDisHints, a_fIemHints)
    #   IEMOP_MNEMONICn(a_Form, a_Upper, a_Lower, [a_Op1 .. a_OpN,] a_fDisHints, a_fIemHints)
    # asArgs[0] is the macro name, so the operands start at index 6 and 4 respectively
    # and the two hint arguments follow directly after the n operands.
    for sSuffix, offFirstOp in (('EX', 6), ('', 4)):
        for cOperands in range(5):
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u%s' % (cOperands, sSuffix,))
            if asArgs is None:
                continue
            offHints   = offFirstOp + cOperands
            sDisHints  = asArgs[offHints]
            sIemHints  = asArgs[offHints + 1]
            asOperands = asArgs[offFirstOp:offHints]
            if sSuffix:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           sDisHints, sIemHints, asOperands)
            else:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         sDisHints, sIemHints, asOperands)

    return False
3458
3459
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine onwards.  Text outside C multi-line
    comments is handed to checkCodeForMacro(), while the text of each
    multi-line comment is collected in self.sComment and handed on through
    parseComment() once the terminating '*/' has been seen.

    Returns number of errors (the printErrors() result).
    Raises exception on fatal trouble.
    """
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # We only look for comments, so only lines with a slash might possibly
        # influence the parser state.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                # Scan the line piece by piece, switching state at each '/*' and '*/'.
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        offHit = sLine.find('/*', offLine); # only multiline comments for now.
                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit]);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:]);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        # Raised explicitly rather than asserted: offLine does not advance
                        # here, so a stripped assert (python -O) would loop forever.
                        raise AssertionError('unexpected parser state: %s' % (self.iState,));
            # C++ line comment: only the code in front of it is of interest.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash]);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check code line for relevant macro.
        elif self.iState == self.kiCode and sLine.find('IEMOP_') >= 0:
            self.checkCodeForMacro(sLine);

        # If the line is a '}' in the first position, complete the instructions.
        # Note! startswith() rather than sLine[0] so an empty line string is harmless.
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions();

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # Flush whatever is pending at the end of the file and report.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3531
3532
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    Returns the error count produced by SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    from the parser is printed before being passed on to the caller.
    """
    #
    # Slurp the whole file as a list of lines; the file is closed again
    # whether or not the reading works out.
    #
    try:
        oSrc = open(sSrcFile, "r"); # pylint: disable=consider-using-with
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrc:
        try:
            asSrcLines = oSrc.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asSrcLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3561
3562
def __doTestCopying():
    """
    Carries out the asCopyTests directives of all instructions.

    Each reference is looked up as a statistics name first and as a function
    name second.  The tests of every instruction found are appended to the
    aoTests of the instruction holding the directive.  References that match
    nothing, or that match the holding instruction itself, are reported on
    stderr.

    Returns the number of errors.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        if not oTarget.asCopyTests:
            continue;
        for sRef in oTarget.asCopyTests:
            # Statistics name lookup gives a single instruction, function name lookup a list.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sRef, []);
            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3590
3591
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    When the list is empty nothing is touched.

    Returns 0 (there is nothing that can fail here).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCur in g_aoAllInstructions:
        if oCur.aoTests and oCur not in g_aoOnlyTestInstructions:
            oCur.aoTests = [];
    return 0;
3603
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files are looked up in the directory holding this script.  After the
    parsing, the test copying and the only-test filtering are applied.

    Returns True.  Terminates the process with exit code 1 if any errors were
    counted.
    Raises exception on failure.
    """
    # (default instruction map, source file name) pairs, in parsing order.
    atSources = (
        ( 'one', 'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f', 'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38', 'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a', 'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1', 'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2', 'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3', 'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow', 'IEMAllInstructions3DNow.cpp.h'),
    );
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    cErrors = 0;
    for sMap, sFile in atSources:
        cErrors += __parseFileByName(os.path.join(sScriptDir, sFile), sMap);
    cErrors += __doTestCopying() + __applyOnlyTest();

    if cErrors:
        sys.exit(1);
    return True;
3630
3631
3632
# Module-level call: the instruction sources are parsed as soon as this file
# is loaded, both when imported and when run as a script.
__parseAll();
3634
3635
3636#
3637# Generators (may perhaps move later).
3638#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one instruction as an OP() or OPVEX() disassembler table entry.

    The entry is built as a list of columns (format string, operand parser
    indexes, opcode enum, operand parameter types, flags) which are then
    joined into one line, each column padded out to a fixed offset.

    Returns the line as a string.
    """
    # OP() takes up to three operands, anything with more goes into OPVEX().
    fFourOperands = len(oInstr.aoOperands) > 3;
    sMacro = 'OPVEX' if fFourOperands else 'OP';
    cMaxOperands = 4 if fFourOperands else 3;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    asFmtOperands = [];
    for oOperand in oInstr.aoOperands:
        sFmt = g_kdOpTypes[oOperand.sType][2];
        if sFmt[0] != '%': ## @todo remove upper() later.
            sFmt = sFmt.upper();
        asFmtOperands.append(sFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asFmtOperands:
        sFormat += ' ' + ','.join(asFmtOperands);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    iStart = len(asColumns);
    sEncoding = oInstr.sEncoding;
    dSimpleParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2': 'IDX_ParseVex2b,',
        'vex3': 'IDX_ParseVex3b,',
    };
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM();
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero parser index per operand.
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in dSimpleParsers:
        asColumns.append(dSimpleParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those not
    # already covered by a decoder column above, then pad with zeros.
    cDecoded = len(asColumns) - iStart;
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ',' for oOperand in oInstr.aoOperands[cDecoded:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags.  Only the hints mapping to DISOPTYPE_ defines are used.
    #
    asDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asFlags = [sDefine for sDefine in asDefines if sDefine.startswith('DISOPTYPE_')];
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line.  A column starts at its offset, or one
    # space after the previous column when that one ran past the offset.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1) + sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
3763
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Works out whether the leading and trailing invalid (None) entries of the
    table should be trimmed off.

    Returns (iInstr, cInstructions, fShortTable).  For a short table iInstr is
    the index of the first valid entry and cInstructions is one past the last
    valid entry.  Otherwise the full range (0, len(aoTableOrdered)) is given.
    """
    cTotal = len(aoTableOrdered);

    # One past the last valid entry (zero when everything is invalid).
    iEnd = next((iEntry + 1 for iEntry in range(cTotal - 1, -1, -1) if aoTableOrdered[iEntry] is not None), 0);

    # The first valid entry.
    iFirst = next((iEntry for iEntry in range(iEnd) if aoTableOrdered[iEntry] is not None), iEnd);

    # Go for the short table version when trimming drops at least 1/30th
    # (integer division) of the entries.
    # NOTE(review): the original comment here spoke of saving more than 30%,
    # which is not what the '// 30' threshold tests - confirm which is intended.
    cTrimmed = iFirst + cTotal - iEnd;
    if cTrimmed >= cTotal // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cTotal, False);
3785
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    For each instruction map whose name starts with "vex" (all others are
    skipped for the time being), a DISOPCODE table and a DISOPMAPDESC range
    record are written to oDstFile.  Diagnostics go to stderr so they cannot
    end up in the middle of the generated code.

    Returns nothing.
    """

    #
    # The disassembler uses a slightly different table layout to save space:
    # maps selected by 'byte+pfx' are split up into one map per prefix.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oCur: oCur.sEncoding + ''.join(oCur.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # NOTE(review): collected for a header file, but not written anywhere yet.
    sys.stderr.write("debug: maps=%s\n\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),));
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow = 0x10 * cEntriesPerByte;
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        sTableName = oMap.getDisasTableName();
        sRangeName = oMap.getDisasRangeName();

        # An empty range still gets one dummy entry (see below), so the
        # element count used in the declarations and the compile time
        # assertion must be one in that case.
        fEmpty = iInstrStart >= iInstrEnd;
        cTableEntries = 1 if fEmpty else iInstrEnd - iInstrStart;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, cTableEntries,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (sTableName, cTableEntries,));
        asLines.append('{');

        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # The block macro covers cEntriesPerByte entries, so skip
                        # exactly that many (the last one by the increment below).
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this passes the empty list itself to the formatter - confirm intent.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if fEmpty:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must use the very same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
3900
# When run as a script, generate the disassembler tables using the default
# destination of generateDisassemblerTables() (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3903
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette