VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 95341

Last change on this file since 95341 was 95341, checked in by vboxsync, 3 years ago

VMM/IEM: Implemented the BLSR, BLSMSK and BLSI instructions. bugref:9898

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 166.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 95341 2022-06-22 10:37:37Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13__copyright__ = \
14"""
15Copyright (C) 2017-2022 Oracle Corporation
16
17This file is part of VirtualBox Open Source Edition (OSE), as
18available from http://www.215389.xyz. This file is free software;
19you can redistribute it and/or modify it under the terms of the GNU
20General Public License (GPL) as published by the Free Software
21Foundation, in version 2 as it comes in the "COPYING" file of the
22VirtualBox OSE distribution. VirtualBox OSE is distributed in the
23hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
24
25The contents of this file may alternatively be used under the terms
26of the Common Development and Distribution License Version 1.0
27(CDDL) only, as it comes in the "COPYING.CDDL" file of the
28VirtualBox OSE distribution, in which case the provisions of the
29CDDL are applicable instead of those of the GPL.
30
31You may elect to license modified versions of this file under the
32terms and conditions of either the GPL or the CDDL or both.
33"""
34__version__ = "$Revision: 95341 $"
35
36# pylint: disable=anomalous-backslash-in-string
37
38# Standard python imports.
39import os
40import re
41import sys
42
43## Only the main script needs to modify the path.
44#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
45# 'ValidationKit');
46#sys.path.append(g_ksValidationKitDir);
47#
48#from common import utils; - Windows build boxes don't have pywin32.
49
50# Python 3 hacks:
51if sys.version_info[0] >= 3:
52 long = int; # pylint: disable=redefined-builtin,invalid-name
53
54
55g_kdX86EFlagsConstants = {
56 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
57 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
58 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
59 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
60 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
61 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
62 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
63 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
64 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
65 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
66 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
67 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
68 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
69 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
70 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
71 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
72 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
73 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
74 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
75 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
76};
77
78## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
g_kdEFlagsMnemonics = {
    # Maps a flag mnemonic to the X86_EFL_XXX constant name it stands for.
    # A leading '!' on the constant name marks the mnemonic as meaning
    # "flag clear".

    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): In the usual debugger notation 'pe' (parity even) is shown
    #               when PF is set and 'po' (parity odd) when it is clear, so
    #               these two look swapped.  Values kept as-is - confirm
    #               against the users of these mnemonics before changing.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
124
125## Constants and values for CR0.
126g_kdX86Cr0Constants = {
127 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
128 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
129 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
130 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
131 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
132 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
133 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
134 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
135 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
136 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
137 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
138};
139
140## Constants and values for CR4.
141g_kdX86Cr4Constants = {
142 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
143 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
144 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
145 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
146 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
147 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
148 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
149 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
150 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
151 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
152 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
153 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
154 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
155 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
156 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
157 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
158 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
159 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
160};
161
162## XSAVE components (XCR0).
163g_kdX86XSaveCConstants = {
164 'XSAVE_C_X87': 0x00000001,
165 'XSAVE_C_SSE': 0x00000002,
166 'XSAVE_C_YMM': 0x00000004,
167 'XSAVE_C_BNDREGS': 0x00000008,
168 'XSAVE_C_BNDCSR': 0x00000010,
169 'XSAVE_C_OPMASK': 0x00000020,
170 'XSAVE_C_ZMM_HI256': 0x00000040,
171 'XSAVE_C_ZMM_16HI': 0x00000080,
172 'XSAVE_C_PKRU': 0x00000200,
173 'XSAVE_C_LWP': 0x4000000000000000,
174 'XSAVE_C_X': 0x8000000000000000,
175 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
176 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
177};
178
179
180## \@op[1-4] locations
181g_kdOpLocations = {
182 'reg': [], ## modrm.reg
183 'rm': [], ## modrm.rm
184 'imm': [], ## immediate instruction data
185 'vvvv': [], ## VEX.vvvv
186
187 # fixed registers.
188 'AL': [],
189 'rAX': [],
190 'rSI': [],
191 'rDI': [],
192 'rFLAGS': [],
193 'CS': [],
194 'DS': [],
195 'ES': [],
196 'FS': [],
197 'GS': [],
198 'SS': [],
199};
200
201## \@op[1-4] types
202##
203## Value fields:
204## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
205## - 1: the location (g_kdOpLocations).
206## - 2: disassembler format string version of the type.
207## - 3: disassembler OP_PARAM_XXX (XXX only).
208## - 4: IEM form matching instruction.
209##
210## Note! See the A.2.1 in SDM vol 2 for the type names.
211g_kdOpTypes = {
212 # Fixed addresses
213 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
214
215 # ModR/M.rm
216 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
217 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
218 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
219 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
220 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
221 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
222 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
223 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
224 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
225 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
226 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
227 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
228 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
229 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
230 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
231 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
232 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
233 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
234 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
235 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
236 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
237 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
238 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
239 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
240 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
241
242 # ModR/M.rm - register only.
243 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
244 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
245 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
246 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
247 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
248 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
249 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
250
251 # ModR/M.rm - memory only.
252 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
253 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
254 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
255 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
256 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
257 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
258 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
259 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
260 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
261 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
262 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
263 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
264 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
265 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
266 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
267
268 # ModR/M.reg
269 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
270 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
271 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
272 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
273 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
274 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
275 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
276 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
277 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
278 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
279 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
280 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
281 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
282 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
283 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
284 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
285 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
286 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
287 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
288 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
289 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
290 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
291 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
292 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
293 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
294 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
295 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
296 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
297 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
298 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
299 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
300
301 # VEX.vvvv
302 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
303 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
304 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
305 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
306
307 # Immediate values.
308 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
309 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
310 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
311 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
312 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
313 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
314
315 # Address operands (no ModR/M).
316 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
317 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
318
319 # Relative jump targets
320 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
321 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
322
323 # DS:rSI
324 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
325 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
326 # ES:rDI
327 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
328 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
329
330 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
331
332 # Fixed registers.
333 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
334 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
335 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
336 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
337 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
338 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
339 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
340 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
341};
342
343# IDX_ParseFixedReg
344# IDX_ParseVexDest
345
346
347## IEMFORM_XXX mappings.
348g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
349 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
350 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
351 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
352 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
353 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
354 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
355 'M': ( 'ModR/M', [ 'rm', ], '', ),
356 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
357 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
358 'R': ( 'ModR/M', [ 'reg', ], '', ),
359
360 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
361 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
362 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
363 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
364 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
365 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
366 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
367 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
368 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
369 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
370 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
371 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
372 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
373 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
374 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
375 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
376 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
377 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
378 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
379 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
380 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
381 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
382
383 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
384 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
385 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
386
387 'FIXED': ( 'fixed', None, '', ),
388};
389
390## \@oppfx values.
391g_kdPrefixes = {
392 'none': [],
393 '0x66': [],
394 '0xf3': [],
395 '0xf2': [],
396};
397
398## Special \@opcode tag values.
399g_kdSpecialOpcodes = {
400 '/reg': [],
401 'mr/reg': [],
402 '11 /reg': [],
403 '!11 /reg': [],
404 '11 mr/reg': [],
405 '!11 mr/reg': [],
406};
407
408## Special \@opcodesub tag values.
409## The first value is the real value for aliases.
410## The second value is for bs3cg1.
g_kdSubOpcodes = {
    # Each value is a two element list:
    #   [0] the real (canonical) sub-opcode value, so aliases resolve to it;
    #   [1] the value for bs3cg1.
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ],  ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ],  ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ],  ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ],  ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    # The real value used to be 'vex.l=0' here (copy & paste slip), which
    # made 'vex.l=1' resolve to the opposite VEX.L setting.
    'vex.l=1':              [ 'vex.l=1',            'L1',   ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
428
429## Valid values for \@openc
430g_kdEncodings = {
431 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
432 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
433 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
434 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
435 'prefix': [ None, ], ##< Prefix
436};
437
438## \@opunused, \@opinvalid, \@opinvlstyle
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
447
448g_kdCpuNames = {
449 '8086': (),
450 '80186': (),
451 '80286': (),
452 '80386': (),
453 '80486': (),
454};
455
456## \@opcpuid
457g_kdCpuIdFlags = {
458 'vme': 'X86_CPUID_FEATURE_EDX_VME',
459 'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
460 'msr': 'X86_CPUID_FEATURE_EDX_MSR',
461 'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
462 'sep': 'X86_CPUID_FEATURE_EDX_SEP',
463 'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
464 'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
465 'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
466 'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
467 'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
468 'sse': 'X86_CPUID_FEATURE_EDX_SSE',
469 'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
470 'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
471 'pclmul': 'X86_CPUID_FEATURE_ECX_DTES64',
472 'monitor': 'X86_CPUID_FEATURE_ECX_CPLDS',
473 'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
474 'smx': 'X86_CPUID_FEATURE_ECX_TM2',
475 'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
476 'fma': 'X86_CPUID_FEATURE_ECX_FMA',
477 'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
478 'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
479 'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
480 'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
481 'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
482 'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
483 'aes': 'X86_CPUID_FEATURE_ECX_AES',
484 'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
485 'avx': 'X86_CPUID_FEATURE_ECX_AVX',
486 'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
487 'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
488 'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',
489
490 'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
491 '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
492 '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
493 'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
494 'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
495 'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
496 'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
497 '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
498 'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
499 'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
500};
501
502## \@ophints values.
g_kdHints = {
    # Maps a hint name to the disassembler DISOPTYPE_XXX constant name.  An
    # empty string means the hint has no DISOPTYPE_XXX constant in this table.
    'invalid':               'DISOPTYPE_INVALID',               ##<
    'harmless':              'DISOPTYPE_HARMLESS',              ##<
    'controlflow':           'DISOPTYPE_CONTROLFLOW',           ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous':             'DISOPTYPE_DANGEROUS',             ##<
    'portio':                'DISOPTYPE_PORTIO',                ##<
    'privileged':            'DISOPTYPE_PRIVILEGED',            ##<
    'privileged_notrap':     'DISOPTYPE_PRIVILEGED_NOTRAP',     ##<
    'uncond_controlflow':    'DISOPTYPE_UNCOND_CONTROLFLOW',    ##<
    'relative_controlflow':  'DISOPTYPE_RELATIVE_CONTROLFLOW',  ##<
    'cond_controlflow':      'DISOPTYPE_COND_CONTROLFLOW',      ##<
    'interrupt':             'DISOPTYPE_INTERRUPT',             ##<
    'illegal':               'DISOPTYPE_ILLEGAL',               ##<
    'rrm_dangerous':         'DISOPTYPE_RRM_DANGEROUS',         ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':      'DISOPTYPE_RRM_DANGEROUS_16',      ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':          'DISOPTYPE_INHIBIT_IRQS',          ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'portio_read':           'DISOPTYPE_PORTIO_READ',           ##<
    'portio_write':          'DISOPTYPE_PORTIO_WRITE',          ##<
    'invalid_64':            'DISOPTYPE_INVALID_64',            ##< Invalid in 64 bits mode
    'only_64':               'DISOPTYPE_ONLY_64',               ##< Only valid in 64 bits mode
    'default_64_op_size':    'DISOPTYPE_DEFAULT_64_OP_SIZE',    ##< Default 64 bits operand size
    'forced_64_op_size':     'DISOPTYPE_FORCED_64_OP_SIZE',     ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg':    'DISOPTYPE_REXB_EXTENDS_OPREG',    ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11':          'DISOPTYPE_MOD_FIXED_11',          ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                ##  (only in 16 & 32 bits mode!)
    'sse':                   'DISOPTYPE_SSE',                   ##< SSE,SSE2,SSE3,AVX,++ instruction. Not implemented yet!
    'mmx':                   'DISOPTYPE_MMX',                   ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu':                   'DISOPTYPE_FPU',                   ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':        '',                                ##< Ignores operand size prefix 66h.
    'ignores_rexw':          '',                                ##< Ignores REX.W.
    'ignores_op_sizes':      '',                                ##< Shorthand, presumably for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':            '',                                ##< VEX.L must be 0.
    'vex_l_ignored':         '',                                ##< VEX.L is ignored.
    'vex_v_zero':            '',                                ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':          '',                                ##< Lock prefix allowed.
};
541
542## \@opxcpttype values (see SDMv2 2.4, 2.7).
543g_kdXcptTypes = {
544 'none': [],
545 '1': [],
546 '2': [],
547 '3': [],
548 '4': [],
549 '4UA': [],
550 '5': [],
551 '5LZ': [], # LZ = VEX.L must be zero.
552 '6': [],
553 '7': [],
554 '7LZ': [],
555 '8': [],
556 '11': [],
557 '12': [],
558 'E1': [],
559 'E1NF': [],
560 'E2': [],
561 'E3': [],
562 'E3NF': [],
563 'E4': [],
564 'E4NF': [],
565 'E5': [],
566 'E5NF': [],
567 'E6': [],
568 'E6NF': [],
569 'E7NF': [],
570 'E9': [],
571 'E9NF': [],
572 'E10': [],
573 'E11': [],
574 'E12': [],
575 'E12NF': [],
576};
577
578
579def _isValidOpcodeByte(sOpcode):
580 """
581 Checks if sOpcode is a valid lower case opcode byte.
582 Returns true/false.
583 """
584 if len(sOpcode) == 4:
585 if sOpcode[:2] == '0x':
586 if sOpcode[2] in '0123456789abcdef':
587 if sOpcode[3] in '0123456789abcdef':
588 return True;
589 return False;
590
591
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix selecting opcode map 1 (map select field, mmmmm).
        'vex2':     [], ##< VEX or EVEX prefix selecting opcode map 2.
        'vex3':     [], ##< VEX or EVEX prefix selecting opcode map 3.
        'xop8':     [], ##< XOP prefix selecting opcode map 8.
        'xop9':     [], ##< XOP prefix selecting opcode map 9.
        'xop10':    [], ##< XOP prefix selecting opcode map 10.
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':      [  256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx':  [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':        [    8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r': [   16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':    [   32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':    [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':     [    8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':        [   64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        Each asLeadOpcodes entry must pass _isValidOpcodeByte, and sDisParse
        must be None or start with 'IDX_Parse'.  All of this is checked with
        assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = [];               ##< The instructions in this map (list of Instruction).
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table with filtering instruction by sPrefix if not None.

        When filtering a 'byte+pfx' table, the copy uses the plain 'byte'
        selector since only one prefix variant is left.  Without a filter the
        copy gets its own list holding the same instruction objects.
        """
        if sPrefixFilter is not None and self.sSelector == 'byte+pfx':
            sSelector = 'byte';
        else:
            sSelector = self.sSelector;
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = sSelector, sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the non-byte selectors that value is
        treated as a ModR/M byte (mod = bits 7:6, reg = bits 5:3, rm = bits 2:0).
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
            assert iPrefix >= 0;
            return bOpcode * 4 + iPrefix;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            # reg ends up in bits 2:0 and mod in bits 4:3 of the index.
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            # reg in bits 2:0, bit 3 set when mod == 3 (register form).
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! The mod field is the two top bits, so it must be compared
        #       with 0xc0 after masking.  Comparing with 0xc (as was done
        #       earlier) can never match and made the '11' assertions fail
        #       unconditionally.
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Instructions without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot, convert the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;

    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm', 'r' and two digit lower case hex words are appended with a
        leading underscore, other words have 'grp'/'map' capitalized and the
        first letter upper cased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':        # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':      # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
784
785
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or '0x' prefixed hexadecimal integer string,
        optionally preceded by '+' or '-' (either sign forces sign extension).

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one or OR with.  The bytearray
        holds the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() so
        # the code does not depend on a name that only exists in Python 2.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string.  The '%x' conversion yields neither a
        # '0x' prefix nor a Python 2 'L' suffix.  Negative values need to be
        # manually converted to something non-negative (~-n + 1), which is
        # done by inverting each digit of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;     # Make sure the top bit (sign) is set.

        # Figure the natural digit count: the smallest normal size that fits,
        # or when too big for all of them a multiple of the largest one.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad with zeros or, for negative values, with 'f' digits.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.

        Always False for this base class.
        """
        _ = sValue;
        return False;
910
911
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  Mnemonics mapping to a constant prefixed by '!'
    contribute to the clear mask, the rest to the set mask.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts the flag mnemonic list into value entries (see TestType.get).

        Returns a (clear, set) pair if any flag is to be cleared, otherwise
        just the single entry tuple for the flags to set.
        Raises BadValue on unknown flag mnemonics.
        """
        dMasks = { 'clear': 0, 'set': 0 };
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] == '!':
                dMasks['clear'] |= g_kdX86EFlagsConstants[sConstant[1:]];
            else:
                dMasks['set']   |= g_kdX86EFlagsConstants[sConstant];

        aoSet = TestType.get(self, '0x%x' % (dMasks['set'],));
        if dMasks['clear'] == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (dMasks['clear'],))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """ Returns True if any of the kdZeroValueFlags mnemonics occurs in sValue. """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags);
947
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a constant dictionary.

    Values are comma separated flag names; each name is uppercased, prefixed
    with sConstantPrefix and looked up in kdConstantsAndValues.
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of all the flags named in sValue and returns
        the TestType.get() representation of the result.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fBits;
        sCombined = '0x%x' % (fCombined,);
        return TestType.get(self, sCombined);
968
969
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance records a field name (kdFields key), an assignment operator
    (kasOperators entry), the value string and the value type name.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # The signed type must say so explicitly, the TestType default is unsigned.
        'int':   TestType('int', fUnsigned = False),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Misspelled variant of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp a kasOperators entry; all four
        arguments must be strings (checked by assertions only).
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1235
1236
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (kdVariables key) against one of the
    values valid for that variable using one of the kasCompareOps operators.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };
    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        'paging':       'paging==on',
        '!paging':      'paging==off',
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        # Sanity check the input against the tables above (assertions only).
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;

        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1333
1334
1335class InstructionTest(object):
1336 """
1337 Instruction test.
1338 """
1339
1340 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1341 self.oInstr = oInstr # type: InstructionTest
1342 self.aoInputs = [] # type: list(TestInOut)
1343 self.aoOutputs = [] # type: list(TestInOut)
1344 self.aoSelectors = [] # type: list(TestSelector)
1345
1346 def toString(self, fRepr = False):
1347 """
1348 Converts it to string representation.
1349 """
1350 asWords = [];
1351 if self.aoSelectors:
1352 for oSelector in self.aoSelectors:
1353 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1354 asWords.append('/');
1355
1356 for oModifier in self.aoInputs:
1357 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1358
1359 asWords.append('->');
1360
1361 for oModifier in self.aoOutputs:
1362 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1363
1364 if fRepr:
1365 return '<' + ' '.join(asWords) + '>';
1366 return ' '.join(asWords);
1367
1368 def __str__(self):
1369 """ Provide string represenation. """
1370 return self.toString(False);
1371
1372 def __repr__(self):
1373 """ Provide unambigious string representation. """
1374 return self.toString(True);
1375
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere, self.sType = sWhere, sType;    ##< g_kdOpLocations, g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        chFirst = self.sType[0];
        return chFirst == 'E' or chFirst == 'G' or chFirst == 'M';
1390
1391
1392
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects everything known about one instruction: the core attributes
    (mnemonic, opcode, operands, flags, ...), implementation attributes,
    where it was decoded from, and some intermediate parser input.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []   # type: list(str)
        self.aoMaps         = []   # type: list(InstructionMap)
        self.aoOperands     = []   # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None # type: str
        self.sSubOpcode     = None # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []   # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces 'field=value' items separated by '; ', leaving out fields
        whose value is None.  The result is wrapped in '<' and '>' when fRepr
        is True.
        """
        aasFields = [];

        aasFields.append(['opcode',    self.sOpcode]);
        if self.sPrefix:
            aasFields.append(['prefix', self.sPrefix]);
        aasFields.append(['mnemonic',  self.sMnemonic]);
        for iOperand, oOperand in enumerate(self.aoOperands):
            aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
        if self.aoMaps:         aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
        aasFields.append(['encoding',  self.sEncoding]);
        if self.dHints:         aasFields.append(['hints', ','.join(self.dHints.keys())]);
        aasFields.append(['disenum',   self.sDisEnum]);
        if self.asCpuIds:       aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
        aasFields.append(['group',     self.sGroup]);
        if self.fUnused:        aasFields.append(['unused', 'True']);
        if self.fInvalid:       aasFields.append(['invalid', 'True']);
        aasFields.append(['invlstyle', self.sInvalidStyle]);
        aasFields.append(['fltest',    self.asFlTest]);
        aasFields.append(['flmodify',  self.asFlModify]);
        aasFields.append(['flundef',   self.asFlUndefined]);
        aasFields.append(['flset',     self.asFlSet]);
        aasFields.append(['flclear',   self.asFlClear]);
        aasFields.append(['mincpu',    self.sMinCpu]);
        aasFields.append(['stats',     self.sStats]);
        aasFields.append(['sFunction', self.sFunction]);
        if self.fStub:          aasFields.append(['fStub', 'True']);
        if self.fUdStub:        aasFields.append(['fUdStub', 'True']);
        if self.cOpTags:        aasFields.append(['optags', str(self.cOpTags)]);
        if self.iLineFnIemOpMacro  != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
        if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);

        sRet = '; '.join(['%s=%s' % (sField, sValue,) for sField, sValue in aasFields if sValue is not None]);
        if fRepr:
            sRet = '<' + sRet + '>';
        return sRet;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as its parent (oParent).  Arguments
        that evaluate to true replace the corresponding attribute in the copy;
        list and dictionary attributes are copied one level deep only.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);

        oCopy.oParent           = self;
        oCopy.sMnemonic         = self.sMnemonic;
        oCopy.sBrief            = self.sBrief;
        oCopy.asDescSections    = list(self.asDescSections);
        oCopy.aoMaps            = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.aoOperands        = list(self.aoOperands); ## Deeper copy?
        oCopy.sPrefix           = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode           = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode        = sSubOpcode if sSubOpcode else self.sSubOpcode;
        oCopy.sEncoding         = self.sEncoding;
        oCopy.asFlTest          = self.asFlTest;
        oCopy.asFlModify        = self.asFlModify;
        oCopy.asFlUndefined     = self.asFlUndefined;
        oCopy.asFlSet           = self.asFlSet;
        oCopy.asFlClear         = self.asFlClear;
        oCopy.dHints            = dict(self.dHints);
        oCopy.sDisEnum          = self.sDisEnum;
        oCopy.asCpuIds          = list(self.asCpuIds);
        oCopy.asReqFeatures     = list(self.asReqFeatures);
        oCopy.aoTests           = list(self.aoTests); ## Deeper copy?
        oCopy.sMinCpu           = self.sMinCpu;
        oCopy.oCpuExpr          = self.oCpuExpr;
        oCopy.sGroup            = self.sGroup;
        oCopy.fUnused           = self.fUnused;
        oCopy.fInvalid          = self.fInvalid;
        oCopy.sInvalidStyle     = self.sInvalidStyle;
        oCopy.sXcptType         = self.sXcptType;

        oCopy.sStats            = self.sStats;
        oCopy.sFunction         = self.sFunction;
        oCopy.fStub             = self.fStub;
        oCopy.fUdStub           = self.fUdStub;

        oCopy.iLineCompleted    = self.iLineCompleted;
        oCopy.cOpTags           = self.cOpTags;
        oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        oCopy.sRawDisOpNo       = self.sRawDisOpNo;
        oCopy.asRawDisParams    = list(self.asRawDisParams);
        oCopy.sRawIemOpFlags    = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes    = self.sRawOldOpcodes;
        oCopy.asCopyTests       = list(self.asCopyTests);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Supported forms:
            - '0xNN':  The byte value itself.
            - '/r':    r shifted left by 3 (the ModR/M reg field position).
            - '11/r':  r shifted left by 3 with 0xc0 ORed in.
            - '!11/r': r shifted left by 3 with 0x80 ORed in.

        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode);    # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode[:2] == '0x':
            return int(sOpcode, 16);

        # The /r form (two characters: the slash and the digit):
        if len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1].isdigit():
            return int(sOpcode[-1:]) << 3;

        # The 11/r form:
        if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0xc0;

        # The !11/r form (ORs in 0x80 to get a non-register mod value):
        ## @todo this doesn't really work...
        if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
            return (int(sOpcode[-1:]) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags.

        Each flag is translated via g_kdEFlagsMnemonics and then
        g_kdX86EFlagsConstants.  Returns 0 if asFlags is None or empty.
        """
        uRet = 0;
        if asFlags:
            for sFlag in asFlags:
                sConstant = g_kdEFlagsMnemonics[sFlag];
                assert sConstant[0] != '!', sConstant
                uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest into a integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify into a integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined into a integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet into a integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear into a integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        for oMap in self.aoMaps:
            if not oMap.isVexMap():
                return False;
        return True;
1631
1632
1633
1634## All the instructions.
1635g_aoAllInstructions = [] # type: list(Instruction)
1636
1637## All the instructions indexed by statistics name (opstat).
1638g_dAllInstructionsByStat = {} # type: dict(Instruction)
1639
1640## All the instructions indexed by function name (opfunction).
1641g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1642
1643## Instructions tagged by oponlytest
1644g_aoOnlyTestInstructions = [] # type: list(Instruction)
1645
1646## Instruction maps.
1647g_aoInstructionMaps = [
1648 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1649 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1650 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1651 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1652 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1653 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1654 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1655 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1656 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1657 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1658 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1659 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1660 ## @todo g_apfnEscF1_E0toFF
1661 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1662 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1663 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1664 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1665 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1666 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1667 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1668 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1669
1670 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1671 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1672 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1673 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1674 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1675 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1676 ## @todo What about g_apfnGroup9MemReg?
1677 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1678 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1679 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1680 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1681 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1682 ## @todo What about g_apfnGroup15RegReg?
1683 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1684 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1685 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1686
1687 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1688 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1689
1690 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1691 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1692 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1693 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1694 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1695 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1696
1697 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1698 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1699
1700 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1701 InstructionMap('xopmap8', sEncoding = 'xop8'),
1702 InstructionMap('xopmap9', sEncoding = 'xop9'),
1703 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1704 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1705 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1706 InstructionMap('xopmap10', sEncoding = 'xop10'),
1707 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1708];
# Lookup tables over g_aoInstructionMaps, keyed by map name and by the name of
# the IEM function table (sIemName) respectively.
g_dInstructionMaps          = dict((oEntry.sName, oEntry) for oEntry in g_aoInstructionMaps);
g_dInstructionMapsByIemName = dict((oEntry.sIemName, oEntry) for oEntry in g_aoInstructionMaps);
1711
1712
1713
class ParserException(Exception):
    """ Exception raised by the parser; carries a single message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1718
1719
1720class SimpleParser(object):
1721 """
1722 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1723 """
1724
1725 ## @name Parser state.
1726 ## @{
1727 kiCode = 0;
1728 kiCommentMulti = 1;
1729 ## @}
1730
1731 def __init__(self, sSrcFile, asLines, sDefaultMap):
1732 self.sSrcFile = sSrcFile;
1733 self.asLines = asLines;
1734 self.iLine = 0;
1735 self.iState = self.kiCode;
1736 self.sComment = '';
1737 self.iCommentLine = 0;
1738 self.aoCurInstrs = [];
1739
1740 assert sDefaultMap in g_dInstructionMaps;
1741 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
1742
1743 self.cTotalInstr = 0;
1744 self.cTotalStubs = 0;
1745 self.cTotalTagged = 0;
1746
1747 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1748 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1749 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
1750 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
1751 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
1752 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
1753 self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
1754 self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments.
1755 self.fDebug = True;
1756
1757 self.dTagHandlers = {
1758 '@opbrief': self.parseTagOpBrief,
1759 '@opdesc': self.parseTagOpDesc,
1760 '@opmnemonic': self.parseTagOpMnemonic,
1761 '@op1': self.parseTagOpOperandN,
1762 '@op2': self.parseTagOpOperandN,
1763 '@op3': self.parseTagOpOperandN,
1764 '@op4': self.parseTagOpOperandN,
1765 '@oppfx': self.parseTagOpPfx,
1766 '@opmaps': self.parseTagOpMaps,
1767 '@opcode': self.parseTagOpcode,
1768 '@opcodesub': self.parseTagOpcodeSub,
1769 '@openc': self.parseTagOpEnc,
1770 '@opfltest': self.parseTagOpEFlags,
1771 '@opflmodify': self.parseTagOpEFlags,
1772 '@opflundef': self.parseTagOpEFlags,
1773 '@opflset': self.parseTagOpEFlags,
1774 '@opflclear': self.parseTagOpEFlags,
1775 '@ophints': self.parseTagOpHints,
1776 '@opdisenum': self.parseTagOpDisEnum,
1777 '@opmincpu': self.parseTagOpMinCpu,
1778 '@opcpuid': self.parseTagOpCpuId,
1779 '@opgroup': self.parseTagOpGroup,
1780 '@opunused': self.parseTagOpUnusedInvalid,
1781 '@opinvalid': self.parseTagOpUnusedInvalid,
1782 '@opinvlstyle': self.parseTagOpUnusedInvalid,
1783 '@optest': self.parseTagOpTest,
1784 '@optestign': self.parseTagOpTestIgnore,
1785 '@optestignore': self.parseTagOpTestIgnore,
1786 '@opcopytests': self.parseTagOpCopyTests,
1787 '@oponly': self.parseTagOpOnlyTest,
1788 '@oponlytest': self.parseTagOpOnlyTest,
1789 '@opxcpttype': self.parseTagOpXcptType,
1790 '@opstats': self.parseTagOpStats,
1791 '@opfunction': self.parseTagOpFunction,
1792 '@opdone': self.parseTagOpDone,
1793 };
1794 for i in range(48):
1795 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
1796 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
1797
1798 self.asErrors = [];
1799
1800 def raiseError(self, sMessage):
1801 """
1802 Raise error prefixed with the source and line number.
1803 """
1804 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
1805
1806 def raiseCommentError(self, iLineInComment, sMessage):
1807 """
1808 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
1809 """
1810 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1811
1812 def error(self, sMessage):
1813 """
1814 Adds an error.
1815 returns False;
1816 """
1817 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
1818 return False;
1819
1820 def errorOnLine(self, iLine, sMessage):
1821 """
1822 Adds an error.
1823 returns False;
1824 """
1825 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,));
1826 return False;
1827
1828 def errorComment(self, iLineInComment, sMessage):
1829 """
1830 Adds a comment error.
1831 returns False;
1832 """
1833 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
1834 return False;
1835
1836 def printErrors(self):
1837 """
1838 Print the errors to stderr.
1839 Returns number of errors.
1840 """
1841 if self.asErrors:
1842 sys.stderr.write(u''.join(self.asErrors));
1843 return len(self.asErrors);
1844
1845 def debug(self, sMessage):
1846 """
1847 For debugging.
1848 """
1849 if self.fDebug:
1850 print('debug: %s' % (sMessage,));
1851
1852 def stripComments(self, sLine):
1853 """
1854 Returns sLine with comments stripped.
1855
1856 Complains if traces of incomplete multi-line comments are encountered.
1857 """
1858 sLine = self.oReComment.sub(" ", sLine);
1859 if sLine.find('/*') >= 0 or sLine.find('*/') >= 0:
1860 self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
1861 return sLine;
1862
    def parseFunctionTable(self, sLine):
        """
        Parses a PFNIEMOP table, updating/checking the @oppfx value.

        sLine is the line declaring the table; the table name and (optional)
        array size are extracted from it.  self.iLine is expected to index
        the line following the declaration.

        Returns True when the table was parsed and has the expected length,
        False (after recording an error) otherwise.

        Note! Updates iLine as it consumes the whole table.
        """

        #
        # Extract the table name.
        #
        sName = re.search(' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
        oMap = g_dInstructionMapsByIemName.get(sName);
        if not oMap:
            self.debug('No map for PFNIEMOP table: %s' % (sName,));
            oMap = self.oDefaultMap; # This is wrong wrong wrong.

        #
        # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
        # entries per byte:
        # no prefix, 066h prefix, f3h prefix, f2h prefix
        # Those tables has 256 & 32 entries respectively.
        #
        cEntriesPerByte = 4;
        cValidTableLength = 1024;
        asPrefixes = ('none', '0x66', '0xf3', '0xf2');

        # An explicit [256] or [32] array size selects the one-entry-per-byte layout.
        oEntriesMatch = re.search('\[ *(256|32) *\]', sLine);
        if oEntriesMatch:
            cEntriesPerByte = 1;
            cValidTableLength = int(oEntriesMatch.group(1));
            asPrefixes = (None,);

        #
        # The next line should be '{' and nothing else.
        #
        if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
            return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
        self.iLine += 1;

        #
        # Parse till we find the end of the table.
        #
        iEntry = 0;
        while self.iLine < len(self.asLines):
            # Get the next line and strip comments and spaces (assumes no
            # multi-line comments).
            sLine = self.asLines[self.iLine];
            self.iLine += 1;
            sLine = self.stripComments(sLine).strip();

            # Split the line up into entries, expanding IEMOP_X4 usage.
            # Walks backwards so that inserting/deleting at index i does not
            # disturb the indexes still to be visited.
            asEntries = sLine.split(',');
            for i in range(len(asEntries) - 1, -1, -1):
                sEntry = asEntries[i].strip();
                if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                    # Three extra copies + the one stored at index i below = four entries.
                    sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                    asEntries.insert(i + 1, sEntry);
                    asEntries.insert(i + 1, sEntry);
                    asEntries.insert(i + 1, sEntry);
                if sEntry:
                    asEntries[i] = sEntry;
                else:
                    del asEntries[i];

            # Process the entries.
            for sEntry in asEntries:
                # End of table: check that we have seen the expected number of entries.
                if sEntry in ('};', '}'):
                    if iEntry != cValidTableLength:
                        return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                    return True;
                if sEntry.startswith('iemOp_Invalid'):
                    pass; # skip
                else:
                    # Look up matching instruction by function.
                    sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                    sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                    aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                    if aoInstr:
                        if not isinstance(aoInstr, list):
                            aoInstr = [aoInstr,];
                        # Find the first instruction that matches this slot or
                        # has unset opcode/prefix fields which can be filled in.
                        oInstr = None;
                        for oCurInstr in aoInstr:
                            if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                                pass;
                            elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                                oCurInstr.sPrefix = sPrefix;
                            elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                                oCurInstr.sOpcode = sOpcode;
                                oCurInstr.sPrefix = sPrefix;
                            else:
                                continue;
                            oInstr = oCurInstr;
                            break;
                        # No match: register a copy of the first instruction for this slot.
                        if not oInstr:
                            oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                            aoInstr.append(oInstr);
                            g_dAllInstructionsByFunction[sEntry] = aoInstr;
                            g_aoAllInstructions.append(oInstr);
                            oMap.aoInstructions.append(oInstr);
                    else:
                        self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                                   % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
                # Invalid entries occupy a table slot too.
                iEntry += 1;

        return self.error('Unexpected end of file in PFNIEMOP table');
1968
1969 def addInstruction(self, iLine = None):
1970 """
1971 Adds an instruction.
1972 """
1973 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
1974 g_aoAllInstructions.append(oInstr);
1975 self.aoCurInstrs.append(oInstr);
1976 return oInstr;
1977
1978 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
1979 """
1980 Derives the mnemonic and operands from a IEM stats base name like string.
1981 """
1982 if oInstr.sMnemonic is None:
1983 asWords = sStats.split('_');
1984 oInstr.sMnemonic = asWords[0].lower();
1985 if len(asWords) > 1 and not oInstr.aoOperands:
1986 for sType in asWords[1:]:
1987 if sType in g_kdOpTypes:
1988 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
1989 else:
1990 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
1991 return False;
1992 return True;
1993
    def doneInstructionOne(self, oInstr, iLine):
        """
        Complete the parsing by processing, validating and expanding raw inputs.

        oInstr is the instruction to complete and iLine the line number
        recorded as its completion line.  Fields which are still unset
        (mnemonic, disassembler enum, stats name, function name, maps and
        encoding) are derived from the ones that are set, and the instruction
        is registered with its maps and the global by-stat / by-function
        dictionaries.  A duplicate stats name is recorded as an error.

        Returns True.
        """
        assert oInstr.iLineCompleted is None;
        oInstr.iLineCompleted = iLine;

        #
        # Specified instructions.
        #
        if oInstr.cOpTags > 0:
            if oInstr.sStats is None:
                pass;

        #
        # Unspecified legacy stuff. We generally only got a few things to go on here.
        # /** Opcode 0x0f 0x00 /0. */
        # FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        else:
            #if oInstr.sRawOldOpcodes:
            #
            #if oInstr.sMnemonic:
            pass;

        #
        # Common defaults.
        #

        # Guess mnemonic and operands from stats if the former is missing.
        # (The function name with the 'iemOp_' prefix removed is used when
        # there is no stats name either.)
        if oInstr.sMnemonic is None:
            if oInstr.sStats is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
            elif oInstr.sFunction is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

        # Derive the disassembler op enum constant from the mnemonic.
        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

        # Derive the IEM statistics base name from mnemonic and operand types.
        # (The function name takes precedence when it is set.)
        if oInstr.sStats is None:
            if oInstr.sFunction is not None:
                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
            elif oInstr.sMnemonic is not None:
                oInstr.sStats = oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sStats += '_' + oOperand.sType;

        # Derive the IEM function name from mnemonic and operand types.
        if oInstr.sFunction is None:
            if oInstr.sMnemonic is not None:
                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sFunction += '_' + oOperand.sType;
            elif oInstr.sStats:
                oInstr.sFunction = 'iemOp_' + oInstr.sStats;

        #
        # Apply default map and then add the instruction to all it's groups.
        #
        if not oInstr.aoMaps:
            oInstr.aoMaps = [ self.oDefaultMap, ];
        for oMap in oInstr.aoMaps:
            oMap.aoInstructions.append(oInstr);

        #
        # Derive encoding from operands and maps.
        #
        # Note: the encoding is left unset when there are operands but the
        # first one does not use ModR/M.
        #
        if oInstr.sEncoding is None:
            if not oInstr.aoOperands:
                if oInstr.fUnused and oInstr.sSubOpcode:
                    oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
                else:
                    oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
            elif oInstr.aoOperands[0].usesModRM():
                if (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                  or oInstr.onlyInVexMaps():
                    oInstr.sEncoding = 'VEX.ModR/M';
                else:
                    oInstr.sEncoding = 'ModR/M';

        #
        # Check the opstat value and add it to the opstat indexed dictionary.
        #
        if oInstr.sStats:
            if oInstr.sStats not in g_dAllInstructionsByStat:
                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
            else:
                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

        #
        # Add to function indexed dictionary. We allow multiple instructions per function.
        #
        if oInstr.sFunction:
            if oInstr.sFunction not in g_dAllInstructionsByFunction:
                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
            else:
                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
        return True;
2099
2100 def doneInstructions(self, iLineInComment = None):
2101 """
2102 Done with current instruction.
2103 """
2104 for oInstr in self.aoCurInstrs:
2105 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
2106 if oInstr.fStub:
2107 self.cTotalStubs += 1;
2108
2109 self.cTotalInstr += len(self.aoCurInstrs);
2110
2111 self.sComment = '';
2112 self.aoCurInstrs = [];
2113 return True;
2114
2115 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
2116 """
2117 Sets the sAttrib of all current instruction to oValue. If fOverwrite
2118 is False, only None values and empty strings are replaced.
2119 """
2120 for oInstr in self.aoCurInstrs:
2121 if fOverwrite is not True:
2122 oOldValue = getattr(oInstr, sAttrib);
2123 if oOldValue is not None:
2124 continue;
2125 setattr(oInstr, sAttrib, oValue);
2126
2127 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
2128 """
2129 Sets the iEntry of the array sAttrib of all current instruction to oValue.
2130 If fOverwrite is False, only None values and empty strings are replaced.
2131 """
2132 for oInstr in self.aoCurInstrs:
2133 aoArray = getattr(oInstr, sAttrib);
2134 while len(aoArray) <= iEntry:
2135 aoArray.append(None);
2136 if fOverwrite is True or aoArray[iEntry] is None:
2137 aoArray[iEntry] = oValue;
2138
2139 def parseCommentOldOpcode(self, asLines):
2140 """ Deals with 'Opcode 0xff /4' like comments """
2141 asWords = asLines[0].split();
2142 if len(asWords) >= 2 \
2143 and asWords[0] == 'Opcode' \
2144 and ( asWords[1].startswith('0x')
2145 or asWords[1].startswith('0X')):
2146 asWords = asWords[:1];
2147 for iWord, sWord in enumerate(asWords):
2148 if sWord.startswith('0X'):
2149 sWord = '0x' + sWord[:2];
2150 asWords[iWord] = asWords;
2151 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
2152
2153 return False;
2154
2155 def ensureInstructionForOpTag(self, iTagLine):
2156 """ Ensure there is an instruction for the op-tag being parsed. """
2157 if not self.aoCurInstrs:
2158 self.addInstruction(self.iCommentLine + iTagLine);
2159 for oInstr in self.aoCurInstrs:
2160 oInstr.cOpTags += 1;
2161 if oInstr.cOpTags == 1:
2162 self.cTotalTagged += 1;
2163 return self.aoCurInstrs[-1];
2164
2165 @staticmethod
2166 def flattenSections(aasSections):
2167 """
2168 Flattens multiline sections into stripped single strings.
2169 Returns list of strings, on section per string.
2170 """
2171 asRet = [];
2172 for asLines in aasSections:
2173 if asLines:
2174 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
2175 return asRet;
2176
2177 @staticmethod
2178 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
2179 """
2180 Flattens sections into a simple stripped string with newlines as
2181 section breaks. The final section does not sport a trailing newline.
2182 """
2183 # Typical: One section with a single line.
2184 if len(aasSections) == 1 and len(aasSections[0]) == 1:
2185 return aasSections[0][0].strip();
2186
2187 sRet = '';
2188 for iSection, asLines in enumerate(aasSections):
2189 if asLines:
2190 if iSection > 0:
2191 sRet += sSectionSep;
2192 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
2193 return sRet;
2194
2195
2196
2197 ## @name Tag parsers
2198 ## @{
2199
2200 def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
2201 """
2202 Tag: \@opbrief
2203 Value: Text description, multiple sections, appended.
2204
2205 Brief description. If not given, it's the first sentence from @opdesc.
2206 """
2207 oInstr = self.ensureInstructionForOpTag(iTagLine);
2208
2209 # Flatten and validate the value.
2210 sBrief = self.flattenAllSections(aasSections);
2211 if not sBrief:
2212 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
2213 if sBrief[-1] != '.':
2214 sBrief = sBrief + '.';
2215 if len(sBrief) > 180:
2216 return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
2217 offDot = sBrief.find('.');
2218 while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
2219 offDot = sBrief.find('.', offDot + 1);
2220 if offDot >= 0 and offDot != len(sBrief) - 1:
2221 return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));
2222
2223 # Update the instruction.
2224 if oInstr.sBrief is not None:
2225 return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
2226 % (sTag, oInstr.sBrief, sBrief,));
2227 _ = iEndLine;
2228 return True;
2229
2230 def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
2231 """
2232 Tag: \@opdesc
2233 Value: Text description, multiple sections, appended.
2234
2235 It is used to describe instructions.
2236 """
2237 oInstr = self.ensureInstructionForOpTag(iTagLine);
2238 if aasSections:
2239 oInstr.asDescSections.extend(self.flattenSections(aasSections));
2240 return True;
2241
2242 _ = sTag; _ = iEndLine;
2243 return True;
2244
2245 def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
2246 """
2247 Tag: @opmenmonic
2248 Value: mnemonic
2249
2250 The 'mnemonic' value must be a valid C identifier string. Because of
2251 prefixes, groups and whatnot, there times when the mnemonic isn't that
2252 of an actual assembler mnemonic.
2253 """
2254 oInstr = self.ensureInstructionForOpTag(iTagLine);
2255
2256 # Flatten and validate the value.
2257 sMnemonic = self.flattenAllSections(aasSections);
2258 if not self.oReMnemonic.match(sMnemonic):
2259 return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sMnemonic,));
2260 if oInstr.sMnemonic is not None:
2261 return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
2262 % (sTag, oInstr.sMnemonic, sMnemonic,));
2263 oInstr.sMnemonic = sMnemonic
2264
2265 _ = iEndLine;
2266 return True;
2267
2268 def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
2269 """
2270 Tags: \@op1, \@op2, \@op3, \@op4
2271 Value: [where:]type
2272
2273 The 'where' value indicates where the operand is found, like the 'reg'
2274 part of the ModR/M encoding. See Instruction.kdOperandLocations for
2275 a list.
2276
2277 The 'type' value indicates the operand type. These follow the types
2278 given in the opcode tables in the CPU reference manuals.
2279 See Instruction.kdOperandTypes for a list.
2280
2281 """
2282 oInstr = self.ensureInstructionForOpTag(iTagLine);
2283 idxOp = int(sTag[-1]) - 1;
2284 assert 0 <= idxOp < 4;
2285
2286 # flatten, split up, and validate the "where:type" value.
2287 sFlattened = self.flattenAllSections(aasSections);
2288 asSplit = sFlattened.split(':');
2289 if len(asSplit) == 1:
2290 sType = asSplit[0];
2291 sWhere = None;
2292 elif len(asSplit) == 2:
2293 (sWhere, sType) = asSplit;
2294 else:
2295 return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));
2296
2297 if sType not in g_kdOpTypes:
2298 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
2299 % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
2300 if sWhere is None:
2301 sWhere = g_kdOpTypes[sType][1];
2302 elif sWhere not in g_kdOpLocations:
2303 return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
2304 % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));
2305
2306 # Insert the operand, refusing to overwrite an existing one.
2307 while idxOp >= len(oInstr.aoOperands):
2308 oInstr.aoOperands.append(None);
2309 if oInstr.aoOperands[idxOp] is not None:
2310 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
2311 % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
2312 sWhere, sType,));
2313 oInstr.aoOperands[idxOp] = Operand(sWhere, sType);
2314
2315 _ = iEndLine;
2316 return True;
2317
2318 def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
2319 """
2320 Tag: \@opmaps
2321 Value: map[,map2]
2322
2323 Indicates which maps the instruction is in. There is a default map
2324 associated with each input file.
2325 """
2326 oInstr = self.ensureInstructionForOpTag(iTagLine);
2327
2328 # Flatten, split up and validate the value.
2329 sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
2330 asMaps = sFlattened.split(',');
2331 if not asMaps:
2332 return self.errorComment(iTagLine, '%s: value required' % (sTag,));
2333 for sMap in asMaps:
2334 if sMap not in g_dInstructionMaps:
2335 return self.errorComment(iTagLine, '%s: invalid map value: %s (valid values: %s)'
2336 % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));
2337
2338 # Add the maps to the current list. Throw errors on duplicates.
2339 for oMap in oInstr.aoMaps:
2340 if oMap.sName in asMaps:
2341 return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));
2342
2343 for sMap in asMaps:
2344 oMap = g_dInstructionMaps[sMap];
2345 if oMap not in oInstr.aoMaps:
2346 oInstr.aoMaps.append(oMap);
2347 else:
2348 self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));
2349
2350 _ = iEndLine;
2351 return True;
2352
2353 def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
2354 """
2355 Tag: \@oppfx
2356 Value: n/a|none|0x66|0xf3|0xf2
2357
2358 Required prefix for the instruction. (In a (E)VEX context this is the
2359 value of the 'pp' field rather than an actual prefix.)
2360 """
2361 oInstr = self.ensureInstructionForOpTag(iTagLine);
2362
2363 # Flatten and validate the value.
2364 sFlattened = self.flattenAllSections(aasSections);
2365 asPrefixes = sFlattened.split();
2366 if len(asPrefixes) > 1:
2367 return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));
2368
2369 sPrefix = asPrefixes[0].lower();
2370 if sPrefix == 'none':
2371 sPrefix = 'none';
2372 elif sPrefix == 'n/a':
2373 sPrefix = None;
2374 else:
2375 if len(sPrefix) == 2:
2376 sPrefix = '0x' + sPrefix;
2377 if not _isValidOpcodeByte(sPrefix):
2378 return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));
2379
2380 if sPrefix is not None and sPrefix not in g_kdPrefixes:
2381 return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));
2382
2383 # Set it.
2384 if oInstr.sPrefix is not None:
2385 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
2386 oInstr.sPrefix = sPrefix;
2387
2388 _ = iEndLine;
2389 return True;
2390
2391 def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
2392 """
2393 Tag: \@opcode
2394 Value: 0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)
2395
2396 The opcode byte or sub-byte for the instruction in the context of a map.
2397 """
2398 oInstr = self.ensureInstructionForOpTag(iTagLine);
2399
2400 # Flatten and validate the value.
2401 sOpcode = self.flattenAllSections(aasSections);
2402 if _isValidOpcodeByte(sOpcode):
2403 pass;
2404 elif len(sOpcode) == 2 and sOpcode.startswith('/') and sOpcode[-1] in '012345678':
2405 pass;
2406 elif len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1] in '012345678':
2407 pass;
2408 elif len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1] in '012345678':
2409 pass;
2410 else:
2411 return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));
2412
2413 # Set it.
2414 if oInstr.sOpcode is not None:
2415 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
2416 oInstr.sOpcode = sOpcode;
2417
2418 _ = iEndLine;
2419 return True;
2420
2421 def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
2422 """
2423 Tag: \@opcodesub
2424 Value: none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
2425 | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1
2426
2427 This is a simple way of dealing with encodings where the mod=3 and mod!=3
2428 represents exactly two different instructions. The more proper way would
2429 be to go via maps with two members, but this is faster.
2430 """
2431 oInstr = self.ensureInstructionForOpTag(iTagLine);
2432
2433 # Flatten and validate the value.
2434 sSubOpcode = self.flattenAllSections(aasSections);
2435 if sSubOpcode not in g_kdSubOpcodes:
2436 return self.errorComment(iTagLine, '%s: invalid sub opcode: %s (valid: 11, !11, none)' % (sTag, sSubOpcode,));
2437 sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];
2438
2439 # Set it.
2440 if oInstr.sSubOpcode is not None:
2441 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
2442 % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
2443 oInstr.sSubOpcode = sSubOpcode;
2444
2445 _ = iEndLine;
2446 return True;
2447
2448 def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
2449 """
2450 Tag: \@openc
2451 Value: ModR/M|fixed|prefix|<map name>
2452
2453 The instruction operand encoding style.
2454 """
2455 oInstr = self.ensureInstructionForOpTag(iTagLine);
2456
2457 # Flatten and validate the value.
2458 sEncoding = self.flattenAllSections(aasSections);
2459 if sEncoding in g_kdEncodings:
2460 pass;
2461 elif sEncoding in g_dInstructionMaps:
2462 pass;
2463 elif not _isValidOpcodeByte(sEncoding):
2464 return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));
2465
2466 # Set it.
2467 if oInstr.sEncoding is not None:
2468 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
2469 % ( sTag, oInstr.sEncoding, sEncoding,));
2470 oInstr.sEncoding = sEncoding;
2471
2472 _ = iEndLine;
2473 return True;
2474
2475 ## EFlags tag to Instruction attribute name.
2476 kdOpFlagToAttr = {
2477 '@opfltest': 'asFlTest',
2478 '@opflmodify': 'asFlModify',
2479 '@opflundef': 'asFlUndefined',
2480 '@opflset': 'asFlSet',
2481 '@opflclear': 'asFlClear',
2482 };
2483
2484 def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
2485 """
2486 Tags: \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
2487 Value: <eflags specifier>
2488
2489 """
2490 oInstr = self.ensureInstructionForOpTag(iTagLine);
2491
2492 # Flatten, split up and validate the values.
2493 asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
2494 if len(asFlags) == 1 and asFlags[0].lower() == 'none':
2495 asFlags = [];
2496 else:
2497 fRc = True;
2498 for iFlag, sFlag in enumerate(asFlags):
2499 if sFlag not in g_kdEFlagsMnemonics:
2500 if sFlag.strip() in g_kdEFlagsMnemonics:
2501 asFlags[iFlag] = sFlag.strip();
2502 else:
2503 fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
2504 if not fRc:
2505 return False;
2506
2507 # Set them.
2508 asOld = getattr(oInstr, self.kdOpFlagToAttr[sTag]);
2509 if asOld is not None:
2510 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
2511 setattr(oInstr, self.kdOpFlagToAttr[sTag], asFlags);
2512
2513 _ = iEndLine;
2514 return True;
2515
2516 def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
2517 """
2518 Tag: \@ophints
2519 Value: Comma or space separated list of flags and hints.
2520
2521 This covers the disassembler flags table and more.
2522 """
2523 oInstr = self.ensureInstructionForOpTag(iTagLine);
2524
2525 # Flatten as a space separated list, split it up and validate the values.
2526 asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
2527 if len(asHints) == 1 and asHints[0].lower() == 'none':
2528 asHints = [];
2529 else:
2530 fRc = True;
2531 for iHint, sHint in enumerate(asHints):
2532 if sHint not in g_kdHints:
2533 if sHint.strip() in g_kdHints:
2534 sHint[iHint] = sHint.strip();
2535 else:
2536 fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
2537 if not fRc:
2538 return False;
2539
2540 # Append them.
2541 for sHint in asHints:
2542 if sHint not in oInstr.dHints:
2543 oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
2544 else:
2545 self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));
2546
2547 _ = iEndLine;
2548 return True;
2549
2550 def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
2551 """
2552 Tag: \@opdisenum
2553 Value: OP_XXXX
2554
2555 This is for select a specific (legacy) disassembler enum value for the
2556 instruction.
2557 """
2558 oInstr = self.ensureInstructionForOpTag(iTagLine);
2559
2560 # Flatten and split.
2561 asWords = self.flattenAllSections(aasSections).split();
2562 if len(asWords) != 1:
2563 self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
2564 if not asWords:
2565 return False;
2566 sDisEnum = asWords[0];
2567 if not self.oReDisEnum.match(sDisEnum):
2568 return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
2569 % (sTag, sDisEnum, self.oReDisEnum.pattern));
2570
2571 # Set it.
2572 if oInstr.sDisEnum is not None:
2573 return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,));
2574 oInstr.sDisEnum = sDisEnum;
2575
2576 _ = iEndLine;
2577 return True;
2578
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opmincpu
    Value:      <simple CPU name>

    Indicates when this instruction was introduced.

    The value must be a single key of g_kdCpuNames.  An empty value or an
    unknown name is reported and the result of self.errorComment() is
    returned.  Extra words are reported, but the first word is still used.
    The first accepted value is stored in oInstr.sMinCpu; a later tag with
    a different value is reported and does not replace it.

    Returns True when the value was accepted (including the reported
    conflicting-redefinition case).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, validate it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Nothing to index into below, so bail out with a proper error.
        return self.errorComment(iTagLine, '%s: missing CPU name' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Validation above deliberately does not assign, otherwise the
    # overwrite check here could never trigger.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2608
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcpuid
    Value:      none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma and/or whitespace separated list of g_kdCpuIdFlags
    keys, or the single word 'none' (any case) for an empty list.  Every
    unknown flag is reported and, if there was any, False is returned
    without modifying the instruction.  Otherwise the flags are appended to
    oInstr.asCpuIds, reporting (and skipping) duplicates.

    Returns True when all the values were valid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        # Note! str.split() without a separator never yields words with
        #       leading or trailing whitespace, so no strip() retry is needed
        #       here (the old one assigned into the string and would have
        #       raised TypeError had it ever been reached).
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2642
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opgroup
    Value:      op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.

    Expects exactly one word matching self.oReGroupName and stores it in
    oInstr.sGroup, provided no group has been set yet.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # There must be exactly one word.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));

    # And it must look like a group name.
    sName = asNames[0];
    if not self.oReGroupName.match(sName):
        sMsg = '%s: invalid group name: %s (valid: %s)' % (sTag, sName, self.oReGroupName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Only the first group tag counts.
    if oInstr.sGroup is None:
        oInstr.sGroup = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sName,));
2668
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opunused, \@opinvalid, \@opinvlstyle
    Value:      <invalid opcode behaviour style>

    The \@opunused tag indicates the specification is for a currently
    unused instruction encoding.

    The \@opinvalid tag indicates the specification is for a currently
    invalid instruction encoding (like UD2).

    The \@opinvlstyle tag just indicates how CPUs decode the instruction
    when it is not supported (\@opcpuid, \@opmincpu) or disabled.

    The single value must be a key of g_kdInvalidStyles and is stored in
    oInstr.sInvalidStyle (only once per instruction).  In addition
    \@opunused sets oInstr.fUnused and \@opinvalid sets oInstr.fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word, please.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asValues,));

    # Which must be a known style.
    sNewStyle = asValues[0];
    if sNewStyle not in g_kdInvalidStyles:
        sMsg = '%s: invalid invalid behaviour style: %s (valid: %s)' % (sTag, sNewStyle, g_kdInvalidStyles.keys(),);
        return self.errorComment(iTagLine, sMsg);

    # The three tags share the style attribute, so only one of them is allowed.
    if oInstr.sInvalidStyle is not None:
        sMsg = '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)' \
             % ( sTag, oInstr.sInvalidStyle, sNewStyle,);
        return self.errorComment(iTagLine, sMsg);
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the tags additionally flag the instruction.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
2706
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is one test.  The words are scanned from the last to the
    first: everything after '->' is an output, everything between '/' and
    '->' is an input and everything before '/' is a selector.  Tests which
    parse cleanly are appended to oInstr.aoTests, the other ones are
    reported via self.errorComment().

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        for sWord in reversed(asWords):
            # Check for array switchers.
            # Note! The current list must be checked by identity.  Comparing
            #       by value treats two different but equal lists (e.g. both
            #       still empty) as the same one and would miss misplaced or
            #       repeated separators.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;      # Only the first operator found in the condition is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The type is optional ("value[:type]"), defaulting to the one of the field.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;      # Only the first operator found in the item is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if it parsed cleanly, otherwise dump what we made of it.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2851
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number taken from the tag is compared with the number of tests the
    instruction already has (oInstr.aoTests); a mismatch is reported but
    does not stop the processing.

    Returns the result of self.parseTagOpTest().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits follow the 7 character '@optest' prefix, optionally in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, len(oInstr.aoTests),));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2867
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: none
    of the arguments are looked at.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine,);
    return True;
2879
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each word must match either self.oReStatsName or self.oReFunctionName
    and is appended to oInstr.asCopyTests.  Invalid and duplicate
    references are reported and skipped.  The copy jobs are only recorded
    here; the existing comment in this method says they are executed after
    all the input has been parsed so forward references work.

    Returns True if there was at least one word, otherwise the result of
    self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    asReferences = self.flattenAllSections(aasSections).split();
    if not asReferences:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sReference in asReferences:
        # Duplicates are ignored (but complained about).
        if sReference in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sReference,));
            continue;

        # Must look like a statistics name or a function name.
        if self.oReStatsName.match(sReference) or self.oReFunctionName.match(sReference):
            oInstr.asCopyTests.append(sReference);
            continue;

        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                    % (sTag, sReference, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
2908
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to the global g_aoOnlyTestInstructions list
    unless it is there already.

    See also \@optestignore.

    Returns True on success, or the result of self.errorComment() if a
    value was given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag does not take any value.
    sUnexpected = self.flattenAllSections(aasSections).strip();
    if not sUnexpected:
        # Add the instruction to the only-test list, once.
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sUnexpected));
2931
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    The single value must be a key of g_kdXcptTypes and is stored in
    oInstr.sXcptType, which can only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Exactly one word, please.
    asValues = self.flattenAllSections(aasSections).split();
    if len(asValues) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asValues,));

    # Which must be a known exception type.
    sNewType = asValues[0];
    if sNewType not in g_kdXcptTypes:
        sMsg = '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),);
        return self.errorComment(iTagLine, sMsg);

    # Only one @opxcpttype per instruction.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
2958
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opfunction
    Value:      <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReFunctionName and is stored in
    oInstr.sFunction, which can only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sName):
        sMsg = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sName, self.oReFunctionName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Set it, unless already set.
    if oInstr.sFunction is None:
        oInstr.sFunction = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                       % (sTag, oInstr.sFunction, sName,));
2986
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying
    instructions whose implementation isn't following immediately or which
    aren't implemented yet.

    The flattened value must match self.oReStatsName and is stored in
    oInstr.sStats, which can only be set once.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sName = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sName):
        sMsg = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sName, self.oReStatsName.pattern);
        return self.errorComment(iTagLine, sMsg);

    # Set it, unless already set.
    if oInstr.sStats is None:
        oInstr.sStats = sName;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                       % (sTag, oInstr.sStats, sName,));
3014
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() if the tag was given a value.
    """
    _ = iEndLine;
    sUnexpected = self.flattenAllSections(aasSections);
    if sUnexpected == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sUnexpected,));
3027
3028 ## @}
3029
3030
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines with the leading asterisks and blanks
    removed.  The lines are then grouped by the '@xxx' tag starting them,
    text before the first tag belonging to a pseudo tag named '@default'.
    Within a tag, empty lines separate sections.  Tags found in
    self.dTagHandlers are passed on to their handler as
    (sTag, aasSections, iTagLine, iEndLine); unknown '@op*' tags are
    reported, as are a few likely misspellings.

    Returns False if the comment contains neither 'Opcode' nor '@', True
    otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines, the former ones bumping
    # the comment start line.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        del asLines[-1];

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: add it to the current section, an empty line
            # starting a new section if the current one has content.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
            continue;

        #
        # Process the previous tag (dropping a trailing empty section first).
        #
        if len(aasSections) > 1 and not asCurSection:
            aasSections.pop();
        if sCurTag in self.dTagHandlers:
            fnHandler = self.dTagHandlers[sCurTag];
            fnHandler(sCurTag, aasSections, iCurTagLine, iLine);
            cOpTags += 1;
        elif sCurTag.startswith('@op'):
            self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag,));
        elif sCurTag == '@default':
            sFlatDefault = self.flattenAllSections(aasSections);
        elif '@op' + sCurTag[1:] in self.dTagHandlers:
            self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
        elif sCurTag in ['@encoding', '@opencoding']:
            self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

        #
        # New tag.  The rest of the line, if any, is the first line of
        # its first section.
        #
        asTagAndRest = sLine.split(None, 1);
        sCurTag      = asTagAndRest[0].lower();
        asCurSection = [asTagAndRest[1],] if len(asTagAndRest) > 1 else [];
        aasSections  = [ asCurSection, ];
        iCurTagLine  = iLine;

    #
    # Process the final tag.  Here iLine is the index of the last line.
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        fnHandler = self.dTagHandlers[sCurTag];
        fnHandler(sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag,));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
3131
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  If the closing parenthesis
    is not found in it, more text is pulled in from self.asLines, starting
    at index self.iLine (neither of which are modified).

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.  Should the file end before the closing
    parenthesis, an error is registered and what has been collected so far
    is returned.

    A missing open parenthesis or an invalid macro name is handed to
    self.raiseError() (presumably raising an exception - not visible from
    here), so that a non-tuple is never returned to callers unpacking the
    result.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        self.raiseError("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;
    while cDepth > 0:
        # Pull in continuation lines as needed.  This is a loop because an
        # empty line doesn't give us anything to index into below.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a string or character literal: skip escaped characters
            # and look for the closing quote.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the top level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
3182
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode and parses it.

    Only the first occurrence of the macro name is considered, and it only
    counts if the first non-blank character following it is an opening
    parenthesis.  This is what keeps e.g. IEMOP_MNEMONIC from matching an
    IEMOP_MNEMONIC2EX invocation.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to the start of sCode).
    """
    offHit = sCode.find(sMacro);
    # Note! startswith() rather than indexing, as there may be nothing at
    #       all following the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
3192
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Looks for an invocation of sMacro in sCode, ignoring where it ends.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
3198
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro one by one, in the order given,
    stopping at the first one with an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # Lazily evaluated, so nothing after the first hit is looked up.
    oResults = (self.findAndParseMacroInvocation(sCode, sCandidate) for sCandidate in asMacro);
    return next((asArgs for asArgs in oResults if asArgs is not None), None);
3208
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the \\@op* tags have
    told us about the last instruction in self.aoCurInstrs (adding one if
    there are none), and are used to fill in whatever the tags left out:
    mnemonic, operands, encoding, statistics name and hints.  Problems are
    reported via self.error() without stopping the processing.

    Parameters (all strings taken from the macro invocation):
        sMacro      The macro name, for error messages.
        sStats      The a_Stats argument.
        sAsm        The a_szMnemonic argument, unused.
        sForm       The a_Form argument, a key of g_kdIemForms.
        sUpper      The a_Upper argument.
        sLower      The a_Lower argument, i.e. the mnemonic.
        sDisHints   The a_fDisHints argument, '|' separated DISOPTYPE_XXX or 0.
        sIemHints   The a_fIemHints argument, '|' separated IEMOPHINT_XXX or 0.
        asOperands  List of the a_OpN arguments, keys of g_kdOpTypes.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s  (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType));
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) != len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Note! str.find() returns -1 when there is no match and 0 for a
                #       match at the very start, so the result must be compared
                #       with zero rather than used as a boolean.
                fVexOrXop = sForm.find('VEX') >= 0 or sForm.find('XOP') >= 0;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM'              and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'               and (   not fVexOrXop \
                                                                  or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # The two hint arguments only differ in the prefix of their values.
    for sHints, sPrefix, sParam in ( (sDisHints, 'DISOPTYPE_', 'a_fDisHints'),
                                     (sIemHints, 'IEMOPHINT_', 'a_fIemHints'), ):
        for sHint in sHints.split('|'):
            sHint = sHint.strip();
            if sHint.startswith(sPrefix):
                sShortHint = sHint[len(sPrefix):].lower();
                if sShortHint in g_kdHints:
                    oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
                else:
                    self.error('%s: unknown %s value: %s' % (sMacro, sParam, sHint,));
            elif sHint != '0':
                self.error('%s: expected %s value: %s' % (sMacro, sParam, sHint,));

    _ = sAsm;
    return True;
3340
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic arguments of the EX variants, so
    both are generated from the lower cased mnemonic and the operands before
    handing everything over to workerIemOpMnemonicEx, whose result is
    returned.
    """
    # Without operands both are just the mnemonic, e.g. 'nop'.
    sStats = sLower;
    sAsm   = sLower;
    if asOperands:
        # E.g. 'add_Eb_Gb' and 'add Eb,Gb'.
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
3350
3351 def checkCodeForMacro(self, sCode):
3352 """
3353 Checks code for relevant macro invocation.
3354 """
3355 #
3356 # Scan macro invocations.
3357 #
3358 if sCode.find('(') > 0:
3359 # Look for instruction decoder function definitions. ASSUME single line.
3360 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3361 [ 'FNIEMOP_DEF',
3362 'FNIEMOP_STUB',
3363 'FNIEMOP_STUB_1',
3364 'FNIEMOP_UD_STUB',
3365 'FNIEMOP_UD_STUB_1' ]);
3366 if asArgs is not None:
3367 sFunction = asArgs[1];
3368
3369 if not self.aoCurInstrs:
3370 self.addInstruction();
3371 for oInstr in self.aoCurInstrs:
3372 if oInstr.iLineFnIemOpMacro == -1:
3373 oInstr.iLineFnIemOpMacro = self.iLine;
3374 else:
3375 self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
3376 self.setInstrunctionAttrib('sFunction', sFunction);
3377 self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
3378 self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
3379 if asArgs[0].find('STUB') > 0:
3380 self.doneInstructions();
3381 return True;
3382
3383 # IEMOP_HLP_DONE_VEX_DECODING_*
3384 asArgs = self.findAndParseFirstMacroInvocation(sCode,
3385 [ 'IEMOP_HLP_DONE_VEX_DECODING',
3386 'IEMOP_HLP_DONE_VEX_DECODING_L0',
3387 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
3388 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
3389 ]);
3390 if asArgs is not None:
3391 sMacro = asArgs[0];
3392 if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
3393 for oInstr in self.aoCurInstrs:
3394 if 'vex_l_zero' not in oInstr.dHints:
3395 if oInstr.iLineMnemonicMacro >= 0:
3396 self.errorOnLine(oInstr.iLineMnemonicMacro,
3397 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
3398 oInstr.dHints['vex_l_zero'] = True;
3399 return True;
3400
3401 #
3402 # IEMOP_MNEMONIC*
3403 #
3404
3405 # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
3406 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
3407 if asArgs is not None:
3408 if len(self.aoCurInstrs) == 1:
3409 oInstr = self.aoCurInstrs[0];
3410 if oInstr.sStats is None:
3411 oInstr.sStats = asArgs[1];
3412 self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
3413
3414 # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3415 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
3416 if asArgs is not None:
3417 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6], asArgs[7],
3418 []);
3419 # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3420 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
3421 if asArgs is not None:
3422 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7], asArgs[8],
3423 [asArgs[6],]);
3424 # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3425 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
3426 if asArgs is not None:
3427 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8], asArgs[9],
3428 [asArgs[6], asArgs[7]]);
3429 # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3430 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
3431 if asArgs is not None:
3432 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
3433 asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
3434 # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
3435 # a_fIemHints)
3436 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
3437 if asArgs is not None:
3438 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
3439 asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
3440
3441 # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
3442 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
3443 if asArgs is not None:
3444 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
3445 # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
3446 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
3447 if asArgs is not None:
3448 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
3449 # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
3450 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
3451 if asArgs is not None:
3452 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
3453 [asArgs[4], asArgs[5],]);
3454 # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
3455 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
3456 if asArgs is not None:
3457 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
3458 [asArgs[4], asArgs[5], asArgs[6],]);
3459 # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
3460 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
3461 if asArgs is not None:
3462 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
3463 [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
3464
3465 return False;
3466
3467
def parse(self):
    """
    Runs the line-by-line code/comment state machine over self.asLines.

    Code fragments are handed to checkCodeForMacro(), each completed
    multi-line comment to parseComment(), a '}' in the first column of a
    code line triggers doneInstructions(), and PFNIEMOP lines matching
    oReFunTable are passed to parseFunctionTable().

    Returns number of errors (the result of printErrors()).
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sCurLine = self.asLines[self.iLine];
        self.iLine += 1;

        # Only comments can change the parser state, and they need a slash.
        offFirstSlash = sCurLine.find('/');

        #
        # Lines without any slash.
        #
        if offFirstSlash < 0:
            if self.iState == self.kiCommentMulti:
                # Still inside a multi-line comment, the whole line belongs to it.
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sCurLine;
            elif self.iState == self.kiCode:
                if 'IEMOP_' in sCurLine:
                    # Code line that may hold a relevant macro.
                    #self.debug('line %d: macro' % (self.iLine,));
                    self.checkCodeForMacro(sCurLine);
                elif sCurLine[0] == '}':
                    # A '}' in the first position completes the instructions.
                    #self.debug('line %d: }' % (self.iLine,));
                    self.doneInstructions();
                elif sCurLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sCurLine):
                    # Instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3',
                    # used for checking/adding @oppfx info.
                    self.parseFunctionTable(sCurLine);
            continue;

        #
        # C++ line comment while in code state: only what precedes it counts.
        #
        if self.iState == self.kiCode and sCurLine[offFirstSlash + 1 : offFirstSlash + 2] == '/':
            if offFirstSlash > 0:
                self.checkCodeForMacro(sCurLine[:offFirstSlash]);
            continue;

        #
        # Otherwise walk the line, switching state at each '/*' and '*/'.
        #
        offCur = 0;
        while offCur < len(sCurLine):
            if self.iState == self.kiCode:
                offOpen = sCurLine.find('/*', offCur); # only multiline comments for now.
                if offOpen < 0:
                    self.checkCodeForMacro(sCurLine[offCur:]);
                    break;
                self.checkCodeForMacro(sCurLine[offCur:offOpen]);
                self.sComment = '';
                self.iCommentLine = self.iLine;
                self.iState = self.kiCommentMulti;
                offCur = offOpen + 2;

            elif self.iState == self.kiCommentMulti:
                offClose = sCurLine.find('*/', offCur);
                if offClose < 0:
                    self.sComment += sCurLine[offCur:];
                    break;
                self.sComment += sCurLine[offCur:offClose];
                self.iState = self.kiCode;
                offCur = offClose + 2;
                self.parseComment();

            else:
                assert False;

    # Flush whatever is pending at the end of the file and report.
    self.doneInstructions();
    self.debug('%3s stubs out of %3s instructions in %s'
               % (self.cTotalStubs, self.cTotalInstr, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3539
3540
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and runs a SimpleParser over its lines, looking for
    instruction specifications.

    Returns the error count from SimpleParser.parse().
    Raises Exception if the file cannot be opened or read.  A ParserException
    is printed and then re-raised.
    """
    #
    # Slurp the file into a list of lines.
    #
    try:
        oSrc = open(sSrcFile, "r"); # pylint: disable=consider-using-with
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oSrc: # closes the file on both the success and the failure path.
        try:
            asFileLines = oSrc.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines over to the parser.
    #
    try:
        return SimpleParser(sSrcFile, asFileLines, sDefaultMap).parse();
    except ParserException as oXcpt:
        print(str(oXcpt));
        raise;
3569
3570
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each reference is looked up in g_dAllInstructionsByStat first and, failing
    that, in g_dAllInstructionsByFunction.  The aoTests of every source found
    are appended to the aoTests of the instruction holding the reference.

    Returns the number of errors (references that were not found or that
    resolve to the destination itself).  The messages are written to stderr.
    """
    asProblems = [];
    for oDst in g_aoAllInstructions:
        if not oDst.asCopyTests:
            continue;

        for sRef in oDst.asCopyTests:
            # Statistics name takes precedence over the function name.
            oByStat = g_dAllInstructionsByStat.get(sRef, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sRef, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oDst.sSrcFile, oDst.iLineCreated, sRef));
                continue;

            for oSrc in aoSources:
                if oSrc != oDst:
                    oDst.aoTests.extend(oSrc.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oDst.sSrcFile, oDst.iLineCreated, sRef));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
3598
3599
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    Always returns 0 (so it can be added to an error count).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oCandidate in g_aoAllInstructions:
        if oCandidate.aoTests and oCandidate not in g_aoOnlyTestInstructions:
            oCandidate.aoTests = [];
    return 0;
3611
def __parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files found next to this script,
    then runs the test copying and only-test passes.

    Returns True when no errors were counted.
    Calls sys.exit(1) when there were errors.  Exceptions from parsing a file
    propagate to the caller.
    """
    # (default map, file name) for each input, in processing order.
    atInputs = (
        ( 'one',        'IEMAllInstructionsOneByte.cpp.h'),
        ( 'two0f',      'IEMAllInstructionsTwoByte0f.cpp.h'),
        ( 'three0f38',  'IEMAllInstructionsThree0f38.cpp.h'),
        ( 'three0f3a',  'IEMAllInstructionsThree0f3a.cpp.h'),
        ( 'vexmap1',    'IEMAllInstructionsVexMap1.cpp.h'),
        ( 'vexmap2',    'IEMAllInstructionsVexMap2.cpp.h'),
        ( 'vexmap3',    'IEMAllInstructionsVexMap3.cpp.h'),
        ( '3dnow',      'IEMAllInstructions3DNow.cpp.h'),
    );
    sScriptDir = os.path.dirname(os.path.abspath(__file__));

    cErrors = sum(__parseFileByName(os.path.join(sScriptDir, sFileName), sMapName)
                  for sMapName, sFileName in atInputs);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    if cErrors != 0:
        #raise Exception('%d parse errors' % (cErrors,));
        sys.exit(1);
    return True;
3638
3639
3640
# Parse all the instruction files as soon as this module is loaded.  Note that
# __parseAll() exits the process with status 1 if any errors were counted.
__parseAll();
3642
3643
3644#
3645# Generators (may perhaps move later).
3646#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one instruction as a disassembler table entry.

    The entry uses the OP() macro, or OPVEX() when the instruction has more
    than three operands.  The columns are, in order: the format string, the
    parser indexes, the opcode enum value, the operand parameters and the
    DISOPTYPE_ flags.

    Returns the entry as a single line with the columns padded to fixed offsets.
    """
    aoOperands = oInstr.aoOperands;
    if len(aoOperands) > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert len(aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operands.
    #
    asOperandFmts = [];
    for oOperand in aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':                       ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();
        asOperandFmts.append(sOperandFmt);

    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.
    #
    kdSingleParser = {
        'VEX.ModR/M':   'IDX_ParseModRM,',
        'vex2':         'IDX_ParseVex2b,',
        'vex3':         'IDX_ParseVex3b,',
    };
    iFirstDecoder = len(asColumns);
    sEncoding     = oInstr.sEncoding;
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed' ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(aoOperands) >= 1 and aoOperands[0].usesModRM();
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        # One zero per operand, so nothing is left for the operand loop below.
        asColumns.extend(['0,'] * len(aoOperands));
    elif sEncoding in kdSingleParser:
        asColumns.append(kdSingleParser[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # which did not get a decoder column above, then pad with zeros.
    asColumns.extend([g_kdOpTypes[oOperand.sType][0] + ','
                      for oOperand in aoOperands[len(asColumns) - iFirstDecoder:]]);
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iFirstDecoder)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iFirstParam = len(asColumns);
    asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in aoOperands]);
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iFirstParam)));

    #
    # Flags: the hints that map to DISOPTYPE_ defines, or zero if there are none.
    #
    asAllDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asDisDefines = [sDefine for sDefine in asAllDefines if sDefine.startswith('DISOPTYPE_')];
    asColumns.append((' | '.join(asDisDefines) or '0') + '),');

    #
    # Format the columns into a line.  Each column starts at its fixed
    # offset, or after a single space if the line is already past that.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
3771
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether the leading and trailing invalid (None) entries of the
    table are worth trimming off.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index of
    the first entry to output and cInstructions is the index following the last
    one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off, first at the end...
    iEnd = cTotal;
    for oEntry in reversed(aoTableOrdered):
        if oEntry is not None:
            break;
        iEnd -= 1;

    # ... and then at the start.
    iFirst = 0;
    for oEntry in aoTableOrdered[:iEnd]:
        if oEntry is not None:
            break;
        iFirst += 1;

    # Go for the short table version when the number of trimmed entries is at
    # least one thirtieth (integer division) of the table size.
    # NOTE(review): the comment here used to say "more than 30%", which is not
    #               what the expression tests - confirm the intended threshold.
    cTrimmed = iFirst + (cTotal - iEnd);
    if cTrimmed >= cTotal // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cTotal, False);
3793
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Goes over g_dInstructionMaps (splitting the 'byte+pfx' maps into one map
    per prefix) and, for each map whose name starts with 'vex', writes a C
    DISOPCODE table, an AssertCompile on its size and a DISOPMAPDESC range
    record to oDstFile.

    oDstFile is the output file object (only write() is used), defaulting to
    sys.stdout.  Debug output goes to stderr so it cannot end up in the
    generated code.
    """

    #
    # The disassembler uses a slightly different table layout to save space:
    # the maps selected by opcode byte + prefix are split up into one map per
    # prefix variant.
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # NOTE(review): asHeaderLines is collected but never written out by this function.
    asHeaderLines = [];
    # This used to be a print() to stdout, i.e. into the default oDstFile.
    sys.stderr.write("debug: maps=%s\n\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),));
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"): continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a 16 opcode boundary gets its start offset noted.
        if fShortTable and (iInstrStart & ((0x10 * cEntriesPerByte) - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & ((0x10 * cEntriesPerByte) - 1)) == 0:
                # Start of a new row of 16 opcode bytes.
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (0x10 * cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    # A whole row is invalid.  The final entry is skipped by the increment below.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  The skip must follow
                        # the block size; it was hardcoded to 3, which only suits 4 entries/byte.
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is handed to the entry formatter as if it was
                    #               an instruction - confirm this can never occur.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # Nothing to output at all, so add a dummy entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration uses the same name as the definition (it used to append an
        # extra 'Range' to it, so the two did not match).
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (oMap.getDisasRangeName(),));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
3908
# When run as a script rather than imported, generate the disassembler tables
# using the default destination of generateDisassemblerTables() (sys.stdout).
if __name__ == '__main__':
    generateDisassemblerTables();
3911
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette