VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py@ 102011

Last change on this file since 102011 was 102011, checked in by vboxsync, 19 months ago

VMM/IEM: Added a flush mask for guest register shadows to the IEM_MC_CALL_CIMPL_X macros to better manage register optimizations when recompiling to native code, replacing the IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG/SREG macros added earlier today. Added a IEM_MC_HINT_FLUSH_GUEST_SHADOW macro for debugging purposes. bugref:10371

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 297.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstPython.py 102011 2023-11-08 22:10:48Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.215389.xyz.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 102011 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes don't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
68g_kdX86EFlagsConstants = {
69 'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
70 'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
71 'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
72 'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
73 'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
74 'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
75 'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
76 'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
77 'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
78 'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
79 'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
80 'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
81 'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
82 'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
83 'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
84 'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
85 'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
86 'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
87 'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
88 'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
89};
90
91## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
92g_kdEFlagsMnemonics = {
93 # Debugger flag notation (sorted by value):
94 'cf': 'X86_EFL_CF', ##< Carry Flag.
95 'nc': '!X86_EFL_CF', ##< No Carry.
96
97 'po': 'X86_EFL_PF', ##< Parity Odd. NOTE(review): architecturally PF=1 means even parity - confirm the po/pe mapping is intended.
98 'pe': '!X86_EFL_PF', ##< Parity Even.
99
100 'af': 'X86_EFL_AF', ##< Aux Flag.
101 'na': '!X86_EFL_AF', ##< No Aux.
102
103 'zr': 'X86_EFL_ZF', ##< ZeRo.
104 'nz': '!X86_EFL_ZF', ##< No Zero.
105
106 'ng': 'X86_EFL_SF', ##< NeGative (sign).
107 'pl': '!X86_EFL_SF', ##< PLus (sign).
108
109 'tf': 'X86_EFL_TF', ##< Trap flag.
110
111 'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
112 'di': '!X86_EFL_IF', ##< Disabled Interrupts.
113
114 'dn': 'X86_EFL_DF', ##< DowN (string op direction).
115 'up': '!X86_EFL_DF', ##< UP (string op direction).
116
117 'ov': 'X86_EFL_OF', ##< OVerflow.
118 'nv': '!X86_EFL_OF', ##< No Overflow.
119
120 'nt': 'X86_EFL_NT', ##< Nested Task.
121 'rf': 'X86_EFL_RF', ##< Resume Flag.
122 'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
123 'ac': 'X86_EFL_AC', ##< Alignment Check.
124 'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
125 'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.
126
127 # Reference manual notation not covered above (sorted by value):
128 'pf': 'X86_EFL_PF',
129 'zf': 'X86_EFL_ZF',
130 'sf': 'X86_EFL_SF',
131 'if': 'X86_EFL_IF',
132 'df': 'X86_EFL_DF',
133 'of': 'X86_EFL_OF',
134 'iopl': 'X86_EFL_IOPL',
135 'id': 'X86_EFL_ID',
136};
137
138## Constants and values for CR0.
139g_kdX86Cr0Constants = {
140 'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
141 'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
142 'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
143 'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
144 'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
145 'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
146 'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
147 'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
148 'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
149 'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
150 'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
151};
152
153## Constants and values for CR4.
154g_kdX86Cr4Constants = {
155 'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
156 'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
157 'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
158 'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
159 'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
160 'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
161 'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
162 'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
163 'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
164 'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
165 'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
166 'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
167 'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
168 'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
169 'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
170 'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
171 'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
172 'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
173};
174
175## XSAVE components (XCR0).
176g_kdX86XSaveCConstants = {
177 'XSAVE_C_X87': 0x00000001,
178 'XSAVE_C_SSE': 0x00000002,
179 'XSAVE_C_YMM': 0x00000004,
180 'XSAVE_C_BNDREGS': 0x00000008,
181 'XSAVE_C_BNDCSR': 0x00000010,
182 'XSAVE_C_OPMASK': 0x00000020,
183 'XSAVE_C_ZMM_HI256': 0x00000040,
184 'XSAVE_C_ZMM_16HI': 0x00000080,
185 'XSAVE_C_PKRU': 0x00000200,
186 'XSAVE_C_LWP': 0x4000000000000000,
187 'XSAVE_C_X': 0x8000000000000000,
188 'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
189 'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
190};
191
192
193## \@op[1-4] locations
194g_kdOpLocations = {
195 'reg': [], ## modrm.reg
196 'rm': [], ## modrm.rm
197 'imm': [], ## immediate instruction data
198 'vvvv': [], ## VEX.vvvv
199
200 # fixed registers.
201 'AL': [],
202 'rAX': [],
203 'rDX': [],
204 'rSI': [],
205 'rDI': [],
206 'rFLAGS': [],
207 'CS': [],
208 'DS': [],
209 'ES': [],
210 'FS': [],
211 'GS': [],
212 'SS': [],
213};
214
215## \@op[1-4] types
216##
217## Value fields:
218## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
219## - 1: the location (g_kdOpLocations).
220## - 2: disassembler format string version of the type.
221## - 3: disassembler OP_PARAM_XXX (XXX only).
222## - 4: IEM form matching instruction.
223##
224## Note! See the A.2.1 in SDM vol 2 for the type names.
225g_kdOpTypes = {
226 # Fixed addresses
227 'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),
228
229 # ModR/M.rm
230 'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
231 'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
232 'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
233 'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
234 'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
235 'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
236 'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
237 'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
238 'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
239 'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
240 'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
241 'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
242 'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
243 'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
244 'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
245 'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
246 'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
247 'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
248 'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
249 'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
250 'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
251 'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
252 'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
253 'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
254 'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
255 'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
256
257 # ModR/M.rm - register only.
258 'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
259 'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
260 'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
261 'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
262 'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
263 'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
264 'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
265 'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),
266
267 # ModR/M.rm - memory only.
268 'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
269 'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
270 'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
271 'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
272 'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
273 'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
274 'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
275 'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
276 'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
277 'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
278 'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
279 'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
280 'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
281 'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
282 'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
283
284 # ModR/M.reg
285 'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
286 'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
287 'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
288 'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
289 'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
290 'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
291 'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
292 'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
293 'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
294 'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
295 'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
296 'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
297 'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
298 'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
299 'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
300 'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
301 'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
302 'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
303 'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
304 'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
305 'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
306 'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
307 'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
308 'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
309 'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
310 'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
311 'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
312 'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
313 'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
314 'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
315 'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
316 'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
317
318 # VEX.vvvv
319 'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
320 'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
321 'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
322 'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
323 'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
324 'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
325 'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
326 'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),
327
328 # Immediate values.
329 'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
330 'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
331 'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
332 'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
333 'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
334 'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword
335
336 # Address operands (no ModR/M).
337 'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
338 'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),
339
340 # Relative jump targets
341 'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
342 'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),
343
344 # DS:rSI
345 'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
346 'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
347 # ES:rDI
348 'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
349 'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),
350
351 'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),
352
353 # Fixed registers.
354 'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
355 'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
356 'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
357 'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
358 'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
359 'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
360 'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
361 'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
362 'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
363};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
369## IEMFORM_XXX mappings.
370g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
371 'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
372 'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
373 'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
374 'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
375 'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
376 'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
377 'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
378 'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
379 'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
380 'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
381 'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
382 'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
383 'M': ( 'ModR/M', [ 'rm', ], '', ),
384 'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
385 'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
386 'R': ( 'ModR/M', [ 'reg', ], '', ),
387
388 'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
389 'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
390 'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
391 'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
392 'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
393 'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
394 'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
395 'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
396 'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
397 'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
398 'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
399 'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
400 'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
401 'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
402 'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
403 'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
404 'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
405 'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
406 'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
407 'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
408 'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
409 'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),
410
411 'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
412 'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
413 'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),
414
415 'FIXED': ( 'fixed', None, '', ),
416};
417
418## \@oppfx values.
419g_kdPrefixes = {
420 'none': [],
421 '0x66': [],
422 '0xf3': [],
423 '0xf2': [],
424};
425
426## Special \@opcode tag values.
427g_kdSpecialOpcodes = {
428 '/reg': [],
429 'mr/reg': [],
430 '11 /reg': [],
431 '!11 /reg': [],
432 '11 mr/reg': [],
433 '!11 mr/reg': [],
434};
435
436## Special \@opcodesub tag values.
437## The first value is the real value for aliases.
438## The second value is for bs3cg1.
# Each value is a two element list:
#   [0] the canonical sub-opcode string (aliases map to the real value),
#   [1] the suffix used for bs3cg1.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',         ],
    '11 mr/reg':            [ '11 mr/reg',          '',         ],
    '11':                   [ '11 mr/reg',          '',         ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',         ],
    '!11':                  [ '!11 mr/reg',         '',         ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',       ],
    'w=0':                  [ 'rex.w=0',            '',         ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',      ],
    'w=1':                  [ 'rex.w=1',            '',         ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',       ],
    # Not an alias: must map to itself (was mistakenly 'vex.l=0').
    'vex.l=1':              [ 'vex.l=1',            'L1',       ],
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',       ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',       ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',       ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',       ],
};
456
457## Valid values for \@openc
458g_kdEncodings = {
459 'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
460 'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
461 'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
462 'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
463 'prefix': [ None, ], ##< Prefix
464};
465
466## \@opunused, \@opinvalid, \@opinvlstyle
467g_kdInvalidStyles = {
468 'immediate': [], ##< CPU stops decoding immediately after the opcode.
469 'vex.modrm': [], ##< VEX+ModR/M, everyone.
470 'intel-modrm': [], ##< Intel decodes ModR/M.
471 'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
472 'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
473 'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
474};
475
476g_kdCpuNames = {
477 '8086': (),
478 '80186': (),
479 '80286': (),
480 '80386': (),
481 '80486': (),
482};
483
484## \@opcpuid
# Maps the lower case feature mnemonic accepted by the tag to the name of the
# corresponding X86_CPUID_XXX feature bit macro.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # The next entries used to name neighbouring ECX bits (DTES64, CPLDS, TM2)
    # instead of the feature the mnemonic stands for.
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
530## \@ophints values.
531# pylint: disable=line-too-long
532g_kdHints = {
533 'invalid': 'DISOPTYPE_INVALID', ##<
534 'harmless': 'DISOPTYPE_HARMLESS', ##<
535 'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
536 'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
537 'dangerous': 'DISOPTYPE_DANGEROUS', ##<
538 'portio': 'DISOPTYPE_PORTIO', ##<
539 'privileged': 'DISOPTYPE_PRIVILEGED', ##<
540 'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
541 'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
542 'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
543 'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
544 'interrupt': 'DISOPTYPE_INTERRUPT', ##<
545 'illegal': 'DISOPTYPE_ILLEGAL', ##<
546 'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
547 'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
548 'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss) */
549 'x86_portio_read': 'DISOPTYPE_X86_PORTIO_READ', ##<
550 'x86_portio_write': 'DISOPTYPE_X86_PORTIO_WRITE', ##<
551 'x86_invalid_64': 'DISOPTYPE_X86_INVALID_64', ##< Invalid in 64 bits mode
552 'x86_only_64': 'DISOPTYPE_X86_ONLY_64', ##< Only valid in 64 bits mode
553 'x86_default_64_op_size': 'DISOPTYPE_X86_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
554 'x86_forced_64_op_size': 'DISOPTYPE_X86_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
555 'x86_rexb_extends_opreg': 'DISOPTYPE_X86_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
556 'x86_mod_fixed_11': 'DISOPTYPE_X86_MOD_FIXED_11', ##< modrm.mod is always 11b
557 'x86_forced_32_op_size_x86': 'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
558 ## (only in 16 & 32 bits mode!)
559 'x86_avx': 'DISOPTYPE_X86_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
560 'x86_sse': 'DISOPTYPE_X86_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
561 'x86_mmx': 'DISOPTYPE_X86_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
562 'x86_fpu': 'DISOPTYPE_X86_FPU', ##< FPU instruction. Not implemented yet!
563 'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
564 'ignores_rexw': '', ##< Ignores REX.W.
565 'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
566 'vex_l_zero': '', ##< VEX.L must be 0.
567 'vex_l_ignored': '', ##< VEX.L is ignored.
568 'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
569 'lock_allowed': '', ##< Lock prefix allowed.
570};
571# pylint: enable=line-too-long
572
573## \@opxcpttype values (see SDMv2 2.4, 2.7).
574g_kdXcptTypes = {
575 'none': [],
576 '1': [],
577 '2': [],
578 '3': [],
579 '4': [],
580 '4UA': [],
581 '5': [],
582 '5LZ': [], # LZ = VEX.L must be zero.
583 '6': [],
584 '7': [],
585 '7LZ': [],
586 '8': [],
587 '11': [],
588 '12': [],
589 'E1': [],
590 'E1NF': [],
591 'E2': [],
592 'E3': [],
593 'E3NF': [],
594 'E4': [],
595 'E4NF': [],
596 'E5': [],
597 'E5NF': [],
598 'E6': [],
599 'E6NF': [],
600 'E7NF': [],
601 'E9': [],
602 'E9NF': [],
603 'E10': [],
604 'E11': [],
605 'E12': [],
606 'E12NF': [],
607};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be member of multiple opcode maps as long
    as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy':   [],
        'vex1':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 1
        'vex2':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 2
        'vex3':     [], ##< VEX or EVEX prefix with map select (mmmmm) = 3
        'xop8':     [], ##< XOP prefix with map select (mmmmm) = 8
        'xop9':     [], ##< XOP prefix with map select (mmmmm) = 9
        'xop10':    [], ##< XOP prefix with map select (mmmmm) = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte':     [  256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r':       [    8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[   16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r':   [   32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r':   [    8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r':    [    8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11':       [   64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        Each entry in asLeadOpcodes must pass _isValidOpcodeByte, and sDisParse
        must be None or start with 'IDX_Parse'.  All of this is checked by
        assertions only.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName          = sName;
        self.sIemName       = sIemName;
        self.asLeadOpcodes  = asLeadOpcodes;    ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector      = sSelector;        ##< The member selector, see kdSelectors.
        self.sEncoding      = sEncoding;        ##< The encoding, see kdEncodings.
        self.aoInstructions = []                # type: List[Instruction]
        self.sDisParse      = sDisParse;        ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table, optionally keeping only the instructions whose
        sPrefix equals sPrefixFilter.

        When filtering a 'byte+pfx' table the copy becomes a plain 'byte'
        table, since only one prefix variant remains per opcode byte.
        Returns the new InstructionMap.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode bytes.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map.  For the ModR/M based selectors the value is
        treated as a ModR/M byte: mod in bits 7:6, reg in bits 5:3 and rm in
        bits 2:0.  The 'byte+pfx' selector additionally reads oInstr.sPrefix.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
            return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note: masking with 0xc0 can only yield 0x00, 0x40, 0x80 or 0xc0, so
        #       mod == 0y11 must be tested against 0xc0 (not 0xc).
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable  = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions.  Those without an opcode are skipped.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this slot: switch the entry to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the '_' separated words of sName:
        'm' and 'r' and two digit lower case hex words are appended with a
        leading underscore, all other words get 'grp'/'map' capitalized and
        their first character upper-cased.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm':     # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r':   # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord  = sWord.replace('grp', 'Grp');
                sWord  = sWord.replace('map', 'Map');
                # Slicing (rather than indexing) tolerates empty words, e.g. from '__' in the name.
                sName += sWord[:1].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        """
        sName is the type name, acbSizes the list of natural value sizes in
        bytes (ascending; defaults to 1 thru 32) and fUnsigned the default
        extension stance (True = zero extend, False = sign extend).
        """
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its bitwise inverse.
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        sValue is a decimal or 0x-prefixed hexadecimal integer string with an
        optional leading '+' or '-', either of which forces sign extension.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  (This base
        class only produces the single entry form.)  The bytearray holds the
        value least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] in ('-', '+'):
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() yields arbitrary precision
        # values on both python 2 and 3, so no 'long' is needed here.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert to a hex digit string.  '%x' produces neither the '0x' prefix
        # nor the python 2 'L' suffix.  Negative values are converted to two's
        # complement manually by inverting the digits of (-n - 1).
        if iValue >= 0:
            sHex = '%x' % (iValue,);
        else:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in '%x' % (-iValue - 1,)]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;  # Make sure the top bit is set so the value reads as negative.

        # Find the natural size: the first configured size that fits, or if too
        # big for any of them, the next multiple of the largest size.
        cDigits = len(sHex);
        if cDigits <= self.acbSizes[-1] * 2:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            cNaturalDigits = self.acbSizes[-1] * 2;
            cNaturalDigits = ((cDigits + cNaturalDigits - 1) // cNaturalDigits) * cNaturalDigits;

        # Pad to the natural size, extending with the sign of the value.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Convert to bytearray, walking the digit pairs from the end so the
        # least significant byte comes first, and return it.
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;


941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are translated via
    g_kdEFlagsMnemonics into X86_EFL_XXX constants, where a leading '!' on
    the constant name means the flag is to be cleared rather than set.
    """

    ## Mnemonics that isAndOrPair() takes as a sign of an AND+OR pair value.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a flag mnemonic list into value byte arrays.

        Returns the plain TestType.get() result for the mask of flags to set
        when no flag is cleared, otherwise a (clear, set) pair of entries.
        Raises BadValue on unknown flag mnemonics.
        """
        fSet   = 0;
        fClear = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] != '!':
                fSet   |= g_kdX86EFlagsConstants[sConstant];
            else:
                fClear |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fSet,));
        if fClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Returns True if any of the kdZeroValueFlags mnemonics occurs in sValue.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);

978
class TestTypeFromDict(TestType):
    """
    Special value parsing for flag registers described by a constant dictionary.

    A value is a comma separated list of flag names.  Each name is uppercased,
    prefixed with sConstantPrefix and looked up in kdConstantsAndValues; the
    values found are ORed together.
    """

    ## Not referenced by the methods of this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts a flag name list into the TestType.get() form of the ORed mask.
        Raises BadValue on unknown flag names.
        """
        fMask = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fMask |= fFlagValue;
        return TestType.get(self, '0x%x' % (fMask,));


999
1000
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    An instance ties together the field being modified (a kdFields key), the
    assignment operator (one of kasOperators), the value string and the name
    of the value type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is constructed with the default fUnsigned=True and is
    ##               thus identical to 'uint' - confirm whether it should sign extend.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', unlike xmm0.dw0 thru xmm14.dw0.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a kdFields key and sOp one of kasOperators; sValue and
        sType are stored as given.  All four must be strings.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);


1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector is a (variable, compare operator, value) triplet, where the
    variable and its value must be listed in kdVariables.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Basic code mode.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # cpu modes.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # paging on/off
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        # Validate against the tables above before storing anything.
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;


1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: List[TestInOut]
1374 self.aoOutputs = [] # type: List[TestInOut]
1375 self.aoSelectors = [] # type: List[TestSelector]
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding (type starts with E, G or M). """
        chFirst = self.sType[0];
        return chFirst in ('E', 'G', 'M');

1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about one instruction: core attributes (mnemonic,
    operands, maps, opcode, flags, ...), implementation attributes, decoding
    info (source file and lines) and intermediate input fields.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: List[str]
        self.aoMaps         = []        # type: List[InstructionMap]
        self.aoOperands     = []        # type: List[Operand]
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: List[InstructionTest]
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        Produces '; ' separated 'name=value' pairs for the fields that have a
        value, wrapped in angle brackets when fRepr is True.
        """
        # Optional fields are given as None when absent; None entries are dropped below.
        aoFields = [
            ('opcode',      self.sOpcode),
            ('prefix',      self.sPrefix or None),
            ('mnemonic',    self.sMnemonic),
        ];
        aoFields.extend([('op%u' % (iOperand,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands, 1)]);
        aoFields.extend([
            ('maps',        ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',    self.sEncoding),
            ('hints',       ','.join(self.dHints) if self.dHints else None),
            ('disenum',     self.sDisEnum),
            ('cpuid',       ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',       self.sGroup),
            ('unused',      'True' if self.fUnused else None),
            ('invalid',     'True' if self.fInvalid else None),
            ('invlstyle',   self.sInvalidStyle),
            ('fltest',      self.asFlTest),
            ('flmodify',    self.asFlModify),
            ('flundef',     self.asFlUndefined),
            ('flset',       self.asFlSet),
            ('flclear',     self.asFlClear),
            ('mincpu',      self.sMinCpu),
            ('stats',       self.sStats),
            ('sFunction',   self.sFunction),
            ('fStub',       'True' if self.fStub else None),
            ('fUdStub',     'True' if self.fUdStub else None),
            ('optags',      str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX', str(self.iLineFnIemOpMacro) if self.iLineFnIemOpMacro != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ]);

        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in aoFields if oValue is not None]);
        return ('<' + sBody + '>') if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as oParent.  oMap, sOpcode, sSubOpcode
        and sPrefix override the corresponding attributes when given.  Lists
        and the hints dictionary are copied shallowly.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # Attributes that can be overridden by the caller.
        oCopy.aoMaps     = [oMap,]    if oMap       else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    if sPrefix    else self.sPrefix;
        oCopy.sOpcode    = sOpcode    if sOpcode    else self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;

        # Attributes taken over as-is.
        for sAttr in ('sMnemonic', 'sBrief', 'sEncoding',
                      'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                      'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                      'sInvalidStyle', 'sXcptType',
                      'sStats', 'sFunction', 'fStub', 'fUdStub',
                      'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                      'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes',):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Containers get a shallow copy of their own. (Deeper copy for aoOperands and aoTests?)
        for sAttr in ('asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                      'asRawDisParams', 'asCopyTests',):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The '/r', '11/r' and '!11/r' forms: a lead-in followed by a single
        # digit which goes into bits 3 thru 5, ORed with the bits for the form.
        ## @todo the !11/r form (returns mod=1) doesn't really work...
        for sLeadIn, fFormBits in (('/', 0), ('11/', 0xc0), ('!11/', 0x80),):
            if     len(sOpcode) == len(sLeadIn) + 1 \
               and sOpcode.startswith(sLeadIn) \
               and sOpcode[-1].isdigit():
                return (int(sOpcode[-1:]) << 3) | fFormBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (zero if None or empty).
        """
        uRet = 0;
        for sFlag in (asFlags or ()):
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uRet |= g_kdX86EFlagsConstants[sConstant];
        return uRet;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value. """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        if not self.aoMaps:
            return False;
        return all(oMap.isVexMap() for oMap in self.aoMaps);



1662
1663
1664
## All the instructions.
g_aoAllInstructions = [] # type: List[Instruction]

## All the instructions indexed by statistics name (opstat).
g_dAllInstructionsByStat = {} # type: Dict[str, Instruction]

## All the instructions indexed by function name (opfunction).
## Each entry is a list of instructions.
g_dAllInstructionsByFunction = {} # type: Dict[str, List[Instruction]]

## Instructions tagged by oponlytest
g_aoOnlyTestInstructions = [] # type: List[Instruction]
1676
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## The instruction maps in g_aoInstructionMaps indexed by map name (sName).
g_dInstructionMaps = { oMap.sName: oMap for oMap in g_aoInstructionMaps };
## The instruction maps in g_aoInstructionMaps indexed by IEM name (sIemName).
g_dInstructionMapsByIemName = { oMap.sIemName: oMap for oMap in g_aoInstructionMaps };
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    This is mainly used for scoped searches for variables used in
    microcode blocks.
    """
    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        # Where the function is defined.
        self.sSrcFile   = sSrcFile;      ##< The source file the function is defined in.
        self.iBeginLine = iBeginLine;    ##< The start line.
        self.iEndLine   = -1;            ##< The line the function (probably) ends on, -1 until complete() is called.
        # What the function is.
        self.sName      = sName;         ##< The function name.
        self.asDefArgs  = asDefArgs;     ##< The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asLines    = [] # type: List[str] ##< The raw lines the function is made up of.

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once per function (asserted).
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.asLines) = (iEndLine, asLines);
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    Statement in a microcode block.
    """
    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The statement parameters (list of strings).
        self.oUser    = None;       ##< Not used by this class, initialized to None.

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns 'NAME(param1, param2, ...);' indented by cchIndent spaces and
        terminated by a newline.
        """
        return ''.join([' ' * cchIndent, self.sName, '(', ', '.join(self.asParams), ');\n',]);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements.

        Returns the concatenation of each statement's renderCode() output.
        """
        asRendered = [];
        for oStmt in aoStmts:
            asRendered.append(oStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement with any of the given names from the list,
        descending into the if and else branches of conditional statements.
        Returns None if there is no such statement.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oStmt in aoStmts:
            if oStmt.sName in dNames:
                return oStmt;
            if isinstance(oStmt, McStmtCond):
                # The if-branch is searched first, the else-branch only when that misses.
                oHit = (   McStmt.findStmtByNames(oStmt.aoIfBranch, dNames)
                        or McStmt.findStmtByNames(oStmt.aoElseBranch, dNames));
                if oHit:
                    return oHit;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement (name starting with 'C++'). """
        return self.sName.startswith('C++');
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).

    Holds the statements of the if-branch and the (possibly empty) else-branch.
    """
    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        McStmt.__init__(self, sName, asParams);
        # Each branch is stored as a private copy of the given list, or as an empty list.
        self.aoIfBranch   = list(aoIfBranch)   if aoIfBranch   is not None else [];
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with its branches indented four more spaces.
        The IEM_MC_ELSE part is only emitted when the else-branch is not empty.
        """
        sIndent = ' ' * cchIndent;
        asParts = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sIndent + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1838
class McStmtVar(McStmt):
    """
    Variable declaration statement.

    Used for IEM_MC_LOCAL, IEM_MC_LOCAL_ASSIGN and IEM_MC_LOCAL_CONST.
    """
    def __init__(self, sName, asParams, sType, sVarName, sValue = None):
        McStmt.__init__(self, sName, asParams);
        (self.sType, self.sVarName) = (sType, sVarName);
        self.sValue = sValue;       ##< None if no assigned / const value.
1846
class McStmtArg(McStmtVar):
    """
    Argument declaration statement.

    Used for IEM_MC_ARG, IEM_MC_ARG_CONST and IEM_MC_ARG_LOCAL_REF.  The const
    value, if any, is stored as the variable value (sValue) by the base class.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sValue = sConstValue);
        self.iArg     = iArg;       ##< The argument number.
        self.sRefType = sRefType;   ##< The kind of reference: 'local', 'none'.
        self.sRef     = sRef;       ##< The reference string (local variable, register).
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """
    Call statement (IEM_MC_CALL_*).
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index into asParams of the function name.
        self.idxParams = iFnParam + 1;          ##< The index following the function name.
        self.sFn       = asParams[iFnParam];    ##< The function name.
        ## The asParams entry selected by iRcNameParam, None if iRcNameParam is negative.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only statement parameter.
    """
    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        McStmt.__init__(self, sName, [sCode,]);
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code followed by a newline, appending a
        '// C++ decode' or '// C++ normal' annotation at every newline.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1883
class McCppCall(McCppGeneric):
    """
    A generic C++/C call statement.

    The sName is still 'C++', so the function name is in the first parameter
    and the arguments in the subsequent ones.
    """
    def __init__(self, sFnName, asArgs, fDecode = True, cchIndent = 0):
        McCppGeneric.__init__(self, sFnName, fDecode = fDecode, cchIndent = cchIndent);
        # The base class stored [sFnName,], append the call arguments to that.
        self.asParams.extend(asArgs);

    def renderCode(self, cchIndent = 0):
        """
        Renders 'function(arg1, arg2, ...);' followed by a '// C++ decode'
        or '// C++ normal' annotation and a newline.
        """
        sIndent = ' ' * (cchIndent + self.cchIndent);
        sCall   = sIndent + self.asParams[0] + '(' + ', '.join(self.asParams[1:]) + ');';
        if self.fDecode:
            return sCall + ' // C++ decode\n';
        return sCall + ' // C++ normal\n';
1903
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The statement name is 'C++/if' and the condition expression is the one
    and only parameter.
    """
    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        McStmtCond.__init__(self, 'C++/if', [sCode,], aoIfBranch, aoElseBranch);
        self.cchIndent = cchIndent;     ##< Extra indentation added when rendering.
        self.fDecode   = fDecode;       ##< Selects the '// C++ decode' or '// C++ normal' annotation.

    def renderCode(self, cchIndent = 0):
        """
        Renders the 'if' with both branches wrapped in curly brackets on their
        own lines.  The 'else' part is only emitted when the else-branch is
        not empty.
        """
        cchTotal    = cchIndent + self.cchIndent;
        sIndent     = ' ' * cchTotal;
        sAnnotation = '// C++ decode' if self.fDecode else '// C++ normal';

        asParts = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchTotal + 4),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchTotal + 4),
                sIndent + '}\n',
            ]);
        return ''.join(asParts);
1926
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    The statement name is 'C++/preproc' and fDecode is always False.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, fDecode = False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive followed by a newline, the indentation is not applied. """
        sDirective = self.asParams[0];
        return sDirective + '\n';
1936
1937
## IEM_MC_F_XXX values.
##
## The key is the flag name, the value is a tuple of additional flag names that
## McBlock.parseMcBegin also records in McBlock.dsMcFlags when the flag is used.
g_kdMcFlags = {
    'IEM_MC_F_ONLY_8086': (),
    'IEM_MC_F_MIN_186': (),
    'IEM_MC_F_MIN_286': (),
    'IEM_MC_F_NOT_286_OR_OLDER': (),
    'IEM_MC_F_MIN_386': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_486': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_PENTIUM_II': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_MIN_CORE': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_64BIT': ('IEM_MC_F_NOT_286_OR_OLDER',),
    'IEM_MC_F_NOT_64BIT': (),
};
## IEM_CIMPL_F_XXX values.
##
## The key is the flag name, the value is a tuple of additional flag names that
## McBlock.parseCImplFlags also records in McBlock.dsCImplFlags when the flag
## is used.
g_kdCImplFlags = {
    'IEM_CIMPL_F_BRANCH_DIRECT': (),
    'IEM_CIMPL_F_BRANCH_INDIRECT': (),
    'IEM_CIMPL_F_BRANCH_RELATIVE': (),
    'IEM_CIMPL_F_BRANCH_CONDITIONAL': (),
    'IEM_CIMPL_F_BRANCH_FAR': (),
    'IEM_CIMPL_F_BRANCH_ANY': ('IEM_CIMPL_F_BRANCH_DIRECT', 'IEM_CIMPL_F_BRANCH_INDIRECT',
                               'IEM_CIMPL_F_BRANCH_RELATIVE',),
    'IEM_CIMPL_F_BRANCH_STACK': (),
    'IEM_CIMPL_F_BRANCH_STACK_FAR': (),
    'IEM_CIMPL_F_MODE': (),
    'IEM_CIMPL_F_RFLAGS': (),
    'IEM_CIMPL_F_INHIBIT_SHADOW': (),
    'IEM_CIMPL_F_STATUS_FLAGS': (),
    'IEM_CIMPL_F_CHECK_IRQ_AFTER': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE': (),
    'IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER': ('IEM_CIMPL_F_CHECK_IRQ_BEFORE', 'IEM_CIMPL_F_CHECK_IRQ_AFTER',),
    'IEM_CIMPL_F_VMEXIT': (),
    'IEM_CIMPL_F_FPU': (),
    'IEM_CIMPL_F_REP': (),
    'IEM_CIMPL_F_IO': (),
    'IEM_CIMPL_F_END_TB': (),
    'IEM_CIMPL_F_XCPT': ('IEM_CIMPL_F_BRANCH_INDIRECT', 'IEM_CIMPL_F_BRANCH_FAR',
                         'IEM_CIMPL_F_MODE', 'IEM_CIMPL_F_RFLAGS', 'IEM_CIMPL_F_VMEXIT', ),
    'IEM_CIMPL_F_CALLS_CIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL': (),
    'IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE': (),
};
1981class McBlock(object):
1982 """
1983 Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).
1984 """
1985
1986 ## @name Macro expansion types.
1987 ## @{
1988 kiMacroExp_None = 0;
1989 kiMacroExp_Entire = 1; ##< Entire block (iBeginLine == iEndLine), original line may contain multiple blocks.
1990 kiMacroExp_Partial = 2; ##< Partial/mixed (cmpxchg16b), safe to assume single block.
1991 ## @}
1992
1993 def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
1994 ## The source file containing the block.
1995 self.sSrcFile = sSrcFile;
1996 ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
1997 self.iBeginLine = iBeginLine;
1998 ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
1999 self.offBeginLine = offBeginLine;
2000 ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
2001 self.iEndLine = -1;
2002 ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
2003 self.offEndLine = 0;
2004 ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
2005 self.offAfterEnd = 0;
2006 ## The function the block resides in.
2007 self.oFunction = oFunction;
2008 ## The name of the function the block resides in. DEPRECATED.
2009 self.sFunction = oFunction.sName;
2010 ## The block number within the function.
2011 self.iInFunction = iInFunction;
2012 self.cchIndent = cchIndent if cchIndent else offBeginLine;
2013 ##< The raw lines the block is made up of.
2014 self.asLines = [] # type: List[str]
2015 ## Indicates whether the block includes macro expansion parts (kiMacroExp_None,
2016 ## kiMacroExp_Entrie, kiMacroExp_Partial).
2017 self.iMacroExp = self.kiMacroExp_None;
2018 ## IEM_MC_BEGIN: Argument count.
2019 self.cArgs = -1;
2020 ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
2021 self.aoArgs = [] # type: List[McStmtArg]
2022 ## IEM_MC_BEGIN: Locals count.
2023 self.cLocals = -1;
2024 ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
2025 self.aoLocals = [] # type: List[McStmtVar]
2026 ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
2027 self.dsMcFlags = {} # type: Dict[str, bool]
2028 ## IEM_MC_[DEFER_TO|CALL]_CIMPL_XXX: IEM_CIMPL_F_XXX dictionary
2029 self.dsCImplFlags = {} # type: Dict[str, bool]
2030 ## Decoded statements in the block.
2031 self.aoStmts = [] # type: List[McStmt]
2032
2033 def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
2034 """
2035 Completes the microcode block.
2036 """
2037 assert self.iEndLine == -1;
2038 self.iEndLine = iEndLine;
2039 self.offEndLine = offEndLine;
2040 self.offAfterEnd = offAfterEnd;
2041 self.asLines = asLines;
2042
def raiseDecodeError(self, sRawCode, off, sMessage):
    """
    Raises a decoding error (ParserException) for offset off in sRawCode.

    The reported line is the block's begin line plus the number of newlines
    preceding off, the reported column is the 1-based position of off within
    its line.
    """
    cPrecedingNewlines = sRawCode.count('\n', 0, off);
    offLineStart       = sRawCode.rfind('\n', 0, off) + 1;   # rfind returns -1 when off is on the first line.
    iColumn            = off - offLineStart + 1;
    raise ParserException('%s:%d:%d: parsing error: %s'
                          % (self.sSrcFile, self.iBeginLine + cPrecedingNewlines, iColumn, sMessage,));
2049
def raiseStmtError(self, sName, sMessage):
    """
    Raises a statement parser error (ParserException) for the statement
    sName, reporting the block's source file and begin line.
    """
    tLocation = (self.sSrcFile, self.iBeginLine,);
    raise ParserException('%s:%d: %s: parsing error: %s' % (tLocation + (sName, sMessage,)));
2053
def checkStmtParamCount(self, sName, asParams, cParamsExpected):
    """
    Checks the parameter count of statement sName.

    Returns True if asParams has cParamsExpected entries, otherwise a
    ParserException is raised.
    """
    cParamsFound = len(asParams);
    if cParamsFound == cParamsExpected:
        return True;
    raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                          % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, cParamsFound,));
2060
@staticmethod
def parseMcGeneric(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmt object.

    The block (oSelf) is not used.
    """
    _ = oSelf;
    oStmt = McStmt(sName, asParams);
    return oStmt;
2066
@staticmethod
def parseMcGenericCond(oSelf, sName, asParams):
    """
    Generic parser that returns a plain McStmtCond object with empty branches.

    The block (oSelf) is not used.
    """
    _ = oSelf;
    oStmt = McStmtCond(sName, asParams);
    return oStmt;
2072
@staticmethod
def parseMcBegin(oSelf, sName, asParams):
    """
    IEM_MC_BEGIN

    The four parameters are: argument count, locals count, IEM_MC_F_XXX
    flags and IEM_CIMPL_F_XXX flags.  The counts and flags are recorded in
    the block (oSelf) and a plain McStmt is returned.  An error is raised if
    the statement is used more than once or an unknown flag is encountered.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    fSeenBefore = oSelf.cArgs != -1 or oSelf.cLocals != -1 or bool(oSelf.dsMcFlags);
    if fSeenBefore:
        oSelf.raiseStmtError(sName, 'Used more than once!');

    (sArgCount, sLocalCount, sMcFlags, sCImplFlags) = asParams;
    oSelf.cArgs   = int(sArgCount);
    oSelf.cLocals = int(sLocalCount);

    # The IEM_MC_F_XXX flags: Each flag is recorded together with the flags g_kdMcFlags lists for it.
    if sMcFlags != '0':
        for sRawFlag in sMcFlags.split('|'):
            sFlag = sRawFlag.strip();
            if sFlag not in g_kdMcFlags:
                oSelf.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            for sFlagToSet in (sFlag,) + g_kdMcFlags[sFlag]:
                oSelf.dsMcFlags[sFlagToSet] = True;

    # The IEM_CIMPL_F_XXX flags.
    if sCImplFlags != '0':
        oSelf.parseCImplFlags(sName, sCImplFlags);

    return McBlock.parseMcGeneric(oSelf, sName, asParams);
2095
@staticmethod
def parseMcArg(oSelf, sName, asParams):
    """
    IEM_MC_ARG

    The three parameters are: type, variable name and argument number.
    The resulting McStmtArg is added to the block's argument list.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo));
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2103
@staticmethod
def parseMcArgConst(oSelf, sName, asParams):
    """
    IEM_MC_ARG_CONST

    The four parameters are: type, variable name, const value and argument
    number.  The resulting McStmtArg is added to the block's argument list.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sValue, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sConstValue = sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2111
@staticmethod
def parseMcArgLocalRef(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_REF

    The four parameters are: type, variable name, the referenced local and
    argument number.  The resulting McStmtArg (reference type 'local') is
    added to the block's argument list.
    """
    oSelf.checkStmtParamCount(sName, asParams, 4);
    (sType, sVarName, sLocalRef, sArgNo) = asParams;
    oStmt = McStmtArg(sName, asParams, sType, sVarName, int(sArgNo), sRef = sLocalRef, sRefType = 'local');
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2119
@staticmethod
def parseMcArgLocalEFlags(oSelf, sName, asParams):
    """
    IEM_MC_ARG_LOCAL_EFLAGS

    The three parameters are: argument name, local variable name and
    argument number.  Returns a tuple with two statements, see the note.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sArgName, sLocalName, sArgNo) = asParams;

    # Note! We split this one up into IEM_MC_LOCAL and IEM_MC_ARG_LOCAL_REF.
    oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', sLocalName,], 'uint32_t', sLocalName);
    oSelf.aoLocals.append(oStmtLocal);

    oStmtArg = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', sArgName, sLocalName, sArgNo],
                         'uint32_t *', sArgName, int(sArgNo), sRef = sLocalName, sRefType = 'local');
    oSelf.aoArgs.append(oStmtArg);

    return (oStmtLocal, oStmtArg,);
2131
@staticmethod
def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
    """
    IEM_MC_IMPLICIT_AVX_AIMPL_ARGS

    Takes no parameters.  It is translated into an IEM_MC_ARG_CONST statement
    declaring pXState as argument #0, which is added to the block's argument
    list.
    """
    oSelf.checkStmtParamCount(sName, asParams, 0);
    # Note! Translate to IEM_MC_ARG_CONST
    (sType, sVarName, sValue) = ('PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState');
    oStmt = McStmtArg('IEM_MC_ARG_CONST', [sType, sVarName, sValue, '0'], sType, sVarName, 0, sValue);
    oSelf.aoArgs.append(oStmt);
    return oStmt;
2141
@staticmethod
def parseMcLocal(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL

    The two parameters are: type and variable name.  The resulting McStmtVar
    is added to the block's list of locals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 2);
    (sType, sVarName) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2149
@staticmethod
def parseMcLocalAssign(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_ASSIGN

    The three parameters are: type, variable name and the assigned value.
    The resulting McStmtVar is added to the block's list of locals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sAssignedValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sAssignedValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2157
@staticmethod
def parseMcLocalConst(oSelf, sName, asParams):
    """
    IEM_MC_LOCAL_CONST

    The three parameters are: type, variable name and the const value.
    The resulting McStmtVar is added to the block's list of locals.
    """
    oSelf.checkStmtParamCount(sName, asParams, 3);
    (sType, sVarName, sConstValue) = asParams;
    oStmt = McStmtVar(sName, asParams, sType, sVarName, sValue = sConstValue);
    oSelf.aoLocals.append(oStmt);
    return oStmt;
2165
@staticmethod
def parseMcCallAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AIMPL_3|4

    The last character of the statement name gives the number of call
    arguments.  These follow the return value name (first parameter) and
    the function name (second parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 2 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 1, iRcNameParam = 0);
2172
@staticmethod
def parseMcCallVoidAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_VOID_AIMPL_2|3

    The last character of the statement name gives the number of call
    arguments, which follow the function name (first parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 1 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 0);
2179
@staticmethod
def parseMcCallAvxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_AVX_AIMPL_2|3

    The last character of the statement name gives the number of call
    arguments, which follow the function name (first parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 1 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 0);
2186
@staticmethod
def parseMcCallFpuAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_FPU_AIMPL_1|2|3

    The last character of the statement name gives the number of call
    arguments, which follow the function name (first parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 1 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 0);
2193
@staticmethod
def parseMcCallMmxAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_MMX_AIMPL_2|3

    The last character of the statement name gives the number of call
    arguments, which follow the function name (first parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 1 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 0);
2200
@staticmethod
def parseMcCallSseAImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_SSE_AIMPL_2|3

    The last character of the statement name gives the number of call
    arguments, which follow the function name (first parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 1 + cCallArgs);
    return McStmtCall(sName, asParams, iFnParam = 0);
2207
def parseCImplFlags(self, sName, sFlags):
    """
    Helper for parseMcBegin, parseMcCallCImpl and parseMcDeferToCImpl to
    validate and merge a bunch of IEM_CIMPL_F_XXX values into dsCImplFlags.

    sName is the statement name (only used for error reporting) and sFlags
    is the raw flag expression, i.e. '0' or flag names separated by '|'.
    Comments are stripped, and one opening and one closing parenthesis
    around an operand is dropped.  Each flag is recorded together with the
    flags g_kdCImplFlags lists for it.

    Returns None.  Raises a statement error (see raiseStmtError) on unknown
    flags, which includes empty operands like in 'A | ' and '(A || B)'.
    """
    if sFlags != '0':
        sFlags = self.stripComments(sFlags);
        #print('debug: %s: %s' % (self.oFunction.sName,' | '.join(''.join(sFlags.split()).split('|')),));
        for sFlag in sFlags.split('|'):
            sFlag = sFlag.strip();
            # Note! Using startswith/endswith rather than indexing here, as the
            #       operand may be empty and indexing it would raise an IndexError
            #       rather than the proper parsing error below.
            if sFlag.startswith('('): sFlag = sFlag[1:].strip();
            if sFlag.endswith(')'):   sFlag = sFlag[:-1].strip();
            #print('debug: %s' % sFlag)
            if sFlag not in g_kdCImplFlags:
                if sFlag == '0':
                    continue;
                self.raiseStmtError(sName, 'Unknown flag: %s' % (sFlag, ));
            self.dsCImplFlags[sFlag] = True;
            for sFlag2 in g_kdCImplFlags[sFlag]:
                self.dsCImplFlags[sFlag2] = True;
    return None;
2229
@staticmethod
def parseMcCallCImpl(oSelf, sName, asParams):
    """
    IEM_MC_CALL_CIMPL_0|1|2|3|4|5

    The last character of the statement name gives the number of call
    arguments.  These follow the IEM_CIMPL_F_XXX flags (first parameter,
    merged into the block's flags), a second parameter that is not examined
    here (presumably the guest register shadow flush mask - TODO confirm)
    and the function name (third parameter).
    """
    cCallArgs = int(sName[-1]);
    oSelf.checkStmtParamCount(sName, asParams, 3 + cCallArgs);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2237
@staticmethod
def parseMcDeferToCImpl(oSelf, sName, asParams):
    """
    IEM_MC_DEFER_TO_CIMPL_[0|1|2|3]_RET

    The fifth character from the end of the statement name (the one in front
    of '_RET') gives the number of call arguments.  These follow the
    IEM_CIMPL_F_XXX flags (first parameter, merged into the block's flags),
    a second parameter that is not examined here and the function name
    (third parameter).
    """
    # Note! This code is called by workerIemMcDeferToCImplXRet.
    #print('debug: %s, %s,...' % (sName, asParams[0],));
    cCallArgs = int(sName[-5]);
    oSelf.checkStmtParamCount(sName, asParams, 3 + cCallArgs);
    sCImplFlags = asParams[0];
    oSelf.parseCImplFlags(sName, sCImplFlags);
    return McStmtCall(sName, asParams, iFnParam = 2);
2247
2248 @staticmethod
2249 def stripComments(sCode):
2250 """ Returns sCode with comments removed. """
2251 off = 0;
2252 while off < len(sCode):
2253 off = sCode.find('/', off);
2254 if off < 0 or off + 1 >= len(sCode):
2255 break;
2256
2257 if sCode[off + 1] == '/':
2258 # C++ comment.
2259 offEnd = sCode.find('\n', off + 2);
2260 if offEnd < 0:
2261 return sCode[:off].rstrip();
2262 sCode = sCode[ : off] + sCode[offEnd : ];
2263 off += 1;
2264
2265 elif sCode[off + 1] == '*':
2266 # C comment
2267 offEnd = sCode.find('*/', off + 2);
2268 if offEnd < 0:
2269 return sCode[:off].rstrip();
2270 sSep = ' ';
2271 if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
2272 sSep = '';
2273 sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
2274 off += len(sSep);
2275
2276 else:
2277 # Not a comment.
2278 off += 1;
2279 return sCode;
2280
2281 @staticmethod
2282 def extractParam(sCode, offParam):
2283 """
2284 Extracts the parameter value at offParam in sCode.
2285 Returns stripped value and the end offset of the terminating ',' or ')'.
2286 """
2287 # Extract it.
2288 cNesting = 0;
2289 offStart = offParam;
2290 while offParam < len(sCode):
2291 ch = sCode[offParam];
2292 if ch == '(':
2293 cNesting += 1;
2294 elif ch == ')':
2295 if cNesting == 0:
2296 break;
2297 cNesting -= 1;
2298 elif ch == ',' and cNesting == 0:
2299 break;
2300 offParam += 1;
2301 return (sCode[offStart : offParam].strip(), offParam);
2302
2303 @staticmethod
2304 def extractParams(sCode, offOpenParen):
2305 """
2306 Parses a parameter list.
2307 Returns the list of parameter values and the offset of the closing parentheses.
2308 Returns (None, len(sCode)) on if no closing parentheses was found.
2309 """
2310 assert sCode[offOpenParen] == '(';
2311 asParams = [];
2312 off = offOpenParen + 1;
2313 while off < len(sCode):
2314 ch = sCode[off];
2315 if ch.isspace():
2316 off += 1;
2317 elif ch != ')':
2318 (sParam, off) = McBlock.extractParam(sCode, off);
2319 asParams.append(sParam);
2320 assert off < len(sCode), 'off=%s sCode=%s:"%s"' % (off, len(sCode), sCode,);
2321 if sCode[off] == ',':
2322 off += 1;
2323 else:
2324 return (asParams, off);
2325 return (None, off);
2326
2327 @staticmethod
2328 def findClosingBraces(sCode, off, offStop):
2329 """
2330 Finds the matching '}' for the '{' at off in sCode.
2331 Returns offset of the matching '}' on success, otherwise -1.
2332
2333 Note! Does not take comments into account.
2334 """
2335 cDepth = 1;
2336 off += 1;
2337 while off < offStop:
2338 offClose = sCode.find('}', off, offStop);
2339 if offClose < 0:
2340 break;
2341 cDepth += sCode.count('{', off, offClose);
2342 cDepth -= 1;
2343 if cDepth == 0:
2344 return offClose;
2345 off = offClose + 1;
2346 return -1;
2347
2348 @staticmethod
2349 def countSpacesAt(sCode, off, offStop):
2350 """ Returns the number of space characters at off in sCode. """
2351 offStart = off;
2352 while off < offStop and sCode[off].isspace():
2353 off += 1;
2354 return off - offStart;
2355
2356 @staticmethod
2357 def skipSpacesAt(sCode, off, offStop):
2358 """ Returns first offset at or after off for a non-space character. """
2359 return off + McBlock.countSpacesAt(sCode, off, offStop);
2360
2361 @staticmethod
2362 def isSubstrAt(sStr, off, sSubStr):
2363 """ Returns true of sSubStr is found at off in sStr. """
2364 return sStr[off : off + len(sSubStr)] == sSubStr;
2365
## Matches the start of a C/C++ control statement: 'if (', 'else', 'while (', 'for (' and 'do'.
## Group 1 is the matched keyword, including the opening parenthesis if any.
## Used by decodeCode to tell control statements from other C/C++ statements.
koReCppCtrlStmts = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
## Matches 'iem.s.<member>' for the member names listed in the pattern, group 1 is the member name.
## NOTE(review): presumably these are the instruction decoder state members - confirm against the users.
## NOTE(review): 'fEvxStuff' looks like it could be a typo for 'fEvexStuff' - TODO confirm against the C structure.
koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                + r')');
2371
2372 def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
2373 """
2374 Decodes sRawCode[off : offStop].
2375
2376 Returns list of McStmt instances.
2377 Raises ParserException on failure.
2378 """
2379 if offStop < 0:
2380 offStop = len(sRawCode);
2381 aoStmts = [];
2382 while off < offStop:
2383 ch = sRawCode[off];
2384
2385 #
2386 # Skip spaces and comments.
2387 #
2388 if ch.isspace():
2389 off += 1;
2390
2391 elif ch == '/':
2392 ch = sRawCode[off + 1];
2393 if ch == '/': # C++ comment.
2394 off = sRawCode.find('\n', off + 2);
2395 if off < 0:
2396 break;
2397 off += 1;
2398 elif ch == '*': # C comment.
2399 off = sRawCode.find('*/', off + 2);
2400 if off < 0:
2401 break;
2402 off += 2;
2403 else:
2404 self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');
2405
2406 #
2407 # Is it a MC statement.
2408 #
2409 elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
2410 # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
2411 # Extract it and strip comments from it.
2412 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
2413 offEnd = sRawCode.find(';', off + len('IEM_MC_'));
2414 if offEnd <= off:
2415 self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
2416 else:
2417 offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
2418 if offEnd <= off:
2419 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
2420 if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
2421 self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
2422 offEnd -= 1;
2423 while offEnd > off and sRawCode[offEnd - 1].isspace():
2424 offEnd -= 1;
2425
2426 sRawStmt = self.stripComments(sRawCode[off : offEnd]);
2427
2428 # Isolate the statement name.
2429 offOpenParen = sRawStmt.find('(');
2430 if offOpenParen < 0:
2431 self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
2432 sName = sRawStmt[: offOpenParen].strip();
2433
2434 # Extract the parameters.
2435 (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
2436 if asParams is None:
2437 self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
2438 if offCloseParen + 1 != len(sRawStmt):
2439 self.raiseDecodeError(sRawCode, off,
2440 'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));
2441
2442 # Hand it to the handler.
2443 fnParser = g_dMcStmtParsers.get(sName)[0];
2444 if not fnParser:
2445 self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
2446 oStmt = fnParser(self, sName, asParams);
2447 if not isinstance(oStmt, (list, tuple)):
2448 aoStmts.append(oStmt);
2449 else:
2450 aoStmts.extend(oStmt);
2451
2452 #
2453 # If conditional, we need to parse the whole statement.
2454 #
2455 # For reasons of simplicity, we assume the following structure
2456 # and parse each branch in a recursive call:
2457 # IEM_MC_IF_XXX() {
2458 # IEM_MC_WHATEVER();
2459 # } IEM_MC_ELSE() {
2460 # IEM_MC_WHATEVER();
2461 # } IEM_MC_ENDIF();
2462 #
2463 if sName.startswith('IEM_MC_IF_'):
2464 if iLevel > 1:
2465 self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');
2466
2467 # Find start of the IF block:
2468 offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
2469 if sRawCode[offBlock1] != '{':
2470 self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));
2471
2472 # Find the end of it.
2473 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2474 if offBlock1End < 0:
2475 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));
2476
2477 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);
2478
2479 # Is there an else section?
2480 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2481 if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
2482 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
2483 if sRawCode[off] != '(':
2484 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
2485 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2486 if sRawCode[off] != ')':
2487 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');
2488
2489 # Find start of the ELSE block.
2490 offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
2491 if sRawCode[offBlock2] != '{':
2492 self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');
2493
2494 # Find the end of it.
2495 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2496 if offBlock2End < 0:
2497 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));
2498
2499 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
2500 off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);
2501
2502 # Parse past the endif statement.
2503 if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
2504 self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
2505 off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
2506 if sRawCode[off] != '(':
2507 self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
2508 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2509 if sRawCode[off] != ')':
2510 self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
2511 off = self.skipSpacesAt(sRawCode, off + 1, offStop);
2512 if sRawCode[off] != ';':
2513 self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
2514 off += 1;
2515
2516 else:
2517 # Advance.
2518 off = offEnd + 1;
2519
2520 #
2521 # Otherwise it must be a C/C++ statement of sorts.
2522 #
2523 else:
2524 # Find the end of the statement. if and else requires special handling.
2525 sCondExpr = None;
2526 oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
2527 if oMatch:
2528 if oMatch.group(1)[-1] == '(':
2529 (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
2530 else:
2531 offEnd = oMatch.end();
2532 if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
2533 self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
2534 elif ch == '#':
2535 offEnd = sRawCode.find('\n', off, offStop);
2536 if offEnd < 0:
2537 offEnd = offStop;
2538 offEnd -= 1;
2539 while offEnd > off and sRawCode[offEnd - 1].isspace():
2540 offEnd -= 1;
2541 else:
2542 offEnd = sRawCode.find(';', off);
2543 if offEnd < 0:
2544 self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');
2545
2546 # Check this and the following statement whether it might have
2547 # something to do with decoding. This is a statement filter
2548 # criteria when generating the threaded functions blocks.
2549 offNextEnd = sRawCode.find(';', offEnd + 1);
2550 fDecode = ( sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
2551 or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
2552 or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
2553 or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
2554 or sRawCode.find('IEMOP_HLP_VMX_INSTR', off, offEnd) >= 0
2555 or sRawCode.find('IEMOP_HLP_IN_VMX_OPERATION', off, offEnd) >= 0 ## @todo wrong
2556 );
2557
2558 if not oMatch:
2559 if ch != '#':
2560 aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
2561 else:
2562 aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
2563 off = offEnd + 1;
2564 elif oMatch.group(1).startswith('if'):
2565 #
2566 # if () xxx [else yyy] statement.
2567 #
2568 oStmt = McCppCond(sCondExpr, fDecode);
2569 aoStmts.append(oStmt);
2570 off = offEnd + 1;
2571
2572 # Following the if () we can either have a {} containing zero or more statements
2573 # or we have a single statement.
2574 offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
2575 if sRawCode[offBlock1] == '{':
2576 offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
2577 if offBlock1End < 0:
2578 self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
2579 offBlock1 += 1;
2580 else:
2581 offBlock1End = sRawCode.find(';', offBlock1, offStop);
2582 if offBlock1End < 0:
2583 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');
2584
2585 oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);
2586
2587 # The else is optional and can likewise be followed by {} or a single statement.
2588 off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
2589 if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
2590 offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
2591 if sRawCode[offBlock2] == '{':
2592 offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
2593 if offBlock2End < 0:
2594 self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
2595 offBlock2 += 1;
2596 else:
2597 offBlock2End = sRawCode.find(';', offBlock2, offStop);
2598 if offBlock2End < 0:
2599 self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');
2600
2601 oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
2602 off = offBlock2End + 1;
2603
2604 elif oMatch.group(1) == 'else':
2605 # Problematic 'else' branch, typically involving #ifdefs.
2606 self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');
2607
2608 return aoStmts;
2609
def decode(self):
    """
    Returns the statement list of this block, decoding it on first use.

    The result is cached in self.aoStmts.  While that attribute is still
    empty, the lines in self.asLines are concatenated and passed to
    self.decodeCode(), and the outcome is stored before being returned.
    Raises ParserException on failure (propagated from decodeCode).
    """
    # Already decoded (and non-empty)?  Then just hand out the cached list.
    if self.aoStmts:
        return self.aoStmts;

    self.aoStmts = self.decodeCode(''.join(self.asLines));
    return self.aoStmts;
2619
2620
def checkForTooEarlyEffSegUse(self, aoStmts):
    """
    Looks for pVCpu->iem.s.iEffSeg references among the statements that
    precede the first IEM_MC_CALC_RM_EFF_ADDR statement in aoStmts.

    Returns None when there is no such reference (or no address calculation
    statement in the list), otherwise an error string giving the 1-based
    position of the offending statement.

    Only the given list is inspected; conditional branches are not descended
    into, as we're ASSUMING they will not occur before the address
    calculation.  See r158454 for an example of this issue.
    """
    for iCalc, oCalcStmt in enumerate(aoStmts):
        if oCalcStmt.sName != 'IEM_MC_CALC_RM_EFF_ADDR':
            continue;

        # Scan backwards, starting with the statement right before the
        # effective address calculation.
        for iPrev in range(iCalc - 1, -1, -1):
            if any('pVCpu->iem.s.iEffSeg' in sParam for sParam in aoStmts[iPrev].asParams):
                return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iPrev + 1,);

        # Only the first IEM_MC_CALC_RM_EFF_ADDR statement is considered.
        break;
    return None;
2642
# Matches the first word of a C/C++ statement: optional leading whitespace,
# the word itself (group 1), then a space, an opening parenthesis or a
# semicolon.
koReCppFirstWord = re.compile(r'^\s*(\w+)[ (;]');

# First words of decoding related C++ statements that checkForDoneDecoding
# tolerates after the IEMOP_HLP_DONE_*DECODING* invocation.
kdDecodeCppStmtOkayAfterDone = dict.fromkeys(
    (
        'IEMOP_HLP_IN_VMX_OPERATION',
        'IEMOP_HLP_VMX_INSTR',
    ),
    True);
2648
def checkForDoneDecoding(self, aoStmts):
    """
    Verifies that the block has exactly one IEMOP_HLP_DONE_*DECODING*
    style invocation and that statements are on the right side of it.

    Returns None on success, error string on failure.

    What counts:
      - A C++ statement whose first word starts with IEMOP_HLP_DONE_ or
        IEMOP_HLP_DECODED_.
      - An IEM_MC_DEFER_TO_CIMPL_* statement in the very first position
        (implicitly done decoding).

    What is rejected:
      - A C++ statement flagged as decoding related (fDecode) after the
        done-marker, unless its first word is listed in
        kdDecodeCppStmtOkayAfterDone.
      - IEM_MC_CALC_RM_EFF_ADDR after the done-marker.
      - An MC statement marked as state modifying in g_dMcStmtParsers
        before the done-marker.

    This ensures safe instruction restarting in case the recompiler runs
    out of TB resources during recompilation (e.g. aRanges or aGCPhysPages
    entries).
    """
    ksTooLate = "statement #%u: Decoding statement following IEMOP_HLP_DONE_*DECODING*!";

    # The IEMOP_HLP_DONE_ stuff is not allowed inside conditionals, so only
    # the top-level statements are examined.
    cDoneMarkers = 0;
    for iCur, oCur in enumerate(aoStmts):
        if oCur.isCppStmt():
            oFirstWord = self.koReCppFirstWord.match(oCur.asParams[0]);
            if not oFirstWord:
                continue;
            sWord = oFirstWord.group(1);
            if sWord.startswith(('IEMOP_HLP_DONE_', 'IEMOP_HLP_DECODED_')):
                cDoneMarkers += 1;
            elif cDoneMarkers > 0 and oCur.fDecode and sWord not in self.kdDecodeCppStmtOkayAfterDone:
                return ksTooLate % (iCur + 1,);
        elif oCur.sName.startswith('IEM_MC_DEFER_TO_CIMPL_') and iCur == 0:
            cDoneMarkers += 1; # implicit
        elif cDoneMarkers == 0:
            # Entry layout: (parser, modifies-state, ...); unknown names count as harmless.
            if g_dMcStmtParsers.get(oCur.sName, (None, False))[1]:
                return "statement #%u: State modifying MC statement before IEMOP_HLP_DONE_*DECODING*!" % (iCur + 1,);
        elif oCur.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
            return ksTooLate % (iCur + 1,);

    if cDoneMarkers > 1:
        return "Block has more than one IEMOP_HLP_DONE_*DECODING* invocation!";
    if cDoneMarkers < 1:
        return "Block is missing IEMOP_HLP_DONE_*DECODING* invocation!";
    return None;
2690
def check(self):
    """
    Runs the sanity checks on the (decoded) block.

    The block is decoded via self.decode() and then handed to
    checkForTooEarlyEffSegUse and checkForDoneDecoding, in that order.

    Returns the list of error strings the checks produced; the list is
    empty if all is fine.
    """
    aoStmts  = self.decode();
    asErrors = [];
    for fnCheck in (self.checkForTooEarlyEffSegUse, self.checkForDoneDecoding):
        sError = fnCheck(aoStmts);
        if sError:
            asErrors.append(sError);
    return asErrors;
2708
2709
2710
2711## IEM_MC_XXX -> parser + info dictionary.
2712#
2713# The info columns:
2714# - col 0: boolean entry indicating whether the statement modifies state and
2715# must not be used before IEMOP_HLP_DONE_*.
2716# - col 1: boolean entry indicating native recompiler support.
2717#
2718# The raw table was generated via the following command
2719# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2720# | sort | uniq | gawk "{printf """ %%-60s (%%s, True)\n""", $1, $2}"
2721g_dMcStmtParsers = {
2722 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2723 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2724 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2725 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2726 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': (McBlock.parseMcGeneric, False, False, ),
2727 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': (McBlock.parseMcGeneric, False, False, ),
2728 'IEM_MC_ADD_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2729 'IEM_MC_ADD_GREG_U16_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2730 'IEM_MC_ADD_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2731 'IEM_MC_ADD_GREG_U32_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2732 'IEM_MC_ADD_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2733 'IEM_MC_ADD_GREG_U64_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2734 'IEM_MC_ADD_GREG_U8_TO_LOCAL': (McBlock.parseMcGeneric, False, False, ),
2735 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2736 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2737 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': (McBlock.parseMcGeneric, True, False, ),
2738 'IEM_MC_ADVANCE_RIP_AND_FINISH': (McBlock.parseMcGeneric, True, True, ),
2739 'IEM_MC_AND_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2740 'IEM_MC_AND_ARG_U16': (McBlock.parseMcGeneric, False, False, ),
2741 'IEM_MC_AND_ARG_U32': (McBlock.parseMcGeneric, False, False, ),
2742 'IEM_MC_AND_ARG_U64': (McBlock.parseMcGeneric, False, False, ),
2743 'IEM_MC_AND_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2744 'IEM_MC_AND_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2745 'IEM_MC_AND_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2746 'IEM_MC_AND_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2747 'IEM_MC_AND_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2748 'IEM_MC_AND_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2749 'IEM_MC_AND_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2750 'IEM_MC_AND_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2751 'IEM_MC_ARG': (McBlock.parseMcArg, False, True, ),
2752 'IEM_MC_ARG_CONST': (McBlock.parseMcArgConst, False, True, ),
2753 'IEM_MC_ARG_LOCAL_EFLAGS': (McBlock.parseMcArgLocalEFlags, False, True, ),
2754 'IEM_MC_ARG_LOCAL_REF': (McBlock.parseMcArgLocalRef, False, True, ),
2755 'IEM_MC_ASSIGN_TO_SMALLER': (McBlock.parseMcGeneric, False, False, ),
2756 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, True, ),
2757 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2758 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2759 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2760 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2761 'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2762 'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2763 'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2764 'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2765 'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2766 'IEM_MC_BSWAP_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2767 'IEM_MC_BSWAP_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2768 'IEM_MC_BSWAP_LOCAL_U64': (McBlock.parseMcGeneric, False, False, ),
2769 'IEM_MC_CALC_RM_EFF_ADDR': (McBlock.parseMcGeneric, False, False, ),
2770 'IEM_MC_CALL_AIMPL_3': (McBlock.parseMcCallAImpl, True, False, ),
2771 'IEM_MC_CALL_AIMPL_4': (McBlock.parseMcCallAImpl, True, False, ),
2772 'IEM_MC_CALL_AVX_AIMPL_2': (McBlock.parseMcCallAvxAImpl, True, False, ),
2773 'IEM_MC_CALL_AVX_AIMPL_3': (McBlock.parseMcCallAvxAImpl, True, False, ),
2774 'IEM_MC_CALL_CIMPL_0': (McBlock.parseMcCallCImpl, True, False, ),
2775 'IEM_MC_CALL_CIMPL_1': (McBlock.parseMcCallCImpl, True, False, ),
2776 'IEM_MC_CALL_CIMPL_2': (McBlock.parseMcCallCImpl, True, False, ),
2777 'IEM_MC_CALL_CIMPL_3': (McBlock.parseMcCallCImpl, True, False, ),
2778 'IEM_MC_CALL_CIMPL_4': (McBlock.parseMcCallCImpl, True, False, ),
2779 'IEM_MC_CALL_CIMPL_5': (McBlock.parseMcCallCImpl, True, False, ),
2780 'IEM_MC_CALL_FPU_AIMPL_1': (McBlock.parseMcCallFpuAImpl, True, False, ),
2781 'IEM_MC_CALL_FPU_AIMPL_2': (McBlock.parseMcCallFpuAImpl, True, False, ),
2782 'IEM_MC_CALL_FPU_AIMPL_3': (McBlock.parseMcCallFpuAImpl, True, False, ),
2783 'IEM_MC_CALL_MMX_AIMPL_2': (McBlock.parseMcCallMmxAImpl, True, False, ),
2784 'IEM_MC_CALL_MMX_AIMPL_3': (McBlock.parseMcCallMmxAImpl, True, False, ),
2785 'IEM_MC_CALL_SSE_AIMPL_2': (McBlock.parseMcCallSseAImpl, True, False, ),
2786 'IEM_MC_CALL_SSE_AIMPL_3': (McBlock.parseMcCallSseAImpl, True, False, ),
2787 'IEM_MC_CALL_VOID_AIMPL_0': (McBlock.parseMcCallVoidAImpl, True, False, ),
2788 'IEM_MC_CALL_VOID_AIMPL_1': (McBlock.parseMcCallVoidAImpl, True, False, ),
2789 'IEM_MC_CALL_VOID_AIMPL_2': (McBlock.parseMcCallVoidAImpl, True, False, ),
2790 'IEM_MC_CALL_VOID_AIMPL_3': (McBlock.parseMcCallVoidAImpl, True, False, ),
2791 'IEM_MC_CALL_VOID_AIMPL_4': (McBlock.parseMcCallVoidAImpl, True, False, ),
2792 'IEM_MC_CLEAR_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2793 'IEM_MC_CLEAR_FSW_EX': (McBlock.parseMcGeneric, True, False, ),
2794 'IEM_MC_CLEAR_HIGH_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2795 'IEM_MC_CLEAR_XREG_U32_MASK': (McBlock.parseMcGeneric, True, False, ),
2796 'IEM_MC_CLEAR_YREG_128_UP': (McBlock.parseMcGeneric, True, False, ),
2797 'IEM_MC_COMMIT_EFLAGS': (McBlock.parseMcGeneric, True, False, ),
2798 'IEM_MC_COPY_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
2799 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2800 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2801 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2802 'IEM_MC_DEFER_TO_CIMPL_0_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2803 'IEM_MC_DEFER_TO_CIMPL_1_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2804 'IEM_MC_DEFER_TO_CIMPL_2_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2805 'IEM_MC_DEFER_TO_CIMPL_3_RET': (McBlock.parseMcDeferToCImpl, False, False, ),
2806 'IEM_MC_END': (McBlock.parseMcGeneric, True, True, ),
2807 'IEM_MC_FETCH_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
2808 'IEM_MC_FETCH_EFLAGS_U8': (McBlock.parseMcGeneric, False, False, ),
2809 'IEM_MC_FETCH_FCW': (McBlock.parseMcGeneric, False, False, ),
2810 'IEM_MC_FETCH_FSW': (McBlock.parseMcGeneric, False, False, ),
2811 'IEM_MC_FETCH_GREG_U16': (McBlock.parseMcGeneric, False, True, ),
2812 'IEM_MC_FETCH_GREG_U16_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2813 'IEM_MC_FETCH_GREG_U16_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2814 'IEM_MC_FETCH_GREG_U16_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2815 'IEM_MC_FETCH_GREG_U16_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2816 'IEM_MC_FETCH_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
2817 'IEM_MC_FETCH_GREG_U32_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2818 'IEM_MC_FETCH_GREG_U32_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2819 'IEM_MC_FETCH_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
2820 'IEM_MC_FETCH_GREG_U64_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2821 'IEM_MC_FETCH_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
2822 'IEM_MC_FETCH_GREG_U8_SX_U16': (McBlock.parseMcGeneric, False, False, ),
2823 'IEM_MC_FETCH_GREG_U8_SX_U32': (McBlock.parseMcGeneric, False, False, ),
2824 'IEM_MC_FETCH_GREG_U8_SX_U64': (McBlock.parseMcGeneric, False, False, ),
2825 'IEM_MC_FETCH_GREG_U8_ZX_U16': (McBlock.parseMcGeneric, False, False, ),
2826 'IEM_MC_FETCH_GREG_U8_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2827 'IEM_MC_FETCH_GREG_U8_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2828 'IEM_MC_FETCH_MEM_D80': (McBlock.parseMcGeneric, True, False, ),
2829 'IEM_MC_FETCH_MEM_I16': (McBlock.parseMcGeneric, True, False, ),
2830 'IEM_MC_FETCH_MEM_I32': (McBlock.parseMcGeneric, True, False, ),
2831 'IEM_MC_FETCH_MEM_I64': (McBlock.parseMcGeneric, True, False, ),
2832 'IEM_MC_FETCH_MEM_R32': (McBlock.parseMcGeneric, True, False, ),
2833 'IEM_MC_FETCH_MEM_R64': (McBlock.parseMcGeneric, True, False, ),
2834 'IEM_MC_FETCH_MEM_R80': (McBlock.parseMcGeneric, True, False, ),
2835 'IEM_MC_FETCH_MEM_S32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2836 'IEM_MC_FETCH_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
2837 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2838 'IEM_MC_FETCH_MEM_U128_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2839 'IEM_MC_FETCH_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
2840 'IEM_MC_FETCH_MEM_U16_DISP': (McBlock.parseMcGeneric, True, False, ),
2841 'IEM_MC_FETCH_MEM_U16_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2842 'IEM_MC_FETCH_MEM_U16_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2843 'IEM_MC_FETCH_MEM_U16_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2844 'IEM_MC_FETCH_MEM_U16_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2845 'IEM_MC_FETCH_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
2846 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2847 'IEM_MC_FETCH_MEM_U256_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2848 'IEM_MC_FETCH_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
2849 'IEM_MC_FETCH_MEM_U32_DISP': (McBlock.parseMcGeneric, True, False, ),
2850 'IEM_MC_FETCH_MEM_U32_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2851 'IEM_MC_FETCH_MEM_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2852 'IEM_MC_FETCH_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
2853 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': (McBlock.parseMcGeneric, True, False, ),
2854 'IEM_MC_FETCH_MEM_U64_DISP': (McBlock.parseMcGeneric, True, False, ),
2855 'IEM_MC_FETCH_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
2856 'IEM_MC_FETCH_MEM_U8_SX_U16': (McBlock.parseMcGeneric, True, False, ),
2857 'IEM_MC_FETCH_MEM_U8_SX_U32': (McBlock.parseMcGeneric, True, False, ),
2858 'IEM_MC_FETCH_MEM_U8_SX_U64': (McBlock.parseMcGeneric, True, False, ),
2859 'IEM_MC_FETCH_MEM_U8_ZX_U16': (McBlock.parseMcGeneric, True, False, ),
2860 'IEM_MC_FETCH_MEM_U8_ZX_U32': (McBlock.parseMcGeneric, True, False, ),
2861 'IEM_MC_FETCH_MEM_U8_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
2862 'IEM_MC_FETCH_MEM_XMM': (McBlock.parseMcGeneric, True, False, ),
2863 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
2864 'IEM_MC_FETCH_MEM_XMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2865 'IEM_MC_FETCH_MEM_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
2866 'IEM_MC_FETCH_MEM_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
2867 'IEM_MC_FETCH_MEM_YMM': (McBlock.parseMcGeneric, True, False, ),
2868 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
2869 'IEM_MC_FETCH_MEM_YMM_NO_AC': (McBlock.parseMcGeneric, True, False, ),
2870 'IEM_MC_FETCH_MEM16_U8': (McBlock.parseMcGeneric, True, False, ),
2871 'IEM_MC_FETCH_MEM32_U8': (McBlock.parseMcGeneric, True, False, ),
2872 'IEM_MC_FETCH_MREG_U32': (McBlock.parseMcGeneric, False, False, ),
2873 'IEM_MC_FETCH_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
2874 'IEM_MC_FETCH_SREG_BASE_U32': (McBlock.parseMcGeneric, False, False, ),
2875 'IEM_MC_FETCH_SREG_BASE_U64': (McBlock.parseMcGeneric, False, False, ),
2876 'IEM_MC_FETCH_SREG_U16': (McBlock.parseMcGeneric, False, False, ),
2877 'IEM_MC_FETCH_SREG_ZX_U32': (McBlock.parseMcGeneric, False, False, ),
2878 'IEM_MC_FETCH_SREG_ZX_U64': (McBlock.parseMcGeneric, False, False, ),
2879 'IEM_MC_FETCH_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
2880 'IEM_MC_FETCH_XREG_U16': (McBlock.parseMcGeneric, False, False, ),
2881 'IEM_MC_FETCH_XREG_U32': (McBlock.parseMcGeneric, False, False, ),
2882 'IEM_MC_FETCH_XREG_U64': (McBlock.parseMcGeneric, False, False, ),
2883 'IEM_MC_FETCH_XREG_U8': (McBlock.parseMcGeneric, False, False, ),
2884 'IEM_MC_FETCH_XREG_XMM': (McBlock.parseMcGeneric, False, False, ),
2885 'IEM_MC_FETCH_YREG_2ND_U64': (McBlock.parseMcGeneric, False, False, ),
2886 'IEM_MC_FETCH_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
2887 'IEM_MC_FETCH_YREG_U256': (McBlock.parseMcGeneric, False, False, ),
2888 'IEM_MC_FETCH_YREG_U32': (McBlock.parseMcGeneric, False, False, ),
2889 'IEM_MC_FETCH_YREG_U64': (McBlock.parseMcGeneric, False, False, ),
2890 'IEM_MC_FLIP_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
2891 'IEM_MC_FPU_FROM_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2892 'IEM_MC_FPU_STACK_DEC_TOP': (McBlock.parseMcGeneric, True, False, ),
2893 'IEM_MC_FPU_STACK_FREE': (McBlock.parseMcGeneric, True, False, ),
2894 'IEM_MC_FPU_STACK_INC_TOP': (McBlock.parseMcGeneric, True, False, ),
2895 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': (McBlock.parseMcGeneric, True, False, ),
2896 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2897 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2898 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': (McBlock.parseMcGeneric, True, False, ),
2899 'IEM_MC_FPU_STACK_UNDERFLOW': (McBlock.parseMcGeneric, True, False, ),
2900 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2901 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2902 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
2903 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
2904 'IEM_MC_FPU_TO_MMX_MODE': (McBlock.parseMcGeneric, True, False, ),
2905 'IEM_MC_HINT_FLUSH_GUEST_SHADOW': (McBlock.parseMcGeneric, True, True, ),
2906 'IEM_MC_IF_CX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2907 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2908 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2909 'IEM_MC_IF_ECX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2910 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2911 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2912 'IEM_MC_IF_EFL_ANY_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2913 'IEM_MC_IF_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2914 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2915 'IEM_MC_IF_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2916 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2917 'IEM_MC_IF_EFL_BITS_EQ': (McBlock.parseMcGenericCond, True, True, ),
2918 'IEM_MC_IF_EFL_BITS_NE': (McBlock.parseMcGenericCond, True, True, ),
2919 'IEM_MC_IF_EFL_NO_BITS_SET': (McBlock.parseMcGenericCond, True, True, ),
2920 'IEM_MC_IF_FCW_IM': (McBlock.parseMcGenericCond, True, False, ),
2921 'IEM_MC_IF_FPUREG_IS_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2922 'IEM_MC_IF_FPUREG_NOT_EMPTY': (McBlock.parseMcGenericCond, True, False, ),
2923 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2924 'IEM_MC_IF_GREG_BIT_SET': (McBlock.parseMcGenericCond, True, False, ),
2925 'IEM_MC_IF_LOCAL_IS_Z': (McBlock.parseMcGenericCond, True, False, ),
2926 'IEM_MC_IF_MXCSR_XCPT_PENDING': (McBlock.parseMcGenericCond, True, False, ),
2927 'IEM_MC_IF_RCX_IS_NZ': (McBlock.parseMcGenericCond, True, True, ),
2928 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': (McBlock.parseMcGenericCond, True, True, ),
2929 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': (McBlock.parseMcGenericCond, True, True, ),
2930 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': (McBlock.parseMcGenericCond, True, False, ),
2931 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': (McBlock.parseMcGenericCond, True, False, ),
2932 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': (McBlock.parseMcImplicitAvxAArgs, False, False, ),
2933 'IEM_MC_INT_CLEAR_ZMM_256_UP': (McBlock.parseMcGeneric, True, False, ),
2934 'IEM_MC_LOCAL': (McBlock.parseMcLocal, False, True, ),
2935 'IEM_MC_LOCAL_ASSIGN': (McBlock.parseMcLocalAssign, False, True, ),
2936 'IEM_MC_LOCAL_CONST': (McBlock.parseMcLocalConst, False, True, ),
2937 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2938 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2939 'IEM_MC_MAYBE_RAISE_FPU_XCPT': (McBlock.parseMcGeneric, True, False, ),
2940 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': (McBlock.parseMcGeneric, True, False, ),
2941 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2942 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': (McBlock.parseMcGeneric, True, False, ),
2943 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2944 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': (McBlock.parseMcGeneric, True, False, ),
2945 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': (McBlock.parseMcGeneric, True, False, ),
2946 'IEM_MC_MEM_COMMIT_AND_UNMAP': (McBlock.parseMcGeneric, True, False, ),
2947 'IEM_MC_MEM_COMMIT_AND_UNMAP_RW': (McBlock.parseMcGeneric, True, False, ),
2948 'IEM_MC_MEM_COMMIT_AND_UNMAP_RO': (McBlock.parseMcGeneric, True, False, ),
2949 'IEM_MC_MEM_COMMIT_AND_UNMAP_WO': (McBlock.parseMcGeneric, True, False, ),
2950 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': (McBlock.parseMcGeneric, True, False, ),
2951 'IEM_MC_MEM_MAP': (McBlock.parseMcGeneric, True, False, ),
2952 'IEM_MC_MEM_MAP_EX': (McBlock.parseMcGeneric, True, False, ),
2953 'IEM_MC_MEM_MAP_U8_RW': (McBlock.parseMcGeneric, True, False, ),
2954 'IEM_MC_MEM_MAP_U8_RO': (McBlock.parseMcGeneric, True, False, ),
2955 'IEM_MC_MEM_MAP_U8_WO': (McBlock.parseMcGeneric, True, False, ),
2956 'IEM_MC_MEM_MAP_U16_RW': (McBlock.parseMcGeneric, True, False, ),
2957 'IEM_MC_MEM_MAP_U16_RO': (McBlock.parseMcGeneric, True, False, ),
2958 'IEM_MC_MEM_MAP_U16_WO': (McBlock.parseMcGeneric, True, False, ),
2959 'IEM_MC_MEM_MAP_U32_RW': (McBlock.parseMcGeneric, True, False, ),
2960 'IEM_MC_MEM_MAP_U32_RO': (McBlock.parseMcGeneric, True, False, ),
2961 'IEM_MC_MEM_MAP_U32_WO': (McBlock.parseMcGeneric, True, False, ),
2962 'IEM_MC_MEM_MAP_U64_RW': (McBlock.parseMcGeneric, True, False, ),
2963 'IEM_MC_MEM_MAP_U64_RO': (McBlock.parseMcGeneric, True, False, ),
2964 'IEM_MC_MEM_MAP_U64_WO': (McBlock.parseMcGeneric, True, False, ),
2965 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2966 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2967 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2968 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2969 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2970 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
2971 'IEM_MC_MODIFIED_MREG': (McBlock.parseMcGeneric, True, False, ),
2972 'IEM_MC_MODIFIED_MREG_BY_REF': (McBlock.parseMcGeneric, True, False, ),
2973 'IEM_MC_OR_2LOCS_U32': (McBlock.parseMcGeneric, False, False, ),
2974 'IEM_MC_OR_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
2975 'IEM_MC_OR_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
2976 'IEM_MC_OR_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
2977 'IEM_MC_OR_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
2978 'IEM_MC_OR_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
2979 'IEM_MC_OR_LOCAL_U32': (McBlock.parseMcGeneric, False, False, ),
2980 'IEM_MC_OR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
2981 'IEM_MC_POP_U16': (McBlock.parseMcGeneric, True, False, ),
2982 'IEM_MC_POP_U32': (McBlock.parseMcGeneric, True, False, ),
2983 'IEM_MC_POP_U64': (McBlock.parseMcGeneric, True, False, ),
2984 'IEM_MC_PREPARE_AVX_USAGE': (McBlock.parseMcGeneric, False, False, ),
2985 'IEM_MC_PREPARE_FPU_USAGE': (McBlock.parseMcGeneric, False, False, ),
2986 'IEM_MC_PREPARE_SSE_USAGE': (McBlock.parseMcGeneric, False, False, ),
2987 'IEM_MC_PUSH_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
2988 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
2989 'IEM_MC_PUSH_FPU_RESULT_TWO': (McBlock.parseMcGeneric, True, False, ),
2990 'IEM_MC_PUSH_U16': (McBlock.parseMcGeneric, True, False, ),
2991 'IEM_MC_PUSH_U32': (McBlock.parseMcGeneric, True, False, ),
2992 'IEM_MC_PUSH_U32_SREG': (McBlock.parseMcGeneric, True, False, ),
2993 'IEM_MC_PUSH_U64': (McBlock.parseMcGeneric, True, False, ),
2994 'IEM_MC_RAISE_DIVIDE_ERROR': (McBlock.parseMcGeneric, True, False, ),
2995 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': (McBlock.parseMcGeneric, True, False, ),
2996 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': (McBlock.parseMcGeneric, True, False, ),
2997 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': (McBlock.parseMcGeneric, True, False, ),
2998 'IEM_MC_REF_EFLAGS': (McBlock.parseMcGeneric, False, False, ),
2999 'IEM_MC_REF_FPUREG': (McBlock.parseMcGeneric, False, False, ),
3000 'IEM_MC_REF_GREG_I32': (McBlock.parseMcGeneric, False, False, ),
3001 'IEM_MC_REF_GREG_I32_CONST': (McBlock.parseMcGeneric, False, False, ),
3002 'IEM_MC_REF_GREG_I64': (McBlock.parseMcGeneric, False, False, ),
3003 'IEM_MC_REF_GREG_I64_CONST': (McBlock.parseMcGeneric, False, False, ),
3004 'IEM_MC_REF_GREG_U16': (McBlock.parseMcGeneric, False, False, ),
3005 'IEM_MC_REF_GREG_U16_CONST': (McBlock.parseMcGeneric, False, False, ),
3006 'IEM_MC_REF_GREG_U32': (McBlock.parseMcGeneric, False, False, ),
3007 'IEM_MC_REF_GREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3008 'IEM_MC_REF_GREG_U64': (McBlock.parseMcGeneric, False, False, ),
3009 'IEM_MC_REF_GREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3010 'IEM_MC_REF_GREG_U8': (McBlock.parseMcGeneric, False, False, ),
3011 'IEM_MC_REF_GREG_U8_CONST': (McBlock.parseMcGeneric, False, False, ),
3012 'IEM_MC_REF_LOCAL': (McBlock.parseMcGeneric, False, False, ),
3013 'IEM_MC_REF_MREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3014 'IEM_MC_REF_MREG_U64': (McBlock.parseMcGeneric, False, False, ),
3015 'IEM_MC_REF_MREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3016 'IEM_MC_REF_MXCSR': (McBlock.parseMcGeneric, False, False, ),
3017 'IEM_MC_REF_XREG_R32_CONST': (McBlock.parseMcGeneric, False, False, ),
3018 'IEM_MC_REF_XREG_R64_CONST': (McBlock.parseMcGeneric, False, False, ),
3019 'IEM_MC_REF_XREG_U128': (McBlock.parseMcGeneric, False, False, ),
3020 'IEM_MC_REF_XREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3021 'IEM_MC_REF_XREG_U32_CONST': (McBlock.parseMcGeneric, False, False, ),
3022 'IEM_MC_REF_XREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3023 'IEM_MC_REF_XREG_XMM_CONST': (McBlock.parseMcGeneric, False, False, ),
3024 'IEM_MC_REF_YREG_U128': (McBlock.parseMcGeneric, False, False, ),
3025 'IEM_MC_REF_YREG_U128_CONST': (McBlock.parseMcGeneric, False, False, ),
3026 'IEM_MC_REF_YREG_U64_CONST': (McBlock.parseMcGeneric, False, False, ),
3027 'IEM_MC_REL_JMP_S16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3028 'IEM_MC_REL_JMP_S32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3029 'IEM_MC_REL_JMP_S8_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3030 'IEM_MC_RETURN_ON_FAILURE': (McBlock.parseMcGeneric, False, False, ),
3031 'IEM_MC_SAR_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3032 'IEM_MC_SAR_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3033 'IEM_MC_SAR_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3034 'IEM_MC_SET_EFL_BIT': (McBlock.parseMcGeneric, True, False, ),
3035 'IEM_MC_SET_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3036 'IEM_MC_SET_RIP_U16_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3037 'IEM_MC_SET_RIP_U32_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3038 'IEM_MC_SET_RIP_U64_AND_FINISH': (McBlock.parseMcGeneric, True, False, ),
3039 'IEM_MC_SHL_LOCAL_S16': (McBlock.parseMcGeneric, False, False, ),
3040 'IEM_MC_SHL_LOCAL_S32': (McBlock.parseMcGeneric, False, False, ),
3041 'IEM_MC_SHL_LOCAL_S64': (McBlock.parseMcGeneric, False, False, ),
3042 'IEM_MC_SHR_LOCAL_U8': (McBlock.parseMcGeneric, False, False, ),
3043 'IEM_MC_SSE_UPDATE_MXCSR': (McBlock.parseMcGeneric, True, False, ),
3044 'IEM_MC_STORE_FPU_RESULT': (McBlock.parseMcGeneric, True, False, ),
3045 'IEM_MC_STORE_FPU_RESULT_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3046 'IEM_MC_STORE_FPU_RESULT_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3047 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3048 'IEM_MC_STORE_FPUREG_R80_SRC_REF': (McBlock.parseMcGeneric, True, False, ),
3049 'IEM_MC_STORE_GREG_I64': (McBlock.parseMcGeneric, True, False, ),
3050 'IEM_MC_STORE_GREG_U16': (McBlock.parseMcGeneric, True, False, ),
3051 'IEM_MC_STORE_GREG_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3052 'IEM_MC_STORE_GREG_U32': (McBlock.parseMcGeneric, True, False, ),
3053 'IEM_MC_STORE_GREG_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3054 'IEM_MC_STORE_GREG_U64': (McBlock.parseMcGeneric, True, False, ),
3055 'IEM_MC_STORE_GREG_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3056 'IEM_MC_STORE_GREG_U8': (McBlock.parseMcGeneric, True, False, ),
3057 'IEM_MC_STORE_GREG_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3058 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3059 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3060 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3061 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3062 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3063 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3064 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3065 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': (McBlock.parseMcGeneric, True, False, ),
3066 'IEM_MC_STORE_MEM_U128': (McBlock.parseMcGeneric, True, False, ),
3067 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': (McBlock.parseMcGeneric, True, False, ),
3068 'IEM_MC_STORE_MEM_U16': (McBlock.parseMcGeneric, True, False, ),
3069 'IEM_MC_STORE_MEM_U16_CONST': (McBlock.parseMcGeneric, True, False, ),
3070 'IEM_MC_STORE_MEM_U256': (McBlock.parseMcGeneric, True, False, ),
3071 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': (McBlock.parseMcGeneric, True, False, ),
3072 'IEM_MC_STORE_MEM_U32': (McBlock.parseMcGeneric, True, False, ),
3073 'IEM_MC_STORE_MEM_U32_CONST': (McBlock.parseMcGeneric, True, False, ),
3074 'IEM_MC_STORE_MEM_U64': (McBlock.parseMcGeneric, True, False, ),
3075 'IEM_MC_STORE_MEM_U64_CONST': (McBlock.parseMcGeneric, True, False, ),
3076 'IEM_MC_STORE_MEM_U8': (McBlock.parseMcGeneric, True, False, ),
3077 'IEM_MC_STORE_MEM_U8_CONST': (McBlock.parseMcGeneric, True, False, ),
3078 'IEM_MC_STORE_MREG_U32_ZX_U64': (McBlock.parseMcGeneric, True, False, ),
3079 'IEM_MC_STORE_MREG_U64': (McBlock.parseMcGeneric, True, False, ),
3080 'IEM_MC_STORE_SREG_BASE_U32': (McBlock.parseMcGeneric, True, False, ),
3081 'IEM_MC_STORE_SREG_BASE_U64': (McBlock.parseMcGeneric, True, False, ),
3082 'IEM_MC_STORE_SSE_RESULT': (McBlock.parseMcGeneric, True, False, ),
3083 'IEM_MC_STORE_XREG_HI_U64': (McBlock.parseMcGeneric, True, False, ),
3084 'IEM_MC_STORE_XREG_R32': (McBlock.parseMcGeneric, True, False, ),
3085 'IEM_MC_STORE_XREG_R64': (McBlock.parseMcGeneric, True, False, ),
3086 'IEM_MC_STORE_XREG_U128': (McBlock.parseMcGeneric, True, False, ),
3087 'IEM_MC_STORE_XREG_U16': (McBlock.parseMcGeneric, True, False, ),
3088 'IEM_MC_STORE_XREG_U32': (McBlock.parseMcGeneric, True, False, ),
3089 'IEM_MC_STORE_XREG_U32_U128': (McBlock.parseMcGeneric, True, False, ),
3090 'IEM_MC_STORE_XREG_U32_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3091 'IEM_MC_STORE_XREG_U64': (McBlock.parseMcGeneric, True, False, ),
3092 'IEM_MC_STORE_XREG_U64_ZX_U128': (McBlock.parseMcGeneric, True, False, ),
3093 'IEM_MC_STORE_XREG_U8': (McBlock.parseMcGeneric, True, False, ),
3094 'IEM_MC_STORE_XREG_XMM': (McBlock.parseMcGeneric, True, False, ),
3095 'IEM_MC_STORE_XREG_XMM_U32': (McBlock.parseMcGeneric, True, False, ),
3096 'IEM_MC_STORE_XREG_XMM_U64': (McBlock.parseMcGeneric, True, False, ),
3097 'IEM_MC_STORE_YREG_U128': (McBlock.parseMcGeneric, True, False, ),
3098 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3099 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3100 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3101 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, False, ),
3102 'IEM_MC_SUB_GREG_U16': (McBlock.parseMcGeneric, True, True, ),
3103 'IEM_MC_SUB_GREG_U32': (McBlock.parseMcGeneric, True, True, ),
3104 'IEM_MC_SUB_GREG_U64': (McBlock.parseMcGeneric, True, True, ),
3105 'IEM_MC_SUB_LOCAL_U16': (McBlock.parseMcGeneric, False, False, ),
3106 'IEM_MC_UPDATE_FPU_OPCODE_IP': (McBlock.parseMcGeneric, True, False, ),
3107 'IEM_MC_UPDATE_FSW': (McBlock.parseMcGeneric, True, False, ),
3108 'IEM_MC_UPDATE_FSW_CONST': (McBlock.parseMcGeneric, True, False, ),
3109 'IEM_MC_UPDATE_FSW_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3110 'IEM_MC_UPDATE_FSW_THEN_POP_POP': (McBlock.parseMcGeneric, True, False, ),
3111 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': (McBlock.parseMcGeneric, True, False, ),
3112 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': (McBlock.parseMcGeneric, True, False, ),
3113 'IEM_MC_NO_NATIVE_RECOMPILE': (McBlock.parseMcGeneric, False, False, ),
3114};
3115
## List of microcode blocks.
# Module-level list that starts out empty; per the type comment its elements
# are McBlock instances.
g_aoMcBlocks = [] # type: List[McBlock]
3118
3119
3120
class ParserException(Exception):
    """
    Exception type used by the parser for errors it raises itself.

    It carries nothing but the message text handed to the constructor.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
3125
3126
3127class SimpleParser(object): # pylint: disable=too-many-instance-attributes
3128 """
3129 Parser of IEMAllInstruction*.cpp.h instruction specifications.
3130 """
3131
    ## @name Parser state.
    ## Values for the iState attribute; __init__ starts the parser in kiCode.
    ## @{
    kiCode = 0;
    # NOTE(review): presumably the 'inside a multi-line comment' state - confirm
    # against the parsing loop, which is not visible here.
    kiCommentMulti = 1;
    ## @}
3137
3138 class Macro(object):
3139 """ Macro """
3140 def __init__(self, sName, asArgs, sBody, iLine):
3141 self.sName = sName; ##< The macro name.
3142 self.asArgs = asArgs; ##< None if simple macro, list of parameters otherwise.
3143 self.sBody = sBody;
3144 self.iLine = iLine;
3145 self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;
3146
3147 @staticmethod
3148 def _needSpace(ch):
3149 """ This is just to make the expanded output a bit prettier. """
3150 return ch.isspace() and ch != '(';
3151
3152 def expandMacro(self, oParent, asArgs = None):
3153 """ Expands the macro body with the given arguments. """
3154 _ = oParent;
3155 sBody = self.sBody;
3156
3157 if self.oReArgMatch:
3158 assert len(asArgs) == len(self.asArgs);
3159 #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
3160
3161 dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };
3162 oMatch = self.oReArgMatch.search(sBody);
3163 while oMatch:
3164 sName = oMatch.group(2);
3165 #oParent.debug('%s %s..%s (%s)' % (sName, oMatch.start(), oMatch.end(),oMatch.group()));
3166 sValue = dArgs[sName];
3167 sPre = '';
3168 if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
3169 sPre = ' ';
3170 sPost = '';
3171 if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
3172 sPost = ' ';
3173 sBody = sBody[ : oMatch.start()] + sPre + sValue + sPost + sBody[oMatch.end() : ];
3174 oMatch = self.oReArgMatch.search(sBody, oMatch.start() + len(sValue));
3175 else:
3176 assert not asArgs;
3177
3178 return sBody;
3179
3180 class PreprocessorConditional(object):
3181 """ Preprocessor conditional (#if/#ifdef/#ifndef/#elif/#else/#endif). """
3182
3183 ## Known defines.
3184 # - A value of 1 indicates that it's always defined.
3185 # - A value of 0 if it's always undefined
3186 # - A value of -1 if it's an arch and it depends of script parameters.
3187 # - A value of -2 if it's not recognized when filtering MC blocks.
3188 kdKnownDefines = {
3189 'IEM_WITH_ONE_BYTE_TABLE': 1,
3190 'IEM_WITH_TWO_BYTE_TABLE': 1,
3191 'IEM_WITH_THREE_0F_38': 1,
3192 'IEM_WITH_THREE_0F_3A': 1,
3193 'IEM_WITH_THREE_BYTE_TABLES': 1,
3194 'IEM_WITH_3DNOW': 1,
3195 'IEM_WITH_3DNOW_TABLE': 1,
3196 'IEM_WITH_VEX': 1,
3197 'IEM_WITH_VEX_TABLES': 1,
3198 'VBOX_WITH_NESTED_HWVIRT_VMX': 1,
3199 'VBOX_WITH_NESTED_HWVIRT_VMX_EPT': 1,
3200 'VBOX_WITH_NESTED_HWVIRT_SVM': 1,
3201 'LOG_ENABLED': 1,
3202 'RT_WITHOUT_PRAGMA_ONCE': 0,
3203 'TST_IEM_CHECK_MC': 0,
3204 'IEM_WITHOUT_ASSEMBLY': -2, ##< @todo ??
3205 'RT_ARCH_AMD64': -1,
3206 'RT_ARCH_ARM64': -1,
3207 'RT_ARCH_ARM32': -1,
3208 'RT_ARCH_X86': -1,
3209 'RT_ARCH_SPARC': -1,
3210 'RT_ARCH_SPARC64': -1,
3211 };
3212 kdBuildArchToIprt = {
3213 'amd64': 'RT_ARCH_AMD64',
3214 'arm64': 'RT_ARCH_ARM64',
3215 'sparc32': 'RT_ARCH_SPARC64',
3216 };
3217 ## For parsing the next defined(xxxx).
3218 koMatchDefined = re.compile(r'\s*defined\s*\(\s*([^ \t)]+)\s*\)\s*');
3219
3220 def __init__(self, sType, sExpr):
3221 self.sType = sType;
3222 self.sExpr = sExpr; ##< Expression without command and no leading or trailing spaces.
3223 self.aoElif = [] # type: List[PreprocessorConditional]
3224 self.fInElse = [];
3225 if sType in ('if', 'elif'):
3226 self.checkExpression(sExpr);
3227 else:
3228 self.checkSupportedDefine(sExpr)
3229
3230 @staticmethod
3231 def checkSupportedDefine(sDefine):
3232 """ Checks that sDefine is one that we support. Raises exception if unuspported. """
3233 #print('debug: checkSupportedDefine: %s' % (sDefine,), file = sys.stderr);
3234 if sDefine in SimpleParser.PreprocessorConditional.kdKnownDefines:
3235 return True;
3236 if sDefine.startswith('VMM_INCLUDED_') and sDefine.endswith('_h'):
3237 return True;
3238 raise Exception('Unsupported define: %s' % (sDefine,));
3239
3240 @staticmethod
3241 def checkExpression(sExpr):
3242 """ Check that the expression is supported. Raises exception if not. """
3243 #print('debug: checkExpression: %s' % (sExpr,), file = sys.stderr);
3244 if sExpr in ('0', '1'):
3245 return True;
3246
3247 off = 0;
3248 cParan = 0;
3249 while off < len(sExpr):
3250 ch = sExpr[off];
3251
3252 # Unary operator or parentheses:
3253 if ch in ('(', '!'):
3254 if ch == '(':
3255 cParan += 1;
3256 off += 1;
3257 else:
3258 # defined(xxxx)
3259 oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
3260 if oMatch:
3261 SimpleParser.PreprocessorConditional.checkSupportedDefine(oMatch.group(1));
3262 elif sExpr[off:] != '1':
3263 raise Exception('Cannot grok: \'%s\' (at %u in: \'%s\')' % (sExpr[off:10], off + 1, sExpr,));
3264 off = oMatch.end();
3265
3266 # Look for closing parentheses.
3267 while off < len(sExpr) and sExpr[off].isspace():
3268 off += 1;
3269 if cParan > 0:
3270 while off < len(sExpr) and sExpr[off] == ')':
3271 if cParan <= 0:
3272 raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
3273 cParan -= 1;
3274 off += 1;
3275 while off < len(sExpr) and sExpr[off].isspace():
3276 off += 1;
3277
3278 # Look for binary operator.
3279 if off >= len(sExpr):
3280 break;
3281 if sExpr[off:off + 2] in ('||', '&&'):
3282 off += 2;
3283 else:
3284 raise Exception('Cannot grok operator: \'%s\' (at %u in: \'%s\')' % (sExpr[off:2], off + 1, sExpr,));
3285
3286 # Skip spaces.
3287 while off < len(sExpr) and sExpr[off].isspace():
3288 off += 1;
3289 if cParan != 0:
3290 raise Exception('Unbalanced parentheses at %u in \'%s\'' % (off + 1, sExpr,));
3291 return True;
3292
3293 @staticmethod
3294 def isArchIncludedInExpr(sExpr, sArch):
3295 """ Checks if sArch is included in the given expression. """
3296 # We only grok defined() [|| defined()...] and [1|0] at the moment.
3297 if sExpr == '0':
3298 return False;
3299 if sExpr == '1':
3300 return True;
3301 off = 0;
3302 while off < len(sExpr):
3303 # defined(xxxx)
3304 oMatch = SimpleParser.PreprocessorConditional.koMatchDefined.match(sExpr, off);
3305 if not oMatch:
3306 if sExpr[off:] == '1':
3307 return True;
3308 raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:10], off + 1, sExpr,));
3309 if SimpleParser.PreprocessorConditional.matchDefined(oMatch.group(1), sArch):
3310 return True;
3311 off = oMatch.end();
3312
3313 # Look for OR operator.
3314 while off + 1 < len(sExpr) and sExpr[off + 1].isspace():
3315 off += 1;
3316 if off >= len(sExpr):
3317 break;
3318 if sExpr.startswith('||'):
3319 off += 2;
3320 else:
3321 raise Exception('Cannot grok: %s (at %u in: %s)' % (sExpr[off:10], off + 1, sExpr,));
3322
3323 return False;
3324
3325 @staticmethod
3326 def matchArch(sDefine, sArch):
3327 """ Compares sDefine (RT_ARCH_XXXX) and sArch (x86, amd64, arm64, ++). """
3328 return SimpleParser.PreprocessorConditional.kdBuildArchToIprt[sArch] == sDefine;
3329
3330 @staticmethod
3331 def matchDefined(sExpr, sArch):
3332 """ Check the result of an ifdef/ifndef expression, given sArch. """
3333 iDefine = SimpleParser.PreprocessorConditional.kdKnownDefines.get(sExpr, 0);
3334 if iDefine == -2:
3335 raise Exception('Unsupported define for MC block filtering: %s' % (sExpr,));
3336 return iDefine == 1 or (iDefine == -1 and SimpleParser.PreprocessorConditional.matchArch(sExpr, sArch));
3337
3338 def isArchIncludedInPrimaryBlock(self, sArch):
3339 """ Checks if sArch is included in the (primary) 'if' block. """
3340 if self.sType == 'ifdef':
3341 return self.matchDefined(self.sExpr, sArch);
3342 if self.sType == 'ifndef':
3343 return not self.matchDefined(self.sExpr, sArch);
3344 return self.isArchIncludedInExpr(self.sExpr, sArch);
3345
3346 @staticmethod
3347 def isInBlockForArch(aoCppCondStack, sArch, iLine):
3348 """ Checks if sArch is included in the current conditional block. """
3349 _ = iLine;
3350 #print('debug: isInBlockForArch(%s,%s); line %s' % (len(aoCppCondStack), sArch, iLine), file = sys.stderr);
3351 for oCond in aoCppCondStack:
3352 if oCond.isArchIncludedInPrimaryBlock(sArch):
3353 if oCond.aoElif or oCond.fInElse:
3354 #print('debug: isInBlockForArch -> False #1', file = sys.stderr);
3355 return False;
3356 #print('debug: isInBlockForArch(%s,%s): in IF-block' % (len(aoCppCondStack), sArch), file = sys.stderr);
3357 else:
3358 fFine = False;
3359 for oElifCond in oCond.aoElif:
3360 if oElifCond.isArchIncludedInPrimaryBlock(sArch):
3361 if oElifCond is not oCond.aoElif[-1] or oCond.fInElse:
3362 #print('debug: isInBlockForArch -> False #3', file = sys.stderr);
3363 return False;
3364 fFine = True;
3365 if not fFine and not oCond.fInElse:
3366 #print('debug: isInBlockForArch -> False #4', file = sys.stderr);
3367 return False;
3368 #print('debug: isInBlockForArch -> True', file = sys.stderr);
3369 return True;
3370
3371 def __init__(self, sSrcFile, asLines, sDefaultMap, sHostArch, oInheritMacrosFrom = None):
3372 self.sSrcFile = sSrcFile;
3373 self.asLines = asLines;
3374 self.iLine = 0;
3375 self.iState = self.kiCode;
3376 self.sComment = '';
3377 self.iCommentLine = 0;
3378 self.aoCurInstrs = [] # type: List[Instruction]
3379 self.oCurFunction = None # type: DecoderFunction
3380 self.iMcBlockInFunc = 0;
3381 self.oCurMcBlock = None # type: McBlock
3382 self.dMacros = {} # type: Dict[str, SimpleParser.Macro]
3383 self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
3384 if oInheritMacrosFrom:
3385 self.dMacros = dict(oInheritMacrosFrom.dMacros);
3386 self.oReMacros = oInheritMacrosFrom.oReMacros;
3387 self.aoCppCondStack = [] # type: List[PreprocessorConditional] ##< Preprocessor conditional stack.
3388 self.sHostArch = sHostArch;
3389
3390 assert sDefaultMap in g_dInstructionMaps;
3391 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
3392
3393 self.cTotalInstr = 0;
3394 self.cTotalStubs = 0;
3395 self.cTotalTagged = 0;
3396 self.cTotalMcBlocks = 0;
3397
3398 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3399 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3400 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
3401 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
3402 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
3403 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
3404 self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
3405 self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments.
3406 self.oReHashDefine2 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
3407 self.oReHashDefine3 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
3408 self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[1-5]_RET)\s*\('); ##> Not DEFER_TO_CIMPL_0_RET!
3409 self.fDebug = True;
3410 self.fDebugMc = False;
3411 self.fDebugPreproc = False;
3412
3413 self.dTagHandlers = {
3414 '@opbrief': self.parseTagOpBrief,
3415 '@opdesc': self.parseTagOpDesc,
3416 '@opmnemonic': self.parseTagOpMnemonic,
3417 '@op1': self.parseTagOpOperandN,
3418 '@op2': self.parseTagOpOperandN,
3419 '@op3': self.parseTagOpOperandN,
3420 '@op4': self.parseTagOpOperandN,
3421 '@oppfx': self.parseTagOpPfx,
3422 '@opmaps': self.parseTagOpMaps,
3423 '@opcode': self.parseTagOpcode,
3424 '@opcodesub': self.parseTagOpcodeSub,
3425 '@openc': self.parseTagOpEnc,
3426 '@opfltest': self.parseTagOpEFlags,
3427 '@opflmodify': self.parseTagOpEFlags,
3428 '@opflundef': self.parseTagOpEFlags,
3429 '@opflset': self.parseTagOpEFlags,
3430 '@opflclear': self.parseTagOpEFlags,
3431 '@ophints': self.parseTagOpHints,
3432 '@opdisenum': self.parseTagOpDisEnum,
3433 '@opmincpu': self.parseTagOpMinCpu,
3434 '@opcpuid': self.parseTagOpCpuId,
3435 '@opgroup': self.parseTagOpGroup,
3436 '@opunused': self.parseTagOpUnusedInvalid,
3437 '@opinvalid': self.parseTagOpUnusedInvalid,
3438 '@opinvlstyle': self.parseTagOpUnusedInvalid,
3439 '@optest': self.parseTagOpTest,
3440 '@optestign': self.parseTagOpTestIgnore,
3441 '@optestignore': self.parseTagOpTestIgnore,
3442 '@opcopytests': self.parseTagOpCopyTests,
3443 '@oponly': self.parseTagOpOnlyTest,
3444 '@oponlytest': self.parseTagOpOnlyTest,
3445 '@opxcpttype': self.parseTagOpXcptType,
3446 '@opstats': self.parseTagOpStats,
3447 '@opfunction': self.parseTagOpFunction,
3448 '@opdone': self.parseTagOpDone,
3449 };
3450 for i in range(48):
3451 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
3452 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
3453
3454 self.asErrors = [];
3455
3456 def raiseError(self, sMessage):
3457 """
3458 Raise error prefixed with the source and line number.
3459 """
3460 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
3461
3462 def raiseCommentError(self, iLineInComment, sMessage):
3463 """
3464 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
3465 """
3466 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
3467
3468 def error(self, sMessage):
3469 """
3470 Adds an error.
3471 returns False;
3472 """
3473 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
3474 return False;
3475
3476 def errorOnLine(self, iLine, sMessage):
3477 """
3478 Adds an error.
3479 returns False;
3480 """
3481 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,));
3482 return False;
3483
3484 def errorComment(self, iLineInComment, sMessage):
3485 """
3486 Adds a comment error.
3487 returns False;
3488 """
3489 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
3490 return False;
3491
3492 def printErrors(self):
3493 """
3494 Print the errors to stderr.
3495 Returns number of errors.
3496 """
3497 if self.asErrors:
3498 sys.stderr.write(u''.join(self.asErrors));
3499 return len(self.asErrors);
3500
3501 def debug(self, sMessage):
3502 """
3503 For debugging.
3504 """
3505 if self.fDebug:
3506 print('debug: %s' % (sMessage,), file = sys.stderr);
3507
3508 def stripComments(self, sLine):
3509 """
3510 Returns sLine with comments stripped.
3511
3512 Complains if traces of incomplete multi-line comments are encountered.
3513 """
3514 sLine = self.oReComment.sub(" ", sLine);
3515 if sLine.find('/*') >= 0 or sLine.find('*/') >= 0:
3516 self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
3517 return sLine;
3518
3519 def parseFunctionTable(self, sLine):
3520 """
3521 Parses a PFNIEMOP table, updating/checking the @oppfx value.
3522
3523 Note! Updates iLine as it consumes the whole table.
3524 """
3525
3526 #
3527 # Extract the table name.
3528 #
3529 sName = re.search(' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
3530 oMap = g_dInstructionMapsByIemName.get(sName);
3531 if not oMap:
3532 self.debug('No map for PFNIEMOP table: %s' % (sName,));
3533 oMap = self.oDefaultMap; # This is wrong wrong wrong.
3534
3535 #
3536 # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
3537 # entries per byte:
3538 # no prefix, 066h prefix, f3h prefix, f2h prefix
3539 # Those tables has 256 & 32 entries respectively.
3540 #
3541 cEntriesPerByte = 4;
3542 cValidTableLength = 1024;
3543 asPrefixes = ('none', '0x66', '0xf3', '0xf2');
3544
3545 oEntriesMatch = re.search('\[ *(256|32) *\]', sLine);
3546 if oEntriesMatch:
3547 cEntriesPerByte = 1;
3548 cValidTableLength = int(oEntriesMatch.group(1));
3549 asPrefixes = (None,);
3550
3551 #
3552 # The next line should be '{' and nothing else.
3553 #
3554 if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
3555 return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
3556 self.iLine += 1;
3557
3558 #
3559 # Parse till we find the end of the table.
3560 #
3561 iEntry = 0;
3562 while self.iLine < len(self.asLines):
3563 # Get the next line and strip comments and spaces (assumes no
3564 # multi-line comments).
3565 sLine = self.asLines[self.iLine];
3566 self.iLine += 1;
3567 sLine = self.stripComments(sLine).strip();
3568
3569 # Split the line up into entries, expanding IEMOP_X4 usage.
3570 asEntries = sLine.split(',');
3571 for i in range(len(asEntries) - 1, -1, -1):
3572 sEntry = asEntries[i].strip();
3573 if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
3574 sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
3575 asEntries.insert(i + 1, sEntry);
3576 asEntries.insert(i + 1, sEntry);
3577 asEntries.insert(i + 1, sEntry);
3578 if sEntry:
3579 asEntries[i] = sEntry;
3580 else:
3581 del asEntries[i];
3582
3583 # Process the entries.
3584 for sEntry in asEntries:
3585 if sEntry in ('};', '}'):
3586 if iEntry != cValidTableLength:
3587 return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
3588 return True;
3589 if sEntry.startswith('iemOp_Invalid'):
3590 pass; # skip
3591 else:
3592 # Look up matching instruction by function.
3593 sPrefix = asPrefixes[iEntry % cEntriesPerByte];
3594 sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
3595 aoInstr = g_dAllInstructionsByFunction.get(sEntry);
3596 if aoInstr:
3597 if not isinstance(aoInstr, list):
3598 aoInstr = [aoInstr,];
3599 oInstr = None;
3600 for oCurInstr in aoInstr:
3601 if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
3602 pass;
3603 elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
3604 oCurInstr.sPrefix = sPrefix;
3605 elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
3606 oCurInstr.sOpcode = sOpcode;
3607 oCurInstr.sPrefix = sPrefix;
3608 else:
3609 continue;
3610 oInstr = oCurInstr;
3611 break;
3612 if not oInstr:
3613 oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
3614 aoInstr.append(oInstr);
3615 g_dAllInstructionsByFunction[sEntry] = aoInstr;
3616 g_aoAllInstructions.append(oInstr);
3617 oMap.aoInstructions.append(oInstr);
3618 else:
3619 self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
3620 % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
3621 iEntry += 1;
3622
3623 return self.error('Unexpected end of file in PFNIEMOP table');
3624
3625 def addInstruction(self, iLine = None):
3626 """
3627 Adds an instruction.
3628 """
3629 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
3630 g_aoAllInstructions.append(oInstr);
3631 self.aoCurInstrs.append(oInstr);
3632 return oInstr;
3633
3634 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
3635 """
3636 Derives the mnemonic and operands from a IEM stats base name like string.
3637 """
3638 if oInstr.sMnemonic is None:
3639 asWords = sStats.split('_');
3640 oInstr.sMnemonic = asWords[0].lower();
3641 if len(asWords) > 1 and not oInstr.aoOperands:
3642 for sType in asWords[1:]:
3643 if sType in g_kdOpTypes:
3644 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
3645 else:
3646 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
3647 return False;
3648 return True;
3649
    def doneInstructionOne(self, oInstr, iLine):
        """
        Complete the parsing by processing, validating and expanding raw inputs.

        Records iLine as the completion line of oInstr, fills in whichever of
        the mnemonic, disassembler enum, stats name, function name, maps and
        encoding are still missing, and registers the instruction with its maps
        and the by-stat and by-function dictionaries.

        Always returns True; a duplicate stats name is recorded as an error.
        """
        assert oInstr.iLineCompleted is None;
        oInstr.iLineCompleted = iLine;

        #
        # Specified instructions.
        #
        if oInstr.cOpTags > 0:
            if oInstr.sStats is None:
                pass;

        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        else:
            #if oInstr.sRawOldOpcodes:
            #
            #if oInstr.sMnemonic:
            pass;

        #
        # Common defaults.
        #

        # Guess mnemonic and operands from stats if the former is missing.
        if oInstr.sMnemonic is None:
            if oInstr.sStats is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
            elif oInstr.sFunction is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

        # Derive the disassembler op enum constant from the mnemonic.
        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

        # Derive the IEM statistics base name from mnemonic and operand types.
        if oInstr.sStats is None:
            if oInstr.sFunction is not None:
                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
            elif oInstr.sMnemonic is not None:
                oInstr.sStats = oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sStats += '_' + oOperand.sType;

        # Derive the IEM function name from mnemonic and operand types.
        if oInstr.sFunction is None:
            if oInstr.sMnemonic is not None:
                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sFunction += '_' + oOperand.sType;
            elif oInstr.sStats:
                oInstr.sFunction = 'iemOp_' + oInstr.sStats;

        #
        # Apply default map and then add the instruction to all it's groups.
        #
        if not oInstr.aoMaps:
            oInstr.aoMaps = [ self.oDefaultMap, ];
        for oMap in oInstr.aoMaps:
            oMap.aoInstructions.append(oInstr);

        #
        # Derive encoding from operands and maps.
        #
        if oInstr.sEncoding is None:
            if not oInstr.aoOperands:
                if oInstr.fUnused and oInstr.sSubOpcode:
                    oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
                else:
                    oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
            elif oInstr.aoOperands[0].usesModRM():
                if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                    or oInstr.onlyInVexMaps():
                    oInstr.sEncoding = 'VEX.ModR/M';
                else:
                    oInstr.sEncoding = 'ModR/M';
            # Note: sEncoding stays None when the first operand doesn't use ModR/M.

        #
        # Check the opstat value and add it to the opstat indexed dictionary.
        #
        if oInstr.sStats:
            if oInstr.sStats not in g_dAllInstructionsByStat:
                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
            else:
                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

        #
        # Add to function indexed dictionary.  We allow multiple instructions per function.
        #
        if oInstr.sFunction:
            if oInstr.sFunction not in g_dAllInstructionsByFunction:
                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
            else:
                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
        return True;
3755
3756 def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
3757 """
3758 Done with current instruction.
3759 """
3760 for oInstr in self.aoCurInstrs:
3761 self.doneInstructionOne(oInstr, self.iLine if iLineInComment is None else self.iCommentLine + iLineInComment);
3762 if oInstr.fStub:
3763 self.cTotalStubs += 1;
3764
3765 self.cTotalInstr += len(self.aoCurInstrs);
3766
3767 self.sComment = '';
3768 self.aoCurInstrs = [];
3769 if fEndOfFunction:
3770 #self.debug('%s: oCurFunction=None' % (self.iLine, ));
3771 if self.oCurFunction:
3772 self.oCurFunction.complete(self.iLine, self.asLines[self.oCurFunction.iBeginLine - 1 : self.iLine]);
3773 self.oCurFunction = None;
3774 self.iMcBlockInFunc = 0;
3775 return True;
3776
3777 def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
3778 """
3779 Sets the sAttrib of all current instruction to oValue. If fOverwrite
3780 is False, only None values and empty strings are replaced.
3781 """
3782 for oInstr in self.aoCurInstrs:
3783 if fOverwrite is not True:
3784 oOldValue = getattr(oInstr, sAttrib);
3785 if oOldValue is not None:
3786 continue;
3787 setattr(oInstr, sAttrib, oValue);
3788
3789 def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
3790 """
3791 Sets the iEntry of the array sAttrib of all current instruction to oValue.
3792 If fOverwrite is False, only None values and empty strings are replaced.
3793 """
3794 for oInstr in self.aoCurInstrs:
3795 aoArray = getattr(oInstr, sAttrib);
3796 while len(aoArray) <= iEntry:
3797 aoArray.append(None);
3798 if fOverwrite is True or aoArray[iEntry] is None:
3799 aoArray[iEntry] = oValue;
3800
3801 def parseCommentOldOpcode(self, asLines):
3802 """ Deals with 'Opcode 0xff /4' like comments """
3803 asWords = asLines[0].split();
3804 if len(asWords) >= 2 \
3805 and asWords[0] == 'Opcode' \
3806 and ( asWords[1].startswith('0x')
3807 or asWords[1].startswith('0X')):
3808 asWords = asWords[:1];
3809 for iWord, sWord in enumerate(asWords):
3810 if sWord.startswith('0X'):
3811 sWord = '0x' + sWord[:2];
3812 asWords[iWord] = asWords;
3813 self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));
3814
3815 return False;
3816
3817 def ensureInstructionForOpTag(self, iTagLine):
3818 """ Ensure there is an instruction for the op-tag being parsed. """
3819 if not self.aoCurInstrs:
3820 self.addInstruction(self.iCommentLine + iTagLine);
3821 for oInstr in self.aoCurInstrs:
3822 oInstr.cOpTags += 1;
3823 if oInstr.cOpTags == 1:
3824 self.cTotalTagged += 1;
3825 return self.aoCurInstrs[-1];
3826
3827 @staticmethod
3828 def flattenSections(aasSections):
3829 """
3830 Flattens multiline sections into stripped single strings.
3831 Returns list of strings, on section per string.
3832 """
3833 asRet = [];
3834 for asLines in aasSections:
3835 if asLines:
3836 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3837 return asRet;
3838
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a simple stripped string with newlines as
    section breaks.  The final section does not sport a trailing newline.

    The lines of a section are stripped and joined using sLineSep, and the
    resulting section strings are joined using sSectionSep.  Empty sections
    are skipped and never contribute a separator, so the result neither
    starts nor ends with sSectionSep.

    Returns the flattened string (empty if there is no text).
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    return sSectionSep.join([sLineSep.join([sLine.strip() for sLine in asLines])
                             for asLines in aasSections
                             if asLines]);
3856
3857
3858
3859 ## @name Tag parsers
3860 ## @{
3861
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The flattened value gets a terminating full stop if it lacks one.  It is
    rejected when empty, when longer than 180 characters, when consisting of
    more than one sentence, or when the instruction already has a brief.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first full stop that is followed by a space or ends the text.
    # Dots inside words and numbers are skipped.  If it isn't the final
    # character there is more than one sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3891
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  The sections are flattened into one
    string each (flattenSections) and appended to the asDescSections list of
    the instruction.

    Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asFlattened = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asFlattened);
    return True;
3906
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match the oReMnemonic pattern (a valid C
    identifier string).  Because of prefixes, groups and whatnot, there are
    times when the mnemonic isn't that of an actual assembler mnemonic.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then either store it or work out what is wrong with it.
    sName = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sName):
        sError = '%s: invalid menmonic name: "%s"' % (sTag, sName,);
    elif oInstr.sMnemonic is not None:
        sError = '%s: attempting to overwrite menmonic "%s" with "%s"' % (sTag, oInstr.sMnemonic, sName,);
    else:
        oInstr.sMnemonic = sName;
        return True;
    return self.errorComment(iTagLine, sError);
3929
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.  When
    omitted, the location is taken from entry 1 of the g_kdOpTypes value for
    the given type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.  Returns
    True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];     # The default location for this operand type.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Operands
    # may be given out of order, so pad the array with None as needed.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3979
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    The map names are comma separated (line and section breaks count as
    commas too) and may be surrounded by whitespace.  Each name must be a key
    of g_dInstructionMaps and must not already be assigned to the instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    if not sFlattened.strip():
        # Note! str.split(',') never returns an empty list, so check the string itself.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this tag: report it and carry on.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
4014
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower cased.  'n/a' is stored as None.  Any other value but
    'none' must be a valid opcode byte, a two character value getting '0x'
    prepended first.  Everything except 'n/a' must be a key of g_kdPrefixes.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;   # Bare hex byte, add the prefix.
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
4052
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted is anything _isValidOpcodeByte approves of, as well as '/r',
    '11/r' and '!11/r' where 'r' is a single digit in the range 0 thru 7.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The digit is the ModR/M reg field value, which is three bits wide (0..7).
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
4082
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes.  What gets stored in the
    instruction is entry 0 of the corresponding table value.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sSubOpcode = self.flattenAllSections(aasSections);
    if sSubOpcode not in g_kdSubOpcodes:
        # List the values actually accepted rather than a hardcoded subset.
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sSubOpcode, ', '.join([str(sKey) for sKey in g_kdSubOpcodes]),));
    sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];

    # Set it.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
4109
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.  The value is accepted if it is a
    key of g_kdEncodings or g_dInstructionMaps, or if _isValidOpcodeByte
    approves of it.

    Returns True on success, otherwise the result of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sEncoding = self.flattenAllSections(aasSections);
    fKnownName = sEncoding in g_kdEncodings or sEncoding in g_dInstructionMaps;
    if not fKnownName and not _isValidOpcodeByte(sEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # Set it, unless already set.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sEncoding,));
    oInstr.sEncoding = sEncoding;

    _ = iEndLine;
    return True;
4136
## EFlags tag to Instruction attribute name.
# Keyed by the tag name; the value is the name of the instruction attribute
# that parseTagOpEFlags reads (overwrite check) and sets for that tag.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
4145
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or
    'none' for an empty list.  The resulting list is stored in the
    instruction attribute given by kdOpFlagToAttr for the tag.

    Returns True on success, False or the result of errorComment on failure.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them, unless the attribute already holds a list.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
4177
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; 'none' on its own gives an empty
    list.  The valid hints are added as keys to the dHints dictionary of the
    instruction, duplicates being reported but otherwise ignored.

    Returns True on success, False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! The words returned by str.split() are free of whitespace, so no stripping is needed.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
4211
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for the
    instruction.  The value must match the oReDisEnum pattern.  If more than
    one word is given, an error is reported and the first word is used.

    Returns True on success, False or the result of errorComment on failure.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;
    sDisEnum = asWords[0];

    # Validate the value and make sure we are not overwriting anything.
    sError = None;
    if not self.oReDisEnum.match(sDisEnum):
        sError = '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)' % (sTag, sDisEnum, self.oReDisEnum.pattern);
    elif oInstr.sDisEnum is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sDisEnum,);
    if sError is not None:
        return self.errorComment(iTagLine, sError);

    # Set it.
    oInstr.sDisEnum = sDisEnum;
    return True;
4240
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.  The name must be a key
    of g_kdCpuNames.  If more than one word is given, an error is reported
    and the first word is used.

    An attempt at replacing an already set CPU name with a different one is
    reported as an error and the existing value is kept.

    Returns True if a valid CPU name was given, otherwise False or the result
    of errorComment.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if len(asCpus) != 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));
        if not asCpus:
            return False;

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Must not be done before the overwrite check, or conflicts go unnoticed.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
4270
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys, or
    'none' on its own for an empty list.  The valid values are appended to
    the asCpuIds list of the instruction, duplicates being reported but
    otherwise ignored.

    Returns True on success, False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! The words returned by str.split() are free of whitespace, so no stripping is needed.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
4304
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching the oReGroupName
    pattern is expected, and the instruction must not have a group already.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value up into words, there shall only be one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    sGroup = asGroups[0];

    # Validate the name and check for overwrites before setting it.
    if not self.oReGroupName.match(sGroup):
        sError = '%s: invalid group name: %s (valid: %s)' % (sTag, sGroup, self.oReGroupName.pattern);
    elif oInstr.sGroup is not None:
        sError = '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sGroup,);
    else:
        oInstr.sGroup = sGroup;
        return True;
    return self.errorComment(iTagLine, sError);
4330
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for a currently invalid
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The style must be a key of g_kdInvalidStyles and can only be set once per
    instruction.  Returns True on success, otherwise the result of
    errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value up into words, there shall only be one.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags may be used with an instruction.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sStyle,));

    # Set it, flagging the instruction as unused or invalid where the tag says so.
    oInstr.sInvalidStyle = sStyle;
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);
    return True;
4368
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section in aasSections is parsed as a separate test.  A test that
    parses cleanly is appended to the aoTests list of the instruction, while
    problems are reported via errorComment and the test dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: the outputs come first, then
        # the inputs (following the '->') and finally the selectors (following the '/').
        for sWord in reversed(asWords):
            # Check for array switchers.  This must be done by identity, as
            # the lists compare equal by value whenever they hold the same
            # words (both being empty, for instance).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Expand predicate shorthands, then split on the first compare operator found.
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may be followed by ':<type>', otherwise the field's default type applies.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test to the instruction if it parsed okay, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4513
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is checked against the number of tests the instruction
    currently has, a mismatch being reported via errorComment.  The actual
    parsing is left to parseTagOpTest, the result of which is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows '@optest' (7 chars) or is enclosed in brackets following it.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4529
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    This is a simple trick to ignore a test while debugging another.  Nothing
    is parsed or recorded.

    See also \@oponlytest.

    Always returns True.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4541
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcopytests
    Value:  <opstat | function> [..]
    Example: \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match either the oReStatsName or the oReFunctionName
    pattern.  Invalid and duplicate references are reported and skipped.

    Returns True, unless no references were given at all, in which case the
    result of errorComment is returned.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
4570
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions, unless already there.

    See also \@optestignore.

    Returns True on success, or the result of errorComment if a value was given.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag doesn't take any value.
    sValue = self.flattenAllSections(aasSections).strip();
    if len(sValue) > 0:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list.
    fAlreadyListed = oInstr in g_aoOnlyTestInstructions;
    if not fAlreadyListed:
        g_aoOnlyTestInstructions.append(oInstr);
    return True;
4593
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).  The type must
    be a key of g_kdXcptTypes and can only be set once per instruction.

    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split the value up into words, there shall only be one.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];

    # Validate the type and check for overwrites before setting it.
    if sType not in g_kdXcptTypes:
        sError = '%s: invalid invalid exception type: %s (valid: %s)' % (sTag, sType, sorted(g_kdXcptTypes.keys()),);
    elif oInstr.sXcptType is not None:
        sError = '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)' % ( sTag, oInstr.sXcptType, sType,);
    else:
        oInstr.sXcptType = sType;
        return True;
    return self.errorComment(iTagLine, sError);
4620
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    This is for explicitly setting the IEM function name.  Normally we pick
    this up from the FNIEMOP_XXX macro invocation after the description, or
    generate it from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match the oReFunctionName pattern and can only be set once.
    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then either store it or work out what is wrong with it.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        sError = '%s: invalid VMM function name: "%s" (valid: %s)' % (sTag, sFunction, self.oReFunctionName.pattern);
    elif oInstr.sFunction is not None:
        sError = '%s: attempting to overwrite VMM function name "%s" with "%s"' % (sTag, oInstr.sFunction, sFunction,);
    else:
        oInstr.sFunction = sFunction;
        return True;
    return self.errorComment(iTagLine, sError);
4648
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    This is for explicitly setting the statistics name.  Normally we pick
    this up from the IEMOP_MNEMONIC macro invocation, or generate it from
    the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or aren't implemented yet.

    The name must match the oReStatsName pattern and can only be set once.
    Returns True on success, otherwise the result of errorComment.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, then either store it or work out what is wrong with it.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        sError = '%s: invalid VMM statistics name: "%s" (valid: %s)' % (sTag, sStats, self.oReStatsName.pattern);
    elif oInstr.sStats is not None:
        sError = '%s: attempting to overwrite VMM statistics base name "%s" with "%s"' % (sTag, oInstr.sStats, sStats,);
    else:
        oInstr.sStats = sStats;
        return True;
    return self.errorComment(iTagLine, sError);
4676
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of doneInstructions, or that of errorComment if a
    value was given.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4689
4690 ## @}
4691
4692
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    If it's a opcode specifiying comment, we reset the macro stuff.

    The comment is only looked at when it contains the text 'Opcode' or
    an '@' character.  Each line is stripped of leading blanks and
    asterisks, an old style 'Opcode ...' first line is handed to
    parseCommentOldOpcode(), and the remaining text is split up into
    tags (lines starting with '@') holding one or more sections
    (paragraphs separated by empty lines).  Text preceding the first tag
    is collected under the pseudo tag '@default'.

    Tags present in self.dTagHandlers are dispatched to their handler as
    handler(sTag, aasSections, iTagLine, iEndLine), where the line
    numbers are indexes into the stripped comment lines.

    Returns False if the comment was rejected as uninteresting, True
    otherwise (errors are reported via errorComment()).
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if    self.sComment.find('Opcode') < 0 \
      and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    # self.iCommentLine is advanced once for every leading empty line dropped.
    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # (asLines cannot be empty here: the line holding the 'Opcode' or '@'
    # text that got us past the check above survives the stripping.)
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;           # Number of tags dispatched to a handler.
    sFlatDefault = None;        # Flattened untagged text, if any.
    sCurTag      = '@default';  # Pseudo tag for text preceding the first real tag.
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Continuation of the current tag: an empty line starts a new
            # section, but only if the current section has content.
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # Drop a trailing empty section first (keeping at least one).
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                # NOTE(review): '@opencoding' starts with '@op' and is thus
                # already caught by the 'Unknown tag' branch above.
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is the first word (lower cased), anything after
            # it on the same line becomes the first line of the first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop processing above, the 'Did you mean'
    # hints are not applied to the last tag of a comment.  iLine is the
    # index of the last comment line at this point.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4793
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation starting at offStartInvocation in sInvocation.

    When the closing parenthesis isn't found in sInvocation, the lines
    following the current one (self.asLines[self.iLine] and onwards) are
    appended to it until the invocation is complete.  Commas and
    parentheses inside nested parentheses or inside single/double quoted
    text do not split arguments.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           it is all on the same line, otherwise into the last line
           appended.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # The macro name is whatever sits between the start offset and the '('.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        self.raiseError("invalid macro name '%s'" % (sMacroName,));
    asArgs = [sMacroName, ];

    # Walk the argument list one character at a time.
    iNextLine   = self.iLine;       # Index of the next line to append.
    offCur      = offParen + 1;     # Current scan position.
    offArg      = offCur;           # Where the current argument starts.
    offLineBase = 0;                # Where the most recently appended line starts.
    chInQuote   = None;             # The quote character while inside quoted text.
    cNesting    = 1;                # Parenthesis nesting depth.
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Ran out of text, pull in the next line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
            offLineBase  = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;        # Skip the escaped character.
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            if cNesting == 1:
                # Top-level separator or terminator: the argument is complete.
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLineBase, iNextLine - self.iLine);
4850
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode (at or after
    offStart) and parses it as a macro invocation if it is followed by an
    opening parenthesis (blanks in between are allowed).

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Using startswith() rather than indexing, as there may be nothing but
    # blanks (or nothing at all) following the macro name.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4860
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Simplified findAndParseMacroInvocationEx() that searches from the
    start of sCode and only returns the argument list.

    Returns None if not found, otherwise the argument list with the macro
    name as the zero'th entry (see parseMacroInvocation).
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    asArgs  = tResult[0];
    return asArgs;
4866
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one
    that findAndParseMacroInvocation() finds an invocation of.

    Returns same as findAndParseMacroInvocation, None if none of the
    macros were found.
    """
    # Lazy generator, so macros following the first hit aren't searched for.
    oCandidates = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asArgs for asArgs in oCandidates if asArgs is not None), None);
4876
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro parameters are cross checked against what has been specified
    for the last of the current instructions (self.aoCurInstrs) and used to
    fill in what has not been specified yet (mnemonic, operands, encoding,
    statistics name and hints).  An instruction is added if there are none.

    Parameters:
        sMacro      - The macro name, only used in error messages.
        sStats      - The a_Stats parameter (statistics base name).
        sAsm        - The a_szMnemonic parameter, unused.
        sForm       - The a_Form parameter (key into g_kdIemForms).
        sUpper      - The a_Upper parameter (upper cased mnemonic).
        sLower      - The a_Lower parameter (lower cased mnemonic).
        sDisHints   - The a_fDisHints parameter ('|' separated DISOPTYPE_XXX or '0').
        sIemHints   - The a_fIemHints parameter ('|' separated IEMOPHINT_XXX or '0').
        asOperands  - List of operand types (keys into g_kdOpTypes).

    Returns True.  Problems are reported via self.error().
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    # Note! The VEX/XOP test uses 'in' rather than find(): find() returns -1
                    #       (which is true as a boolean) when there is no match and zero
                    #       (false) when the form name starts with the prefix.
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not ('VEX' in sForm or 'XOP' in sForm) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Any operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        # (Must stay in the known-form branch as it indexes g_kdIemForms by sForm.)
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
5016
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These lack the a_Stats and a_szMnemonic parameters of the EX variants,
    so both are derived from the lower cased mnemonic and the operands
    before handing the work over to workerIemOpMnemonicEx().

    Returns the workerIemOpMnemonicEx() result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
5026
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation, opening a new MC block.

    The new block becomes self.oCurMcBlock.  It is only added to
    g_aoMcBlocks (and counted in self.cTotalMcBlocks) when there are no
    preprocessor conditionals in effect, no host architecture is set, or
    the conditionals match the host architecture.

    Raises an error (self.raiseError) if there is no current function or
    if the previous block has not been ended.  Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Must be inside a function and not inside another block.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the (sub-)line to the
    # statement.  rfind returns -1 when there is no preceding newline,
    # which conveniently makes the line start offset zero.
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;

    # Create the block, but only register it if the preprocessor context
    # matches the host architecture.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    try:
        fRegister = (   not self.aoCppCondStack
                     or not self.sHostArch
                     or self.PreprocessorConditional.isInBlockForArch(self.aoCppCondStack, self.sHostArch, self.iLine));
        if fRegister:
            g_aoMcBlocks.append(oNewBlock);
            self.cTotalMcBlocks += 1;
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    self.iMcBlockInFunc += 1;
    return True;
5063
@staticmethod
def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
    """
    Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
    extracting a statement block from a string that's the result of macro
    expansion and therefore contains multiple "sub-lines" as it were.

    Parameters:
        sRawLine    - The string to extract the sub-lines from.
        offBegin    - Offset of the first statement of the block.
        offEnd      - Offset of the last statement of the block.
        sBeginStmt  - What the first sub-line is expected to start with.
                      If it starts with something else, the text preceding
                      offBegin is dropped from it.

    Returns list of lines covering offBegin thru offEnd in sRawLine, each
    terminated by a newline character.
    """
    # Drop everything following the sub-line containing offEnd.
    offEol = sRawLine.find('\n', offEnd);
    if offEol >= 0:
        sRawLine = sRawLine[:offEol + 1];

    # Drop the sub-lines preceding the one containing offBegin.
    offSol = sRawLine.rfind('\n', 0, offBegin) + 1;
    sRawLine = sRawLine[offSol:];
    if not sRawLine.strip().startswith(sBeginStmt):
        sRawLine = sRawLine[offBegin - offSol:]

    # Split it up.  When the text ends with a newline the split produces a
    # trailing empty string which isn't a line and must not be returned as
    # one (callers look for the final statement in the last entry).
    asLines = sRawLine.split('\n');
    if len(asLines) > 1 and not asLines[-1]:
        asLines.pop();
    return [sLine + '\n' for sLine in asLines];
5084
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines making up the current MC block
    (self.oCurMcBlock), trims the last line so it ends with the
    'IEM_MC_END();' statement, and completes the block with them.
    self.oCurMcBlock is reset to None afterwards.

    offEndStatementInLine is the offset of the IEM_MC_END statement in
    the current line (self.asLines[self.iLine - 1]).

    Raises an error (self.raiseError) if there is no current block or the
    IEM_MC_BEGIN / IEM_MC_END statements aren't laid out as expected.
    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Block spanning multiple source lines (line numbers are one based).
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');

        # Hack alert! Detect mixed tail/head macros a la cmpxchg16b and split up the lines
        #             so we can deal correctly with IEM_MC_END below and everything else.
        for sLine in asLines:
            cNewLines = sLine.count('\n');
            assert cNewLines > 0;
            if cNewLines > 1:
                # The end offset is converted from being relative to the last
                # line into being relative to the start of the joined text.
                asLines = self.extractLinesFromMacroExpansionLine(''.join(asLines),
                                                                  self.oCurMcBlock.offBeginLine,
                                                                    offEndStatementInLine
                                                                  + sum(len(s) for s in asLines)
                                                                  - len(asLines[-1]));
                self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Partial;
                break;
    else:
        # Begin and end on the same line: the whole block comes from a macro expansion.
        self.oCurMcBlock.iMacroExp = McBlock.kiMacroExp_Entire;
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offFinalEnd   = sFinal.find('IEM_MC_END');
    offEndInFinal = offFinalEnd;    # Remembered for calculating the statement length below.
    if offFinalEnd < 0: self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd += len('IEM_MC_END');

    # Step over the '(', ')' and ';' characters, allowing blanks in front of each.
    # NOTE(review): the blank skipping loops index past the end of sFinal
    # (IndexError) if the line ends before the expected character.
    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != '(': self.raiseError('bogus IEM_MC_END: Expected "(" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ')': self.raiseError('bogus IEM_MC_END: Expected ")" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    while sFinal[offFinalEnd].isspace():
        offFinalEnd += 1;
    if sFinal[offFinalEnd] != ';': self.raiseError('bogus IEM_MC_END: Expected ";" at %s: %s' % (offFinalEnd, sFinal,));
    offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    # The third argument is the offset following the statement, i.e. the
    # statement offset plus the length of the 'IEM_MC_END();' text.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
5163
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[1-5]_RET macro invocation.

    The statement is treated as a complete MC block of its own: a McBlock
    is created, the invocation is parsed into its single statement, and
    the completed block is appended to g_aoMcBlocks.  self.iLine is
    advanced to the line holding the closing parenthesis when the
    invocation spans multiple lines.

    Parameters:
        sCode                       - The code containing the statement.
        offBeginStatementInCodeStr  - Offset of the statement in sCode.
        offBeginStatementInLine     - Offset of the statement in the line.
        cParams                     - The parameter count given by the macro name.

    Raises an error (self.raiseError) if there is no current function, if
    inside an IEM_MC_BEGIN block, or if the invocation doesn't parse as
    expected.  Returns True.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statment.
    # asArgs[0] is the macro name, so cParams + 4 entries means the macro
    # takes three arguments in addition to the cParams ones.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 4:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s! (%s)'
                        % (sStmt, len(asArgs), cParams + 4, asArgs));

    oMcBlock.aoStmts = [ McBlock.parseMcDeferToCImpl(oMcBlock, asArgs[0], asArgs[1:]), ];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        # Regular source lines: take the ones the invocation spans and cut
        # the last one off right after the terminating semicolon.
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        # Single line with embedded newlines, i.e. the result of a macro expansion.
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    # (When spanning multiple lines the end offsets are relative to the
    # last line, so the statement offset is given as zero.)
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
5228
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so asArgs is the macro argument list with the macro name as
    the zero'th entry and the function name as the first.

    Any function in progress is completed with the lines up to (but not
    including) the current one before the new function takes its place as
    self.oCurFunction.  Returns True.
    """
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, asArgs[1], asArgs);
    return True;
5244
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks code for relevant macro invocation.

    Looks for, in this order: decoder function definition macros
    (FNIEMOP_XXX), the VEX decoding completion helpers, the
    IEMOP_MNEMONIC family, and finally the IEM_MC_BEGIN / IEM_MC_END /
    IEM_MC_DEFER_TO_CIMPL_X_RET statements.

    Parameters:
        sCode   - The code to check.
        offLine - Added to the offsets of the IEM_MC_XXX statements found
                  in sCode when passing them on as line offsets.

    Returns True if a function definition macro was found or if sCode
    contains the text 'IEM_MC_', otherwise False.
    """

    #
    # Scan macro invocations.
    #
    if sCode.find('(') > 0:
        # Look for instruction decoder function definitions. ASSUME single line.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF',
                                                         'FNIEMOPRM_DEF',
                                                         'FNIEMOP_STUB',
                                                         'FNIEMOP_STUB_1',
                                                         'FNIEMOP_UD_STUB',
                                                         'FNIEMOP_UD_STUB_1' ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));

            # Associate the function with the current instruction(s),
            # creating one if there are none.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
            self.setInstrunctionAttrib('sFunction', asArgs[1]);
            self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
            # Stubs have no body, so the instruction(s) can be flushed right away.
            if asArgs[0].find('STUB') > 0:
                self.doneInstructions(fEndOfFunction = True);
            return True;

        # Check for worker function definitions, so we can get a context for MC blocks.
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'FNIEMOP_DEF_1',
                                                         'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.workerStartFunction(asArgs);
            #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
            return True;

        # IEMOP_HLP_DONE_VEX_DECODING_*
        asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                       [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                         'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                         ]);
        if asArgs is not None:
            sMacro = asArgs[0];
            # The L0 variants imply the vex_l_zero hint.  Complain if the
            # mnemonic macro has been seen without it, then set it.
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' not in oInstr.dHints:
                        if oInstr.iLineMnemonicMacro >= 0:
                            self.errorOnLine(oInstr.iLineMnemonicMacro,
                                             'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                        oInstr.dHints['vex_l_zero'] = True;

        #
        # IEMOP_MNEMONIC*
        #
        # Note: a macro name only matches when directly followed by '(',
        #       so IEMOP_MNEMONIC does not match IEMOP_MNEMONIC2EX and such.
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None:
                if len(self.aoCurInstrs) == 1:
                    oInstr = self.aoCurInstrs[0];
                    if oInstr.sStats is None:
                        oInstr.sStats = asArgs[1];
                    self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
                                           asArgs[7], []);
            # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
                                           asArgs[8], [asArgs[6],]);
            # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
                                           asArgs[9], [asArgs[6], asArgs[7]]);
            # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
                                           asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
            # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
            #                   a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
            if asArgs is not None:
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
                                           asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);

            # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
            # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
            # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
                                         [asArgs[4], asArgs[5],]);
            # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
                                         [asArgs[4], asArgs[5], asArgs[6],]);
            # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
            if asArgs is not None:
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
                                         [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offLine + oMatch.start());
                elif oMatch.group(1) == 'BEGIN':
                    self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
                else:
                    # The parameter count is the single digit following 'DEFER_TO_CIMPL_' in the match.
                    self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
                                                     int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
            return True;

    return False;
5388
def workerPreprocessorRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression matches any of the macro names on a word boundary.
    Macros with an argument list (asArgs is not None) must be followed by
    an opening parenthesis (which is included in the match), the others
    must end on a word boundary.  self.oReMacros is set to None when
    there are no macros.

    Returns True.
    """
    if self.dMacros:
        # Note! Raw strings are used for the regex fragments since '\s' and
        #       '\(' are not valid escape sequences in regular string literals.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
5409
def workerPreprocessorDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are pulled in (advancing self.iLine) before the
    definition is split into name, argument list and body.  Only macros
    whose body contains IEM_MC_BEGIN or IEM_MC_END are recorded in
    self.dMacros, all others are ignored.

    Returns True, except for definitions that cannot be parsed where the
    self.error() result is returned.
    """
    assert sRest[-1] == '\n';

    #
    # Join up continuation lines.  The newlines are kept, but the escaping
    # backslash and any blanks preceding it are dropped.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + '\n' + sNextLine;

    #
    # Split out the name, argument list and body, trying the function-like
    # form first and then falling back on the simple form.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sArgList = oMatch.group(2).strip();
        if sArgList:
            asArgs = [sParam.strip() for sParam in sArgList.split(',')];
        else:
            asArgs = None;
        sBody = oMatch.group(3);
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreprocessorDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        asArgs = None;
        sBody  = oMatch.group(2);
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # Only macros involving MC blocks are of interest.  MC blocks within
    # nested macro expansion are NOT supported, and there is only limited
    # support for macros expanding to partial MC blocks.
    #
    # Note! IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX and other macros making
    #       use of IEMOP_RAISE_INVALID_LOCK_PREFIX_RET() are ignored here
    #       and dealt with by overriding IEMOP_RAISE_INVALID_LOCK_PREFIX_RET
    #       and its siblings in the recompiler, which is a lot simpler than
    #       nested macro expansion and avoids unnecessary threaded functions.
    #
    if "IEM_MC_BEGIN" not in sBody and "IEM_MC_END" not in sBody:
        return True;

    #
    # Record the macro and refresh the lookup expression.
    #
    if self.fDebugPreproc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreprocessorRecreateMacroRegex();
5470
def workerPreprocessorUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    The macro is removed from self.dMacros if it is one of those being
    tracked, in which case the workerPreprocessorRecreateMacroRegex()
    result is returned.  Otherwise nothing is done and True is returned.
    """
    # Isolate the name, crudely cutting off any comment at the first slash.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreproc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreprocessorRecreateMacroRegex();
5489
def workerPreprocessorIfOrElif(self, sDirective, sRest):
    """
    Handles an #if, #ifdef, #ifndef or #elif directive.

    sDirective is the directive word and sRest the text following it.  The
    resulting PreprocessorConditional is pushed onto self.aoCppCondStack,
    except for #elif where it is appended to the aoElif list of the innermost
    open conditional.  Problems are reported via raiseError.  Returns True.
    """
    fIsElif = sDirective == 'elif';

    # An #elif needs an open conditional that hasn't seen its #else yet.
    if fIsElif:
        if not self.aoCppCondStack:
            self.raiseError('#elif without #if');
        if self.aoCppCondStack[-1].fInElse:
            self.raiseError('#elif after #else');

    # Fold line continuations into a single line, replacing each escaped
    # newline (and any blanks in front of it) with one space.
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + ' ' + sNextLine;

    # What remains after removing comments and surrounding blanks is the expression.
    sExpr = self.stripComments(sRest).strip();

    # Create the conditional, turning construction failures into parse errors.
    try:
        oCond = self.PreprocessorConditional(sDirective, sExpr);
    except Exception as oXcpt:
        self.raiseError(oXcpt.args[0]);

    # Stash it in the right place.
    aoTarget = self.aoCppCondStack[-1].aoElif if fIsElif else self.aoCppCondStack;
    aoTarget.append(oCond);
    return True;
5528
def workerPreprocessorElse(self):
    """
    Handles an #else directive.

    Marks the innermost open conditional on self.aoCppCondStack as being in
    its #else part.  A missing conditional or a second #else is reported via
    raiseError.  Returns True.
    """
    if not self.aoCppCondStack:
        self.raiseError('#else without #if');

    oInnermost = self.aoCppCondStack[-1];
    if oInnermost.fInElse:
        self.raiseError('Another #else after #else');

    oInnermost.fInElse = True;
    return True;
5540
def workerPreprocessorEndif(self):
    """
    Handles an #endif directive.

    Pops the innermost conditional off self.aoCppCondStack, reporting an
    error via raiseError when the stack is empty.  Returns True.
    """
    if not self.aoCppCondStack:
        self.raiseError('#endif without #if');

    # Closing the conditional simply means dropping the innermost entry.
    self.aoCppCondStack.pop();
    return True;
5550
def checkPreprocessorDirective(self, sLine):
    """
    Handles a preprocessor directive.

    sLine must contain a '#'.  The directive word following it is isolated
    and the matching worker method is invoked with the remainder of the line.

    Returns the worker's return value, or False if the directive is not one
    of define, undef, if, ifdef, ifndef, elif, else and endif.
    """
    cchLine = len(sLine);

    # Find the first non-blank character after the hash.
    offHash = sLine.find('#');
    assert offHash >= 0;
    offWord = offHash + 1;
    while offWord < cchLine and sLine[offWord].isspace():
        offWord += 1;

    # The directive word extends to the next blank or the end of the line.
    offWordEnd = offWord;
    while offWordEnd < cchLine and not sLine[offWordEnd].isspace():
        offWordEnd += 1;
    sDirective = sLine[offWord:offWordEnd];
    if self.fDebugPreproc:
        self.debug('line %d: #%s...' % (self.iLine, sDirective));

    # The tail starts at the last blank in front of the arguments, so the
    # workers get it with a single leading blank character.
    offTail = offWordEnd;
    while offTail + 1 < cchLine and sLine[offTail + 1].isspace():
        offTail += 1;
    sTail = sLine[offTail:];

    # Dispatch to the worker for the directive.
    if sDirective == 'define':
        fRet = self.workerPreprocessorDefine(sTail);
    elif sDirective == 'undef':
        fRet = self.workerPreprocessorUndef(sTail);
    elif sDirective in ('if', 'ifdef', 'ifndef', 'elif',):
        fRet = self.workerPreprocessorIfOrElif(sDirective, sTail);
    elif sDirective == 'else':
        fRet = self.workerPreprocessorElse();
    elif sDirective == 'endif':
        fRet = self.workerPreprocessorEndif();
    else:
        if self.fDebugPreproc:
            self.debug('line %d: Unknown preprocessor directive: %s' % (self.iLine, sDirective));
        fRet = False;
    return fRet;
5590
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation that oMatch located in sLine.

    Only a single invocation is dealt with (the one matched), and for macros
    taking parameters the whole argument list must be on this line.

    Returns sLine with the invocation replaced by the macro expansion.
    """
    #
    # Work out what was matched and look up the macro.
    #
    offStart     = oMatch.start();
    offAfterName = oMatch.end();
    sName        = oMatch.group(1);
    assert sName == sLine[offStart:offAfterName];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro  = self.dMacros[sName] # type: SimpleParser.Macro
    sBefore = sLine[:offStart];

    #
    # Parameterless invocations are trivial.
    #
    if not fWithArgs:
        if self.fDebugPreproc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offAfterName:];

    #
    # Split the argument list at the top-level commas, tracking parenthesis
    # nesting until the one opening the invocation is closed.
    #
    asArgs = [];
    cDepth = 1;
    offArg = offAfterName;
    off    = offAfterName;
    while cDepth > 0:
        if off >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[off];
        if ch == '(':
            cDepth += 1;
        elif ch == ')':
            cDepth -= 1;
            if cDepth == 0:
                asArgs.append(sLine[offArg:off].strip());
        elif ch == ',' and cDepth == 1:
            asArgs.append(sLine[offArg:off].strip());
            offArg = off + 1;
        off += 1;
    # Note: 'off' is now the offset of the first character after the closing parenthesis.

    # An empty pair of parentheses yields one empty argument; treat that as no arguments.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Expand it.
    #
    if self.fDebugPreproc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[off:];
5649
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine.  While in code state known
    macros are expanded (the expanded line is written back to self.asLines)
    and preprocessor directives are handed to checkPreprocessorDirective.
    Multi-line comments are collected in self.sComment and passed on via
    parseComment, while code is handed to checkCodeForMacro.

    Returns number or errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreproc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Check for preprocessor directives before comments and other stuff.
        # ASSUMES preprocessor directives doesn't end with multiline comments.
        if self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreproc:
                self.debug('line %d: preproc' % (self.iLine,));
            self.checkPreprocessorDirective(sLine);
        else:
            # Look for comments.
            offSlash = sLine.find('/');
            if offSlash >= 0:
                if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                    offLine = 0;
                    while offLine < len(sLine):
                        if self.iState == self.kiCode:
                            # Look for substantial multiline comment so we pass the following MC as a whole line:
                            #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                            # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                            offHit = sLine.find('/*', offLine);
                            while offHit >= 0:
                                offEnd = sLine.find('*/', offHit + 2);
                                if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                    break;
                                # Short comment: leave it in the code and resume the search
                                # after its terminator.  Starting on the terminator itself
                                # would make the '/' of '*/' followed by a '*' (as in
                                # '/*=*/* b') look like the start of another comment.
                                offHit = sLine.find('/*', offEnd + 2);

                            if offHit >= 0:
                                self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                                self.sComment     = '';
                                self.iCommentLine = self.iLine;
                                self.iState       = self.kiCommentMulti;
                                offLine = offHit + 2;
                            else:
                                self.checkCodeForMacro(sLine[offLine:], offLine);
                                offLine = len(sLine);

                        elif self.iState == self.kiCommentMulti:
                            offHit = sLine.find('*/', offLine);
                            if offHit >= 0:
                                # End of the comment: hand it over and continue with the code following it.
                                self.sComment += sLine[offLine:offHit];
                                self.iState    = self.kiCode;
                                offLine = offHit + 2;
                                self.parseComment();
                            else:
                                self.sComment += sLine[offLine:];
                                offLine = len(sLine);
                        else:
                            assert False;
                # C++ line comment.
                elif offSlash > 0:
                    self.checkCodeForMacro(sLine[:offSlash], 0);

            # No slash, but append the line if in multi-line comment.
            elif self.iState == self.kiCommentMulti:
                #self.debug('line %d: multi' % (self.iLine,));
                self.sComment += sLine;

            # No slash, but check code line for relevant macro.
            elif (    self.iState == self.kiCode
                  and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
                #self.debug('line %d: macro' % (self.iLine,));
                self.checkCodeForMacro(sLine, 0);

            # If the line is a '}' in the first position, complete the instructions.
            elif self.iState == self.kiCode and sLine[0] == '}':
                #self.debug('line %d: }' % (self.iLine,));
                self.doneInstructions(fEndOfFunction = True);

            # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
            # so we can check/add @oppfx info from it.
            elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
                self.parseFunctionTable(sLine);

    # Complete whatever is pending at the end of the file and report the statistics.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5755
## The parsed content of IEMAllInstCommonBodyMacros.h.
# None until the first __parseFileByName call, which parses that header and
# stores the resulting parser here for use when parsing the other files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5758
def __parseFileByName(sSrcFile, sDefaultMap, sHostArch):
    """
    Parses one source file for instruction specfications.

    On the first call IEMAllInstCommonBodyMacros.h is parsed as well (it is
    looked for next to sSrcFile first and next to this script second) and
    the resulting parser is cached in g_oParsedCommonBodyMacros.  That parser
    is handed to the SimpleParser instance created for sSrcFile.

    Returns a tuple with the error count returned by the parser and the
    parser instance itself.
    Raises Exception if a file cannot be opened or read.  ParserException
    is printed to stderr and re-raised.
    """
    global g_oParsedCommonBodyMacros;

    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    try:
        asLines = oFile.readlines();
    except Exception as oXcpt:
        raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));
    finally:
        oFile.close();

    #
    # On the first call, we parse IEMAllInstCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is expected to yield neither instructions, tags,
        # stubs nor MC blocks.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one', sHostArch);
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The argument order must follow the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, sHostArch, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5820
5821
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    For each instruction in g_aoAllInstructions with asCopyTests entries, the
    tests of the referenced instruction(s) are appended to its aoTests.  A
    reference is looked up in g_dAllInstructionsByStat first and, failing
    that, in g_dAllInstructionsByFunction.

    Returns the number of errors, the messages are written to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in (oDstInstr.asCopyTests or ()):
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            aoSrcInstrs = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sSrcInstr, []);
            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, rejecting references to the destination itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5849
5850
def __applyOnlyTest():
    """
    If g_aoOnlyTestInstructions contains any instructions, drop aoTests from
    all other instructions so that only these get tested.

    Returns 0 (always).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        # Only instructions that have tests and aren't on the only-test list are touched.
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5862
## List of all main instruction files, their default maps and file sets (-1 means included in all sets).
# Each entry is a (file name, default map name, file set number) tuple.
g_aaoAllInstrFilesAndDefaultMapAndSet = (
    ( 'IEMAllInstCommon.cpp.h', 'one', -1, ),
    ( 'IEMAllInstOneByte.cpp.h', 'one', 1, ),
    ( 'IEMAllInst3DNow.cpp.h', '3dnow', 2, ),
    ( 'IEMAllInstTwoByte0f.cpp.h', 'two0f', 2, ),
    ( 'IEMAllInstThree0f38.cpp.h', 'three0f38', 3, ),
    ( 'IEMAllInstThree0f3a.cpp.h', 'three0f3a', 3, ),
    ( 'IEMAllInstVexMap1.cpp.h', 'vexmap1', 4, ),
    ( 'IEMAllInstVexMap2.cpp.h', 'vexmap2', 4, ),
    ( 'IEMAllInstVexMap3.cpp.h', 'vexmap3', 4, ),
);
5875
def __parseFilesWorker(asFilesAndDefaultMap, sHostArch):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.  A
    filename without any directory part that does not exist as given is
    taken to be relative to the directory of this script.  After parsing,
    the test copying and only-test filtering steps are applied and the total
    stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap, sHostArch);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to 1 so that an empty
    # instruction list doesn't cause a division by zero.
    cTotalInstr = len(g_aoAllInstructions);
    cTotalStubs = sum(oInstr.fStub for oInstr in g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5906
5907
def parseFiles(asFiles, sHostArch = None):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aaoAllInstrFilesAndDefaultMapAndSet whose file name matches the base
    name of the file (case-insensitively).

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBaseName = os.path.split(sFilename)[1].lower();
        sMap = next((aoInfo[1] for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet if aoInfo[0].lower() == sBaseName),
                    None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap, sHostArch);
5929
5930
def parseAll(sHostArch = None):
    """
    Parses all the files listed in g_aaoAllInstrFilesAndDefaultMapAndSet,
    each with its default map.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    # Only the file name and the default map are needed by the worker, the set number is dropped.
    aoFilesAndMaps = [(aoInfo[0], aoInfo[1]) for aoInfo in g_aaoAllInstrFilesAndDefaultMapAndSet];
    return __parseFilesWorker(aoFilesAndMaps, sHostArch);
5939
5940
5941#
5942# Generators (may perhaps move later).
5943#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table line for one instruction.

    The line is an OP(...) macro invocation, or OPVEX(...) if the instruction
    has more than three operands, with the columns padded to fixed offsets.

    Returns the line (without any newline).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro       = 'OPVEX';
        cMaxOperands = 4;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string: mnemonic, a space and the comma separated operand formats.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':               ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();  ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    asColumns = [ sFormat + '",', ];

    #
    # Decoders.  The encoding decides which ones come first.
    #
    asDecoders = [];
    sEncoding  = oInstr.sEncoding;
    if sEncoding is None or sEncoding in ( 'fixed', 'VEX.fixed', ):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert cOperands >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'VEX.ModR/M':
        asDecoders.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asDecoders.extend(['0,'] * cOperands);
    elif sEncoding == 'vex2':
        asDecoders.append('IDX_ParseVex2b,');
    elif sEncoding == 'vex3':
        asDecoders.append('IDX_ParseVex3b,');
    elif sEncoding in g_dInstructionMaps:
        asDecoders.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, i.e. those
    # not covered by the decoders added above, then pad with zeros.
    for oOperand in oInstr.aoOperands[len(asDecoders):]:
        #if sIdx != 'IDX_UseModRM':
        asDecoders.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asDecoders.extend(['0,'] * (cMaxOperands - len(asDecoders)));
    asColumns.extend(asDecoders);

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));
    asColumns.extend(asParams);

    #
    # Flags: the DISOPTYPE_ defines of the hints in sorted hint order, or zero if none.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    asColumns.append((' | '.join(asFlags) or '0') + '),');

    #
    # Format the columns into a line.  A column starts at its designated
    # offset, or after a single space if the line is already beyond that.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        if len(sLine) < offColumn:
            sLine = sLine.ljust(offColumn);
        else:
            sLine += ' ';
        sLine += sColumn;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    #    DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    #  { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
6068
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether leading and trailing None entries should be trimmed off
    the table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For a full table this is (0, len(aoTableOrdered), False).
    """
    _ = oMap; # Use this for overriding.

    # Determine how much we can trim off at either end.
    cTotal = len(aoTableOrdered);
    iEnd   = cTotal;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    iFirst = 0;
    while iFirst < iEnd and aoTableOrdered[iFirst] is None:
        iFirst += 1;

    # Go for the short version when trimming saves at least 1/30th of the
    # table (rounded down, so tables with fewer than 30 entries always
    # qualify).
    # NOTE(review): this comment used to say "more than 30%", which does not
    #               match the '// 30' in the check - confirm which is intended.
    cTrimmed = iFirst + (cTotal - iEnd);
    if cTrimmed < cTotal // 30:
        # Output the full table.
        return (0, cTotal, False);
    return (iFirst, iEnd, True);
6090
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes a DISOPCODE table followed by
    a DISOPMAPDESC range record to oDstFile for each map whose name starts
    with "vex".

    Returns exit code: 0 on success, 1 if parsing failed (the error and a
    traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps selected
    # by 'byte+pfx' are split into four maps, one per prefix variant.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! The header lines are only collected here, they are not written anywhere.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);
        cEntriesPerBlock = 0x10 * cEntriesPerByte;  # Number of table entries covering 16 opcode bytes.
        sTableName       = oMap.getDisasTableName();
        sRangeName       = oMap.getDisasRangeName();

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, iInstrEnd - iInstrStart,));
            asLines.append(             'const DISOPCODE %s[%d] =' % (sTableName, iInstrEnd - iInstrStart,));
        asLines.append('{');

        # Indicate where a short table starts if that isn't at a block boundary.
        if fShortTable and (iInstrStart & (cEntriesPerBlock - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerBlock - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerBlock - 1)) == 0 and cInvalidInstrs >= cEntriesPerBlock:
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerBlock,));
                    iInstr += cEntriesPerBlock - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        # Skip the rest of the entries the block macro covers (the
                        # last one is taken care of by the increment further down).
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): this hands an empty list to the entry formatter, which
                    #               accesses instruction attributes - confirm this branch is
                    #               unreachable or what it is supposed to emit.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # NOTE(review): for an empty range a dummy entry is emitted, while the
        #               AssertCompile below still states zero elements - confirm.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The extern declaration must use the same name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
        #break; #for now
    return 0;
6219
# When run as a script: generate the disassembler tables, writing them to
# stdout (the default destination), and use the result as the exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
6222
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette