Commit e2195274 authored by Jan Beulich's avatar Jan Beulich Committed by Jan Beulich

x86: fold various non-memory operand AVX512VL templates

There's little point carrying up to three templates per insn flavor
when the sole difference is operand size and the dependency on AVX512VL
being enabled. Instead the need for AVX512VL can be derived from an
operand allowing for ZMMword as well as one or both or XMMword and
YMMword (irrespective of whether this is a register or memory operand).
Without further abstraction to deal with the different Disp8MemShift
values between the templates, only a limited set (mostly ones only
allowing for non-memory operands) can be folded, which is being done
here.

Also drop IgnoreSize wherever possible from anything that's being
touched anyway.
parent 7a69eac3
2018-04-26 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (check_VecOperands): Add AVX512VL check. Set
.baseindex.
(match_template): Don't set suffix_check when Intel syntax and
broadcast. Make check_register a per-operand bitmap.
2018-04-26 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (optimize_encoding): Check for zeroing
......
......@@ -4966,6 +4966,30 @@ static int
check_VecOperands (const insn_template *t)
{
unsigned int op;
i386_cpu_flags cpu;
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
/* Templates allowing for ZMMword as well as YMMword and/or XMMword for
any one operand are implicity requiring AVX512VL support if the actual
operand size is YMMword or XMMword. Since this function runs after
template matching, there's no need to check for YMMword/XMMword in
the template. */
cpu = cpu_flags_and (t->cpu_flags, avx512);
if (!cpu_flags_all_zero (&cpu)
&& !t->cpu_flags.bitfield.cpuavx512vl
&& !cpu_arch_flags.bitfield.cpuavx512vl)
{
for (op = 0; op < t->operands; ++op)
{
if (t->operand_types[op].bitfield.zmmword
&& (i.types[op].bitfield.ymmword
|| i.types[op].bitfield.xmmword))
{
i.error = unsupported;
return 1;
}
}
}
/* Without VSIB byte, we can't have a vector register for index. */
if (!t->opcode_modifier.vecsib
......@@ -5095,6 +5119,7 @@ check_VecOperands (const insn_template *t)
{
unsigned int j;
type.bitfield.baseindex = 1;
for (j = 0; j < i.operands; ++j)
{
if (j != op
......@@ -5263,7 +5288,9 @@ match_template (char mnem_suffix)
addr_prefix_disp = -1;
memset (&suffix_check, 0, sizeof (suffix_check));
if (i.suffix == BYTE_MNEM_SUFFIX)
if (intel_syntax && i.broadcast)
/* nothing */;
else if (i.suffix == BYTE_MNEM_SUFFIX)
suffix_check.no_bsuf = 1;
else if (i.suffix == WORD_MNEM_SUFFIX)
suffix_check.no_wsuf = 1;
......@@ -5431,7 +5458,15 @@ match_template (char mnem_suffix)
continue;
/* We check register size if needed. */
check_register = t->opcode_modifier.checkregsize;
if (t->opcode_modifier.checkregsize)
{
check_register = (1 << t->operands) - 1;
if (i.broadcast)
check_register &= ~(1 << i.broadcast->operand);
}
else
check_register = 0;
overlap0 = operand_type_and (i.types[0], operand_types[0]);
switch (t->operands)
{
......@@ -5475,7 +5510,7 @@ match_template (char mnem_suffix)
overlap1 = operand_type_and (i.types[1], operand_types[1]);
if (!operand_type_match (overlap0, i.types[0])
|| !operand_type_match (overlap1, i.types[1])
|| (check_register
|| ((check_register & 3) == 3
&& !operand_type_register_match (i.types[0],
operand_types[0],
i.types[1],
......@@ -5542,30 +5577,32 @@ check_reverse:
/* Fall through. */
case 4:
if (!operand_type_match (overlap3, i.types[3])
|| (check_register
&& (!operand_type_register_match (i.types[1],
|| ((check_register & 0xa) == 0xa
&& !operand_type_register_match (i.types[1],
operand_types[1],
i.types[3],
operand_types[3])
|| !operand_type_register_match (i.types[2],
operand_types[2],
i.types[3],
operand_types[3]))))
operand_types[3]))
|| ((check_register & 0xc) == 0xc
&& !operand_type_register_match (i.types[2],
operand_types[2],
i.types[3],
operand_types[3])))
continue;
/* Fall through. */
case 3:
/* Here we make use of the fact that there are no
reverse match 3 operand instructions. */
if (!operand_type_match (overlap2, i.types[2])
|| (check_register
&& (!operand_type_register_match (i.types[0],
|| ((check_register & 5) == 5
&& !operand_type_register_match (i.types[0],
operand_types[0],
i.types[2],
operand_types[2])
|| !operand_type_register_match (i.types[1],
operand_types[1],
i.types[2],
operand_types[2]))))
operand_types[2]))
|| ((check_register & 6) == 6
&& !operand_type_register_match (i.types[1],
operand_types[1],
i.types[2],
operand_types[2])))
continue;
break;
}
......
2018-04-26 Jan Beulich <jbeulich@suse.com>
* i386-opc.tbl: Fold various non-memory operand AVX512VL
templates into their base ones.
* i386-tlb.h: Re-generate.
2018-04-26 Jan Beulich <jbeulich@suse.com>
* i386-gen.c (cpu_flag_init): Use CPU_XOP_FLAGS for
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment