mirror of
git://git.openembedded.org/meta-openembedded
synced 2026-05-18 22:12:33 +00:00
gcc-4.6: Update linaro patches past 2012.12 release
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
This commit is contained in:
parent
f2179dabaa
commit
8413bf3c5d
@ -0,0 +1,388 @@
|
||||
2011-11-22 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-10-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
gcc/
|
||||
PR tree-optimization/50596
|
||||
* tree-vectorizer.h (vect_is_simple_cond): New prototype.
|
||||
(NUM_PATTERNS): Change to 7.
|
||||
* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New
|
||||
function.
|
||||
(vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern.
|
||||
(vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt
|
||||
if it already has one, and don't set STMT_VINFO_VECTYPE in it
|
||||
if it is already set.
|
||||
* tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle
|
||||
COND_EXPR in pattern stmts.
|
||||
(vect_is_simple_cond): No longer static.
|
||||
|
||||
gcc/testsuite:
|
||||
PR tree-optimization/50596
|
||||
* gcc.dg/vect/vect-cond-8.c: New test.
|
||||
|
||||
2011-10-07 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
gcc/
|
||||
PR tree-optimization/50650
|
||||
* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't
|
||||
call vect_is_simple_cond here, instead fail if cond_expr isn't
|
||||
COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL
|
||||
for cond_expr's first operand.
|
||||
* tree-vect-stmts.c (vect_is_simple_cond): Static again.
|
||||
* tree-vectorizer.h (vect_is_simple_cond): Remove prototype.
|
||||
|
||||
|
||||
gcc/
|
||||
* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce
|
||||
it to integral types only.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/pr30858.c: Expect the error message twice for targets
|
||||
with multiple vector sizes.
|
||||
* gcc.dg/vect/vect-cond-8.c: Rename to...
|
||||
* gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float
|
||||
to int.
|
||||
* lib/target-supports.exp (check_effective_target_vect_condition):
|
||||
Return true for NEON.
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/pr30858.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/pr30858.c 2007-02-22 08:16:18 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/pr30858.c 2011-11-20 09:11:09 +0000
|
||||
@@ -11,5 +11,6 @@
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" { xfail vect_multiple_sizes } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cond-8a.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c 2011-11-20 09:11:09 +0000
|
||||
@@ -0,0 +1,75 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 1024
|
||||
+int a[N], b[N], c[N];
|
||||
+char d[N], e[N], f[N];
|
||||
+unsigned char k[N];
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f1 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N; ++i)
|
||||
+ k[i] = a[i] < b[i] ? 17 : 0;
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f2 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N; ++i)
|
||||
+ k[i] = a[i] < b[i] ? 0 : 24;
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f3 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N; ++i)
|
||||
+ k[i] = a[i] < b[i] ? 51 : 12;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ switch (i % 9)
|
||||
+ {
|
||||
+ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
|
||||
+ case 1: a[i] = 0; b[i] = 0; break;
|
||||
+ case 2: a[i] = i + 1; b[i] = - i - 1; break;
|
||||
+ case 3: a[i] = i; b[i] = i + 7; break;
|
||||
+ case 4: a[i] = i; b[i] = i; break;
|
||||
+ case 5: a[i] = i + 16; b[i] = i + 3; break;
|
||||
+ case 6: a[i] = - i - 5; b[i] = - i; break;
|
||||
+ case 7: a[i] = - i; b[i] = - i; break;
|
||||
+ case 8: a[i] = - i; b[i] = - i - 7; break;
|
||||
+ }
|
||||
+ d[i] = i;
|
||||
+ e[i] = 2 * i;
|
||||
+ }
|
||||
+ f1 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (k[i] != ((i % 3) == 0 ? 17 : 0))
|
||||
+ abort ();
|
||||
+ f2 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (k[i] != ((i % 3) == 0 ? 0 : 24))
|
||||
+ abort ();
|
||||
+ f3 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (k[i] != ((i % 3) == 0 ? 51 : 12))
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-11-21 01:45:54 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-11-22 16:52:23 +0000
|
||||
@@ -3150,7 +3150,8 @@
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
- || [istarget x86_64-*-*] } {
|
||||
+ || [istarget x86_64-*-*]
|
||||
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
|
||||
set et_vect_cond_saved 1
|
||||
}
|
||||
}
|
||||
|
||||
=== modified file 'gcc/tree-vect-patterns.c'
|
||||
--- old/gcc/tree-vect-patterns.c 2011-10-23 13:33:07 +0000
|
||||
+++ new/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000
|
||||
@@ -50,13 +50,16 @@
|
||||
tree *);
|
||||
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
|
||||
tree *, tree *);
|
||||
+static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
|
||||
+ tree *, tree *);
|
||||
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
|
||||
vect_recog_widen_mult_pattern,
|
||||
vect_recog_widen_sum_pattern,
|
||||
vect_recog_dot_prod_pattern,
|
||||
vect_recog_pow_pattern,
|
||||
vect_recog_over_widening_pattern,
|
||||
- vect_recog_widen_shift_pattern};
|
||||
+ vect_recog_widen_shift_pattern,
|
||||
+ vect_recog_mixed_size_cond_pattern};
|
||||
|
||||
|
||||
/* Function widened_name_p
|
||||
@@ -1441,6 +1444,118 @@
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
+/* Function vect_recog_mixed_size_cond_pattern
|
||||
+
|
||||
+ Try to find the following pattern:
|
||||
+
|
||||
+ type x_t, y_t;
|
||||
+ TYPE a_T, b_T, c_T;
|
||||
+ loop:
|
||||
+ S1 a_T = x_t CMP y_t ? b_T : c_T;
|
||||
+
|
||||
+ where type 'TYPE' is an integral type which has different size
|
||||
+ from 'type'. b_T and c_T are constants and if 'TYPE' is wider
|
||||
+ than 'type', the constants need to fit into an integer type
|
||||
+ with the same width as 'type'.
|
||||
+
|
||||
+ Input:
|
||||
+
|
||||
+ * LAST_STMT: A stmt from which the pattern search begins.
|
||||
+
|
||||
+ Output:
|
||||
+
|
||||
+ * TYPE_IN: The type of the input arguments to the pattern.
|
||||
+
|
||||
+ * TYPE_OUT: The type of the output of this pattern.
|
||||
+
|
||||
+ * Return value: A new stmt that will be used to replace the pattern.
|
||||
+ Additionally a def_stmt is added.
|
||||
+
|
||||
+ a_it = x_t CMP y_t ? b_it : c_it;
|
||||
+ a_T = (TYPE) a_it; */
|
||||
+
|
||||
+static gimple
|
||||
+vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
|
||||
+ tree *type_out)
|
||||
+{
|
||||
+ gimple last_stmt = VEC_index (gimple, *stmts, 0);
|
||||
+ tree cond_expr, then_clause, else_clause;
|
||||
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
|
||||
+ tree type, vectype, comp_vectype, comp_type, op, tmp;
|
||||
+ enum machine_mode cmpmode;
|
||||
+ gimple pattern_stmt, def_stmt;
|
||||
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+
|
||||
+ if (!is_gimple_assign (last_stmt)
|
||||
+ || gimple_assign_rhs_code (last_stmt) != COND_EXPR
|
||||
+ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
|
||||
+ return NULL;
|
||||
+
|
||||
+ op = gimple_assign_rhs1 (last_stmt);
|
||||
+ cond_expr = TREE_OPERAND (op, 0);
|
||||
+ then_clause = TREE_OPERAND (op, 1);
|
||||
+ else_clause = TREE_OPERAND (op, 2);
|
||||
+
|
||||
+ if (TREE_CODE (then_clause) != INTEGER_CST
|
||||
+ || TREE_CODE (else_clause) != INTEGER_CST)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (!COMPARISON_CLASS_P (cond_expr))
|
||||
+ return NULL;
|
||||
+
|
||||
+ type = gimple_expr_type (last_stmt);
|
||||
+ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
|
||||
+ if (!INTEGRAL_TYPE_P (comp_type)
|
||||
+ || !INTEGRAL_TYPE_P (type))
|
||||
+ return NULL;
|
||||
+
|
||||
+ comp_vectype = get_vectype_for_scalar_type (comp_type);
|
||||
+ if (comp_vectype == NULL_TREE)
|
||||
+ return NULL;
|
||||
+
|
||||
+ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
|
||||
+
|
||||
+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
|
||||
+ return NULL;
|
||||
+
|
||||
+ vectype = get_vectype_for_scalar_type (type);
|
||||
+ if (vectype == NULL_TREE)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (types_compatible_p (vectype, comp_vectype))
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype)))
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
|
||||
+ {
|
||||
+ if (!int_fits_type_p (then_clause, comp_type)
|
||||
+ || !int_fits_type_p (else_clause, comp_type))
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
|
||||
+ fold_convert (comp_type, then_clause),
|
||||
+ fold_convert (comp_type, else_clause));
|
||||
+ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp);
|
||||
+
|
||||
+ pattern_stmt
|
||||
+ = gimple_build_assign_with_ops (NOP_EXPR,
|
||||
+ vect_recog_temp_ssa_var (type, NULL),
|
||||
+ gimple_assign_lhs (def_stmt), NULL_TREE);
|
||||
+
|
||||
+ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
|
||||
+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
|
||||
+ set_vinfo_for_stmt (def_stmt, def_stmt_info);
|
||||
+ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype;
|
||||
+ *type_in = vectype;
|
||||
+ *type_out = vectype;
|
||||
+
|
||||
+ return pattern_stmt;
|
||||
+}
|
||||
+
|
||||
+
|
||||
/* Mark statements that are involved in a pattern. */
|
||||
|
||||
static inline void
|
||||
@@ -1468,14 +1583,18 @@
|
||||
if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
|
||||
{
|
||||
def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
|
||||
- set_vinfo_for_stmt (def_stmt,
|
||||
- new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
|
||||
+ def_stmt_info = vinfo_for_stmt (def_stmt);
|
||||
+ if (def_stmt_info == NULL)
|
||||
+ {
|
||||
+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
|
||||
+ set_vinfo_for_stmt (def_stmt, def_stmt_info);
|
||||
+ }
|
||||
gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
|
||||
- def_stmt_info = vinfo_for_stmt (def_stmt);
|
||||
STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
|
||||
STMT_VINFO_DEF_TYPE (def_stmt_info)
|
||||
= STMT_VINFO_DEF_TYPE (orig_stmt_info);
|
||||
- STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
|
||||
+ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
|
||||
+ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
=== modified file 'gcc/tree-vect-stmts.c'
|
||||
--- old/gcc/tree-vect-stmts.c 2011-11-14 11:38:08 +0000
|
||||
+++ new/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000
|
||||
@@ -655,20 +655,40 @@
|
||||
tree rhs = gimple_assign_rhs1 (stmt);
|
||||
unsigned int op_num;
|
||||
tree op;
|
||||
+ enum tree_code rhs_code;
|
||||
switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
|
||||
{
|
||||
case GIMPLE_SINGLE_RHS:
|
||||
- op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
|
||||
- for (i = 0; i < op_num; i++)
|
||||
- {
|
||||
- op = TREE_OPERAND (rhs, i);
|
||||
- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
|
||||
- &worklist))
|
||||
- {
|
||||
- VEC_free (gimple, heap, worklist);
|
||||
- return false;
|
||||
- }
|
||||
- }
|
||||
+ op = gimple_assign_rhs1 (stmt);
|
||||
+ rhs_code = gimple_assign_rhs_code (stmt);
|
||||
+ i = 0;
|
||||
+ if (rhs_code == COND_EXPR
|
||||
+ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0)))
|
||||
+ {
|
||||
+ op = TREE_OPERAND (op, 0);
|
||||
+ if (!process_use (stmt, TREE_OPERAND (op, 0),
|
||||
+ loop_vinfo,
|
||||
+ live_p, relevant, &worklist)
|
||||
+ || !process_use (stmt, TREE_OPERAND (op, 1),
|
||||
+ loop_vinfo,
|
||||
+ live_p, relevant, &worklist))
|
||||
+ {
|
||||
+ VEC_free (gimple, heap, worklist);
|
||||
+ return false;
|
||||
+ }
|
||||
+ i = 1;
|
||||
+ }
|
||||
+ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
|
||||
+ for (i; i < op_num; i++)
|
||||
+ {
|
||||
+ op = TREE_OPERAND (rhs, i);
|
||||
+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
|
||||
+ &worklist))
|
||||
+ {
|
||||
+ VEC_free (gimple, heap, worklist);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
break;
|
||||
|
||||
case GIMPLE_BINARY_RHS:
|
||||
|
||||
=== modified file 'gcc/tree-vectorizer.h'
|
||||
--- old/gcc/tree-vectorizer.h 2011-11-14 11:38:08 +0000
|
||||
+++ new/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000
|
||||
@@ -917,7 +917,7 @@
|
||||
Additional pattern recognition functions can (and will) be added
|
||||
in the future. */
|
||||
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
|
||||
-#define NUM_PATTERNS 6
|
||||
+#define NUM_PATTERNS 7
|
||||
void vect_pattern_recog (loop_vec_info);
|
||||
|
||||
/* In tree-vectorizer.c. */
|
||||
|
||||
@ -0,0 +1,805 @@
|
||||
2011-11-22 Ira Rosen <ira.rosen@linaro.org>
|
||||
Backport from mainline:
|
||||
|
||||
2011-11-06 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vectorizer.h (vectorizable_condition): Add argument.
|
||||
* tree-vect-loop.c (vectorizable_reduction): Fail for condition
|
||||
in SLP. Update calls to vectorizable_condition.
|
||||
* tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to
|
||||
the arguments. Pass it to vect_is_simple_use_1.
|
||||
(vectorizable_condition): Add slp_node to the arguments. Support
|
||||
vectorization of basic blocks. Fail for reduction in SLP. Update
|
||||
calls to vect_is_simple_cond and vect_is_simple_use. Support SLP:
|
||||
call vect_get_slp_defs to get vector operands.
|
||||
(vect_analyze_stmt): Update calls to vectorizable_condition.
|
||||
(vect_transform_stmt): Likewise.
|
||||
* tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR.
|
||||
(vect_get_and_check_slp_defs): Handle COND_EXPR. Allow pattern
|
||||
def stmts.
|
||||
(vect_build_slp_tree): Handle COND_EXPR.
|
||||
(vect_analyze_slp_instance): Push pattern statements to root node.
|
||||
(vect_get_constant_vectors): Fix comments. Handle COND_EXPR.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-cond-1.c: New test.
|
||||
* gcc.dg/vect/slp-cond-1.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 2011-11-20 08:24:08 +0000
|
||||
@@ -0,0 +1,46 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 128
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+foo (int *a, int stride)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < N/stride; i++, a += stride)
|
||||
+ {
|
||||
+ a[0] = a[0] ? 1 : 5;
|
||||
+ a[1] = a[1] ? 2 : 6;
|
||||
+ a[2] = a[2] ? 3 : 7;
|
||||
+ a[3] = a[3] ? 4 : 8;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int a[N];
|
||||
+int main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ a[i] = i;
|
||||
+
|
||||
+ foo (a, 4);
|
||||
+
|
||||
+ for (i = 1; i < N; i++)
|
||||
+ if (a[i] != i%4 + 1)
|
||||
+ abort ();
|
||||
+
|
||||
+ if (a[0] != 5)
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/slp-cond-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2011-11-20 08:24:08 +0000
|
||||
@@ -0,0 +1,126 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 32
|
||||
+int a[N], b[N];
|
||||
+int d[N], e[N];
|
||||
+int k[N];
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f1 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/4; i++)
|
||||
+ {
|
||||
+ k[4*i] = a[4*i] < b[4*i] ? 17 : 0;
|
||||
+ k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0;
|
||||
+ k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0;
|
||||
+ k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f2 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/2; ++i)
|
||||
+ {
|
||||
+ k[2*i] = a[2*i] < b[2*i] ? 0 : 24;
|
||||
+ k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f3 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/2; ++i)
|
||||
+ {
|
||||
+ k[2*i] = a[2*i] < b[2*i] ? 51 : 12;
|
||||
+ k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+f4 (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/2; ++i)
|
||||
+ {
|
||||
+ int d0 = d[2*i], e0 = e[2*i];
|
||||
+ int d1 = d[2*i+1], e1 = e[2*i+1];
|
||||
+ k[2*i] = a[2*i] >= b[2*i] ? d0 : e0;
|
||||
+ k[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ switch (i % 9)
|
||||
+ {
|
||||
+ case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
|
||||
+ case 1: a[i] = 0; b[i] = 0; break;
|
||||
+ case 2: a[i] = i + 1; b[i] = - i - 1; break;
|
||||
+ case 3: a[i] = i; b[i] = i + 7; break;
|
||||
+ case 4: a[i] = i; b[i] = i; break;
|
||||
+ case 5: a[i] = i + 16; b[i] = i + 3; break;
|
||||
+ case 6: a[i] = - i - 5; b[i] = - i; break;
|
||||
+ case 7: a[i] = - i; b[i] = - i; break;
|
||||
+ case 8: a[i] = - i; b[i] = - i - 7; break;
|
||||
+ }
|
||||
+ d[i] = i;
|
||||
+ e[i] = 2 * i;
|
||||
+ }
|
||||
+ f1 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (k[i] != ((i % 3) == 0 ? 17 : 0))
|
||||
+ abort ();
|
||||
+
|
||||
+ f2 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ switch (i % 9)
|
||||
+ {
|
||||
+ case 0:
|
||||
+ case 6:
|
||||
+ if (k[i] != ((i/9 % 2) == 0 ? 0 : 7))
|
||||
+ abort ();
|
||||
+ break;
|
||||
+ case 1:
|
||||
+ case 5:
|
||||
+ case 7:
|
||||
+ if (k[i] != ((i/9 % 2) == 0 ? 4 : 24))
|
||||
+ abort ();
|
||||
+ break;
|
||||
+ case 2:
|
||||
+ case 4:
|
||||
+ case 8:
|
||||
+ if (k[i] != ((i/9 % 2) == 0 ? 24 : 4))
|
||||
+ abort ();
|
||||
+ break;
|
||||
+ case 3:
|
||||
+ if (k[i] != ((i/9 % 2) == 0 ? 7 : 0))
|
||||
+ abort ();
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ f3 ();
|
||||
+
|
||||
+ f4 ();
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (k[i] != ((i % 3) == 0 ? e[i] : d[i]))
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/tree-vect-loop.c'
|
||||
--- old/gcc/tree-vect-loop.c 2011-11-14 11:38:08 +0000
|
||||
+++ new/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000
|
||||
@@ -4087,6 +4087,9 @@
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
+ if (code == COND_EXPR && slp_node)
|
||||
+ return false;
|
||||
+
|
||||
scalar_dest = gimple_assign_lhs (stmt);
|
||||
scalar_type = TREE_TYPE (scalar_dest);
|
||||
if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
|
||||
@@ -4161,7 +4164,7 @@
|
||||
|
||||
if (code == COND_EXPR)
|
||||
{
|
||||
- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
|
||||
+ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "unsupported condition in reduction");
|
||||
@@ -4433,7 +4436,7 @@
|
||||
gcc_assert (!slp_node);
|
||||
vectorizable_condition (stmt, gsi, vec_stmt,
|
||||
PHI_RESULT (VEC_index (gimple, phis, 0)),
|
||||
- reduc_index);
|
||||
+ reduc_index, NULL);
|
||||
/* Multiple types are not supported for condition. */
|
||||
break;
|
||||
}
|
||||
|
||||
=== modified file 'gcc/tree-vect-slp.c'
|
||||
--- old/gcc/tree-vect-slp.c 2011-11-14 11:38:08 +0000
|
||||
+++ new/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000
|
||||
@@ -109,7 +109,11 @@
|
||||
if (is_gimple_call (stmt))
|
||||
nops = gimple_call_num_args (stmt);
|
||||
else if (is_gimple_assign (stmt))
|
||||
- nops = gimple_num_ops (stmt) - 1;
|
||||
+ {
|
||||
+ nops = gimple_num_ops (stmt) - 1;
|
||||
+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
|
||||
+ nops = 4;
|
||||
+ }
|
||||
else
|
||||
return NULL;
|
||||
|
||||
@@ -190,20 +194,51 @@
|
||||
bool different_types = false;
|
||||
bool pattern = false;
|
||||
slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
|
||||
+ int op_idx = 1;
|
||||
+ tree compare_rhs = NULL_TREE, rhs = NULL_TREE;
|
||||
+ int cond_idx = -1;
|
||||
|
||||
if (loop_vinfo)
|
||||
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
|
||||
if (is_gimple_call (stmt))
|
||||
number_of_oprnds = gimple_call_num_args (stmt);
|
||||
+ else if (is_gimple_assign (stmt))
|
||||
+ {
|
||||
+ number_of_oprnds = gimple_num_ops (stmt) - 1;
|
||||
+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
|
||||
+ {
|
||||
+ number_of_oprnds = 4;
|
||||
+ cond_idx = 0;
|
||||
+ rhs = gimple_assign_rhs1 (stmt);
|
||||
+ }
|
||||
+ }
|
||||
else
|
||||
- number_of_oprnds = gimple_num_ops (stmt) - 1;
|
||||
+ return false;
|
||||
|
||||
for (i = 0; i < number_of_oprnds; i++)
|
||||
{
|
||||
- oprnd = gimple_op (stmt, i + 1);
|
||||
+ if (compare_rhs)
|
||||
+ oprnd = compare_rhs;
|
||||
+ else
|
||||
+ oprnd = gimple_op (stmt, op_idx++);
|
||||
+
|
||||
oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
|
||||
|
||||
+ if (-1 < cond_idx && cond_idx < 4)
|
||||
+ {
|
||||
+ if (compare_rhs)
|
||||
+ compare_rhs = NULL_TREE;
|
||||
+ else
|
||||
+ oprnd = TREE_OPERAND (rhs, cond_idx++);
|
||||
+ }
|
||||
+
|
||||
+ if (COMPARISON_CLASS_P (oprnd))
|
||||
+ {
|
||||
+ compare_rhs = TREE_OPERAND (oprnd, 1);
|
||||
+ oprnd = TREE_OPERAND (oprnd, 0);
|
||||
+ }
|
||||
+
|
||||
if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
|
||||
&dt)
|
||||
|| (!def_stmt && dt != vect_constant_def))
|
||||
@@ -243,8 +278,7 @@
|
||||
def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
|
||||
dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
|
||||
|
||||
- if (dt == vect_unknown_def_type
|
||||
- || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
|
||||
+ if (dt == vect_unknown_def_type)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Unsupported pattern.");
|
||||
@@ -423,6 +457,7 @@
|
||||
VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
|
||||
gimple stmt = VEC_index (gimple, stmts, 0);
|
||||
enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
|
||||
+ enum tree_code first_cond_code = ERROR_MARK;
|
||||
tree lhs;
|
||||
bool stop_recursion = false, need_same_oprnds = false;
|
||||
tree vectype, scalar_type, first_op1 = NULL_TREE;
|
||||
@@ -439,11 +474,18 @@
|
||||
VEC (slp_oprnd_info, heap) *oprnds_info;
|
||||
unsigned int nops;
|
||||
slp_oprnd_info oprnd_info;
|
||||
+ tree cond;
|
||||
|
||||
if (is_gimple_call (stmt))
|
||||
nops = gimple_call_num_args (stmt);
|
||||
+ else if (is_gimple_assign (stmt))
|
||||
+ {
|
||||
+ nops = gimple_num_ops (stmt) - 1;
|
||||
+ if (gimple_assign_rhs_code (stmt) == COND_EXPR)
|
||||
+ nops = 4;
|
||||
+ }
|
||||
else
|
||||
- nops = gimple_num_ops (stmt) - 1;
|
||||
+ return false;
|
||||
|
||||
oprnds_info = vect_create_oprnd_info (nops, group_size);
|
||||
|
||||
@@ -484,6 +526,22 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
+ if (is_gimple_assign (stmt)
|
||||
+ && gimple_assign_rhs_code (stmt) == COND_EXPR
|
||||
+ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0))
|
||||
+ && !COMPARISON_CLASS_P (cond))
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ {
|
||||
+ fprintf (vect_dump,
|
||||
+ "Build SLP failed: condition is not comparison ");
|
||||
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
+ }
|
||||
+
|
||||
+ vect_free_oprnd_info (&oprnds_info);
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
|
||||
vectype = get_vectype_for_scalar_type (scalar_type);
|
||||
if (!vectype)
|
||||
@@ -737,7 +795,8 @@
|
||||
|
||||
/* Not memory operation. */
|
||||
if (TREE_CODE_CLASS (rhs_code) != tcc_binary
|
||||
- && TREE_CODE_CLASS (rhs_code) != tcc_unary)
|
||||
+ && TREE_CODE_CLASS (rhs_code) != tcc_unary
|
||||
+ && rhs_code != COND_EXPR)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
{
|
||||
@@ -750,6 +809,26 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
+ if (rhs_code == COND_EXPR)
|
||||
+ {
|
||||
+ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
||||
+
|
||||
+ if (i == 0)
|
||||
+ first_cond_code = TREE_CODE (cond_expr);
|
||||
+ else if (first_cond_code != TREE_CODE (cond_expr))
|
||||
+ {
|
||||
+ if (vect_print_dump_info (REPORT_SLP))
|
||||
+ {
|
||||
+ fprintf (vect_dump, "Build SLP failed: different"
|
||||
+ " operation");
|
||||
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
||||
+ }
|
||||
+
|
||||
+ vect_free_oprnd_info (&oprnds_info);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Find the def-stmts. */
|
||||
if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
|
||||
ncopies_for_cost, (i == 0),
|
||||
@@ -1395,7 +1474,12 @@
|
||||
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
|
||||
while (next)
|
||||
{
|
||||
- VEC_safe_push (gimple, heap, scalar_stmts, next);
|
||||
+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
|
||||
+ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
|
||||
+ VEC_safe_push (gimple, heap, scalar_stmts,
|
||||
+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
|
||||
+ else
|
||||
+ VEC_safe_push (gimple, heap, scalar_stmts, next);
|
||||
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
|
||||
}
|
||||
}
|
||||
@@ -1404,7 +1488,7 @@
|
||||
/* Collect reduction statements. */
|
||||
VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
|
||||
for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
|
||||
- VEC_safe_push (gimple, heap, scalar_stmts, next);
|
||||
+ VEC_safe_push (gimple, heap, scalar_stmts, next);
|
||||
}
|
||||
|
||||
node = vect_create_new_slp_node (scalar_stmts);
|
||||
@@ -2160,15 +2244,15 @@
|
||||
|
||||
For example, we have two scalar operands, s1 and s2 (e.g., group of
|
||||
strided accesses of size two), while NUNITS is four (i.e., four scalars
|
||||
- of this type can be packed in a vector). The output vector will contain
|
||||
- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
|
||||
+ of this type can be packed in a vector). The output vector will contain
|
||||
+ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
|
||||
will be 2).
|
||||
|
||||
If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
|
||||
containing the operands.
|
||||
|
||||
For example, NUNITS is four as before, and the group size is 8
|
||||
- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
|
||||
+ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
|
||||
{s5, s6, s7, s8}. */
|
||||
|
||||
number_of_copies = least_common_multiple (nunits, group_size) / group_size;
|
||||
@@ -2180,8 +2264,18 @@
|
||||
{
|
||||
if (is_store)
|
||||
op = gimple_assign_rhs1 (stmt);
|
||||
- else
|
||||
+ else if (gimple_assign_rhs_code (stmt) != COND_EXPR)
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
+ else
|
||||
+ {
|
||||
+ if (op_num == 0 || op_num == 1)
|
||||
+ {
|
||||
+ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
||||
+ op = TREE_OPERAND (cond, op_num);
|
||||
+ }
|
||||
+ else
|
||||
+ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1);
|
||||
+ }
|
||||
|
||||
if (reduc_index != -1)
|
||||
{
|
||||
|
||||
=== modified file 'gcc/tree-vect-stmts.c'
|
||||
--- old/gcc/tree-vect-stmts.c 2011-11-22 16:52:23 +0000
|
||||
+++ new/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000
|
||||
@@ -4816,7 +4816,7 @@
|
||||
condition operands are supportable using vec_is_simple_use. */
|
||||
|
||||
static bool
|
||||
-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
|
||||
+vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
||||
{
|
||||
tree lhs, rhs;
|
||||
tree def;
|
||||
@@ -4831,7 +4831,7 @@
|
||||
if (TREE_CODE (lhs) == SSA_NAME)
|
||||
{
|
||||
gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
|
||||
- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
|
||||
+ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
|
||||
&dt))
|
||||
return false;
|
||||
}
|
||||
@@ -4842,7 +4842,7 @@
|
||||
if (TREE_CODE (rhs) == SSA_NAME)
|
||||
{
|
||||
gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
|
||||
- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
|
||||
+ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
|
||||
&dt))
|
||||
return false;
|
||||
}
|
||||
@@ -4868,7 +4868,8 @@
|
||||
|
||||
bool
|
||||
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
- gimple *vec_stmt, tree reduc_def, int reduc_index)
|
||||
+ gimple *vec_stmt, tree reduc_def, int reduc_index,
|
||||
+ slp_tree slp_node)
|
||||
{
|
||||
tree scalar_dest = NULL_TREE;
|
||||
tree vec_dest = NULL_TREE;
|
||||
@@ -4885,19 +4886,24 @@
|
||||
tree def;
|
||||
enum vect_def_type dt, dts[4];
|
||||
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
|
||||
+ int ncopies;
|
||||
enum tree_code code;
|
||||
stmt_vec_info prev_stmt_info = NULL;
|
||||
- int j;
|
||||
+ int i, j;
|
||||
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
+ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
|
||||
+ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
|
||||
|
||||
- /* FORNOW: unsupported in basic block SLP. */
|
||||
- gcc_assert (loop_vinfo);
|
||||
+ if (slp_node || PURE_SLP_STMT (stmt_info))
|
||||
+ ncopies = 1;
|
||||
+ else
|
||||
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
|
||||
|
||||
gcc_assert (ncopies >= 1);
|
||||
- if (reduc_index && ncopies > 1)
|
||||
+ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info)))
|
||||
return false; /* FORNOW */
|
||||
|
||||
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
|
||||
@@ -4905,10 +4911,6 @@
|
||||
&& reduc_def))
|
||||
return false;
|
||||
|
||||
- /* FORNOW: SLP not supported. */
|
||||
- if (STMT_SLP_TYPE (stmt_info))
|
||||
- return false;
|
||||
-
|
||||
/* FORNOW: not yet supported. */
|
||||
if (STMT_VINFO_LIVE_P (stmt_info))
|
||||
{
|
||||
@@ -4932,7 +4934,7 @@
|
||||
then_clause = TREE_OPERAND (op, 1);
|
||||
else_clause = TREE_OPERAND (op, 2);
|
||||
|
||||
- if (!vect_is_simple_cond (cond_expr, loop_vinfo))
|
||||
+ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo))
|
||||
return false;
|
||||
|
||||
/* We do not handle two different vector types for the condition
|
||||
@@ -4944,7 +4946,7 @@
|
||||
if (TREE_CODE (then_clause) == SSA_NAME)
|
||||
{
|
||||
gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
|
||||
- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
|
||||
+ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
|
||||
&then_def_stmt, &def, &dt))
|
||||
return false;
|
||||
}
|
||||
@@ -4956,7 +4958,7 @@
|
||||
if (TREE_CODE (else_clause) == SSA_NAME)
|
||||
{
|
||||
gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
|
||||
- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
|
||||
+ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
|
||||
&else_def_stmt, &def, &dt))
|
||||
return false;
|
||||
}
|
||||
@@ -4974,7 +4976,15 @@
|
||||
return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
|
||||
}
|
||||
|
||||
- /* Transform */
|
||||
+ /* Transform. */
|
||||
+
|
||||
+ if (!slp_node)
|
||||
+ {
|
||||
+ vec_oprnds0 = VEC_alloc (tree, heap, 1);
|
||||
+ vec_oprnds1 = VEC_alloc (tree, heap, 1);
|
||||
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
|
||||
+ vec_oprnds3 = VEC_alloc (tree, heap, 1);
|
||||
+ }
|
||||
|
||||
/* Handle def. */
|
||||
scalar_dest = gimple_assign_lhs (stmt);
|
||||
@@ -4983,67 +4993,118 @@
|
||||
/* Handle cond expr. */
|
||||
for (j = 0; j < ncopies; j++)
|
||||
{
|
||||
- gimple new_stmt;
|
||||
+ gimple new_stmt = NULL;
|
||||
if (j == 0)
|
||||
{
|
||||
- gimple gtemp;
|
||||
- vec_cond_lhs =
|
||||
+ if (slp_node)
|
||||
+ {
|
||||
+ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
|
||||
+ VEC (slp_void_p, heap) *vec_defs;
|
||||
+
|
||||
+ vec_defs = VEC_alloc (slp_void_p, heap, 4);
|
||||
+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
|
||||
+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
|
||||
+ VEC_safe_push (tree, heap, ops, then_clause);
|
||||
+ VEC_safe_push (tree, heap, ops, else_clause);
|
||||
+ vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
|
||||
+ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
|
||||
+ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
|
||||
+ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
|
||||
+ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
|
||||
+
|
||||
+ VEC_free (tree, heap, ops);
|
||||
+ VEC_free (slp_void_p, heap, vec_defs);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ gimple gtemp;
|
||||
+ vec_cond_lhs =
|
||||
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
|
||||
stmt, NULL);
|
||||
- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
|
||||
+ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
|
||||
NULL, &gtemp, &def, &dts[0]);
|
||||
- vec_cond_rhs =
|
||||
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
|
||||
- stmt, NULL);
|
||||
- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
|
||||
- NULL, &gtemp, &def, &dts[1]);
|
||||
- if (reduc_index == 1)
|
||||
- vec_then_clause = reduc_def;
|
||||
- else
|
||||
- {
|
||||
- vec_then_clause = vect_get_vec_def_for_operand (then_clause,
|
||||
- stmt, NULL);
|
||||
- vect_is_simple_use (then_clause, loop_vinfo,
|
||||
- NULL, &gtemp, &def, &dts[2]);
|
||||
- }
|
||||
- if (reduc_index == 2)
|
||||
- vec_else_clause = reduc_def;
|
||||
- else
|
||||
- {
|
||||
- vec_else_clause = vect_get_vec_def_for_operand (else_clause,
|
||||
- stmt, NULL);
|
||||
- vect_is_simple_use (else_clause, loop_vinfo,
|
||||
+
|
||||
+ vec_cond_rhs =
|
||||
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
|
||||
+ stmt, NULL);
|
||||
+ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
|
||||
+ NULL, &gtemp, &def, &dts[1]);
|
||||
+ if (reduc_index == 1)
|
||||
+ vec_then_clause = reduc_def;
|
||||
+ else
|
||||
+ {
|
||||
+ vec_then_clause = vect_get_vec_def_for_operand (then_clause,
|
||||
+ stmt, NULL);
|
||||
+ vect_is_simple_use (then_clause, loop_vinfo,
|
||||
+ NULL, &gtemp, &def, &dts[2]);
|
||||
+ }
|
||||
+ if (reduc_index == 2)
|
||||
+ vec_else_clause = reduc_def;
|
||||
+ else
|
||||
+ {
|
||||
+ vec_else_clause = vect_get_vec_def_for_operand (else_clause,
|
||||
+ stmt, NULL);
|
||||
+ vect_is_simple_use (else_clause, loop_vinfo,
|
||||
NULL, &gtemp, &def, &dts[3]);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
|
||||
- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
|
||||
+ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
|
||||
+ VEC_pop (tree, vec_oprnds0));
|
||||
+ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
|
||||
+ VEC_pop (tree, vec_oprnds1));
|
||||
vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
|
||||
- vec_then_clause);
|
||||
+ VEC_pop (tree, vec_oprnds2));
|
||||
vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
|
||||
- vec_else_clause);
|
||||
+ VEC_pop (tree, vec_oprnds3));
|
||||
+ }
|
||||
+
|
||||
+ if (!slp_node)
|
||||
+ {
|
||||
+ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
|
||||
+ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
|
||||
+ VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
|
||||
+ VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
|
||||
}
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
- vec_compare = build2 (TREE_CODE (cond_expr), vectype,
|
||||
- vec_cond_lhs, vec_cond_rhs);
|
||||
- vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
|
||||
- vec_compare, vec_then_clause, vec_else_clause);
|
||||
-
|
||||
- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
|
||||
- new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
- gimple_assign_set_lhs (new_stmt, new_temp);
|
||||
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
- if (j == 0)
|
||||
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
- else
|
||||
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
-
|
||||
- prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
+ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
|
||||
+ {
|
||||
+ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
|
||||
+ vec_then_clause = VEC_index (tree, vec_oprnds2, i);
|
||||
+ vec_else_clause = VEC_index (tree, vec_oprnds3, i);
|
||||
+
|
||||
+ vec_compare = build2 (TREE_CODE (cond_expr), vectype,
|
||||
+ vec_cond_lhs, vec_cond_rhs);
|
||||
+ vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
|
||||
+ vec_compare, vec_then_clause, vec_else_clause);
|
||||
+
|
||||
+ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
|
||||
+ new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
+ gimple_assign_set_lhs (new_stmt, new_temp);
|
||||
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
+ if (slp_node)
|
||||
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
|
||||
+ }
|
||||
+
|
||||
+ if (slp_node)
|
||||
+ continue;
|
||||
+
|
||||
+ if (j == 0)
|
||||
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
+ else
|
||||
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
+
|
||||
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
|
||||
+ VEC_free (tree, heap, vec_oprnds0);
|
||||
+ VEC_free (tree, heap, vec_oprnds1);
|
||||
+ VEC_free (tree, heap, vec_oprnds2);
|
||||
+ VEC_free (tree, heap, vec_oprnds3);
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -5217,7 +5278,7 @@
|
||||
|| vectorizable_call (stmt, NULL, NULL)
|
||||
|| vectorizable_store (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_reduction (stmt, NULL, NULL, NULL)
|
||||
- || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
|
||||
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
|
||||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
@@ -5227,7 +5288,8 @@
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node)
|
||||
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|
||||
- || vectorizable_store (stmt, NULL, NULL, node));
|
||||
+ || vectorizable_store (stmt, NULL, NULL, node)
|
||||
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
|
||||
}
|
||||
|
||||
if (!ok)
|
||||
@@ -5343,8 +5405,7 @@
|
||||
break;
|
||||
|
||||
case condition_vec_info_type:
|
||||
- gcc_assert (!slp_node);
|
||||
- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
|
||||
+ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
|
||||
gcc_assert (done);
|
||||
break;
|
||||
|
||||
|
||||
=== modified file 'gcc/tree-vectorizer.h'
|
||||
--- old/gcc/tree-vectorizer.h 2011-11-22 16:52:23 +0000
|
||||
+++ new/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000
|
||||
@@ -832,7 +832,7 @@
|
||||
extern void vect_remove_stores (gimple);
|
||||
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
|
||||
extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
|
||||
- tree, int);
|
||||
+ tree, int, slp_tree);
|
||||
extern void vect_get_load_cost (struct data_reference *, int, bool,
|
||||
unsigned int *, unsigned int *);
|
||||
extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
|
||||
|
||||
@@ -0,0 +1,495 @@
|
||||
2011-11-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-vectorizer.h (vect_pattern_recog): Add new argument.
|
||||
* tree-vect-loop.c (vect_analyze_loop_2): Update call to
|
||||
vect_pattern_recog.
|
||||
* tree-vect-patterns.c (widened_name_p): Pass basic block
|
||||
info to vect_is_simple_use.
|
||||
(vect_recog_dot_prod_pattern): Fail for basic blocks.
|
||||
(vect_recog_widen_sum_pattern): Likewise.
|
||||
(vect_handle_widen_op_by_const): Support basic blocks.
|
||||
(vect_operation_fits_smaller_type,
|
||||
vect_recog_over_widening_pattern): Likewise.
|
||||
(vect_recog_mixed_size_cond_pattern): Support basic blocks.
|
||||
Add printing.
|
||||
(vect_mark_pattern_stmts): Update calls to new_stmt_vec_info.
|
||||
(vect_pattern_recog_1): Check for reduction only in loops.
|
||||
(vect_pattern_recog): Add new argument. Support basic blocks.
|
||||
* tree-vect-stmts.c (vectorizable_conversion): Pass basic block
|
||||
info to vect_is_simple_use_1.
|
||||
* tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic
|
||||
blocks.
|
||||
(vect_slp_analyze_bb_1): Call vect_pattern_recog.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-pattern-1.c: New test.
|
||||
* gcc.dg/vect/bb-slp-pattern-2.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c 2011-11-23 06:37:10 +0000
|
||||
@@ -0,0 +1,55 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 8
|
||||
+
|
||||
+unsigned short X[N];
|
||||
+unsigned short Y[N];
|
||||
+unsigned int result[N];
|
||||
+
|
||||
+/* unsigned short->unsigned int widening-mult. */
|
||||
+__attribute__ ((noinline, noclone)) void
|
||||
+foo (void)
|
||||
+{
|
||||
+ result[0] = (unsigned int)(X[0] * Y[0]);
|
||||
+ result[1] = (unsigned int)(X[1] * Y[1]);
|
||||
+ result[2] = (unsigned int)(X[2] * Y[2]);
|
||||
+ result[3] = (unsigned int)(X[3] * Y[3]);
|
||||
+ result[4] = (unsigned int)(X[4] * Y[4]);
|
||||
+ result[5] = (unsigned int)(X[5] * Y[5]);
|
||||
+ result[6] = (unsigned int)(X[6] * Y[6]);
|
||||
+ result[7] = (unsigned int)(X[7] * Y[7]);
|
||||
+}
|
||||
+
|
||||
+int main (void)
|
||||
+{
|
||||
+ int i, tmp;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ X[i] = i;
|
||||
+ Y[i] = 64-i;
|
||||
+ }
|
||||
+
|
||||
+ foo ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ __asm__ volatile ("");
|
||||
+ tmp = X[i] * Y[i];
|
||||
+ if (result[i] != tmp)
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
+/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c 2011-11-23 06:37:10 +0000
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 128
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+foo (short * __restrict__ a, int * __restrict__ b, int stride)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < N/stride; i++, a += stride, b += stride)
|
||||
+ {
|
||||
+ a[0] = b[0] ? 1 : 7;
|
||||
+ a[1] = b[1] ? 2 : 0;
|
||||
+ a[2] = b[2] ? 3 : 0;
|
||||
+ a[3] = b[3] ? 4 : 0;
|
||||
+ a[4] = b[4] ? 5 : 0;
|
||||
+ a[5] = b[5] ? 6 : 0;
|
||||
+ a[6] = b[6] ? 7 : 0;
|
||||
+ a[7] = b[7] ? 8 : 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+short a[N];
|
||||
+int b[N];
|
||||
+int main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ a[i] = i;
|
||||
+ b[i] = -i;
|
||||
+ }
|
||||
+
|
||||
+ foo (a, b, 8);
|
||||
+
|
||||
+ for (i = 1; i < N; i++)
|
||||
+ if (a[i] != i%8 + 1)
|
||||
+ abort ();
|
||||
+
|
||||
+ if (a[0] != 7)
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== modified file 'gcc/tree-vect-loop.c'
|
||||
--- old/gcc/tree-vect-loop.c 2011-11-20 08:24:08 +0000
|
||||
+++ new/gcc/tree-vect-loop.c 2011-11-23 06:47:35 +0000
|
||||
@@ -1458,7 +1458,7 @@
|
||||
|
||||
vect_analyze_scalar_cycles (loop_vinfo);
|
||||
|
||||
- vect_pattern_recog (loop_vinfo);
|
||||
+ vect_pattern_recog (loop_vinfo, NULL);
|
||||
|
||||
/* Data-flow analysis to detect stmts that do not need to be vectorized. */
|
||||
|
||||
|
||||
=== modified file 'gcc/tree-vect-patterns.c'
|
||||
--- old/gcc/tree-vect-patterns.c 2011-11-20 09:11:09 +0000
|
||||
+++ new/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000
|
||||
@@ -83,11 +83,13 @@
|
||||
tree oprnd0;
|
||||
enum vect_def_type dt;
|
||||
tree def;
|
||||
+ bb_vec_info bb_vinfo;
|
||||
|
||||
stmt_vinfo = vinfo_for_stmt (use_stmt);
|
||||
loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
|
||||
|
||||
- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
|
||||
+ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt))
|
||||
return false;
|
||||
|
||||
if (dt != vect_internal_def
|
||||
@@ -111,7 +113,7 @@
|
||||
|| (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
|
||||
return false;
|
||||
|
||||
- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
|
||||
+ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy,
|
||||
&dt))
|
||||
return false;
|
||||
|
||||
@@ -188,9 +190,14 @@
|
||||
gimple pattern_stmt;
|
||||
tree prod_type;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+ struct loop *loop;
|
||||
tree var, rhs;
|
||||
|
||||
+ if (!loop_info)
|
||||
+ return NULL;
|
||||
+
|
||||
+ loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
|
||||
@@ -358,8 +365,16 @@
|
||||
{
|
||||
tree new_type, new_oprnd, tmp;
|
||||
gimple new_stmt;
|
||||
- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
|
||||
- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+ loop_vec_info loop_vinfo;
|
||||
+ struct loop *loop = NULL;
|
||||
+ bb_vec_info bb_vinfo;
|
||||
+ stmt_vec_info stmt_vinfo;
|
||||
+
|
||||
+ stmt_vinfo = vinfo_for_stmt (stmt);
|
||||
+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
|
||||
+ if (loop_vinfo)
|
||||
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
|
||||
if (code != MULT_EXPR && code != LSHIFT_EXPR)
|
||||
return false;
|
||||
@@ -377,7 +392,9 @@
|
||||
|
||||
if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
|
||||
|| !gimple_bb (def_stmt)
|
||||
- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
|
||||
+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
|
||||
+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo)
|
||||
+ && gimple_code (def_stmt) != GIMPLE_PHI)
|
||||
|| !vinfo_for_stmt (def_stmt))
|
||||
return false;
|
||||
|
||||
@@ -774,9 +791,14 @@
|
||||
tree type, half_type;
|
||||
gimple pattern_stmt;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+ struct loop *loop;
|
||||
tree var;
|
||||
|
||||
+ if (!loop_info)
|
||||
+ return NULL;
|
||||
+
|
||||
+ loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
|
||||
@@ -877,7 +899,11 @@
|
||||
gimple def_stmt, new_stmt;
|
||||
bool first = false;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
|
||||
- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
|
||||
+ bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
|
||||
+ struct loop *loop = NULL;
|
||||
+
|
||||
+ if (loop_info)
|
||||
+ loop = LOOP_VINFO_LOOP (loop_info);
|
||||
|
||||
*new_def_stmt = NULL;
|
||||
|
||||
@@ -909,7 +935,9 @@
|
||||
first = true;
|
||||
if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
|
||||
|| !gimple_bb (def_stmt)
|
||||
- || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
|
||||
+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
|
||||
+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
|
||||
+ && gimple_code (def_stmt) != GIMPLE_PHI)
|
||||
|| !vinfo_for_stmt (def_stmt))
|
||||
return false;
|
||||
}
|
||||
@@ -1087,7 +1115,16 @@
|
||||
int nuses = 0;
|
||||
tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
|
||||
bool first;
|
||||
- struct loop *loop = (gimple_bb (stmt))->loop_father;
|
||||
+ loop_vec_info loop_vinfo;
|
||||
+ struct loop *loop = NULL;
|
||||
+ bb_vec_info bb_vinfo;
|
||||
+ stmt_vec_info stmt_vinfo;
|
||||
+
|
||||
+ stmt_vinfo = vinfo_for_stmt (stmt);
|
||||
+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
|
||||
+ if (loop_vinfo)
|
||||
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
|
||||
first = true;
|
||||
while (1)
|
||||
@@ -1120,7 +1157,8 @@
|
||||
|
||||
if (nuses != 1 || !is_gimple_assign (use_stmt)
|
||||
|| !gimple_bb (use_stmt)
|
||||
- || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
||||
+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
||||
+ || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo)))
|
||||
return NULL;
|
||||
|
||||
/* Create pattern statement for STMT. */
|
||||
@@ -1485,6 +1523,7 @@
|
||||
enum machine_mode cmpmode;
|
||||
gimple pattern_stmt, def_stmt;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
|
||||
|
||||
if (!is_gimple_assign (last_stmt)
|
||||
|| gimple_assign_rhs_code (last_stmt) != COND_EXPR
|
||||
@@ -1538,7 +1577,8 @@
|
||||
tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
|
||||
fold_convert (comp_type, then_clause),
|
||||
fold_convert (comp_type, else_clause));
|
||||
- def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), tmp);
|
||||
+ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL),
|
||||
+ tmp);
|
||||
|
||||
pattern_stmt
|
||||
= gimple_build_assign_with_ops (NOP_EXPR,
|
||||
@@ -1546,12 +1586,15 @@
|
||||
gimple_assign_lhs (def_stmt), NULL_TREE);
|
||||
|
||||
STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
|
||||
- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
|
||||
+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
|
||||
set_vinfo_for_stmt (def_stmt, def_stmt_info);
|
||||
STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype;
|
||||
*type_in = vectype;
|
||||
*type_out = vectype;
|
||||
|
||||
+ if (vect_print_dump_info (REPORT_DETAILS))
|
||||
+ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
|
||||
+
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
@@ -1565,10 +1608,11 @@
|
||||
stmt_vec_info pattern_stmt_info, def_stmt_info;
|
||||
stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
|
||||
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
|
||||
gimple def_stmt;
|
||||
|
||||
set_vinfo_for_stmt (pattern_stmt,
|
||||
- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
|
||||
+ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo));
|
||||
gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
|
||||
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
|
||||
|
||||
@@ -1586,7 +1630,7 @@
|
||||
def_stmt_info = vinfo_for_stmt (def_stmt);
|
||||
if (def_stmt_info == NULL)
|
||||
{
|
||||
- def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
|
||||
+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
|
||||
set_vinfo_for_stmt (def_stmt, def_stmt_info);
|
||||
}
|
||||
gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
|
||||
@@ -1697,9 +1741,10 @@
|
||||
|
||||
/* Patterns cannot be vectorized using SLP, because they change the order of
|
||||
computation. */
|
||||
- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
|
||||
- if (next == stmt)
|
||||
- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
|
||||
+ if (loop_vinfo)
|
||||
+ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
|
||||
+ if (next == stmt)
|
||||
+ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
|
||||
|
||||
/* It is possible that additional pattern stmts are created and inserted in
|
||||
STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
|
||||
@@ -1799,26 +1844,46 @@
|
||||
be recorded in S3. */
|
||||
|
||||
void
|
||||
-vect_pattern_recog (loop_vec_info loop_vinfo)
|
||||
+vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
||||
{
|
||||
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
|
||||
- unsigned int nbbs = loop->num_nodes;
|
||||
+ struct loop *loop;
|
||||
+ basic_block *bbs, bb;
|
||||
+ unsigned int nbbs;
|
||||
gimple_stmt_iterator si;
|
||||
unsigned int i, j;
|
||||
gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
|
||||
VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
|
||||
+ gimple stmt;
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vect_pattern_recog ===");
|
||||
|
||||
- /* Scan through the loop stmts, applying the pattern recognition
|
||||
+ if (loop_vinfo)
|
||||
+ {
|
||||
+ loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
+ bbs = LOOP_VINFO_BBS (loop_vinfo);
|
||||
+ nbbs = loop->num_nodes;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ bb = BB_VINFO_BB (bb_vinfo);
|
||||
+ nbbs = 1;
|
||||
+ bbs = XNEW (basic_block);
|
||||
+ bbs[0] = bb;
|
||||
+ }
|
||||
+
|
||||
+ /* Scan through the stmts, applying the pattern recognition
|
||||
functions starting at each stmt visited: */
|
||||
for (i = 0; i < nbbs; i++)
|
||||
{
|
||||
basic_block bb = bbs[i];
|
||||
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
||||
{
|
||||
+ if (bb_vinfo && (stmt = gsi_stmt (si))
|
||||
+ && vinfo_for_stmt (stmt)
|
||||
+ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
|
||||
+ continue;
|
||||
+
|
||||
/* Scan over all generic vect_recog_xxx_pattern functions. */
|
||||
for (j = 0; j < NUM_PATTERNS; j++)
|
||||
{
|
||||
@@ -1830,4 +1895,6 @@
|
||||
}
|
||||
|
||||
VEC_free (gimple, heap, stmts_to_replace);
|
||||
+ if (bb_vinfo)
|
||||
+ free (bbs);
|
||||
}
|
||||
|
||||
=== modified file 'gcc/tree-vect-slp.c'
|
||||
--- old/gcc/tree-vect-slp.c 2011-11-21 06:58:40 +0000
|
||||
+++ new/gcc/tree-vect-slp.c 2011-11-23 06:47:35 +0000
|
||||
@@ -255,12 +255,14 @@
|
||||
/* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
|
||||
from the pattern. Check that all the stmts of the node are in the
|
||||
pattern. */
|
||||
- if (loop && def_stmt && gimple_bb (def_stmt)
|
||||
- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
|
||||
+ if (def_stmt && gimple_bb (def_stmt)
|
||||
+ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
|
||||
+ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
|
||||
+ && gimple_code (def_stmt) != GIMPLE_PHI))
|
||||
&& vinfo_for_stmt (def_stmt)
|
||||
&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
|
||||
- && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
|
||||
- && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
|
||||
+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
|
||||
+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
|
||||
{
|
||||
pattern = true;
|
||||
if (!first && !oprnd_info->first_pattern)
|
||||
@@ -1972,6 +1974,8 @@
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+ vect_pattern_recog (NULL, bb_vinfo);
|
||||
+
|
||||
if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
|
||||
&data_dependence_in_bb)
|
||||
|| min_vf > max_vf
|
||||
|
||||
=== modified file 'gcc/tree-vect-stmts.c'
|
||||
--- old/gcc/tree-vect-stmts.c 2011-11-22 17:10:17 +0000
|
||||
+++ new/gcc/tree-vect-stmts.c 2011-11-23 06:47:35 +0000
|
||||
@@ -3451,11 +3451,11 @@
|
||||
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
|
||||
OP1. */
|
||||
if (CONSTANT_CLASS_P (op0))
|
||||
- ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
|
||||
+ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo,
|
||||
&def_stmt, &def, &dt[1], &vectype_in);
|
||||
else
|
||||
- ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
|
||||
- &dt[1]);
|
||||
+ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt,
|
||||
+ &def, &dt[1]);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
|
||||
=== modified file 'gcc/tree-vectorizer.h'
|
||||
--- old/gcc/tree-vectorizer.h 2011-11-22 17:10:17 +0000
|
||||
+++ new/gcc/tree-vectorizer.h 2011-11-23 06:47:35 +0000
|
||||
@@ -918,7 +918,7 @@
|
||||
in the future. */
|
||||
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
|
||||
#define NUM_PATTERNS 7
|
||||
-void vect_pattern_recog (loop_vec_info);
|
||||
+void vect_pattern_recog (loop_vec_info, bb_vec_info);
|
||||
|
||||
/* In tree-vectorizer.c. */
|
||||
unsigned vectorize_loops (void);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,487 @@
|
||||
2011-11-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Needs to be merged upstream:
|
||||
|
||||
gcc/
|
||||
* tree-vect-patterns.c (widened_name_p): Rename to ...
|
||||
(type_conversion_p): ... this. Add new argument to determine
|
||||
if it's a promotion or demotion operation. Check for
|
||||
CONVERT_EXPR_CODE_P instead of NOP_EXPR.
|
||||
(vect_recog_dot_prod_pattern): Call type_conversion_p instead of
|
||||
widened_name_p.
|
||||
(vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern,
|
||||
vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern):
|
||||
Likewise.
|
||||
(vect_recog_mixed_size_cond_pattern): Likewise and allow
|
||||
non-constant then and else clauses.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/bb-slp-cond-3.c: New test.
|
||||
* gcc.dg/vect/bb-slp-cond-4.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c 2011-11-27 11:29:32 +0000
|
||||
@@ -0,0 +1,85 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 64
|
||||
+
|
||||
+/* Comparison in int, then/else and result in unsigned char. */
|
||||
+
|
||||
+static inline unsigned char
|
||||
+foo (int x, int y, int a, int b)
|
||||
+{
|
||||
+ if (x >= y)
|
||||
+ return a;
|
||||
+ else
|
||||
+ return b;
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b,
|
||||
+ unsigned char * __restrict__ c, unsigned char * __restrict__ d,
|
||||
+ unsigned char * __restrict__ e, int stride, int w)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
|
||||
+ d += stride, e += stride)
|
||||
+ {
|
||||
+ e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
|
||||
+ e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
|
||||
+ e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
|
||||
+ e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
|
||||
+ e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
|
||||
+ e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
|
||||
+ e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
|
||||
+ e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
|
||||
+ e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
|
||||
+ e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
|
||||
+ e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
|
||||
+ e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
|
||||
+ e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
|
||||
+ e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
|
||||
+ e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
|
||||
+ e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+unsigned char a[N], b[N], c[N], d[N], e[N];
|
||||
+
|
||||
+int main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ a[i] = i;
|
||||
+ b[i] = 5;
|
||||
+ e[i] = 0;
|
||||
+
|
||||
+ switch (i % 9)
|
||||
+ {
|
||||
+ case 0: asm (""); c[i] = i; d[i] = i + 1; break;
|
||||
+ case 1: c[i] = 0; d[i] = 0; break;
|
||||
+ case 2: c[i] = i + 1; d[i] = i - 1; break;
|
||||
+ case 3: c[i] = i; d[i] = i + 7; break;
|
||||
+ case 4: c[i] = i; d[i] = i; break;
|
||||
+ case 5: c[i] = i + 16; d[i] = i + 3; break;
|
||||
+ case 6: c[i] = i - 5; d[i] = i; break;
|
||||
+ case 7: c[i] = i; d[i] = i; break;
|
||||
+ case 8: c[i] = i; d[i] = i - 7; break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ bar (a, b, c, d, e, 16, 2);
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (e[i] != ((i % 3) == 0 ? 10 : 2 * i))
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c 2011-11-27 11:29:32 +0000
|
||||
@@ -0,0 +1,85 @@
|
||||
+/* { dg-require-effective-target vect_condition } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 64
|
||||
+
|
||||
+/* Comparison in short, then/else and result in int. */
|
||||
+static inline int
|
||||
+foo (short x, short y, int a, int b)
|
||||
+{
|
||||
+ if (x >= y)
|
||||
+ return a;
|
||||
+ else
|
||||
+ return b;
|
||||
+}
|
||||
+
|
||||
+__attribute__((noinline, noclone)) void
|
||||
+bar (short * __restrict__ a, short * __restrict__ b,
|
||||
+ short * __restrict__ c, short * __restrict__ d,
|
||||
+ int * __restrict__ e, int stride, int w)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
|
||||
+ d += stride, e += stride)
|
||||
+ {
|
||||
+ e[0] = foo (c[0], d[0], a[0], b[0]);
|
||||
+ e[1] = foo (c[1], d[1], a[1], b[1]);
|
||||
+ e[2] = foo (c[2], d[2], a[2], b[2]);
|
||||
+ e[3] = foo (c[3], d[3], a[3], b[3]);
|
||||
+ e[4] = foo (c[4], d[4], a[4], b[4]);
|
||||
+ e[5] = foo (c[5], d[5], a[5], b[5]);
|
||||
+ e[6] = foo (c[6], d[6], a[6], b[6]);
|
||||
+ e[7] = foo (c[7], d[7], a[7], b[7]);
|
||||
+ e[8] = foo (c[8], d[8], a[8], b[8]);
|
||||
+ e[9] = foo (c[9], d[9], a[9], b[9]);
|
||||
+ e[10] = foo (c[10], d[10], a[10], b[10]);
|
||||
+ e[11] = foo (c[11], d[11], a[11], b[11]);
|
||||
+ e[12] = foo (c[12], d[12], a[12], b[12]);
|
||||
+ e[13] = foo (c[13], d[13], a[13], b[13]);
|
||||
+ e[14] = foo (c[14], d[14], a[14], b[14]);
|
||||
+ e[15] = foo (c[15], d[15], a[15], b[15]);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+short a[N], b[N], c[N], d[N];
|
||||
+int e[N];
|
||||
+
|
||||
+int main ()
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ a[i] = i;
|
||||
+ b[i] = 5;
|
||||
+ e[i] = 0;
|
||||
+
|
||||
+ switch (i % 9)
|
||||
+ {
|
||||
+ case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break;
|
||||
+ case 1: c[i] = 0; d[i] = 0; break;
|
||||
+ case 2: c[i] = i + 1; d[i] = - i - 1; break;
|
||||
+ case 3: c[i] = i; d[i] = i + 7; break;
|
||||
+ case 4: c[i] = i; d[i] = i; break;
|
||||
+ case 5: c[i] = i + 16; d[i] = i + 3; break;
|
||||
+ case 6: c[i] = - i - 5; d[i] = - i; break;
|
||||
+ case 7: c[i] = - i; d[i] = - i; break;
|
||||
+ case 8: c[i] = - i; d[i] = - i - 7; break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ bar (a, b, c, d, e, 16, 2);
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (e[i] != ((i % 3) == 0 ? 5 : i))
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
+
|
||||
|
||||
=== modified file 'gcc/tree-vect-patterns.c'
|
||||
--- old/gcc/tree-vect-patterns.c 2011-11-23 07:49:33 +0000
|
||||
+++ new/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000
|
||||
@@ -62,18 +62,16 @@
|
||||
vect_recog_mixed_size_cond_pattern};
|
||||
|
||||
|
||||
-/* Function widened_name_p
|
||||
-
|
||||
- Check whether NAME, an ssa-name used in USE_STMT,
|
||||
- is a result of a type-promotion, such that:
|
||||
- DEF_STMT: NAME = NOP (name0)
|
||||
- where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
|
||||
+/* Check whether NAME, an ssa-name used in USE_STMT,
|
||||
+ is a result of a type promotion or demotion, such that:
|
||||
+ DEF_STMT: NAME = NOP (name0)
|
||||
+ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
|
||||
If CHECK_SIGN is TRUE, check that either both types are signed or both are
|
||||
unsigned. */
|
||||
|
||||
static bool
|
||||
-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
|
||||
- bool check_sign)
|
||||
+type_conversion_p (tree name, gimple use_stmt, bool check_sign,
|
||||
+ tree *orig_type, gimple *def_stmt, bool *promotion)
|
||||
{
|
||||
tree dummy;
|
||||
gimple dummy_gimple;
|
||||
@@ -96,21 +94,27 @@
|
||||
&& dt != vect_external_def && dt != vect_constant_def)
|
||||
return false;
|
||||
|
||||
- if (! *def_stmt)
|
||||
+ if (!*def_stmt)
|
||||
return false;
|
||||
|
||||
if (!is_gimple_assign (*def_stmt))
|
||||
return false;
|
||||
|
||||
- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
|
||||
+ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
|
||||
return false;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (*def_stmt);
|
||||
|
||||
- *half_type = TREE_TYPE (oprnd0);
|
||||
- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
|
||||
- || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
|
||||
- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
|
||||
+ *orig_type = TREE_TYPE (oprnd0);
|
||||
+ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
|
||||
+ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
|
||||
+ return false;
|
||||
+
|
||||
+ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
|
||||
+ *promotion = true;
|
||||
+ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
|
||||
+ *promotion = false;
|
||||
+ else
|
||||
return false;
|
||||
|
||||
if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy,
|
||||
@@ -192,6 +196,7 @@
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
struct loop *loop;
|
||||
tree var, rhs;
|
||||
+ bool promotion;
|
||||
|
||||
if (!loop_info)
|
||||
return NULL;
|
||||
@@ -255,7 +260,9 @@
|
||||
return NULL;
|
||||
stmt = last_stmt;
|
||||
|
||||
- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
|
||||
+ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
|
||||
+ &promotion)
|
||||
+ && promotion)
|
||||
{
|
||||
stmt = def_stmt;
|
||||
oprnd0 = gimple_assign_rhs1 (stmt);
|
||||
@@ -310,10 +317,14 @@
|
||||
if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
|
||||
|| !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
|
||||
return NULL;
|
||||
- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
|
||||
+ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
|
||||
+ &promotion)
|
||||
+ || !promotion)
|
||||
return NULL;
|
||||
oprnd00 = gimple_assign_rhs1 (def_stmt);
|
||||
- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
|
||||
+ if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
|
||||
+ &promotion)
|
||||
+ || !promotion)
|
||||
return NULL;
|
||||
oprnd01 = gimple_assign_rhs1 (def_stmt);
|
||||
if (!types_compatible_p (half_type0, half_type1))
|
||||
@@ -526,7 +537,7 @@
|
||||
enum tree_code dummy_code;
|
||||
int dummy_int;
|
||||
VEC (tree, heap) *dummy_vec;
|
||||
- bool op1_ok;
|
||||
+ bool op1_ok, promotion;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
@@ -546,12 +557,14 @@
|
||||
return NULL;
|
||||
|
||||
/* Check argument 0. */
|
||||
- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
|
||||
+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
|
||||
+ &promotion)
|
||||
+ || !promotion)
|
||||
return NULL;
|
||||
- /* Check argument 1. */
|
||||
- op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
|
||||
-
|
||||
- if (op1_ok)
|
||||
+ /* Check argument 1. */
|
||||
+ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
|
||||
+ &def_stmt1, &promotion);
|
||||
+ if (op1_ok && promotion)
|
||||
{
|
||||
oprnd0 = gimple_assign_rhs1 (def_stmt0);
|
||||
oprnd1 = gimple_assign_rhs1 (def_stmt1);
|
||||
@@ -793,6 +806,7 @@
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
struct loop *loop;
|
||||
tree var;
|
||||
+ bool promotion;
|
||||
|
||||
if (!loop_info)
|
||||
return NULL;
|
||||
@@ -832,8 +846,10 @@
|
||||
Left to check that oprnd0 is defined by a cast from type 'type' to type
|
||||
'TYPE'. */
|
||||
|
||||
- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
|
||||
- return NULL;
|
||||
+ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
|
||||
+ &promotion)
|
||||
+ || !promotion)
|
||||
+ return NULL;
|
||||
|
||||
oprnd0 = gimple_assign_rhs1 (stmt);
|
||||
*type_in = half_type;
|
||||
@@ -899,6 +915,7 @@
|
||||
gimple def_stmt, new_stmt;
|
||||
bool first = false;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
|
||||
+ bool promotion;
|
||||
bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
|
||||
struct loop *loop = NULL;
|
||||
|
||||
@@ -933,7 +950,9 @@
|
||||
else
|
||||
{
|
||||
first = true;
|
||||
- if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
|
||||
+ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
|
||||
+ &promotion)
|
||||
+ || !promotion
|
||||
|| !gimple_bb (def_stmt)
|
||||
|| (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
|
||||
|| (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
|
||||
@@ -1327,6 +1346,7 @@
|
||||
VEC (tree, heap) * dummy_vec;
|
||||
gimple use_stmt = NULL;
|
||||
bool over_widen = false;
|
||||
+ bool promotion;
|
||||
|
||||
if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
|
||||
return NULL;
|
||||
@@ -1381,8 +1401,10 @@
|
||||
return NULL;
|
||||
|
||||
/* Check operand 0: it has to be defined by a type promotion. */
|
||||
- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
|
||||
- return NULL;
|
||||
+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
|
||||
+ &promotion)
|
||||
+ || !promotion)
|
||||
+ return NULL;
|
||||
|
||||
/* Check operand 1: has to be positive. We check that it fits the type
|
||||
in vect_handle_widen_op_by_const (). */
|
||||
@@ -1492,9 +1514,9 @@
|
||||
S1 a_T = x_t CMP y_t ? b_T : c_T;
|
||||
|
||||
where type 'TYPE' is an integral type which has different size
|
||||
- from 'type'. b_T and c_T are constants and if 'TYPE' is wider
|
||||
+ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
|
||||
than 'type', the constants need to fit into an integer type
|
||||
- with the same width as 'type'.
|
||||
+ with the same width as 'type') or results of conversion from 'type'.
|
||||
|
||||
Input:
|
||||
|
||||
@@ -1523,6 +1545,9 @@
|
||||
enum machine_mode cmpmode;
|
||||
gimple pattern_stmt, def_stmt;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
+ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
|
||||
+ gimple def_stmt0 = NULL, def_stmt1 = NULL;
|
||||
+ bool promotion;
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
|
||||
|
||||
if (!is_gimple_assign (last_stmt)
|
||||
@@ -1535,25 +1560,40 @@
|
||||
then_clause = TREE_OPERAND (op, 1);
|
||||
else_clause = TREE_OPERAND (op, 2);
|
||||
|
||||
- if (TREE_CODE (then_clause) != INTEGER_CST
|
||||
- || TREE_CODE (else_clause) != INTEGER_CST)
|
||||
- return NULL;
|
||||
-
|
||||
if (!COMPARISON_CLASS_P (cond_expr))
|
||||
return NULL;
|
||||
|
||||
type = gimple_expr_type (last_stmt);
|
||||
comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
|
||||
- if (!INTEGRAL_TYPE_P (comp_type)
|
||||
- || !INTEGRAL_TYPE_P (type))
|
||||
- return NULL;
|
||||
-
|
||||
comp_vectype = get_vectype_for_scalar_type (comp_type);
|
||||
if (comp_vectype == NULL_TREE)
|
||||
return NULL;
|
||||
|
||||
+ if (types_compatible_p (type, comp_type)
|
||||
+ || !INTEGRAL_TYPE_P (comp_type)
|
||||
+ || !INTEGRAL_TYPE_P (type))
|
||||
+ return NULL;
|
||||
+
|
||||
+ if ((TREE_CODE (then_clause) != INTEGER_CST
|
||||
+ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
|
||||
+ &def_stmt0, &promotion))
|
||||
+ || (TREE_CODE (else_clause) != INTEGER_CST
|
||||
+ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
|
||||
+ &def_stmt1, &promotion)))
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (orig_type0 && orig_type1
|
||||
+ && (!types_compatible_p (orig_type0, orig_type1)
|
||||
+ || !types_compatible_p (orig_type0, comp_type)))
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (orig_type0)
|
||||
+ then_clause = gimple_assign_rhs1 (def_stmt0);
|
||||
+
|
||||
+ if (orig_type1)
|
||||
+ else_clause = gimple_assign_rhs1 (def_stmt1);
|
||||
+
|
||||
cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
|
||||
-
|
||||
if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
|
||||
return NULL;
|
||||
|
||||
@@ -1561,18 +1601,15 @@
|
||||
if (vectype == NULL_TREE)
|
||||
return NULL;
|
||||
|
||||
- if (types_compatible_p (vectype, comp_vectype))
|
||||
- return NULL;
|
||||
-
|
||||
if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype)))
|
||||
return NULL;
|
||||
|
||||
- if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
|
||||
- {
|
||||
- if (!int_fits_type_p (then_clause, comp_type)
|
||||
- || !int_fits_type_p (else_clause, comp_type))
|
||||
- return NULL;
|
||||
- }
|
||||
+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)
|
||||
+ && ((TREE_CODE (then_clause) == INTEGER_CST
|
||||
+ && !int_fits_type_p (then_clause, comp_type))
|
||||
+ || (TREE_CODE (else_clause) == INTEGER_CST
|
||||
+ && !int_fits_type_p (else_clause, comp_type))))
|
||||
+ return NULL;
|
||||
|
||||
tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
|
||||
fold_convert (comp_type, then_clause),
|
||||
|
||||
@ -0,0 +1,276 @@
|
||||
2011-12-05 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline (Cortex-A15 tuning):
|
||||
2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
|
||||
|
||||
* config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue.
|
||||
* config/arm/arm.md (mul64): New attribute.
|
||||
(generic_sched): Cortex-A15 is not scheduled generically.
|
||||
(cortex-a15.md): Include.
|
||||
* config/arm/cortex-a15.md: New machine description.
|
||||
* config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md.
|
||||
|
||||
2011-11-30 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
|
||||
* config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-12-05 10:55:48 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-12-05 12:33:25 +0000
|
||||
@@ -24056,6 +24056,9 @@
|
||||
{
|
||||
switch (arm_tune)
|
||||
{
|
||||
+ case cortexa15:
|
||||
+ return 3;
|
||||
+
|
||||
case cortexr4:
|
||||
case cortexr4f:
|
||||
case cortexr5:
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-10-26 11:38:30 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-12-02 00:38:59 +0000
|
||||
@@ -345,6 +345,13 @@
|
||||
(const_string "mult")
|
||||
(const_string "alu")))
|
||||
|
||||
+; Is this an (integer side) multiply with a 64-bit result?
|
||||
+(define_attr "mul64" "no,yes"
|
||||
+ (if_then_else
|
||||
+ (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
|
||||
+ (const_string "yes")
|
||||
+ (const_string "no")))
|
||||
+
|
||||
; Load scheduling, set from the arm_ld_sched variable
|
||||
; initialized by arm_option_override()
|
||||
(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
|
||||
@@ -511,7 +518,7 @@
|
||||
|
||||
(define_attr "generic_sched" "yes,no"
|
||||
(const (if_then_else
|
||||
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
|
||||
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
|
||||
(eq_attr "tune_cortexr4" "yes"))
|
||||
(const_string "no")
|
||||
(const_string "yes"))))
|
||||
@@ -537,6 +544,7 @@
|
||||
(include "cortex-a5.md")
|
||||
(include "cortex-a8.md")
|
||||
(include "cortex-a9.md")
|
||||
+(include "cortex-a15.md")
|
||||
(include "cortex-r4.md")
|
||||
(include "cortex-r4f.md")
|
||||
(include "cortex-m4.md")
|
||||
|
||||
=== added file 'gcc/config/arm/cortex-a15.md'
|
||||
--- old/gcc/config/arm/cortex-a15.md 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/config/arm/cortex-a15.md 2011-12-02 00:38:59 +0000
|
||||
@@ -0,0 +1,186 @@
|
||||
+;; ARM Cortex-A15 pipeline description
|
||||
+;; Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
+;;
|
||||
+;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
|
||||
+
|
||||
+;; This file is part of GCC.
|
||||
+;;
|
||||
+;; GCC is free software; you can redistribute it and/or modify it
|
||||
+;; under the terms of the GNU General Public License as published by
|
||||
+;; the Free Software Foundation; either version 3, or (at your option)
|
||||
+;; any later version.
|
||||
+;;
|
||||
+;; GCC is distributed in the hope that it will be useful, but
|
||||
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+;; General Public License for more details.
|
||||
+;;
|
||||
+;; You should have received a copy of the GNU General Public License
|
||||
+;; along with GCC; see the file COPYING3. If not see
|
||||
+;; <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+(define_automaton "cortex_a15")
|
||||
+
|
||||
+;; The Cortex-A15 core is modelled as a triple issue pipeline that has
|
||||
+;; the following dispatch units.
|
||||
+;; 1. Two pipelines for simple integer operations: SX1, SX2
|
||||
+;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2
|
||||
+;; 3. One pipeline for branch operations: BX
|
||||
+;; 4. One pipeline for integer multiply and divide operations: MX
|
||||
+;; 5. Two pipelines for load and store operations: LS1, LS2
|
||||
+;;
|
||||
+;; We can issue into three pipelines per-cycle.
|
||||
+;;
|
||||
+;; We assume that where we have unit pairs xx1 is always filled before xx2.
|
||||
+
|
||||
+;; The three issue units
|
||||
+(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15")
|
||||
+
|
||||
+(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)")
|
||||
+(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))")
|
||||
+(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)")
|
||||
+(final_presence_set "ca15_i1" "ca15_i0")
|
||||
+(final_presence_set "ca15_i2" "ca15_i1")
|
||||
+
|
||||
+;; The main dispatch units
|
||||
+(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15")
|
||||
+(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15")
|
||||
+(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15")
|
||||
+(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15")
|
||||
+
|
||||
+(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)")
|
||||
+
|
||||
+;; The extended load-store pipeline
|
||||
+(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15")
|
||||
+
|
||||
+;; The extended ALU pipeline
|
||||
+(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15")
|
||||
+(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15")
|
||||
+
|
||||
+;; Simple Execution Unit:
|
||||
+;;
|
||||
+;; Simple ALU without shift
|
||||
+(define_insn_reservation "cortex_a15_alu" 2
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "alu")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)")
|
||||
+
|
||||
+;; ALU ops with immediate shift
|
||||
+(define_insn_reservation "cortex_a15_alu_shift" 3
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "alu_shift")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
|
||||
+ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
|
||||
+
|
||||
+;; ALU ops with register controlled shift
|
||||
+(define_insn_reservation "cortex_a15_alu_shift_reg" 3
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "alu_shift_reg")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\
|
||||
+ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\
|
||||
+ |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)")
|
||||
+
|
||||
+;; Multiply Execution Unit:
|
||||
+;;
|
||||
+;; 32-bit multiplies
|
||||
+(define_insn_reservation "cortex_a15_mult32" 3
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "mult")
|
||||
+ (and (eq_attr "neon_type" "none")
|
||||
+ (eq_attr "mul64" "no"))))
|
||||
+ "ca15_issue1,ca15_mx")
|
||||
+
|
||||
+;; 64-bit multiplies
|
||||
+(define_insn_reservation "cortex_a15_mult64" 4
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "mult")
|
||||
+ (and (eq_attr "neon_type" "none")
|
||||
+ (eq_attr "mul64" "yes"))))
|
||||
+ "ca15_issue1,ca15_mx*2")
|
||||
+
|
||||
+;; Integer divide
|
||||
+(define_insn_reservation "cortex_a15_udiv" 9
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (eq_attr "insn" "udiv"))
|
||||
+ "ca15_issue1,ca15_mx")
|
||||
+
|
||||
+(define_insn_reservation "cortex_a15_sdiv" 10
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (eq_attr "insn" "sdiv"))
|
||||
+ "ca15_issue1,ca15_mx")
|
||||
+
|
||||
+;; Block all issue pipes for a cycle
|
||||
+(define_insn_reservation "cortex_a15_block" 1
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "block")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue3")
|
||||
+
|
||||
+;; Branch execution Unit
|
||||
+;;
|
||||
+;; Branches take one issue slot.
|
||||
+;; No latency as there is no result
|
||||
+(define_insn_reservation "cortex_a15_branch" 0
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "branch")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue1,ca15_bx")
|
||||
+
|
||||
+
|
||||
+;; We lie with calls. They take up all issue slots, and form a block in the
|
||||
+;; pipeline. The result however is available the next cycle.
|
||||
+;;
|
||||
+;; Addition of new units requires this to be updated.
|
||||
+(define_insn_reservation "cortex_a15_call" 1
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "call")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue3,\
|
||||
+ ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\
|
||||
+ ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\
|
||||
+ +ca15_sx2_sat+ca15_ldr+ca15_str")
|
||||
+
|
||||
+;; Load-store execution Unit
|
||||
+;;
|
||||
+;; Loads of up to two words.
|
||||
+(define_insn_reservation "cortex_a15_load1" 4
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "load_byte,load1,load2")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue1,ca15_ls,ca15_ldr,nothing")
|
||||
+
|
||||
+;; Loads of three or four words.
|
||||
+(define_insn_reservation "cortex_a15_load3" 5
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "load3,load4")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing")
|
||||
+
|
||||
+;; Stores of up to two words.
|
||||
+(define_insn_reservation "cortex_a15_store1" 0
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "store1,store2")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue1,ca15_ls,ca15_str")
|
||||
+
|
||||
+;; Stores of three or four words.
|
||||
+(define_insn_reservation "cortex_a15_store3" 0
|
||||
+ (and (eq_attr "tune" "cortexa15")
|
||||
+ (and (eq_attr "type" "store3,store4")
|
||||
+ (eq_attr "neon_type" "none")))
|
||||
+ "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str")
|
||||
+
|
||||
+;; Simple execution unit bypasses
|
||||
+(define_bypass 1 "cortex_a15_alu"
|
||||
+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
|
||||
+(define_bypass 2 "cortex_a15_alu_shift"
|
||||
+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
|
||||
+(define_bypass 2 "cortex_a15_alu_shift_reg"
|
||||
+ "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
|
||||
+(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3")
|
||||
+(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3")
|
||||
+(define_bypass 2 "cortex_a15_alu_shift_reg"
|
||||
+ "cortex_a15_load1,cortex_a15_load3")
|
||||
|
||||
=== modified file 'gcc/config/arm/t-arm'
|
||||
--- old/gcc/config/arm/t-arm 2011-01-03 20:52:22 +0000
|
||||
+++ new/gcc/config/arm/t-arm 2011-12-02 00:38:59 +0000
|
||||
@@ -31,6 +31,16 @@
|
||||
$(srcdir)/config/arm/fmp626.md \
|
||||
$(srcdir)/config/arm/fa726te.md \
|
||||
$(srcdir)/config/arm/arm926ejs.md \
|
||||
+ $(srcdir)/config/arm/cortex-a15.md \
|
||||
+ $(srcdir)/config/arm/cortex-a5.md \
|
||||
+ $(srcdir)/config/arm/cortex-a8.md \
|
||||
+ $(srcdir)/config/arm/cortex-a8-neon.md \
|
||||
+ $(srcdir)/config/arm/cortex-a9.md \
|
||||
+ $(srcdir)/config/arm/cortex-a9-neon.md \
|
||||
+ $(srcdir)/config/arm/cortex-m4-fpu.md \
|
||||
+ $(srcdir)/config/arm/cortex-m4.md \
|
||||
+ $(srcdir)/config/arm/cortex-r4f.md \
|
||||
+ $(srcdir)/config/arm/cortex-r4.md \
|
||||
$(srcdir)/config/arm/cirrus.md \
|
||||
$(srcdir)/config/arm/fpa.md \
|
||||
$(srcdir)/config/arm/vec-common.md \
|
||||
|
||||
@ -0,0 +1,69 @@
|
||||
2011-12-20 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-11-29 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
PR tree-optimization/51301
|
||||
gcc/
|
||||
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that
|
||||
the last statement doesn't convert to a bigger type than the original
|
||||
type of the computation.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/pr51301.c: New test.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/pr51301.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/pr51301.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/pr51301.c 2011-11-30 17:54:51 +0000
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+
|
||||
+typedef signed char int8_t;
|
||||
+typedef signed long long int64_t;
|
||||
+int64_t
|
||||
+f0a (int8_t * __restrict__ arg1)
|
||||
+{
|
||||
+ int idx;
|
||||
+ int64_t result = 0;
|
||||
+ for (idx = 0; idx < 416; idx += 1)
|
||||
+ result += arg1[idx] << (arg1[idx] == arg1[idx]);
|
||||
+ return result;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/tree-vect-patterns.c'
|
||||
--- old/gcc/tree-vect-patterns.c 2011-11-27 12:17:31 +0000
|
||||
+++ new/gcc/tree-vect-patterns.c 2011-12-20 07:47:44 +0000
|
||||
@@ -1138,6 +1138,7 @@
|
||||
struct loop *loop = NULL;
|
||||
bb_vec_info bb_vinfo;
|
||||
stmt_vec_info stmt_vinfo;
|
||||
+ tree type = NULL;
|
||||
|
||||
stmt_vinfo = vinfo_for_stmt (stmt);
|
||||
loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
@@ -1207,6 +1208,7 @@
|
||||
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
|
||||
}
|
||||
|
||||
+ type = gimple_expr_type (stmt);
|
||||
prev_stmt = stmt;
|
||||
stmt = use_stmt;
|
||||
|
||||
@@ -1222,9 +1224,11 @@
|
||||
{
|
||||
use_lhs = gimple_assign_lhs (use_stmt);
|
||||
use_type = TREE_TYPE (use_lhs);
|
||||
- /* Support only type promotion or signedess change. */
|
||||
+ /* Support only type promotion or signedess change. Check that USE_TYPE
|
||||
+ is not bigger than the original type. */
|
||||
if (!INTEGRAL_TYPE_P (use_type)
|
||||
- || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
|
||||
+ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
|
||||
+ || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
|
||||
return NULL;
|
||||
|
||||
if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
|
||||
|
||||
@ -0,0 +1,354 @@
|
||||
2011-12-30 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-10-12 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* expr.h (copy_blkmode_to_reg): Declare.
|
||||
* expr.c (copy_blkmode_to_reg): New function.
|
||||
(expand_assignment): Don't expand register RESULT_DECLs before
|
||||
the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a
|
||||
RESULT_DECL register.
|
||||
(expand_expr_real_1): Handle BLKmode decls when looking for promotion.
|
||||
* stmt.c (expand_return): Move BLKmode-to-register code into
|
||||
copy_blkmode_to_reg.
|
||||
|
||||
=== modified file 'gcc/expr.c'
|
||||
--- old/gcc/expr.c 2011-10-23 13:33:07 +0000
|
||||
+++ new/gcc/expr.c 2011-12-30 09:41:30 +0000
|
||||
@@ -2180,6 +2180,111 @@
|
||||
return tgtblk;
|
||||
}
|
||||
|
||||
+/* Copy BLKmode value SRC into a register of mode MODE. Return the
|
||||
+ register if it contains any data, otherwise return null.
|
||||
+
|
||||
+ This is used on targets that return BLKmode values in registers. */
|
||||
+
|
||||
+rtx
|
||||
+copy_blkmode_to_reg (enum machine_mode mode, tree src)
|
||||
+{
|
||||
+ int i, n_regs;
|
||||
+ unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
|
||||
+ unsigned int bitsize;
|
||||
+ rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
|
||||
+ enum machine_mode dst_mode;
|
||||
+
|
||||
+ gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode);
|
||||
+
|
||||
+ x = expand_normal (src);
|
||||
+
|
||||
+ bytes = int_size_in_bytes (TREE_TYPE (src));
|
||||
+ if (bytes == 0)
|
||||
+ return NULL_RTX;
|
||||
+
|
||||
+ /* If the structure doesn't take up a whole number of words, see
|
||||
+ whether the register value should be padded on the left or on
|
||||
+ the right. Set PADDING_CORRECTION to the number of padding
|
||||
+ bits needed on the left side.
|
||||
+
|
||||
+ In most ABIs, the structure will be returned at the least end of
|
||||
+ the register, which translates to right padding on little-endian
|
||||
+ targets and left padding on big-endian targets. The opposite
|
||||
+ holds if the structure is returned at the most significant
|
||||
+ end of the register. */
|
||||
+ if (bytes % UNITS_PER_WORD != 0
|
||||
+ && (targetm.calls.return_in_msb (TREE_TYPE (src))
|
||||
+ ? !BYTES_BIG_ENDIAN
|
||||
+ : BYTES_BIG_ENDIAN))
|
||||
+ padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
|
||||
+ * BITS_PER_UNIT));
|
||||
+
|
||||
+ n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
|
||||
+ dst_words = XALLOCAVEC (rtx, n_regs);
|
||||
+ bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
|
||||
+
|
||||
+ /* Copy the structure BITSIZE bits at a time. */
|
||||
+ for (bitpos = 0, xbitpos = padding_correction;
|
||||
+ bitpos < bytes * BITS_PER_UNIT;
|
||||
+ bitpos += bitsize, xbitpos += bitsize)
|
||||
+ {
|
||||
+ /* We need a new destination pseudo each time xbitpos is
|
||||
+ on a word boundary and when xbitpos == padding_correction
|
||||
+ (the first time through). */
|
||||
+ if (xbitpos % BITS_PER_WORD == 0
|
||||
+ || xbitpos == padding_correction)
|
||||
+ {
|
||||
+ /* Generate an appropriate register. */
|
||||
+ dst_word = gen_reg_rtx (word_mode);
|
||||
+ dst_words[xbitpos / BITS_PER_WORD] = dst_word;
|
||||
+
|
||||
+ /* Clear the destination before we move anything into it. */
|
||||
+ emit_move_insn (dst_word, CONST0_RTX (word_mode));
|
||||
+ }
|
||||
+
|
||||
+ /* We need a new source operand each time bitpos is on a word
|
||||
+ boundary. */
|
||||
+ if (bitpos % BITS_PER_WORD == 0)
|
||||
+ src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode);
|
||||
+
|
||||
+ /* Use bitpos for the source extraction (left justified) and
|
||||
+ xbitpos for the destination store (right justified). */
|
||||
+ store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, word_mode,
|
||||
+ extract_bit_field (src_word, bitsize,
|
||||
+ bitpos % BITS_PER_WORD, 1, false,
|
||||
+ NULL_RTX, word_mode, word_mode));
|
||||
+ }
|
||||
+
|
||||
+ if (mode == BLKmode)
|
||||
+ {
|
||||
+ /* Find the smallest integer mode large enough to hold the
|
||||
+ entire structure. */
|
||||
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
|
||||
+ mode != VOIDmode;
|
||||
+ mode = GET_MODE_WIDER_MODE (mode))
|
||||
+ /* Have we found a large enough mode? */
|
||||
+ if (GET_MODE_SIZE (mode) >= bytes)
|
||||
+ break;
|
||||
+
|
||||
+ /* A suitable mode should have been found. */
|
||||
+ gcc_assert (mode != VOIDmode);
|
||||
+ }
|
||||
+
|
||||
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode))
|
||||
+ dst_mode = word_mode;
|
||||
+ else
|
||||
+ dst_mode = mode;
|
||||
+ dst = gen_reg_rtx (dst_mode);
|
||||
+
|
||||
+ for (i = 0; i < n_regs; i++)
|
||||
+ emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]);
|
||||
+
|
||||
+ if (mode != dst_mode)
|
||||
+ dst = gen_lowpart (mode, dst);
|
||||
+
|
||||
+ return dst;
|
||||
+}
|
||||
+
|
||||
/* Add a USE expression for REG to the (possibly empty) list pointed
|
||||
to by CALL_FUSAGE. REG must denote a hard register. */
|
||||
|
||||
@@ -4382,7 +4487,9 @@
|
||||
if (TREE_CODE (from) == CALL_EXPR && ! aggregate_value_p (from, from)
|
||||
&& COMPLETE_TYPE_P (TREE_TYPE (from))
|
||||
&& TREE_CODE (TYPE_SIZE (TREE_TYPE (from))) == INTEGER_CST
|
||||
- && ! (((TREE_CODE (to) == VAR_DECL || TREE_CODE (to) == PARM_DECL)
|
||||
+ && ! (((TREE_CODE (to) == VAR_DECL
|
||||
+ || TREE_CODE (to) == PARM_DECL
|
||||
+ || TREE_CODE (to) == RESULT_DECL)
|
||||
&& REG_P (DECL_RTL (to)))
|
||||
|| TREE_CODE (to) == SSA_NAME))
|
||||
{
|
||||
@@ -4428,12 +4535,15 @@
|
||||
rtx temp;
|
||||
|
||||
push_temp_slots ();
|
||||
- temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
|
||||
+ if (REG_P (to_rtx) && TYPE_MODE (TREE_TYPE (from)) == BLKmode)
|
||||
+ temp = copy_blkmode_to_reg (GET_MODE (to_rtx), from);
|
||||
+ else
|
||||
+ temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
|
||||
|
||||
if (GET_CODE (to_rtx) == PARALLEL)
|
||||
emit_group_load (to_rtx, temp, TREE_TYPE (from),
|
||||
int_size_in_bytes (TREE_TYPE (from)));
|
||||
- else
|
||||
+ else if (temp)
|
||||
emit_move_insn (to_rtx, temp);
|
||||
|
||||
preserve_temp_slots (to_rtx);
|
||||
@@ -8567,10 +8677,15 @@
|
||||
return temp;
|
||||
}
|
||||
|
||||
- /* If the mode of DECL_RTL does not match that of the decl, it
|
||||
- must be a promoted value. We return a SUBREG of the wanted mode,
|
||||
- but mark it so that we know that it was already extended. */
|
||||
- if (REG_P (decl_rtl) && GET_MODE (decl_rtl) != DECL_MODE (exp))
|
||||
+ /* If the mode of DECL_RTL does not match that of the decl,
|
||||
+ there are two cases: we are dealing with a BLKmode value
|
||||
+ that is returned in a register, or we are dealing with
|
||||
+ a promoted value. In the latter case, return a SUBREG
|
||||
+ of the wanted mode, but mark it so that we know that it
|
||||
+ was already extended. */
|
||||
+ if (REG_P (decl_rtl)
|
||||
+ && DECL_MODE (exp) != BLKmode
|
||||
+ && GET_MODE (decl_rtl) != DECL_MODE (exp))
|
||||
{
|
||||
enum machine_mode pmode;
|
||||
|
||||
|
||||
=== modified file 'gcc/expr.h'
|
||||
--- old/gcc/expr.h 2011-03-03 21:56:58 +0000
|
||||
+++ new/gcc/expr.h 2011-10-12 08:01:43 +0000
|
||||
@@ -324,6 +324,8 @@
|
||||
/* Copy BLKmode object from a set of registers. */
|
||||
extern rtx copy_blkmode_from_reg (rtx, rtx, tree);
|
||||
|
||||
+extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
|
||||
+
|
||||
/* Mark REG as holding a parameter for the next CALL_INSN. */
|
||||
extern void use_reg (rtx *, rtx);
|
||||
|
||||
|
||||
=== modified file 'gcc/stmt.c'
|
||||
--- old/gcc/stmt.c 2011-03-03 21:56:58 +0000
|
||||
+++ new/gcc/stmt.c 2011-10-12 08:01:43 +0000
|
||||
@@ -1684,119 +1684,21 @@
|
||||
expand_value_return (result_rtl);
|
||||
|
||||
/* If the result is an aggregate that is being returned in one (or more)
|
||||
- registers, load the registers here. The compiler currently can't handle
|
||||
- copying a BLKmode value into registers. We could put this code in a
|
||||
- more general area (for use by everyone instead of just function
|
||||
- call/return), but until this feature is generally usable it is kept here
|
||||
- (and in expand_call). */
|
||||
+ registers, load the registers here. */
|
||||
|
||||
else if (retval_rhs != 0
|
||||
&& TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode
|
||||
&& REG_P (result_rtl))
|
||||
{
|
||||
- int i;
|
||||
- unsigned HOST_WIDE_INT bitpos, xbitpos;
|
||||
- unsigned HOST_WIDE_INT padding_correction = 0;
|
||||
- unsigned HOST_WIDE_INT bytes
|
||||
- = int_size_in_bytes (TREE_TYPE (retval_rhs));
|
||||
- int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
|
||||
- unsigned int bitsize
|
||||
- = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD);
|
||||
- rtx *result_pseudos = XALLOCAVEC (rtx, n_regs);
|
||||
- rtx result_reg, src = NULL_RTX, dst = NULL_RTX;
|
||||
- rtx result_val = expand_normal (retval_rhs);
|
||||
- enum machine_mode tmpmode, result_reg_mode;
|
||||
-
|
||||
- if (bytes == 0)
|
||||
- {
|
||||
- expand_null_return ();
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- /* If the structure doesn't take up a whole number of words, see
|
||||
- whether the register value should be padded on the left or on
|
||||
- the right. Set PADDING_CORRECTION to the number of padding
|
||||
- bits needed on the left side.
|
||||
-
|
||||
- In most ABIs, the structure will be returned at the least end of
|
||||
- the register, which translates to right padding on little-endian
|
||||
- targets and left padding on big-endian targets. The opposite
|
||||
- holds if the structure is returned at the most significant
|
||||
- end of the register. */
|
||||
- if (bytes % UNITS_PER_WORD != 0
|
||||
- && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs))
|
||||
- ? !BYTES_BIG_ENDIAN
|
||||
- : BYTES_BIG_ENDIAN))
|
||||
- padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
|
||||
- * BITS_PER_UNIT));
|
||||
-
|
||||
- /* Copy the structure BITSIZE bits at a time. */
|
||||
- for (bitpos = 0, xbitpos = padding_correction;
|
||||
- bitpos < bytes * BITS_PER_UNIT;
|
||||
- bitpos += bitsize, xbitpos += bitsize)
|
||||
- {
|
||||
- /* We need a new destination pseudo each time xbitpos is
|
||||
- on a word boundary and when xbitpos == padding_correction
|
||||
- (the first time through). */
|
||||
- if (xbitpos % BITS_PER_WORD == 0
|
||||
- || xbitpos == padding_correction)
|
||||
- {
|
||||
- /* Generate an appropriate register. */
|
||||
- dst = gen_reg_rtx (word_mode);
|
||||
- result_pseudos[xbitpos / BITS_PER_WORD] = dst;
|
||||
-
|
||||
- /* Clear the destination before we move anything into it. */
|
||||
- emit_move_insn (dst, CONST0_RTX (GET_MODE (dst)));
|
||||
- }
|
||||
-
|
||||
- /* We need a new source operand each time bitpos is on a word
|
||||
- boundary. */
|
||||
- if (bitpos % BITS_PER_WORD == 0)
|
||||
- src = operand_subword_force (result_val,
|
||||
- bitpos / BITS_PER_WORD,
|
||||
- BLKmode);
|
||||
-
|
||||
- /* Use bitpos for the source extraction (left justified) and
|
||||
- xbitpos for the destination store (right justified). */
|
||||
- store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode,
|
||||
- extract_bit_field (src, bitsize,
|
||||
- bitpos % BITS_PER_WORD, 1, false,
|
||||
- NULL_RTX, word_mode, word_mode));
|
||||
- }
|
||||
-
|
||||
- tmpmode = GET_MODE (result_rtl);
|
||||
- if (tmpmode == BLKmode)
|
||||
- {
|
||||
- /* Find the smallest integer mode large enough to hold the
|
||||
- entire structure and use that mode instead of BLKmode
|
||||
- on the USE insn for the return register. */
|
||||
- for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
|
||||
- tmpmode != VOIDmode;
|
||||
- tmpmode = GET_MODE_WIDER_MODE (tmpmode))
|
||||
- /* Have we found a large enough mode? */
|
||||
- if (GET_MODE_SIZE (tmpmode) >= bytes)
|
||||
- break;
|
||||
-
|
||||
- /* A suitable mode should have been found. */
|
||||
- gcc_assert (tmpmode != VOIDmode);
|
||||
-
|
||||
- PUT_MODE (result_rtl, tmpmode);
|
||||
- }
|
||||
-
|
||||
- if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode))
|
||||
- result_reg_mode = word_mode;
|
||||
+ val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs);
|
||||
+ if (val)
|
||||
+ {
|
||||
+ /* Use the mode of the result value on the return register. */
|
||||
+ PUT_MODE (result_rtl, GET_MODE (val));
|
||||
+ expand_value_return (val);
|
||||
+ }
|
||||
else
|
||||
- result_reg_mode = tmpmode;
|
||||
- result_reg = gen_reg_rtx (result_reg_mode);
|
||||
-
|
||||
- for (i = 0; i < n_regs; i++)
|
||||
- emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode),
|
||||
- result_pseudos[i]);
|
||||
-
|
||||
- if (tmpmode != result_reg_mode)
|
||||
- result_reg = gen_lowpart (tmpmode, result_reg);
|
||||
-
|
||||
- expand_value_return (result_reg);
|
||||
+ expand_null_return ();
|
||||
}
|
||||
else if (retval_rhs != 0
|
||||
&& !VOID_TYPE_P (TREE_TYPE (retval_rhs))
|
||||
|
||||
=== added file 'gcc/testsuite/g++.dg/pr48660.C'
|
||||
--- old/gcc/testsuite/g++.dg/pr48660.C 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/g++.dg/pr48660.C 2011-10-12 08:01:43 +0000
|
||||
@@ -0,0 +1,22 @@
|
||||
+template<int N> struct val { char a[N]; };
|
||||
+
|
||||
+class Base
|
||||
+{
|
||||
+public:
|
||||
+ virtual val<1> get1() const = 0;
|
||||
+ virtual val<2> get2() const = 0;
|
||||
+ virtual val<3> get3() const = 0;
|
||||
+ virtual val<4> get4() const = 0;
|
||||
+};
|
||||
+
|
||||
+class Derived : public virtual Base
|
||||
+{
|
||||
+public:
|
||||
+ virtual val<1> get1() const { return foo->get1(); }
|
||||
+ virtual val<2> get2() const { return foo->get2(); }
|
||||
+ virtual val<3> get3() const { return foo->get3(); }
|
||||
+ virtual val<4> get4() const { return foo->get4(); }
|
||||
+ Base *foo;
|
||||
+};
|
||||
+
|
||||
+Base* make() { return new Derived; }
|
||||
|
||||
@ -0,0 +1,22 @@
|
||||
2012-01-05 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
Backport from mainline r182271:
|
||||
|
||||
2011-12-13 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
gcc/
|
||||
* modulo-sched.c (mark_loop_unsched): Free bbs.
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-10-30 05:31:00 +0000
|
||||
+++ new/gcc/modulo-sched.c 2012-01-05 02:45:23 +0000
|
||||
@@ -1204,6 +1204,8 @@
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
bbs[i]->flags |= BB_DISABLE_SCHEDULE;
|
||||
+
|
||||
+ free (bbs);
|
||||
}
|
||||
|
||||
/* Return true if all the BBs of the loop are empty except the
|
||||
|
||||
@ -74,4 +74,13 @@ file://linaro/gcc-4.6-linaro-r106836.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106839.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106840.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106841.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106842.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106843.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106844.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106845.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106846.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106848.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106853.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106854.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106855.patch \
|
||||
"
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# this will prepend this layer to FILESPATH
|
||||
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6"
|
||||
PRINC = "4"
|
||||
PRINC = "5"
|
||||
ARM_INSTRUCTION_SET = "arm"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user