Add bbappend files for gcc 4.6.0 from oe-core

The bbappends add patches for linaro so with this
we can enhance gcc 4.6.0 from oe-core and have patches
that are in meta-oe on top of it.

Add patches from for linaro 4.6

This obsoletes the need of having gcc 4.6
recipes in meta-oe

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
This commit is contained in:
Khem Raj 2011-06-12 21:06:58 -07:00 committed by Koen Kooi
parent eac31f4e3a
commit 8052751332
31 changed files with 14300 additions and 1 deletions

View File

@ -0,0 +1,51 @@
2011-02-21 Andrew Stubbs <ams@codesourcery.com>
Julian Brown <julian@codesourcery.com>
Mark Shinwell <shinwell@codesourcery.com>
Forward-ported from Linaro GCC 4.5 (bzr99324).
gcc/
* config/arm/arm.h (arm_class_likely_spilled_p): Check against
LO_REGS only for Thumb-1.
(MODE_BASE_REG_CLASS): Restrict base registers to those which can
be used in short instructions when optimising for size on Thumb-2.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-01-29 03:20:57 +0000
+++ new/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000
@@ -22304,14 +22304,16 @@
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
- We need to define this for LO_REGS on thumb. Otherwise we can end up
- using r0-r4 for function arguments, r7 for the stack frame and don't
- have enough left over to do doubleword arithmetic. */
-
+ We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
+ using r0-r4 for function arguments, r7 for the stack frame and don't have
+ enough left over to do doubleword arithmetic. For Thumb-2 all the
+ potentially problematic instructions accept high registers so this is not
+ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
+ that require many low registers. */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
- if ((TARGET_THUMB && rclass == LO_REGS)
+ if ((TARGET_THUMB1 && rclass == LO_REGS)
|| rclass == CC_REG)
return true;
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-01-29 03:20:57 +0000
+++ new/gcc/config/arm/arm.h 2011-02-21 14:04:51 +0000
@@ -1185,7 +1185,7 @@
when addressing quantities in QI or HI mode; if we don't know the
mode, then we must be conservative. */
#define MODE_BASE_REG_CLASS(MODE) \
- (TARGET_32BIT ? CORE_REGS : \
+ (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS : \
(((MODE) == SImode) ? BASE_REGS : LO_REGS))
/* For Thumb we can not support SP+reg addressing, so we return LO_REGS

View File

@ -0,0 +1,63 @@
2011-02-02 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
PR target/47551
* config/arm/arm.c (coproc_secondary_reload_class): Handle
structure modes. Don't check neon_vector_mem_operand for
vector or structure modes.
gcc/testsuite/
PR target/47551
* gcc.target/arm/neon-modes-2.c: New test.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000
+++ new/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000
@@ -9139,11 +9139,14 @@
return GENERAL_REGS;
}
+ /* The neon move patterns handle all legitimate vector and struct
+ addresses. */
if (TARGET_NEON
+ && MEM_P (x)
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
- && neon_vector_mem_operand (x, 0))
- return NO_REGS;
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || VALID_NEON_STRUCT_MODE (mode)))
+ return NO_REGS;
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
return NO_REGS;
=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-2.c'
--- old/gcc/testsuite/gcc.target/arm/neon-modes-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-modes-2.c 2011-02-02 10:02:45 +0000
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+#define SETUP(A) x##A = vld3_u32 (ptr + A * 0x20)
+#define MODIFY(A) x##A = vld3_lane_u32 (ptr + A * 0x20 + 0x10, x##A, 1)
+#define STORE(A) vst3_u32 (ptr + A * 0x20, x##A)
+
+#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
+
+void
+bar (uint32_t *ptr, int y)
+{
+ uint32x2x3_t MANY (SETUP);
+ int *x = __builtin_alloca (y);
+ int z[0x1000];
+ foo (x, z);
+ MANY (MODIFY);
+ foo (x, z);
+ MANY (STORE);
+}

View File

@ -0,0 +1,32 @@
2011-03-22 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-03-21 Daniel Jacobowitz <dan@codesourcery.com>
gcc/
* config/arm/unwind-arm.c (__gnu_unwind_pr_common): Correct test
for barrier handlers.
=== modified file 'gcc/config/arm/unwind-arm.c'
--- old/gcc/config/arm/unwind-arm.c 2009-10-30 14:55:10 +0000
+++ new/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000
@@ -1196,8 +1196,6 @@
ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
if (data[0] & uint32_highbit)
- phase2_call_unexpected_after_unwind = 1;
- else
{
data += rtti_count + 1;
/* Setup for entry to the handler. */
@@ -1207,6 +1205,8 @@
_Unwind_SetGR (context, 0, (_uw) ucbp);
return _URC_INSTALL_CONTEXT;
}
+ else
+ phase2_call_unexpected_after_unwind = 1;
}
if (data[0] & uint32_highbit)
data++;

View File

@ -0,0 +1,653 @@
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* doc/invoke.texi (max-stores-to-sink): Document.
* params.h (MAX_STORES_TO_SINK): Define.
* opts.c (finish_options): Set MAX_STORES_TO_SINK to 0
if either vectorization or if-conversion is disabled.
* tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from
tree-vect-data-refs.c vect_equal_offsets.
(dr_equal_offsets_p): New function.
(find_data_references_in_bb): Remove static.
* tree-data-ref.h (find_data_references_in_bb): Declare.
(dr_equal_offsets_p): Likewise.
* tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c.
(vect_drs_dependent_in_basic_block): Update calls to
vect_equal_offsets.
(vect_check_interleaving): Likewise.
* tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h.
(cond_if_else_store_replacement): Rename to...
(cond_if_else_store_replacement_1): ... this. Change arguments and
documentation.
(cond_if_else_store_replacement): New function.
* Makefile.in (tree-ssa-phiopt.o): Adjust dependencies.
* params.def (PARAM_MAX_STORES_TO_SINK): Define.
gcc/testsuite/
* gcc.dg/vect/vect-cselim-1.c: New test.
* gcc.dg/vect/vect-cselim-2.c: New test.
=== modified file 'gcc/Makefile.in'
--- old/gcc/Makefile.in 2011-03-26 09:20:34 +0000
+++ new/gcc/Makefile.in 2011-04-18 11:31:29 +0000
@@ -2422,7 +2422,8 @@
tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \
- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h
+ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \
+ $(TREE_DATA_REF_H)
tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-03-29 14:24:42 +0000
+++ new/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
@@ -8909,6 +8909,11 @@
The maximum number of namespaces to consult for suggestions when C++
name lookup fails for an identifier. The default is 1000.
+@item max-stores-to-sink
+The maximum number of conditional stores paires that can be sunk. Set to 0
+if either vectorization (@option{-ftree-vectorize}) or if-conversion
+(@option{-ftree-loop-if-convert}) is disabled. The default is 2.
+
@end table
@end table
=== modified file 'gcc/opts.c'
--- old/gcc/opts.c 2011-02-17 22:51:57 +0000
+++ new/gcc/opts.c 2011-03-27 09:38:18 +0000
@@ -823,6 +823,12 @@
opts->x_flag_split_stack = 0;
}
}
+
+ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
+ is disabled. */
+ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert)
+ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0,
+ opts->x_param_values, opts_set->x_param_values);
}
#define LEFT_COLUMN 27
=== modified file 'gcc/params.def'
--- old/gcc/params.def 2011-03-26 09:20:34 +0000
+++ new/gcc/params.def 2011-04-18 11:31:29 +0000
@@ -883,6 +883,13 @@
"name lookup fails",
1000, 0, 0)
+/* Maximum number of conditional store pairs that can be sunk. */
+DEFPARAM (PARAM_MAX_STORES_TO_SINK,
+ "max-stores-to-sink",
+ "Maximum number of conditional store pairs that can be sunk",
+ 2, 0, 0)
+
+
/*
Local variables:
mode:c
=== modified file 'gcc/params.h'
--- old/gcc/params.h 2011-01-13 13:41:03 +0000
+++ new/gcc/params.h 2011-03-27 09:38:18 +0000
@@ -206,4 +206,6 @@
PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO)
#define MIN_NONDEBUG_INSN_UID \
PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
+#define MAX_STORES_TO_SINK \
+ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
#endif /* ! GCC_PARAMS_H */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+ short a;
+ short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int i;
+ short c, d;
+
+ /* Vectorizable with conditional store sinking. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out[i].b = c;
+ out[i].a = d + 5;
+ }
+ else
+ {
+ out[i].b = d - 12;
+ out[i].a = c + d;
+ }
+ }
+
+ /* Not vectorizable. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out1[i] = c;
+ }
+ else
+ {
+ out2[i] = c + d;
+ }
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i].a = i;
+ in1[i].b = i + 2;
+ in2[i].a = 5;
+ in2[i].b = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-2.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 2011-03-27 09:38:18 +0000
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+int a[N], b[N], in1[N], in2[N];
+int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42};
+
+__attribute__ ((noinline)) void
+foo (int *pa, int *pb)
+{
+ int i;
+ int c, d;
+
+ /* Store sinking should not work here since the pointers may alias. */
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i];
+ d = in2[i];
+
+ if (c >= d)
+ {
+ *pa = c;
+ *pb = d + 5;
+ }
+ else
+ {
+ *pb = d - 12;
+ *pa = c + d;
+ }
+
+ pa++;
+ pb++;
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i] = i;
+ in2[i] = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo (a, b);
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != result[2*i] || b[i] != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/tree-data-ref.c'
--- old/gcc/tree-data-ref.c 2011-02-05 01:39:20 +0000
+++ new/gcc/tree-data-ref.c 2011-03-27 09:38:18 +0000
@@ -991,6 +991,48 @@
return dr;
}
+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
+ expressions. */
+static bool
+dr_equal_offsets_p1 (tree offset1, tree offset2)
+{
+ bool res;
+
+ STRIP_NOPS (offset1);
+ STRIP_NOPS (offset2);
+
+ if (offset1 == offset2)
+ return true;
+
+ if (TREE_CODE (offset1) != TREE_CODE (offset2)
+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
+ return false;
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
+ TREE_OPERAND (offset2, 0));
+
+ if (!res || !BINARY_CLASS_P (offset1))
+ return res;
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
+ TREE_OPERAND (offset2, 1));
+
+ return res;
+}
+
+/* Check if DRA and DRB have equal offsets. */
+bool
+dr_equal_offsets_p (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ tree offset1, offset2;
+
+ offset1 = DR_OFFSET (dra);
+ offset2 = DR_OFFSET (drb);
+
+ return dr_equal_offsets_p1 (offset1, offset2);
+}
+
/* Returns true if FNA == FNB. */
static bool
@@ -4294,7 +4336,7 @@
DATAREFS. Returns chrec_dont_know when failing to analyze a
difficult case, returns NULL_TREE otherwise. */
-static tree
+tree
find_data_references_in_bb (struct loop *loop, basic_block bb,
VEC (data_reference_p, heap) **datarefs)
{
=== modified file 'gcc/tree-data-ref.h'
--- old/gcc/tree-data-ref.h 2011-01-25 21:24:23 +0000
+++ new/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
@@ -426,10 +426,14 @@
extern void compute_all_dependences (VEC (data_reference_p, heap) *,
VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
bool);
+extern tree find_data_references_in_bb (struct loop *, basic_block,
+ VEC (data_reference_p, heap) **);
extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
extern bool dr_may_alias_p (const struct data_reference *,
const struct data_reference *);
+extern bool dr_equal_offsets_p (struct data_reference *,
+ struct data_reference *);
/* Return true when the base objects of data references A and B are
=== modified file 'gcc/tree-ssa-phiopt.c'
--- old/gcc/tree-ssa-phiopt.c 2010-11-03 15:18:50 +0000
+++ new/gcc/tree-ssa-phiopt.c 2011-03-27 09:38:18 +0000
@@ -34,6 +34,8 @@
#include "langhooks.h"
#include "pointer-set.h"
#include "domwalk.h"
+#include "cfgloop.h"
+#include "tree-data-ref.h"
static unsigned int tree_ssa_phiopt (void);
static unsigned int tree_ssa_phiopt_worker (bool);
@@ -1292,35 +1294,18 @@
return true;
}
-/* Do the main work of conditional store replacement. We already know
- that the recognized pattern looks like so:
-
- split:
- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
- THEN_BB:
- X = Y;
- goto JOIN_BB;
- ELSE_BB:
- X = Z;
- fallthrough (edge E0)
- JOIN_BB:
- some more
-
- We check that THEN_BB and ELSE_BB contain only one store
- that the stores have a "simple" RHS. */
+/* Do the main work of conditional store replacement. */
static bool
-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
- basic_block join_bb)
+cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
+ basic_block join_bb, gimple then_assign,
+ gimple else_assign)
{
- gimple then_assign = last_and_only_stmt (then_bb);
- gimple else_assign = last_and_only_stmt (else_bb);
tree lhs_base, lhs, then_rhs, else_rhs;
source_location then_locus, else_locus;
gimple_stmt_iterator gsi;
gimple newphi, new_stmt;
- /* Check if then_bb and else_bb contain only one store each. */
if (then_assign == NULL
|| !gimple_assign_single_p (then_assign)
|| else_assign == NULL
@@ -1385,6 +1370,190 @@
return true;
}
+/* Conditional store replacement. We already know
+ that the recognized pattern looks like so:
+
+ split:
+ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
+ THEN_BB:
+ ...
+ X = Y;
+ ...
+ goto JOIN_BB;
+ ELSE_BB:
+ ...
+ X = Z;
+ ...
+ fallthrough (edge E0)
+ JOIN_BB:
+ some more
+
+ We check that it is safe to sink the store to JOIN_BB by verifying that
+ there are no read-after-write or write-after-write dependencies in
+ THEN_BB and ELSE_BB. */
+
+static bool
+cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
+ basic_block join_bb)
+{
+ gimple then_assign = last_and_only_stmt (then_bb);
+ gimple else_assign = last_and_only_stmt (else_bb);
+ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs;
+ VEC (ddr_p, heap) *then_ddrs, *else_ddrs;
+ gimple then_store, else_store;
+ bool found, ok = false, res;
+ struct data_dependence_relation *ddr;
+ data_reference_p then_dr, else_dr;
+ int i, j;
+ tree then_lhs, else_lhs;
+ VEC (gimple, heap) *then_stores, *else_stores;
+ basic_block blocks[3];
+
+ if (MAX_STORES_TO_SINK == 0)
+ return false;
+
+ /* Handle the case with single statement in THEN_BB and ELSE_BB. */
+ if (then_assign && else_assign)
+ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+ then_assign, else_assign);
+
+ /* Find data references. */
+ then_datarefs = VEC_alloc (data_reference_p, heap, 1);
+ else_datarefs = VEC_alloc (data_reference_p, heap, 1);
+ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
+ == chrec_dont_know)
+ || !VEC_length (data_reference_p, then_datarefs)
+ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
+ == chrec_dont_know)
+ || !VEC_length (data_reference_p, else_datarefs))
+ {
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ return false;
+ }
+
+ /* Find pairs of stores with equal LHS. */
+ then_stores = VEC_alloc (gimple, heap, 1);
+ else_stores = VEC_alloc (gimple, heap, 1);
+ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr)
+ {
+ if (DR_IS_READ (then_dr))
+ continue;
+
+ then_store = DR_STMT (then_dr);
+ then_lhs = gimple_assign_lhs (then_store);
+ found = false;
+
+ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr)
+ {
+ if (DR_IS_READ (else_dr))
+ continue;
+
+ else_store = DR_STMT (else_dr);
+ else_lhs = gimple_assign_lhs (else_store);
+
+ if (operand_equal_p (then_lhs, else_lhs, 0))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ continue;
+
+ VEC_safe_push (gimple, heap, then_stores, then_store);
+ VEC_safe_push (gimple, heap, else_stores, else_store);
+ }
+
+ /* No pairs of stores found. */
+ if (!VEC_length (gimple, then_stores)
+ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK)
+ {
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+
+ /* Compute and check data dependencies in both basic blocks. */
+ then_ddrs = VEC_alloc (ddr_p, heap, 1);
+ else_ddrs = VEC_alloc (ddr_p, heap, 1);
+ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false);
+ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false);
+ blocks[0] = then_bb;
+ blocks[1] = else_bb;
+ blocks[2] = join_bb;
+ renumber_gimple_stmt_uids_in_blocks (blocks, 3);
+
+ /* Check that there are no read-after-write or write-after-write dependencies
+ in THEN_BB. */
+ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr)
+ {
+ struct data_reference *dra = DDR_A (ddr);
+ struct data_reference *drb = DDR_B (ddr);
+
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+ {
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+ }
+
+ /* Check that there are no read-after-write or write-after-write dependencies
+ in ELSE_BB. */
+ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr)
+ {
+ struct data_reference *dra = DDR_A (ddr);
+ struct data_reference *drb = DDR_B (ddr);
+
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+ {
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+ return false;
+ }
+ }
+
+ /* Sink stores with same LHS. */
+ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store)
+ {
+ else_store = VEC_index (gimple, else_stores, i);
+ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+ then_store, else_store);
+ ok = ok || res;
+ }
+
+ free_dependence_relations (then_ddrs);
+ free_dependence_relations (else_ddrs);
+ free_data_refs (then_datarefs);
+ free_data_refs (else_datarefs);
+ VEC_free (gimple, heap, then_stores);
+ VEC_free (gimple, heap, else_stores);
+
+ return ok;
+}
+
/* Always do these optimizations if we have SSA
trees to work on. */
static bool
=== modified file 'gcc/tree-vect-data-refs.c'
--- old/gcc/tree-vect-data-refs.c 2011-02-25 11:18:14 +0000
+++ new/gcc/tree-vect-data-refs.c 2011-03-27 09:38:18 +0000
@@ -289,39 +289,6 @@
}
}
-
-/* Function vect_equal_offsets.
-
- Check if OFFSET1 and OFFSET2 are identical expressions. */
-
-static bool
-vect_equal_offsets (tree offset1, tree offset2)
-{
- bool res;
-
- STRIP_NOPS (offset1);
- STRIP_NOPS (offset2);
-
- if (offset1 == offset2)
- return true;
-
- if (TREE_CODE (offset1) != TREE_CODE (offset2)
- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
- return false;
-
- res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
- TREE_OPERAND (offset2, 0));
-
- if (!res || !BINARY_CLASS_P (offset1))
- return res;
-
- res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
- TREE_OPERAND (offset2, 1));
-
- return res;
-}
-
-
/* Check dependence between DRA and DRB for basic block vectorization.
If the accesses share same bases and offsets, we can compare their initial
constant offsets to decide whether they differ or not. In case of a read-
@@ -352,7 +319,7 @@
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+ || !dr_equal_offsets_p (dra, drb))
return true;
/* Check the types. */
@@ -402,7 +369,7 @@
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
+ || !dr_equal_offsets_p (dra, drb)
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|| DR_IS_READ (dra) != DR_IS_READ (drb))
return false;

View File

@ -0,0 +1,126 @@
2011-04-21 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2008-12-03 Daniel Jacobowitz <dan@codesourcery.com>
gcc/testsuite/
* gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New.
* lib/target-supports.exp (check_effective_target_vect_shift_char): New
function.
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-3.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 2011-04-21 13:51:06 +0000
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned short dst[N] __attribute__((aligned(N)));
+unsigned short src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+ volatile int i;
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ src[i] = i << 3;
+
+ array_shift ();
+
+ for (i = 0; i < N; i++)
+ if (dst[i] != i)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-4.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 2011-04-21 13:51:06 +0000
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift_char } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned char dst[N] __attribute__((aligned(N)));
+unsigned char src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+ volatile int i;
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ src[i] = i << 3;
+
+ array_shift ();
+
+ for (i = 0; i < N; i++)
+ if (dst[i] != i)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/lib/target-supports.exp'
--- old/gcc/testsuite/lib/target-supports.exp 2011-02-19 15:31:15 +0000
+++ new/gcc/testsuite/lib/target-supports.exp 2011-04-21 13:51:06 +0000
@@ -2308,6 +2308,26 @@
}
+# Return 1 if the target supports hardware vector shift operation for char.
+
+proc check_effective_target_vect_shift_char { } {
+ global et_vect_shift_char_saved
+
+ if [info exists et_vect_shift_char_saved] {
+ verbose "check_effective_target_vect_shift_char: using cached result" 2
+ } else {
+ set et_vect_shift_char_saved 0
+ if { ([istarget powerpc*-*-*]
+ && ![istarget powerpc-*-linux*paired*])
+ || [check_effective_target_arm32] } {
+ set et_vect_shift_char_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2
+ return $et_vect_shift_char_saved
+}
+
# Return 1 if the target supports hardware vectors of long, 0 otherwise.
#
# This can change for different subtargets so do not cache the result.

View File

@ -0,0 +1,177 @@
2011-04-27 Ira Rosen <ira.rosen@linaro.org>
Backport from FSF:
2011-04-03 Richard Guenther <rguenther@suse.de>
Ira Rosen <ira.rosen@linaro.org>
gcc/
* tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all
non-variable offsets and compare the remaining bases of the two
accesses instead of looking for exact same data-ref.
gcc/testsuite/
* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test.
* gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with
-ftree-loop-if-convert-stores.
=== added file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c'
--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+ short a;
+ short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int i;
+ short c, d;
+
+ for (i = 0; i < N; i++)
+ {
+ c = in1[i].b;
+ d = in2[i].b;
+
+ if (c >= d)
+ {
+ out[i].b = in1[i].a;
+ out[i].a = d + 5;
+ }
+ else
+ {
+ out[i].b = d - 12;
+ out[i].a = in2[i].a + d;
+ }
+ }
+}
+
+int
+main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in1[i].a = i;
+ in1[i].b = i + 2;
+ in2[i].a = 5;
+ in2[i].b = i + 5;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp'
--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2010-11-22 21:49:19 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000
@@ -210,6 +210,12 @@
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
+# -ftree-loop-if-convert-stores
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
# With -O3.
# Don't allow IPA cloning, because it throws our counts out of whack.
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
=== modified file 'gcc/tree-if-conv.c'
--- old/gcc/tree-if-conv.c 2011-02-23 16:49:52 +0000
+++ new/gcc/tree-if-conv.c 2011-04-24 07:45:49 +0000
@@ -464,8 +464,8 @@
/* Returns true when the memory references of STMT are read or written
unconditionally. In other words, this function returns true when
for every data reference A in STMT there exist other accesses to
- the same data reference with predicates that add up (OR-up) to the
- true predicate: this ensures that the data reference A is touched
+ a data reference with the same base with predicates that add up (OR-up) to
+ the true predicate: this ensures that the data reference A is touched
(read or written) on every iteration of the if-converted loop. */
static bool
@@ -489,21 +489,38 @@
continue;
for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
- if (DR_STMT (b) != stmt
- && same_data_refs (a, b))
- {
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
- if (DR_RW_UNCONDITIONALLY (b) == 1
- || is_true_predicate (cb)
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
- ca, cb)))
- {
- DR_RW_UNCONDITIONALLY (a) = 1;
- DR_RW_UNCONDITIONALLY (b) = 1;
- found = true;
- break;
- }
+ {
+ tree ref_base_a = DR_REF (a);
+ tree ref_base_b = DR_REF (b);
+
+ if (DR_STMT (b) == stmt)
+ continue;
+
+ while (TREE_CODE (ref_base_a) == COMPONENT_REF
+ || TREE_CODE (ref_base_a) == IMAGPART_EXPR
+ || TREE_CODE (ref_base_a) == REALPART_EXPR)
+ ref_base_a = TREE_OPERAND (ref_base_a, 0);
+
+ while (TREE_CODE (ref_base_b) == COMPONENT_REF
+ || TREE_CODE (ref_base_b) == IMAGPART_EXPR
+ || TREE_CODE (ref_base_b) == REALPART_EXPR)
+ ref_base_b = TREE_OPERAND (ref_base_b, 0);
+
+ if (!operand_equal_p (ref_base_a, ref_base_b, 0))
+ {
+ tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+
+ if (DR_RW_UNCONDITIONALLY (b) == 1
+ || is_true_predicate (cb)
+ || is_true_predicate (ca
+ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
+ {
+ DR_RW_UNCONDITIONALLY (a) = 1;
+ DR_RW_UNCONDITIONALLY (b) = 1;
+ found = true;
+ break;
+ }
+ }
}
if (!found)

View File

@ -0,0 +1,140 @@
2011-05-02 Ira Rosen <ira.rosen@linaro.org>
Backport from FSF:
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
gcc/
* config/arm/arm.c (arm_autovectorize_vector_sizes): New function.
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
gcc/testsuite/
* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
accesses to preserve the meaning of the test for doubleword vectors.
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000
+++ new/gcc/config/arm/arm.c 2011-04-28 11:46:58 +0000
@@ -250,6 +250,7 @@
bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static unsigned int arm_autovectorize_vector_sizes (void);
/* Table of machine attributes. */
@@ -395,6 +396,9 @@
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+ arm_autovectorize_vector_sizes
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -23511,6 +23515,12 @@
}
}
+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+ return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+}
+
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2010-10-04 14:59:30 +0000
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
@@ -8,7 +8,7 @@
void with_restrict(int * __restrict p)
{
int i;
- int *q = p - 2;
+ int *q = p - 1;
for (i = 0; i < 1000; ++i) {
p[i] = q[i];
@@ -19,7 +19,7 @@
void without_restrict(int * p)
{
int i;
- int *q = p - 2;
+ int *q = p - 1;
for (i = 0; i < 1000; ++i) {
p[i] = q[i];
@@ -38,8 +38,8 @@
a[i] = b[i] = i;
}
- with_restrict(a + 2);
- without_restrict(b + 2);
+ with_restrict(a + 1);
+ without_restrict(b + 1);
for (i = 0; i < 1002; ++i) {
if (a[i] != b[i])
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
@@ -4,9 +4,9 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 8
+#define N 12
-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
int
main1 ()
@@ -101,7 +101,7 @@
}
/* SLP with unrolling by 8. */
- for (i = 0; i < N/2; i++)
+ for (i = 0; i < N/4; i++)
{
out[i*9] = in[i*9];
out[i*9 + 1] = in[i*9 + 1];
@@ -115,7 +115,7 @@
}
/* check results: */
- for (i = 0; i < N/2; i++)
+ for (i = 0; i < N/4; i++)
{
if (out[i*9] != in[i*9]
|| out[i*9 + 1] != in[i*9 + 1]
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2010-11-22 12:16:52 +0000
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
@@ -17,7 +17,7 @@
float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
- float E[4] = {0,1,2,480};
+ float E[4] = {0,480,960,1440};
float s;
int i, j;
@@ -55,7 +55,7 @@
s = 0;
for (j=0; j<N; j+=4)
s += C[j];
- B[i+3] = B[i] + s;
+ B[i+1] = B[i] + s;
}
/* check results: */

View File

@ -0,0 +1,294 @@
2011-05-04 Richard Sandiford <richard.sandiford@linaro.org>
Backport from mainline:
2011-03-29 Richard Sandiford <richard.sandiford@linaro.org>
PR debug/48190
* dwarf2out.c (dw_loc_list_node): Add resolved_addr and replaced.
(cached_dw_loc_list_def): New structure.
(cached_dw_loc_list): New typedef.
(cached_dw_loc_list_table): New variable.
(cached_dw_loc_list_table_hash): New function.
(cached_dw_loc_list_table_eq): Likewise.
(add_location_or_const_value_attribute): Take a bool cache_p.
Cache the list when the parameter is true.
(gen_formal_parameter_die): Update caller.
(gen_variable_die): Likewise.
(dwarf2out_finish): Likewise.
(dwarf2out_abstract_function): Nullify cached_dw_loc_list_table
while generating debug info for the decl.
(dwarf2out_function_decl): Clear cached_dw_loc_list_table.
(dwarf2out_init): Initialize cached_dw_loc_list_table.
(resolve_addr): Cache the result of resolving a chain of
location lists.
=== modified file 'gcc/dwarf2out.c'
--- old/gcc/dwarf2out.c 2011-03-29 22:47:59 +0000
+++ new/gcc/dwarf2out.c 2011-05-04 13:20:12 +0000
@@ -4427,6 +4427,11 @@
const char *section; /* Section this loclist is relative to */
dw_loc_descr_ref expr;
hashval_t hash;
+ /* True if all addresses in this and subsequent lists are known to be
+ resolved. */
+ bool resolved_addr;
+ /* True if this list has been replaced by dw_loc_next. */
+ bool replaced;
bool emitted;
} dw_loc_list_node;
@@ -6087,6 +6092,19 @@
/* Table of decl location linked lists. */
static GTY ((param_is (var_loc_list))) htab_t decl_loc_table;
+/* A cached location list. */
+struct GTY (()) cached_dw_loc_list_def {
+ /* The DECL_UID of the decl that this entry describes. */
+ unsigned int decl_id;
+
+ /* The cached location list. */
+ dw_loc_list_ref loc_list;
+};
+typedef struct cached_dw_loc_list_def cached_dw_loc_list;
+
+/* Table of cached location lists. */
+static GTY ((param_is (cached_dw_loc_list))) htab_t cached_dw_loc_list_table;
+
/* A pointer to the base of a list of references to DIE's that
are uniquely identified by their tag, presence/absence of
children DIE's, and list of attribute/value pairs. */
@@ -6434,7 +6452,7 @@
static void insert_double (double_int, unsigned char *);
static void insert_float (const_rtx, unsigned char *);
static rtx rtl_for_decl_location (tree);
-static bool add_location_or_const_value_attribute (dw_die_ref, tree,
+static bool add_location_or_const_value_attribute (dw_die_ref, tree, bool,
enum dwarf_attribute);
static bool tree_add_const_value_attribute (dw_die_ref, tree);
static bool tree_add_const_value_attribute_for_decl (dw_die_ref, tree);
@@ -8168,6 +8186,24 @@
htab_find_with_hash (decl_loc_table, decl, DECL_UID (decl));
}
+/* Returns a hash value for X (which really is a cached_dw_loc_list_list). */
+
+static hashval_t
+cached_dw_loc_list_table_hash (const void *x)
+{
+ return (hashval_t) ((const cached_dw_loc_list *) x)->decl_id;
+}
+
+/* Return nonzero if decl_id of cached_dw_loc_list X is the same as
+ UID of decl *Y. */
+
+static int
+cached_dw_loc_list_table_eq (const void *x, const void *y)
+{
+ return (((const cached_dw_loc_list *) x)->decl_id
+ == DECL_UID ((const_tree) y));
+}
+
/* Equate a DIE to a particular declaration. */
static void
@@ -16965,15 +17001,22 @@
these things can crop up in other ways also.) Note that one type of
constant value which can be passed into an inlined function is a constant
pointer. This can happen for example if an actual argument in an inlined
- function call evaluates to a compile-time constant address. */
+ function call evaluates to a compile-time constant address.
+
+ CACHE_P is true if it is worth caching the location list for DECL,
+ so that future calls can reuse it rather than regenerate it from scratch.
+ This is true for BLOCK_NONLOCALIZED_VARS in inlined subroutines,
+ since we will need to refer to them each time the function is inlined. */
static bool
-add_location_or_const_value_attribute (dw_die_ref die, tree decl,
+add_location_or_const_value_attribute (dw_die_ref die, tree decl, bool cache_p,
enum dwarf_attribute attr)
{
rtx rtl;
dw_loc_list_ref list;
var_loc_list *loc_list;
+ cached_dw_loc_list *cache;
+ void **slot;
if (TREE_CODE (decl) == ERROR_MARK)
return false;
@@ -17010,7 +17053,33 @@
&& add_const_value_attribute (die, rtl))
return true;
}
- list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
+ /* If this decl is from BLOCK_NONLOCALIZED_VARS, we might need its
+ list several times. See if we've already cached the contents. */
+ list = NULL;
+ if (loc_list == NULL || cached_dw_loc_list_table == NULL)
+ cache_p = false;
+ if (cache_p)
+ {
+ cache = (cached_dw_loc_list *)
+ htab_find_with_hash (cached_dw_loc_list_table, decl, DECL_UID (decl));
+ if (cache)
+ list = cache->loc_list;
+ }
+ if (list == NULL)
+ {
+ list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
+ /* It is usually worth caching this result if the decl is from
+ BLOCK_NONLOCALIZED_VARS and if the list has at least two elements. */
+ if (cache_p && list && list->dw_loc_next)
+ {
+ slot = htab_find_slot_with_hash (cached_dw_loc_list_table, decl,
+ DECL_UID (decl), INSERT);
+ cache = ggc_alloc_cleared_cached_dw_loc_list ();
+ cache->decl_id = DECL_UID (decl);
+ cache->loc_list = list;
+ *slot = cache;
+ }
+ }
if (list)
{
add_AT_location_description (die, attr, list);
@@ -18702,7 +18771,7 @@
equate_decl_number_to_die (node, parm_die);
if (! DECL_ABSTRACT (node_or_origin))
add_location_or_const_value_attribute (parm_die, node_or_origin,
- DW_AT_location);
+ node == NULL, DW_AT_location);
break;
@@ -18887,6 +18956,7 @@
tree context;
int was_abstract;
htab_t old_decl_loc_table;
+ htab_t old_cached_dw_loc_list_table;
/* Make sure we have the actual abstract inline, not a clone. */
decl = DECL_ORIGIN (decl);
@@ -18901,6 +18971,8 @@
get locations in abstract instantces. */
old_decl_loc_table = decl_loc_table;
decl_loc_table = NULL;
+ old_cached_dw_loc_list_table = cached_dw_loc_list_table;
+ cached_dw_loc_list_table = NULL;
/* Be sure we've emitted the in-class declaration DIE (if any) first, so
we don't get confused by DECL_ABSTRACT. */
@@ -18925,6 +18997,7 @@
current_function_decl = save_fn;
decl_loc_table = old_decl_loc_table;
+ cached_dw_loc_list_table = old_cached_dw_loc_list_table;
pop_cfun ();
}
@@ -19709,9 +19782,8 @@
&& !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl_or_origin)))
defer_location (decl_or_origin, var_die);
else
- add_location_or_const_value_attribute (var_die,
- decl_or_origin,
- DW_AT_location);
+ add_location_or_const_value_attribute (var_die, decl_or_origin,
+ decl == NULL, DW_AT_location);
add_pubname (decl_or_origin, var_die);
}
else
@@ -21498,6 +21570,7 @@
dwarf2out_decl (decl);
htab_empty (decl_loc_table);
+ htab_empty (cached_dw_loc_list_table);
}
/* Output a marker (i.e. a label) for the beginning of the generated code for
@@ -22230,6 +22303,11 @@
decl_loc_table = htab_create_ggc (10, decl_loc_table_hash,
decl_loc_table_eq, NULL);
+ /* Allocate the cached_dw_loc_list_table. */
+ cached_dw_loc_list_table
+ = htab_create_ggc (10, cached_dw_loc_list_table_hash,
+ cached_dw_loc_list_table_eq, NULL);
+
/* Allocate the initial hunk of the decl_scope_table. */
decl_scope_table = VEC_alloc (tree, gc, 256);
@@ -22870,30 +22948,53 @@
{
dw_die_ref c;
dw_attr_ref a;
- dw_loc_list_ref *curr;
+ dw_loc_list_ref *curr, *start, loc;
unsigned ix;
FOR_EACH_VEC_ELT (dw_attr_node, die->die_attr, ix, a)
switch (AT_class (a))
{
case dw_val_class_loc_list:
- curr = AT_loc_list_ptr (a);
- while (*curr)
+ start = curr = AT_loc_list_ptr (a);
+ loc = *curr;
+ gcc_assert (loc);
+ /* The same list can be referenced more than once. See if we have
+ already recorded the result from a previous pass. */
+ if (loc->replaced)
+ *curr = loc->dw_loc_next;
+ else if (!loc->resolved_addr)
{
- if (!resolve_addr_in_expr ((*curr)->expr))
+ /* As things stand, we do not expect or allow one die to
+ reference a suffix of another die's location list chain.
+ References must be identical or completely separate.
+ There is therefore no need to cache the result of this
+ pass on any list other than the first; doing so
+ would lead to unnecessary writes. */
+ while (*curr)
{
- dw_loc_list_ref next = (*curr)->dw_loc_next;
- if (next && (*curr)->ll_symbol)
+ gcc_assert (!(*curr)->replaced && !(*curr)->resolved_addr);
+ if (!resolve_addr_in_expr ((*curr)->expr))
{
- gcc_assert (!next->ll_symbol);
- next->ll_symbol = (*curr)->ll_symbol;
+ dw_loc_list_ref next = (*curr)->dw_loc_next;
+ if (next && (*curr)->ll_symbol)
+ {
+ gcc_assert (!next->ll_symbol);
+ next->ll_symbol = (*curr)->ll_symbol;
+ }
+ *curr = next;
}
- *curr = next;
+ else
+ curr = &(*curr)->dw_loc_next;
}
+ if (loc == *start)
+ loc->resolved_addr = 1;
else
- curr = &(*curr)->dw_loc_next;
+ {
+ loc->replaced = 1;
+ loc->dw_loc_next = *start;
+ }
}
- if (!AT_loc_list (a))
+ if (!*start)
{
remove_AT (die, a->dw_attr);
ix--;
@@ -23322,6 +23423,7 @@
add_location_or_const_value_attribute (
VEC_index (deferred_locations, deferred_locations_list, i)->die,
VEC_index (deferred_locations, deferred_locations_list, i)->variable,
+ false,
DW_AT_location);
}

View File

@ -0,0 +1,254 @@
2011-04-26 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-04-15 Maxim Kuvyrkov <maxim@codesourcery.com>
gcc/
* combine.c (subst, combine_simlify_rtx): Add new argument, use it
to track processing of conditionals. Update all callers.
(try_combine, simplify_if_then_else): Update.
2011-04-25 Maxim Kuvyrkov <maxim@codesourcery.com>
Eric Botcazou <ebotcazou@adacore.com>
gcc/
* combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals
for STORE_FLAG_VALUE==-1 case.
=== modified file 'gcc/combine.c'
--- old/gcc/combine.c 2011-02-15 19:46:26 +0000
+++ new/gcc/combine.c 2011-04-26 17:03:58 +0000
@@ -391,8 +391,8 @@
static void undo_all (void);
static void undo_commit (void);
static rtx *find_split_point (rtx *, rtx, bool);
-static rtx subst (rtx, rtx, rtx, int, int);
-static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
+static rtx subst (rtx, rtx, rtx, int, int, int);
+static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int);
static rtx simplify_if_then_else (rtx);
static rtx simplify_set (rtx);
static rtx simplify_logical (rtx);
@@ -3086,12 +3086,12 @@
if (i1)
{
subst_low_luid = DF_INSN_LUID (i1);
- i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0);
+ i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
else
{
subst_low_luid = DF_INSN_LUID (i2);
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0);
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
}
@@ -3103,7 +3103,7 @@
self-referential RTL when we will be substituting I1SRC for I1DEST
later. Likewise if I0 feeds into I2, either directly or indirectly
through I1, and I0DEST is in I0SRC. */
- newpat = subst (PATTERN (i3), i2dest, i2src, 0,
+ newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0,
(i1_feeds_i2_n && i1dest_in_i1src)
|| ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
&& i0dest_in_i0src));
@@ -3142,7 +3142,7 @@
copy of I1SRC each time we substitute it, in order to avoid creating
self-referential RTL when we will be substituting I0SRC for I0DEST
later. */
- newpat = subst (newpat, i1dest, i1src, 0,
+ newpat = subst (newpat, i1dest, i1src, 0, 0,
i0_feeds_i1_n && i0dest_in_i0src);
substed_i1 = 1;
@@ -3172,7 +3172,7 @@
n_occurrences = 0;
subst_low_luid = DF_INSN_LUID (i0);
- newpat = subst (newpat, i0dest, i0src, 0, 0);
+ newpat = subst (newpat, i0dest, i0src, 0, 0, 0);
substed_i0 = 1;
}
@@ -3234,7 +3234,7 @@
{
rtx t = i1pat;
if (i0_feeds_i1_n)
- t = subst (t, i0dest, i0src, 0, 0);
+ t = subst (t, i0dest, i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -3242,10 +3242,10 @@
{
rtx t = i2pat;
if (i1_feeds_i2_n)
- t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
i0_feeds_i1_n && i0dest_in_i0src);
if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
- t = subst (t, i0dest, i0src, 0, 0);
+ t = subst (t, i0dest, i0src, 0, 0, 0);
XVECEXP (newpat, 0, --total_sets) = t;
}
@@ -4914,11 +4914,13 @@
IN_DEST is nonzero if we are processing the SET_DEST of a SET.
+ IN_COND is nonzero if we are on top level of the condition.
+
UNIQUE_COPY is nonzero if each substitution must be unique. We do this
by copying if `n_occurrences' is nonzero. */
static rtx
-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy)
+subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy)
{
enum rtx_code code = GET_CODE (x);
enum machine_mode op0_mode = VOIDmode;
@@ -4979,7 +4981,7 @@
&& GET_CODE (XVECEXP (x, 0, 0)) == SET
&& GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
{
- new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy);
+ new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy);
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -4996,7 +4998,7 @@
&& GET_CODE (dest) != CC0
&& GET_CODE (dest) != PC)
{
- new_rtx = subst (dest, from, to, 0, unique_copy);
+ new_rtx = subst (dest, from, to, 0, 0, unique_copy);
/* If this substitution failed, this whole thing fails. */
if (GET_CODE (new_rtx) == CLOBBER
@@ -5042,8 +5044,8 @@
}
else
{
- new_rtx = subst (XVECEXP (x, i, j), from, to, 0,
- unique_copy);
+ new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0,
+ unique_copy);
/* If this substitution failed, this whole thing
fails. */
@@ -5120,7 +5122,9 @@
&& (code == SUBREG || code == STRICT_LOW_PART
|| code == ZERO_EXTRACT))
|| code == SET)
- && i == 0), unique_copy);
+ && i == 0),
+ code == IF_THEN_ELSE && i == 0,
+ unique_copy);
/* If we found that we will have to reject this combination,
indicate that by returning the CLOBBER ourselves, rather than
@@ -5177,7 +5181,7 @@
/* If X is sufficiently simple, don't bother trying to do anything
with it. */
if (code != CONST_INT && code != REG && code != CLOBBER)
- x = combine_simplify_rtx (x, op0_mode, in_dest);
+ x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
if (GET_CODE (x) == code)
break;
@@ -5197,10 +5201,12 @@
expression.
OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero
- if we are inside a SET_DEST. */
+ if we are inside a SET_DEST. IN_COND is nonzero if we are on the top level
+ of a condition. */
static rtx
-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
+combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest,
+ int in_cond)
{
enum rtx_code code = GET_CODE (x);
enum machine_mode mode = GET_MODE (x);
@@ -5255,8 +5261,8 @@
false arms to store-flag values. Be careful to use copy_rtx
here since true_rtx or false_rtx might share RTL with x as a
result of the if_then_else_cond call above. */
- true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0);
- false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0);
+ true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0);
+ false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0);
/* If true_rtx and false_rtx are not general_operands, an if_then_else
is unlikely to be simpler. */
@@ -5600,7 +5606,7 @@
{
/* Try to simplify the expression further. */
rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
- temp = combine_simplify_rtx (tor, mode, in_dest);
+ temp = combine_simplify_rtx (tor, mode, in_dest, 0);
/* If we could, great. If not, do not go ahead with the IOR
replacement, since PLUS appears in many special purpose
@@ -5693,7 +5699,16 @@
ZERO_EXTRACT is indeed appropriate, it will be placed back by
the call to make_compound_operation in the SET case. */
- if (STORE_FLAG_VALUE == 1
+ if (in_cond)
+ /* Don't apply below optimizations if the caller would
+ prefer a comparison rather than a value.
+ E.g., for the condition in an IF_THEN_ELSE most targets need
+ an explicit comparison. */
+ {
+ ;
+ }
+
+ else if (STORE_FLAG_VALUE == 1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& mode == GET_MODE (op0)
@@ -5739,7 +5754,10 @@
/* If STORE_FLAG_VALUE is -1, we have cases similar to
those above. */
- if (STORE_FLAG_VALUE == -1
+ if (in_cond)
+ ;
+
+ else if (STORE_FLAG_VALUE == -1
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
&& op1 == const0_rtx
&& (num_sign_bit_copies (op0, mode)
@@ -5937,11 +5955,11 @@
if (reg_mentioned_p (from, true_rtx))
true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
from, true_val),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
if (reg_mentioned_p (from, false_rtx))
false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code,
from, false_val),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
@@ -6158,11 +6176,11 @@
{
temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
cond_op0, cond_op1),
- pc_rtx, pc_rtx, 0, 0);
+ pc_rtx, pc_rtx, 0, 0, 0);
temp = simplify_gen_binary (MULT, m, temp,
simplify_gen_binary (MULT, m, c1,
const_true_rtx));
- temp = subst (temp, pc_rtx, pc_rtx, 0, 0);
+ temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0);
temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp);
if (extend_op != UNKNOWN)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
2011-04-26 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-04-05 Tom de Vries <tom@codesourcery.com>
PR target/43920
gcc/
* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
for size.
=== modified file 'gcc/config/arm/arm.h'
--- old/gcc/config/arm/arm.h 2011-05-03 15:17:25 +0000
+++ new/gcc/config/arm/arm.h 2011-04-26 14:42:21 +0000
@@ -2018,7 +2018,8 @@
/* Try to generate sequences that don't involve branches, we can then use
conditional instructions */
#define BRANCH_COST(speed_p, predictable_p) \
- (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+ (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
+ : (optimize > 0 ? 2 : 0))
/* Position Independent Code. */
/* We decide which register to use based on the compilation options and

View File

@ -0,0 +1,21 @@
2011-05-06 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
From Sergey Grechanik <mouseentity@ispras.ru>, approved for mainline
* config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS
for constant vectors.
=== modified file 'gcc/config/arm/arm.c'
--- old/gcc/config/arm/arm.c 2011-05-03 15:18:07 +0000
+++ new/gcc/config/arm/arm.c 2011-05-06 11:33:02 +0000
@@ -9193,7 +9193,7 @@
/* The neon move patterns handle all legitimate vector and struct
addresses. */
if (TARGET_NEON
- && MEM_P (x)
+ && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
|| VALID_NEON_STRUCT_MODE (mode)))

View File

@ -0,0 +1,24 @@
2011-05-12 Michael Hope <michael.hope@linaro.org>
gcc/
Backport from mainline:
2011-05-05 Michael Hope <michael.hope@linaro.org>
PR pch/45979
* config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for
__ARM_EABI__ hosts.
=== modified file 'gcc/config/host-linux.c'
--- old/gcc/config/host-linux.c 2010-11-29 14:09:41 +0000
+++ new/gcc/config/host-linux.c 2011-05-06 20:19:30 +0000
@@ -84,6 +84,8 @@
# define TRY_EMPTY_VM_SPACE 0x60000000
#elif defined(__mc68000__)
# define TRY_EMPTY_VM_SPACE 0x40000000
+#elif defined(__ARM_EABI__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
#else
# define TRY_EMPTY_VM_SPACE 0
#endif

View File

@ -0,0 +1,640 @@
2011-05-13 Revital Eres <revital.eres@linaro.org>
gcc/
* loop-doloop.c (doloop_condition_get): Support new form of
doloop pattern and use prev_nondebug_insn instead of PREV_INSN.
* config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*".
(doloop_end): New.
* config/arm/arm.md (*addsi3_compare0): Remove "*".
* params.def (sms-min-sc): New param flag.
* doc/invoke.texi (sms-min-sc): Document it.
* ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge
enters the branch create an anti edge in the opposite direction
to prevent the creation of reg-moves.
* modulo-sched.c: Adjust comment to reflect the fact we are
scheduling closing branch.
(PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine.
(stage_count): New field in struct partial_schedule.
(calculate_stage_count): New function.
(normalize_sched_times): Rename to reset_sched_times and handle
incrementing the sched time of the nodes by a constant value
passed as parameter.
(duplicate_insns_of_cycles): Skip closing branch.
(sms_schedule_by_order): Schedule closing branch.
(ps_insn_find_column): Handle closing branch.
(sms_schedule): Call reset_sched_times and adjust the code to
support scheduling of the closing branch. Use sms-min-sc.
Support new form of doloop pattern.
(ps_insert_empty_row): Update calls to normalize_sched_times
and rotate_partial_schedule functions.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-05-06 11:28:27 +0000
+++ new/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
@@ -791,7 +791,7 @@
""
)
-(define_insn "*addsi3_compare0"
+(define_insn "addsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
=== modified file 'gcc/config/arm/thumb2.md'
--- old/gcc/config/arm/thumb2.md 2011-01-03 20:52:22 +0000
+++ new/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000
@@ -836,7 +836,7 @@
"operands[4] = GEN_INT (- INTVAL (operands[2]));"
)
-(define_insn "*thumb2_addsi3_compare0"
+(define_insn "thumb2_addsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
(plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
@@ -1118,3 +1118,54 @@
"
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
")
+
+;; Define the subtract-one-and-jump insns so loop.c
+;; knows what to generate.
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ "TARGET_32BIT"
+ "
+ {
+ /* Currently SMS relies on the do-loop pattern to recognize loops
+ where (1) the control part consists of all insns defining and/or
+ using a certain 'count' register and (2) the loop count can be
+ adjusted by modifying this register prior to the loop.
+ ??? The possible introduction of a new block to initialize the
+ new IV can potentially affect branch optimizations. */
+ if (optimize > 0 && flag_modulo_sched)
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+ rtx cc_reg;
+ rtx insn;
+ rtx cmp;
+
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands [0];
+ if (TARGET_THUMB2)
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+ else
+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
+ cc_reg = SET_DEST (cmp);
+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+ DONE;
+ }else
+ FAIL;
+}")
+
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2010-11-30 11:41:24 +0000
+++ new/gcc/ddg.c 2011-05-11 07:15:47 +0000
@@ -197,6 +197,11 @@
}
}
+ /* If a true dep edge enters the branch create an anti edge in the
+ opposite direction to prevent the creation of reg-moves. */
+ if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
+ create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
+
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
=== modified file 'gcc/doc/invoke.texi'
--- old/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
+++ new/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000
@@ -8730,6 +8730,10 @@
The maximum number of best instructions in the ready list that are considered
for renaming in the selective scheduler. The default value is 2.
+@item sms-min-sc
+The minimum value of stage count that swing modulo scheduler will
+generate. The default value is 2.
+
@item max-last-value-rtl
The maximum size measured as number of RTLs that can be recorded in an expression
in combiner for a pseudo register as last known value of that register. The default
=== modified file 'gcc/loop-doloop.c'
--- old/gcc/loop-doloop.c 2010-11-30 11:41:24 +0000
+++ new/gcc/loop-doloop.c 2011-05-11 07:15:47 +0000
@@ -78,6 +78,8 @@
rtx inc_src;
rtx condition;
rtx pattern;
+ rtx cc_reg = NULL_RTX;
+ rtx reg_orig = NULL_RTX;
/* The canonical doloop pattern we expect has one of the following
forms:
@@ -96,7 +98,16 @@
2) (set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
- (pc))). */
+ (pc))).
+
+ Some targets (ARM) do the comparison before the branch, as in the
+ following form:
+
+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc))) */
pattern = PATTERN (doloop_pat);
@@ -104,19 +115,47 @@
{
rtx cond;
rtx prev_insn = prev_nondebug_insn (doloop_pat);
+ rtx cmp_arg1, cmp_arg2;
+ rtx cmp_orig;
- /* We expect the decrement to immediately precede the branch. */
+ /* In case the pattern is not PARALLEL we expect two forms
+ of doloop which are cases 2) and 3) above: in case 2) the
+ decrement immediately precedes the branch, while in case 3)
+ the compare and decrement instructions immediately precede
+ the branch. */
if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
return 0;
cmp = pattern;
- inc = PATTERN (PREV_INSN (doloop_pat));
+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
+ {
+ /* The third case: the compare and decrement instructions
+ immediately precede the branch. */
+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
+ if (GET_CODE (cmp_orig) != SET)
+ return 0;
+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
+ return 0;
+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
+ if (cmp_arg2 != const0_rtx
+ || GET_CODE (cmp_arg1) != PLUS)
+ return 0;
+ reg_orig = XEXP (cmp_arg1, 0);
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
+ || !REG_P (reg_orig))
+ return 0;
+ cc_reg = SET_DEST (cmp_orig);
+
+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
+ }
+ else
+ inc = PATTERN (prev_insn);
/* We expect the condition to be of the form (reg != 0) */
cond = XEXP (SET_SRC (cmp), 0);
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
return 0;
-
}
else
{
@@ -162,11 +201,15 @@
return 0;
if ((XEXP (condition, 0) == reg)
+ /* For the third case: */
+ || ((cc_reg != NULL_RTX)
+ && (XEXP (condition, 0) == cc_reg)
+ && (reg_orig == reg))
|| (GET_CODE (XEXP (condition, 0)) == PLUS
- && XEXP (XEXP (condition, 0), 0) == reg))
+ && XEXP (XEXP (condition, 0), 0) == reg))
{
if (GET_CODE (pattern) != PARALLEL)
- /* The second form we expect:
+ /* For the second form we expect:
(set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
@@ -181,7 +224,24 @@
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
- So we return that form instead.
+ For the third form we expect:
+
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
+ (set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+ (label_ref (label))
+ (pc)))
+
+ which is equivalent to the following:
+
+ (parallel [(set (cc) (compare (reg, 1))
+ (set (reg) (plus (reg) (const_int -1)))
+ (set (pc) (if_then_else (NE == cc)
+ (label_ref (label))
+ (pc))))])
+
+ So we return the second form instead for the two cases.
+
*/
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-02-14 17:59:10 +0000
+++ new/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
@@ -84,14 +84,13 @@
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
- SMS works with countable loops (1) whose control part can be easily
- decoupled from the rest of the loop and (2) whose loop count can
- be easily adjusted. This is because we peel a constant number of
- iterations into a prologue and epilogue for which we want to avoid
- emitting the control part, and a kernel which is to iterate that
- constant number of iterations less than the original loop. So the
- control part should be a set of insns clearly identified and having
- its own iv, not otherwise used in the loop (at-least for now), which
+ SMS works with countable loops whose loop count can be easily
+ adjusted. This is because we peel a constant number of iterations
+ into a prologue and epilogue for which we want to avoid emitting
+ the control part, and a kernel which is to iterate that constant
+ number of iterations less than the original loop. So the control
+ part should be a set of insns clearly identified and having its
+ own iv, not otherwise used in the loop (at-least for now), which
initializes a register before the loop to the number of iterations.
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
@@ -116,8 +115,10 @@
/* The number of different iterations the nodes in ps span, assuming
the stage boundaries are placed efficiently. */
-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
- + 1 + (ps)->ii - 1) / (ps)->ii)
+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
+ + 1 + ii - 1) / ii)
+/* The stage count of ps. */
+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
/* A single instruction in the partial schedule. */
struct ps_insn
@@ -155,6 +156,8 @@
int max_cycle;
ddg_ptr g; /* The DDG of the insns in the partial schedule. */
+
+ int stage_count; /* The stage count of the partial schedule. */
};
/* We use this to record all the register replacements we do in
@@ -195,7 +198,7 @@
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-
+static int calculate_stage_count (partial_schedule_ptr ps);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
@@ -310,10 +313,10 @@
either a single (parallel) branch-on-count or a (non-parallel)
branch immediately preceded by a single (decrement) insn. */
first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
- : PREV_INSN (tail));
+ : prev_nondebug_insn (tail));
for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
- if (reg_mentioned_p (reg, insn))
+ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn))
{
if (dump_file)
{
@@ -569,13 +572,12 @@
}
}
-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
- of SCHED_ROW and SCHED_STAGE. */
+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
+ SCHED_ROW and SCHED_STAGE. */
static void
-normalize_sched_times (partial_schedule_ptr ps)
+reset_sched_times (partial_schedule_ptr ps, int amount)
{
int row;
- int amount = PS_MIN_CYCLE (ps);
int ii = ps->ii;
ps_insn_ptr crr_insn;
@@ -584,19 +586,43 @@
{
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
+ int sc_until_cycle_zero, stage;
- if (dump_file)
- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
- (u), ps->min_cycle);
+ if (dump_file)
+ {
+ /* Print the scheduling times after the rotation. */
+ fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
+ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
+ INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
+ normalized_time);
+ if (JUMP_P (crr_insn->node->insn))
+ fprintf (dump_file, " (branch)");
+ fprintf (dump_file, "\n");
+ }
+
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = normalized_time % ii;
- SCHED_STAGE (u) = normalized_time / ii;
+ SCHED_ROW (u) = SMODULO (normalized_time, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
}
}
-
+
/* Set SCHED_COLUMN of each node according to its position in PS. */
static void
set_columns_for_ps (partial_schedule_ptr ps)
@@ -646,9 +672,12 @@
/* Do not duplicate any insn which refers to count_reg as it
belongs to the control part.
+ The closing branch is scheduled as well and thus should
+ be ignored.
TODO: This should be done by analyzing the control part of
the loop. */
- if (reg_mentioned_p (count_reg, u_node->insn))
+ if (reg_mentioned_p (count_reg, u_node->insn)
+ || JUMP_P (ps_ij->node->insn))
continue;
if (for_prolog)
@@ -1009,9 +1038,11 @@
continue;
}
- /* Don't handle BBs with calls or barriers, or !single_set insns,
- or auto-increment insns (to avoid creating invalid reg-moves
- for the auto-increment insns).
+ /* Don't handle BBs with calls or barriers or auto-increment insns
+ (to avoid creating invalid reg-moves for the auto-increment insns),
+ or !single_set with the exception of instructions that include
+ count_reg---these instructions are part of the control part
+ that do-loop recognizes.
??? Should handle auto-increment insns.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
@@ -1021,7 +1052,8 @@
if (CALL_P (insn)
|| BARRIER_P (insn)
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
+ && !reg_mentioned_p (count_reg, insn))
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
@@ -1049,7 +1081,11 @@
continue;
}
- if (! (g = create_ddg (bb, 0)))
+ /* Always schedule the closing branch with the rest of the
+ instructions. The branch is rotated to be in row ii-1 at the
+ end of the scheduling procedure to make sure it's the last
+ instruction in the iteration. */
+ if (! (g = create_ddg (bb, 1)))
{
if (dump_file)
fprintf (dump_file, "SMS create_ddg failed\n");
@@ -1157,14 +1193,17 @@
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps){
- stage_count = PS_STAGE_COUNT (ps);
- gcc_assert(stage_count >= 1);
- }
+ if (ps)
+ {
+ stage_count = calculate_stage_count (ps);
+ gcc_assert(stage_count >= 1);
+ PS_STAGE_COUNT(ps) = stage_count;
+ }
- /* Stage count of 1 means that there is no interleaving between
- iterations, let the scheduling passes do the job. */
- if (stage_count <= 1
+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
+ 1 means that there is no interleaving between iterations thus
+ we let the scheduling passes do the job in this case. */
+ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
|| (count_init && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
@@ -1182,32 +1221,24 @@
else
{
struct undo_replace_buff_elem *reg_move_replaces;
-
- if (dump_file)
- {
+ int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ /* Set the stage boundaries. The closing_branch was scheduled
+ and should appear in the last (ii-1) row. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ set_columns_for_ps (ps);
+
+ canon_loop (loop);
+
+ if (dump_file)
+ {
fprintf (dump_file,
"SMS succeeded %d %d (with ii, sc)\n", ps->ii,
stage_count);
print_partial_schedule (ps, dump_file);
- fprintf (dump_file,
- "SMS Branch (%d) will later be scheduled at cycle %d.\n",
- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
}
-
- /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
- the closing_branch was scheduled and should appear in the last (ii-1)
- row. Otherwise, we are free to schedule the branch, and we let nodes
- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
- row; this should reduce stage_count to minimum.
- TODO: Revisit the issue of scheduling the insns of the
- control part relative to the branch when the control part
- has more than one insn. */
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
- set_columns_for_ps (ps);
-
- canon_loop (loop);
-
+
/* case the BCT count is not known , Do loop-versioning */
if (count_reg && ! count_init)
{
@@ -1760,12 +1791,6 @@
continue;
}
- if (JUMP_P (insn)) /* Closing branch handled later. */
- {
- RESET_BIT (tobe_scheduled, u);
- continue;
- }
-
if (TEST_BIT (sched_nodes, u))
continue;
@@ -1893,8 +1918,8 @@
if (dump_file)
fprintf (dump_file, "split_row=%d\n", split_row);
- normalize_sched_times (ps);
- rotate_partial_schedule (ps, ps->min_cycle);
+ reset_sched_times (ps, PS_MIN_CYCLE (ps));
+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
for (row = 0; row < split_row; row++)
@@ -2571,6 +2596,7 @@
ps_insn_ptr next_ps_i;
ps_insn_ptr first_must_follow = NULL;
ps_insn_ptr last_must_precede = NULL;
+ ps_insn_ptr last_in_row = NULL;
int row;
if (! ps_i)
@@ -2597,8 +2623,37 @@
else
last_must_precede = next_ps_i;
}
+ /* The closing branch must be the last in the row. */
+ if (must_precede
+ && TEST_BIT (must_precede, next_ps_i->node->cuid)
+ && JUMP_P (next_ps_i->node->insn))
+ return false;
+
+ last_in_row = next_ps_i;
}
+ /* The closing branch is scheduled as well. Make sure there is no
+ dependent instruction after it as the branch should be the last
+ instruction in the row. */
+ if (JUMP_P (ps_i->node->insn))
+ {
+ if (first_must_follow)
+ return false;
+ if (last_in_row)
+ {
+ /* Make the branch the last in the row. New instructions
+ will be inserted at the beginning of the row or after the
+ last must_precede instruction thus the branch is guaranteed
+ to remain the last instruction in the row. */
+ last_in_row->next_in_row = ps_i;
+ ps_i->prev_in_row = last_in_row;
+ ps_i->next_in_row = NULL;
+ }
+ else
+ ps->rows[row] = ps_i;
+ return true;
+ }
+
/* Now insert the node after INSERT_AFTER_PSI. */
if (! last_must_precede)
@@ -2820,6 +2875,24 @@
return ps_i;
}
+/* Calculate the stage count of the partial schedule PS. The calculation
+ takes into account the rotation to bring the closing branch to row
+ ii-1. */
+int
+calculate_stage_count (partial_schedule_ptr ps)
+{
+ int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
+ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
+ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
+ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
+
+ /* The calculation of stage count is done adding the number of stages
+ before cycle zero and after cycle zero. */
+ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
+
+ return stage_count;
+}
+
/* Rotate the rows of PS such that insns scheduled at time
START_CYCLE will appear in row 0. Updates max/min_cycles. */
void
=== modified file 'gcc/params.def'
--- old/gcc/params.def 2011-04-18 11:31:29 +0000
+++ new/gcc/params.def 2011-05-11 07:15:47 +0000
@@ -344,6 +344,11 @@
"sms-max-ii-factor",
"A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop",
100, 0, 0)
+/* The minimum value of stage count that swing modulo scheduler will generate. */
+DEFPARAM(PARAM_SMS_MIN_SC,
+ "sms-min-sc",
+ "The minimum value of stage count that swing modulo scheduler will generate.",
+ 2, 1, 1)
DEFPARAM(PARAM_SMS_DFA_HISTORY,
"sms-dfa-history",
"The number of cycles the swing modulo scheduler considers when checking conflicts using DFA",

View File

@ -0,0 +1,30 @@
2011-05-13 Revital Eres <revital.eres@linaro.org>
gcc/
* ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs.
* modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails.
=== modified file 'gcc/ddg.c'
--- old/gcc/ddg.c 2011-05-11 07:15:47 +0000
+++ new/gcc/ddg.c 2011-05-13 16:03:40 +0000
@@ -1016,6 +1016,7 @@
for (i = 0; i < all_sccs->num_sccs; i++)
free_scc (all_sccs->sccs[i]);
+ free (all_sccs->sccs);
free (all_sccs);
}
=== modified file 'gcc/modulo-sched.c'
--- old/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
+++ new/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000
@@ -1216,7 +1216,6 @@
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
fprintf (dump_file, ")\n");
}
- continue;
}
else
{

View File

@ -0,0 +1,134 @@
2011-06-02 Chung-Lin Tang <cltang@codesourcery.com>
Backport from mainline:
2011-03-21 Chung-Lin Tang <cltang@codesourcery.com>
gcc/
* simplify-rtx.c (simplify_binary_operation_1): Handle
(xor (and A B) C) case when B and C are both constants.
gcc/testsuite/
* gcc.target/arm/xor-and.c: New.
2011-03-18 Chung-Lin Tang <cltang@codesourcery.com>
gcc/
* combine.c (try_combine): Do simplification only call of
subst() on i2 even when i1 is present. Update comments.
gcc/testsuite/
* gcc.target/arm/unsigned-extend-1.c: New.
=== modified file 'gcc/combine.c'
--- old/gcc/combine.c 2011-05-06 11:28:27 +0000
+++ new/gcc/combine.c 2011-05-27 14:31:18 +0000
@@ -3089,7 +3089,7 @@
/* It is possible that the source of I2 or I1 may be performing
an unneeded operation, such as a ZERO_EXTEND of something
that is known to have the high part zero. Handle that case
- by letting subst look at the innermost one of them.
+ by letting subst look at the inner insns.
Another way to do this would be to have a function that tries
to simplify a single insn instead of merging two or more
@@ -3114,11 +3114,9 @@
subst_low_luid = DF_INSN_LUID (i1);
i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
- else
- {
- subst_low_luid = DF_INSN_LUID (i2);
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
- }
+
+ subst_low_luid = DF_INSN_LUID (i2);
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
n_occurrences = 0; /* `subst' counts here */
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-03-26 09:24:06 +0000
+++ new/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
@@ -2484,6 +2484,46 @@
XEXP (op0, 1), mode),
op1);
+ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P),
+ we can transform like this:
+ (A&B)^C == ~(A&B)&C | ~C&(A&B)
+ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law
+ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order
+ Attempt a few simplifications when B and C are both constants. */
+ if (GET_CODE (op0) == AND
+ && CONST_INT_P (op1)
+ && CONST_INT_P (XEXP (op0, 1)))
+ {
+ rtx a = XEXP (op0, 0);
+ rtx b = XEXP (op0, 1);
+ rtx c = op1;
+ HOST_WIDE_INT bval = INTVAL (b);
+ HOST_WIDE_INT cval = INTVAL (c);
+
+ rtx na_c
+ = simplify_binary_operation (AND, mode,
+ simplify_gen_unary (NOT, mode, a, mode),
+ c);
+ if ((~cval & bval) == 0)
+ {
+ /* Try to simplify ~A&C | ~B&C. */
+ if (na_c != NULL_RTX)
+ return simplify_gen_binary (IOR, mode, na_c,
+ GEN_INT (~bval & cval));
+ }
+ else
+ {
+ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */
+ if (na_c == const0_rtx)
+ {
+ rtx a_nc_b = simplify_gen_binary (AND, mode, a,
+ GEN_INT (~cval & bval));
+ return simplify_gen_binary (IOR, mode, a_nc_b,
+ GEN_INT (~bval & cval));
+ }
+ }
+ }
+
/* (xor (comparison foo bar) (const_int 1)) can become the reversed
comparison if STORE_FLAG_VALUE is 1. */
if (STORE_FLAG_VALUE == 1
=== added file 'gcc/testsuite/gcc.target/arm/unsigned-extend-1.c'
--- old/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 2011-05-27 14:31:18 +0000
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6" } */
+
+unsigned char foo (unsigned char c)
+{
+ return (c >= '0') && (c <= '9');
+}
+
+/* { dg-final { scan-assembler-not "uxtb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/xor-and.c'
--- old/gcc/testsuite/gcc.target/arm/xor-and.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/xor-and.c 2011-05-27 14:31:18 +0000
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv6" } */
+
+unsigned short foo (unsigned short x)
+{
+ x ^= 0x4002;
+ x >>= 1;
+ x |= 0x8000;
+ return x;
+}
+
+/* { dg-final { scan-assembler "orr" } } */
+/* { dg-final { scan-assembler-not "mvn" } } */
+/* { dg-final { scan-assembler-not "uxth" } } */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,329 @@
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns.
(*maddhisi4tb, *maddhisi4tt): New define_insns.
gcc/testsuite/
* gcc.target/arm/smlatb-1.c: New file.
* gcc.target/arm/smlatt-1.c: New file.
* gcc.target/arm/smlaltb-1.c: New file.
* gcc.target/arm/smlaltt-1.c: New file.
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
Backport from FSF:
2011-06-07 Bernd Schmidt <bernds@codesourcery.com>
Andrew Stubbs <ams@codesourcery.com>
gcc/
* simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening
multiplies.
* doc/md.texi (Canonicalization of Instructions): Document widening
multiply canonicalization.
gcc/testsuite/
* gcc.target/arm/mla-2.c: New test.
=== modified file 'gcc/config/arm/arm.md'
--- old/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
+++ new/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000
@@ -1809,6 +1809,36 @@
(set_attr "predicable" "yes")]
)
+;; Note: there is no maddhisi4ibt because this one is canonical form
+(define_insn "*maddhisi4tb"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlatb%?\\t%0, %1, %2, %3"
+ [(set_attr "insn" "smlaxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*maddhisi4tt"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlatt%?\\t%0, %1, %2, %3"
+ [(set_attr "insn" "smlaxy")
+ (set_attr "predicable" "yes")]
+)
+
(define_insn "*maddhidi4"
[(set (match_operand:DI 0 "s_register_operand" "=r")
(plus:DI
@@ -1822,6 +1852,39 @@
[(set_attr "insn" "smlalxy")
(set_attr "predicable" "yes")])
+;; Note: there is no maddhidi4ibt because this one is canonical form
+(define_insn "*maddhidi4tb"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16)))
+ (sign_extend:DI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:DI 3 "s_register_operand" "0")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlaltb%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smlalxy")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*maddhidi4tt"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16)))
+ (sign_extend:DI
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16))))
+ (match_operand:DI 3 "s_register_operand" "0")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlaltt%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smlalxy")
+ (set_attr "predicable" "yes")])
+
(define_expand "mulsf3"
[(set (match_operand:SF 0 "s_register_operand" "")
(mult:SF (match_operand:SF 1 "s_register_operand" "")
=== modified file 'gcc/doc/md.texi'
--- old/gcc/doc/md.texi 2011-05-05 15:43:06 +0000
+++ new/gcc/doc/md.texi 2011-06-07 11:18:20 +0000
@@ -5929,6 +5929,23 @@
will be written using @code{zero_extract} rather than the equivalent
@code{and} or @code{sign_extract} operations.
+@cindex @code{mult}, canonicalization of
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x})
+(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1}
+(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise
+for @code{zero_extend}.
+
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2}
+@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted
+to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2}
+@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for
+patterns using @code{zero_extend} and @code{lshiftrt}. If the second
+operand of @code{mult} is also a shift, then that is extended also.
+This transformation is only applied when it can be proven that the
+original operation had sufficient precision to prevent overflow.
+
@end itemize
Further canonicalization rules are defined in the function
=== modified file 'gcc/simplify-rtx.c'
--- old/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
+++ new/gcc/simplify-rtx.c 2011-06-02 12:32:16 +0000
@@ -1000,6 +1000,48 @@
&& GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF)
return XEXP (op, 0);
+ /* Extending a widening multiplication should be canonicalized to
+ a wider widening multiplication. */
+ if (GET_CODE (op) == MULT)
+ {
+ rtx lhs = XEXP (op, 0);
+ rtx rhs = XEXP (op, 1);
+ enum rtx_code lcode = GET_CODE (lhs);
+ enum rtx_code rcode = GET_CODE (rhs);
+
+ /* Widening multiplies usually extend both operands, but sometimes
+ they use a shift to extract a portion of a register. */
+ if ((lcode == SIGN_EXTEND
+ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+ && (rcode == SIGN_EXTEND
+ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+ {
+ enum machine_mode lmode = GET_MODE (lhs);
+ enum machine_mode rmode = GET_MODE (rhs);
+ int bits;
+
+ if (lcode == ASHIFTRT)
+ /* Number of bits not shifted off the end. */
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+ else /* lcode == SIGN_EXTEND */
+ /* Size of inner mode. */
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+ if (rcode == ASHIFTRT)
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+ else /* rcode == SIGN_EXTEND */
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+ /* We can only widen multiplies if the result is mathematiclly
+ equivalent. I.e. if overflow was impossible. */
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+ return simplify_gen_binary
+ (MULT, mode,
+ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode),
+ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode));
+ }
+ }
+
/* Check for a sign extension of a subreg of a promoted
variable, where the promotion is sign-extended, and the
target mode is the same as the variable's promotion. */
@@ -1071,6 +1113,48 @@
&& GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
return rtl_hooks.gen_lowpart_no_emit (mode, op);
+ /* Extending a widening multiplication should be canonicalized to
+ a wider widening multiplication. */
+ if (GET_CODE (op) == MULT)
+ {
+ rtx lhs = XEXP (op, 0);
+ rtx rhs = XEXP (op, 1);
+ enum rtx_code lcode = GET_CODE (lhs);
+ enum rtx_code rcode = GET_CODE (rhs);
+
+ /* Widening multiplies usually extend both operands, but sometimes
+ they use a shift to extract a portion of a register. */
+ if ((lcode == ZERO_EXTEND
+ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+ && (rcode == ZERO_EXTEND
+ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+ {
+ enum machine_mode lmode = GET_MODE (lhs);
+ enum machine_mode rmode = GET_MODE (rhs);
+ int bits;
+
+ if (lcode == LSHIFTRT)
+ /* Number of bits not shifted off the end. */
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+ else /* lcode == ZERO_EXTEND */
+ /* Size of inner mode. */
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+ if (rcode == LSHIFTRT)
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+ else /* rcode == ZERO_EXTEND */
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+ /* We can only widen multiplies if the result is mathematiclly
+ equivalent. I.e. if overflow was impossible. */
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+ return simplify_gen_binary
+ (MULT, mode,
+ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode),
+ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode));
+ }
+ }
+
/* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>). */
if (GET_CODE (op) == ZERO_EXTEND)
return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0),
=== added file 'gcc/testsuite/gcc.target/arm/mla-2.c'
--- old/gcc/testsuite/gcc.target/arm/mla-2.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/mla-2.c 2011-06-02 12:32:16 +0000
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long foolong (long long x, short *a, short *b)
+{
+ return x + *a * *b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlaltb-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlaltb-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlaltb-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in)
+{
+ short a = in & 0xffff;
+ short b = (in & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlaltt-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlaltt-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlaltt-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in1, int in2)
+{
+ short a = (in1 & 0xffff0000) >> 16;
+ short b = (in2 & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltt" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlatb-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlatb-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlatb-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in)
+{
+ short a = in & 0xffff;
+ short b = (in & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatb" } } */
=== added file 'gcc/testsuite/gcc.target/arm/smlatt-1.c'
--- old/gcc/testsuite/gcc.target/arm/smlatt-1.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/smlatt-1.c 2011-06-02 15:58:33 +0000
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in1, int in2)
+{
+ short a = (in1 & 0xffff0000) >> 16;
+ short b = (in2 & 0xffff0000) >> 16;
+
+ return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatt" } } */

View File

@ -0,0 +1,120 @@
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline:
gcc/
2011-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/neon.md (orndi3_neon): Actually split it.
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
Backport from mainline.
gcc/
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not.
("orndi3_neon"): Likewise.
("bic<mode>3_neon"): Likewise.
gcc/testsuite
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
* gcc.target/arm/neon-vorn-vbic.c: New test.
=== modified file 'gcc/config/arm/neon.md'
--- old/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000
+++ new/gcc/config/arm/neon.md 2011-06-04 00:04:47 +0000
@@ -783,30 +783,57 @@
(define_insn "orn<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "orndi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
- (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
- (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
+;; TODO: investigate whether we should disable
+;; this and bicdi3_neon for the A8 in line with the other
+;; changes above.
+(define_insn_and_split "orndi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
+ (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
"TARGET_NEON"
"@
vorn\t%P0, %P1, %P2
#
+ #
#"
- [(set_attr "neon_type" "neon_int_1,*,*")
- (set_attr "length" "*,8,8")]
+ "reload_completed &&
+ (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
+ "
+ {
+ if (TARGET_THUMB2)
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[5] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }
+ else
+ {
+ emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
+ emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
+ DONE;
+ }
+ }"
+ [(set_attr "neon_type" "neon_int_1,*,*,*")
+ (set_attr "length" "*,16,8,8")
+ (set_attr "arch" "any,a,t2,t2")]
)
(define_insn "bic<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
- (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_1")]
=== added file 'gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c'
--- old/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 1970-01-01 00:00:00 +0000
+++ new/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 2011-06-03 23:50:02 +0000
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-add-options arm_neon } */
+
+void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+ int i;
+ for (i = 0; i < 9; i++)
+ c[i] = b[i] | (~a[i]);
+}
+void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+ int i;
+ for (i = 0; i < 9; i++)
+ c[i] = b[i] & (~a[i]);
+}
+
+/* { dg-final { scan-assembler "vorn\\t" } } */
+/* { dg-final { scan-assembler "vbic\\t" } } */

View File

@ -2,7 +2,6 @@ GCC-4_6-BRANCH-LINARO-BACKPORTS = " \
file://linaro/gcc-4.6-linaro-r106720.patch \
file://linaro/gcc-4.6-linaro-r106723.patch \
file://linaro/gcc-4.6-linaro-r106729.patch \
file://linaro/gcc-4.6-linaro-r106731.patch \
file://linaro/gcc-4.6-linaro-r106733.patch \
file://linaro/gcc-4.6-linaro-r106737.patch \
file://linaro/gcc-4.6-linaro-r106738.patch \
@ -15,4 +14,8 @@ file://linaro/gcc-4.6-linaro-r106744.patch \
file://linaro/gcc-4.6-linaro-r106746.patch \
file://linaro/gcc-4.6-linaro-r106747.patch \
file://linaro/gcc-4.6-linaro-r106750.patch \
file://linaro/gcc-4.6-linaro-r106751.patch \
file://linaro/gcc-4.6-linaro-r106753.patch \
file://linaro/gcc-4.6-linaro-r106754.patch \
file://linaro/gcc-4.6-linaro-r106755.patch \
"

View File

@ -0,0 +1,3 @@
# this will prepend this layer to FILESPATH
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6.0"
PRINC = "0"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"

View File

@ -0,0 +1,3 @@
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
require recipes-devtools/gcc/gcc-common-4.6.inc
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"