mirror of
git://git.openembedded.org/meta-openembedded
synced 2026-04-02 02:49:12 +00:00
Add bbappend files for gcc 4.6.0 from oe-core
The bbappends add patches for linaro so with this we can enhance gcc 4.6.0 from oe-core and have patches that are in meta-oe on top of it. Add patches from for linaro 4.6 This obsoletes the need of having gcc 4.6 recipes in meta-oe Signed-off-by: Khem Raj <raj.khem@gmail.com> Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
This commit is contained in:
parent
eac31f4e3a
commit
8052751332
@ -0,0 +1,51 @@
|
||||
2011-02-21 Andrew Stubbs <ams@codesourcery.com>
|
||||
Julian Brown <julian@codesourcery.com>
|
||||
Mark Shinwell <shinwell@codesourcery.com>
|
||||
|
||||
Forward-ported from Linaro GCC 4.5 (bzr99324).
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.h (arm_class_likely_spilled_p): Check against
|
||||
LO_REGS only for Thumb-1.
|
||||
(MODE_BASE_REG_CLASS): Restrict base registers to those which can
|
||||
be used in short instructions when optimising for size on Thumb-2.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-01-29 03:20:57 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000
|
||||
@@ -22304,14 +22304,16 @@
|
||||
|
||||
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
|
||||
|
||||
- We need to define this for LO_REGS on thumb. Otherwise we can end up
|
||||
- using r0-r4 for function arguments, r7 for the stack frame and don't
|
||||
- have enough left over to do doubleword arithmetic. */
|
||||
-
|
||||
+ We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
|
||||
+ using r0-r4 for function arguments, r7 for the stack frame and don't have
|
||||
+ enough left over to do doubleword arithmetic. For Thumb-2 all the
|
||||
+ potentially problematic instructions accept high registers so this is not
|
||||
+ necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
|
||||
+ that require many low registers. */
|
||||
static bool
|
||||
arm_class_likely_spilled_p (reg_class_t rclass)
|
||||
{
|
||||
- if ((TARGET_THUMB && rclass == LO_REGS)
|
||||
+ if ((TARGET_THUMB1 && rclass == LO_REGS)
|
||||
|| rclass == CC_REG)
|
||||
return true;
|
||||
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-01-29 03:20:57 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-02-21 14:04:51 +0000
|
||||
@@ -1185,7 +1185,7 @@
|
||||
when addressing quantities in QI or HI mode; if we don't know the
|
||||
mode, then we must be conservative. */
|
||||
#define MODE_BASE_REG_CLASS(MODE) \
|
||||
- (TARGET_32BIT ? CORE_REGS : \
|
||||
+ (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS : \
|
||||
(((MODE) == SImode) ? BASE_REGS : LO_REGS))
|
||||
|
||||
/* For Thumb we can not support SP+reg addressing, so we return LO_REGS
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
2011-02-02 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
PR target/47551
|
||||
* config/arm/arm.c (coproc_secondary_reload_class): Handle
|
||||
structure modes. Don't check neon_vector_mem_operand for
|
||||
vector or structure modes.
|
||||
|
||||
gcc/testsuite/
|
||||
PR target/47551
|
||||
* gcc.target/arm/neon-modes-2.c: New test.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-02-21 14:04:51 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000
|
||||
@@ -9139,11 +9139,14 @@
|
||||
return GENERAL_REGS;
|
||||
}
|
||||
|
||||
+ /* The neon move patterns handle all legitimate vector and struct
|
||||
+ addresses. */
|
||||
if (TARGET_NEON
|
||||
+ && MEM_P (x)
|
||||
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||||
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||
- && neon_vector_mem_operand (x, 0))
|
||||
- return NO_REGS;
|
||||
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
|
||||
+ || VALID_NEON_STRUCT_MODE (mode)))
|
||||
+ return NO_REGS;
|
||||
|
||||
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
|
||||
return NO_REGS;
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-modes-2.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-modes-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-modes-2.c 2011-02-02 10:02:45 +0000
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O1" } */
|
||||
+/* { dg-add-options arm_neon } */
|
||||
+
|
||||
+#include "arm_neon.h"
|
||||
+
|
||||
+#define SETUP(A) x##A = vld3_u32 (ptr + A * 0x20)
|
||||
+#define MODIFY(A) x##A = vld3_lane_u32 (ptr + A * 0x20 + 0x10, x##A, 1)
|
||||
+#define STORE(A) vst3_u32 (ptr + A * 0x20, x##A)
|
||||
+
|
||||
+#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
|
||||
+
|
||||
+void
|
||||
+bar (uint32_t *ptr, int y)
|
||||
+{
|
||||
+ uint32x2x3_t MANY (SETUP);
|
||||
+ int *x = __builtin_alloca (y);
|
||||
+ int z[0x1000];
|
||||
+ foo (x, z);
|
||||
+ MANY (MODIFY);
|
||||
+ foo (x, z);
|
||||
+ MANY (STORE);
|
||||
+}
|
||||
|
||||
@ -0,0 +1,32 @@
|
||||
2011-03-22 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-03-21 Daniel Jacobowitz <dan@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/unwind-arm.c (__gnu_unwind_pr_common): Correct test
|
||||
for barrier handlers.
|
||||
|
||||
=== modified file 'gcc/config/arm/unwind-arm.c'
|
||||
--- old/gcc/config/arm/unwind-arm.c 2009-10-30 14:55:10 +0000
|
||||
+++ new/gcc/config/arm/unwind-arm.c 2011-03-22 10:59:10 +0000
|
||||
@@ -1196,8 +1196,6 @@
|
||||
ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
|
||||
|
||||
if (data[0] & uint32_highbit)
|
||||
- phase2_call_unexpected_after_unwind = 1;
|
||||
- else
|
||||
{
|
||||
data += rtti_count + 1;
|
||||
/* Setup for entry to the handler. */
|
||||
@@ -1207,6 +1205,8 @@
|
||||
_Unwind_SetGR (context, 0, (_uw) ucbp);
|
||||
return _URC_INSTALL_CONTEXT;
|
||||
}
|
||||
+ else
|
||||
+ phase2_call_unexpected_after_unwind = 1;
|
||||
}
|
||||
if (data[0] & uint32_highbit)
|
||||
data++;
|
||||
|
||||
@ -0,0 +1,653 @@
|
||||
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* doc/invoke.texi (max-stores-to-sink): Document.
|
||||
* params.h (MAX_STORES_TO_SINK): Define.
|
||||
* opts.c (finish_options): Set MAX_STORES_TO_SINK to 0
|
||||
if either vectorization or if-conversion is disabled.
|
||||
* tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from
|
||||
tree-vect-data-refs.c vect_equal_offsets.
|
||||
(dr_equal_offsets_p): New function.
|
||||
(find_data_references_in_bb): Remove static.
|
||||
* tree-data-ref.h (find_data_references_in_bb): Declare.
|
||||
(dr_equal_offsets_p): Likewise.
|
||||
* tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c.
|
||||
(vect_drs_dependent_in_basic_block): Update calls to
|
||||
vect_equal_offsets.
|
||||
(vect_check_interleaving): Likewise.
|
||||
* tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h.
|
||||
(cond_if_else_store_replacement): Rename to...
|
||||
(cond_if_else_store_replacement_1): ... this. Change arguments and
|
||||
documentation.
|
||||
(cond_if_else_store_replacement): New function.
|
||||
* Makefile.in (tree-ssa-phiopt.o): Adjust dependencies.
|
||||
* params.def (PARAM_MAX_STORES_TO_SINK): Define.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/vect-cselim-1.c: New test.
|
||||
* gcc.dg/vect/vect-cselim-2.c: New test.
|
||||
|
||||
=== modified file 'gcc/Makefile.in'
|
||||
--- old/gcc/Makefile.in 2011-03-26 09:20:34 +0000
|
||||
+++ new/gcc/Makefile.in 2011-04-18 11:31:29 +0000
|
||||
@@ -2422,7 +2422,8 @@
|
||||
tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
|
||||
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \
|
||||
- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h
|
||||
+ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \
|
||||
+ $(TREE_DATA_REF_H)
|
||||
tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \
|
||||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
|
||||
|
||||
=== modified file 'gcc/doc/invoke.texi'
|
||||
--- old/gcc/doc/invoke.texi 2011-03-29 14:24:42 +0000
|
||||
+++ new/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
|
||||
@@ -8909,6 +8909,11 @@
|
||||
The maximum number of namespaces to consult for suggestions when C++
|
||||
name lookup fails for an identifier. The default is 1000.
|
||||
|
||||
+@item max-stores-to-sink
|
||||
+The maximum number of conditional stores paires that can be sunk. Set to 0
|
||||
+if either vectorization (@option{-ftree-vectorize}) or if-conversion
|
||||
+(@option{-ftree-loop-if-convert}) is disabled. The default is 2.
|
||||
+
|
||||
@end table
|
||||
@end table
|
||||
|
||||
|
||||
=== modified file 'gcc/opts.c'
|
||||
--- old/gcc/opts.c 2011-02-17 22:51:57 +0000
|
||||
+++ new/gcc/opts.c 2011-03-27 09:38:18 +0000
|
||||
@@ -823,6 +823,12 @@
|
||||
opts->x_flag_split_stack = 0;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
|
||||
+ is disabled. */
|
||||
+ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert)
|
||||
+ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0,
|
||||
+ opts->x_param_values, opts_set->x_param_values);
|
||||
}
|
||||
|
||||
#define LEFT_COLUMN 27
|
||||
|
||||
=== modified file 'gcc/params.def'
|
||||
--- old/gcc/params.def 2011-03-26 09:20:34 +0000
|
||||
+++ new/gcc/params.def 2011-04-18 11:31:29 +0000
|
||||
@@ -883,6 +883,13 @@
|
||||
"name lookup fails",
|
||||
1000, 0, 0)
|
||||
|
||||
+/* Maximum number of conditional store pairs that can be sunk. */
|
||||
+DEFPARAM (PARAM_MAX_STORES_TO_SINK,
|
||||
+ "max-stores-to-sink",
|
||||
+ "Maximum number of conditional store pairs that can be sunk",
|
||||
+ 2, 0, 0)
|
||||
+
|
||||
+
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
||||
=== modified file 'gcc/params.h'
|
||||
--- old/gcc/params.h 2011-01-13 13:41:03 +0000
|
||||
+++ new/gcc/params.h 2011-03-27 09:38:18 +0000
|
||||
@@ -206,4 +206,6 @@
|
||||
PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO)
|
||||
#define MIN_NONDEBUG_INSN_UID \
|
||||
PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
|
||||
+#define MAX_STORES_TO_SINK \
|
||||
+ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
|
||||
#endif /* ! GCC_PARAMS_H */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-1.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c 2011-03-27 09:38:18 +0000
|
||||
@@ -0,0 +1,86 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 50
|
||||
+
|
||||
+typedef struct {
|
||||
+ short a;
|
||||
+ short b;
|
||||
+} data;
|
||||
+
|
||||
+data in1[N], in2[N], out[N];
|
||||
+short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42};
|
||||
+short out1[N], out2[N];
|
||||
+
|
||||
+__attribute__ ((noinline)) void
|
||||
+foo ()
|
||||
+{
|
||||
+ int i;
|
||||
+ short c, d;
|
||||
+
|
||||
+ /* Vectorizable with conditional store sinking. */
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ c = in1[i].b;
|
||||
+ d = in2[i].b;
|
||||
+
|
||||
+ if (c >= d)
|
||||
+ {
|
||||
+ out[i].b = c;
|
||||
+ out[i].a = d + 5;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ out[i].b = d - 12;
|
||||
+ out[i].a = c + d;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Not vectorizable. */
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ c = in1[i].b;
|
||||
+ d = in2[i].b;
|
||||
+
|
||||
+ if (c >= d)
|
||||
+ {
|
||||
+ out1[i] = c;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ out2[i] = c + d;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ in1[i].a = i;
|
||||
+ in1[i].b = i + 2;
|
||||
+ in2[i].a = 5;
|
||||
+ in2[i].b = i + 5;
|
||||
+ __asm__ volatile ("");
|
||||
+ }
|
||||
+
|
||||
+ foo ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-cselim-2.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c 2011-03-27 09:38:18 +0000
|
||||
@@ -0,0 +1,65 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 50
|
||||
+
|
||||
+int a[N], b[N], in1[N], in2[N];
|
||||
+int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42};
|
||||
+
|
||||
+__attribute__ ((noinline)) void
|
||||
+foo (int *pa, int *pb)
|
||||
+{
|
||||
+ int i;
|
||||
+ int c, d;
|
||||
+
|
||||
+ /* Store sinking should not work here since the pointers may alias. */
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ c = in1[i];
|
||||
+ d = in2[i];
|
||||
+
|
||||
+ if (c >= d)
|
||||
+ {
|
||||
+ *pa = c;
|
||||
+ *pb = d + 5;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ *pb = d - 12;
|
||||
+ *pa = c + d;
|
||||
+ }
|
||||
+
|
||||
+ pa++;
|
||||
+ pb++;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ in1[i] = i;
|
||||
+ in2[i] = i + 5;
|
||||
+ __asm__ volatile ("");
|
||||
+ }
|
||||
+
|
||||
+ foo (a, b);
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ if (a[i] != result[2*i] || b[i] != result[2*i+1])
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/tree-data-ref.c'
|
||||
--- old/gcc/tree-data-ref.c 2011-02-05 01:39:20 +0000
|
||||
+++ new/gcc/tree-data-ref.c 2011-03-27 09:38:18 +0000
|
||||
@@ -991,6 +991,48 @@
|
||||
return dr;
|
||||
}
|
||||
|
||||
+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
|
||||
+ expressions. */
|
||||
+static bool
|
||||
+dr_equal_offsets_p1 (tree offset1, tree offset2)
|
||||
+{
|
||||
+ bool res;
|
||||
+
|
||||
+ STRIP_NOPS (offset1);
|
||||
+ STRIP_NOPS (offset2);
|
||||
+
|
||||
+ if (offset1 == offset2)
|
||||
+ return true;
|
||||
+
|
||||
+ if (TREE_CODE (offset1) != TREE_CODE (offset2)
|
||||
+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
|
||||
+ return false;
|
||||
+
|
||||
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
|
||||
+ TREE_OPERAND (offset2, 0));
|
||||
+
|
||||
+ if (!res || !BINARY_CLASS_P (offset1))
|
||||
+ return res;
|
||||
+
|
||||
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
|
||||
+ TREE_OPERAND (offset2, 1));
|
||||
+
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* Check if DRA and DRB have equal offsets. */
|
||||
+bool
|
||||
+dr_equal_offsets_p (struct data_reference *dra,
|
||||
+ struct data_reference *drb)
|
||||
+{
|
||||
+ tree offset1, offset2;
|
||||
+
|
||||
+ offset1 = DR_OFFSET (dra);
|
||||
+ offset2 = DR_OFFSET (drb);
|
||||
+
|
||||
+ return dr_equal_offsets_p1 (offset1, offset2);
|
||||
+}
|
||||
+
|
||||
/* Returns true if FNA == FNB. */
|
||||
|
||||
static bool
|
||||
@@ -4294,7 +4336,7 @@
|
||||
DATAREFS. Returns chrec_dont_know when failing to analyze a
|
||||
difficult case, returns NULL_TREE otherwise. */
|
||||
|
||||
-static tree
|
||||
+tree
|
||||
find_data_references_in_bb (struct loop *loop, basic_block bb,
|
||||
VEC (data_reference_p, heap) **datarefs)
|
||||
{
|
||||
|
||||
=== modified file 'gcc/tree-data-ref.h'
|
||||
--- old/gcc/tree-data-ref.h 2011-01-25 21:24:23 +0000
|
||||
+++ new/gcc/tree-data-ref.h 2011-03-27 09:38:18 +0000
|
||||
@@ -426,10 +426,14 @@
|
||||
extern void compute_all_dependences (VEC (data_reference_p, heap) *,
|
||||
VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
|
||||
bool);
|
||||
+extern tree find_data_references_in_bb (struct loop *, basic_block,
|
||||
+ VEC (data_reference_p, heap) **);
|
||||
|
||||
extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
|
||||
extern bool dr_may_alias_p (const struct data_reference *,
|
||||
const struct data_reference *);
|
||||
+extern bool dr_equal_offsets_p (struct data_reference *,
|
||||
+ struct data_reference *);
|
||||
|
||||
|
||||
/* Return true when the base objects of data references A and B are
|
||||
|
||||
=== modified file 'gcc/tree-ssa-phiopt.c'
|
||||
--- old/gcc/tree-ssa-phiopt.c 2010-11-03 15:18:50 +0000
|
||||
+++ new/gcc/tree-ssa-phiopt.c 2011-03-27 09:38:18 +0000
|
||||
@@ -34,6 +34,8 @@
|
||||
#include "langhooks.h"
|
||||
#include "pointer-set.h"
|
||||
#include "domwalk.h"
|
||||
+#include "cfgloop.h"
|
||||
+#include "tree-data-ref.h"
|
||||
|
||||
static unsigned int tree_ssa_phiopt (void);
|
||||
static unsigned int tree_ssa_phiopt_worker (bool);
|
||||
@@ -1292,35 +1294,18 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Do the main work of conditional store replacement. We already know
|
||||
- that the recognized pattern looks like so:
|
||||
-
|
||||
- split:
|
||||
- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
|
||||
- THEN_BB:
|
||||
- X = Y;
|
||||
- goto JOIN_BB;
|
||||
- ELSE_BB:
|
||||
- X = Z;
|
||||
- fallthrough (edge E0)
|
||||
- JOIN_BB:
|
||||
- some more
|
||||
-
|
||||
- We check that THEN_BB and ELSE_BB contain only one store
|
||||
- that the stores have a "simple" RHS. */
|
||||
+/* Do the main work of conditional store replacement. */
|
||||
|
||||
static bool
|
||||
-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
|
||||
- basic_block join_bb)
|
||||
+cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
|
||||
+ basic_block join_bb, gimple then_assign,
|
||||
+ gimple else_assign)
|
||||
{
|
||||
- gimple then_assign = last_and_only_stmt (then_bb);
|
||||
- gimple else_assign = last_and_only_stmt (else_bb);
|
||||
tree lhs_base, lhs, then_rhs, else_rhs;
|
||||
source_location then_locus, else_locus;
|
||||
gimple_stmt_iterator gsi;
|
||||
gimple newphi, new_stmt;
|
||||
|
||||
- /* Check if then_bb and else_bb contain only one store each. */
|
||||
if (then_assign == NULL
|
||||
|| !gimple_assign_single_p (then_assign)
|
||||
|| else_assign == NULL
|
||||
@@ -1385,6 +1370,190 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Conditional store replacement. We already know
|
||||
+ that the recognized pattern looks like so:
|
||||
+
|
||||
+ split:
|
||||
+ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
|
||||
+ THEN_BB:
|
||||
+ ...
|
||||
+ X = Y;
|
||||
+ ...
|
||||
+ goto JOIN_BB;
|
||||
+ ELSE_BB:
|
||||
+ ...
|
||||
+ X = Z;
|
||||
+ ...
|
||||
+ fallthrough (edge E0)
|
||||
+ JOIN_BB:
|
||||
+ some more
|
||||
+
|
||||
+ We check that it is safe to sink the store to JOIN_BB by verifying that
|
||||
+ there are no read-after-write or write-after-write dependencies in
|
||||
+ THEN_BB and ELSE_BB. */
|
||||
+
|
||||
+static bool
|
||||
+cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
|
||||
+ basic_block join_bb)
|
||||
+{
|
||||
+ gimple then_assign = last_and_only_stmt (then_bb);
|
||||
+ gimple else_assign = last_and_only_stmt (else_bb);
|
||||
+ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs;
|
||||
+ VEC (ddr_p, heap) *then_ddrs, *else_ddrs;
|
||||
+ gimple then_store, else_store;
|
||||
+ bool found, ok = false, res;
|
||||
+ struct data_dependence_relation *ddr;
|
||||
+ data_reference_p then_dr, else_dr;
|
||||
+ int i, j;
|
||||
+ tree then_lhs, else_lhs;
|
||||
+ VEC (gimple, heap) *then_stores, *else_stores;
|
||||
+ basic_block blocks[3];
|
||||
+
|
||||
+ if (MAX_STORES_TO_SINK == 0)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Handle the case with single statement in THEN_BB and ELSE_BB. */
|
||||
+ if (then_assign && else_assign)
|
||||
+ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
|
||||
+ then_assign, else_assign);
|
||||
+
|
||||
+ /* Find data references. */
|
||||
+ then_datarefs = VEC_alloc (data_reference_p, heap, 1);
|
||||
+ else_datarefs = VEC_alloc (data_reference_p, heap, 1);
|
||||
+ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
|
||||
+ == chrec_dont_know)
|
||||
+ || !VEC_length (data_reference_p, then_datarefs)
|
||||
+ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
|
||||
+ == chrec_dont_know)
|
||||
+ || !VEC_length (data_reference_p, else_datarefs))
|
||||
+ {
|
||||
+ free_data_refs (then_datarefs);
|
||||
+ free_data_refs (else_datarefs);
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* Find pairs of stores with equal LHS. */
|
||||
+ then_stores = VEC_alloc (gimple, heap, 1);
|
||||
+ else_stores = VEC_alloc (gimple, heap, 1);
|
||||
+ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr)
|
||||
+ {
|
||||
+ if (DR_IS_READ (then_dr))
|
||||
+ continue;
|
||||
+
|
||||
+ then_store = DR_STMT (then_dr);
|
||||
+ then_lhs = gimple_assign_lhs (then_store);
|
||||
+ found = false;
|
||||
+
|
||||
+ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr)
|
||||
+ {
|
||||
+ if (DR_IS_READ (else_dr))
|
||||
+ continue;
|
||||
+
|
||||
+ else_store = DR_STMT (else_dr);
|
||||
+ else_lhs = gimple_assign_lhs (else_store);
|
||||
+
|
||||
+ if (operand_equal_p (then_lhs, else_lhs, 0))
|
||||
+ {
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!found)
|
||||
+ continue;
|
||||
+
|
||||
+ VEC_safe_push (gimple, heap, then_stores, then_store);
|
||||
+ VEC_safe_push (gimple, heap, else_stores, else_store);
|
||||
+ }
|
||||
+
|
||||
+ /* No pairs of stores found. */
|
||||
+ if (!VEC_length (gimple, then_stores)
|
||||
+ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK)
|
||||
+ {
|
||||
+ free_data_refs (then_datarefs);
|
||||
+ free_data_refs (else_datarefs);
|
||||
+ VEC_free (gimple, heap, then_stores);
|
||||
+ VEC_free (gimple, heap, else_stores);
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* Compute and check data dependencies in both basic blocks. */
|
||||
+ then_ddrs = VEC_alloc (ddr_p, heap, 1);
|
||||
+ else_ddrs = VEC_alloc (ddr_p, heap, 1);
|
||||
+ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false);
|
||||
+ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false);
|
||||
+ blocks[0] = then_bb;
|
||||
+ blocks[1] = else_bb;
|
||||
+ blocks[2] = join_bb;
|
||||
+ renumber_gimple_stmt_uids_in_blocks (blocks, 3);
|
||||
+
|
||||
+ /* Check that there are no read-after-write or write-after-write dependencies
|
||||
+ in THEN_BB. */
|
||||
+ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr)
|
||||
+ {
|
||||
+ struct data_reference *dra = DDR_A (ddr);
|
||||
+ struct data_reference *drb = DDR_B (ddr);
|
||||
+
|
||||
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
|
||||
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
|
||||
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
|
||||
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
|
||||
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
|
||||
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
|
||||
+ {
|
||||
+ free_dependence_relations (then_ddrs);
|
||||
+ free_dependence_relations (else_ddrs);
|
||||
+ free_data_refs (then_datarefs);
|
||||
+ free_data_refs (else_datarefs);
|
||||
+ VEC_free (gimple, heap, then_stores);
|
||||
+ VEC_free (gimple, heap, else_stores);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Check that there are no read-after-write or write-after-write dependencies
|
||||
+ in ELSE_BB. */
|
||||
+ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr)
|
||||
+ {
|
||||
+ struct data_reference *dra = DDR_A (ddr);
|
||||
+ struct data_reference *drb = DDR_B (ddr);
|
||||
+
|
||||
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
|
||||
+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
|
||||
+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
|
||||
+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
|
||||
+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
|
||||
+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
|
||||
+ {
|
||||
+ free_dependence_relations (then_ddrs);
|
||||
+ free_dependence_relations (else_ddrs);
|
||||
+ free_data_refs (then_datarefs);
|
||||
+ free_data_refs (else_datarefs);
|
||||
+ VEC_free (gimple, heap, then_stores);
|
||||
+ VEC_free (gimple, heap, else_stores);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Sink stores with same LHS. */
|
||||
+ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store)
|
||||
+ {
|
||||
+ else_store = VEC_index (gimple, else_stores, i);
|
||||
+ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
|
||||
+ then_store, else_store);
|
||||
+ ok = ok || res;
|
||||
+ }
|
||||
+
|
||||
+ free_dependence_relations (then_ddrs);
|
||||
+ free_dependence_relations (else_ddrs);
|
||||
+ free_data_refs (then_datarefs);
|
||||
+ free_data_refs (else_datarefs);
|
||||
+ VEC_free (gimple, heap, then_stores);
|
||||
+ VEC_free (gimple, heap, else_stores);
|
||||
+
|
||||
+ return ok;
|
||||
+}
|
||||
+
|
||||
/* Always do these optimizations if we have SSA
|
||||
trees to work on. */
|
||||
static bool
|
||||
|
||||
=== modified file 'gcc/tree-vect-data-refs.c'
|
||||
--- old/gcc/tree-vect-data-refs.c 2011-02-25 11:18:14 +0000
|
||||
+++ new/gcc/tree-vect-data-refs.c 2011-03-27 09:38:18 +0000
|
||||
@@ -289,39 +289,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
-
|
||||
-/* Function vect_equal_offsets.
|
||||
-
|
||||
- Check if OFFSET1 and OFFSET2 are identical expressions. */
|
||||
-
|
||||
-static bool
|
||||
-vect_equal_offsets (tree offset1, tree offset2)
|
||||
-{
|
||||
- bool res;
|
||||
-
|
||||
- STRIP_NOPS (offset1);
|
||||
- STRIP_NOPS (offset2);
|
||||
-
|
||||
- if (offset1 == offset2)
|
||||
- return true;
|
||||
-
|
||||
- if (TREE_CODE (offset1) != TREE_CODE (offset2)
|
||||
- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
|
||||
- return false;
|
||||
-
|
||||
- res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
|
||||
- TREE_OPERAND (offset2, 0));
|
||||
-
|
||||
- if (!res || !BINARY_CLASS_P (offset1))
|
||||
- return res;
|
||||
-
|
||||
- res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
|
||||
- TREE_OPERAND (offset2, 1));
|
||||
-
|
||||
- return res;
|
||||
-}
|
||||
-
|
||||
-
|
||||
/* Check dependence between DRA and DRB for basic block vectorization.
|
||||
If the accesses share same bases and offsets, we can compare their initial
|
||||
constant offsets to decide whether they differ or not. In case of a read-
|
||||
@@ -352,7 +319,7 @@
|
||||
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|
||||
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
|
||||
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
|
||||
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
|
||||
+ || !dr_equal_offsets_p (dra, drb))
|
||||
return true;
|
||||
|
||||
/* Check the types. */
|
||||
@@ -402,7 +369,7 @@
|
||||
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|
||||
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
|
||||
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
|
||||
- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
|
||||
+ || !dr_equal_offsets_p (dra, drb)
|
||||
|| !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
|
||||
|| DR_IS_READ (dra) != DR_IS_READ (drb))
|
||||
return false;
|
||||
|
||||
@ -0,0 +1,126 @@
|
||||
2011-04-21 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2008-12-03 Daniel Jacobowitz <dan@codesourcery.com>
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New.
|
||||
* lib/target-supports.exp (check_effective_target_vect_shift_char): New
|
||||
function.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-3.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-3.c 2011-04-21 13:51:06 +0000
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-require-effective-target vect_shift } */
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 32
|
||||
+
|
||||
+unsigned short dst[N] __attribute__((aligned(N)));
|
||||
+unsigned short src[N] __attribute__((aligned(N)));
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+void array_shift(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ dst[i] = src[i] >> 3;
|
||||
+}
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ volatile int i;
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ src[i] = i << 3;
|
||||
+
|
||||
+ array_shift ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (dst[i] != i)
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/vect-shift-4.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-shift-4.c 2011-04-21 13:51:06 +0000
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-require-effective-target vect_shift_char } */
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 32
|
||||
+
|
||||
+unsigned char dst[N] __attribute__((aligned(N)));
|
||||
+unsigned char src[N] __attribute__((aligned(N)));
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+void array_shift(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ dst[i] = src[i] >> 3;
|
||||
+}
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ volatile int i;
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ src[i] = i << 3;
|
||||
+
|
||||
+ array_shift ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ if (dst[i] != i)
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/lib/target-supports.exp'
|
||||
--- old/gcc/testsuite/lib/target-supports.exp 2011-02-19 15:31:15 +0000
|
||||
+++ new/gcc/testsuite/lib/target-supports.exp 2011-04-21 13:51:06 +0000
|
||||
@@ -2308,6 +2308,26 @@
|
||||
}
|
||||
|
||||
|
||||
+# Return 1 if the target supports hardware vector shift operation for char.
|
||||
+
|
||||
+proc check_effective_target_vect_shift_char { } {
|
||||
+ global et_vect_shift_char_saved
|
||||
+
|
||||
+ if [info exists et_vect_shift_char_saved] {
|
||||
+ verbose "check_effective_target_vect_shift_char: using cached result" 2
|
||||
+ } else {
|
||||
+ set et_vect_shift_char_saved 0
|
||||
+ if { ([istarget powerpc*-*-*]
|
||||
+ && ![istarget powerpc-*-linux*paired*])
|
||||
+ || [check_effective_target_arm32] } {
|
||||
+ set et_vect_shift_char_saved 1
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2
|
||||
+ return $et_vect_shift_char_saved
|
||||
+}
|
||||
+
|
||||
# Return 1 if the target supports hardware vectors of long, 0 otherwise.
|
||||
#
|
||||
# This can change for different subtargets so do not cache the result.
|
||||
|
||||
@ -0,0 +1,177 @@
|
||||
2011-04-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-04-03 Richard Guenther <rguenther@suse.de>
|
||||
Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all
|
||||
non-variable offsets and compare the remaining bases of the two
|
||||
accesses instead of looking for exact same data-ref.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test.
|
||||
* gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with
|
||||
-ftree-loop-if-convert-stores.
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c 2011-04-24 07:45:49 +0000
|
||||
@@ -0,0 +1,69 @@
|
||||
+/* { dg-require-effective-target vect_int } */
|
||||
+
|
||||
+#include <stdarg.h>
|
||||
+#include "tree-vect.h"
|
||||
+
|
||||
+#define N 50
|
||||
+
|
||||
+typedef struct {
|
||||
+ short a;
|
||||
+ short b;
|
||||
+} data;
|
||||
+
|
||||
+data in1[N], in2[N], out[N];
|
||||
+short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42};
|
||||
+short out1[N], out2[N];
|
||||
+
|
||||
+__attribute__ ((noinline)) void
|
||||
+foo ()
|
||||
+{
|
||||
+ int i;
|
||||
+ short c, d;
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ c = in1[i].b;
|
||||
+ d = in2[i].b;
|
||||
+
|
||||
+ if (c >= d)
|
||||
+ {
|
||||
+ out[i].b = in1[i].a;
|
||||
+ out[i].a = d + 5;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ out[i].b = d - 12;
|
||||
+ out[i].a = in2[i].a + d;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ check_vect ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ in1[i].a = i;
|
||||
+ in1[i].b = i + 2;
|
||||
+ in2[i].a = 5;
|
||||
+ in2[i].b = i + 5;
|
||||
+ __asm__ volatile ("");
|
||||
+ }
|
||||
+
|
||||
+ foo ();
|
||||
+
|
||||
+ for (i = 0; i < N; i++)
|
||||
+ {
|
||||
+ if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
|
||||
+ abort ();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || {! vect_strided } } } } } */
|
||||
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect.exp'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect.exp 2010-11-22 21:49:19 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect.exp 2011-04-24 07:45:49 +0000
|
||||
@@ -210,6 +210,12 @@
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
+# -ftree-loop-if-convert-stores
|
||||
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
|
||||
+lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores"
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \
|
||||
+ "" $DEFAULT_VECTCFLAGS
|
||||
+
|
||||
# With -O3.
|
||||
# Don't allow IPA cloning, because it throws our counts out of whack.
|
||||
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
|
||||
|
||||
=== modified file 'gcc/tree-if-conv.c'
|
||||
--- old/gcc/tree-if-conv.c 2011-02-23 16:49:52 +0000
|
||||
+++ new/gcc/tree-if-conv.c 2011-04-24 07:45:49 +0000
|
||||
@@ -464,8 +464,8 @@
|
||||
/* Returns true when the memory references of STMT are read or written
|
||||
unconditionally. In other words, this function returns true when
|
||||
for every data reference A in STMT there exist other accesses to
|
||||
- the same data reference with predicates that add up (OR-up) to the
|
||||
- true predicate: this ensures that the data reference A is touched
|
||||
+ a data reference with the same base with predicates that add up (OR-up) to
|
||||
+ the true predicate: this ensures that the data reference A is touched
|
||||
(read or written) on every iteration of the if-converted loop. */
|
||||
|
||||
static bool
|
||||
@@ -489,21 +489,38 @@
|
||||
continue;
|
||||
|
||||
for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
|
||||
- if (DR_STMT (b) != stmt
|
||||
- && same_data_refs (a, b))
|
||||
- {
|
||||
- tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
|
||||
-
|
||||
- if (DR_RW_UNCONDITIONALLY (b) == 1
|
||||
- || is_true_predicate (cb)
|
||||
- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
|
||||
- ca, cb)))
|
||||
- {
|
||||
- DR_RW_UNCONDITIONALLY (a) = 1;
|
||||
- DR_RW_UNCONDITIONALLY (b) = 1;
|
||||
- found = true;
|
||||
- break;
|
||||
- }
|
||||
+ {
|
||||
+ tree ref_base_a = DR_REF (a);
|
||||
+ tree ref_base_b = DR_REF (b);
|
||||
+
|
||||
+ if (DR_STMT (b) == stmt)
|
||||
+ continue;
|
||||
+
|
||||
+ while (TREE_CODE (ref_base_a) == COMPONENT_REF
|
||||
+ || TREE_CODE (ref_base_a) == IMAGPART_EXPR
|
||||
+ || TREE_CODE (ref_base_a) == REALPART_EXPR)
|
||||
+ ref_base_a = TREE_OPERAND (ref_base_a, 0);
|
||||
+
|
||||
+ while (TREE_CODE (ref_base_b) == COMPONENT_REF
|
||||
+ || TREE_CODE (ref_base_b) == IMAGPART_EXPR
|
||||
+ || TREE_CODE (ref_base_b) == REALPART_EXPR)
|
||||
+ ref_base_b = TREE_OPERAND (ref_base_b, 0);
|
||||
+
|
||||
+ if (!operand_equal_p (ref_base_a, ref_base_b, 0))
|
||||
+ {
|
||||
+ tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
|
||||
+
|
||||
+ if (DR_RW_UNCONDITIONALLY (b) == 1
|
||||
+ || is_true_predicate (cb)
|
||||
+ || is_true_predicate (ca
|
||||
+ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
|
||||
+ {
|
||||
+ DR_RW_UNCONDITIONALLY (a) = 1;
|
||||
+ DR_RW_UNCONDITIONALLY (b) = 1;
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if (!found)
|
||||
|
||||
@ -0,0 +1,140 @@
|
||||
2011-05-02 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-03-27 Ira Rosen <ira.rosen@linaro.org>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.c (arm_autovectorize_vector_sizes): New function.
|
||||
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
|
||||
accesses to preserve the meaning of the test for doubleword vectors.
|
||||
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
|
||||
* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-03-02 11:38:43 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-04-28 11:46:58 +0000
|
||||
@@ -250,6 +250,7 @@
|
||||
bool is_packed);
|
||||
static void arm_conditional_register_usage (void);
|
||||
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
|
||||
+static unsigned int arm_autovectorize_vector_sizes (void);
|
||||
|
||||
|
||||
/* Table of machine attributes. */
|
||||
@@ -395,6 +396,9 @@
|
||||
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
|
||||
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
||||
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
|
||||
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
||||
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
||||
+ arm_autovectorize_vector_sizes
|
||||
|
||||
#undef TARGET_MACHINE_DEPENDENT_REORG
|
||||
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
|
||||
@@ -23511,6 +23515,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
+static unsigned int
|
||||
+arm_autovectorize_vector_sizes (void)
|
||||
+{
|
||||
+ return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
|
||||
+}
|
||||
+
|
||||
static bool
|
||||
arm_vector_alignment_reachable (const_tree type, bool is_packed)
|
||||
{
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2010-10-04 14:59:30 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c 2011-04-28 11:46:58 +0000
|
||||
@@ -8,7 +8,7 @@
|
||||
void with_restrict(int * __restrict p)
|
||||
{
|
||||
int i;
|
||||
- int *q = p - 2;
|
||||
+ int *q = p - 1;
|
||||
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
p[i] = q[i];
|
||||
@@ -19,7 +19,7 @@
|
||||
void without_restrict(int * p)
|
||||
{
|
||||
int i;
|
||||
- int *q = p - 2;
|
||||
+ int *q = p - 1;
|
||||
|
||||
for (i = 0; i < 1000; ++i) {
|
||||
p[i] = q[i];
|
||||
@@ -38,8 +38,8 @@
|
||||
a[i] = b[i] = i;
|
||||
}
|
||||
|
||||
- with_restrict(a + 2);
|
||||
- without_restrict(b + 2);
|
||||
+ with_restrict(a + 1);
|
||||
+ without_restrict(b + 1);
|
||||
|
||||
for (i = 0; i < 1002; ++i) {
|
||||
if (a[i] != b[i])
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/slp-3.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/slp-3.c 2010-11-22 12:16:52 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/slp-3.c 2011-04-28 11:46:58 +0000
|
||||
@@ -4,9 +4,9 @@
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
-#define N 8
|
||||
+#define N 12
|
||||
|
||||
-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
+unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
|
||||
int
|
||||
main1 ()
|
||||
@@ -101,7 +101,7 @@
|
||||
}
|
||||
|
||||
/* SLP with unrolling by 8. */
|
||||
- for (i = 0; i < N/2; i++)
|
||||
+ for (i = 0; i < N/4; i++)
|
||||
{
|
||||
out[i*9] = in[i*9];
|
||||
out[i*9 + 1] = in[i*9 + 1];
|
||||
@@ -115,7 +115,7 @@
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
- for (i = 0; i < N/2; i++)
|
||||
+ for (i = 0; i < N/4; i++)
|
||||
{
|
||||
if (out[i*9] != in[i*9]
|
||||
|| out[i*9 + 1] != in[i*9 + 1]
|
||||
|
||||
=== modified file 'gcc/testsuite/gcc.dg/vect/vect-outer-5.c'
|
||||
--- old/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2010-11-22 12:16:52 +0000
|
||||
+++ new/gcc/testsuite/gcc.dg/vect/vect-outer-5.c 2011-04-28 11:46:58 +0000
|
||||
@@ -17,7 +17,7 @@
|
||||
float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
- float E[4] = {0,1,2,480};
|
||||
+ float E[4] = {0,480,960,1440};
|
||||
float s;
|
||||
|
||||
int i, j;
|
||||
@@ -55,7 +55,7 @@
|
||||
s = 0;
|
||||
for (j=0; j<N; j+=4)
|
||||
s += C[j];
|
||||
- B[i+3] = B[i] + s;
|
||||
+ B[i+1] = B[i] + s;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
|
||||
@ -0,0 +1,294 @@
|
||||
2011-05-04 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-03-29 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
PR debug/48190
|
||||
* dwarf2out.c (dw_loc_list_node): Add resolved_addr and replaced.
|
||||
(cached_dw_loc_list_def): New structure.
|
||||
(cached_dw_loc_list): New typedef.
|
||||
(cached_dw_loc_list_table): New variable.
|
||||
(cached_dw_loc_list_table_hash): New function.
|
||||
(cached_dw_loc_list_table_eq): Likewise.
|
||||
(add_location_or_const_value_attribute): Take a bool cache_p.
|
||||
Cache the list when the parameter is true.
|
||||
(gen_formal_parameter_die): Update caller.
|
||||
(gen_variable_die): Likewise.
|
||||
(dwarf2out_finish): Likewise.
|
||||
(dwarf2out_abstract_function): Nullify cached_dw_loc_list_table
|
||||
while generating debug info for the decl.
|
||||
(dwarf2out_function_decl): Clear cached_dw_loc_list_table.
|
||||
(dwarf2out_init): Initialize cached_dw_loc_list_table.
|
||||
(resolve_addr): Cache the result of resolving a chain of
|
||||
location lists.
|
||||
|
||||
=== modified file 'gcc/dwarf2out.c'
|
||||
--- old/gcc/dwarf2out.c 2011-03-29 22:47:59 +0000
|
||||
+++ new/gcc/dwarf2out.c 2011-05-04 13:20:12 +0000
|
||||
@@ -4427,6 +4427,11 @@
|
||||
const char *section; /* Section this loclist is relative to */
|
||||
dw_loc_descr_ref expr;
|
||||
hashval_t hash;
|
||||
+ /* True if all addresses in this and subsequent lists are known to be
|
||||
+ resolved. */
|
||||
+ bool resolved_addr;
|
||||
+ /* True if this list has been replaced by dw_loc_next. */
|
||||
+ bool replaced;
|
||||
bool emitted;
|
||||
} dw_loc_list_node;
|
||||
|
||||
@@ -6087,6 +6092,19 @@
|
||||
/* Table of decl location linked lists. */
|
||||
static GTY ((param_is (var_loc_list))) htab_t decl_loc_table;
|
||||
|
||||
+/* A cached location list. */
|
||||
+struct GTY (()) cached_dw_loc_list_def {
|
||||
+ /* The DECL_UID of the decl that this entry describes. */
|
||||
+ unsigned int decl_id;
|
||||
+
|
||||
+ /* The cached location list. */
|
||||
+ dw_loc_list_ref loc_list;
|
||||
+};
|
||||
+typedef struct cached_dw_loc_list_def cached_dw_loc_list;
|
||||
+
|
||||
+/* Table of cached location lists. */
|
||||
+static GTY ((param_is (cached_dw_loc_list))) htab_t cached_dw_loc_list_table;
|
||||
+
|
||||
/* A pointer to the base of a list of references to DIE's that
|
||||
are uniquely identified by their tag, presence/absence of
|
||||
children DIE's, and list of attribute/value pairs. */
|
||||
@@ -6434,7 +6452,7 @@
|
||||
static void insert_double (double_int, unsigned char *);
|
||||
static void insert_float (const_rtx, unsigned char *);
|
||||
static rtx rtl_for_decl_location (tree);
|
||||
-static bool add_location_or_const_value_attribute (dw_die_ref, tree,
|
||||
+static bool add_location_or_const_value_attribute (dw_die_ref, tree, bool,
|
||||
enum dwarf_attribute);
|
||||
static bool tree_add_const_value_attribute (dw_die_ref, tree);
|
||||
static bool tree_add_const_value_attribute_for_decl (dw_die_ref, tree);
|
||||
@@ -8168,6 +8186,24 @@
|
||||
htab_find_with_hash (decl_loc_table, decl, DECL_UID (decl));
|
||||
}
|
||||
|
||||
+/* Returns a hash value for X (which really is a cached_dw_loc_list_list). */
|
||||
+
|
||||
+static hashval_t
|
||||
+cached_dw_loc_list_table_hash (const void *x)
|
||||
+{
|
||||
+ return (hashval_t) ((const cached_dw_loc_list *) x)->decl_id;
|
||||
+}
|
||||
+
|
||||
+/* Return nonzero if decl_id of cached_dw_loc_list X is the same as
|
||||
+ UID of decl *Y. */
|
||||
+
|
||||
+static int
|
||||
+cached_dw_loc_list_table_eq (const void *x, const void *y)
|
||||
+{
|
||||
+ return (((const cached_dw_loc_list *) x)->decl_id
|
||||
+ == DECL_UID ((const_tree) y));
|
||||
+}
|
||||
+
|
||||
/* Equate a DIE to a particular declaration. */
|
||||
|
||||
static void
|
||||
@@ -16965,15 +17001,22 @@
|
||||
these things can crop up in other ways also.) Note that one type of
|
||||
constant value which can be passed into an inlined function is a constant
|
||||
pointer. This can happen for example if an actual argument in an inlined
|
||||
- function call evaluates to a compile-time constant address. */
|
||||
+ function call evaluates to a compile-time constant address.
|
||||
+
|
||||
+ CACHE_P is true if it is worth caching the location list for DECL,
|
||||
+ so that future calls can reuse it rather than regenerate it from scratch.
|
||||
+ This is true for BLOCK_NONLOCALIZED_VARS in inlined subroutines,
|
||||
+ since we will need to refer to them each time the function is inlined. */
|
||||
|
||||
static bool
|
||||
-add_location_or_const_value_attribute (dw_die_ref die, tree decl,
|
||||
+add_location_or_const_value_attribute (dw_die_ref die, tree decl, bool cache_p,
|
||||
enum dwarf_attribute attr)
|
||||
{
|
||||
rtx rtl;
|
||||
dw_loc_list_ref list;
|
||||
var_loc_list *loc_list;
|
||||
+ cached_dw_loc_list *cache;
|
||||
+ void **slot;
|
||||
|
||||
if (TREE_CODE (decl) == ERROR_MARK)
|
||||
return false;
|
||||
@@ -17010,7 +17053,33 @@
|
||||
&& add_const_value_attribute (die, rtl))
|
||||
return true;
|
||||
}
|
||||
- list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
|
||||
+ /* If this decl is from BLOCK_NONLOCALIZED_VARS, we might need its
|
||||
+ list several times. See if we've already cached the contents. */
|
||||
+ list = NULL;
|
||||
+ if (loc_list == NULL || cached_dw_loc_list_table == NULL)
|
||||
+ cache_p = false;
|
||||
+ if (cache_p)
|
||||
+ {
|
||||
+ cache = (cached_dw_loc_list *)
|
||||
+ htab_find_with_hash (cached_dw_loc_list_table, decl, DECL_UID (decl));
|
||||
+ if (cache)
|
||||
+ list = cache->loc_list;
|
||||
+ }
|
||||
+ if (list == NULL)
|
||||
+ {
|
||||
+ list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2);
|
||||
+ /* It is usually worth caching this result if the decl is from
|
||||
+ BLOCK_NONLOCALIZED_VARS and if the list has at least two elements. */
|
||||
+ if (cache_p && list && list->dw_loc_next)
|
||||
+ {
|
||||
+ slot = htab_find_slot_with_hash (cached_dw_loc_list_table, decl,
|
||||
+ DECL_UID (decl), INSERT);
|
||||
+ cache = ggc_alloc_cleared_cached_dw_loc_list ();
|
||||
+ cache->decl_id = DECL_UID (decl);
|
||||
+ cache->loc_list = list;
|
||||
+ *slot = cache;
|
||||
+ }
|
||||
+ }
|
||||
if (list)
|
||||
{
|
||||
add_AT_location_description (die, attr, list);
|
||||
@@ -18702,7 +18771,7 @@
|
||||
equate_decl_number_to_die (node, parm_die);
|
||||
if (! DECL_ABSTRACT (node_or_origin))
|
||||
add_location_or_const_value_attribute (parm_die, node_or_origin,
|
||||
- DW_AT_location);
|
||||
+ node == NULL, DW_AT_location);
|
||||
|
||||
break;
|
||||
|
||||
@@ -18887,6 +18956,7 @@
|
||||
tree context;
|
||||
int was_abstract;
|
||||
htab_t old_decl_loc_table;
|
||||
+ htab_t old_cached_dw_loc_list_table;
|
||||
|
||||
/* Make sure we have the actual abstract inline, not a clone. */
|
||||
decl = DECL_ORIGIN (decl);
|
||||
@@ -18901,6 +18971,8 @@
|
||||
get locations in abstract instantces. */
|
||||
old_decl_loc_table = decl_loc_table;
|
||||
decl_loc_table = NULL;
|
||||
+ old_cached_dw_loc_list_table = cached_dw_loc_list_table;
|
||||
+ cached_dw_loc_list_table = NULL;
|
||||
|
||||
/* Be sure we've emitted the in-class declaration DIE (if any) first, so
|
||||
we don't get confused by DECL_ABSTRACT. */
|
||||
@@ -18925,6 +18997,7 @@
|
||||
|
||||
current_function_decl = save_fn;
|
||||
decl_loc_table = old_decl_loc_table;
|
||||
+ cached_dw_loc_list_table = old_cached_dw_loc_list_table;
|
||||
pop_cfun ();
|
||||
}
|
||||
|
||||
@@ -19709,9 +19782,8 @@
|
||||
&& !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl_or_origin)))
|
||||
defer_location (decl_or_origin, var_die);
|
||||
else
|
||||
- add_location_or_const_value_attribute (var_die,
|
||||
- decl_or_origin,
|
||||
- DW_AT_location);
|
||||
+ add_location_or_const_value_attribute (var_die, decl_or_origin,
|
||||
+ decl == NULL, DW_AT_location);
|
||||
add_pubname (decl_or_origin, var_die);
|
||||
}
|
||||
else
|
||||
@@ -21498,6 +21570,7 @@
|
||||
dwarf2out_decl (decl);
|
||||
|
||||
htab_empty (decl_loc_table);
|
||||
+ htab_empty (cached_dw_loc_list_table);
|
||||
}
|
||||
|
||||
/* Output a marker (i.e. a label) for the beginning of the generated code for
|
||||
@@ -22230,6 +22303,11 @@
|
||||
decl_loc_table = htab_create_ggc (10, decl_loc_table_hash,
|
||||
decl_loc_table_eq, NULL);
|
||||
|
||||
+ /* Allocate the cached_dw_loc_list_table. */
|
||||
+ cached_dw_loc_list_table
|
||||
+ = htab_create_ggc (10, cached_dw_loc_list_table_hash,
|
||||
+ cached_dw_loc_list_table_eq, NULL);
|
||||
+
|
||||
/* Allocate the initial hunk of the decl_scope_table. */
|
||||
decl_scope_table = VEC_alloc (tree, gc, 256);
|
||||
|
||||
@@ -22870,30 +22948,53 @@
|
||||
{
|
||||
dw_die_ref c;
|
||||
dw_attr_ref a;
|
||||
- dw_loc_list_ref *curr;
|
||||
+ dw_loc_list_ref *curr, *start, loc;
|
||||
unsigned ix;
|
||||
|
||||
FOR_EACH_VEC_ELT (dw_attr_node, die->die_attr, ix, a)
|
||||
switch (AT_class (a))
|
||||
{
|
||||
case dw_val_class_loc_list:
|
||||
- curr = AT_loc_list_ptr (a);
|
||||
- while (*curr)
|
||||
+ start = curr = AT_loc_list_ptr (a);
|
||||
+ loc = *curr;
|
||||
+ gcc_assert (loc);
|
||||
+ /* The same list can be referenced more than once. See if we have
|
||||
+ already recorded the result from a previous pass. */
|
||||
+ if (loc->replaced)
|
||||
+ *curr = loc->dw_loc_next;
|
||||
+ else if (!loc->resolved_addr)
|
||||
{
|
||||
- if (!resolve_addr_in_expr ((*curr)->expr))
|
||||
+ /* As things stand, we do not expect or allow one die to
|
||||
+ reference a suffix of another die's location list chain.
|
||||
+ References must be identical or completely separate.
|
||||
+ There is therefore no need to cache the result of this
|
||||
+ pass on any list other than the first; doing so
|
||||
+ would lead to unnecessary writes. */
|
||||
+ while (*curr)
|
||||
{
|
||||
- dw_loc_list_ref next = (*curr)->dw_loc_next;
|
||||
- if (next && (*curr)->ll_symbol)
|
||||
+ gcc_assert (!(*curr)->replaced && !(*curr)->resolved_addr);
|
||||
+ if (!resolve_addr_in_expr ((*curr)->expr))
|
||||
{
|
||||
- gcc_assert (!next->ll_symbol);
|
||||
- next->ll_symbol = (*curr)->ll_symbol;
|
||||
+ dw_loc_list_ref next = (*curr)->dw_loc_next;
|
||||
+ if (next && (*curr)->ll_symbol)
|
||||
+ {
|
||||
+ gcc_assert (!next->ll_symbol);
|
||||
+ next->ll_symbol = (*curr)->ll_symbol;
|
||||
+ }
|
||||
+ *curr = next;
|
||||
}
|
||||
- *curr = next;
|
||||
+ else
|
||||
+ curr = &(*curr)->dw_loc_next;
|
||||
}
|
||||
+ if (loc == *start)
|
||||
+ loc->resolved_addr = 1;
|
||||
else
|
||||
- curr = &(*curr)->dw_loc_next;
|
||||
+ {
|
||||
+ loc->replaced = 1;
|
||||
+ loc->dw_loc_next = *start;
|
||||
+ }
|
||||
}
|
||||
- if (!AT_loc_list (a))
|
||||
+ if (!*start)
|
||||
{
|
||||
remove_AT (die, a->dw_attr);
|
||||
ix--;
|
||||
@@ -23322,6 +23423,7 @@
|
||||
add_location_or_const_value_attribute (
|
||||
VEC_index (deferred_locations, deferred_locations_list, i)->die,
|
||||
VEC_index (deferred_locations, deferred_locations_list, i)->variable,
|
||||
+ false,
|
||||
DW_AT_location);
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,254 @@
|
||||
2011-04-26 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-04-15 Maxim Kuvyrkov <maxim@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* combine.c (subst, combine_simlify_rtx): Add new argument, use it
|
||||
to track processing of conditionals. Update all callers.
|
||||
(try_combine, simplify_if_then_else): Update.
|
||||
|
||||
2011-04-25 Maxim Kuvyrkov <maxim@codesourcery.com>
|
||||
Eric Botcazou <ebotcazou@adacore.com>
|
||||
|
||||
gcc/
|
||||
* combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals
|
||||
for STORE_FLAG_VALUE==-1 case.
|
||||
|
||||
=== modified file 'gcc/combine.c'
|
||||
--- old/gcc/combine.c 2011-02-15 19:46:26 +0000
|
||||
+++ new/gcc/combine.c 2011-04-26 17:03:58 +0000
|
||||
@@ -391,8 +391,8 @@
|
||||
static void undo_all (void);
|
||||
static void undo_commit (void);
|
||||
static rtx *find_split_point (rtx *, rtx, bool);
|
||||
-static rtx subst (rtx, rtx, rtx, int, int);
|
||||
-static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
|
||||
+static rtx subst (rtx, rtx, rtx, int, int, int);
|
||||
+static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int);
|
||||
static rtx simplify_if_then_else (rtx);
|
||||
static rtx simplify_set (rtx);
|
||||
static rtx simplify_logical (rtx);
|
||||
@@ -3086,12 +3086,12 @@
|
||||
if (i1)
|
||||
{
|
||||
subst_low_luid = DF_INSN_LUID (i1);
|
||||
- i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0);
|
||||
+ i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
subst_low_luid = DF_INSN_LUID (i2);
|
||||
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0);
|
||||
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3103,7 +3103,7 @@
|
||||
self-referential RTL when we will be substituting I1SRC for I1DEST
|
||||
later. Likewise if I0 feeds into I2, either directly or indirectly
|
||||
through I1, and I0DEST is in I0SRC. */
|
||||
- newpat = subst (PATTERN (i3), i2dest, i2src, 0,
|
||||
+ newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0,
|
||||
(i1_feeds_i2_n && i1dest_in_i1src)
|
||||
|| ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
|
||||
&& i0dest_in_i0src));
|
||||
@@ -3142,7 +3142,7 @@
|
||||
copy of I1SRC each time we substitute it, in order to avoid creating
|
||||
self-referential RTL when we will be substituting I0SRC for I0DEST
|
||||
later. */
|
||||
- newpat = subst (newpat, i1dest, i1src, 0,
|
||||
+ newpat = subst (newpat, i1dest, i1src, 0, 0,
|
||||
i0_feeds_i1_n && i0dest_in_i0src);
|
||||
substed_i1 = 1;
|
||||
|
||||
@@ -3172,7 +3172,7 @@
|
||||
|
||||
n_occurrences = 0;
|
||||
subst_low_luid = DF_INSN_LUID (i0);
|
||||
- newpat = subst (newpat, i0dest, i0src, 0, 0);
|
||||
+ newpat = subst (newpat, i0dest, i0src, 0, 0, 0);
|
||||
substed_i0 = 1;
|
||||
}
|
||||
|
||||
@@ -3234,7 +3234,7 @@
|
||||
{
|
||||
rtx t = i1pat;
|
||||
if (i0_feeds_i1_n)
|
||||
- t = subst (t, i0dest, i0src, 0, 0);
|
||||
+ t = subst (t, i0dest, i0src, 0, 0, 0);
|
||||
|
||||
XVECEXP (newpat, 0, --total_sets) = t;
|
||||
}
|
||||
@@ -3242,10 +3242,10 @@
|
||||
{
|
||||
rtx t = i2pat;
|
||||
if (i1_feeds_i2_n)
|
||||
- t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
|
||||
+ t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
|
||||
i0_feeds_i1_n && i0dest_in_i0src);
|
||||
if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
|
||||
- t = subst (t, i0dest, i0src, 0, 0);
|
||||
+ t = subst (t, i0dest, i0src, 0, 0, 0);
|
||||
|
||||
XVECEXP (newpat, 0, --total_sets) = t;
|
||||
}
|
||||
@@ -4914,11 +4914,13 @@
|
||||
|
||||
IN_DEST is nonzero if we are processing the SET_DEST of a SET.
|
||||
|
||||
+ IN_COND is nonzero if we are on top level of the condition.
|
||||
+
|
||||
UNIQUE_COPY is nonzero if each substitution must be unique. We do this
|
||||
by copying if `n_occurrences' is nonzero. */
|
||||
|
||||
static rtx
|
||||
-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy)
|
||||
+subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy)
|
||||
{
|
||||
enum rtx_code code = GET_CODE (x);
|
||||
enum machine_mode op0_mode = VOIDmode;
|
||||
@@ -4979,7 +4981,7 @@
|
||||
&& GET_CODE (XVECEXP (x, 0, 0)) == SET
|
||||
&& GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
|
||||
{
|
||||
- new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy);
|
||||
+ new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy);
|
||||
|
||||
/* If this substitution failed, this whole thing fails. */
|
||||
if (GET_CODE (new_rtx) == CLOBBER
|
||||
@@ -4996,7 +4998,7 @@
|
||||
&& GET_CODE (dest) != CC0
|
||||
&& GET_CODE (dest) != PC)
|
||||
{
|
||||
- new_rtx = subst (dest, from, to, 0, unique_copy);
|
||||
+ new_rtx = subst (dest, from, to, 0, 0, unique_copy);
|
||||
|
||||
/* If this substitution failed, this whole thing fails. */
|
||||
if (GET_CODE (new_rtx) == CLOBBER
|
||||
@@ -5042,8 +5044,8 @@
|
||||
}
|
||||
else
|
||||
{
|
||||
- new_rtx = subst (XVECEXP (x, i, j), from, to, 0,
|
||||
- unique_copy);
|
||||
+ new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0,
|
||||
+ unique_copy);
|
||||
|
||||
/* If this substitution failed, this whole thing
|
||||
fails. */
|
||||
@@ -5120,7 +5122,9 @@
|
||||
&& (code == SUBREG || code == STRICT_LOW_PART
|
||||
|| code == ZERO_EXTRACT))
|
||||
|| code == SET)
|
||||
- && i == 0), unique_copy);
|
||||
+ && i == 0),
|
||||
+ code == IF_THEN_ELSE && i == 0,
|
||||
+ unique_copy);
|
||||
|
||||
/* If we found that we will have to reject this combination,
|
||||
indicate that by returning the CLOBBER ourselves, rather than
|
||||
@@ -5177,7 +5181,7 @@
|
||||
/* If X is sufficiently simple, don't bother trying to do anything
|
||||
with it. */
|
||||
if (code != CONST_INT && code != REG && code != CLOBBER)
|
||||
- x = combine_simplify_rtx (x, op0_mode, in_dest);
|
||||
+ x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
|
||||
|
||||
if (GET_CODE (x) == code)
|
||||
break;
|
||||
@@ -5197,10 +5201,12 @@
|
||||
expression.
|
||||
|
||||
OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero
|
||||
- if we are inside a SET_DEST. */
|
||||
+ if we are inside a SET_DEST. IN_COND is nonzero if we are on the top level
|
||||
+ of a condition. */
|
||||
|
||||
static rtx
|
||||
-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
|
||||
+combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest,
|
||||
+ int in_cond)
|
||||
{
|
||||
enum rtx_code code = GET_CODE (x);
|
||||
enum machine_mode mode = GET_MODE (x);
|
||||
@@ -5255,8 +5261,8 @@
|
||||
false arms to store-flag values. Be careful to use copy_rtx
|
||||
here since true_rtx or false_rtx might share RTL with x as a
|
||||
result of the if_then_else_cond call above. */
|
||||
- true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0);
|
||||
- false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0);
|
||||
+ true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0);
|
||||
+ false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0);
|
||||
|
||||
/* If true_rtx and false_rtx are not general_operands, an if_then_else
|
||||
is unlikely to be simpler. */
|
||||
@@ -5600,7 +5606,7 @@
|
||||
{
|
||||
/* Try to simplify the expression further. */
|
||||
rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
|
||||
- temp = combine_simplify_rtx (tor, mode, in_dest);
|
||||
+ temp = combine_simplify_rtx (tor, mode, in_dest, 0);
|
||||
|
||||
/* If we could, great. If not, do not go ahead with the IOR
|
||||
replacement, since PLUS appears in many special purpose
|
||||
@@ -5693,7 +5699,16 @@
|
||||
ZERO_EXTRACT is indeed appropriate, it will be placed back by
|
||||
the call to make_compound_operation in the SET case. */
|
||||
|
||||
- if (STORE_FLAG_VALUE == 1
|
||||
+ if (in_cond)
|
||||
+ /* Don't apply below optimizations if the caller would
|
||||
+ prefer a comparison rather than a value.
|
||||
+ E.g., for the condition in an IF_THEN_ELSE most targets need
|
||||
+ an explicit comparison. */
|
||||
+ {
|
||||
+ ;
|
||||
+ }
|
||||
+
|
||||
+ else if (STORE_FLAG_VALUE == 1
|
||||
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
|
||||
&& op1 == const0_rtx
|
||||
&& mode == GET_MODE (op0)
|
||||
@@ -5739,7 +5754,10 @@
|
||||
|
||||
/* If STORE_FLAG_VALUE is -1, we have cases similar to
|
||||
those above. */
|
||||
- if (STORE_FLAG_VALUE == -1
|
||||
+ if (in_cond)
|
||||
+ ;
|
||||
+
|
||||
+ else if (STORE_FLAG_VALUE == -1
|
||||
&& new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
|
||||
&& op1 == const0_rtx
|
||||
&& (num_sign_bit_copies (op0, mode)
|
||||
@@ -5937,11 +5955,11 @@
|
||||
if (reg_mentioned_p (from, true_rtx))
|
||||
true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
|
||||
from, true_val),
|
||||
- pc_rtx, pc_rtx, 0, 0);
|
||||
+ pc_rtx, pc_rtx, 0, 0, 0);
|
||||
if (reg_mentioned_p (from, false_rtx))
|
||||
false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code,
|
||||
from, false_val),
|
||||
- pc_rtx, pc_rtx, 0, 0);
|
||||
+ pc_rtx, pc_rtx, 0, 0, 0);
|
||||
|
||||
SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
|
||||
SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
|
||||
@@ -6158,11 +6176,11 @@
|
||||
{
|
||||
temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
|
||||
cond_op0, cond_op1),
|
||||
- pc_rtx, pc_rtx, 0, 0);
|
||||
+ pc_rtx, pc_rtx, 0, 0, 0);
|
||||
temp = simplify_gen_binary (MULT, m, temp,
|
||||
simplify_gen_binary (MULT, m, c1,
|
||||
const_true_rtx));
|
||||
- temp = subst (temp, pc_rtx, pc_rtx, 0, 0);
|
||||
+ temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp);
|
||||
|
||||
if (extend_op != UNKNOWN)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,25 @@
|
||||
2011-04-26 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-04-05 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR target/43920
|
||||
gcc/
|
||||
* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
|
||||
for size.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.h'
|
||||
--- old/gcc/config/arm/arm.h 2011-05-03 15:17:25 +0000
|
||||
+++ new/gcc/config/arm/arm.h 2011-04-26 14:42:21 +0000
|
||||
@@ -2018,7 +2018,8 @@
|
||||
/* Try to generate sequences that don't involve branches, we can then use
|
||||
conditional instructions */
|
||||
#define BRANCH_COST(speed_p, predictable_p) \
|
||||
- (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
|
||||
+ (TARGET_32BIT ? (TARGET_THUMB2 && !speed_p ? 1 : 4) \
|
||||
+ : (optimize > 0 ? 2 : 0))
|
||||
|
||||
/* Position Independent Code. */
|
||||
/* We decide which register to use based on the compilation options and
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
2011-05-06 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
gcc/
|
||||
From Sergey Grechanik <mouseentity@ispras.ru>, approved for mainline
|
||||
|
||||
* config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS
|
||||
for constant vectors.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.c'
|
||||
--- old/gcc/config/arm/arm.c 2011-05-03 15:18:07 +0000
|
||||
+++ new/gcc/config/arm/arm.c 2011-05-06 11:33:02 +0000
|
||||
@@ -9193,7 +9193,7 @@
|
||||
/* The neon move patterns handle all legitimate vector and struct
|
||||
addresses. */
|
||||
if (TARGET_NEON
|
||||
- && MEM_P (x)
|
||||
+ && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
|
||||
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||||
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
|
||||
|| VALID_NEON_STRUCT_MODE (mode)))
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
2011-05-12 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
gcc/
|
||||
Backport from mainline:
|
||||
|
||||
2011-05-05 Michael Hope <michael.hope@linaro.org>
|
||||
|
||||
PR pch/45979
|
||||
* config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for
|
||||
__ARM_EABI__ hosts.
|
||||
|
||||
=== modified file 'gcc/config/host-linux.c'
|
||||
--- old/gcc/config/host-linux.c 2010-11-29 14:09:41 +0000
|
||||
+++ new/gcc/config/host-linux.c 2011-05-06 20:19:30 +0000
|
||||
@@ -84,6 +84,8 @@
|
||||
# define TRY_EMPTY_VM_SPACE 0x60000000
|
||||
#elif defined(__mc68000__)
|
||||
# define TRY_EMPTY_VM_SPACE 0x40000000
|
||||
+#elif defined(__ARM_EABI__)
|
||||
+# define TRY_EMPTY_VM_SPACE 0x60000000
|
||||
#else
|
||||
# define TRY_EMPTY_VM_SPACE 0
|
||||
#endif
|
||||
|
||||
@ -0,0 +1,640 @@
|
||||
2011-05-13 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
gcc/
|
||||
* loop-doloop.c (doloop_condition_get): Support new form of
|
||||
doloop pattern and use prev_nondebug_insn instead of PREV_INSN.
|
||||
* config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*".
|
||||
(doloop_end): New.
|
||||
* config/arm/arm.md (*addsi3_compare0): Remove "*".
|
||||
* params.def (sms-min-sc): New param flag.
|
||||
* doc/invoke.texi (sms-min-sc): Document it.
|
||||
* ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge
|
||||
enters the branch create an anti edge in the opposite direction
|
||||
to prevent the creation of reg-moves.
|
||||
* modulo-sched.c: Adjust comment to reflect the fact we are
|
||||
scheduling closing branch.
|
||||
(PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine.
|
||||
(stage_count): New field in struct partial_schedule.
|
||||
(calculate_stage_count): New function.
|
||||
(normalize_sched_times): Rename to reset_sched_times and handle
|
||||
incrementing the sched time of the nodes by a constant value
|
||||
passed as parameter.
|
||||
(duplicate_insns_of_cycles): Skip closing branch.
|
||||
(sms_schedule_by_order): Schedule closing branch.
|
||||
(ps_insn_find_column): Handle closing branch.
|
||||
(sms_schedule): Call reset_sched_times and adjust the code to
|
||||
support scheduling of the closing branch. Use sms-min-sc.
|
||||
Support new form of doloop pattern.
|
||||
(ps_insert_empty_row): Update calls to normalize_sched_times
|
||||
and rotate_partial_schedule functions.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-05-06 11:28:27 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
|
||||
@@ -791,7 +791,7 @@
|
||||
""
|
||||
)
|
||||
|
||||
-(define_insn "*addsi3_compare0"
|
||||
+(define_insn "addsi3_compare0"
|
||||
[(set (reg:CC_NOOV CC_REGNUM)
|
||||
(compare:CC_NOOV
|
||||
(plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
|
||||
|
||||
=== modified file 'gcc/config/arm/thumb2.md'
|
||||
--- old/gcc/config/arm/thumb2.md 2011-01-03 20:52:22 +0000
|
||||
+++ new/gcc/config/arm/thumb2.md 2011-05-11 07:15:47 +0000
|
||||
@@ -836,7 +836,7 @@
|
||||
"operands[4] = GEN_INT (- INTVAL (operands[2]));"
|
||||
)
|
||||
|
||||
-(define_insn "*thumb2_addsi3_compare0"
|
||||
+(define_insn "thumb2_addsi3_compare0"
|
||||
[(set (reg:CC_NOOV CC_REGNUM)
|
||||
(compare:CC_NOOV
|
||||
(plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
|
||||
@@ -1118,3 +1118,54 @@
|
||||
"
|
||||
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
|
||||
")
|
||||
+
|
||||
+;; Define the subtract-one-and-jump insns so loop.c
|
||||
+;; knows what to generate.
|
||||
+(define_expand "doloop_end"
|
||||
+ [(use (match_operand 0 "" "")) ; loop pseudo
|
||||
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
|
||||
+ (use (match_operand 2 "" "")) ; max iterations
|
||||
+ (use (match_operand 3 "" "")) ; loop level
|
||||
+ (use (match_operand 4 "" ""))] ; label
|
||||
+ "TARGET_32BIT"
|
||||
+ "
|
||||
+ {
|
||||
+ /* Currently SMS relies on the do-loop pattern to recognize loops
|
||||
+ where (1) the control part consists of all insns defining and/or
|
||||
+ using a certain 'count' register and (2) the loop count can be
|
||||
+ adjusted by modifying this register prior to the loop.
|
||||
+ ??? The possible introduction of a new block to initialize the
|
||||
+ new IV can potentially affect branch optimizations. */
|
||||
+ if (optimize > 0 && flag_modulo_sched)
|
||||
+ {
|
||||
+ rtx s0;
|
||||
+ rtx bcomp;
|
||||
+ rtx loc_ref;
|
||||
+ rtx cc_reg;
|
||||
+ rtx insn;
|
||||
+ rtx cmp;
|
||||
+
|
||||
+ /* Only use this on innermost loops. */
|
||||
+ if (INTVAL (operands[3]) > 1)
|
||||
+ FAIL;
|
||||
+ if (GET_MODE (operands[0]) != SImode)
|
||||
+ FAIL;
|
||||
+
|
||||
+ s0 = operands [0];
|
||||
+ if (TARGET_THUMB2)
|
||||
+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
|
||||
+ else
|
||||
+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
|
||||
+
|
||||
+ cmp = XVECEXP (PATTERN (insn), 0, 0);
|
||||
+ cc_reg = SET_DEST (cmp);
|
||||
+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
|
||||
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
|
||||
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
|
||||
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
|
||||
+ loc_ref, pc_rtx)));
|
||||
+ DONE;
|
||||
+ }else
|
||||
+ FAIL;
|
||||
+}")
|
||||
+
|
||||
|
||||
=== modified file 'gcc/ddg.c'
|
||||
--- old/gcc/ddg.c 2010-11-30 11:41:24 +0000
|
||||
+++ new/gcc/ddg.c 2011-05-11 07:15:47 +0000
|
||||
@@ -197,6 +197,11 @@
|
||||
}
|
||||
}
|
||||
|
||||
+ /* If a true dep edge enters the branch create an anti edge in the
|
||||
+ opposite direction to prevent the creation of reg-moves. */
|
||||
+ if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
|
||||
+ create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
|
||||
+
|
||||
latency = dep_cost (link);
|
||||
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||
add_edge_to_ddg (g, e);
|
||||
|
||||
=== modified file 'gcc/doc/invoke.texi'
|
||||
--- old/gcc/doc/invoke.texi 2011-04-18 11:31:29 +0000
|
||||
+++ new/gcc/doc/invoke.texi 2011-05-11 07:15:47 +0000
|
||||
@@ -8730,6 +8730,10 @@
|
||||
The maximum number of best instructions in the ready list that are considered
|
||||
for renaming in the selective scheduler. The default value is 2.
|
||||
|
||||
+@item sms-min-sc
|
||||
+The minimum value of stage count that swing modulo scheduler will
|
||||
+generate. The default value is 2.
|
||||
+
|
||||
@item max-last-value-rtl
|
||||
The maximum size measured as number of RTLs that can be recorded in an expression
|
||||
in combiner for a pseudo register as last known value of that register. The default
|
||||
|
||||
=== modified file 'gcc/loop-doloop.c'
|
||||
--- old/gcc/loop-doloop.c 2010-11-30 11:41:24 +0000
|
||||
+++ new/gcc/loop-doloop.c 2011-05-11 07:15:47 +0000
|
||||
@@ -78,6 +78,8 @@
|
||||
rtx inc_src;
|
||||
rtx condition;
|
||||
rtx pattern;
|
||||
+ rtx cc_reg = NULL_RTX;
|
||||
+ rtx reg_orig = NULL_RTX;
|
||||
|
||||
/* The canonical doloop pattern we expect has one of the following
|
||||
forms:
|
||||
@@ -96,7 +98,16 @@
|
||||
2) (set (reg) (plus (reg) (const_int -1))
|
||||
(set (pc) (if_then_else (reg != 0)
|
||||
(label_ref (label))
|
||||
- (pc))). */
|
||||
+ (pc))).
|
||||
+
|
||||
+ Some targets (ARM) do the comparison before the branch, as in the
|
||||
+ following form:
|
||||
+
|
||||
+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
|
||||
+ (set (reg) (plus (reg) (const_int -1)))])
|
||||
+ (set (pc) (if_then_else (cc == NE)
|
||||
+ (label_ref (label))
|
||||
+ (pc))) */
|
||||
|
||||
pattern = PATTERN (doloop_pat);
|
||||
|
||||
@@ -104,19 +115,47 @@
|
||||
{
|
||||
rtx cond;
|
||||
rtx prev_insn = prev_nondebug_insn (doloop_pat);
|
||||
+ rtx cmp_arg1, cmp_arg2;
|
||||
+ rtx cmp_orig;
|
||||
|
||||
- /* We expect the decrement to immediately precede the branch. */
|
||||
+ /* In case the pattern is not PARALLEL we expect two forms
|
||||
+ of doloop which are cases 2) and 3) above: in case 2) the
|
||||
+ decrement immediately precedes the branch, while in case 3)
|
||||
+ the compare and decrement instructions immediately precede
|
||||
+ the branch. */
|
||||
|
||||
if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
|
||||
return 0;
|
||||
|
||||
cmp = pattern;
|
||||
- inc = PATTERN (PREV_INSN (doloop_pat));
|
||||
+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
|
||||
+ {
|
||||
+ /* The third case: the compare and decrement instructions
|
||||
+ immediately precede the branch. */
|
||||
+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
|
||||
+ if (GET_CODE (cmp_orig) != SET)
|
||||
+ return 0;
|
||||
+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
|
||||
+ return 0;
|
||||
+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
|
||||
+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
|
||||
+ if (cmp_arg2 != const0_rtx
|
||||
+ || GET_CODE (cmp_arg1) != PLUS)
|
||||
+ return 0;
|
||||
+ reg_orig = XEXP (cmp_arg1, 0);
|
||||
+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
|
||||
+ || !REG_P (reg_orig))
|
||||
+ return 0;
|
||||
+ cc_reg = SET_DEST (cmp_orig);
|
||||
+
|
||||
+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
|
||||
+ }
|
||||
+ else
|
||||
+ inc = PATTERN (prev_insn);
|
||||
/* We expect the condition to be of the form (reg != 0) */
|
||||
cond = XEXP (SET_SRC (cmp), 0);
|
||||
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
|
||||
return 0;
|
||||
-
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -162,11 +201,15 @@
|
||||
return 0;
|
||||
|
||||
if ((XEXP (condition, 0) == reg)
|
||||
+ /* For the third case: */
|
||||
+ || ((cc_reg != NULL_RTX)
|
||||
+ && (XEXP (condition, 0) == cc_reg)
|
||||
+ && (reg_orig == reg))
|
||||
|| (GET_CODE (XEXP (condition, 0)) == PLUS
|
||||
- && XEXP (XEXP (condition, 0), 0) == reg))
|
||||
+ && XEXP (XEXP (condition, 0), 0) == reg))
|
||||
{
|
||||
if (GET_CODE (pattern) != PARALLEL)
|
||||
- /* The second form we expect:
|
||||
+ /* For the second form we expect:
|
||||
|
||||
(set (reg) (plus (reg) (const_int -1))
|
||||
(set (pc) (if_then_else (reg != 0)
|
||||
@@ -181,7 +224,24 @@
|
||||
(set (reg) (plus (reg) (const_int -1)))
|
||||
(additional clobbers and uses)])
|
||||
|
||||
- So we return that form instead.
|
||||
+ For the third form we expect:
|
||||
+
|
||||
+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
|
||||
+ (set (reg) (plus (reg) (const_int -1)))])
|
||||
+ (set (pc) (if_then_else (cc == NE)
|
||||
+ (label_ref (label))
|
||||
+ (pc)))
|
||||
+
|
||||
+ which is equivalent to the following:
|
||||
+
|
||||
+ (parallel [(set (cc) (compare (reg, 1))
|
||||
+ (set (reg) (plus (reg) (const_int -1)))
|
||||
+ (set (pc) (if_then_else (NE == cc)
|
||||
+ (label_ref (label))
|
||||
+ (pc))))])
|
||||
+
|
||||
+ So we return the second form instead for the two cases.
|
||||
+
|
||||
*/
|
||||
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
|
||||
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-02-14 17:59:10 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
|
||||
@@ -84,14 +84,13 @@
|
||||
II cycles (i.e. use register copies to prevent a def from overwriting
|
||||
itself before reaching the use).
|
||||
|
||||
- SMS works with countable loops (1) whose control part can be easily
|
||||
- decoupled from the rest of the loop and (2) whose loop count can
|
||||
- be easily adjusted. This is because we peel a constant number of
|
||||
- iterations into a prologue and epilogue for which we want to avoid
|
||||
- emitting the control part, and a kernel which is to iterate that
|
||||
- constant number of iterations less than the original loop. So the
|
||||
- control part should be a set of insns clearly identified and having
|
||||
- its own iv, not otherwise used in the loop (at-least for now), which
|
||||
+ SMS works with countable loops whose loop count can be easily
|
||||
+ adjusted. This is because we peel a constant number of iterations
|
||||
+ into a prologue and epilogue for which we want to avoid emitting
|
||||
+ the control part, and a kernel which is to iterate that constant
|
||||
+ number of iterations less than the original loop. So the control
|
||||
+ part should be a set of insns clearly identified and having its
|
||||
+ own iv, not otherwise used in the loop (at-least for now), which
|
||||
initializes a register before the loop to the number of iterations.
|
||||
Currently SMS relies on the do-loop pattern to recognize such loops,
|
||||
where (1) the control part comprises of all insns defining and/or
|
||||
@@ -116,8 +115,10 @@
|
||||
|
||||
/* The number of different iterations the nodes in ps span, assuming
|
||||
the stage boundaries are placed efficiently. */
|
||||
-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
|
||||
- + 1 + (ps)->ii - 1) / (ps)->ii)
|
||||
+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
|
||||
+ + 1 + ii - 1) / ii)
|
||||
+/* The stage count of ps. */
|
||||
+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
|
||||
|
||||
/* A single instruction in the partial schedule. */
|
||||
struct ps_insn
|
||||
@@ -155,6 +156,8 @@
|
||||
int max_cycle;
|
||||
|
||||
ddg_ptr g; /* The DDG of the insns in the partial schedule. */
|
||||
+
|
||||
+ int stage_count; /* The stage count of the partial schedule. */
|
||||
};
|
||||
|
||||
/* We use this to record all the register replacements we do in
|
||||
@@ -195,7 +198,7 @@
|
||||
rtx, rtx);
|
||||
static void duplicate_insns_of_cycles (partial_schedule_ptr,
|
||||
int, int, int, rtx);
|
||||
-
|
||||
+static int calculate_stage_count (partial_schedule_ptr ps);
|
||||
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
|
||||
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
|
||||
#define SCHED_FIRST_REG_MOVE(x) \
|
||||
@@ -310,10 +313,10 @@
|
||||
either a single (parallel) branch-on-count or a (non-parallel)
|
||||
branch immediately preceded by a single (decrement) insn. */
|
||||
first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
|
||||
- : PREV_INSN (tail));
|
||||
+ : prev_nondebug_insn (tail));
|
||||
|
||||
for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
|
||||
- if (reg_mentioned_p (reg, insn))
|
||||
+ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn))
|
||||
{
|
||||
if (dump_file)
|
||||
{
|
||||
@@ -569,13 +572,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
|
||||
- of SCHED_ROW and SCHED_STAGE. */
|
||||
+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
|
||||
+ SCHED_ROW and SCHED_STAGE. */
|
||||
static void
|
||||
-normalize_sched_times (partial_schedule_ptr ps)
|
||||
+reset_sched_times (partial_schedule_ptr ps, int amount)
|
||||
{
|
||||
int row;
|
||||
- int amount = PS_MIN_CYCLE (ps);
|
||||
int ii = ps->ii;
|
||||
ps_insn_ptr crr_insn;
|
||||
|
||||
@@ -584,19 +586,43 @@
|
||||
{
|
||||
ddg_node_ptr u = crr_insn->node;
|
||||
int normalized_time = SCHED_TIME (u) - amount;
|
||||
+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
|
||||
+ int sc_until_cycle_zero, stage;
|
||||
|
||||
- if (dump_file)
|
||||
- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
|
||||
- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
|
||||
- (u), ps->min_cycle);
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ /* Print the scheduling times after the rotation. */
|
||||
+ fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
|
||||
+ "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
|
||||
+ INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
|
||||
+ normalized_time);
|
||||
+ if (JUMP_P (crr_insn->node->insn))
|
||||
+ fprintf (dump_file, " (branch)");
|
||||
+ fprintf (dump_file, "\n");
|
||||
+ }
|
||||
+
|
||||
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
|
||||
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
|
||||
SCHED_TIME (u) = normalized_time;
|
||||
- SCHED_ROW (u) = normalized_time % ii;
|
||||
- SCHED_STAGE (u) = normalized_time / ii;
|
||||
+ SCHED_ROW (u) = SMODULO (normalized_time, ii);
|
||||
+
|
||||
+ /* The calculation of stage count is done adding the number
|
||||
+ of stages before cycle zero and after cycle zero. */
|
||||
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
|
||||
+
|
||||
+ if (SCHED_TIME (u) < 0)
|
||||
+ {
|
||||
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
|
||||
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
|
||||
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
-
|
||||
+
|
||||
/* Set SCHED_COLUMN of each node according to its position in PS. */
|
||||
static void
|
||||
set_columns_for_ps (partial_schedule_ptr ps)
|
||||
@@ -646,9 +672,12 @@
|
||||
|
||||
/* Do not duplicate any insn which refers to count_reg as it
|
||||
belongs to the control part.
|
||||
+ The closing branch is scheduled as well and thus should
|
||||
+ be ignored.
|
||||
TODO: This should be done by analyzing the control part of
|
||||
the loop. */
|
||||
- if (reg_mentioned_p (count_reg, u_node->insn))
|
||||
+ if (reg_mentioned_p (count_reg, u_node->insn)
|
||||
+ || JUMP_P (ps_ij->node->insn))
|
||||
continue;
|
||||
|
||||
if (for_prolog)
|
||||
@@ -1009,9 +1038,11 @@
|
||||
continue;
|
||||
}
|
||||
|
||||
- /* Don't handle BBs with calls or barriers, or !single_set insns,
|
||||
- or auto-increment insns (to avoid creating invalid reg-moves
|
||||
- for the auto-increment insns).
|
||||
+ /* Don't handle BBs with calls or barriers or auto-increment insns
|
||||
+ (to avoid creating invalid reg-moves for the auto-increment insns),
|
||||
+ or !single_set with the exception of instructions that include
|
||||
+ count_reg---these instructions are part of the control part
|
||||
+ that do-loop recognizes.
|
||||
??? Should handle auto-increment insns.
|
||||
??? Should handle insns defining subregs. */
|
||||
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
|
||||
@@ -1021,7 +1052,8 @@
|
||||
if (CALL_P (insn)
|
||||
|| BARRIER_P (insn)
|
||||
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
|
||||
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
|
||||
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
|
||||
+ && !reg_mentioned_p (count_reg, insn))
|
||||
|| (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|
||||
|| (INSN_P (insn) && (set = single_set (insn))
|
||||
&& GET_CODE (SET_DEST (set)) == SUBREG))
|
||||
@@ -1049,7 +1081,11 @@
|
||||
continue;
|
||||
}
|
||||
|
||||
- if (! (g = create_ddg (bb, 0)))
|
||||
+ /* Always schedule the closing branch with the rest of the
|
||||
+ instructions. The branch is rotated to be in row ii-1 at the
|
||||
+ end of the scheduling procedure to make sure it's the last
|
||||
+ instruction in the iteration. */
|
||||
+ if (! (g = create_ddg (bb, 1)))
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "SMS create_ddg failed\n");
|
||||
@@ -1157,14 +1193,17 @@
|
||||
|
||||
ps = sms_schedule_by_order (g, mii, maxii, node_order);
|
||||
|
||||
- if (ps){
|
||||
- stage_count = PS_STAGE_COUNT (ps);
|
||||
- gcc_assert(stage_count >= 1);
|
||||
- }
|
||||
+ if (ps)
|
||||
+ {
|
||||
+ stage_count = calculate_stage_count (ps);
|
||||
+ gcc_assert(stage_count >= 1);
|
||||
+ PS_STAGE_COUNT(ps) = stage_count;
|
||||
+ }
|
||||
|
||||
- /* Stage count of 1 means that there is no interleaving between
|
||||
- iterations, let the scheduling passes do the job. */
|
||||
- if (stage_count <= 1
|
||||
+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
|
||||
+ 1 means that there is no interleaving between iterations thus
|
||||
+ we let the scheduling passes do the job in this case. */
|
||||
+ if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
|
||||
|| (count_init && (loop_count <= stage_count))
|
||||
|| (flag_branch_probabilities && (trip_count <= stage_count)))
|
||||
{
|
||||
@@ -1182,32 +1221,24 @@
|
||||
else
|
||||
{
|
||||
struct undo_replace_buff_elem *reg_move_replaces;
|
||||
-
|
||||
- if (dump_file)
|
||||
- {
|
||||
+ int amount = SCHED_TIME (g->closing_branch) + 1;
|
||||
+
|
||||
+ /* Set the stage boundaries. The closing_branch was scheduled
|
||||
+ and should appear in the last (ii-1) row. */
|
||||
+ reset_sched_times (ps, amount);
|
||||
+ rotate_partial_schedule (ps, amount);
|
||||
+ set_columns_for_ps (ps);
|
||||
+
|
||||
+ canon_loop (loop);
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
fprintf (dump_file,
|
||||
"SMS succeeded %d %d (with ii, sc)\n", ps->ii,
|
||||
stage_count);
|
||||
print_partial_schedule (ps, dump_file);
|
||||
- fprintf (dump_file,
|
||||
- "SMS Branch (%d) will later be scheduled at cycle %d.\n",
|
||||
- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
|
||||
}
|
||||
-
|
||||
- /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
|
||||
- the closing_branch was scheduled and should appear in the last (ii-1)
|
||||
- row. Otherwise, we are free to schedule the branch, and we let nodes
|
||||
- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
|
||||
- row; this should reduce stage_count to minimum.
|
||||
- TODO: Revisit the issue of scheduling the insns of the
|
||||
- control part relative to the branch when the control part
|
||||
- has more than one insn. */
|
||||
- normalize_sched_times (ps);
|
||||
- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
|
||||
- set_columns_for_ps (ps);
|
||||
-
|
||||
- canon_loop (loop);
|
||||
-
|
||||
+
|
||||
/* case the BCT count is not known , Do loop-versioning */
|
||||
if (count_reg && ! count_init)
|
||||
{
|
||||
@@ -1760,12 +1791,6 @@
|
||||
continue;
|
||||
}
|
||||
|
||||
- if (JUMP_P (insn)) /* Closing branch handled later. */
|
||||
- {
|
||||
- RESET_BIT (tobe_scheduled, u);
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
if (TEST_BIT (sched_nodes, u))
|
||||
continue;
|
||||
|
||||
@@ -1893,8 +1918,8 @@
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "split_row=%d\n", split_row);
|
||||
|
||||
- normalize_sched_times (ps);
|
||||
- rotate_partial_schedule (ps, ps->min_cycle);
|
||||
+ reset_sched_times (ps, PS_MIN_CYCLE (ps));
|
||||
+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
|
||||
|
||||
rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
|
||||
for (row = 0; row < split_row; row++)
|
||||
@@ -2571,6 +2596,7 @@
|
||||
ps_insn_ptr next_ps_i;
|
||||
ps_insn_ptr first_must_follow = NULL;
|
||||
ps_insn_ptr last_must_precede = NULL;
|
||||
+ ps_insn_ptr last_in_row = NULL;
|
||||
int row;
|
||||
|
||||
if (! ps_i)
|
||||
@@ -2597,8 +2623,37 @@
|
||||
else
|
||||
last_must_precede = next_ps_i;
|
||||
}
|
||||
+ /* The closing branch must be the last in the row. */
|
||||
+ if (must_precede
|
||||
+ && TEST_BIT (must_precede, next_ps_i->node->cuid)
|
||||
+ && JUMP_P (next_ps_i->node->insn))
|
||||
+ return false;
|
||||
+
|
||||
+ last_in_row = next_ps_i;
|
||||
}
|
||||
|
||||
+ /* The closing branch is scheduled as well. Make sure there is no
|
||||
+ dependent instruction after it as the branch should be the last
|
||||
+ instruction in the row. */
|
||||
+ if (JUMP_P (ps_i->node->insn))
|
||||
+ {
|
||||
+ if (first_must_follow)
|
||||
+ return false;
|
||||
+ if (last_in_row)
|
||||
+ {
|
||||
+ /* Make the branch the last in the row. New instructions
|
||||
+ will be inserted at the beginning of the row or after the
|
||||
+ last must_precede instruction thus the branch is guaranteed
|
||||
+ to remain the last instruction in the row. */
|
||||
+ last_in_row->next_in_row = ps_i;
|
||||
+ ps_i->prev_in_row = last_in_row;
|
||||
+ ps_i->next_in_row = NULL;
|
||||
+ }
|
||||
+ else
|
||||
+ ps->rows[row] = ps_i;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
/* Now insert the node after INSERT_AFTER_PSI. */
|
||||
|
||||
if (! last_must_precede)
|
||||
@@ -2820,6 +2875,24 @@
|
||||
return ps_i;
|
||||
}
|
||||
|
||||
+/* Calculate the stage count of the partial schedule PS. The calculation
|
||||
+ takes into account the rotation to bring the closing branch to row
|
||||
+ ii-1. */
|
||||
+int
|
||||
+calculate_stage_count (partial_schedule_ptr ps)
|
||||
+{
|
||||
+ int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
|
||||
+ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
|
||||
+ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
|
||||
+ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
|
||||
+
|
||||
+ /* The calculation of stage count is done adding the number of stages
|
||||
+ before cycle zero and after cycle zero. */
|
||||
+ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
|
||||
+
|
||||
+ return stage_count;
|
||||
+}
|
||||
+
|
||||
/* Rotate the rows of PS such that insns scheduled at time
|
||||
START_CYCLE will appear in row 0. Updates max/min_cycles. */
|
||||
void
|
||||
|
||||
=== modified file 'gcc/params.def'
|
||||
--- old/gcc/params.def 2011-04-18 11:31:29 +0000
|
||||
+++ new/gcc/params.def 2011-05-11 07:15:47 +0000
|
||||
@@ -344,6 +344,11 @@
|
||||
"sms-max-ii-factor",
|
||||
"A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop",
|
||||
100, 0, 0)
|
||||
+/* The minimum value of stage count that swing modulo scheduler will generate. */
|
||||
+DEFPARAM(PARAM_SMS_MIN_SC,
|
||||
+ "sms-min-sc",
|
||||
+ "The minimum value of stage count that swing modulo scheduler will generate.",
|
||||
+ 2, 1, 1)
|
||||
DEFPARAM(PARAM_SMS_DFA_HISTORY,
|
||||
"sms-dfa-history",
|
||||
"The number of cycles the swing modulo scheduler considers when checking conflicts using DFA",
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
2011-05-13 Revital Eres <revital.eres@linaro.org>
|
||||
|
||||
gcc/
|
||||
* ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs.
|
||||
* modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails.
|
||||
|
||||
=== modified file 'gcc/ddg.c'
|
||||
--- old/gcc/ddg.c 2011-05-11 07:15:47 +0000
|
||||
+++ new/gcc/ddg.c 2011-05-13 16:03:40 +0000
|
||||
@@ -1016,6 +1016,7 @@
|
||||
for (i = 0; i < all_sccs->num_sccs; i++)
|
||||
free_scc (all_sccs->sccs[i]);
|
||||
|
||||
+ free (all_sccs->sccs);
|
||||
free (all_sccs);
|
||||
}
|
||||
|
||||
|
||||
=== modified file 'gcc/modulo-sched.c'
|
||||
--- old/gcc/modulo-sched.c 2011-05-11 07:15:47 +0000
|
||||
+++ new/gcc/modulo-sched.c 2011-05-13 16:03:40 +0000
|
||||
@@ -1216,7 +1216,6 @@
|
||||
fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
|
||||
fprintf (dump_file, ")\n");
|
||||
}
|
||||
- continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@ -0,0 +1,134 @@
|
||||
2011-06-02 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
Backport from mainline:
|
||||
|
||||
2011-03-21 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* simplify-rtx.c (simplify_binary_operation_1): Handle
|
||||
(xor (and A B) C) case when B and C are both constants.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/arm/xor-and.c: New.
|
||||
|
||||
2011-03-18 Chung-Lin Tang <cltang@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* combine.c (try_combine): Do simplification only call of
|
||||
subst() on i2 even when i1 is present. Update comments.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/arm/unsigned-extend-1.c: New.
|
||||
|
||||
=== modified file 'gcc/combine.c'
|
||||
--- old/gcc/combine.c 2011-05-06 11:28:27 +0000
|
||||
+++ new/gcc/combine.c 2011-05-27 14:31:18 +0000
|
||||
@@ -3089,7 +3089,7 @@
|
||||
/* It is possible that the source of I2 or I1 may be performing
|
||||
an unneeded operation, such as a ZERO_EXTEND of something
|
||||
that is known to have the high part zero. Handle that case
|
||||
- by letting subst look at the innermost one of them.
|
||||
+ by letting subst look at the inner insns.
|
||||
|
||||
Another way to do this would be to have a function that tries
|
||||
to simplify a single insn instead of merging two or more
|
||||
@@ -3114,11 +3114,9 @@
|
||||
subst_low_luid = DF_INSN_LUID (i1);
|
||||
i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
}
|
||||
- else
|
||||
- {
|
||||
- subst_low_luid = DF_INSN_LUID (i2);
|
||||
- i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
- }
|
||||
+
|
||||
+ subst_low_luid = DF_INSN_LUID (i2);
|
||||
+ i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
|
||||
}
|
||||
|
||||
n_occurrences = 0; /* `subst' counts here */
|
||||
|
||||
=== modified file 'gcc/simplify-rtx.c'
|
||||
--- old/gcc/simplify-rtx.c 2011-03-26 09:24:06 +0000
|
||||
+++ new/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
|
||||
@@ -2484,6 +2484,46 @@
|
||||
XEXP (op0, 1), mode),
|
||||
op1);
|
||||
|
||||
+ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P),
|
||||
+ we can transform like this:
|
||||
+ (A&B)^C == ~(A&B)&C | ~C&(A&B)
|
||||
+ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law
|
||||
+ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order
|
||||
+ Attempt a few simplifications when B and C are both constants. */
|
||||
+ if (GET_CODE (op0) == AND
|
||||
+ && CONST_INT_P (op1)
|
||||
+ && CONST_INT_P (XEXP (op0, 1)))
|
||||
+ {
|
||||
+ rtx a = XEXP (op0, 0);
|
||||
+ rtx b = XEXP (op0, 1);
|
||||
+ rtx c = op1;
|
||||
+ HOST_WIDE_INT bval = INTVAL (b);
|
||||
+ HOST_WIDE_INT cval = INTVAL (c);
|
||||
+
|
||||
+ rtx na_c
|
||||
+ = simplify_binary_operation (AND, mode,
|
||||
+ simplify_gen_unary (NOT, mode, a, mode),
|
||||
+ c);
|
||||
+ if ((~cval & bval) == 0)
|
||||
+ {
|
||||
+ /* Try to simplify ~A&C | ~B&C. */
|
||||
+ if (na_c != NULL_RTX)
|
||||
+ return simplify_gen_binary (IOR, mode, na_c,
|
||||
+ GEN_INT (~bval & cval));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */
|
||||
+ if (na_c == const0_rtx)
|
||||
+ {
|
||||
+ rtx a_nc_b = simplify_gen_binary (AND, mode, a,
|
||||
+ GEN_INT (~cval & bval));
|
||||
+ return simplify_gen_binary (IOR, mode, a_nc_b,
|
||||
+ GEN_INT (~bval & cval));
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* (xor (comparison foo bar) (const_int 1)) can become the reversed
|
||||
comparison if STORE_FLAG_VALUE is 1. */
|
||||
if (STORE_FLAG_VALUE == 1
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/unsigned-extend-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c 2011-05-27 14:31:18 +0000
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv6" } */
|
||||
+
|
||||
+unsigned char foo (unsigned char c)
|
||||
+{
|
||||
+ return (c >= '0') && (c <= '9');
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not "uxtb" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/xor-and.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/xor-and.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/xor-and.c 2011-05-27 14:31:18 +0000
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O -march=armv6" } */
|
||||
+
|
||||
+unsigned short foo (unsigned short x)
|
||||
+{
|
||||
+ x ^= 0x4002;
|
||||
+ x >>= 1;
|
||||
+ x |= 0x8000;
|
||||
+ return x;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "orr" } } */
|
||||
+/* { dg-final { scan-assembler-not "mvn" } } */
|
||||
+/* { dg-final { scan-assembler-not "uxth" } } */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,329 @@
|
||||
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns.
|
||||
(*maddhisi4tb, *maddhisi4tt): New define_insns.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/arm/smlatb-1.c: New file.
|
||||
* gcc.target/arm/smlatt-1.c: New file.
|
||||
* gcc.target/arm/smlaltb-1.c: New file.
|
||||
* gcc.target/arm/smlaltt-1.c: New file.
|
||||
|
||||
2011-06-07 Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
Backport from FSF:
|
||||
|
||||
2011-06-07 Bernd Schmidt <bernds@codesourcery.com>
|
||||
Andrew Stubbs <ams@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening
|
||||
multiplies.
|
||||
* doc/md.texi (Canonicalization of Instructions): Document widening
|
||||
multiply canonicalization.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/arm/mla-2.c: New test.
|
||||
|
||||
=== modified file 'gcc/config/arm/arm.md'
|
||||
--- old/gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
|
||||
+++ new/gcc/config/arm/arm.md 2011-06-02 15:58:33 +0000
|
||||
@@ -1809,6 +1809,36 @@
|
||||
(set_attr "predicable" "yes")]
|
||||
)
|
||||
|
||||
+;; Note: there is no maddhisi4ibt because this one is canonical form
|
||||
+(define_insn "*maddhisi4tb"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
+ (plus:SI (mult:SI (ashiftrt:SI
|
||||
+ (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (const_int 16))
|
||||
+ (sign_extend:SI
|
||||
+ (match_operand:HI 2 "s_register_operand" "r")))
|
||||
+ (match_operand:SI 3 "s_register_operand" "r")))]
|
||||
+ "TARGET_DSP_MULTIPLY"
|
||||
+ "smlatb%?\\t%0, %1, %2, %3"
|
||||
+ [(set_attr "insn" "smlaxy")
|
||||
+ (set_attr "predicable" "yes")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "*maddhisi4tt"
|
||||
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
|
||||
+ (plus:SI (mult:SI (ashiftrt:SI
|
||||
+ (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (const_int 16))
|
||||
+ (ashiftrt:SI
|
||||
+ (match_operand:SI 2 "s_register_operand" "r")
|
||||
+ (const_int 16)))
|
||||
+ (match_operand:SI 3 "s_register_operand" "r")))]
|
||||
+ "TARGET_DSP_MULTIPLY"
|
||||
+ "smlatt%?\\t%0, %1, %2, %3"
|
||||
+ [(set_attr "insn" "smlaxy")
|
||||
+ (set_attr "predicable" "yes")]
|
||||
+)
|
||||
+
|
||||
(define_insn "*maddhidi4"
|
||||
[(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
(plus:DI
|
||||
@@ -1822,6 +1852,39 @@
|
||||
[(set_attr "insn" "smlalxy")
|
||||
(set_attr "predicable" "yes")])
|
||||
|
||||
+;; Note: there is no maddhidi4ibt because this one is canonical form
|
||||
+(define_insn "*maddhidi4tb"
|
||||
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
+ (plus:DI
|
||||
+ (mult:DI (sign_extend:DI
|
||||
+ (ashiftrt:SI
|
||||
+ (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (const_int 16)))
|
||||
+ (sign_extend:DI
|
||||
+ (match_operand:HI 2 "s_register_operand" "r")))
|
||||
+ (match_operand:DI 3 "s_register_operand" "0")))]
|
||||
+ "TARGET_DSP_MULTIPLY"
|
||||
+ "smlaltb%?\\t%Q0, %R0, %1, %2"
|
||||
+ [(set_attr "insn" "smlalxy")
|
||||
+ (set_attr "predicable" "yes")])
|
||||
+
|
||||
+(define_insn "*maddhidi4tt"
|
||||
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
|
||||
+ (plus:DI
|
||||
+ (mult:DI (sign_extend:DI
|
||||
+ (ashiftrt:SI
|
||||
+ (match_operand:SI 1 "s_register_operand" "r")
|
||||
+ (const_int 16)))
|
||||
+ (sign_extend:DI
|
||||
+ (ashiftrt:SI
|
||||
+ (match_operand:SI 2 "s_register_operand" "r")
|
||||
+ (const_int 16))))
|
||||
+ (match_operand:DI 3 "s_register_operand" "0")))]
|
||||
+ "TARGET_DSP_MULTIPLY"
|
||||
+ "smlaltt%?\\t%Q0, %R0, %1, %2"
|
||||
+ [(set_attr "insn" "smlalxy")
|
||||
+ (set_attr "predicable" "yes")])
|
||||
+
|
||||
(define_expand "mulsf3"
|
||||
[(set (match_operand:SF 0 "s_register_operand" "")
|
||||
(mult:SF (match_operand:SF 1 "s_register_operand" "")
|
||||
|
||||
=== modified file 'gcc/doc/md.texi'
|
||||
--- old/gcc/doc/md.texi 2011-05-05 15:43:06 +0000
|
||||
+++ new/gcc/doc/md.texi 2011-06-07 11:18:20 +0000
|
||||
@@ -5929,6 +5929,23 @@
|
||||
will be written using @code{zero_extract} rather than the equivalent
|
||||
@code{and} or @code{sign_extract} operations.
|
||||
|
||||
+@cindex @code{mult}, canonicalization of
|
||||
+@item
|
||||
+@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x})
|
||||
+(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1}
|
||||
+(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise
|
||||
+for @code{zero_extend}.
|
||||
+
|
||||
+@item
|
||||
+@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2}
|
||||
+@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted
|
||||
+to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2}
|
||||
+@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for
|
||||
+patterns using @code{zero_extend} and @code{lshiftrt}. If the second
|
||||
+operand of @code{mult} is also a shift, then that is extended also.
|
||||
+This transformation is only applied when it can be proven that the
|
||||
+original operation had sufficient precision to prevent overflow.
|
||||
+
|
||||
@end itemize
|
||||
|
||||
Further canonicalization rules are defined in the function
|
||||
|
||||
=== modified file 'gcc/simplify-rtx.c'
|
||||
--- old/gcc/simplify-rtx.c 2011-05-27 14:31:18 +0000
|
||||
+++ new/gcc/simplify-rtx.c 2011-06-02 12:32:16 +0000
|
||||
@@ -1000,6 +1000,48 @@
|
||||
&& GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF)
|
||||
return XEXP (op, 0);
|
||||
|
||||
+ /* Extending a widening multiplication should be canonicalized to
|
||||
+ a wider widening multiplication. */
|
||||
+ if (GET_CODE (op) == MULT)
|
||||
+ {
|
||||
+ rtx lhs = XEXP (op, 0);
|
||||
+ rtx rhs = XEXP (op, 1);
|
||||
+ enum rtx_code lcode = GET_CODE (lhs);
|
||||
+ enum rtx_code rcode = GET_CODE (rhs);
|
||||
+
|
||||
+ /* Widening multiplies usually extend both operands, but sometimes
|
||||
+ they use a shift to extract a portion of a register. */
|
||||
+ if ((lcode == SIGN_EXTEND
|
||||
+ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
|
||||
+ && (rcode == SIGN_EXTEND
|
||||
+ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
|
||||
+ {
|
||||
+ enum machine_mode lmode = GET_MODE (lhs);
|
||||
+ enum machine_mode rmode = GET_MODE (rhs);
|
||||
+ int bits;
|
||||
+
|
||||
+ if (lcode == ASHIFTRT)
|
||||
+ /* Number of bits not shifted off the end. */
|
||||
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
|
||||
+ else /* lcode == SIGN_EXTEND */
|
||||
+ /* Size of inner mode. */
|
||||
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
|
||||
+
|
||||
+ if (rcode == ASHIFTRT)
|
||||
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
|
||||
+ else /* rcode == SIGN_EXTEND */
|
||||
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
|
||||
+
|
||||
+ /* We can only widen multiplies if the result is mathematiclly
|
||||
+ equivalent. I.e. if overflow was impossible. */
|
||||
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
|
||||
+ return simplify_gen_binary
|
||||
+ (MULT, mode,
|
||||
+ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode),
|
||||
+ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode));
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Check for a sign extension of a subreg of a promoted
|
||||
variable, where the promotion is sign-extended, and the
|
||||
target mode is the same as the variable's promotion. */
|
||||
@@ -1071,6 +1113,48 @@
|
||||
&& GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
|
||||
return rtl_hooks.gen_lowpart_no_emit (mode, op);
|
||||
|
||||
+ /* Extending a widening multiplication should be canonicalized to
|
||||
+ a wider widening multiplication. */
|
||||
+ if (GET_CODE (op) == MULT)
|
||||
+ {
|
||||
+ rtx lhs = XEXP (op, 0);
|
||||
+ rtx rhs = XEXP (op, 1);
|
||||
+ enum rtx_code lcode = GET_CODE (lhs);
|
||||
+ enum rtx_code rcode = GET_CODE (rhs);
|
||||
+
|
||||
+ /* Widening multiplies usually extend both operands, but sometimes
|
||||
+ they use a shift to extract a portion of a register. */
|
||||
+ if ((lcode == ZERO_EXTEND
|
||||
+ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
|
||||
+ && (rcode == ZERO_EXTEND
|
||||
+ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
|
||||
+ {
|
||||
+ enum machine_mode lmode = GET_MODE (lhs);
|
||||
+ enum machine_mode rmode = GET_MODE (rhs);
|
||||
+ int bits;
|
||||
+
|
||||
+ if (lcode == LSHIFTRT)
|
||||
+ /* Number of bits not shifted off the end. */
|
||||
+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
|
||||
+ else /* lcode == ZERO_EXTEND */
|
||||
+ /* Size of inner mode. */
|
||||
+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
|
||||
+
|
||||
+ if (rcode == LSHIFTRT)
|
||||
+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
|
||||
+ else /* rcode == ZERO_EXTEND */
|
||||
+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
|
||||
+
|
||||
+ /* We can only widen multiplies if the result is mathematiclly
|
||||
+ equivalent. I.e. if overflow was impossible. */
|
||||
+ if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
|
||||
+ return simplify_gen_binary
|
||||
+ (MULT, mode,
|
||||
+ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode),
|
||||
+ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode));
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>). */
|
||||
if (GET_CODE (op) == ZERO_EXTEND)
|
||||
return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0),
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/mla-2.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/mla-2.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/mla-2.c 2011-06-02 12:32:16 +0000
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv7-a" } */
|
||||
+
|
||||
+long long foolong (long long x, short *a, short *b)
|
||||
+{
|
||||
+ return x + *a * *b;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "smlalbb" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/smlaltb-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/smlaltb-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/smlaltb-1.c 2011-06-02 15:58:33 +0000
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv7-a" } */
|
||||
+
|
||||
+long long int
|
||||
+foo (long long x, int in)
|
||||
+{
|
||||
+ short a = in & 0xffff;
|
||||
+ short b = (in & 0xffff0000) >> 16;
|
||||
+
|
||||
+ return x + b * a;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "smlaltb" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/smlaltt-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/smlaltt-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/smlaltt-1.c 2011-06-02 15:58:33 +0000
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv7-a" } */
|
||||
+
|
||||
+long long int
|
||||
+foo (long long x, int in1, int in2)
|
||||
+{
|
||||
+ short a = (in1 & 0xffff0000) >> 16;
|
||||
+ short b = (in2 & 0xffff0000) >> 16;
|
||||
+
|
||||
+ return x + b * a;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "smlaltt" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/smlatb-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/smlatb-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/smlatb-1.c 2011-06-02 15:58:33 +0000
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv7-a" } */
|
||||
+
|
||||
+int
|
||||
+foo (int x, int in)
|
||||
+{
|
||||
+ short a = in & 0xffff;
|
||||
+ short b = (in & 0xffff0000) >> 16;
|
||||
+
|
||||
+ return x + b * a;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "smlatb" } } */
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/smlatt-1.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/smlatt-1.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/smlatt-1.c 2011-06-02 15:58:33 +0000
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -march=armv7-a" } */
|
||||
+
|
||||
+int
|
||||
+foo (int x, int in1, int in2)
|
||||
+{
|
||||
+ short a = (in1 & 0xffff0000) >> 16;
|
||||
+ short b = (in2 & 0xffff0000) >> 16;
|
||||
+
|
||||
+ return x + b * a;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "smlatt" } } */
|
||||
|
||||
@ -0,0 +1,120 @@
|
||||
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline:
|
||||
gcc/
|
||||
2011-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
* config/arm/neon.md (orndi3_neon): Actually split it.
|
||||
|
||||
|
||||
2011-06-10 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
Backport from mainline.
|
||||
gcc/
|
||||
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
* config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not.
|
||||
("orndi3_neon"): Likewise.
|
||||
("bic<mode>3_neon"): Likewise.
|
||||
|
||||
gcc/testsuite
|
||||
2011-05-26 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
|
||||
|
||||
* gcc.target/arm/neon-vorn-vbic.c: New test.
|
||||
|
||||
=== modified file 'gcc/config/arm/neon.md'
|
||||
--- old/gcc/config/arm/neon.md 2011-06-02 12:12:00 +0000
|
||||
+++ new/gcc/config/arm/neon.md 2011-06-04 00:04:47 +0000
|
||||
@@ -783,30 +783,57 @@
|
||||
|
||||
(define_insn "orn<mode>3_neon"
|
||||
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||||
- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
|
||||
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
|
||||
+ (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
|
||||
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
|
||||
"TARGET_NEON"
|
||||
"vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "neon_type" "neon_int_1")]
|
||||
)
|
||||
|
||||
-(define_insn "orndi3_neon"
|
||||
- [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
|
||||
- (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
|
||||
- (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
|
||||
+;; TODO: investigate whether we should disable
|
||||
+;; this and bicdi3_neon for the A8 in line with the other
|
||||
+;; changes above.
|
||||
+(define_insn_and_split "orndi3_neon"
|
||||
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
|
||||
+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
|
||||
+ (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
|
||||
"TARGET_NEON"
|
||||
"@
|
||||
vorn\t%P0, %P1, %P2
|
||||
#
|
||||
+ #
|
||||
#"
|
||||
- [(set_attr "neon_type" "neon_int_1,*,*")
|
||||
- (set_attr "length" "*,8,8")]
|
||||
+ "reload_completed &&
|
||||
+ (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
|
||||
+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
|
||||
+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
|
||||
+ "
|
||||
+ {
|
||||
+ if (TARGET_THUMB2)
|
||||
+ {
|
||||
+ operands[3] = gen_highpart (SImode, operands[0]);
|
||||
+ operands[0] = gen_lowpart (SImode, operands[0]);
|
||||
+ operands[4] = gen_highpart (SImode, operands[2]);
|
||||
+ operands[2] = gen_lowpart (SImode, operands[2]);
|
||||
+ operands[5] = gen_highpart (SImode, operands[1]);
|
||||
+ operands[1] = gen_lowpart (SImode, operands[1]);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
|
||||
+ emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
|
||||
+ DONE;
|
||||
+ }
|
||||
+ }"
|
||||
+ [(set_attr "neon_type" "neon_int_1,*,*,*")
|
||||
+ (set_attr "length" "*,16,8,8")
|
||||
+ (set_attr "arch" "any,a,t2,t2")]
|
||||
)
|
||||
|
||||
(define_insn "bic<mode>3_neon"
|
||||
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
|
||||
- (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
|
||||
- (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
|
||||
+ (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
|
||||
+ (match_operand:VDQ 1 "s_register_operand" "w")))]
|
||||
"TARGET_NEON"
|
||||
"vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
|
||||
[(set_attr "neon_type" "neon_int_1")]
|
||||
|
||||
=== added file 'gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c'
|
||||
--- old/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 1970-01-01 00:00:00 +0000
|
||||
+++ new/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c 2011-06-03 23:50:02 +0000
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-require-effective-target arm_neon_ok } */
|
||||
+/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
+/* { dg-add-options arm_neon } */
|
||||
+
|
||||
+void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < 9; i++)
|
||||
+ c[i] = b[i] | (~a[i]);
|
||||
+}
|
||||
+void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < 9; i++)
|
||||
+ c[i] = b[i] & (~a[i]);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "vorn\\t" } } */
|
||||
+/* { dg-final { scan-assembler "vbic\\t" } } */
|
||||
|
||||
@ -2,7 +2,6 @@ GCC-4_6-BRANCH-LINARO-BACKPORTS = " \
|
||||
file://linaro/gcc-4.6-linaro-r106720.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106723.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106729.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106731.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106733.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106737.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106738.patch \
|
||||
@ -15,4 +14,8 @@ file://linaro/gcc-4.6-linaro-r106744.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106746.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106747.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106750.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106751.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106753.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106754.patch \
|
||||
file://linaro/gcc-4.6-linaro-r106755.patch \
|
||||
"
|
||||
|
||||
3
meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
Normal file
3
meta-oe/recipes-devtools/gcc/gcc-common-4.6.inc
Normal file
@ -0,0 +1,3 @@
|
||||
# this will prepend this layer to FILESPATH
|
||||
FILESEXTRAPATHS := "${THISDIR}/gcc-4.6.0"
|
||||
PRINC = "0"
|
||||
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
3
meta-oe/recipes-devtools/gcc/gcc-cross_4.6.0.bbappend
Normal file
3
meta-oe/recipes-devtools/gcc/gcc-cross_4.6.0.bbappend
Normal file
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
3
meta-oe/recipes-devtools/gcc/gcc-crosssdk_4.6.0.bbappend
Normal file
3
meta-oe/recipes-devtools/gcc/gcc-crosssdk_4.6.0.bbappend
Normal file
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
3
meta-oe/recipes-devtools/gcc/gcc-runtime_4.6.0.bbappend
Normal file
3
meta-oe/recipes-devtools/gcc/gcc-runtime_4.6.0.bbappend
Normal file
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
3
meta-oe/recipes-devtools/gcc/gcc_4.6.0.bbappend
Normal file
3
meta-oe/recipes-devtools/gcc/gcc_4.6.0.bbappend
Normal file
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
3
meta-oe/recipes-devtools/gcc/libgcc_4.6.0.bbappend
Normal file
3
meta-oe/recipes-devtools/gcc/libgcc_4.6.0.bbappend
Normal file
@ -0,0 +1,3 @@
|
||||
require recipes-devtools/gcc/gcc-4_6-branch-linaro-backports.inc
|
||||
require recipes-devtools/gcc/gcc-common-4.6.inc
|
||||
SRC_URI += "${GCC-4_6-BRANCH-LINARO-BACKPORTS}"
|
||||
Loading…
x
Reference in New Issue
Block a user