mirror of
git://git.openembedded.org/meta-openembedded
synced 2026-04-02 02:49:12 +00:00
This needs further investigation, but for now we can get the tested sources into the poky gcc harness. Signed-off-by: Koen Kooi <k-kooi@ti.com>
715 lines
22 KiB
Diff
715 lines
22 KiB
Diff
2010-07-15 Sandra Loosemore <sandra@codesourcery.com>
|
|
|
|
Backport from mainline:
|
|
|
|
2010-06-09 Sandra Loosemore <sandra@codesourcery.com>
|
|
|
|
gcc/
|
|
* tree-ssa-loop-ivopts.c (adjust_setup_cost): New function.
|
|
(get_computation_cost_at): Use it.
|
|
(determine_use_iv_cost_condition): Likewise.
|
|
(determine_iv_cost): Likewise.
|
|
|
|
2010-07-05 Sandra Loosemore <sandra@codesourcery.com>
|
|
|
|
PR middle-end/42505
|
|
|
|
gcc/
|
|
* tree-ssa-loop-ivopts.c (determine_set_costs): Delete obsolete
|
|
comments about cost model.
|
|
(try_add_cand_for): Add second strategy for choosing initial set
|
|
based on original IVs, controlled by ORIGINALP argument.
|
|
(get_initial_solution): Add ORIGINALP argument.
|
|
(find_optimal_iv_set_1): New function, split from find_optimal_iv_set.
|
|
(find_optimal_iv_set): Try two different strategies for choosing
|
|
the IV set, and return the one with lower cost.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/arm/pr42505.c: New test case.
|
|
|
|
2010-07-10 Sandra Loosemore <sandra@codesourcery.com>
|
|
|
|
PR middle-end/42505
|
|
|
|
gcc/
|
|
* tree-inline.c (estimate_num_insns): Refactor builtin complexity
|
|
lookup code into....
|
|
* builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these
|
|
new functions.
|
|
* tree.h (is_simple_builtin, is_inexpensive_builtin): Declare.
|
|
* cfgloopanal.c (target_clobbered_regs): Define.
|
|
(init_set_costs): Initialize target_clobbered_regs.
|
|
(estimate_reg_pressure_cost): Add call_p argument. When true,
|
|
adjust the number of available registers to exclude the
|
|
call-clobbered registers.
|
|
* cfgloop.h (target_clobbered_regs): Declare.
|
|
(estimate_reg_pressure_cost): Adjust declaration.
|
|
* tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call.
|
|
(ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost.
|
|
(determine_set_costs): Dump target_clobbered_regs.
|
|
(loop_body_includes_call): New function.
|
|
(tree_ssa_iv_optimize_loop): Use it to initialize new field.
|
|
* loop-invariant.c (gain_for_invariant): Adjust arguments to pass
|
|
call_p flag through.
|
|
(best_gain_for_invariant): Likewise.
|
|
(find_invariants_to_move): Likewise.
|
|
(move_single_loop_invariants): Likewise, using already-computed
|
|
has_call field.
|
|
|
|
2010-07-15 Jie Zhang <jie@codesourcery.com>
|
|
|
|
Issue #8497, #8893
|
|
|
|
=== modified file 'gcc/builtins.c'
|
|
--- old/gcc/builtins.c 2010-04-13 12:47:11 +0000
|
|
+++ new/gcc/builtins.c 2010-08-02 13:51:23 +0000
|
|
@@ -13624,3 +13624,123 @@
|
|
break;
|
|
}
|
|
}
|
|
+
|
|
+/* Return true if DECL is a builtin that expands to a constant or similarly
|
|
+ simple code. */
|
|
+bool
|
|
+is_simple_builtin (tree decl)
|
|
+{
|
|
+ if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
|
|
+ switch (DECL_FUNCTION_CODE (decl))
|
|
+ {
|
|
+ /* Builtins that expand to constants. */
|
|
+ case BUILT_IN_CONSTANT_P:
|
|
+ case BUILT_IN_EXPECT:
|
|
+ case BUILT_IN_OBJECT_SIZE:
|
|
+ case BUILT_IN_UNREACHABLE:
|
|
+ /* Simple register moves or loads from stack. */
|
|
+ case BUILT_IN_RETURN_ADDRESS:
|
|
+ case BUILT_IN_EXTRACT_RETURN_ADDR:
|
|
+ case BUILT_IN_FROB_RETURN_ADDR:
|
|
+ case BUILT_IN_RETURN:
|
|
+ case BUILT_IN_AGGREGATE_INCOMING_ADDRESS:
|
|
+ case BUILT_IN_FRAME_ADDRESS:
|
|
+ case BUILT_IN_VA_END:
|
|
+ case BUILT_IN_STACK_SAVE:
|
|
+ case BUILT_IN_STACK_RESTORE:
|
|
+ /* Exception state returns or moves registers around. */
|
|
+ case BUILT_IN_EH_FILTER:
|
|
+ case BUILT_IN_EH_POINTER:
|
|
+ case BUILT_IN_EH_COPY_VALUES:
|
|
+ return true;
|
|
+
|
|
+ default:
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/* Return true if DECL is a builtin that is not expensive, i.e., one that is
|
|
+ most probably expanded inline into reasonably simple code. This is a
|
|
+ superset of is_simple_builtin. */
|
|
+bool
|
|
+is_inexpensive_builtin (tree decl)
|
|
+{
|
|
+ if (!decl)
|
|
+ return false;
|
|
+ else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
|
|
+ return true;
|
|
+ else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
|
|
+ switch (DECL_FUNCTION_CODE (decl))
|
|
+ {
|
|
+ case BUILT_IN_ABS:
|
|
+ case BUILT_IN_ALLOCA:
|
|
+ case BUILT_IN_BSWAP32:
|
|
+ case BUILT_IN_BSWAP64:
|
|
+ case BUILT_IN_CLZ:
|
|
+ case BUILT_IN_CLZIMAX:
|
|
+ case BUILT_IN_CLZL:
|
|
+ case BUILT_IN_CLZLL:
|
|
+ case BUILT_IN_CTZ:
|
|
+ case BUILT_IN_CTZIMAX:
|
|
+ case BUILT_IN_CTZL:
|
|
+ case BUILT_IN_CTZLL:
|
|
+ case BUILT_IN_FFS:
|
|
+ case BUILT_IN_FFSIMAX:
|
|
+ case BUILT_IN_FFSL:
|
|
+ case BUILT_IN_FFSLL:
|
|
+ case BUILT_IN_IMAXABS:
|
|
+ case BUILT_IN_FINITE:
|
|
+ case BUILT_IN_FINITEF:
|
|
+ case BUILT_IN_FINITEL:
|
|
+ case BUILT_IN_FINITED32:
|
|
+ case BUILT_IN_FINITED64:
|
|
+ case BUILT_IN_FINITED128:
|
|
+ case BUILT_IN_FPCLASSIFY:
|
|
+ case BUILT_IN_ISFINITE:
|
|
+ case BUILT_IN_ISINF_SIGN:
|
|
+ case BUILT_IN_ISINF:
|
|
+ case BUILT_IN_ISINFF:
|
|
+ case BUILT_IN_ISINFL:
|
|
+ case BUILT_IN_ISINFD32:
|
|
+ case BUILT_IN_ISINFD64:
|
|
+ case BUILT_IN_ISINFD128:
|
|
+ case BUILT_IN_ISNAN:
|
|
+ case BUILT_IN_ISNANF:
|
|
+ case BUILT_IN_ISNANL:
|
|
+ case BUILT_IN_ISNAND32:
|
|
+ case BUILT_IN_ISNAND64:
|
|
+ case BUILT_IN_ISNAND128:
|
|
+ case BUILT_IN_ISNORMAL:
|
|
+ case BUILT_IN_ISGREATER:
|
|
+ case BUILT_IN_ISGREATEREQUAL:
|
|
+ case BUILT_IN_ISLESS:
|
|
+ case BUILT_IN_ISLESSEQUAL:
|
|
+ case BUILT_IN_ISLESSGREATER:
|
|
+ case BUILT_IN_ISUNORDERED:
|
|
+ case BUILT_IN_VA_ARG_PACK:
|
|
+ case BUILT_IN_VA_ARG_PACK_LEN:
|
|
+ case BUILT_IN_VA_COPY:
|
|
+ case BUILT_IN_TRAP:
|
|
+ case BUILT_IN_SAVEREGS:
|
|
+ case BUILT_IN_POPCOUNTL:
|
|
+ case BUILT_IN_POPCOUNTLL:
|
|
+ case BUILT_IN_POPCOUNTIMAX:
|
|
+ case BUILT_IN_POPCOUNT:
|
|
+ case BUILT_IN_PARITYL:
|
|
+ case BUILT_IN_PARITYLL:
|
|
+ case BUILT_IN_PARITYIMAX:
|
|
+ case BUILT_IN_PARITY:
|
|
+ case BUILT_IN_LABS:
|
|
+ case BUILT_IN_LLABS:
|
|
+ case BUILT_IN_PREFETCH:
|
|
+ return true;
|
|
+
|
|
+ default:
|
|
+ return is_simple_builtin (decl);
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
|
|
=== modified file 'gcc/cfgloop.h'
|
|
--- old/gcc/cfgloop.h 2009-11-25 10:55:54 +0000
|
|
+++ new/gcc/cfgloop.h 2010-08-02 13:51:23 +0000
|
|
@@ -622,13 +622,14 @@
|
|
/* The properties of the target. */
|
|
|
|
extern unsigned target_avail_regs;
|
|
+extern unsigned target_clobbered_regs;
|
|
extern unsigned target_res_regs;
|
|
extern unsigned target_reg_cost [2];
|
|
extern unsigned target_spill_cost [2];
|
|
|
|
/* Register pressure estimation for induction variable optimizations & loop
|
|
invariant motion. */
|
|
-extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool);
|
|
+extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool);
|
|
extern void init_set_costs (void);
|
|
|
|
/* Loop optimizer initialization. */
|
|
|
|
=== modified file 'gcc/cfgloopanal.c'
|
|
--- old/gcc/cfgloopanal.c 2009-09-30 08:57:56 +0000
|
|
+++ new/gcc/cfgloopanal.c 2010-08-02 13:51:23 +0000
|
|
@@ -320,6 +320,8 @@
|
|
/* The properties of the target. */
|
|
|
|
unsigned target_avail_regs; /* Number of available registers. */
|
|
+unsigned target_clobbered_regs; /* Number of available registers that are
|
|
+ call-clobbered. */
|
|
unsigned target_res_regs; /* Number of registers reserved for temporary
|
|
expressions. */
|
|
unsigned target_reg_cost[2]; /* The cost for register when there still
|
|
@@ -342,10 +344,15 @@
|
|
unsigned i;
|
|
|
|
target_avail_regs = 0;
|
|
+ target_clobbered_regs = 0;
|
|
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
|
if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
|
|
&& !fixed_regs[i])
|
|
- target_avail_regs++;
|
|
+ {
|
|
+ target_avail_regs++;
|
|
+ if (call_used_regs[i])
|
|
+ target_clobbered_regs++;
|
|
+ }
|
|
|
|
target_res_regs = 3;
|
|
|
|
@@ -379,20 +386,29 @@
|
|
|
|
/* Estimates cost of increased register pressure caused by making N_NEW new
|
|
registers live around the loop. N_OLD is the number of registers live
|
|
- around the loop. */
|
|
+ around the loop. If CALL_P is true, also take into account that
|
|
+ call-used registers may be clobbered in the loop body, reducing the
|
|
+ number of available registers before we spill. */
|
|
|
|
unsigned
|
|
-estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
|
|
+estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
|
|
+ bool call_p)
|
|
{
|
|
unsigned cost;
|
|
unsigned regs_needed = n_new + n_old;
|
|
+ unsigned available_regs = target_avail_regs;
|
|
+
|
|
+ /* If there is a call in the loop body, the call-clobbered registers
|
|
+ are not available for loop invariants. */
|
|
+ if (call_p)
|
|
+ available_regs = available_regs - target_clobbered_regs;
|
|
|
|
/* If we have enough registers, we should use them and not restrict
|
|
the transformations unnecessarily. */
|
|
- if (regs_needed + target_res_regs <= target_avail_regs)
|
|
+ if (regs_needed + target_res_regs <= available_regs)
|
|
return 0;
|
|
|
|
- if (regs_needed <= target_avail_regs)
|
|
+ if (regs_needed <= available_regs)
|
|
/* If we are close to running out of registers, try to preserve
|
|
them. */
|
|
cost = target_reg_cost [speed] * n_new;
|
|
|
|
=== modified file 'gcc/loop-invariant.c'
|
|
--- old/gcc/loop-invariant.c 2010-04-02 18:54:46 +0000
|
|
+++ new/gcc/loop-invariant.c 2010-08-02 13:51:23 +0000
|
|
@@ -1173,11 +1173,13 @@
|
|
/* Calculates gain for eliminating invariant INV. REGS_USED is the number
|
|
of registers used in the loop, NEW_REGS is the number of new variables
|
|
already added due to the invariant motion. The number of registers needed
|
|
- for it is stored in *REGS_NEEDED. */
|
|
+ for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed
|
|
+ through to estimate_reg_pressure_cost. */
|
|
|
|
static int
|
|
gain_for_invariant (struct invariant *inv, unsigned *regs_needed,
|
|
- unsigned *new_regs, unsigned regs_used, bool speed)
|
|
+ unsigned *new_regs, unsigned regs_used,
|
|
+ bool speed, bool call_p)
|
|
{
|
|
int comp_cost, size_cost;
|
|
|
|
@@ -1188,9 +1190,9 @@
|
|
if (! flag_ira_loop_pressure)
|
|
{
|
|
size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0],
|
|
- regs_used, speed)
|
|
+ regs_used, speed, call_p)
|
|
- estimate_reg_pressure_cost (new_regs[0],
|
|
- regs_used, speed));
|
|
+ regs_used, speed, call_p));
|
|
}
|
|
else
|
|
{
|
|
@@ -1245,7 +1247,8 @@
|
|
|
|
static int
|
|
best_gain_for_invariant (struct invariant **best, unsigned *regs_needed,
|
|
- unsigned *new_regs, unsigned regs_used, bool speed)
|
|
+ unsigned *new_regs, unsigned regs_used,
|
|
+ bool speed, bool call_p)
|
|
{
|
|
struct invariant *inv;
|
|
int i, gain = 0, again;
|
|
@@ -1261,7 +1264,7 @@
|
|
continue;
|
|
|
|
again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used,
|
|
- speed);
|
|
+ speed, call_p);
|
|
if (again > gain)
|
|
{
|
|
gain = again;
|
|
@@ -1314,7 +1317,7 @@
|
|
/* Determines which invariants to move. */
|
|
|
|
static void
|
|
-find_invariants_to_move (bool speed)
|
|
+find_invariants_to_move (bool speed, bool call_p)
|
|
{
|
|
int gain;
|
|
unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES];
|
|
@@ -1353,7 +1356,8 @@
|
|
new_regs[ira_reg_class_cover[i]] = 0;
|
|
}
|
|
while ((gain = best_gain_for_invariant (&inv, regs_needed,
|
|
- new_regs, regs_used, speed)) > 0)
|
|
+ new_regs, regs_used,
|
|
+ speed, call_p)) > 0)
|
|
{
|
|
set_move_mark (inv->invno, gain);
|
|
if (! flag_ira_loop_pressure)
|
|
@@ -1554,7 +1558,8 @@
|
|
init_inv_motion_data ();
|
|
|
|
find_invariants (loop);
|
|
- find_invariants_to_move (optimize_loop_for_speed_p (loop));
|
|
+ find_invariants_to_move (optimize_loop_for_speed_p (loop),
|
|
+ LOOP_DATA (loop)->has_call);
|
|
move_invariants (loop);
|
|
|
|
free_inv_motion_data ();
|
|
|
|
=== added file 'gcc/testsuite/gcc.target/arm/pr42505.c'
|
|
--- old/gcc/testsuite/gcc.target/arm/pr42505.c 1970-01-01 00:00:00 +0000
|
|
+++ new/gcc/testsuite/gcc.target/arm/pr42505.c 2010-08-02 13:51:23 +0000
|
|
@@ -0,0 +1,23 @@
|
|
+/* { dg-options "-mthumb -Os -march=armv5te" } */
|
|
+/* { dg-require-effective-target arm_thumb1_ok } */
|
|
+/* { dg-final { scan-assembler-not "str\[\\t \]*r.,\[\\t \]*.sp," } } */
|
|
+
|
|
+struct A {
|
|
+ int f1;
|
|
+ int f2;
|
|
+};
|
|
+
|
|
+int func(int c);
|
|
+
|
|
+/* This function should not need to spill anything to the stack. */
|
|
+int test(struct A* src, struct A* dst, int count)
|
|
+{
|
|
+ while (count--) {
|
|
+ if (!func(src->f2)) {
|
|
+ return 0;
|
|
+ }
|
|
+ *dst++ = *src++;
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
|
|
=== modified file 'gcc/tree-inline.c'
|
|
--- old/gcc/tree-inline.c 2010-03-18 20:07:13 +0000
|
|
+++ new/gcc/tree-inline.c 2010-08-02 13:51:23 +0000
|
|
@@ -3246,34 +3246,13 @@
|
|
if (POINTER_TYPE_P (funtype))
|
|
funtype = TREE_TYPE (funtype);
|
|
|
|
- if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD)
|
|
+ if (is_simple_builtin (decl))
|
|
+ return 0;
|
|
+ else if (is_inexpensive_builtin (decl))
|
|
cost = weights->target_builtin_call_cost;
|
|
else
|
|
cost = weights->call_cost;
|
|
|
|
- if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
|
|
- switch (DECL_FUNCTION_CODE (decl))
|
|
- {
|
|
- case BUILT_IN_CONSTANT_P:
|
|
- return 0;
|
|
- case BUILT_IN_EXPECT:
|
|
- return 0;
|
|
-
|
|
- /* Prefetch instruction is not expensive. */
|
|
- case BUILT_IN_PREFETCH:
|
|
- cost = weights->target_builtin_call_cost;
|
|
- break;
|
|
-
|
|
- /* Exception state returns or moves registers around. */
|
|
- case BUILT_IN_EH_FILTER:
|
|
- case BUILT_IN_EH_POINTER:
|
|
- case BUILT_IN_EH_COPY_VALUES:
|
|
- return 0;
|
|
-
|
|
- default:
|
|
- break;
|
|
- }
|
|
-
|
|
if (decl)
|
|
funtype = TREE_TYPE (decl);
|
|
|
|
|
|
=== modified file 'gcc/tree-ssa-loop-ivopts.c'
|
|
--- old/gcc/tree-ssa-loop-ivopts.c 2010-04-01 15:18:07 +0000
|
|
+++ new/gcc/tree-ssa-loop-ivopts.c 2010-08-02 13:51:23 +0000
|
|
@@ -257,6 +257,9 @@
|
|
|
|
/* Are we optimizing for speed? */
|
|
bool speed;
|
|
+
|
|
+ /* Whether the loop body includes any function calls. */
|
|
+ bool body_includes_call;
|
|
};
|
|
|
|
/* An assignment of iv candidates to uses. */
|
|
@@ -2926,6 +2929,20 @@
|
|
return get_computation_at (loop, use, cand, use->stmt);
|
|
}
|
|
|
|
+/* Adjust the cost COST for being in loop setup rather than loop body.
|
|
+ If we're optimizing for space, the loop setup overhead is constant;
|
|
+ if we're optimizing for speed, amortize it over the per-iteration cost. */
|
|
+static unsigned
|
|
+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
|
|
+{
|
|
+ if (cost == INFTY)
|
|
+ return cost;
|
|
+ else if (optimize_loop_for_speed_p (data->current_loop))
|
|
+ return cost / AVG_LOOP_NITER (data->current_loop);
|
|
+ else
|
|
+ return cost;
|
|
+}
|
|
+
|
|
/* Returns cost of addition in MODE. */
|
|
|
|
static unsigned
|
|
@@ -3838,8 +3855,8 @@
|
|
/* Symbol + offset should be compile-time computable so consider that they
|
|
are added once to the variable, if present. */
|
|
if (var_present && (symbol_present || offset))
|
|
- cost.cost += add_cost (TYPE_MODE (ctype), speed)
|
|
- / AVG_LOOP_NITER (data->current_loop);
|
|
+ cost.cost += adjust_setup_cost (data,
|
|
+ add_cost (TYPE_MODE (ctype), speed));
|
|
|
|
/* Having offset does not affect runtime cost in case it is added to
|
|
symbol, but it increases complexity. */
|
|
@@ -4104,7 +4121,7 @@
|
|
elim_cost = force_var_cost (data, bound, &depends_on_elim);
|
|
/* The bound is a loop invariant, so it will be only computed
|
|
once. */
|
|
- elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
|
|
+ elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
|
|
}
|
|
else
|
|
elim_cost = infinite_cost;
|
|
@@ -4351,7 +4368,7 @@
|
|
cost_base = force_var_cost (data, base, NULL);
|
|
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
|
|
|
|
- cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
|
|
+ cost = cost_step + adjust_setup_cost (data, cost_base.cost);
|
|
|
|
/* Prefer the original ivs unless we may gain something by replacing it.
|
|
The reason is to make debugging simpler; so this is not relevant for
|
|
@@ -4404,7 +4421,8 @@
|
|
{
|
|
/* We add size to the cost, so that we prefer eliminating ivs
|
|
if possible. */
|
|
- return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
|
|
+ return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
|
|
+ data->body_includes_call);
|
|
}
|
|
|
|
/* For each size of the induction variable set determine the penalty. */
|
|
@@ -4419,30 +4437,11 @@
|
|
struct loop *loop = data->current_loop;
|
|
bitmap_iterator bi;
|
|
|
|
- /* We use the following model (definitely improvable, especially the
|
|
- cost function -- TODO):
|
|
-
|
|
- We estimate the number of registers available (using MD data), name it A.
|
|
-
|
|
- We estimate the number of registers used by the loop, name it U. This
|
|
- number is obtained as the number of loop phi nodes (not counting virtual
|
|
- registers and bivs) + the number of variables from outside of the loop.
|
|
-
|
|
- We set a reserve R (free regs that are used for temporary computations,
|
|
- etc.). For now the reserve is a constant 3.
|
|
-
|
|
- Let I be the number of induction variables.
|
|
-
|
|
- -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
|
|
- make a lot of ivs without a reason).
|
|
- -- if A - R < U + I <= A, the cost is I * PRES_COST
|
|
- -- if U + I > A, the cost is I * PRES_COST and
|
|
- number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
|
|
-
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Global costs:\n");
|
|
fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
|
|
+ fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
|
|
fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
|
|
fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
|
|
}
|
|
@@ -5062,11 +5061,13 @@
|
|
}
|
|
|
|
/* Tries to extend the sets IVS in the best possible way in order
|
|
- to express the USE. */
|
|
+ to express the USE. If ORIGINALP is true, prefer candidates from
|
|
+ the original set of IVs, otherwise favor important candidates not
|
|
+ based on any memory object. */
|
|
|
|
static bool
|
|
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
|
|
- struct iv_use *use)
|
|
+ struct iv_use *use, bool originalp)
|
|
{
|
|
comp_cost best_cost, act_cost;
|
|
unsigned i;
|
|
@@ -5085,7 +5086,8 @@
|
|
iv_ca_set_no_cp (data, ivs, use);
|
|
}
|
|
|
|
- /* First try important candidates not based on any memory object. Only if
|
|
+ /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
|
|
+ first try important candidates not based on any memory object. Only if
|
|
this fails, try the specific ones. Rationale -- in loops with many
|
|
variables the best choice often is to use just one generic biv. If we
|
|
added here many ivs specific to the uses, the optimization algorithm later
|
|
@@ -5097,7 +5099,10 @@
|
|
{
|
|
cand = iv_cand (data, i);
|
|
|
|
- if (cand->iv->base_object != NULL_TREE)
|
|
+ if (originalp && cand->pos !=IP_ORIGINAL)
|
|
+ continue;
|
|
+
|
|
+ if (!originalp && cand->iv->base_object != NULL_TREE)
|
|
continue;
|
|
|
|
if (iv_ca_cand_used_p (ivs, cand))
|
|
@@ -5133,8 +5138,13 @@
|
|
continue;
|
|
|
|
/* Already tried this. */
|
|
- if (cand->important && cand->iv->base_object == NULL_TREE)
|
|
- continue;
|
|
+ if (cand->important)
|
|
+ {
|
|
+ if (originalp && cand->pos == IP_ORIGINAL)
|
|
+ continue;
|
|
+ if (!originalp && cand->iv->base_object == NULL_TREE)
|
|
+ continue;
|
|
+ }
|
|
|
|
if (iv_ca_cand_used_p (ivs, cand))
|
|
continue;
|
|
@@ -5168,13 +5178,13 @@
|
|
/* Finds an initial assignment of candidates to uses. */
|
|
|
|
static struct iv_ca *
|
|
-get_initial_solution (struct ivopts_data *data)
|
|
+get_initial_solution (struct ivopts_data *data, bool originalp)
|
|
{
|
|
struct iv_ca *ivs = iv_ca_new (data);
|
|
unsigned i;
|
|
|
|
for (i = 0; i < n_iv_uses (data); i++)
|
|
- if (!try_add_cand_for (data, ivs, iv_use (data, i)))
|
|
+ if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
|
|
{
|
|
iv_ca_free (&ivs);
|
|
return NULL;
|
|
@@ -5246,14 +5256,12 @@
|
|
solution and remove the unused ivs while this improves the cost. */
|
|
|
|
static struct iv_ca *
|
|
-find_optimal_iv_set (struct ivopts_data *data)
|
|
+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
|
|
{
|
|
- unsigned i;
|
|
struct iv_ca *set;
|
|
- struct iv_use *use;
|
|
|
|
/* Get the initial solution. */
|
|
- set = get_initial_solution (data);
|
|
+ set = get_initial_solution (data, originalp);
|
|
if (!set)
|
|
{
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
@@ -5276,11 +5284,46 @@
|
|
}
|
|
}
|
|
|
|
+ return set;
|
|
+}
|
|
+
|
|
+static struct iv_ca *
|
|
+find_optimal_iv_set (struct ivopts_data *data)
|
|
+{
|
|
+ unsigned i;
|
|
+ struct iv_ca *set, *origset;
|
|
+ struct iv_use *use;
|
|
+ comp_cost cost, origcost;
|
|
+
|
|
+ /* Determine the cost based on a strategy that starts with original IVs,
|
|
+ and try again using a strategy that prefers candidates not based
|
|
+ on any memory object. */
|
|
+ origset = find_optimal_iv_set_1 (data, true);
|
|
+ set = find_optimal_iv_set_1 (data, false);
|
|
+
|
|
+ if (!origset && !set)
|
|
+ return NULL;
|
|
+
|
|
+ origcost = origset ? iv_ca_cost (origset) : infinite_cost;
|
|
+ cost = set ? iv_ca_cost (set) : infinite_cost;
|
|
+
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
- comp_cost cost = iv_ca_cost (set);
|
|
- fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
|
|
- }
|
|
+ fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
|
|
+ origcost.cost, origcost.complexity);
|
|
+ fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
|
|
+ cost.cost, cost.complexity);
|
|
+ }
|
|
+
|
|
+ /* Choose the one with the best cost. */
|
|
+ if (compare_costs (origcost, cost) <= 0)
|
|
+ {
|
|
+ if (set)
|
|
+ iv_ca_free (&set);
|
|
+ set = origset;
|
|
+ }
|
|
+ else if (origset)
|
|
+ iv_ca_free (&origset);
|
|
|
|
for (i = 0; i < n_iv_uses (data); i++)
|
|
{
|
|
@@ -5768,6 +5811,25 @@
|
|
VEC_free (iv_cand_p, heap, data->iv_candidates);
|
|
}
|
|
|
|
+/* Returns true if the loop body BODY includes any call that is not an inexpensive builtin. */
|
|
+
|
|
+static bool
|
|
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
|
|
+{
|
|
+ gimple_stmt_iterator gsi;
|
|
+ unsigned i;
|
|
+
|
|
+ for (i = 0; i < num_nodes; i++)
|
|
+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
+ {
|
|
+ gimple stmt = gsi_stmt (gsi);
|
|
+ if (is_gimple_call (stmt)
|
|
+ && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
/* Optimizes the LOOP. Returns true if anything changed. */
|
|
|
|
static bool
|
|
@@ -5799,6 +5861,7 @@
|
|
}
|
|
|
|
body = get_loop_body (loop);
|
|
+ data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
|
|
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
|
|
free (body);
|
|
|
|
|
|
=== modified file 'gcc/tree.h'
|
|
--- old/gcc/tree.h 2010-04-02 18:54:46 +0000
|
|
+++ new/gcc/tree.h 2010-08-02 13:51:23 +0000
|
|
@@ -4962,6 +4962,8 @@
|
|
extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
|
|
tree, tree);
|
|
extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
|
|
+extern bool is_simple_builtin (tree);
|
|
+extern bool is_inexpensive_builtin (tree);
|
|
|
|
/* In convert.c */
|
|
extern tree strip_float_extensions (tree);
|
|
|