From 4d9f13e4ea0068d4912b430f9f6a22cf5ca18211 Mon Sep 17 00:00:00 2001
From: nemet <nemet@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sun, 24 Sep 2006 19:03:57 +0000
Subject: [PATCH] * tree-ssa-loop-ivopts.c (aff_combination_convert,
 determine_common_wider_type): New functions.
 (get_computation_aff): Use them to simplify arithmetic between
 UBASE and CBASE if they are shortened from the same type.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@117182 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog              |   8 +++
 gcc/tree-ssa-loop-ivopts.c | 105 ++++++++++++++++++++++++++++++++++---
 2 files changed, 105 insertions(+), 8 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66332b1314ad..245b081ee9f7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2006-09-24  Zdenek Dvorak  <dvorakz@suse.cz>
+	    Adam Nemet  <anemet@caviumnetworks.com>
+
+	* tree-ssa-loop-ivopts.c (aff_combination_convert,
+	determine_common_wider_type): New functions.
+	(get_computation_aff): Use them to simplify arithmetic between
+	UBASE and CBASE if they are shortened from the same type.
+
 2006-09-24  Kazu Hirata  <kazu@codesourcery.com>
 
 	PR target/28911
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index ae1905d05ded..6029923ae26a 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2762,6 +2762,37 @@ aff_combination_add (struct affine_tree_combination *comb1,
     aff_combination_add_elt (comb1, comb2->rest, 1);
 }
 
+/* Convert COMB to TYPE.  */
+
+static void
+aff_combination_convert (tree type, struct affine_tree_combination *comb)
+{
+  unsigned prec = TYPE_PRECISION (type);
+  unsigned i;
+
+  /* If the precision of both types is the same, it suffices to change the
+     type of the whole combination -- the elements are allowed to have
+     another type equivalent w.r.t. STRIP_NOPS.  */
+  if (prec == TYPE_PRECISION (comb->type))
+    {
+      comb->type = type;
+      return;
+    }
+
+  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
+  comb->offset = comb->offset & comb->mask;
+
+  /* The types of the elements can differ from comb->type only by
+     conversions that STRIP_NOPS would remove, so we can cast them
+     directly to TYPE.  */
+  for (i = 0; i < comb->n; i++)
+    comb->elts[i] = fold_convert (type, comb->elts[i]);
+  if (comb->rest)
+    comb->rest = fold_convert (type, comb->rest);
+
+  comb->type = type;
+}
+
 /* Splits EXPR into an affine combination of parts.  */
 
 static void
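
Note: the mask computation in aff_combination_convert is written as
(2 << (prec - 1)) - 1 rather than the more obvious (1 << prec) - 1 so that
the shift count never reaches the width of unsigned HOST_WIDE_INT, a shift
that would be undefined behavior.  A minimal standalone sketch, plain C
rather than GCC code, with the hypothetical hwi typedef standing in for
unsigned HOST_WIDE_INT:

    #include <stdio.h>

    typedef unsigned long long hwi; /* stand-in for unsigned HOST_WIDE_INT */

    /* Build a mask of the low PREC bits without ever shifting by the full
       width of hwi: 2 << (prec - 1) shifts by at most width - 1, and for
       prec == width the unsigned result wraps to 0, so subtracting 1
       yields an all-ones mask.  */
    static hwi
    prec_mask (unsigned prec)
    {
      return ((hwi) 2 << (prec - 1)) - 1;
    }

    int
    main (void)
    {
      printf ("%llx\n", prec_mask (8));  /* ff */
      printf ("%llx\n", prec_mask (32)); /* ffffffff */
      printf ("%llx\n", prec_mask (64)); /* ffffffffffffffff */
      return 0;
    }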
@@ -2951,6 +2982,44 @@ fold_affine_expr (tree expr)
   return aff_combination_to_tree (&comb);
 }
 
+/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
+   same precision that is at least as wide as the precision of TYPE, stores
+   BA to A and BB to B, and returns the type of BA.  Otherwise, leaves A and
+   B unchanged and returns the type of A.  */
+
+static tree
+determine_common_wider_type (tree *a, tree *b)
+{
+  tree wider_type = NULL;
+  tree suba, subb;
+  tree atype = TREE_TYPE (*a);
+
+  if (TREE_CODE (*a) == NOP_EXPR
+      || TREE_CODE (*a) == CONVERT_EXPR)
+    {
+      suba = TREE_OPERAND (*a, 0);
+      wider_type = TREE_TYPE (suba);
+      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
+	return atype;
+    }
+  else
+    return atype;
+
+  if (TREE_CODE (*b) == NOP_EXPR
+      || TREE_CODE (*b) == CONVERT_EXPR)
+    {
+      subb = TREE_OPERAND (*b, 0);
+      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
+	return atype;
+    }
+  else
+    return atype;
+
+  *a = suba;
+  *b = subb;
+  return wider_type;
+}
+
 /* Determines the expression by which USE is expressed from induction variable
    CAND at statement AT in LOOP.  The expression is stored in a decomposed
    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
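
Note: the early returns make determine_common_wider_type's contract easy to
misread.  A toy model, using a hypothetical struct expr rather than GCC's
tree representation, that mirrors the same decisions:

    #include <stdio.h>

    struct expr
    {
      unsigned prec;      /* precision of this node's type */
      struct expr *inner; /* operand if this node is a conversion, else NULL */
    };

    /* Mirror of determine_common_wider_type: peel one conversion off both
       operands when the unconverted operands share a precision at least as
       wide as the converted one; otherwise change nothing.  */
    static unsigned
    common_wider_prec (struct expr **a, struct expr **b)
    {
      if ((*a)->inner == NULL || (*b)->inner == NULL)
        return (*a)->prec; /* not both conversions */
      if ((*a)->inner->prec < (*a)->prec
          || (*a)->inner->prec != (*b)->inner->prec)
        return (*a)->prec; /* narrower inner type, or mismatched precisions */
      *a = (*a)->inner;    /* peel both conversions */
      *b = (*b)->inner;
      return (*a)->prec;
    }

    int
    main (void)
    {
      struct expr wa = { 64, NULL }, wb = { 64, NULL };
      struct expr a = { 32, &wa }, b = { 32, &wb };
      struct expr *pa = &a, *pb = &b;
      printf ("%u\n", common_wider_prec (&pa, &pb)); /* prints 64 */
      return 0;
    }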
@@ -2965,6 +3034,7 @@ get_computation_aff (struct loop *loop,
   tree cbase = cand->iv->base;
   tree cstep = cand->iv->step;
   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
+  tree common_type;
   tree uutype;
   tree expr, delta;
   tree ratio;
@@ -3040,9 +3110,20 @@ get_computation_aff (struct loop *loop,
 	ratioi = 0;
     }
 
+  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
+     type, we achieve better folding by computing their difference in this
+     wider type, and casting the result to UUTYPE.  We do not need to worry
+     about overflows, as all the arithmetic will in the end be performed in
+     UUTYPE anyway.  */
+  common_type = determine_common_wider_type (&ubase, &cbase);
+
   /* We may need to shift the value if we are after the increment.  */
   if (stmt_after_increment (loop, cand, at))
-    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
+    {
+      if (uutype != common_type)
+	cstep = fold_convert (common_type, cstep);
+      cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
+    }
 
   /* use = ubase - ratio * cbase + ratio * var.
 
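
Note: the "do not need to worry about overflows" claim in the comment above
is just the fact that truncation to UUTYPE commutes with addition and
subtraction in modular arithmetic.  A standalone check, with uint64_t and
uint32_t standing in for the common wider type and UUTYPE:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint64_t ubase = 0x100000003ULL; /* values chosen so that the      */
      uint64_t cbase = 0xfffffffeULL;  /* narrow subtraction wraps around */

      uint32_t narrow_first = (uint32_t) ubase - (uint32_t) cbase;
      uint32_t wide_first = (uint32_t) (ubase - cbase);

      /* Both print 5 despite the wraparound in the narrow computation.  */
      printf ("%" PRIu32 " %" PRIu32 "\n", narrow_first, wide_first);
      return 0;
    }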
@@ -3052,7 +3133,7 @@ get_computation_aff (struct loop *loop,
      happen, fold is able to apply the distributive law to obtain this form
      anyway.  */
 
-  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
+  if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
     {
       /* Let's compute in trees and just return the result in AFF.  This case
 	 should not be very common, and fold itself is not that bad either,
@@ -3060,18 +3141,24 @@ get_computation_aff (struct loop *loop,
 	 is not that urgent.  */
       if (ratioi == 1)
 	{
-	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
+	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
+	  if (uutype != common_type)
+	    delta = fold_convert (uutype, delta);
 	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
 	}
       else if (ratioi == -1)
 	{
-	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
+	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
+	  if (uutype != common_type)
+	    delta = fold_convert (uutype, delta);
 	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
 	}
       else
 	{
-	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
-	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
+	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
+	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
+	  if (uutype != common_type)
+	    delta = fold_convert (uutype, delta);
 	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
 	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
 	}
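
Note: the identity use = ubase - ratio * cbase + ratio * var that these
delta computations implement follows from the two induction variables
advancing in lockstep.  With ratio = ustep / cstep, the candidate has value
var = cbase + i * cstep at iteration i, so

    use = ubase + i * ustep
        = ubase + ratio * (i * cstep)
        = ubase + ratio * (var - cbase)
        = (ubase - ratio * cbase) + ratio * var.

The ratioi == 1 and ratioi == -1 branches above merely avoid materializing
the multiplications in this formula.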
@@ -3088,12 +3175,14 @@ get_computation_aff (struct loop *loop,
      possible to compute ratioi.  */
   gcc_assert (ratioi);
 
-  tree_to_aff_combination (ubase, uutype, aff);
-  tree_to_aff_combination (cbase, uutype, &cbase_aff);
+  tree_to_aff_combination (ubase, common_type, aff);
+  tree_to_aff_combination (cbase, common_type, &cbase_aff);
   tree_to_aff_combination (expr, uutype, &expr_aff);
   aff_combination_scale (&cbase_aff, -ratioi);
   aff_combination_scale (&expr_aff, ratioi);
   aff_combination_add (aff, &cbase_aff);
+  if (common_type != uutype)
+    aff_combination_convert (uutype, aff);
   aff_combination_add (aff, &expr_aff);
 
   return true;
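
Note: the ordering in this last hunk is what makes the conversion pay off.
AFF and CBASE_AFF are decomposed and combined in COMMON_TYPE, so the
ubase - ratio * cbase part folds in the wider type; only then is the
combination truncated to UUTYPE, after which EXPR_AFF, which was decomposed
directly in UUTYPE, can be added.  Schematically:

    aff        = ubase                 (in common_type)
    cbase_aff  = -ratioi * cbase       (in common_type)
    aff       += cbase_aff             (folds in common_type)
    aff        = (uutype) aff          (aff_combination_convert)
    aff       += ratioi * expr         (expr_aff already in uutype)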
-- 
GitLab