diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c80364005d1ac03d8c87f4e709059563639756dd..adc47bfa310b63593f892d26fd9e08945cbd9e22 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2003-03-04  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
+
+	* function.c (STACK_ALIGNMENT_NEEDED): New macro.  Default to 1.
+	(assign_stack_local_1): Perform overall stack alignment only when
+	STACK_ALIGNMENT_NEEDED is non-zero.
+	* doc/tm.texi (STACK_ALIGNMENT_NEEDED): Document.
+
+	* pa.c (compute_frame_size): Rename fsize to size.  Account for
+	alignment to a word boundary before general register save block.  Only
+	account for double-word alignment before floating point register save
+	block if one or more floating point registers are saved.  Don't
+	allocate space for %r3 when frame pointer is needed.
+	(hppa_expand_prologue): Include alignment to word boundary in local
+	frame size.
+	* pa.h (STARTING_FRAME_OFFSET): Define to 8 on both 32 and 64-bit ports.
+	(STACK_ALIGNMENT_NEEDED): Define.
+
 2003-03-04  Kevin Buettner  <kevinb@redhat.com>
 
 	* dwarf2out.c (rtl_for_decl_location): Don't return NULL_RTX for
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 27b218c72e0137586bad0e870e0367017d79e800..0f1b894f89564f4b238bd26dd62020edd9a850e2 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -3195,15 +3195,24 @@ compute_frame_size (size, fregs_live)
      int size;
      int *fregs_live;
 {
-  int i, fsize;
-
-  /* Space for frame pointer + filler. If any frame is allocated
-     we need to add this in because of STARTING_FRAME_OFFSET.
-
-     Similar code also appears in hppa_expand_prologue.  Change both
-     of them at the same time.  */
-  fsize = size + (size || frame_pointer_needed ? STARTING_FRAME_OFFSET : 0);
-
+  int freg_saved = 0;
+  int i, j;
+
+  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
+     be consistent with the rounding and size calculation done here.
+     Change them at the same time.  */
+
+  /* We do our own stack alignment.  First, round the size of the
+     stack locals up to a word boundary.  */
+  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+
+  /* Space for previous frame pointer + filler.  If any frame is
+     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
+     waste some space here for the sake of HP compatibility.  The
+     first slot is only used when the frame pointer is needed.  */
+  if (size || frame_pointer_needed)
+    size += STARTING_FRAME_OFFSET;
+  
   /* If the current function calls __builtin_eh_return, then we need
      to allocate stack space for registers that will hold data for
      the exception handler.  */
@@ -3213,41 +3222,49 @@ compute_frame_size (size, fregs_live)
 
       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
 	continue;
-      fsize += i * UNITS_PER_WORD;
+      size += i * UNITS_PER_WORD;
     }
 
   /* Account for space used by the callee general register saves.  */
-  for (i = 18; i >= 3; i--)
+  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
     if (regs_ever_live[i])
-      fsize += UNITS_PER_WORD;
-
-  /* Round the stack.  */
-  fsize = (fsize + 7) & ~7;
+      size += UNITS_PER_WORD;
 
   /* Account for space used by the callee floating point register saves.  */
   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
     if (regs_ever_live[i]
-	|| (! TARGET_64BIT && regs_ever_live[i + 1]))
+	|| (!TARGET_64BIT && regs_ever_live[i + 1]))
       {
-	if (fregs_live)
-	  *fregs_live = 1;
+	freg_saved = 1;
 
 	/* We always save both halves of the FP register, so always
 	   increment the frame size by 8 bytes.  */
-	fsize += 8;
+	size += 8;
       }
 
+  /* If any of the floating registers are saved, account for the
+     alignment needed for the floating point register save block.  */
+  if (freg_saved)
+    {
+      size = (size + 7) & ~7;
+      if (fregs_live)
+	*fregs_live = 1;
+    }
+
   /* The various ABIs include space for the outgoing parameters in the
-     size of the current function's stack frame.  */
-  fsize += current_function_outgoing_args_size;
+     size of the current function's stack frame.  We don't need to align
+     for the outgoing arguments as their alignment is set by the final
+     rounding for the frame as a whole.  */
+  size += current_function_outgoing_args_size;
 
   /* Allocate space for the fixed frame marker.  This space must be
      allocated for any function that makes calls or otherwise allocates
      stack space.  */
-  if (!current_function_is_leaf || fsize)
-    fsize += TARGET_64BIT ? 16 : 32;
+  if (!current_function_is_leaf || size)
+    size += TARGET_64BIT ? 16 : 32;
 
-  return ((fsize + PREFERRED_STACK_BOUNDARY / 8 - 1)
+  /* Finally, round to the preferred stack boundary.  */
+  return ((size + PREFERRED_STACK_BOUNDARY / 8 - 1)
 	  & ~(PREFERRED_STACK_BOUNDARY / 8 - 1));
 }
 
@@ -3313,8 +3330,8 @@ pa_output_function_prologue (file, size)
 void
 hppa_expand_prologue ()
 {
-  int size = get_frame_size ();
   int merge_sp_adjust_with_store = 0;
+  int size = get_frame_size ();
   int i, offset;
   rtx insn, tmpreg;
 
@@ -3322,13 +3339,12 @@ hppa_expand_prologue ()
   fr_saved = 0;
   save_fregs = 0;
 
-  /* Allocate space for frame pointer + filler. If any frame is allocated
-     we need to add this in because of STARTING_FRAME_OFFSET.
-
-     Similar code also appears in compute_frame_size.  Change both
-     of them at the same time.  */
-  local_fsize = size + (size || frame_pointer_needed
-			? STARTING_FRAME_OFFSET : 0);
+  /* Compute total size for frame pointer, filler, locals and rounding to
+     the next word boundary.  Similar code appears in compute_frame_size
+     and must be changed in tandem with this code.  */
+  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+  if (local_fsize || frame_pointer_needed)
+    local_fsize += STARTING_FRAME_OFFSET;
 
   actual_fsize = compute_frame_size (size, &save_fregs);
 
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 0b055e9b16b649e1c02b1111a5d1b71674b2a411..ba533ff0d4d6b876a4ebde8fe38870a63dd704d7 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -676,10 +676,17 @@ extern struct rtx_def *hppa_pic_save_rtx PARAMS ((void));
 /* Offset within stack frame to start allocating local variables at.
    If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
    first local allocated.  Otherwise, it is the offset to the BEGINNING
-   of the first local allocated.  The start of the locals must lie on
-   a STACK_BOUNDARY or else the frame size of leaf functions will not
-   be zero.  */
-#define STARTING_FRAME_OFFSET (TARGET_64BIT ? 16 : 8)
+   of the first local allocated.
+
+   On the 32-bit ports, we reserve one slot for the previous frame
+   pointer and one fill slot.  The fill slot is for compatibility
+   with HP compiled programs.  On the 64-bit ports, we reserve one
+   slot for the previous frame pointer.  */
+#define STARTING_FRAME_OFFSET 8
+
+/* Define STACK_ALIGNMENT_NEEDED to zero to disable final alignment
+   of the stack.  The default is to align it to STACK_BOUNDARY.  */
+#define STACK_ALIGNMENT_NEEDED 0
 
 /* If we generate an insn to push BYTES bytes,
    this says how many the stack pointer really advances by.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a3c68a2923a783b3f4e3f5294908d1eb973beda5..2d20c4f568cd471289bf7ff9fdb2c9e9dd88a5ca 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2816,6 +2816,16 @@ value @code{STARTING_FRAME_OFFSET}.
 @c i'm not sure if the above is still correct.. had to change it to get
 @c rid of an overfull.  --mew 2feb93
 
+@findex STACK_ALIGNMENT_NEEDED
+@item STACK_ALIGNMENT_NEEDED
+Define to zero to disable final alignment of the stack during reload.
+The non-zero default for this macro is suitable for most ports.
+
+On ports where @code{STARTING_FRAME_OFFSET} is non-zero or where there
+is a register save block following the local block that doesn't require
+alignment to @code{STACK_BOUNDARY}, it may be beneficial to disable
+stack alignment and do it in the backend.
+
 @findex STACK_POINTER_OFFSET
 @item STACK_POINTER_OFFSET
 Offset from the stack pointer register to the first location at which
diff --git a/gcc/function.c b/gcc/function.c
index 851dd246ee98402461321184c124fe5c69b531de..73527bc037e453981bf59144b33ba7969987d55d 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -70,6 +70,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #define LOCAL_ALIGNMENT(TYPE, ALIGNMENT) ALIGNMENT
 #endif
 
+#ifndef STACK_ALIGNMENT_NEEDED
+#define STACK_ALIGNMENT_NEEDED 1
+#endif
+
 /* Some systems use __main in a way incompatible with its use in gcc, in these
    cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
    give the same symbol without quotes for an alternative entry point.  You
@@ -566,16 +570,27 @@ assign_stack_local_1 (mode, size, align, function)
   frame_off = STARTING_FRAME_OFFSET % frame_alignment;
   frame_phase = frame_off ? frame_alignment - frame_off : 0;
 
-  /* Round frame offset to that alignment.
-     We must be careful here, since FRAME_OFFSET might be negative and
-     division with a negative dividend isn't as well defined as we might
-     like.  So we instead assume that ALIGNMENT is a power of two and
-     use logical operations which are unambiguous.  */
+  /* Round the frame offset to the specified alignment.  The default is
+     to always honor requests to align the stack, but a port may choose to
+     do its own stack alignment by defining STACK_ALIGNMENT_NEEDED to zero.  */
+  if (STACK_ALIGNMENT_NEEDED
+      || mode != BLKmode
+      || size != 0)
+    {
+      /* We must be careful here, since FRAME_OFFSET might be negative and
+	 division with a negative dividend isn't as well defined as we might
+	 like.  So we instead assume that ALIGNMENT is a power of two and
+	 use logical operations which are unambiguous.  */
 #ifdef FRAME_GROWS_DOWNWARD
-  function->x_frame_offset = FLOOR_ROUND (function->x_frame_offset - frame_phase, alignment) + frame_phase;
+      function->x_frame_offset
+	= (FLOOR_ROUND (function->x_frame_offset - frame_phase, alignment)
+	   + frame_phase);
 #else
-  function->x_frame_offset = CEIL_ROUND (function->x_frame_offset - frame_phase, alignment) + frame_phase;
+      function->x_frame_offset
+	= (CEIL_ROUND (function->x_frame_offset - frame_phase, alignment)
+	   + frame_phase);
 #endif
+    }
 
   /* On a big-endian machine, if we are allocating more space than we will use,
      use the least significant bytes of those that are allocated.  */