diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d54214d00aad2f4bea7ed43f229ea5deb7c29944..9d55846489c4b5c5ae28eee203b90028f3db674f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+Sat Apr  1 02:05:29 MET DST 2000  Jan Hubicka  <jh@suse.cz>
+
+	* builtins.c  (expand_builtin_apply):  Pass proper parameters to
+	allocate_dynamic_stack_space.
+	* calls.c (emit_call_1):  Do not adjust stack pointer for SIB,
+	update stack_pointer_delta; do not update arg_space_so_far.
+	(compute_argument_block_size): Use stack_pointer_delta instead of
+	pending_stack_adjust and arg_space_so_far.
+	(expand_call): Add sanity checking for stack_pointer_delta;
+	save and restore stack_pointer_delta for SIB, use
+	stack_pointer_delta for alignment; do not update arg_space_so_far.
+	(emit_library_call_value_1): Use stack_pointer_delta for alignment.
+	(store_one_arg): Do not update arg_space_so_far.
+	* explow.c (adjust_stack, anti_adjust_stack): Update
+	stack_pointer_delta.
+	(allocate_dynamic_stack_space): Add sanity checking for
+	stack_pointer_delta.
+	* expr.c (init_expr, clear_pending_stack_adjust): Clear
+	stack_pointer_delta.
+	(emit_push_insn): Update stack_pointer_delta.
+	* function.h (struct expr_status): Add x_stack_pointer_delta;
+	remove x_arg_space_so_far.
+	(arg_space_so_far): Remove.
+	(stack_pointer_delta): New macro.
+
 2000-03-31  Zack Weinberg  <zack@wolery.cumb.org>
 
 	* cpplib.h: Merge struct cpp_options into struct cpp_reader.
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 63b17f702563bd0f7a37d056166846aefe829264..3540fccda121647c360c78323c3c8b1ddf040778 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -871,8 +871,6 @@ expand_builtin_apply (function, arguments, argsize)
   /* Create a block where the return registers can be saved.  */
   result = assign_stack_local (BLKmode, apply_result_size (), -1);
 
-  /* ??? The argsize value should be adjusted here.  */
-
   /* Fetch the arg pointer from the ARGUMENTS block.  */
   incoming_args = gen_reg_rtx (Pmode);
   emit_move_insn (incoming_args,
@@ -901,7 +899,7 @@ expand_builtin_apply (function, arguments, argsize)
      haven't figured out how the calling convention macros effect this,
      but it's likely that the source and/or destination addresses in
      the block copy will need updating in machine specific ways.  */
-  dest = allocate_dynamic_stack_space (argsize, 0, 0);
+  dest = allocate_dynamic_stack_space (argsize, 0, BITS_PER_UNIT);
   emit_block_move (gen_rtx_MEM (BLKmode, dest),
 		   gen_rtx_MEM (BLKmode, incoming_args),
 		   argsize, PARM_BOUNDARY);
diff --git a/gcc/calls.c b/gcc/calls.c
index 0148391a6c07e2843c734beafd74a5887cd6ef7c..ca4affddada9c827e6aabb8fe5f1395ef3cccbfc 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -577,6 +577,7 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size,
 			       CALL_INSN_FUNCTION_USAGE (call_insn));
       rounded_stack_size -= n_popped;
       rounded_stack_size_rtx = GEN_INT (rounded_stack_size);
+      stack_pointer_delta -= n_popped;
     }
 
   if (!ACCUMULATE_OUTGOING_ARGS)
@@ -588,10 +589,6 @@ emit_call_1 (funexp, fndecl, funtype, stack_size, rounded_stack_size,
 	 If returning from the subroutine does pop the args, indicate that the
 	 stack pointer will be changed.  */
 
-      /* The space for the args is no longer waiting for the call; either it
-	 was popped by the call, or it'll be popped below.  */
-      arg_space_so_far -= rounded_stack_size - n_popped;
-
       if (rounded_stack_size != 0)
 	{
 	  if (flag_defer_pop && inhibit_defer_pop == 0
@@ -1305,7 +1302,14 @@ compute_argument_block_size (reg_parm_stack_space, args_size,
 #ifdef PREFERRED_STACK_BOUNDARY
       preferred_stack_boundary /= BITS_PER_UNIT;
       if (preferred_stack_boundary > 1)
-	args_size->var = round_up (args_size->var, preferred_stack_boundary);
+	{
+	  /* We don't handle this case yet.  To handle it correctly we have
+	     to add the delta, round and subtract the delta.  
+	     Currently no machine description requires this support.  */
+	  if (stack_pointer_delta & (preferred_stack_boundary - 1))
+	    abort();
+	  args_size->var = round_up (args_size->var, preferred_stack_boundary);
+	}
 #endif
 
       if (reg_parm_stack_space > 0)
@@ -1330,13 +1334,11 @@ compute_argument_block_size (reg_parm_stack_space, args_size,
       if (preferred_stack_boundary < 1)
 	preferred_stack_boundary = 1;
       args_size->constant = (((args_size->constant
-			       + arg_space_so_far
-			       + pending_stack_adjust
+			       + stack_pointer_delta
 			       + preferred_stack_boundary - 1)
 			      / preferred_stack_boundary
 			      * preferred_stack_boundary)
-			     - arg_space_so_far
-			     - pending_stack_adjust);
+			     - stack_pointer_delta);
 #endif
 
       args_size->constant = MAX (args_size->constant,
@@ -1813,6 +1815,7 @@ expand_call (exp, target, ignore)
   rtx old_stack_level = 0;
   int old_pending_adj = 0;
   int old_inhibit_defer_pop = inhibit_defer_pop;
+  int old_stack_allocated;
   rtx call_fusage;
   register tree p;
   register int i;
@@ -2118,6 +2121,7 @@ expand_call (exp, target, ignore)
 	 recursion call can be ignored if we indeed use the tail recursion
 	 call expansion.  */
       int save_pending_stack_adjust = pending_stack_adjust;
+      int save_stack_pointer_delta = stack_pointer_delta;
 
       /* Use a new sequence to hold any RTL we generate.  We do not even
 	 know if we will use this RTL yet.  The final decision can not be
@@ -2135,6 +2139,7 @@ expand_call (exp, target, ignore)
       /* Restore the original pending stack adjustment for the sibling and
 	 normal call cases below.  */
       pending_stack_adjust = save_pending_stack_adjust;
+      stack_pointer_delta = save_stack_pointer_delta;
     }
 
   function_call_count++;
@@ -2180,6 +2185,7 @@ expand_call (exp, target, ignore)
 	 recursion call can be ignored if we indeed use the tail recursion
 	 call expansion.  */
       int save_pending_stack_adjust;
+      int save_stack_pointer_delta;
       rtx insns;
       rtx before_call, next_arg_reg;
 
@@ -2217,6 +2223,7 @@ expand_call (exp, target, ignore)
 	  /* State variables we need to save and restore between
 	     iterations.  */
 	  save_pending_stack_adjust = pending_stack_adjust;
+	  save_stack_pointer_delta = stack_pointer_delta;
 	}
 
       /* Other state variables that we must reinitialize each time
@@ -2411,6 +2418,8 @@ expand_call (exp, target, ignore)
       if (is_const || is_malloc)
 	start_sequence ();
 
+      old_stack_allocated =  stack_pointer_delta - pending_stack_adjust;
+
       /* If we have no actual push instructions, or shouldn't use them,
 	 make space for all args right now.  */
 
@@ -2592,22 +2601,21 @@ expand_call (exp, target, ignore)
 	  if (pending_stack_adjust && ! is_const
 	      && ! inhibit_defer_pop)
 	    {
+	      int adjust;
 	      args_size.constant = (unadjusted_args_size
 				    + ((pending_stack_adjust
 					+ args_size.constant
-					+ arg_space_so_far
 					- unadjusted_args_size)
 				       % (preferred_stack_boundary
 					  / BITS_PER_UNIT)));
-	      pending_stack_adjust -= (args_size.constant
-				       - unadjusted_args_size);
-	      do_pending_stack_adjust ();
+	      adjust = (pending_stack_adjust - args_size.constant
+		        + unadjusted_args_size);
+	      adjust_stack (GEN_INT (adjust));
+	      pending_stack_adjust = 0;
 	    }
 	  else if (argblock == 0)
 	    anti_adjust_stack (GEN_INT (args_size.constant
 					- unadjusted_args_size));
-	  arg_space_so_far += args_size.constant - unadjusted_args_size;
-
 	  /* Now that the stack is properly aligned, pops can't safely
 	     be deferred during the evaluation of the arguments.  */
 	  NO_DEFER_POP;
@@ -2674,7 +2682,6 @@ expand_call (exp, target, ignore)
 #ifdef PREFERRED_STACK_BOUNDARY
       /* If we pushed args in forward order, perform stack alignment
 	 after pushing the last arg.  */
-      /* ??? Fix for arg_space_so_far.  */
       if (!PUSH_ARGS_REVERSED && argblock == 0)
 	anti_adjust_stack (GEN_INT (args_size.constant
 				    - unadjusted_args_size));
@@ -2746,6 +2753,11 @@ expand_call (exp, target, ignore)
 		    | (nothrow ? ECF_NOTHROW : 0)
 		    | (pass == 0 ? ECF_SIBCALL : 0)));
 
+      /* Verify that we've deallocated all the stack we used.  */
+      if (pass
+          && old_stack_allocated != stack_pointer_delta - pending_stack_adjust)
+	abort();
+
       /* If call is cse'able, make appropriate pair of reg-notes around it.
 	 Test valreg so we don't crash; may safely ignore `const'
 	 if return type is void.  Disable for PARALLEL return values, because
@@ -3025,10 +3037,11 @@ expand_call (exp, target, ignore)
 	     zero out the sequence.  */
 	  if (sibcall_failure)
 	    tail_call_insns = NULL_RTX;
-
 	  /* Restore the pending stack adjustment now that we have
 	     finished generating the sibling call sequence.  */
+
 	  pending_stack_adjust = save_pending_stack_adjust;
+	  stack_pointer_delta = save_stack_pointer_delta;
 	}
       else
 	normal_call_insns = insns;
@@ -3327,8 +3340,12 @@ emit_library_call_value_1 (retval, orgfun, value, no_queue, outmode, nargs, p)
 
   original_args_size = args_size;
 #ifdef PREFERRED_STACK_BOUNDARY
-  args_size.constant = (((args_size.constant + (STACK_BYTES - 1))
-			 / STACK_BYTES) * STACK_BYTES);
+  args_size.constant = (((args_size.constant
+			  + stack_pointer_delta
+			  + STACK_BYTES - 1)
+			  / STACK_BYTES
+			  * STACK_BYTES)
+			 - stack_pointer_delta);
 #endif
 
   args_size.constant = MAX (args_size.constant,
@@ -4043,8 +4060,6 @@ store_one_arg (arg, argblock, may_be_alloca, variable_size,
 		      partial, reg, used - size, argblock,
 		      ARGS_SIZE_RTX (arg->offset), reg_parm_stack_space,
 		      ARGS_SIZE_RTX (arg->alignment_pad));
-
-      arg_space_so_far += used;
     }
   else
     {
@@ -4072,7 +4087,6 @@ store_one_arg (arg, argblock, may_be_alloca, variable_size,
 	  excess = (arg->size.constant - int_size_in_bytes (TREE_TYPE (pval))
 		    + partial * UNITS_PER_WORD);
 	  size_rtx = expr_size (pval);
-	  arg_space_so_far += excess + INTVAL (size_rtx);
 	}
 
       emit_push_insn (arg->value, arg->mode, TREE_TYPE (pval), size_rtx,
diff --git a/gcc/explow.c b/gcc/explow.c
index 5eec1d7c28ba3bfd74f3a53d368bdb496aa00fd3..0f067caf215f0ca74bf776742c036c877b618c39 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -852,6 +852,11 @@ adjust_stack (adjust)
   if (adjust == const0_rtx)
     return;
 
+  /* We expect all variable sized adjustments to be a multiple of
+     PREFERRED_STACK_BOUNDARY.  */
+  if (GET_CODE (adjust) == CONST_INT)
+    stack_pointer_delta -= INTVAL (adjust);
+
   temp = expand_binop (Pmode,
 #ifdef STACK_GROWS_DOWNWARD
 		       add_optab,
@@ -878,6 +883,11 @@ anti_adjust_stack (adjust)
   if (adjust == const0_rtx)
     return;
 
+  /* We expect all variable sized adjustments to be a multiple of
+     PREFERRED_STACK_BOUNDARY.  */
+  if (GET_CODE (adjust) == CONST_INT)
+    stack_pointer_delta += INTVAL (adjust);
+
   temp = expand_binop (Pmode,
 #ifdef STACK_GROWS_DOWNWARD
 		       sub_optab,
@@ -1295,6 +1305,13 @@ allocate_dynamic_stack_space (size, target, known_align)
 
   do_pending_stack_adjust ();
 
+ /* We ought always to be called at the top level, and the stack ought to be
+    properly aligned.  */
+#ifdef PREFERRED_STACK_BOUNDARY
+  if (stack_pointer_delta % (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT))
+    abort ();
+#endif
+
   /* If needed, check that we have the required amount of stack.  Take into
      account what has already been checked.  */
   if (flag_stack_check && ! STACK_CHECK_BUILTIN)
diff --git a/gcc/expr.c b/gcc/expr.c
index c1866a5556c7fc5ea1936c28a950ea891a5944b7..0302d959459031acadb85fed9a95990e323c4410 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -306,7 +306,7 @@ init_expr ()
 
   pending_chain = 0;
   pending_stack_adjust = 0;
-  arg_space_so_far = 0;
+  stack_pointer_delta = 0;
   inhibit_defer_pop = 0;
   saveregs_value = 0;
   apply_args_value = 0;
@@ -2996,6 +2996,7 @@ emit_push_insn (x, mode, type, size, align, partial, reg, extra,
 	      && where_pad != none && where_pad != stack_direction)
 	    anti_adjust_stack (GEN_INT (extra));
 
+	  stack_pointer_delta += INTVAL (size) - used;
 	  move_by_pieces (gen_rtx_MEM (BLKmode, gen_push_operand ()), xinner,
 			  INTVAL (size) - used, align);
 
@@ -3236,7 +3237,10 @@ emit_push_insn (x, mode, type, size, align, partial, reg, extra,
 
 #ifdef PUSH_ROUNDING
       if (args_addr == 0 && PUSH_ARGS)
-	addr = gen_push_operand ();
+	{
+	  addr = gen_push_operand ();
+	  stack_pointer_delta += PUSH_ROUNDING (GET_MODE_SIZE (mode));
+	}
       else
 #endif
 	{
@@ -9121,7 +9125,10 @@ clear_pending_stack_adjust ()
       && EXIT_IGNORE_STACK
       && ! (DECL_INLINE (current_function_decl) && ! flag_no_inline)
       && ! flag_inline_functions)
-    pending_stack_adjust = 0;
+    {
+      stack_pointer_delta -= pending_stack_adjust,
+      pending_stack_adjust = 0;
+    }
 #endif
 }
 
diff --git a/gcc/function.h b/gcc/function.h
index 884b98a3b734f464bb24c7184c316a960d9688ce..08d438ab67e880f9ccf73a94e49bf79819d2a97d 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -130,10 +130,6 @@ struct expr_status
      These are the arguments to function calls that have already returned.  */
   int x_pending_stack_adjust;
 
-  /* Number of units that we should eventually pop off the stack.
-     These are the arguments to function calls that have not happened yet.  */
-  int x_arg_space_so_far;
-
   /* Under some ABIs, it is the caller's responsibility to pop arguments
      pushed for function calls.  A naive implementation would simply pop
      the arguments immediately after each call.  However, if several
@@ -151,6 +147,12 @@ struct expr_status
      NO_DEFER_POP and OK_DEFER_POP.  */
   int x_inhibit_defer_pop;
 
+  /* If PREFERRED_STACK_BOUNDARY and PUSH_ROUNDING are defined, the stack
+     boundary can be momentarily unaligned while pushing the arguments.
+     Record the delta since last aligned boundary here in order to get
+     stack alignment in the nested function calls working right.  */
+  int x_stack_pointer_delta;
+
   /* Nonzero means __builtin_saveregs has already been done in this function.
      The value is the pseudoreg containing the value __builtin_saveregs
      returned.  */
@@ -167,12 +169,12 @@ struct expr_status
 };
 
 #define pending_stack_adjust (cfun->expr->x_pending_stack_adjust)
-#define arg_space_so_far (cfun->expr->x_arg_space_so_far)
 #define inhibit_defer_pop (cfun->expr->x_inhibit_defer_pop)
 #define saveregs_value (cfun->expr->x_saveregs_value)
 #define apply_args_value (cfun->expr->x_apply_args_value)
 #define forced_labels (cfun->expr->x_forced_labels)
 #define pending_chain (cfun->expr->x_pending_chain)
+#define stack_pointer_delta (cfun->expr->x_stack_pointer_delta)
 
 /* This structure can save all the important global and static variables
    describing the status of the current function.  */