diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6a37fb5398a26ed497121da3216ba5fd96501d86..f543a744a91ec9de395b9197300a237ebc1728f2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,52 @@
+Wed May 15 10:38:27 CEST 2002  Jan Hubicka  <jh@suse.cz>
+
+	* invoke.texi (-malign-double): Re-add lost warning.
+
+	* i386-protos.h (x86_output_mi_thunk): Declare.
+	* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
+	* i386.c (x86_output_mi_thunk): ... here; handle 64bits.
+
+	* dwarf2out.c (output_call_frame_info): Do not skip unwind info
+	when flag_asynchronous_unwind_tables is set.
+
+	* flags.h (flag_reorder_functions): Declare.
+	* function.c (prepare_function_start): Initialize frequency.
+	* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
+	* Makefile.in (predict.o): Add dependency on target.h and params.h.
+	* defaults.h (HOT_TEXT_SECTION_NAME,
+	UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
+	* predict.c (choose_function_section): New function.
+	(estimate_bb_frequencies): Use it.
+	* toplev.c (flag_reorder_functions): New global variable.
+	(f_options): Add reorder-functions.
+	(parse_options_and_default_flags): Set flag_reorder_functions.
+	* varasm.c (assemble_start_function): Bypass function alignment
+	for never executed functions.
+	* invoke.texi (-freorder-blocks, -freorder-functions): Document.
+	(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
+	* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
+	Document.
+
+	Thu Jan  3 21:52:09 CET 2002  Jan Hubicka  <jh@suse.cz>
+
+	* predict.c: Include profile.h.
+	(MIN_COUNT): Rename to MIN_COUNT_FRACTION.
+	(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
+	Use the information about maximal counter in the program.
+
+	Thu Dec 20 22:14:00 CET 2001  Jan Hubicka  <jh@suse.cz>
+
+	* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
+	probably_never_executed_bb_p): New functions.
+	* cfgcleanup.c (outgoing_edges_match): Use them.
+	* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
+	(maybe_hot_bb_p, probably_cold_bb_p,
+	probably_never_executed_bb_p): New functions.
+
+	* function.h (function): Add new field function_frequency.
+	* predict.c (compute_function_frequency): New function.
+	(estimate_probability): Call it.
+
 2002-03-09  Jakub Jelinek  <jakub@redhat.com>
 
 	PR optimization/5172, optimization/5200
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 937b6e50d2f8ee1fc441568c0d338ebfb6809e09..5791e8e238b170154f0af0026efd205c9db9120a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
    varray.h function.h $(TM_P_H)
 predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
    insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
-   $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
+   $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
+   $(PARAMS_H) $(TARGET_H)
 lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
 bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
    flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 05b4b7c9002758a3129ac361e2bf92dd105266b7..5615b145f020d375c6a93be5d7c2a163dec5786b 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -628,6 +628,9 @@ extern rtx emit_block_insn_before	PARAMS ((rtx, rtx, basic_block));
 extern void estimate_probability        PARAMS ((struct loops *));
 extern void note_prediction_to_br_prob	PARAMS ((void));
 extern void expected_value_to_br_prob	PARAMS ((void));
+extern bool maybe_hot_bb_p		PARAMS ((basic_block));
+extern bool probably_cold_bb_p		PARAMS ((basic_block));
+extern bool probably_never_executed_bb_p PARAMS ((basic_block));
 
 /* In flow.c */
 extern void init_flow                   PARAMS ((void));
diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 826569ad723ee1b583ca63512b0181271c3fe604..fcf6944d4bb8ce117aa1f4bb03b03f0989c4c7ea 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
 	 roughly similar.  */
       if (match
 	  && !optimize_size
-	  && bb1->frequency > BB_FREQ_MAX / 1000
-	  && bb2->frequency > BB_FREQ_MAX / 1000)
+	  && maybe_hot_bb_p (bb1)
+	  && maybe_hot_bb_p (bb2))
 	{
 	  int prob2;
 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 8321d478ec017dddfc1abe9995e8e215722282f1..b3b168845792acf478b6e83995d33f654ee2b904 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
 extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
 							int));
 extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
+extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9c328b72fb236fb2c72fe344444fdfbf2a4ebb49..8d939f2e0984b5ab05bb1b20d419d6877b196b90 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
    while (pos < FIRST_PSEUDO_REGISTER)
      reg_alloc_order [pos++] = 0;
 }
+
+void
+x86_output_mi_thunk (file, delta, function)
+     FILE *file;
+     int delta;
+     tree function;
+{
+  tree parm;
+  rtx xops[3];
+
+  if (ix86_regparm > 0)
+    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
+  else
+    parm = NULL_TREE;
+  for (; parm; parm = TREE_CHAIN (parm))
+    if (TREE_VALUE (parm) == void_type_node)
+      break;
+
+  xops[0] = GEN_INT (delta);
+  if (TARGET_64BIT)
+    {
+      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
+      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
+      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
+      if (flag_pic)
+	{
+	  fprintf (file, "\tjmp *");
+	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+	  fprintf (file, "@GOTPCREL(%%rip)\n");
+	}
+      else
+	{
+	  fprintf (file, "\tjmp ");
+	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+	  fprintf (file, "\n");
+	}
+    }
+  else
+    {
+      if (parm)
+	xops[1] = gen_rtx_REG (SImode, 0);
+      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
+	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
+      else
+	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
+      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
+
+      if (flag_pic)
+	{
+	  xops[0] = pic_offset_table_rtx;
+	  xops[1] = gen_label_rtx ();
+	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+	  if (ix86_regparm > 2)
+	    abort ();
+	  output_asm_insn ("push{l}\t%0", xops);
+	  output_asm_insn ("call\t%P1", xops);
+	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
+	  output_asm_insn ("pop{l}\t%0", xops);
+	  output_asm_insn
+	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
+	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
+	  output_asm_insn
+	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
+	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
+	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
+	}
+      else
+	{
+	  fprintf (file, "\tjmp ");
+	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+	  fprintf (file, "\n");
+	}
+    }
+}
diff --git a/gcc/config/i386/unix.h b/gcc/config/i386/unix.h
index 15a07018cf3671f8606d155242703bd87caa529e..f7e38b48e9a17a53f8769dc6327aa4898d2c74c6 100644
--- a/gcc/config/i386/unix.h
+++ b/gcc/config/i386/unix.h
@@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA.  */
 
 /* Output code to add DELTA to the first argument, and then jump to FUNCTION.
    Used for C++ multiple inheritance.  */
-#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION)	    \
-do {									    \
-  tree parm;								    \
-  rtx xops[3];								    \
-									    \
-  if (ix86_regparm > 0)							    \
-    parm = TYPE_ARG_TYPES (TREE_TYPE (function));			    \
-  else									    \
-    parm = NULL_TREE;							    \
-  for (; parm; parm = TREE_CHAIN (parm))				    \
-    if (TREE_VALUE (parm) == void_type_node)				    \
-      break;								    \
-									    \
-  xops[0] = GEN_INT (DELTA);						    \
-  if (parm)								    \
-    xops[1] = gen_rtx_REG (SImode, 0);					    \
-  else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION))))	    \
-    xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));   \
-  else									    \
-    xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));   \
-  output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);			    \
-									    \
-  if (flag_pic && !TARGET_64BIT)					    \
-    {									    \
-      xops[0] = pic_offset_table_rtx;					    \
-      xops[1] = gen_label_rtx ();					    \
-      xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");        \
-									    \
-      if (ix86_regparm > 2)						    \
-	abort ();							    \
-      output_asm_insn ("push{l}\t%0", xops);				    \
-      output_asm_insn ("call\t%P1", xops);				    \
-      ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1]));   \
-      output_asm_insn ("pop{l}\t%0", xops);				    \
-      output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
-      xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0));	    \
-      output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
-	               xops);						    \
-      asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n");			    \
-      asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n");			    \
-    }									    \
-  else if (flag_pic && TARGET_64BIT)					    \
-    {									    \
-      fprintf (FILE, "\tjmp *");					    \
-      assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0));	    \
-      fprintf (FILE, "@GOTPCREL(%%rip)\n");				    \
-    }									    \
-  else									    \
-    {									    \
-      fprintf (FILE, "\tjmp ");						    \
-      assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0));	    \
-      fprintf (FILE, "\n");						    \
-    }									    \
-} while (0)
+#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
+    x86_output_mi_thunk (FILE, DELTA, FUNCTION)
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 7a45877f3297aaebc7ad54bbfccb616ce325ac04..12f363a33d8815375759286d98f3db4bd92a413a 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -517,4 +517,12 @@ You Lose!  You must define PREFERRED_DEBUGGING_TYPE!
    && !ROUND_TOWARDS_ZERO)
 #endif
 
+#ifndef HOT_TEXT_SECTION_NAME
+#define HOT_TEXT_SECTION_NAME "text.hot"
+#endif
+
+#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
+#endif
+
 #endif  /* ! GCC_DEFAULTS_H */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 001ab25b5f8f43a2e9f3e5bc65cdba3cddf2dc0f..1190c97965e8ef28641514bd51fad590f8070977 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -278,6 +278,7 @@ in the following sections.
 -fomit-frame-pointer  -foptimize-register-move @gol
 -foptimize-sibling-calls  -fprefetch-loop-arrays @gol
 -freduce-all-givs -fregmove  -frename-registers @gol
+-freorder-blocks -freorder-functions @gol
 -frerun-cse-after-loop  -frerun-loop-opt @gol
 -fschedule-insns  -fschedule-insns2 @gol
 -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
@@ -3712,6 +3713,23 @@ non-determinism is of paramount import.  This switch allows users to
 reduce non-determinism, possibly at the expense of inferior
 optimization.
 
+@item -freorder-blocks
+@opindex freorder-blocks
+Reorder basic blocks in the compiled function in order to reduce the number
+of taken branches and improve code locality.
+
+@item -freorder-functions
+@opindex freorder-functions
+Reorder functions in the object file in order to improve code locality.
+This is implemented by using the special
+subsections @code{text.hot} for the most frequently executed functions and
+@code{text.unlikely} for unlikely executed functions.  Reordering is done by
+the linker, so the object file format must support named sections and the
+linker must place them in a reasonable way.
+
+Profile feedback must also be available to make this option effective.  See
+@option{-fprofile-arcs} for details.
+
 @item -fstrict-aliasing
 @opindex fstrict-aliasing
 Allows the compiler to assume the strictest aliasing rules applicable to
@@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
 is unrolled, and if the loop is unrolled, it determines how many times
 the loop code is unrolled.
 
+@item hot-bb-count-fraction
+Select the fraction of the maximal count of repetitions of a basic block in
+the program that a given basic block needs to have to be considered hot.
+
+@item hot-bb-frequency-fraction
+Select the fraction of the maximal frequency of executions of a basic block
+in the function that a given basic block needs to have to be considered hot.
 @end table
 @end table
 
@@ -7389,6 +7414,10 @@ boundary.  Aligning @code{double} variables on a two word boundary will
 produce code that runs somewhat faster on a @samp{Pentium} at the
 expense of more memory.
 
+@strong{Warning:} if you use the @samp{-malign-double} switch,
+structures containing the above types will be aligned differently than
+the published application binary interface specifications for the 386.
+
 @item -m128bit-long-double
 @opindex m128bit-long-double
 Control the size of @code{long double} type. i386 application binary interface
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8d4e92522c53909aabaf17bde72feb27e06c9fb3..bbb2c5010bdec8ccc4ef132d549dba39381f7fe2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
 is enough.  The MIPS port uses this to sort all functions after all data
 declarations.
 
+@findex HOT_TEXT_SECTION_NAME
+@item HOT_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing most
+frequently executed functions of the program.  If not defined, GCC will provide
+a default definition if the target supports named sections.
+
+@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing unlikely
+executed functions in the program.  If not defined, GCC provides a default.
+
 @findex DATA_SECTION_ASM_OP
 @item DATA_SECTION_ASM_OP
 A C expression whose value is a string, including spacing, containing the
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 47edc4339e7aa13f1ec1d4f9ace37820b9d96f3b..2fa9f64817e9a2a4552acb9f0be59c409c26ab72 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
       fde = &fde_table[i];
 
       /* Don't emit EH unwind info for leaf functions that don't need it.  */
-      if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
+      if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
+	  && !  fde->uses_eh_lsda)
 	continue;
 
       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);
diff --git a/gcc/flags.h b/gcc/flags.h
index b9fca23d29cc62603f0be88648bdf64a8cc6d91d..efcc7716e3ecb316cfbb7566045a3f30f20ed3f5 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
 
 extern int flag_reorder_blocks;
 
+/* Nonzero if functions should be reordered.  */
+
+extern int flag_reorder_functions;
+
 /* Nonzero if registers should be renamed.  */
 
 extern int flag_rename_registers;
diff --git a/gcc/function.c b/gcc/function.c
index 9f1c00a08b1a1790cea31c0f6479bd1afac60782..5bd70a0560f28c313b5259acd4dec599663d1ceb 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -6320,6 +6320,8 @@ prepare_function_start ()
 
   cfun->arc_profile = profile_arc_flag || flag_test_coverage;
 
+  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+
   (*lang_hooks.function.init) (cfun);
   if (init_machine_status)
     (*init_machine_status) (cfun);
diff --git a/gcc/function.h b/gcc/function.h
index bc789c8181a85ba813459655c2886a3b3368fd39..912f8513c53745ffa81d3d8af8749da5dd72e43c 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -481,6 +481,19 @@ struct function
 
   /* Nonzero if code to initialize arg_pointer_save_area has been emited.  */
   unsigned int arg_pointer_save_area_init : 1;
+
+  /* How commonly executed the function is.  Initialized during branch
+     probabilities pass.  */
+  enum function_frequency {
+    /* This function most likely won't be executed at all.
+       (set only when profile feedback is available).  */
+    FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
+    /* The default value.  */
+    FUNCTION_FREQUENCY_NORMAL,
+    /* Optimize this function hard
+       (set only when profile feedback is available).  */
+    FUNCTION_FREQUENCY_HOT
+  } function_frequency;
 };
 
 /* The function currently being compiled.  */
diff --git a/gcc/params.def b/gcc/params.def
index 2b2cfe67c4dc5dcd189cb09c85fc44bfb749633e..de55ecc5841e5798250f1043bc1538a21f24d225 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
 	 "max-unrolled-insns",
 	 "The maximum number of instructions to consider to unroll in a loop",
 	 100)
+
+DEFPARAM(HOT_BB_COUNT_FRACTION,
+	 "hot-bb-count-fraction",
+	 "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
+	 10000)
+DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
+	 "hot-bb-frequency-fraction",
+	 "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
+	 1000)
 /*
 Local variables:
 mode:c
diff --git a/gcc/predict.c b/gcc/predict.c
index 5896c10a1919961a2d09abdd3192596b19343ea2..f457817956d5612f7e47f3a40181be372a087aec 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "recog.h"
 #include "expr.h"
 #include "predict.h"
+#include "profile.h"
 #include "real.h"
+#include "params.h"
+#include "target.h"
 
 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
                    REAL_BB_FREQ_MAX.  */
@@ -75,6 +78,8 @@ static void process_note_predictions	 PARAMS ((basic_block, int *, int *,
 static void process_note_prediction	 PARAMS ((basic_block, int *, int *,
                                                   sbitmap *, int, int));
 static bool last_basic_block_p           PARAMS ((basic_block));
+static void compute_function_frequency	 PARAMS ((void));
+static void choose_function_section	 PARAMS ((void));
 
 /* Information we hold about each branch predictor.
    Filled using information from predict.def.  */
@@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= {
   {NULL, 0, 0}
 };
 #undef DEF_PREDICTOR
+
+/* Return true in case BB can be CPU intensive and should be optimized
+   for maximal performance.  */
+
+bool
+maybe_hot_bb_p (bb)
+     basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities
+      && (bb->count
+	  < profile_info.max_counter_in_program
+	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+    return false;
+  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+    return false;
+  return true;
+}
+
+/* Return true in case BB is cold and should be optimized for size.  */
+
+bool
+probably_cold_bb_p (bb)
+     basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities
+      && (bb->count
+	  < profile_info.max_counter_in_program
+	  / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+    return true;
+  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+    return true;
+  return false;
+}
+
+/* Return true in case BB is probably never executed.  */
+bool
+probably_never_executed_bb_p (bb)
+	basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities)
+    return ((bb->count + profile_info.count_profiles_merged / 2)
+	    / profile_info.count_profiles_merged) == 0;
+  return false;
+}
+
 /* Return true if the one of outgoing edges is already predicted by
    PREDICTOR.  */
 
@@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops)
   REAL_VALUE_TYPE freq_max;
   enum machine_mode double_mode = TYPE_MODE (double_type_node);
 
-  REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
+  if (flag_branch_probabilities)
+    counts_to_freqs ();
+  else
+    {
+      REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
 
-  REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
+      REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
 
-  REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
-  REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
+      REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
+      REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
 
-  mark_dfs_back_edges ();
-  if (flag_branch_probabilities)
-    {
-      counts_to_freqs ();
-      return;
-    }
+      mark_dfs_back_edges ();
+      /* Fill in the probability values in flowgraph based on the REG_BR_PROB
+         notes.  */
+      for (i = 0; i < n_basic_blocks; i++)
+	{
+	  rtx last_insn = BLOCK_END (i);
 
-  /* Fill in the probability values in flowgraph based on the REG_BR_PROB
-     notes.  */
-  for (i = 0; i < n_basic_blocks; i++)
-    {
-      rtx last_insn = BLOCK_END (i);
+	  if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
+	      /* Avoid handling of conditional jumps jumping to fallthru edge.  */
+	      || BASIC_BLOCK (i)->succ->succ_next == NULL)
+	    {
+	      /* We can predict only conditional jumps at the moment.
+	         Expect each edge to be equally probable.
+	         ?? In the future we want to make abnormal edges improbable.  */
+	      int nedges = 0;
+	      edge e;
 
-      if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
-	  /* Avoid handling of conditional jumps jumping to fallthru edge.  */
-	  || BASIC_BLOCK (i)->succ->succ_next == NULL)
+	      for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+		{
+		  nedges++;
+		  if (e->probability != 0)
+		    break;
+		}
+	      if (!e)
+		for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+		  e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
+	    }
+	}
+
+      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+
+      /* Set up block info for each basic block.  */
+      alloc_aux_for_blocks (sizeof (struct block_info_def));
+      alloc_aux_for_edges (sizeof (struct edge_info_def));
+      for (i = -2; i < n_basic_blocks; i++)
 	{
-	  /* We can predict only conditional jumps at the moment.
-	     Expect each edge to be equally probable.
-	     ?? In the future we want to make abnormal edges improbable.  */
-	  int nedges = 0;
 	  edge e;
+	  basic_block bb;
 
-	  for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+	  if (i == -2)
+	    bb = ENTRY_BLOCK_PTR;
+	  else if (i == -1)
+	    bb = EXIT_BLOCK_PTR;
+	  else
+	    bb = BASIC_BLOCK (i);
+
+	  BLOCK_INFO (bb)->tovisit = 0;
+	  for (e = bb->succ; e; e = e->succ_next)
 	    {
-	      nedges++;
-	      if (e->probability != 0)
-		break;
+
+	      REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
+				   e->probability, 0, double_mode);
+	      REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
+			       RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
+			       real_br_prob_base);
 	    }
-	  if (!e)
-	    for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
-	      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
 	}
-    }
 
-  ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+      /* First compute probabilities locally for each loop from innermost
+         to outermost to examine probabilities for back edges.  */
+      estimate_loops_at_level (loops->tree_root);
 
-  /* Set up block info for each basic block.  */
-  alloc_aux_for_blocks (sizeof (struct block_info_def));
-  alloc_aux_for_edges (sizeof (struct edge_info_def));
-  for (i = -2; i < n_basic_blocks; i++)
-    {
-      edge e;
-      basic_block bb;
+      /* Now fake loop around whole function to finalize probabilities.  */
+      for (i = 0; i < n_basic_blocks; i++)
+	BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
 
-      if (i == -2)
-	bb = ENTRY_BLOCK_PTR;
-      else if (i == -1)
-	bb = EXIT_BLOCK_PTR;
-      else
-	bb = BASIC_BLOCK (i);
+      BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
+      BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
+      propagate_freq (ENTRY_BLOCK_PTR);
 
-      BLOCK_INFO (bb)->tovisit = 0;
-      for (e = bb->succ; e; e = e->succ_next)
+      memcpy (&freq_max, &real_zero, sizeof (real_zero));
+      for (i = 0; i < n_basic_blocks; i++)
+	if (REAL_VALUES_LESS
+	    (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
+	  memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
+		  sizeof (freq_max));
+
+      for (i = -2; i < n_basic_blocks; i++)
 	{
-	
-	  REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
-			       e->probability, 0, double_mode);
-	  REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
-			   RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
-			   real_br_prob_base);
-	}
-    }
+	  basic_block bb;
+	  REAL_VALUE_TYPE tmp;
 
-  /* First compute probabilities locally for each loop from innermost
-     to outermost to examine probabilities for back edges.  */
-  estimate_loops_at_level (loops->tree_root);
+	  if (i == -2)
+	    bb = ENTRY_BLOCK_PTR;
+	  else if (i == -1)
+	    bb = EXIT_BLOCK_PTR;
+	  else
+	    bb = BASIC_BLOCK (i);
 
-  /* Now fake loop around whole function to finalize probabilities.  */
-  for (i = 0; i < n_basic_blocks; i++)
-    BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
+	  REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
+			   real_bb_freq_max);
+	  REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
+	  REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
+	  bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+	}
 
-  BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
-  BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
-  propagate_freq (ENTRY_BLOCK_PTR);
+      free_aux_for_blocks ();
+      free_aux_for_edges ();
+    }
+  compute_function_frequency ();
+  if (flag_reorder_functions)
+    choose_function_section ();
+}
 
-  memcpy (&freq_max, &real_zero, sizeof (real_zero));
+/* Decide whether function is hot, normal or unlikely executed.  */
+static void
+compute_function_frequency ()
+{
+  int i;
+  if (!profile_info.count_profiles_merged
+      || !flag_branch_probabilities)
+    return;
+  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
   for (i = 0; i < n_basic_blocks; i++)
-    if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
-      memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
-	      sizeof (freq_max));
-
-  for (i = -2; i < n_basic_blocks; i++)
     {
-      basic_block bb;
-      REAL_VALUE_TYPE tmp;
-
-      if (i == -2)
-	bb = ENTRY_BLOCK_PTR;
-      else if (i == -1)
-	bb = EXIT_BLOCK_PTR;
-      else
-	bb = BASIC_BLOCK (i);
-
-      REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
-		       real_bb_freq_max);
-      REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
-      REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
-      bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+      basic_block bb = BASIC_BLOCK (i);
+      if (maybe_hot_bb_p (bb))
+	{
+	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
+	  return;
+	}
+      if (!probably_never_executed_bb_p (bb))
+	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
     }
+}
 
-  free_aux_for_blocks ();
-  free_aux_for_edges ();
+/* Choose appropriate section for the function.  */
+static void
+choose_function_section ()
+{
+  if (DECL_SECTION_NAME (current_function_decl)
+      || !targetm.have_named_sections)
+    return;
+  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
+    DECL_SECTION_NAME (current_function_decl) =
+      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
+  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+    DECL_SECTION_NAME (current_function_decl) =
+      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
+		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
 }
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 9213730f62932b94f233b3974e90cb7b758cafa4..5f5eb4628111ab080cdab833aeaa03a8fb11f4bc 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
 
 int flag_reorder_blocks = 0;
 
+/* Nonzero if functions should be reordered.  */
+
+int flag_reorder_functions = 0;
+
 /* Nonzero if registers should be renamed.  */
 
 int flag_rename_registers = 0;
@@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
    N_("Enable basic program profiling code") },
   {"reorder-blocks", &flag_reorder_blocks, 1,
    N_("Reorder basic blocks to improve code placement") },
+  {"reorder-functions", &flag_reorder_functions, 1,
+   N_("Reorder functions to improve code placement") },
   {"rename-registers", &flag_rename_registers, 1,
    N_("Do the register renaming optimization pass") },
   {"cprop-registers", &flag_cprop_registers, 1,
@@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
       flag_strict_aliasing = 1;
       flag_delete_null_pointer_checks = 1;
       flag_reorder_blocks = 1;
+      flag_reorder_functions = 1;
     }
 
   if (optimize >= 3)
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 279dc9085f45cc8b5bc8169d3874dfd88aae2e5b..6fb663e4e886322efaa384450ecd1665990dc41a 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
   /* Handle a user-specified function alignment.
      Note that we still need to align to FUNCTION_BOUNDARY, as above,
      because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all.  */
-  if (align_functions_log > align)
+  if (align_functions_log > align
+      && cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
     {
 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
       ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,