diff --git a/boehm-gc/cord/cordbscs.c b/boehm-gc/cord/cordbscs.c
index b75f5812f1794f3620c57da5920f0b1e19014ed9..9fc894d4946e0da9d066fe1fb18f00ba8d9d9ae0 100644
--- a/boehm-gc/cord/cordbscs.c
+++ b/boehm-gc/cord/cordbscs.c
@@ -361,7 +361,6 @@ CORD CORD_substr_checked(CORD x, size_t i, size_t n)
             return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
         } else {
             register char * result = GC_MALLOC_ATOMIC(n+1);
-            register char * p = result;
             
             if (result == 0) OUT_OF_MEMORY;
             strncpy(result, x+i, n);
diff --git a/boehm-gc/cord/cordprnt.c b/boehm-gc/cord/cordprnt.c
index 667560f2592cbc0ab54ed89562956858f9fc2df1..9c8cc8736a90fc658cb7e6d10b4ff4a9d48cbb4b 100644
--- a/boehm-gc/cord/cordprnt.c
+++ b/boehm-gc/cord/cordprnt.c
@@ -231,8 +231,9 @@ int CORD_vsprintf(CORD * out, CORD format, va_list args)
             	    	goto done;
 		    case 'c':
 			if (width == NONE && prec == NONE) {
-			    register char c = va_arg(args, char);
+			    register char c;
 
+			    c = va_arg(args, char);
 			    CORD_ec_append(result, c);
 			    goto done;
 			}
diff --git a/boehm-gc/cord/de.c b/boehm-gc/cord/de.c
index 18625d2bb3784596b101489263c78d98233f79a2..fda71424176f784a858aa56a56736cc0fc14d22d 100644
--- a/boehm-gc/cord/de.c
+++ b/boehm-gc/cord/de.c
@@ -565,7 +565,6 @@ int argc;
 char ** argv;
 {
     int c;
-    CORD initial;
 
 #if defined(MACINTOSH)
 	console_options.title = "\pDumb Editor";
diff --git a/boehm-gc/cord/de_win.c b/boehm-gc/cord/de_win.c
index 119d0fa044c6e607226c4bca7b19ff1cd377a363..fedbfbe67c25becc9f082aa3bb3e4f0c3d4f351f 100644
--- a/boehm-gc/cord/de_win.c
+++ b/boehm-gc/cord/de_win.c
@@ -268,6 +268,7 @@ LRESULT CALLBACK WndProc (HWND hwnd, UINT message,
 
       case WM_DESTROY:
            PostQuitMessage (0);
+	   GC_win32_free_heap();
            return 0;
       
       case WM_PAINT:
diff --git a/boehm-gc/cord/gc.h b/boehm-gc/cord/gc.h
index 09c8ca8196c865c4ee975329c045ad7ea49956fb..ceabb02f6eb611e5c4fada97e6dd589c4558ac30 100644
--- a/boehm-gc/cord/gc.h
+++ b/boehm-gc/cord/gc.h
@@ -36,11 +36,19 @@
 #endif
 
 #if defined(_MSC_VER) && defined(_DLL)
-#ifdef GC_BUILD
-#define GC_API __declspec(dllexport)
-#else
-#define GC_API __declspec(dllimport)
+# ifdef GC_BUILD
+#   define GC_API __declspec(dllexport)
+# else
+#   define GC_API __declspec(dllimport)
+# endif
 #endif
+
+#if defined(__WATCOMC__) && defined(GC_DLL)
+# ifdef GC_BUILD
+#   define GC_API extern __declspec(dllexport)
+# else
+#   define GC_API extern __declspec(dllimport)
+# endif
 #endif
 
 #ifndef GC_API
@@ -126,7 +134,19 @@ GC_API GC_word GC_max_retries;
 			/* reporting out of memory after heap		*/
 			/* expansion fails.  Initially 0.		*/
 			
-			
+
+GC_API char *GC_stackbottom;	/* Cool end of user stack.		*/
+				/* May be set in the client prior to	*/
+				/* calling any GC_ routines.  This	*/
+				/* avoids some overhead, and 		*/
+				/* potentially some signals that can 	*/
+				/* confuse debuggers.  Otherwise the	*/
+				/* collector attempts to set it 	*/
+				/* automatically.			*/
+				/* For multithreaded code, this is the	*/
+				/* cold end of the stack for the	*/
+				/* primordial thread.			*/
+				
 /* Public procedures */
 /*
  * general purpose allocation routines, with roughly malloc calling conv.
@@ -193,8 +213,8 @@ GC_API size_t GC_size GC_PROTO((GC_PTR object_addr));
 /* If the argument is stubborn, the result will have changes enabled.	*/
 /* It is an error to have changes enabled for the original object.	*/
 /* Follows ANSI conventions for NULL old_object.			*/
-GC_API GC_PTR GC_realloc GC_PROTO((GC_PTR old_object,
-				   size_t new_size_in_bytes));
+GC_API GC_PTR GC_realloc
+	GC_PROTO((GC_PTR old_object, size_t new_size_in_bytes));
 				   
 /* Explicitly increase the heap size.	*/
 /* Returns 0 on failure, 1 on success.  */
@@ -248,6 +268,7 @@ GC_API void GC_gcollect GC_PROTO((void));
 /* than normal pause times for incremental collection.  However,	*/
 /* aborted collections do no useful work; the next collection needs	*/
 /* to start from the beginning.						*/
+/* Return 0 if the collection was aborted, 1 if it succeeded.		*/
 typedef int (* GC_stop_func) GC_PROTO((void));
 GC_API int GC_try_to_collect GC_PROTO((GC_stop_func stop_func));
 
@@ -604,6 +625,10 @@ GC_API void (*GC_is_valid_displacement_print_proc)
 GC_API void (*GC_is_visible_print_proc)
 	GC_PROTO((GC_PTR p));
 
+#if defined(_SOLARIS_PTHREADS) && !defined(SOLARIS_THREADS)
+#   define SOLARIS_THREADS
+#endif
+
 #ifdef SOLARIS_THREADS
 /* We need to intercept calls to many of the threads primitives, so 	*/
 /* that we can locate thread stacks and stop the world.			*/
@@ -660,7 +685,10 @@ GC_API void (*GC_is_visible_print_proc)
 
 #endif /* IRIX_THREADS || LINUX_THREADS */
 
-#if defined(THREADS) && !defined(SRC_M3)
+# if defined(PCR) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || \
+	defined(IRIX_THREADS) || defined(LINUX_THREADS) || \
+	defined(IRIX_JDK_THREADS)
+   	/* Any flavor of threads except SRC_M3.	*/
 /* This returns a list of objects, linked through their first		*/
 /* word.  Its use can greatly reduce lock contention problems, since	*/
 /* the allocation lock can be acquired and released many fewer times.	*/
@@ -691,6 +719,13 @@ extern void GC_thr_init();	/* Needed for Solaris/X86	*/
 # endif
 #endif
 
+#if (defined(_MSDOS) || defined(_MSC_VER)) && (_M_IX86 >= 300) \
+     || defined(_WIN32)
+  /* Win32s may not free all resources on process exit.  */
+  /* This explicitly deallocates the heap.		 */
+    GC_API void GC_win32_free_heap ();
+#endif
+
 #ifdef __cplusplus
     }  /* end of extern "C" */
 #endif
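
For reference, a minimal caller-side sketch of the GC_stackbottom hook this header now documents: the client may store the cold end of its stack before any other GC_ call; otherwise the collector determines it automatically. The sketch is illustrative and not part of the patch, and the address-of-a-local trick assumes a conventional downward-growing stack.

    #include "gc.h"

    int main(void)
    {
        char base;                  /* lives near the cold end of main()'s frame */
        void *p;

        GC_stackbottom = &base;     /* must precede any other GC_ routine */
        GC_init();
        p = GC_MALLOC(128);         /* ordinary collected allocation */
        return p == 0;
    }
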
diff --git a/boehm-gc/include/gc.h b/boehm-gc/include/gc.h
index 09c8ca8196c865c4ee975329c045ad7ea49956fb..ceabb02f6eb611e5c4fada97e6dd589c4558ac30 100644
--- a/boehm-gc/include/gc.h
+++ b/boehm-gc/include/gc.h
@@ -36,11 +36,19 @@
 #endif
 
 #if defined(_MSC_VER) && defined(_DLL)
-#ifdef GC_BUILD
-#define GC_API __declspec(dllexport)
-#else
-#define GC_API __declspec(dllimport)
+# ifdef GC_BUILD
+#   define GC_API __declspec(dllexport)
+# else
+#   define GC_API __declspec(dllimport)
+# endif
 #endif
+
+#if defined(__WATCOMC__) && defined(GC_DLL)
+# ifdef GC_BUILD
+#   define GC_API extern __declspec(dllexport)
+# else
+#   define GC_API extern __declspec(dllimport)
+# endif
 #endif
 
 #ifndef GC_API
@@ -126,7 +134,19 @@ GC_API GC_word GC_max_retries;
 			/* reporting out of memory after heap		*/
 			/* expansion fails.  Initially 0.		*/
 			
-			
+
+GC_API char *GC_stackbottom;	/* Cool end of user stack.		*/
+				/* May be set in the client prior to	*/
+				/* calling any GC_ routines.  This	*/
+				/* avoids some overhead, and 		*/
+				/* potentially some signals that can 	*/
+				/* confuse debuggers.  Otherwise the	*/
+				/* collector attempts to set it 	*/
+				/* automatically.			*/
+				/* For multithreaded code, this is the	*/
+				/* cold end of the stack for the	*/
+				/* primordial thread.			*/
+				
 /* Public procedures */
 /*
  * general purpose allocation routines, with roughly malloc calling conv.
@@ -193,8 +213,8 @@ GC_API size_t GC_size GC_PROTO((GC_PTR object_addr));
 /* If the argument is stubborn, the result will have changes enabled.	*/
 /* It is an error to have changes enabled for the original object.	*/
 /* Follows ANSI conventions for NULL old_object.			*/
-GC_API GC_PTR GC_realloc GC_PROTO((GC_PTR old_object,
-				   size_t new_size_in_bytes));
+GC_API GC_PTR GC_realloc
+	GC_PROTO((GC_PTR old_object, size_t new_size_in_bytes));
 				   
 /* Explicitly increase the heap size.	*/
 /* Returns 0 on failure, 1 on success.  */
@@ -248,6 +268,7 @@ GC_API void GC_gcollect GC_PROTO((void));
 /* than normal pause times for incremental collection.  However,	*/
 /* aborted collections do no useful work; the next collection needs	*/
 /* to start from the beginning.						*/
+/* Return 0 if the collection was aborted, 1 if it succeeded.		*/
 typedef int (* GC_stop_func) GC_PROTO((void));
 GC_API int GC_try_to_collect GC_PROTO((GC_stop_func stop_func));
 
@@ -604,6 +625,10 @@ GC_API void (*GC_is_valid_displacement_print_proc)
 GC_API void (*GC_is_visible_print_proc)
 	GC_PROTO((GC_PTR p));
 
+#if defined(_SOLARIS_PTHREADS) && !defined(SOLARIS_THREADS)
+#   define SOLARIS_THREADS
+#endif
+
 #ifdef SOLARIS_THREADS
 /* We need to intercept calls to many of the threads primitives, so 	*/
 /* that we can locate thread stacks and stop the world.			*/
@@ -660,7 +685,10 @@ GC_API void (*GC_is_visible_print_proc)
 
 #endif /* IRIX_THREADS || LINUX_THREADS */
 
-#if defined(THREADS) && !defined(SRC_M3)
+# if defined(PCR) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || \
+	defined(IRIX_THREADS) || defined(LINUX_THREADS) || \
+	defined(IRIX_JDK_THREADS)
+   	/* Any flavor of threads except SRC_M3.	*/
 /* This returns a list of objects, linked through their first		*/
 /* word.  Its use can greatly reduce lock contention problems, since	*/
 /* the allocation lock can be acquired and released many fewer times.	*/
@@ -691,6 +719,13 @@ extern void GC_thr_init();	/* Needed for Solaris/X86	*/
 # endif
 #endif
 
+#if (defined(_MSDOS) || defined(_MSC_VER)) && (_M_IX86 >= 300) \
+     || defined(_WIN32)
+  /* Win32s may not free all resources on process exit.  */
+  /* This explicitly deallocates the heap.		 */
+    GC_API void GC_win32_free_heap ();
+#endif
+
 #ifdef __cplusplus
     }  /* end of extern "C" */
 #endif
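
The new comment on GC_try_to_collect spells out its return convention: 0 if the supplied GC_stop_func aborted the collection, 1 if it ran to completion. The sketch below shows a time-budgeted collection built on that convention; it is illustrative only, and the names budget_expired and collect_with_budget plus the clock()-based deadline are assumptions, not collector API.

    #include <time.h>
    #include "gc.h"

    static clock_t deadline;

    /* A GC_stop_func: returning nonzero asks the collector to abort. */
    static int budget_expired(void)
    {
        return clock() > deadline;
    }

    int collect_with_budget(void)
    {
        deadline = clock() + CLOCKS_PER_SEC / 10;   /* roughly a 100 ms budget */
        return GC_try_to_collect(budget_expired);   /* 0 = aborted, 1 = completed */
    }
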
diff --git a/boehm-gc/include/gc_cpp.h b/boehm-gc/include/gc_cpp.h
index e2f456fb526a746b9405db0bb0b119aab99a414a..ad7df5d71faf94e0231e8bed5688fd236a255990 100644
--- a/boehm-gc/include/gc_cpp.h
+++ b/boehm-gc/include/gc_cpp.h
@@ -133,7 +133,8 @@ uses explicit invocation.
 #endif
 
 #if ! defined( OPERATOR_NEW_ARRAY ) \
-    && (__BORLANDC__ >= 0x450 || (__GNUC__ >= 2 && __GNUC_MINOR__ >= 6))
+    && (__BORLANDC__ >= 0x450 || (__GNUC__ >= 2 && __GNUC_MINOR__ >= 6) \
+        || __WATCOMC__ >= 1050)
 #   define OPERATOR_NEW_ARRAY
 #endif
 
@@ -212,6 +213,8 @@ inline void* gc::operator new( size_t size ) {
 inline void* gc::operator new( size_t size, GCPlacement gcp ) {
     if (gcp == GC) 
         return GC_MALLOC( size );
+    else if (gcp == PointerFreeGC)
+	return GC_MALLOC_ATOMIC( size );
     else
         return GC_MALLOC_UNCOLLECTABLE( size );}
 
@@ -234,7 +237,7 @@ inline void gc::operator delete[]( void* obj ) {
 
 
 inline gc_cleanup::~gc_cleanup() {
-    GC_REGISTER_FINALIZER_IGNORE_SELF( this, 0, 0, 0, 0 );}
+    GC_REGISTER_FINALIZER_IGNORE_SELF( GC_base(this), 0, 0, 0, 0 );}
 
 inline void gc_cleanup::cleanup( void* obj, void* displ ) {
     ((gc_cleanup*) ((char*) obj + (ptrdiff_t) displ))->~gc_cleanup();}
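
Two behavioral changes above are easy to miss: PointerFreeGC placement now allocates through GC_MALLOC_ATOMIC, so the object is never scanned for pointers, and gc_cleanup registers its finalizer against GC_base(this), so a base-subobject pointer at a nonzero offset still names the start of the allocated object. The C-level sketch below mirrors those two calls for illustration only; release_resource and example are made-up names, not collector API.

    #include "gc.h"

    static void release_resource(void *obj, void *client_data)
    {
        /* release any non-memory resource associated with obj here */
    }

    void example(void)
    {
        /* Pointer-free data: the collector will not scan it for pointers,
           which is what PointerFreeGC placement now requests.            */
        char *buf = (char *)GC_MALLOC_ATOMIC(4096);

        /* An interior pointer, e.g. a field or base subobject at a
           nonzero offset.                                                */
        void *inner = buf + 16;

        /* GC_base maps the interior pointer back to the object start,
           mirroring the gc_cleanup destructor change above.              */
        GC_REGISTER_FINALIZER_IGNORE_SELF(GC_base(inner), release_resource,
                                          0, 0, 0);
    }
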
diff --git a/boehm-gc/include/private/gc_priv.h b/boehm-gc/include/private/gc_priv.h
index 96ba1da104476113a06ad3574dc6ae12541d1807..934075fa358c8faf5778e7bba7b34f0fbb52e631 100644
--- a/boehm-gc/include/private/gc_priv.h
+++ b/boehm-gc/include/private/gc_priv.h
@@ -42,7 +42,7 @@ typedef GC_word word;
 typedef GC_signed_word signed_word;
 
 # ifndef CONFIG_H
-#   include "config.h"
+#   include "gcconfig.h"
 # endif
 
 # ifndef HEADERS_H
@@ -336,6 +336,9 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 /* space is assumed to be cleared.				*/
 /* In the case of USE_MMAP, the argument must also be a 	*/
 /* physical page size.						*/
+/* GET_MEM is currently not assumed to retrieve 0 filled space, */
+/* though we should perhaps take advantage of the case in which */
+/* it does.						*/
 # ifdef PCR
     char * real_malloc();
 #   define GET_MEM(bytes) HBLKPTR(real_malloc((size_t)bytes + GC_page_size) \
@@ -434,7 +437,7 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 #  ifdef LINUX_THREADS
 #    include <pthread.h>
 #    ifdef __i386__
-       inline static GC_test_and_set(volatile unsigned int *addr) {
+       inline static int GC_test_and_set(volatile unsigned int *addr) {
 	  int oldval;
 	  /* Note: the "xchg" instruction does not need a "lock" prefix */
 	  __asm__ __volatile__("xchgl %0, %1"
@@ -475,14 +478,15 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 		}
 #    define EXIT_GC() GC_collecting = 0;
 #  endif /* LINUX_THREADS */
-#  ifdef IRIX_THREADS
+#  if defined(IRIX_THREADS) || defined(IRIX_JDK_THREADS)
 #    include <pthread.h>
 #    include <mutex.h>
 
-#    if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64))
+#    if __mips < 3 || !(defined (_ABIN32) || defined(_ABI64)) \
+	|| !defined(_COMPILER_VERSION) || _COMPILER_VERSION < 700
 #        define GC_test_and_set(addr, v) test_and_set(addr,v)
 #    else
-#	  define GC_test_and_set(addr, v) __test_and_set(addr,v)
+#	 define GC_test_and_set(addr, v) __test_and_set(addr,v)
 #    endif
      extern unsigned long GC_allocate_lock;
 	/* This is not a mutex because mutexes that obey the (optional) 	*/
@@ -501,10 +505,17 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 #    	define UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
 #    else
 #	define LOCK() { if (GC_test_and_set(&GC_allocate_lock, 1)) GC_lock(); }
-#       if __mips >= 3 && (defined (_ABIN32) || defined(_ABI64))
+#       if __mips >= 3 && (defined (_ABIN32) || defined(_ABI64)) \
+	   && defined(_COMPILER_VERSION) && _COMPILER_VERSION >= 700
 #	    define UNLOCK() __lock_release(&GC_allocate_lock)
 #	else
-#           define UNLOCK() GC_allocate_lock = 0
+	    /* The function call in the following should prevent the	*/
+	    /* compiler from moving assignments to below the UNLOCK.	*/
+	    /* This is probably not necessary for ucode or gcc 2.8.	*/
+	    /* It may be necessary for Ragnarok and future gcc		*/
+	    /* versions.						*/
+#           define UNLOCK() { GC_noop1(&GC_allocate_lock); \
+			*(volatile unsigned long *)(&GC_allocate_lock) = 0; }
 #	endif
 #    endif
      extern GC_bool GC_collecting;
@@ -513,7 +524,7 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 		    GC_collecting = 1; \
 		}
 #    define EXIT_GC() GC_collecting = 0;
-#  endif /* IRIX_THREADS */
+#  endif /* IRIX_THREADS || IRIX_JDK_THREADS */
 #  ifdef WIN32_THREADS
 #    include <windows.h>
      GC_API CRITICAL_SECTION GC_allocate_ml;
@@ -567,7 +578,7 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
 #   if defined(SRC_M3) || defined(AMIGA) || defined(SOLARIS_THREADS) \
 	|| defined(MSWIN32) || defined(MACOS) || defined(DJGPP) \
 	|| defined(NO_SIGNALS) || defined(IRIX_THREADS) \
-	|| defined(LINUX_THREADS)
+	|| defined(IRIX_JDK_THREADS) || defined(LINUX_THREADS) 
 			/* Also useful for debugging.		*/
 	/* Should probably use thr_sigsetmask for SOLARIS_THREADS. */
 #     define DISABLE_SIGNALS()
@@ -595,7 +606,8 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */);
  				   PCR_waitForever);
 # else
 #   if defined(SOLARIS_THREADS) || defined(WIN32_THREADS) \
-	|| defined(IRIX_THREADS) || defined(LINUX_THREADS)
+	|| defined(IRIX_THREADS) || defined(LINUX_THREADS) \
+	|| defined(IRIX_JDK_THREADS)
       void GC_stop_world();
       void GC_start_world();
 #     define STOP_WORLD() GC_stop_world()
@@ -864,7 +876,69 @@ struct hblk {
 /* Object free list link */
 # define obj_link(p) (*(ptr_t *)(p))
 
-/*  lists of all heap blocks and free lists	*/
+/* The type of mark procedures.  This really belongs in gc_mark.h.	*/
+/* But we put it here, so that we can avoid scanning the mark proc	*/
+/* table.								*/
+typedef struct ms_entry * (*mark_proc)(/* word * addr, mark_stack_ptr,
+					  mark_stack_limit, env */);
+# define LOG_MAX_MARK_PROCS 6
+# define MAX_MARK_PROCS (1 << LOG_MAX_MARK_PROCS)
+
+/* Root sets.  Logically private to mark_rts.c.  But we don't want the	*/
+/* tables scanned, so we put them here.					*/
+/* MAX_ROOT_SETS is the maximum number of ranges that can be 	*/
+/* registered as static roots. 					*/
+# ifdef LARGE_CONFIG
+#   define MAX_ROOT_SETS 4096
+# else
+#   ifdef PCR
+#     define MAX_ROOT_SETS 1024
+#   else
+#     ifdef MSWIN32
+#	define MAX_ROOT_SETS 512
+	    /* Under NT, we add only written pages, which can result 	*/
+	    /* in many small root sets.					*/
+#     else
+#       define MAX_ROOT_SETS 64
+#     endif
+#   endif
+# endif
+
+# define MAX_EXCLUSIONS (MAX_ROOT_SETS/4)
+/* Maximum number of segments that can be excluded from root sets.	*/
+
+/*
+ * Data structure for excluded static roots.
+ */
+struct exclusion {
+    ptr_t e_start;
+    ptr_t e_end;
+};
+
+/* Data structure for list of root sets.				*/
+/* We keep a hash table, so that we can filter out duplicate additions.	*/
+/* Under Win32, we need to do a better job of filtering overlaps, so	*/
+/* we resort to sequential search, and pay the price.			*/
+struct roots {
+	ptr_t r_start;
+	ptr_t r_end;
+#	ifndef MSWIN32
+	  struct roots * r_next;
+#	endif
+	GC_bool r_tmp;
+	  	/* Delete before registering new dynamic libraries */
+};
+
+#ifndef MSWIN32
+    /* Size of hash table index to roots.	*/
+#   define LOG_RT_SIZE 6
+#   define RT_SIZE (1 << LOG_RT_SIZE) /* Power of 2, may be != MAX_ROOT_SETS */
+#endif
+
+/* Lists of all heap blocks and free lists	*/
+/* as well as other random data structures	*/
+/* that should not be scanned by the		*/
+/* collector.					*/
 /* These are grouped together in a struct	*/
 /* so that they can be easily skipped by the	*/
 /* GC_mark routine.				*/
@@ -904,7 +978,10 @@ struct _GC_arrays {
   word _mem_freed;
   	/* Number of explicitly deallocated words of memory	*/
   	/* since last collection.				*/
-  	
+  mark_proc _mark_procs[MAX_MARK_PROCS];
+  	/* Table of user-defined mark procedures.  There is	*/
+	/* a small number of these, which can be referenced	*/
+	/* by DS_PROC mark descriptors.  See gc_mark.h.		*/
   ptr_t _objfreelist[MAXOBJSZ+1];
 			  /* free list for objects */
   ptr_t _aobjfreelist[MAXOBJSZ+1];
@@ -986,17 +1063,24 @@ struct _GC_arrays {
 				/* GC_modws_valid_offsets[i%sizeof(word)] */
 #   endif
 # ifdef STUBBORN_ALLOC
-      page_hash_table _changed_pages;
+    page_hash_table _changed_pages;
         /* Stubborn object pages that were changed since last call to	*/
 	/* GC_read_changed.						*/
-      page_hash_table _prev_changed_pages;
+    page_hash_table _prev_changed_pages;
         /* Stubborn object pages that were changed before last call to	*/
 	/* GC_read_changed.						*/
 # endif
 # if defined(PROC_VDB) || defined(MPROTECT_VDB)
-      page_hash_table _grungy_pages; /* Pages that were dirty at last 	   */
+    page_hash_table _grungy_pages; /* Pages that were dirty at last 	   */
 				     /* GC_read_dirty.			   */
 # endif
+# ifdef MPROTECT_VDB
+    VOLATILE page_hash_table _dirty_pages;	
+			/* Pages dirtied since last GC_read_dirty. */
+# endif
+# ifdef PROC_VDB
+    page_hash_table _written_pages;	/* Pages ever dirtied	*/
+# endif
 # ifdef LARGE_CONFIG
 #   if CPP_WORDSZ > 32
 #     define MAX_HEAP_SECTS 4096 	/* overflows at roughly 64 GB	   */
@@ -1013,6 +1097,11 @@ struct _GC_arrays {
     ptr_t _heap_bases[MAX_HEAP_SECTS];
     		/* Start address of memory regions obtained from kernel. */
 # endif
+  struct roots _static_roots[MAX_ROOT_SETS];
+# ifndef MSWIN32
+    struct roots * _root_index[RT_SIZE];
+# endif
+  struct exclusion _excl_table[MAX_EXCLUSIONS];
   /* Block header index; see gc_headers.h */
   bottom_index * _all_nils;
   bottom_index * _top_index [TOP_SZ];
@@ -1049,6 +1138,7 @@ GC_API GC_FAR struct _GC_arrays GC_arrays;
 # define GC_words_finalized GC_arrays._words_finalized
 # define GC_non_gc_bytes_at_gc GC_arrays._non_gc_bytes_at_gc
 # define GC_mem_freed GC_arrays._mem_freed
+# define GC_mark_procs GC_arrays._mark_procs
 # define GC_heapsize GC_arrays._heapsize
 # define GC_max_heapsize GC_arrays._max_heapsize
 # define GC_words_allocd_before_gc GC_arrays._words_allocd_before_gc
@@ -1057,11 +1147,20 @@ GC_API GC_FAR struct _GC_arrays GC_arrays;
 # ifdef MSWIN32
 #   define GC_heap_bases GC_arrays._heap_bases
 # endif
+# define GC_static_roots GC_arrays._static_roots
+# define GC_root_index GC_arrays._root_index
+# define GC_excl_table GC_arrays._excl_table
 # define GC_all_nils GC_arrays._all_nils
 # define GC_top_index GC_arrays._top_index
 # if defined(PROC_VDB) || defined(MPROTECT_VDB)
 #   define GC_grungy_pages GC_arrays._grungy_pages
 # endif
+# ifdef MPROTECT_VDB
+#   define GC_dirty_pages GC_arrays._dirty_pages
+# endif
+# ifdef PROC_VDB
+#   define GC_written_pages GC_arrays._written_pages
+# endif
 # ifdef GATHERSTATS
 #   define GC_composite_in_use GC_arrays._composite_in_use
 #   define GC_atomic_in_use GC_arrays._atomic_in_use
@@ -1073,11 +1172,9 @@ GC_API GC_FAR struct _GC_arrays GC_arrays;
 # define beginGC_arrays ((ptr_t)(&GC_arrays))
 # define endGC_arrays (((ptr_t)(&GC_arrays)) + (sizeof GC_arrays))
 
-GC_API word GC_fo_entries;
-
+/* Object kinds: */
 # define MAXOBJKINDS 16
 
-/* Object kinds: */
 extern struct obj_kind {
    ptr_t *ok_freelist;	/* Array of free listheaders for this kind of object */
    			/* Point either to GC_arrays or to storage allocated */
@@ -1091,8 +1188,14 @@ extern struct obj_kind {
    			/* Add object size in bytes to descriptor 	*/
    			/* template to obtain descriptor.  Otherwise	*/
    			/* template is used as is.			*/
-   GC_bool ok_init;     /* Clear objects before putting them on the free list. */
+   GC_bool ok_init;   /* Clear objects before putting them on the free list. */
 } GC_obj_kinds[MAXOBJKINDS];
+
+# define endGC_obj_kinds (((ptr_t)(&GC_obj_kinds)) + (sizeof GC_obj_kinds))
+
+# define end_gc_area ((ptr_t)endGC_arrays == (ptr_t)(&GC_obj_kinds) ? \
+			endGC_obj_kinds : endGC_arrays)
+
 /* Predefined kinds: */
 # define PTRFREE 0
 # define NORMAL  1
@@ -1108,6 +1211,8 @@ extern struct obj_kind {
 
 extern int GC_n_kinds;
 
+GC_API word GC_fo_entries;
+
 extern word GC_n_heap_sects;	/* Number of separately added heap	*/
 				/* sections.				*/
 
@@ -1142,17 +1247,19 @@ extern GC_bool GC_is_initialized;	/* GC_init() has been run.	*/
 extern GC_bool GC_objects_are_marked;	/* There are marked objects in  */
 					/* the heap.			*/
 
-extern GC_bool GC_incremental; /* Using incremental/generational collection. */
+#ifndef SMALL_CONFIG
+  extern GC_bool GC_incremental;
+			/* Using incremental/generational collection. */
+#else
+# define GC_incremental TRUE
+			/* Hopefully allow optimizer to remove some code. */
+#endif
 
 extern GC_bool GC_dirty_maintained;
 				/* Dirty bits are being maintained, 	*/
 				/* either for incremental collection,	*/
 				/* or to limit the root set.		*/
 
-# ifndef PCR
-    extern ptr_t GC_stackbottom;	/* Cool end of user stack	*/
-# endif
-
 extern word GC_root_size;	/* Total size of registered root sections */
 
 extern GC_bool GC_debugging_started;	/* GC_debug_malloc has been called. */ 
@@ -1216,7 +1323,8 @@ void GC_mark_from_mark_stack(); /* Mark from everything on the mark stack. */
 				/* Return after about one page's worth of  */
 				/* work.				   */
 GC_bool GC_mark_stack_empty();
-GC_bool GC_mark_some();	/* Perform about one pages worth of marking	*/
+GC_bool GC_mark_some(/* cold_gc_frame */);
+			/* Perform about one page's worth of marking	*/
 			/* work of whatever kind is needed.  Returns	*/
 			/* quickly if no collection is in progress.	*/
 			/* Return TRUE if mark phase finished.		*/
@@ -1238,7 +1346,31 @@ void GC_push_dirty(/*b,t*/);      /* Push all possibly changed	 	*/
 				/* on the third arg.			*/
 void GC_push_all_stack(/*b,t*/);    /* As above, but consider		*/
 				    /*  interior pointers as valid  	*/
-void GC_push_roots(/* GC_bool all */); /* Push all or dirty roots.	*/
+void GC_push_all_eager(/*b,t*/);    /* Same as GC_push_all_stack, but   */
+				    /* ensures that stack is scanned	*/
+				    /* immediately, not just scheduled  */
+				    /* for scanning.			*/
+#ifndef THREADS
+  void GC_push_all_stack_partially_eager(/* bottom, top, cold_gc_frame */);
+			/* Similar to GC_push_all_eager, but only the	*/
+			/* part hotter than cold_gc_frame is scanned	*/
+			/* immediately.  Needed to ensure that callee-	*/
+			/* save registers are not missed.		*/
+#else
+  /* In the threads case, we push part of the current thread stack	*/
+  /* with GC_push_all_eager when we push the registers.  This gets the  */
+  /* callee-save registers that may disappear.  The remainder of the	*/
+  /* stacks are scheduled for scanning in *GC_push_other_roots, which	*/
+  /* is thread-package-specific.					*/
+#endif
+void GC_push_current_stack(/* ptr_t cold_gc_frame */);
+			/* Push enough of the current stack eagerly to	*/
+			/* ensure that callee-save registers saved in	*/
+			/* GC frames are scanned.			*/
+			/* In the non-threads case, schedule entire	*/
+			/* stack for scanning.				*/
+void GC_push_roots(/* GC_bool all, ptr_t cold_gc_frame */);
+			/* Push all or dirty roots.	*/
 extern void (*GC_push_other_roots)();
 			/* Push system or application specific roots	*/
 			/* onto the mark stack.  In some environments	*/
@@ -1403,7 +1535,7 @@ GC_bool GC_collect_or_expand(/* needed_blocks */);
 				/* blocks available.  Should be called	*/
 				/* until the blocks are available or	*/
 				/* until it fails by returning FALSE.	*/
-void GC_init();			/* Initialize collector.		*/
+GC_API void GC_init();		/* Initialize collector.		*/
 void GC_collect_a_little_inner(/* int n */);
 				/* Do n units worth of garbage 		*/
 				/* collection work, if appropriate.	*/
@@ -1491,7 +1623,7 @@ void GC_write_hint(/* struct hblk * h  */);
 void GC_dirty_init();
 
 /* Slow/general mark bit manipulation: */
-GC_bool GC_is_marked();
+GC_API GC_bool GC_is_marked();
 void GC_clear_mark_bit();
 void GC_set_mark_bit();
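
The IRIX UNLOCK() rewrite is the subtlest change in gc_priv.h: the release is an ordinary store, so a call to GC_noop1 is made first to keep the compiler from sinking protected stores past it, and the store itself goes through a volatile lvalue. The stand-alone sketch below shows that pattern in isolation; the names allocate_lock, lock_noop, acquire, and release, and the GCC __sync_lock_test_and_set builtin, are stand-ins for the collector's platform-specific GC_test_and_set machinery, not its actual code.

    /* Illustrative only; assumes a GCC-style compiler. */
    static unsigned long allocate_lock = 0;

    /* Kept out of line so the optimizer treats the call as opaque,
       playing the role GC_noop1 plays in UNLOCK().                 */
    static void lock_noop(unsigned long *p) __attribute__((noinline));
    static void lock_noop(unsigned long *p) { (void)p; }

    static void acquire(void)
    {
        /* Spin until the test-and-set observes the lock free (0). */
        while (__sync_lock_test_and_set(&allocate_lock, 1UL)) {
            /* a real implementation would back off or yield here */
        }
    }

    static void release(void)
    {
        lock_noop(&allocate_lock);                      /* call acts as a barrier */
        *(volatile unsigned long *)&allocate_lock = 0;  /* publish the release    */
    }
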