diff --git a/boehm-gc/include/private/specific.h b/boehm-gc/include/private/specific.h
index 60c152c6fd0e40b21268a9f7197a0d19ac423a71..399f84f58d6ae25ca51edf4fc26c70743bde91bf 100644
--- a/boehm-gc/include/private/specific.h
+++ b/boehm-gc/include/private/specific.h
@@ -27,16 +27,22 @@
 #define TS_HASH_SIZE 1024
 #define HASH(n) (((((long)n) >> 8) ^ (long)n) & (TS_HASH_SIZE - 1))
 
+/* An entry describing a thread-specific value for a given thread.	*/
+/* All such accessible structures preserve the invariant that if either	*/
+/* thread is a valid pthread id or qtid is a valid "quick tread id"	*/
+/* for a thread, then value holds the corresponding thread specific	*/
+/* value.  This invariant must be preserved at ALL times, since		*/
+/* asynchronous reads are allowed.					*/
 typedef struct thread_specific_entry {
 	unsigned long qtid;	/* quick thread id, only for cache */
 	void * value;
-	pthread_t thread;
 	struct thread_specific_entry *next;
+	pthread_t thread;
 } tse;
 
 
 /* We represent each thread-specific datum as two tables.  The first is	*/
-/* a cache, index by a "quick thread identifier".  The "quick" thread	*/
+/* a cache, indexed by a "quick thread identifier".  The "quick" thread	*/
 /* identifier is an easy to compute value, which is guaranteed to	*/
 /* determine the thread, though a thread may correspond to more than	*/
 /* one value.  We typically use the address of a page in the stack.	*/
@@ -45,12 +51,15 @@ typedef struct thread_specific_entry {
 
 /* Return the "quick thread id".  Default version.  Assumes page size,	*/
 /* or at least thread stack separation, is at least 4K.			*/
-static __inline__ long quick_thread_id() {
+/* Must be defined so that it never returns 0.  (Page 0 can't really	*/
+/* be part of any stack, since that would make 0 a valid stack pointer.)*/
+static __inline__ unsigned long quick_thread_id() {
     int dummy;
-    return (long)(&dummy) >> 12;
+    return (unsigned long)(&dummy) >> 12;
 }
 
-#define INVALID_QTID ((unsigned long)(-1))
+#define INVALID_QTID ((unsigned long)0)
+#define INVALID_THREADID ((pthread_t)0)
 
 typedef struct thread_specific_data {
     tse * volatile cache[TS_CACHE_SIZE];
@@ -76,7 +85,10 @@ static __inline__ void * PREFIXED(getspecific) (tsd * key) {
     unsigned hash_val = CACHE_HASH(qtid);
     tse * volatile * entry_ptr = key -> cache + hash_val;
     tse * entry = *entry_ptr;   /* Must be loaded only once.	*/
-    if (entry -> qtid == qtid) return entry -> value;
+    if (entry -> qtid == qtid) {
+      GC_ASSERT(entry -> thread == pthread_self());
+      return entry -> value;
+    }
     return PREFIXED(slow_getspecific) (key, qtid, entry_ptr);
 }
 
diff --git a/boehm-gc/linux_threads.c b/boehm-gc/linux_threads.c
index 0bc0f131b106b50ace796ea2ca0318e4839c58a0..c968e7cbfd4a934ff6010287ffce025932ae7f3c 100644
--- a/boehm-gc/linux_threads.c
+++ b/boehm-gc/linux_threads.c
@@ -231,15 +231,16 @@ static void return_freelists(ptr_t *fl, ptr_t *gfl)
 	nwords = i * (GRANULARITY/sizeof(word));
         qptr = fl + i;	
 	q = *qptr;
-	if ((word)q < HBLKSIZE) continue;
-	if (gfl[nwords] == 0) {
+	if ((word)q >= HBLKSIZE) {
+	  if (gfl[nwords] == 0) {
 	    gfl[nwords] = q;
-	} else {
+	  } else {
 	    /* Concatenate: */
 	    for (; (word)q >= HBLKSIZE; qptr = &(obj_link(q)), q = *qptr);
 	    GC_ASSERT(0 == q);
 	    *qptr = gfl[nwords];
 	    gfl[nwords] = fl[i];
+	  }
 	}
 	/* Clear fl[i], since the thread structure may hang around.	*/
 	/* Do it in a way that is likely to trap if we access it.	*/
@@ -412,6 +413,7 @@ GC_PTR GC_local_gcj_malloc(size_t bytes,
 	    /* A memory barrier is probably never needed, since the 	*/
 	    /* action of stopping this thread will cause prior writes	*/
 	    /* to complete.						*/
+	    GC_ASSERT(((void * volatile *)result)[1] == 0); 
 	    *(void * volatile *)result = ptr_to_struct_containing_descr; 
 	    return result;
 	} else if ((word)my_entry - 1 < DIRECT_GRANULES) {
@@ -544,7 +546,7 @@ static void start_mark_threads()
 	  ABORT("pthread_attr_getstacksize failed\n");
 	if (old_size < MIN_STACK_SIZE) {
 	  if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
-	    ABORT("pthread_attr_getstacksize failed\n");
+		  ABORT("pthread_attr_setstacksize failed\n");
 	}
       }
 #   endif /* HPUX */
diff --git a/boehm-gc/mark.c b/boehm-gc/mark.c
index 170c279b718e101ad5db20fa1877d8babcfcb020..eb5b9eeb34d9ae11bc6b5e7f75f8c1de3ec80f18 100644
--- a/boehm-gc/mark.c
+++ b/boehm-gc/mark.c
@@ -546,13 +546,13 @@ mse * mark_stack_limit;
           /* Large length.					        */
           /* Process part of the range to avoid pushing too much on the	*/
           /* stack.							*/
+	  GC_ASSERT(descr < GC_greatest_plausible_heap_addr
+			    - GC_least_plausible_heap_addr);
 #	  ifdef PARALLEL_MARK
 #	    define SHARE_BYTES 2048
 	    if (descr > SHARE_BYTES && GC_parallel
 		&& mark_stack_top < mark_stack_limit - 1) {
 	      int new_size = (descr/2) & ~(sizeof(word)-1);
-	      GC_ASSERT(descr < GC_greatest_plausible_heap_addr
-			        - GC_least_plausible_heap_addr);
 	      mark_stack_top -> mse_start = current_p;
 	      mark_stack_top -> mse_descr = new_size + sizeof(word);
 					/* makes sure we handle 	*/
diff --git a/boehm-gc/reclaim.c b/boehm-gc/reclaim.c
index 846215edb4f87370ae33b3e939bd5358535c6ef7..0418e9dea652ca04e8401022d98ebed492a3338a 100644
--- a/boehm-gc/reclaim.c
+++ b/boehm-gc/reclaim.c
@@ -861,6 +861,25 @@ void GC_print_block_list()
 
 #endif /* NO_DEBUGGING */
 
+/*
+ * Clear all obj_link pointers in the list of free objects *flp.
+ * Clear *flp.
+ * This must be done before dropping a list of free gcj-style objects,
+ * since may otherwise end up with dangling "descriptor" pointers.
+ * It may help for other pointer-containg objects.
+ */
+void GC_clear_fl_links(flp)
+ptr_t *flp;
+{
+    ptr_t next = *flp;
+
+    while (0 != next) {
+       *flp = 0;
+       flp = &(obj_link(next));
+       next = *flp;
+    }
+}
+
 /*
  * Perform GC_reclaim_block on the entire heap, after first clearing
  * small object free lists (if we are not just looking for leaks).
@@ -875,17 +894,24 @@ int report_if_found;		/* Abort if a GC_reclaimable object is found */
 #   endif
     /* Clear reclaim- and free-lists */
       for (kind = 0; kind < GC_n_kinds; kind++) {
-        register ptr_t *fop;
-        register ptr_t *lim;
-        register struct hblk ** rlp;
-        register struct hblk ** rlim;
-        register struct hblk ** rlist = GC_obj_kinds[kind].ok_reclaim_list;
+        ptr_t *fop;
+        ptr_t *lim;
+        struct hblk ** rlp;
+        struct hblk ** rlim;
+        struct hblk ** rlist = GC_obj_kinds[kind].ok_reclaim_list;
+	GC_bool should_clobber = (GC_obj_kinds[kind].ok_descriptor != 0);
         
         if (rlist == 0) continue;	/* This kind not used.	*/
         if (!report_if_found) {
             lim = &(GC_obj_kinds[kind].ok_freelist[MAXOBJSZ+1]);
 	    for( fop = GC_obj_kinds[kind].ok_freelist; fop < lim; fop++ ) {
-	      *fop = 0;
+	      if (*fop != 0) {
+		if (should_clobber) {
+		  GC_clear_fl_links(fop);
+		} else {
+	          *fop = 0;
+		}
+	      }
 	    }
 	} /* otherwise free list objects are marked, 	*/
 	  /* and its safe to leave them			*/
diff --git a/boehm-gc/specific.c b/boehm-gc/specific.c
index 48b53ac9f8ce86c710d400e4e45f6a0cbf5be989..2c40c2b44a2623996d0a0fbfe34607e4bafb585e 100644
--- a/boehm-gc/specific.c
+++ b/boehm-gc/specific.c
@@ -16,17 +16,27 @@
 #include "private/gc_priv.h" /* For GC_compare_and_exchange, GC_memory_barrier */
 #include "private/specific.h"
 
-static tse invalid_tse; 	/* 0 qtid is guaranteed to be invalid	*/
+static tse invalid_tse = {INVALID_QTID, 0, 0, INVALID_THREADID};
+			/* A thread-specific data entry which will never	*/
+			/* appear valid to a reader.  Used to fill in empty	*/
+			/* cache entries to avoid a check for 0.		*/
 
 int PREFIXED(key_create) (tsd ** key_ptr, void (* destructor)(void *)) {
     int i;
     tsd * result = (tsd *)MALLOC_CLEAR(sizeof (tsd));
 
+    /* A quick alignment check, since we need atomic stores */
+      GC_ASSERT((unsigned long)(&invalid_tse.next) % sizeof(tse *) == 0);
     if (0 == result) return ENOMEM;
     pthread_mutex_init(&(result -> lock), NULL);
     for (i = 0; i < TS_CACHE_SIZE; ++i) {
 	result -> cache[i] = &invalid_tse;
     }
+#   ifdef GC_ASSERTIONS
+      for (i = 0; i < TS_HASH_SIZE; ++i) {
+	GC_ASSERT(result -> hash[i] == 0);
+      }
+#   endif
     *key_ptr = result;
     return 0;
 }
@@ -36,12 +46,14 @@ int PREFIXED(setspecific) (tsd * key, void * value) {
     int hash_val = HASH(self);
     volatile tse * entry = (volatile tse *)MALLOC_CLEAR(sizeof (tse));
     
+    GC_ASSERT(self != INVALID_THREADID);
     if (0 == entry) return ENOMEM;
     pthread_mutex_lock(&(key -> lock));
     /* Could easily check for an existing entry here.	*/
     entry -> next = key -> hash[hash_val];
     entry -> thread = self;
     entry -> value = value;
+    GC_ASSERT(entry -> qtid == INVALID_QTID);
     /* There can only be one writer at a time, but this needs to be	*/
     /* atomic with respect to concurrent readers.			*/ 
     *(volatile tse **)(key -> hash + hash_val) = entry;
@@ -70,6 +82,10 @@ void PREFIXED(remove_specific) (tsd * key) {
 	*link = entry -> next;
 	/* Atomic! concurrent accesses still work.	*/
 	/* They must, since readers don't lock.		*/
+	/* We shouldn't need a volatile access here,	*/
+	/* since both this and the preceding write 	*/
+	/* should become visible no later than		*/
+	/* the pthread_mutex_unlock() call.		*/
     }
     /* If we wanted to deallocate the entry, we'd first have to clear 	*/
     /* any cache entries pointing to it.  That probably requires	*/
@@ -91,6 +107,7 @@ void *  PREFIXED(slow_getspecific) (tsd * key, unsigned long qtid,
     unsigned hash_val = HASH(self);
     tse *entry = key -> hash[hash_val];
 
+    GC_ASSERT(qtid != INVALID_QTID);
     while (entry != NULL && entry -> thread != self) {
 	entry = entry -> next;
     } 
@@ -99,6 +116,8 @@ void *  PREFIXED(slow_getspecific) (tsd * key, unsigned long qtid,
         entry -> qtid = qtid;
 		/* It's safe to do this asynchronously.  Either value 	*/
 		/* is safe, though may produce spurious misses.		*/
+		/* We're replacing one qtid with another one for the	*/
+		/* same thread.						*/
 	*cache_ptr = entry;
 		/* Again this is safe since pointer assignments are 	*/
 		/* presumed atomic, and either pointer is valid.	*/