Merge refs/heads/upstream from master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev 
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c
index 55352ed..53c192a 100644
--- a/arch/sparc/kernel/setup.c
+++ b/arch/sparc/kernel/setup.c
@@ -32,7 +32,6 @@
 #include <linux/spinlock.h>
 #include <linux/root_dev.h>
 
-#include <asm/segment.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/processor.h>
diff --git a/arch/sparc/kernel/tick14.c b/arch/sparc/kernel/tick14.c
index fd8005a..591547a 100644
--- a/arch/sparc/kernel/tick14.c
+++ b/arch/sparc/kernel/tick14.c
@@ -19,7 +19,6 @@
 #include <linux/interrupt.h>
 
 #include <asm/oplib.h>
-#include <asm/segment.h>
 #include <asm/timer.h>
 #include <asm/mostek.h>
 #include <asm/system.h>
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c
index 6486cbf..3b759ae 100644
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -32,7 +32,6 @@
 #include <linux/profile.h>
 
 #include <asm/oplib.h>
-#include <asm/segment.h>
 #include <asm/timer.h>
 #include <asm/mostek.h>
 #include <asm/system.h>
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c
index 37f4107..2bbd53f 100644
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -23,7 +23,6 @@
 #include <linux/module.h>
 
 #include <asm/system.h>
-#include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/memreg.h>
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index ec2e050..c03baba 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -25,7 +25,6 @@
 #include <linux/bootmem.h>
 
 #include <asm/system.h>
-#include <asm/segment.h>
 #include <asm/vac-ops.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 88332f0..cecdc0a 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -21,6 +21,7 @@
 #include <asm/visasm.h>
 #include <asm/estate.h>
 #include <asm/auxio.h>
+#include <asm/sfafsr.h>
 
 #define curptr      g6
 
@@ -690,79 +691,102 @@
 	retl
 	 nop
 
-	/* These next few routines must be sure to clear the
-	 * SFSR FaultValid bit so that the fast tlb data protection
-	 * handler does not flush the wrong context and lock up the
-	 * box.
+	/* We need to carefully read the error status, ACK
+	 * the errors, prevent recursive traps, and pass the
+	 * information on to C code for logging.
+	 *
+	 * We pass the AFAR in as-is, and we encode the status
+	 * information as described in asm-sparc64/sfafsr.h
 	 */
-	.globl		__do_data_access_exception
-	.globl		__do_data_access_exception_tl1
-__do_data_access_exception_tl1:
-	rdpr		%pstate, %g4
-	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
-	mov		TLB_SFSR, %g3
-	mov		DMMU_SFAR, %g5
-	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
-	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
-	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
+	.globl		__spitfire_access_error
+__spitfire_access_error:
+	/* Disable ESTATE error reporting so that we do not
+	 * take recursive traps and RED state the processor.
+	 */
+	stxa		%g0, [%g0] ASI_ESTATE_ERROR_EN
 	membar		#Sync
-	ba,pt		%xcc, winfix_dax
-	 rdpr		%tpc, %g3
-__do_data_access_exception:
-	rdpr		%pstate, %g4
-	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
-	mov		TLB_SFSR, %g3
-	mov		DMMU_SFAR, %g5
-	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
-	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
-	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
-	membar		#Sync
-	sethi		%hi(109f), %g7
-	ba,pt		%xcc, etrap
-109:	 or		%g7, %lo(109b), %g7
-	mov		%l4, %o1
-	mov		%l5, %o2
-	call		data_access_exception
-	 add		%sp, PTREGS_OFF, %o0
-	ba,pt		%xcc, rtrap
-	 clr		%l6
 
-	.globl		__do_instruction_access_exception
-	.globl		__do_instruction_access_exception_tl1
-__do_instruction_access_exception_tl1:
-	rdpr		%pstate, %g4
-	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
-	mov		TLB_SFSR, %g3
-	mov		DMMU_SFAR, %g5
-	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
-	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
-	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
+	mov		UDBE_UE, %g1
+	ldxa		[%g0] ASI_AFSR, %g4	! Get AFSR
+
+	/* __spitfire_cee_trap branches here with AFSR in %g4 and
+	 * UDBE_CE in %g1.  It only clears ESTATE_ERR_CE in the
+	 * ESTATE Error Enable register.
+	 */
+__spitfire_cee_trap_continue:
+	ldxa		[%g0] ASI_AFAR, %g5	! Get AFAR
+
+	rdpr		%tt, %g3
+	and		%g3, 0x1ff, %g3		! Paranoia
+	sllx		%g3, SFSTAT_TRAP_TYPE_SHIFT, %g3
+	or		%g4, %g3, %g4
+	rdpr		%tl, %g3
+	cmp		%g3, 1
+	mov		1, %g3
+	bleu		%xcc, 1f
+	 sllx		%g3, SFSTAT_TL_GT_ONE_SHIFT, %g3
+
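+	/* The sllx above sits in the branch delay slot and always
+	 * executes; this OR is only reached when %tl was greater
+	 * than one, recording SFSTAT_TL_GT_ONE in the status word.
+	 */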
+	or		%g4, %g3, %g4
+
+	/* Read in the UDB error register state, clearing the
+	 * sticky error bits as-needed.  We only clear them if
+	 * the UE bit is set.  Likewise, __spitfire_cee_trap
+	 * below will only do so if the CE bit is set.
+	 *
+	 * NOTE: UltraSparc-I/II have high and low UDB error
+	 *       registers, corresponding to the two UDB units
+	 *       present on those chips.  UltraSparc-IIi only
+	 *       has a single UDB, called "SDB" in the manual.
+	 *       For IIi the upper UDB register always reads
+	 *       as zero so for our purposes things will just
+	 *       work with the checks below.
+	 */
+1:	ldxa		[%g0] ASI_UDBH_ERROR_R, %g3
+	and		%g3, 0x3ff, %g7		! Paranoia
+	sllx		%g7, SFSTAT_UDBH_SHIFT, %g7
+	or		%g4, %g7, %g4
+	andcc		%g3, %g1, %g3		! UDBE_UE or UDBE_CE
+	be,pn		%xcc, 1f
+	 nop
+	stxa		%g3, [%g0] ASI_UDB_ERROR_W
 	membar		#Sync
-	sethi		%hi(109f), %g7
+
+1:	mov		0x18, %g3
+	ldxa		[%g3] ASI_UDBL_ERROR_R, %g3
+	and		%g3, 0x3ff, %g7		! Paranoia
+	sllx		%g7, SFSTAT_UDBL_SHIFT, %g7
+	or		%g4, %g7, %g4
+	andcc		%g3, %g1, %g3		! UDBE_UE or UDBE_CE
+	be,pn		%xcc, 1f
+	 nop
+	mov		0x18, %g7
+	stxa		%g3, [%g7] ASI_UDB_ERROR_W
+	membar		#Sync
+
+1:	/* Ok, now that we've latched the error state,
+	 * clear the sticky bits in the AFSR.
+	 */
+	stxa		%g4, [%g0] ASI_AFSR
+	membar		#Sync
+
+	rdpr		%tl, %g2
+	cmp		%g2, 1
+	rdpr		%pil, %g2
+	bleu,pt		%xcc, 1f
+	 wrpr		%g0, 15, %pil
+
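+	/* The %pil write above is in the delay slot and executes on
+	 * both paths.  When %tl was greater than one we must enter
+	 * via etraptl1 below instead of etrap_irq.
+	 */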
 	ba,pt		%xcc, etraptl1
-109:	 or		%g7, %lo(109b), %g7
-	mov		%l4, %o1
-	mov		%l5, %o2
-	call		instruction_access_exception_tl1
-	 add		%sp, PTREGS_OFF, %o0
-	ba,pt		%xcc, rtrap
-	 clr		%l6
+	 rd		%pc, %g7
 
-__do_instruction_access_exception:
-	rdpr		%pstate, %g4
-	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
-	mov		TLB_SFSR, %g3
-	mov		DMMU_SFAR, %g5
-	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
-	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
-	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
-	membar		#Sync
-	sethi		%hi(109f), %g7
-	ba,pt		%xcc, etrap
-109:	 or		%g7, %lo(109b), %g7
-	mov		%l4, %o1
+	ba,pt		%xcc, 2f
+	 nop
+
+1:	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+
+2:	mov		%l4, %o1
 	mov		%l5, %o2
-	call		instruction_access_exception
+	call		spitfire_access_error
 	 add		%sp, PTREGS_OFF, %o0
 	ba,pt		%xcc, rtrap
 	 clr		%l6
@@ -784,79 +808,124 @@
 	 * as it is the only situation where we can safely record
 	 * and log.  For trap level >1 we just clear the CE bit
 	 * in the AFSR and return.
-	 */
-
-	/* Our trap handling infrastructure allows us to preserve
-	 * two 64-bit values during etrap for arguments to
-	 * subsequent C code.  Therefore we encode the information
-	 * as follows:
 	 *
-	 * value 1) Full 64-bits of AFAR
-	 * value 2) Low 33-bits of AFSR, then bits 33-->42
-	 *          are UDBL error status and bits 43-->52
-	 *          are UDBH error status
+	 * This is just like __spitfire_access_error above, but it
+	 * specifically handles correctable errors.  If an
+	 * uncorrectable error is indicated in the AFSR we
+	 * will branch directly above to __spitfire_access_error
+	 * to handle it instead.  Uncorrectable therefore takes
+	 * priority over correctable, and the error logging
+	 * C code will notice this case by inspecting the
+	 * trap type.
 	 */
-	.align	64
-	.globl	cee_trap
-cee_trap:
-	ldxa	[%g0] ASI_AFSR, %g1		! Read AFSR
-	ldxa	[%g0] ASI_AFAR, %g2		! Read AFAR
-	sllx	%g1, 31, %g1			! Clear reserved bits
-	srlx	%g1, 31, %g1			! in AFSR
+	.globl		__spitfire_cee_trap
+__spitfire_cee_trap:
+	ldxa		[%g0] ASI_AFSR, %g4	! Get AFSR
+	mov		1, %g3
+	sllx		%g3, SFAFSR_UE_SHIFT, %g3
+	andcc		%g4, %g3, %g0		! Check for UE
+	bne,pn		%xcc, __spitfire_access_error
+	 nop
 
-	/* NOTE: UltraSparc-I/II have high and low UDB error
-	 *       registers, corresponding to the two UDB units
-	 *       present on those chips.  UltraSparc-IIi only
-	 *       has a single UDB, called "SDB" in the manual.
-	 *       For IIi the upper UDB register always reads
-	 *       as zero so for our purposes things will just
-	 *       work with the checks below.
+	/* Ok, in this case we only have a correctable error.
+	 * Indicate we only wish to capture that state in register
+	 * %g1, and we only disable CE error reporting unlike UE
+	 * handling which disables all errors.
 	 */
-	ldxa	[%g0] ASI_UDBL_ERROR_R, %g3	! Read UDB-Low error status
-	andcc	%g3, (1 << 8), %g4		! Check CE bit
-	sllx	%g3, (64 - 10), %g3		! Clear reserved bits
-	srlx	%g3, (64 - 10), %g3		! in UDB-Low error status
+	ldxa		[%g0] ASI_ESTATE_ERROR_EN, %g3
+	andn		%g3, ESTATE_ERR_CE, %g3
+	stxa		%g3, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
 
-	sllx	%g3, (33 + 0), %g3		! Shift up to encoding area
-	or	%g1, %g3, %g1			! Or it in
-	be,pn	%xcc, 1f			! Branch if CE bit was clear
+	/* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */
+	ba,pt		%xcc, __spitfire_cee_trap_continue
+	 mov		UDBE_CE, %g1
+
+	.globl		__spitfire_data_access_exception
+	.globl		__spitfire_data_access_exception_tl1
+__spitfire_data_access_exception_tl1:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	mov		DMMU_SFAR, %g5
+	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
+	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
+	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
+	membar		#Sync
+	rdpr		%tt, %g3
+	cmp		%g3, 0x80		! first win spill/fill trap
+	blu,pn		%xcc, 1f
+	 cmp		%g3, 0xff		! last win spill/fill trap
+	bgu,pn		%xcc, 1f
 	 nop
-	stxa	%g4, [%g0] ASI_UDB_ERROR_W	! Clear CE sticky bit in UDBL
-	membar	#Sync				! Synchronize ASI stores
-1:	mov	0x18, %g5			! Addr of UDB-High error status
-	ldxa	[%g5] ASI_UDBH_ERROR_R, %g3	! Read it
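+	/* The trap type fell in the window spill/fill range, so this
+	 * access exception was taken during register window handling
+	 * and must be repaired by the window fixup code instead.
+	 */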
+	ba,pt		%xcc, winfix_dax
+	 rdpr		%tpc, %g3
+1:	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etraptl1
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_data_access_exception_tl1
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 clr		%l6
 
-	andcc	%g3, (1 << 8), %g4		! Check CE bit
-	sllx	%g3, (64 - 10), %g3		! Clear reserved bits
-	srlx	%g3, (64 - 10), %g3		! in UDB-High error status
-	sllx	%g3, (33 + 10), %g3		! Shift up to encoding area
-	or	%g1, %g3, %g1			! Or it in
-	be,pn	%xcc, 1f			! Branch if CE bit was clear
-	 nop
-	nop
+__spitfire_data_access_exception:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	mov		DMMU_SFAR, %g5
+	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
+	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
+	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_data_access_exception
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 clr		%l6
 
-	stxa	%g4, [%g5] ASI_UDB_ERROR_W	! Clear CE sticky bit in UDBH
-	membar	#Sync				! Synchronize ASI stores
-1:	mov	1, %g5				! AFSR CE bit is
-	sllx	%g5, 20, %g5			! bit 20
-	stxa	%g5, [%g0] ASI_AFSR		! Clear CE sticky bit in AFSR
-	membar	#Sync				! Synchronize ASI stores
-	sllx	%g2, (64 - 41), %g2		! Clear reserved bits
-	srlx	%g2, (64 - 41), %g2		! in latched AFAR
+	.globl		__spitfire_insn_access_exception
+	.globl		__spitfire_insn_access_exception_tl1
+__spitfire_insn_access_exception_tl1:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	ldxa		[%g3] ASI_IMMU, %g4	! Get SFSR
+	rdpr		%tpc, %g5		! IMMU has no SFAR, use TPC
+	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etraptl1
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_insn_access_exception_tl1
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 clr		%l6
 
-	andn	%g2, 0x0f, %g2			! Finish resv bit clearing
-	mov	%g1, %g4			! Move AFSR+UDB* into save reg
-	mov	%g2, %g5			! Move AFAR into save reg
-	rdpr	%pil, %g2
-	wrpr	%g0, 15, %pil
-	ba,pt	%xcc, etrap_irq
-	 rd	%pc, %g7
-	mov	%l4, %o0
-
-	mov	%l5, %o1
-	call	cee_log
-	 add	%sp, PTREGS_OFF, %o2
-	ba,a,pt	%xcc, rtrap_irq
+__spitfire_insn_access_exception:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	ldxa		[%g3] ASI_IMMU, %g4	! Get SFSR
+	rdpr		%tpc, %g5		! IMMU has no SFAR, use TPC
+	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_insn_access_exception
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 clr		%l6
 
 	/* Capture I/D/E-cache state into per-cpu error scoreboard.
 	 *
diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c
index 2803bc7..425c60c 100644
--- a/arch/sparc64/kernel/pci_iommu.c
+++ b/arch/sparc64/kernel/pci_iommu.c
@@ -466,7 +466,7 @@
 		if (!limit)
 			break;
 		udelay(1);
-		membar("#LoadLoad");
+		rmb();
 	}
 	if (!limit)
 		printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 07424b0..6625543 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -103,7 +103,7 @@
 		 * other cpus see our increasing idleness for the buddy
 		 * redistribution algorithm.  -DaveM
 		 */
-		membar("#StoreStore | #StoreLoad");
+		membar_storeload_storestore();
 	}
 }
 
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
index 89f5e01..e09ddf9 100644
--- a/arch/sparc64/kernel/sbus.c
+++ b/arch/sparc64/kernel/sbus.c
@@ -147,7 +147,7 @@
 		if (!limit)
 			break;
 		udelay(1);
-		membar("#LoadLoad");
+		rmb();
 	}
 	if (!limit)
 		printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index b7e6a91..fbdfed3 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -33,7 +33,6 @@
 #include <linux/cpu.h>
 #include <linux/initrd.h>
 
-#include <asm/segment.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/processor.h>
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index b1ed230..aecccd0 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -877,11 +877,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
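+			/* Order the stores to the signal trampoline
+			 * before flushing the I-cache lines holding it.
+			 */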
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();
@@ -1292,11 +1293,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b9b4249..b4fc6a5 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -144,7 +144,7 @@
 	current->active_mm = &init_mm;
 
 	while (!cpu_isset(cpuid, smp_commenced_mask))
-		membar("#LoadLoad");
+		rmb();
 
 	cpu_set(cpuid, cpu_online_map);
 }
@@ -184,11 +184,11 @@
 	for (i = 0; i < NUM_ITERS; i++) {
 		t0 = tick_ops->get_tick();
 		go[MASTER] = 1;
-		membar("#StoreLoad");
+		membar_storeload();
 		while (!(tm = go[SLAVE]))
-			membar("#LoadLoad");
+			rmb();
 		go[SLAVE] = 0;
-		membar("#StoreStore");
+		wmb();
 		t1 = tick_ops->get_tick();
 
 		if (t1 - t0 < best_t1 - best_t0)
@@ -221,7 +221,7 @@
 	go[MASTER] = 1;
 
 	while (go[MASTER])
-		membar("#LoadLoad");
+		rmb();
 
 	local_irq_save(flags);
 	{
@@ -273,21 +273,21 @@
 
 	/* wait for client to be ready */
 	while (!go[MASTER])
-		membar("#LoadLoad");
+		rmb();
 
 	/* now let the client proceed into his loop */
 	go[MASTER] = 0;
-	membar("#StoreLoad");
+	membar_storeload();
 
 	spin_lock_irqsave(&itc_sync_lock, flags);
 	{
 		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
 			while (!go[MASTER])
-				membar("#LoadLoad");
+				rmb();
 			go[MASTER] = 0;
-			membar("#StoreStore");
+			wmb();
 			go[SLAVE] = tick_ops->get_tick();
-			membar("#StoreLoad");
+			membar_storeload();
 		}
 	}
 	spin_unlock_irqrestore(&itc_sync_lock, flags);
@@ -927,11 +927,11 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 1;
-		membar("#StoreStore | #LoadStore");
+		membar_storestore_loadstore();
 		atomic_inc(&smp_capture_registry);
 		smp_cross_call(&xcall_capture, 0, 0, 0);
 		while (atomic_read(&smp_capture_registry) != ncpus)
-			membar("#LoadLoad");
+			rmb();
 #ifdef CAPTURE_DEBUG
 		printk("done\n");
 #endif
@@ -947,7 +947,7 @@
 		       smp_processor_id());
 #endif
 		penguins_are_doing_time = 0;
-		membar("#StoreStore | #StoreLoad");
+		membar_storeload_storestore();
 		atomic_dec(&smp_capture_registry);
 	}
 }
@@ -970,9 +970,9 @@
 	save_alternate_globals(global_save);
 	prom_world(1);
 	atomic_inc(&smp_capture_registry);
-	membar("#StoreLoad | #StoreStore");
+	membar_storeload_storestore();
 	while (penguins_are_doing_time)
-		membar("#LoadLoad");
+		rmb();
 	restore_alternate_globals(global_save);
 	atomic_dec(&smp_capture_registry);
 	prom_world(0);
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 9202d92..a3ea697 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -99,17 +99,6 @@
 extern void dump_thread(struct pt_regs *, struct user *);
 extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_DEBUG_SPINLOCK)
-extern void _do_spin_lock (spinlock_t *lock, char *str);
-extern void _do_spin_unlock (spinlock_t *lock);
-extern int _spin_trylock (spinlock_t *lock);
-extern void _do_read_lock(rwlock_t *rw, char *str);
-extern void _do_read_unlock(rwlock_t *rw, char *str);
-extern void _do_write_lock(rwlock_t *rw, char *str);
-extern void _do_write_unlock(rwlock_t *rw);
-extern int _do_write_trylock(rwlock_t *rw, char *str);
-#endif
-
 extern unsigned long phys_base;
 extern unsigned long pfn_base;
 
@@ -152,18 +141,6 @@
 EXPORT_SYMBOL(cpu_online_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
 
-/* Spinlock debugging library, optional. */
-#ifdef CONFIG_DEBUG_SPINLOCK
-EXPORT_SYMBOL(_do_spin_lock);
-EXPORT_SYMBOL(_do_spin_unlock);
-EXPORT_SYMBOL(_spin_trylock);
-EXPORT_SYMBOL(_do_read_lock);
-EXPORT_SYMBOL(_do_read_unlock);
-EXPORT_SYMBOL(_do_write_lock);
-EXPORT_SYMBOL(_do_write_unlock);
-EXPORT_SYMBOL(_do_write_trylock);
-#endif
-
 EXPORT_SYMBOL(smp_call_function);
 #endif /* CONFIG_SMP */
 
@@ -429,3 +406,12 @@
 EXPORT_SYMBOL(xor_vis_5);
 
 EXPORT_SYMBOL(prom_palette);
+
+/* memory barriers */
+EXPORT_SYMBOL(mb);
+EXPORT_SYMBOL(rmb);
+EXPORT_SYMBOL(wmb);
+EXPORT_SYMBOL(membar_storeload);
+EXPORT_SYMBOL(membar_storeload_storestore);
+EXPORT_SYMBOL(membar_storeload_loadload);
+EXPORT_SYMBOL(membar_storestore_loadstore);
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 0c9e54b..b280b2ef 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -33,6 +33,7 @@
 #include <asm/dcu.h>
 #include <asm/estate.h>
 #include <asm/chafsr.h>
+#include <asm/sfafsr.h>
 #include <asm/psrcompat.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
@@ -143,8 +144,7 @@
 }
 #endif
 
-void instruction_access_exception(struct pt_regs *regs,
-				  unsigned long sfsr, unsigned long sfar)
+void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
 {
 	siginfo_t info;
 
@@ -153,8 +153,8 @@
 		return;
 
 	if (regs->tstate & TSTATE_PRIV) {
-		printk("instruction_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
-		       sfsr, sfar);
+		printk("spitfire_insn_access_exception: SFSR[%016lx] "
+		       "SFAR[%016lx], going.\n", sfsr, sfar);
 		die_if_kernel("Iax", regs);
 	}
 	if (test_thread_flag(TIF_32BIT)) {
@@ -169,19 +169,17 @@
 	force_sig_info(SIGSEGV, &info, current);
 }
 
-void instruction_access_exception_tl1(struct pt_regs *regs,
-				      unsigned long sfsr, unsigned long sfar)
+void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
 {
 	if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
 		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
 		return;
 
 	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-	instruction_access_exception(regs, sfsr, sfar);
+	spitfire_insn_access_exception(regs, sfsr, sfar);
 }
 
-void data_access_exception(struct pt_regs *regs,
-			   unsigned long sfsr, unsigned long sfar)
+void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
 {
 	siginfo_t info;
 
@@ -207,8 +205,8 @@
 			return;
 		}
 		/* Shit... */
-		printk("data_access_exception: SFSR[%016lx] SFAR[%016lx], going.\n",
-		       sfsr, sfar);
+		printk("spitfire_data_access_exception: SFSR[%016lx] "
+		       "SFAR[%016lx], going.\n", sfsr, sfar);
 		die_if_kernel("Dax", regs);
 	}
 
@@ -220,6 +218,16 @@
 	force_sig_info(SIGSEGV, &info, current);
 }
 
+void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+	if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+		       0, 0x30, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	spitfire_data_access_exception(regs, sfsr, sfar);
+}
+
 #ifdef CONFIG_PCI
 /* This is really pathetic... */
 extern volatile int pci_poke_in_progress;
@@ -253,54 +261,13 @@
 			     : "memory");
 }
 
-void do_iae(struct pt_regs *regs)
+static void spitfire_enable_estate_errors(void)
 {
-	siginfo_t info;
-
-	spitfire_clean_and_reenable_l1_caches();
-
-	if (notify_die(DIE_TRAP, "instruction access exception", regs,
-		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
-		return;
-
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_OBJERR;
-	info.si_addr = (void *)0;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
-}
-
-void do_dae(struct pt_regs *regs)
-{
-	siginfo_t info;
-
-#ifdef CONFIG_PCI
-	if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
-		spitfire_clean_and_reenable_l1_caches();
-
-		pci_poke_faulted = 1;
-
-		/* Why the fuck did they have to change this? */
-		if (tlb_type == cheetah || tlb_type == cheetah_plus)
-			regs->tpc += 4;
-
-		regs->tnpc = regs->tpc + 4;
-		return;
-	}
-#endif
-	spitfire_clean_and_reenable_l1_caches();
-
-	if (notify_die(DIE_TRAP, "data access exception", regs,
-		       0, 0x30, SIGTRAP) == NOTIFY_STOP)
-		return;
-
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_OBJERR;
-	info.si_addr = (void *)0;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	__asm__ __volatile__("stxa	%0, [%%g0] %1\n\t"
+			     "membar	#Sync"
+			     : /* no outputs */
+			     : "r" (ESTATE_ERR_ALL),
+			       "i" (ASI_ESTATE_ERROR_EN));
 }
 
 static char ecc_syndrome_table[] = {
@@ -338,65 +305,15 @@
 	0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a
 };
 
-/* cee_trap in entry.S encodes AFSR/UDBH/UDBL error status
- * in the following format.  The AFAR is left as is, with
- * reserved bits cleared, and is a raw 40-bit physical
- * address.
- */
-#define CE_STATUS_UDBH_UE		(1UL << (43 + 9))
-#define CE_STATUS_UDBH_CE		(1UL << (43 + 8))
-#define CE_STATUS_UDBH_ESYNDR		(0xffUL << 43)
-#define CE_STATUS_UDBH_SHIFT		43
-#define CE_STATUS_UDBL_UE		(1UL << (33 + 9))
-#define CE_STATUS_UDBL_CE		(1UL << (33 + 8))
-#define CE_STATUS_UDBL_ESYNDR		(0xffUL << 33)
-#define CE_STATUS_UDBL_SHIFT		33
-#define CE_STATUS_AFSR_MASK		(0x1ffffffffUL)
-#define CE_STATUS_AFSR_ME		(1UL << 32)
-#define CE_STATUS_AFSR_PRIV		(1UL << 31)
-#define CE_STATUS_AFSR_ISAP		(1UL << 30)
-#define CE_STATUS_AFSR_ETP		(1UL << 29)
-#define CE_STATUS_AFSR_IVUE		(1UL << 28)
-#define CE_STATUS_AFSR_TO		(1UL << 27)
-#define CE_STATUS_AFSR_BERR		(1UL << 26)
-#define CE_STATUS_AFSR_LDP		(1UL << 25)
-#define CE_STATUS_AFSR_CP		(1UL << 24)
-#define CE_STATUS_AFSR_WP		(1UL << 23)
-#define CE_STATUS_AFSR_EDP		(1UL << 22)
-#define CE_STATUS_AFSR_UE		(1UL << 21)
-#define CE_STATUS_AFSR_CE		(1UL << 20)
-#define CE_STATUS_AFSR_ETS		(0xfUL << 16)
-#define CE_STATUS_AFSR_ETS_SHIFT	16
-#define CE_STATUS_AFSR_PSYND		(0xffffUL << 0)
-#define CE_STATUS_AFSR_PSYND_SHIFT	0
-
-/* Layout of Ecache TAG Parity Syndrome of AFSR */
-#define AFSR_ETSYNDROME_7_0		0x1UL /* E$-tag bus bits  <7:0> */
-#define AFSR_ETSYNDROME_15_8		0x2UL /* E$-tag bus bits <15:8> */
-#define AFSR_ETSYNDROME_21_16		0x4UL /* E$-tag bus bits <21:16> */
-#define AFSR_ETSYNDROME_24_22		0x8UL /* E$-tag bus bits <24:22> */
-
 static char *syndrome_unknown = "<Unknown>";
 
-asmlinkage void cee_log(unsigned long ce_status,
-			unsigned long afar,
-			struct pt_regs *regs)
+static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit)
 {
-	char memmod_str[64];
-	char *p;
-	unsigned short scode, udb_reg;
+	unsigned short scode;
+	char memmod_str[64], *p;
 
-	printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
-	       "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx]\n",
-	       smp_processor_id(),
-	       (ce_status & CE_STATUS_AFSR_MASK),
-	       afar,
-	       ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL),
-	       ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL));
-
-	udb_reg = ((ce_status >> CE_STATUS_UDBL_SHIFT) & 0x3ffUL);
-	if (udb_reg & (1 << 8)) {
-		scode = ecc_syndrome_table[udb_reg & 0xff];
+	if (udbl & bit) {
+		scode = ecc_syndrome_table[udbl & 0xff];
 		if (prom_getunumber(scode, afar,
 				    memmod_str, sizeof(memmod_str)) == -1)
 			p = syndrome_unknown;
@@ -407,9 +324,8 @@
 		       smp_processor_id(), scode, p);
 	}
 
-	udb_reg = ((ce_status >> CE_STATUS_UDBH_SHIFT) & 0x3ffUL);
-	if (udb_reg & (1 << 8)) {
-		scode = ecc_syndrome_table[udb_reg & 0xff];
+	if (udbh & bit) {
+		scode = ecc_syndrome_table[udbh & 0xff];
 		if (prom_getunumber(scode, afar,
 				    memmod_str, sizeof(memmod_str)) == -1)
 			p = syndrome_unknown;
@@ -419,6 +335,127 @@
 		       "Memory Module \"%s\"\n",
 		       smp_processor_id(), scode, p);
 	}
+
+}
+
+static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs)
+{
+
+	printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
+	       "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n",
+	       smp_processor_id(), afsr, afar, udbl, udbh, tl1);
+
+	spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE);
+
+	/* We always log it, even if someone is listening for this
+	 * trap.
+	 */
+	notify_die(DIE_TRAP, "Correctable ECC Error", regs,
+		   0, TRAP_TYPE_CEE, SIGTRAP);
+
+	/* The Correctable ECC Error trap does not disable I/D caches.  So
+	 * we only have to restore the ESTATE Error Enable register.
+	 */
+	spitfire_enable_estate_errors();
+}
+
+static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
+	       "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
+	       smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
+
+	/* XXX add more human friendly logging of the error status
+	 * XXX as is implemented for cheetah
+	 */
+
+	spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE);
+
+	/* We always log it, even if someone is listening for this
+	 * trap.
+	 */
+	notify_die(DIE_TRAP, "Uncorrectable Error", regs,
+		   0, tt, SIGTRAP);
+
+	if (regs->tstate & TSTATE_PRIV) {
+		if (tl1)
+			dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+		die_if_kernel("UE", regs);
+	}
+
+	/* XXX need more intelligent processing here, such as is implemented
+	 * XXX for cheetah errors, in fact if the E-cache still holds the
+	 * XXX line with bad parity this will loop
+	 */
+
+	spitfire_clean_and_reenable_l1_caches();
+	spitfire_enable_estate_errors();
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_OBJERR;
+	info.si_addr = (void *)0;
+	info.si_trapno = 0;
+	force_sig_info(SIGBUS, &info, current);
+}
+
+void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
+{
+	unsigned long afsr, tt, udbh, udbl;
+	int tl1;
+
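+	/* Unpack the status word assembled by the entry.S handlers;
+	 * the field layout is described in asm-sparc64/sfafsr.h.
+	 */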
+	afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT;
+	tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT;
+	tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0;
+	udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT;
+	udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT;
+
+#ifdef CONFIG_PCI
+	if (tt == TRAP_TYPE_DAE &&
+	    pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+		spitfire_clean_and_reenable_l1_caches();
+		spitfire_enable_estate_errors();
+
+		pci_poke_faulted = 1;
+		regs->tnpc = regs->tpc + 4;
+		return;
+	}
+#endif
+
+	if (afsr & SFAFSR_UE)
+		spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs);
+
+	if (tt == TRAP_TYPE_CEE) {
+		/* Handle the case where we took a CEE trap, but ACK'd
+		 * only the UE state in the UDB error registers.
+		 */
+		if (afsr & SFAFSR_UE) {
+			if (udbh & UDBE_CE) {
+				__asm__ __volatile__(
+					"stxa	%0, [%1] %2\n\t"
+					"membar	#Sync"
+					: /* no outputs */
+					: "r" (udbh & UDBE_CE),
+					  "r" (0x0), "i" (ASI_UDB_ERROR_W));
+			}
+			if (udbl & UDBE_CE) {
+				__asm__ __volatile__(
+					"stxa	%0, [%1] %2\n\t"
+					"membar	#Sync"
+					: /* no outputs */
+					: "r" (udbl & UDBE_CE),
+					  "r" (0x18), "i" (ASI_UDB_ERROR_W));
+			}
+		}
+
+		spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs);
+	}
 }
 
 int cheetah_pcache_forced_on;
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 491bb3681f..8365bc1 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -18,9 +18,10 @@
 tl0_resv000:	BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3)
 tl0_resv004:	BTRAP(0x4)  BTRAP(0x5) BTRAP(0x6) BTRAP(0x7)
 tl0_iax:	membar #Sync
-		TRAP_NOSAVE_7INSNS(__do_instruction_access_exception)
+		TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception)
 tl0_resv009:	BTRAP(0x9)
-tl0_iae:	TRAP(do_iae)
+tl0_iae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
 tl0_resv00b:	BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf)
 tl0_ill:	membar #Sync
 		TRAP_7INSNS(do_illegal_instruction)
@@ -36,9 +37,10 @@
 tl0_div0:	TRAP(do_div0)
 tl0_resv029:	BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e)
 tl0_resv02f:	BTRAP(0x2f)
-tl0_dax:	TRAP_NOSAVE(__do_data_access_exception)
+tl0_dax:	TRAP_NOSAVE(__spitfire_data_access_exception)
 tl0_resv031:	BTRAP(0x31)
-tl0_dae:	TRAP(do_dae)
+tl0_dae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
 tl0_resv033:	BTRAP(0x33)
 tl0_mna:	TRAP_NOSAVE(do_mna)
 tl0_lddfmna:	TRAP_NOSAVE(do_lddfmna)
@@ -73,7 +75,8 @@
 tl0_ivec:	TRAP_IVEC
 tl0_paw:	TRAP(do_paw)
 tl0_vaw:	TRAP(do_vaw)
-tl0_cee:	TRAP_NOSAVE(cee_trap)
+tl0_cee:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
 tl0_iamiss:
 #include	"itlb_base.S"
 tl0_damiss:
@@ -175,9 +178,10 @@
 sparc64_ttable_tl1:
 tl1_resv000:	BOOT_KERNEL    BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3)
 tl1_resv004:	BTRAPTL1(0x4)  BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7)
-tl1_iax:	TRAP_NOSAVE(__do_instruction_access_exception_tl1)
+tl1_iax:	TRAP_NOSAVE(__spitfire_insn_access_exception_tl1)
 tl1_resv009:	BTRAPTL1(0x9)
-tl1_iae:	TRAPTL1(do_iae_tl1)
+tl1_iae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
 tl1_resv00b:	BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf)
 tl1_ill:	TRAPTL1(do_ill_tl1)
 tl1_privop:	BTRAPTL1(0x11)
@@ -193,9 +197,10 @@
 tl1_div0:	TRAPTL1(do_div0_tl1)
 tl1_resv029:	BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c)
 tl1_resv02d:	BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f)
-tl1_dax:	TRAP_NOSAVE(__do_data_access_exception_tl1)
+tl1_dax:	TRAP_NOSAVE(__spitfire_data_access_exception_tl1)
 tl1_resv031:	BTRAPTL1(0x31)
-tl1_dae:	TRAPTL1(do_dae_tl1)
+tl1_dae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
 tl1_resv033:	BTRAPTL1(0x33)
 tl1_mna:	TRAP_NOSAVE(do_mna)
 tl1_lddfmna:	TRAPTL1(do_lddfmna_tl1)
@@ -219,8 +224,8 @@
 tl1_vaw:	TRAPTL1(do_vaw_tl1)
 
 		/* The grotty trick to save %g1 into current->thread.cee_stuff
-		 * is because when we take this trap we could be interrupting trap
-		 * code already using the trap alternate global registers.
+		 * is because when we take this trap we could be interrupting
+		 * trap code already using the trap alternate global registers.
 		 *
 		 * We cross our fingers and pray that this store/load does
 		 * not cause yet another CEE trap.
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
index 11c3e88..da9739f 100644
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -349,9 +349,9 @@
 
 extern void do_fpother(struct pt_regs *regs);
 extern void do_privact(struct pt_regs *regs);
-extern void data_access_exception(struct pt_regs *regs,
-				  unsigned long sfsr,
-				  unsigned long sfar);
+extern void spitfire_data_access_exception(struct pt_regs *regs,
+					   unsigned long sfsr,
+					   unsigned long sfar);
 
 int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 {
@@ -394,14 +394,14 @@
 				break;
 			}
 		default:
-			data_access_exception(regs, 0, addr);
+			spitfire_data_access_exception(regs, 0, addr);
 			return 1;
 		}
 		if (put_user (first >> 32, (u32 __user *)addr) ||
 		    __put_user ((u32)first, (u32 __user *)(addr + 4)) ||
 		    __put_user (second >> 32, (u32 __user *)(addr + 8)) ||
 		    __put_user ((u32)second, (u32 __user *)(addr + 12))) {
-		    	data_access_exception(regs, 0, addr);
+		    	spitfire_data_access_exception(regs, 0, addr);
 		    	return 1;
 		}
 	} else {
@@ -414,7 +414,7 @@
 			do_privact(regs);
 			return 1;
 		} else if (asi > ASI_SNFL) {
-			data_access_exception(regs, 0, addr);
+			spitfire_data_access_exception(regs, 0, addr);
 			return 1;
 		}
 		switch (insn & 0x180000) {
@@ -431,7 +431,7 @@
 				err |= __get_user (data[i], (u32 __user *)(addr + 4*i));
 		}
 		if (err && !(asi & 0x2 /* NF */)) {
-			data_access_exception(regs, 0, addr);
+			spitfire_data_access_exception(regs, 0, addr);
 			return 1;
 		}
 		if (asi & 0x8) /* Little */ {
@@ -534,7 +534,7 @@
 		*(u64 *)(f->regs + freg) = value;
 		current_thread_info()->fpsaved[0] |= flag;
 	} else {
-daex:		data_access_exception(regs, sfsr, sfar);
+daex:		spitfire_data_access_exception(regs, sfsr, sfar);
 		return;
 	}
 	advance(regs);
@@ -578,7 +578,7 @@
 		    __put_user ((u32)value, (u32 __user *)(sfar + 4)))
 			goto daex;
 	} else {
-daex:		data_access_exception(regs, sfsr, sfar);
+daex:		spitfire_data_access_exception(regs, sfsr, sfar);
 		return;
 	}
 	advance(regs);
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index dfbc7e0..99c809a 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -318,7 +318,7 @@
 	 nop
 	rdpr		%pstate, %l1			! Prepare to change globals.
 	mov		%g4, %o1			! Setup args for
-	mov		%g5, %o2			! final call to data_access_exception.
+	mov		%g5, %o2			! final call to spitfire_data_access_exception.
 	andn		%l1, PSTATE_MM, %l1		! We want to be in RMO
 
 	mov		%g6, %o7			! Stash away current.
@@ -330,7 +330,7 @@
 	mov		TSB_REG, %g1
 	ldxa		[%g1] ASI_IMMU, %g5
 #endif
-	call		data_access_exception
+	call		spitfire_data_access_exception
 	 add		%sp, PTREGS_OFF, %o0
 
 	b,pt		%xcc, rtrap
@@ -391,7 +391,7 @@
 109:	 or		%g7, %lo(109b), %g7
 	mov		%l4, %o1
 	mov		%l5, %o2
-	call		data_access_exception
+	call		spitfire_data_access_exception
 	 add		%sp, PTREGS_OFF, %o0
 	ba,pt		%xcc, rtrap
 	 clr		%l6
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 40dbeec..6201f10 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -12,7 +12,7 @@
 	 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
 	 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
 	 copy_in_user.o user_fixup.o memmove.o \
-	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
+	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o mb.o
 
 lib-$(CONFIG_DEBUG_SPINLOCK) += debuglocks.o
 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index f03344c..f5f0b558 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -12,8 +12,6 @@
 
 #ifdef CONFIG_SMP
 
-#define GET_CALLER(PC) __asm__ __volatile__("mov %%i7, %0" : "=r" (PC))
-
 static inline void show (char *str, spinlock_t *lock, unsigned long caller)
 {
 	int cpu = smp_processor_id();
@@ -51,20 +49,19 @@
 #undef INIT_STUCK
 #define INIT_STUCK 100000000
 
-void _do_spin_lock(spinlock_t *lock, char *str)
+void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int stuck = INIT_STUCK;
 	int cpu = get_cpu();
 	int shown = 0;
 
-	GET_CALLER(caller);
 again:
 	__asm__ __volatile__("ldstub [%1], %0"
 			     : "=r" (val)
 			     : "r" (&(lock->lock))
 			     : "memory");
-	membar("#StoreLoad | #StoreStore");
+	membar_storeload_storestore();
 	if (val) {
 		while (lock->lock) {
 			if (!--stuck) {
@@ -72,7 +69,7 @@
 					show(str, lock, caller);
 				stuck = INIT_STUCK;
 			}
-			membar("#LoadLoad");
+			rmb();
 		}
 		goto again;
 	}
@@ -84,17 +81,16 @@
 	put_cpu();
 }
 
-int _do_spin_trylock(spinlock_t *lock)
+int _do_spin_trylock(spinlock_t *lock, unsigned long caller)
 {
-	unsigned long val, caller;
+	unsigned long val;
 	int cpu = get_cpu();
 
-	GET_CALLER(caller);
 	__asm__ __volatile__("ldstub [%1], %0"
 			     : "=r" (val)
 			     : "r" (&(lock->lock))
 			     : "memory");
-	membar("#StoreLoad | #StoreStore");
+	membar_storeload_storestore();
 	if (!val) {
 		lock->owner_pc = ((unsigned int)caller);
 		lock->owner_cpu = cpu;
@@ -111,21 +107,20 @@
 {
 	lock->owner_pc = 0;
 	lock->owner_cpu = NO_PROC_ID;
-	membar("#StoreStore | #LoadStore");
+	membar_storestore_loadstore();
 	lock->lock = 0;
 	current->thread.smp_lock_count--;
 }
 
 /* Keep INIT_STUCK the same... */
 
-void _do_read_lock (rwlock_t *rw, char *str)
+void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int stuck = INIT_STUCK;
 	int cpu = get_cpu();
 	int shown = 0;
 
-	GET_CALLER(caller);
 wlock_again:
 	/* Wait for any writer to go away.  */
 	while (((long)(rw->lock)) < 0) {
@@ -134,7 +129,7 @@
 				show_read(str, rw, caller);
 			stuck = INIT_STUCK;
 		}
-		membar("#LoadLoad");
+		rmb();
 	}
 	/* Try once to increment the counter.  */
 	__asm__ __volatile__(
@@ -147,7 +142,7 @@
 "2:"	: "=r" (val)
 	: "0" (&(rw->lock))
 	: "g1", "g7", "memory");
-	membar("#StoreLoad | #StoreStore");
+	membar_storeload_storestore();
 	if (val)
 		goto wlock_again;
 	rw->reader_pc[cpu] = ((unsigned int)caller);
@@ -157,15 +152,13 @@
 	put_cpu();
 }
 
-void _do_read_unlock (rwlock_t *rw, char *str)
+void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int stuck = INIT_STUCK;
 	int cpu = get_cpu();
 	int shown = 0;
 
-	GET_CALLER(caller);
-
 	/* Drop our identity _first_. */
 	rw->reader_pc[cpu] = 0;
 	current->thread.smp_lock_count--;
@@ -193,14 +186,13 @@
 	put_cpu();
 }
 
-void _do_write_lock (rwlock_t *rw, char *str)
+void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int stuck = INIT_STUCK;
 	int cpu = get_cpu();
 	int shown = 0;
 
-	GET_CALLER(caller);
 wlock_again:
 	/* Spin while there is another writer. */
 	while (((long)rw->lock) < 0) {
@@ -209,7 +201,7 @@
 				show_write(str, rw, caller);
 			stuck = INIT_STUCK;
 		}
-		membar("#LoadLoad");
+		rmb();
 	}
 
 	/* Try to acquire the write bit.  */
@@ -264,7 +256,7 @@
 					show_write(str, rw, caller);
 				stuck = INIT_STUCK;
 			}
-			membar("#LoadLoad");
+			rmb();
 		}
 		goto wlock_again;
 	}
@@ -278,14 +270,12 @@
 	put_cpu();
 }
 
-void _do_write_unlock(rwlock_t *rw)
+void _do_write_unlock(rwlock_t *rw, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int stuck = INIT_STUCK;
 	int shown = 0;
 
-	GET_CALLER(caller);
-
 	/* Drop our identity _first_ */
 	rw->writer_pc = 0;
 	rw->writer_cpu = NO_PROC_ID;
@@ -313,13 +303,11 @@
 	}
 }
 
-int _do_write_trylock (rwlock_t *rw, char *str)
+int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller)
 {
-	unsigned long caller, val;
+	unsigned long val;
 	int cpu = get_cpu();
 
-	GET_CALLER(caller);
-
 	/* Try to acquire the write bit.  */
 	__asm__ __volatile__(
 "	mov	1, %%g3\n"
diff --git a/arch/sparc64/lib/mb.S b/arch/sparc64/lib/mb.S
new file mode 100644
index 0000000..4004f74
--- /dev/null
+++ b/arch/sparc64/lib/mb.S
@@ -0,0 +1,73 @@
+/* mb.S: Out of line memory barriers.
+ *
+ * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
+ */
+
+	/* These are here in an effort to more fully work around
+	 * Spitfire Errata #51.  Essentially, if a memory barrier
+	 * occurs soon after a mispredicted branch, the chip can stop
+	 * executing instructions until a trap occurs.  Therefore, if
+	 * interrupts are disabled, the chip can hang forever.
+	 *
+	 * It used to be believed that the memory barrier had to be
+	 * right in the delay slot, but a case has been traced
+	 * recently wherein the memory barrier was one instruction
+	 * after the branch delay slot and the chip still hung.  The
+	 * offending sequence was the following in sym_wakeup_done()
+	 * of the sym53c8xx_2 driver:
+	 *
+	 *	call	sym_ccb_from_dsa, 0
+	 *	 movge	%icc, 0, %l0
+	 *	brz,pn	%o0, .LL1303
+	 *	 mov	%o0, %l2
+	 *	membar	#LoadLoad
+	 *
+	 * The branch has to be mispredicted for the bug to occur.
+	 * Therefore, we put the memory barrier explicitly into a
+	 * "branch always, predicted taken" delay slot to avoid the
+	 * problem case.
+	 */
+
+	.text
+
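+	/* Common return point; each barrier below branches here with
+	 * its membar instruction in the branch delay slot.
+	 */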
+99:	retl
+	 nop
+
+	.globl	mb
+mb:	ba,pt	%xcc, 99b
+	 membar	#LoadLoad | #LoadStore | #StoreStore | #StoreLoad
+	.size	mb, .-mb
+
+	.globl	rmb
+rmb:	ba,pt	%xcc, 99b
+	 membar	#LoadLoad
+	.size	rmb, .-rmb
+
+	.globl	wmb
+wmb:	ba,pt	%xcc, 99b
+	 membar	#StoreStore
+	.size	wmb, .-wmb
+
+	.globl	membar_storeload
+membar_storeload:
+	ba,pt	%xcc, 99b
+	 membar	#StoreLoad
+	.size	membar_storeload, .-membar_storeload
+
+	.globl	membar_storeload_storestore
+membar_storeload_storestore:
+	ba,pt	%xcc, 99b
+	 membar	#StoreLoad | #StoreStore
+	.size	membar_storeload_storestore, .-membar_storeload_storestore
+
+	.globl	membar_storeload_loadload
+membar_storeload_loadload:
+	ba,pt	%xcc, 99b
+	 membar	#StoreLoad | #LoadLoad
+	.size	membar_storeload_loadload, .-membar_storeload_loadload
+
+	.globl	membar_storestore_loadstore
+membar_storestore_loadstore:
+	ba,pt	%xcc, 99b
+	 membar	#StoreStore | #LoadStore
+	.size	membar_storestore_loadstore, .-membar_storestore_loadstore
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 15b4cfe..302efbc 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -737,7 +737,8 @@
 extern u32 tl0_solaris[8];
 #define update_ttable(x) 										\
 	tl0_solaris[3] = (((long)(x) - (long)tl0_solaris - 3) >> 2) | 0x40000000;			\
-	__asm__ __volatile__ ("membar #StoreStore; flush %0" : : "r" (&tl0_solaris[3]))
+	wmb();		\
+	__asm__ __volatile__ ("flush %0" : : "r" (&tl0_solaris[3]))
 #else
 #endif	
 
@@ -761,7 +762,8 @@
 	entry64_personality_patch |=
 		(offsetof(struct task_struct, personality) +
 		 (sizeof(unsigned long) - 1));
-	__asm__ __volatile__("membar #StoreStore; flush %0"
+	wmb();
+	__asm__ __volatile__("flush %0"
 			     : : "r" (&entry64_personality_patch));
 	return 0;
 }
diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h
index 32c9699..5a7a1a8 100644
--- a/include/asm-sparc/processor.h
+++ b/include/asm-sparc/processor.h
@@ -19,7 +19,6 @@
 #include <asm/ptrace.h>
 #include <asm/head.h>
 #include <asm/signal.h>
-#include <asm/segment.h>
 #include <asm/btfixup.h>
 #include <asm/page.h>
 
diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h
deleted file mode 100644
index a1b7ffc..0000000
--- a/include/asm-sparc/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SPARC_SEGMENT_H
-#define __SPARC_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 898562e..3557781 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -9,7 +9,6 @@
 #include <linux/threads.h>	/* NR_CPUS */
 #include <linux/thread_info.h>
 
-#include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/psr.h>
 #include <asm/ptrace.h>
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index d80f3379..e175afc 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,10 +72,10 @@
 
 /* Atomic operations are already serializing */
 #ifdef CONFIG_SMP
-#define smp_mb__before_atomic_dec()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_dec()	membar("#StoreLoad | #StoreStore")
-#define smp_mb__before_atomic_inc()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_inc()	membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_atomic_dec()	membar_storeload_loadload();
+#define smp_mb__after_atomic_dec()	membar_storeload_storestore();
+#define smp_mb__before_atomic_inc()	membar_storeload_loadload();
+#define smp_mb__after_atomic_inc()	membar_storeload_storestore();
 #else
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 9c5e7197..6388b83 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -72,8 +72,8 @@
 }
 
 #ifdef CONFIG_SMP
-#define smp_mb__before_clear_bit()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_clear_bit()	membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_clear_bit()	membar_storeload_loadload()
+#define smp_mb__after_clear_bit()	membar_storeload_storestore()
 #else
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index d0bee24..3169f3e 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -18,7 +18,6 @@
 #include <asm/a.out.h>
 #include <asm/pstate.h>
 #include <asm/ptrace.h>
-#include <asm/segment.h>
 #include <asm/page.h>
 
 /* The sparc has no problems with write protection */
diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h
deleted file mode 100644
index b03e709..0000000
--- a/include/asm-sparc64/segment.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SPARC64_SEGMENT_H
-#define __SPARC64_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h
new file mode 100644
index 0000000..2f792c2
--- /dev/null
+++ b/include/asm-sparc64/sfafsr.h
@@ -0,0 +1,82 @@
+#ifndef _SPARC64_SFAFSR_H
+#define _SPARC64_SFAFSR_H
+
+#include <asm/const.h>
+
+/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
+
+#define SFAFSR_ME		(_AC(1,UL) << SFAFSR_ME_SHIFT)
+#define SFAFSR_ME_SHIFT		32
+#define SFAFSR_PRIV		(_AC(1,UL) << SFAFSR_PRIV_SHIFT)
+#define SFAFSR_PRIV_SHIFT	31
+#define SFAFSR_ISAP		(_AC(1,UL) << SFAFSR_ISAP_SHIFT)
+#define SFAFSR_ISAP_SHIFT	30
+#define SFAFSR_ETP		(_AC(1,UL) << SFAFSR_ETP_SHIFT)
+#define SFAFSR_ETP_SHIFT	29
+#define SFAFSR_IVUE		(_AC(1,UL) << SFAFSR_IVUE_SHIFT)
+#define SFAFSR_IVUE_SHIFT	28
+#define SFAFSR_TO		(_AC(1,UL) << SFAFSR_TO_SHIFT)
+#define SFAFSR_TO_SHIFT		27
+#define SFAFSR_BERR		(_AC(1,UL) << SFAFSR_BERR_SHIFT)
+#define SFAFSR_BERR_SHIFT	26
+#define SFAFSR_LDP		(_AC(1,UL) << SFAFSR_LDP_SHIFT)
+#define SFAFSR_LDP_SHIFT	25
+#define SFAFSR_CP		(_AC(1,UL) << SFAFSR_CP_SHIFT)
+#define SFAFSR_CP_SHIFT		24
+#define SFAFSR_WP		(_AC(1,UL) << SFAFSR_WP_SHIFT)
+#define SFAFSR_WP_SHIFT		23
+#define SFAFSR_EDP		(_AC(1,UL) << SFAFSR_EDP_SHIFT)
+#define SFAFSR_EDP_SHIFT	22
+#define SFAFSR_UE		(_AC(1,UL) << SFAFSR_UE_SHIFT)
+#define SFAFSR_UE_SHIFT		21
+#define SFAFSR_CE		(_AC(1,UL) << SFAFSR_CE_SHIFT)
+#define SFAFSR_CE_SHIFT		20
+#define SFAFSR_ETS		(_AC(0xf,UL) << SFAFSR_ETS_SHIFT)
+#define SFAFSR_ETS_SHIFT	16
+#define SFAFSR_PSYND		(_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT)
+#define SFAFSR_PSYND_SHIFT	0
+
+/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read
+ *                     ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write
+ */
+
+#define UDBE_UE			(_AC(1,UL) << 9)
+#define UDBE_CE			(_AC(1,UL) << 8)
+#define UDBE_E_SYNDR		(_AC(0xff,UL) << 0)
+
+/* The trap handlers for asynchronous errors encode the AFSR and
+ * other pieces of information into a 64-bit argument for C code
+ * encoded as follows:
+ *
+ * -----------------------------------------------
+ * |  UDB_H  |  UDB_L  | TL>1  |  TT  |   AFSR   |
+ * -----------------------------------------------
+ *  63     54 53     44    42   41  33 32       0
+ *
+ * The AFAR is passed in unchanged.
+ */
+#define SFSTAT_UDBH_MASK	(_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
+#define SFSTAT_UDBH_SHIFT	54
+#define SFSTAT_UDBL_MASK	(_AC(0x3ff,UL) << SFSTAT_UDBL_SHIFT)
+#define SFSTAT_UDBL_SHIFT	44
+#define SFSTAT_TL_GT_ONE	(_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT)
+#define SFSTAT_TL_GT_ONE_SHIFT	42
+#define SFSTAT_TRAP_TYPE	(_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT)
+#define SFSTAT_TRAP_TYPE_SHIFT	33
+#define SFSTAT_AFSR_MASK	(_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT)
+#define SFSTAT_AFSR_SHIFT	0
+
+/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */
+#define ESTATE_ERR_CE		0x1 /* Correctable errors                    */
+#define ESTATE_ERR_NCE		0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */
+#define ESTATE_ERR_ISAP		0x4 /* System address parity error           */
+#define ESTATE_ERR_ALL		(ESTATE_ERR_CE | \
+				 ESTATE_ERR_NCE | \
+				 ESTATE_ERR_ISAP)
+
+/* The various trap types that report using the above state. */
+#define TRAP_TYPE_IAE		0x09 /* Instruction Access Error             */
+#define TRAP_TYPE_DAE		0x32 /* Data Access Error                    */
+#define TRAP_TYPE_CEE		0x63 /* Correctable ECC Error                */
+
+#endif /* _SPARC64_SFAFSR_H */
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index 9cb93a5..a02c437 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -43,7 +43,7 @@
 #define spin_is_locked(lp)  ((lp)->lock != 0)
 
 #define spin_unlock_wait(lp)	\
-do {	membar("#LoadLoad");	\
+do {	rmb();			\
 } while((lp)->lock)
 
 static inline void _raw_spin_lock(spinlock_t *lock)
@@ -129,15 +129,18 @@
 #define spin_is_locked(__lock)	((__lock)->lock != 0)
 #define spin_unlock_wait(__lock)	\
 do { \
-	membar("#LoadLoad"); \
+	rmb(); \
 } while((__lock)->lock)
 
-extern void _do_spin_lock (spinlock_t *lock, char *str);
-extern void _do_spin_unlock (spinlock_t *lock);
-extern int _do_spin_trylock (spinlock_t *lock);
+extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller);
+extern void _do_spin_unlock(spinlock_t *lock);
+extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller);
 
-#define _raw_spin_trylock(lp)	_do_spin_trylock(lp)
-#define _raw_spin_lock(lock)	_do_spin_lock(lock, "spin_lock")
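+/* The caller's PC is captured at the call site with
+ * __builtin_return_address(0) and handed to the debug lock
+ * routines explicitly.
+ */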
+#define _raw_spin_trylock(lp)	\
+	_do_spin_trylock(lp, (unsigned long) __builtin_return_address(0))
+#define _raw_spin_lock(lock)	\
+	_do_spin_lock(lock, "spin_lock", \
+		      (unsigned long) __builtin_return_address(0))
 #define _raw_spin_unlock(lock)	_do_spin_unlock(lock)
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
 
@@ -279,37 +282,41 @@
 #define RW_LOCK_UNLOCKED	(rwlock_t) { 0, 0, 0xff, { } }
 #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0)
 
-extern void _do_read_lock(rwlock_t *rw, char *str);
-extern void _do_read_unlock(rwlock_t *rw, char *str);
-extern void _do_write_lock(rwlock_t *rw, char *str);
-extern void _do_write_unlock(rwlock_t *rw);
-extern int _do_write_trylock(rwlock_t *rw, char *str);
+extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller);
+extern void _do_write_unlock(rwlock_t *rw, unsigned long caller);
+extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller);
 
 #define _raw_read_lock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	_do_read_lock(lock, "read_lock"); \
+	_do_read_lock(lock, "read_lock", \
+		      (unsigned long) __builtin_return_address(0)); \
 	local_irq_restore(flags); \
 } while(0)
 
 #define _raw_read_unlock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	_do_read_unlock(lock, "read_unlock"); \
+	_do_read_unlock(lock, "read_unlock", \
+		      (unsigned long) __builtin_return_address(0)); \
 	local_irq_restore(flags); \
 } while(0)
 
 #define _raw_write_lock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	_do_write_lock(lock, "write_lock"); \
+	_do_write_lock(lock, "write_lock", \
+		      (unsigned long) __builtin_return_address(0)); \
 	local_irq_restore(flags); \
 } while(0)
 
 #define _raw_write_unlock(lock) \
 do {	unsigned long flags; \
 	local_irq_save(flags); \
-	_do_write_unlock(lock); \
+	_do_write_unlock(lock, \
+		      (unsigned long) __builtin_return_address(0)); \
 	local_irq_restore(flags); \
 } while(0)
 
@@ -317,7 +324,8 @@
 ({	unsigned long flags; \
 	int val; \
 	local_irq_save(flags); \
-	val = _do_write_trylock(lock, "write_trylock"); \
+	val = _do_write_trylock(lock, "write_trylock", \
+				(unsigned long) __builtin_return_address(0)); \
 	local_irq_restore(flags); \
 	val; \
 })
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index ee4bdfc..5e94c05 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,6 +28,14 @@
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
+extern void mb(void);
+extern void rmb(void);
+extern void wmb(void);
+extern void membar_storeload(void);
+extern void membar_storeload_storestore(void);
+extern void membar_storeload_loadload(void);
+extern void membar_storestore_loadstore(void);
+
 #endif
 
 #define setipl(__new_ipl) \
@@ -78,16 +86,11 @@
 
 #define nop() 		__asm__ __volatile__ ("nop")
 
-#define membar(type)	__asm__ __volatile__ ("membar " type : : : "memory")
-#define mb()		\
-	membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()		membar("#LoadLoad")
-#define wmb()		membar("#StoreStore")
 #define read_barrier_depends()		do { } while(0)
 #define set_mb(__var, __value) \
-	do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0)
+	do { __var = __value; membar_storeload_storestore(); } while(0)
 #define set_wmb(__var, __value) \
-	do { __var = __value; membar("#StoreStore"); } while(0)
+	do { __var = __value; wmb(); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()