[Pkg-lustre-svn-commit] r435 - in /trunk/lustre/kernel_patches: patches/ series/

goswin-guest at users.alioth.debian.org goswin-guest at users.alioth.debian.org
Wed Feb 13 12:23:22 UTC 2008


Author: goswin-guest
Date: Wed Feb 13 12:23:22 2008
New Revision: 435

URL: http://svn.debian.org/wsvn/pkg-lustre/?sc=1&rev=435
Log:
Bring kernel patches in sync.


Added:
    trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
    trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch
    trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch
Modified:
    trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch
    trunk/lustre/kernel_patches/series/2.6.20-vanilla.series
    trunk/lustre/kernel_patches/series/2.6.22-vanilla.series

Added: trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch (added)
+++ trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,2143 @@
+x86: Readd dwarf2 unwinder
+
+From: Jan Beulich <jbeulich at novell.com>
+
+The dwarf2 unwinder uses information generated by the compiler
+to do exact backtraces without frame pointers.  Enabled for i386 and x86-64. 
+
+AK: Readded by me, but all the real work was done by Jan
+AK: I just did some cleanup
+
+TBD: add paranoid checks Linus wanted
+TBD: better description
+
+Signed-off-by: Andi Kleen <ak at suse.de>
+
+---
+ Makefile                          |    5 
+ arch/i386/kernel/Makefile         |    1 
+ arch/i386/kernel/traps.c          |   84 ++
+ arch/i386/kernel/unwind.S         |   36 +
+ arch/x86_64/Makefile              |    2 
+ arch/x86_64/kernel/Makefile       |    1 
+ arch/x86_64/kernel/traps.c        |   85 ++
+ arch/x86_64/kernel/unwind.S       |   38 +
+ arch/x86_64/kernel/vmlinux.lds.S  |    2 
+ include/asm-generic/vmlinux.lds.h |   22 
+ include/asm-i386/unwind.h         |   91 ++
+ include/asm-x86_64/unwind.h       |   96 ++
+ include/linux/unwind.h            |   63 +
+ kernel/Makefile                   |    1 
+ kernel/unwind.c                   | 1288 ++++++++++++++++++++++++++++++++++++++
+ lib/Kconfig.debug                 |   21 
+ 16 files changed, 1834 insertions(+), 2 deletions(-)
+
+Index: linux-2.6.20.3/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/Makefile	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/Makefile	2007-08-29 15:10:06.000000000 +0200
+@@ -496,6 +496,11 @@
+ CFLAGS		+= -fomit-frame-pointer
+ endif
+ 
++ifdef CONFIG_UNWIND_INFO
++CFLAGS		+= -fasynchronous-unwind-tables
++LDFLAGS_vmlinux	+= --eh-frame-hdr
++endif
++
+ ifdef CONFIG_DEBUG_INFO
+ CFLAGS		+= -g
+ endif
+Index: linux-2.6.20.3/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.6.20.3.orig/arch/i386/kernel/traps.c	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/i386/kernel/traps.c	2007-08-29 15:14:28.000000000 +0200
+@@ -94,6 +94,12 @@
+ asmlinkage void machine_check(void);
+ 
+ int kstack_depth_to_print = 24;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
++
+ ATOMIC_NOTIFIER_HEAD(i386die_chain);
+ 
+ int register_die_notifier(struct notifier_block *nb)
+@@ -112,7 +118,7 @@
+ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+ 	return	p > (void *)tinfo &&
+-		p < (void *)tinfo + THREAD_SIZE - 3;
++		p < (void *)tinfo + THREAD_SIZE - 3;
+ }
+ 
+ static inline unsigned long print_context_stack(struct thread_info *tinfo,
+@@ -124,7 +130,7 @@
+ #ifdef	CONFIG_FRAME_POINTER
+ 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
+ 		unsigned long new_ebp;
+-		addr = *(unsigned long *)(ebp + 4);
++		addr = *(unsigned long *)(ebp + 4);
+ 		ops->address(data, addr);
+ 		/*
+ 		 * break out of recursive entries (such as
+@@ -147,6 +153,34 @@
+ 	return ebp;
+ }
+ 
++struct ops_and_data {
++	struct stacktrace_ops *ops;
++	void *data;
++};
++
++static asmlinkage int
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
++{
++	struct ops_and_data *oad = (struct ops_and_data *)data;
++	int n = 0;
++	unsigned long sp = UNW_SP(info);
++
++	if (arch_unw_user_mode(info))
++		return -1;
++	while (unwind(info) == 0 && UNW_PC(info)) {
++		n++;
++		oad->ops->address(oad->data, UNW_PC(info));
++		if (arch_unw_user_mode(info))
++			break;
++		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++		    && sp > UNW_SP(info))
++			break;
++		sp = UNW_SP(info);
++		touch_nmi_watchdog();
++	}
++	return n;
++}
++
+ #define MSG(msg) ops->warning(data, msg)
+ 
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+@@ -158,6 +192,41 @@
+ 	if (!task)
+ 		task = current;
+ 
++	if (call_trace >= 0) {
++		int unw_ret = 0;
++		struct unwind_frame_info info;
++		struct ops_and_data oad = { .ops = ops, .data = data };
++
++		if (regs) {
++			if (unwind_init_frame_info(&info, task, regs) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		} else if (task == current)
++			unw_ret = unwind_init_running(&info, dump_trace_unwind,
++						      &oad);
++		else {
++			if (unwind_init_blocked(&info, task) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		}
++		if (unw_ret > 0) {
++			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++				ops->warning_symbol(data,
++					     "DWARF2 unwinder stuck at %s",
++					     UNW_PC(&info));
++				if (UNW_SP(&info) >= PAGE_OFFSET) {
++					MSG("Leftover inexact backtrace:");
++					stack = (void *)UNW_SP(&info);
++					if (!stack)
++						return;
++					ebp = UNW_FP(&info);
++				} else
++					MSG("Full inexact backtrace again:");
++			} else if (call_trace >= 1)
++				return;
++			else
++				MSG("Full inexact backtrace again:");
++		} else
++			MSG("Inexact backtrace:");
++	}
+ 	if (!stack) {
+ 		unsigned long dummy;
+ 		stack = &dummy;
+@@ -983,7 +1052,7 @@
+ 					  long error_code)
+ {
+ 	if (cpu_has_xmm) {
+-		/* Handle SIMD FPU exceptions on PIII+ processors. */
++		/* Handle SIMD FPU exceptions on PIII+ processors. */
+ 		ignore_fpu_irq = 1;
+ 		simd_math_error((void __user *)regs->eip);
+ 	} else {
+@@ -1191,3 +1260,21 @@
+ 	return 1;
+ }
+ __setup("kstack=", kstack_setup);
++
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++	if (strcmp(s, "old") == 0)
++		call_trace = -1;
++	else if (strcmp(s, "both") == 0)
++		call_trace = 0;
++	else if (strcmp(s, "newfallback") == 0)
++		call_trace = 1;
++	else if (strcmp(s, "new") == 0)
++		call_trace = 2;
++	return 1;
++}
++__setup("call_trace=", call_trace_setup);
++#endif
++
+Index: linux-2.6.20.3/arch/x86_64/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/Makefile	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/Makefile	2007-08-29 15:10:06.000000000 +0200
+@@ -45,7 +45,9 @@
+ # actually it makes the kernel smaller too.
+ cflags-y += -fno-reorder-blocks
+ cflags-y += -Wno-sign-compare
++ifneq ($(CONFIG_UNWIND_INFO),y)
+ cflags-y += -fno-asynchronous-unwind-tables
++endif
+ ifneq ($(CONFIG_DEBUG_INFO),y)
+ # -fweb shrinks the kernel a bit, but the difference is very small
+ # it also messes up debugging, so don't use it for now.
+Index: linux-2.6.20.3/arch/x86_64/kernel/vmlinux.lds.S
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/vmlinux.lds.S	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/vmlinux.lds.S	2007-08-29 15:10:06.000000000 +0200
+@@ -221,7 +221,9 @@
+   /* Sections to be discarded */
+   /DISCARD/ : {
+ 	*(.exitcall.exit)
++#ifndef CONFIG_UNWIND_INFO
+ 	*(.eh_frame)
++#endif
+ 	}
+ 
+   STABS_DEBUG
+Index: linux-2.6.20.3/include/asm-generic/vmlinux.lds.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-generic/vmlinux.lds.h	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-generic/vmlinux.lds.h	2007-08-29 15:10:06.000000000 +0200
+@@ -122,6 +122,8 @@
+ 		*(__ksymtab_strings)					\
+ 	}								\
+ 									\
++	EH_FRAME							\
++									\
+ 	/* Built-in module parameters. */				\
+ 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
+ 		VMLINUX_SYMBOL(__start___param) = .;			\
+@@ -161,6 +163,26 @@
+ 		*(.kprobes.text)					\
+ 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
+ 
++#ifdef CONFIG_STACK_UNWIND
++#define EH_FRAME							\
++		/* Unwind data binary search table */			\
++		. = ALIGN(8);						\
++        	.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) {	\
++			VMLINUX_SYMBOL(__start_unwind_hdr) = .;		\
++			*(.eh_frame_hdr)				\
++			VMLINUX_SYMBOL(__end_unwind_hdr) = .;		\
++		}							\
++		/* Unwind data */					\
++		. = ALIGN(8);						\
++		.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {		\
++			VMLINUX_SYMBOL(__start_unwind) = .;		\
++		  	*(.eh_frame)					\
++			VMLINUX_SYMBOL(__end_unwind) = .;		\
++		}
++#else
++#define EH_FRAME
++#endif
++
+ 		/* DWARF debug sections.
+ 		Symbols in the DWARF debugging sections are relative to
+ 		the beginning of the section so we begin them at 0.  */
+Index: linux-2.6.20.3/include/asm-i386/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-i386/unwind.h	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-i386/unwind.h	2007-08-29 15:10:06.000000000 +0200
+@@ -1,6 +1,95 @@
+ #ifndef _ASM_I386_UNWIND_H
+ #define _ASM_I386_UNWIND_H
+ 
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/fixmap.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++
++struct unwind_frame_info
++{
++	struct pt_regs regs;
++	struct task_struct *task;
++	unsigned call_frame:1;
++};
++
++#define UNW_PC(frame)        (frame)->regs.eip
++#define UNW_SP(frame)        (frame)->regs.esp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame)        (frame)->regs.ebp
++#define FRAME_RETADDR_OFFSET 4
++#define FRAME_LINK_OFFSET    0
++#define STACK_BOTTOM(tsk)    STACK_LIMIT((tsk)->thread.esp0)
++#define STACK_TOP(tsk)       ((tsk)->thread.esp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0)
++#endif
++#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++	PTREGS_INFO(eax), \
++	PTREGS_INFO(ecx), \
++	PTREGS_INFO(edx), \
++	PTREGS_INFO(ebx), \
++	PTREGS_INFO(esp), \
++	PTREGS_INFO(ebp), \
++	PTREGS_INFO(esi), \
++	PTREGS_INFO(edi), \
++	PTREGS_INFO(eip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++	((raItem).where == Memory && \
++	 !((raItem).value * (dataAlign) + 4))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++                                            /*const*/ struct pt_regs *regs)
++{
++	if (user_mode_vm(regs))
++		info->regs = *regs;
++	else {
++		memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
++		info->regs.esp = (unsigned long)&regs->esp;
++		info->regs.xss = __KERNEL_DS;
++	}
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++	memset(&info->regs, 0, sizeof(info->regs));
++	info->regs.eip = info->task->thread.eip;
++	info->regs.xcs = __KERNEL_CS;
++	__get_user(info->regs.ebp, (long *)info->task->thread.esp);
++	info->regs.esp = info->task->thread.esp;
++	info->regs.xss = __KERNEL_DS;
++	info->regs.xds = __USER_DS;
++	info->regs.xes = __USER_DS;
++	info->regs.xfs = __KERNEL_PERCPU;
++}
++
++extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
++                                               asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                                          void *arg),
++                                               void *arg);
++
++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
++{
++	return user_mode_vm(&info->regs)
++	       || info->regs.eip < PAGE_OFFSET
++	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
++	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
++	       || info->regs.esp < PAGE_OFFSET;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0)
+ #define UNW_SP(frame) ((void)(frame), 0)
+ #define UNW_FP(frame) ((void)(frame), 0)
+@@ -10,4 +99,6 @@
+ 	return 0;
+ }
+ 
++#endif
++
+ #endif /* _ASM_I386_UNWIND_H */
+Index: linux-2.6.20.3/include/asm-x86_64/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-x86_64/unwind.h	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-x86_64/unwind.h	2007-08-29 15:10:06.000000000 +0200
+@@ -1,6 +1,100 @@
+ #ifndef _ASM_X86_64_UNWIND_H
+ #define _ASM_X86_64_UNWIND_H
+ 
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <asm/vsyscall.h>
++
++struct unwind_frame_info
++{
++	struct pt_regs regs;
++	struct task_struct *task;
++	unsigned call_frame:1;
++};
++
++#define UNW_PC(frame)        (frame)->regs.rip
++#define UNW_SP(frame)        (frame)->regs.rsp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame)        (frame)->regs.rbp
++#define FRAME_RETADDR_OFFSET 8
++#define FRAME_LINK_OFFSET    0
++#define STACK_BOTTOM(tsk)    (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
++#define STACK_TOP(tsk)       ((tsk)->thread.rsp0)
++#endif
++/* Might need to account for the special exception and interrupt handling
++   stacks here, since normally
++	EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
++   but the construct is needed only for getting across the stack switch to
++   the interrupt stack - thus considering the IRQ stack itself is unnecessary,
++   and the overhead of comparing against all exception handling stacks seems
++   not desirable. */
++#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++	PTREGS_INFO(rax), \
++	PTREGS_INFO(rdx), \
++	PTREGS_INFO(rcx), \
++	PTREGS_INFO(rbx), \
++	PTREGS_INFO(rsi), \
++	PTREGS_INFO(rdi), \
++	PTREGS_INFO(rbp), \
++	PTREGS_INFO(rsp), \
++	PTREGS_INFO(r8), \
++	PTREGS_INFO(r9), \
++	PTREGS_INFO(r10), \
++	PTREGS_INFO(r11), \
++	PTREGS_INFO(r12), \
++	PTREGS_INFO(r13), \
++	PTREGS_INFO(r14), \
++	PTREGS_INFO(r15), \
++	PTREGS_INFO(rip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++	((raItem).where == Memory && \
++	 !((raItem).value * (dataAlign) + 8))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++                                            /*const*/ struct pt_regs *regs)
++{
++	info->regs = *regs;
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++	extern const char thread_return[];
++
++	memset(&info->regs, 0, sizeof(info->regs));
++	info->regs.rip = (unsigned long)thread_return;
++	info->regs.cs = __KERNEL_CS;
++	__get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
++	info->regs.rsp = info->task->thread.rsp;
++	info->regs.ss = __KERNEL_DS;
++}
++
++extern int arch_unwind_init_running(struct unwind_frame_info *,
++                                    int (*callback)(struct unwind_frame_info *,
++                                                    void *arg),
++                                    void *arg);
++
++static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
++{
++	return user_mode(&info->regs)
++	       || (long)info->regs.rip >= 0
++	       || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
++	       || (long)info->regs.rsp >= 0;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0UL)
+ #define UNW_SP(frame) ((void)(frame), 0UL)
+ 
+@@ -9,4 +103,6 @@
+ 	return 0;
+ }
+ 
++#endif
++
+ #endif /* _ASM_X86_64_UNWIND_H */
+Index: linux-2.6.20.3/include/linux/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/linux/unwind.h	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/linux/unwind.h	2007-08-29 15:10:06.000000000 +0200
+@@ -14,6 +14,63 @@
+ 
+ struct module;
+ 
++#ifdef CONFIG_STACK_UNWIND
++
++#include <asm/unwind.h>
++
++#ifndef ARCH_UNWIND_SECTION_NAME
++#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
++#endif
++
++/*
++ * Initialize unwind support.
++ */
++extern void unwind_init(void);
++extern void unwind_setup(void);
++
++#ifdef CONFIG_MODULES
++
++extern void *unwind_add_table(struct module *,
++                              const void *table_start,
++                              unsigned long table_size);
++
++extern void unwind_remove_table(void *handle, int init_only);
++
++#endif
++
++extern int unwind_init_frame_info(struct unwind_frame_info *,
++                                  struct task_struct *,
++                                  /*const*/ struct pt_regs *);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++extern int unwind_init_blocked(struct unwind_frame_info *,
++                               struct task_struct *);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++extern int unwind_init_running(struct unwind_frame_info *,
++                               asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                          void *arg),
++                               void *arg);
++
++/*
++ * Unwind to previous to frame.  Returns 0 if successful, negative
++ * number in case of an error.
++ */
++extern int unwind(struct unwind_frame_info *);
++
++/*
++ * Unwind until the return pointer is in user-land (or until an error
++ * occurs).  Returns 0 if successful, negative number in case of
++ * error.
++ */
++extern int unwind_to_user(struct unwind_frame_info *);
++
++#else
++
+ struct unwind_frame_info {};
+ 
+ static inline void unwind_init(void) {}
+@@ -28,12 +85,12 @@
+ 	return NULL;
+ }
+ 
++#endif
++
+ static inline void unwind_remove_table(void *handle, int init_only)
+ {
+ }
+ 
+-#endif
+-
+ static inline int unwind_init_frame_info(struct unwind_frame_info *info,
+                                          struct task_struct *tsk,
+                                          const struct pt_regs *regs)
+@@ -65,4 +122,6 @@
+ 	return -ENOSYS;
+ }
+ 
++#endif
++
+ #endif /* _LINUX_UNWIND_H */
+Index: linux-2.6.20.3/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/kernel/Makefile	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/kernel/Makefile	2007-08-29 15:10:06.000000000 +0200
+@@ -31,6 +31,7 @@
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_KALLSYMS) += kallsyms.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+ obj-$(CONFIG_PM) += power/
+ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+ obj-$(CONFIG_KEXEC) += kexec.o
+Index: linux-2.6.20.3/kernel/unwind.c
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/kernel/unwind.c	2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,1288 @@
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ *
++ * A simple API for unwinding kernel stacks.  This is used for
++ * debugging and error reporting purposes.  The kernel doesn't need
++ * full-blown stack unwinding with all the bells and whistles, so there
++ * is not much point in implementing the full Dwarf2 unwind API.
++ */
++
++#include <linux/unwind.h>
++#include <linux/module.h>
++#include <linux/bootmem.h>
++#include <linux/sort.h>
++#include <linux/stop_machine.h>
++#include <linux/uaccess.h>
++#include <asm/sections.h>
++#include <asm/uaccess.h>
++#include <asm/unaligned.h>
++
++extern const char __start_unwind[], __end_unwind[];
++extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
++
++#define MAX_STACK_DEPTH 8
++
++#define EXTRA_INFO(f) { \
++		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
++		                  % FIELD_SIZEOF(struct unwind_frame_info, f)) \
++		+ offsetof(struct unwind_frame_info, f) \
++		  / FIELD_SIZEOF(struct unwind_frame_info, f), \
++		FIELD_SIZEOF(struct unwind_frame_info, f) \
++	}
++#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
++
++static const struct {
++	unsigned offs:BITS_PER_LONG / 2;
++	unsigned width:BITS_PER_LONG / 2;
++} reg_info[] = {
++	UNW_REGISTER_INFO
++};
++
++#undef PTREGS_INFO
++#undef EXTRA_INFO
++
++#ifndef REG_INVALID
++#define REG_INVALID(r) (reg_info[r].width == 0)
++#endif
++
++#define DW_CFA_nop                          0x00
++#define DW_CFA_set_loc                      0x01
++#define DW_CFA_advance_loc1                 0x02
++#define DW_CFA_advance_loc2                 0x03
++#define DW_CFA_advance_loc4                 0x04
++#define DW_CFA_offset_extended              0x05
++#define DW_CFA_restore_extended             0x06
++#define DW_CFA_undefined                    0x07
++#define DW_CFA_same_value                   0x08
++#define DW_CFA_register                     0x09
++#define DW_CFA_remember_state               0x0a
++#define DW_CFA_restore_state                0x0b
++#define DW_CFA_def_cfa                      0x0c
++#define DW_CFA_def_cfa_register             0x0d
++#define DW_CFA_def_cfa_offset               0x0e
++#define DW_CFA_def_cfa_expression           0x0f
++#define DW_CFA_expression                   0x10
++#define DW_CFA_offset_extended_sf           0x11
++#define DW_CFA_def_cfa_sf                   0x12
++#define DW_CFA_def_cfa_offset_sf            0x13
++#define DW_CFA_val_offset                   0x14
++#define DW_CFA_val_offset_sf                0x15
++#define DW_CFA_val_expression               0x16
++#define DW_CFA_lo_user                      0x1c
++#define DW_CFA_GNU_window_save              0x2d
++#define DW_CFA_GNU_args_size                0x2e
++#define DW_CFA_GNU_negative_offset_extended 0x2f
++#define DW_CFA_hi_user                      0x3f
++
++#define DW_EH_PE_FORM     0x07
++#define DW_EH_PE_native   0x00
++#define DW_EH_PE_leb128   0x01
++#define DW_EH_PE_data2    0x02
++#define DW_EH_PE_data4    0x03
++#define DW_EH_PE_data8    0x04
++#define DW_EH_PE_signed   0x08
++#define DW_EH_PE_ADJUST   0x70
++#define DW_EH_PE_abs      0x00
++#define DW_EH_PE_pcrel    0x10
++#define DW_EH_PE_textrel  0x20
++#define DW_EH_PE_datarel  0x30
++#define DW_EH_PE_funcrel  0x40
++#define DW_EH_PE_aligned  0x50
++#define DW_EH_PE_indirect 0x80
++#define DW_EH_PE_omit     0xff
++
++typedef unsigned long uleb128_t;
++typedef   signed long sleb128_t;
++#define sleb128abs __builtin_labs
++
++static struct unwind_table {
++	struct {
++		unsigned long pc;
++		unsigned long range;
++	} core, init;
++	const void *address;
++	unsigned long size;
++	const unsigned char *header;
++	unsigned long hdrsz;
++	struct unwind_table *link;
++	const char *name;
++} root_table;
++
++struct unwind_item {
++	enum item_location {
++		Nowhere,
++		Memory,
++		Register,
++		Value
++	} where;
++	uleb128_t value;
++};
++
++struct unwind_state {
++	uleb128_t loc, org;
++	const u8 *cieStart, *cieEnd;
++	uleb128_t codeAlign;
++	sleb128_t dataAlign;
++	struct cfa {
++		uleb128_t reg, offs;
++	} cfa;
++	struct unwind_item regs[ARRAY_SIZE(reg_info)];
++	unsigned stackDepth:8;
++	unsigned version:8;
++	const u8 *label;
++	const u8 *stack[MAX_STACK_DEPTH];
++};
++
++static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
++
++static unsigned unwind_debug;
++static int __init unwind_debug_setup(char *s)
++{
++	unwind_debug = simple_strtoul(s, NULL, 0);
++	return 1;
++}
++__setup("unwind_debug=", unwind_debug_setup);
++#define dprintk(lvl, fmt, args...) \
++	((void)(lvl > unwind_debug \
++	 || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
++
++static struct unwind_table *find_table(unsigned long pc)
++{
++	struct unwind_table *table;
++
++	for (table = &root_table; table; table = table->link)
++		if ((pc >= table->core.pc
++		     && pc < table->core.pc + table->core.range)
++		    || (pc >= table->init.pc
++		        && pc < table->init.pc + table->init.range))
++			break;
++
++	return table;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++                                  const void *end,
++                                  signed ptrType,
++                                  unsigned long text_base,
++                                  unsigned long data_base);
++
++static void init_unwind_table(struct unwind_table *table,
++                              const char *name,
++                              const void *core_start,
++                              unsigned long core_size,
++                              const void *init_start,
++                              unsigned long init_size,
++                              const void *table_start,
++                              unsigned long table_size,
++                              const u8 *header_start,
++                              unsigned long header_size)
++{
++	const u8 *ptr = header_start + 4;
++	const u8 *end = header_start + header_size;
++
++	table->core.pc = (unsigned long)core_start;
++	table->core.range = core_size;
++	table->init.pc = (unsigned long)init_start;
++	table->init.range = init_size;
++	table->address = table_start;
++	table->size = table_size;
++	/* See if the linker provided table looks valid. */
++	if (header_size <= 4
++	    || header_start[0] != 1
++	    || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
++	       != table_start
++	    || !read_pointer(&ptr, end, header_start[2], 0, 0)
++	    || !read_pointer(&ptr, end, header_start[3], 0,
++	                     (unsigned long)header_start)
++	    || !read_pointer(&ptr, end, header_start[3], 0,
++	                     (unsigned long)header_start))
++		header_start = NULL;
++	table->hdrsz = header_size;
++	smp_wmb();
++	table->header = header_start;
++	table->link = NULL;
++	table->name = name;
++}
++
++void __init unwind_init(void)
++{
++	init_unwind_table(&root_table, "kernel",
++	                  _text, _end - _text,
++	                  NULL, 0,
++	                  __start_unwind, __end_unwind - __start_unwind,
++	                  __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
++}
++
++static const u32 bad_cie, not_fde;
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
++static signed fde_pointer_type(const u32 *cie);
++
++struct eh_frame_hdr_table_entry {
++	unsigned long start, fde;
++};
++
++static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
++{
++	const struct eh_frame_hdr_table_entry *e1 = p1;
++	const struct eh_frame_hdr_table_entry *e2 = p2;
++
++	return (e1->start > e2->start) - (e1->start < e2->start);
++}
++
++static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
++{
++	struct eh_frame_hdr_table_entry *e1 = p1;
++	struct eh_frame_hdr_table_entry *e2 = p2;
++	unsigned long v;
++
++	v = e1->start;
++	e1->start = e2->start;
++	e2->start = v;
++	v = e1->fde;
++	e1->fde = e2->fde;
++	e2->fde = v;
++}
++
++static void __init setup_unwind_table(struct unwind_table *table,
++					void *(*alloc)(unsigned long))
++{
++	const u8 *ptr;
++	unsigned long tableSize = table->size, hdrSize;
++	unsigned n;
++	const u32 *fde;
++	struct {
++		u8 version;
++		u8 eh_frame_ptr_enc;
++		u8 fde_count_enc;
++		u8 table_enc;
++		unsigned long eh_frame_ptr;
++		unsigned int fde_count;
++		struct eh_frame_hdr_table_entry table[];
++	} __attribute__((__packed__)) *header;
++
++	if (table->header)
++		return;
++
++	if (table->hdrsz)
++		printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
++		       table->name);
++
++	if (tableSize & (sizeof(*fde) - 1))
++		return;
++
++	for (fde = table->address, n = 0;
++	     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
++	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++		const u32 *cie = cie_for_fde(fde, table);
++		signed ptrType;
++
++		if (cie == &not_fde)
++			continue;
++		if (cie == NULL
++		    || cie == &bad_cie
++		    || (ptrType = fde_pointer_type(cie)) < 0)
++			return;
++		ptr = (const u8 *)(fde + 2);
++		if (!read_pointer(&ptr,
++		                  (const u8 *)(fde + 1) + *fde,
++		                  ptrType, 0, 0))
++			return;
++		++n;
++	}
++
++	if (tableSize || !n)
++		return;
++
++	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
++	        + 2 * n * sizeof(unsigned long);
++	dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
++	header = alloc(hdrSize);
++	if (!header)
++		return;
++	header->version          = 1;
++	header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
++	header->fde_count_enc    = DW_EH_PE_abs|DW_EH_PE_data4;
++	header->table_enc        = DW_EH_PE_abs|DW_EH_PE_native;
++	put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
++	BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
++	             % __alignof(typeof(header->fde_count)));
++	header->fde_count        = n;
++
++	BUILD_BUG_ON(offsetof(typeof(*header), table)
++	             % __alignof(typeof(*header->table)));
++	for (fde = table->address, tableSize = table->size, n = 0;
++	     tableSize;
++	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++		const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
++
++		if (!fde[1])
++			continue; /* this is a CIE */
++		ptr = (const u8 *)(fde + 2);
++		header->table[n].start = read_pointer(&ptr,
++		                                      (const u8 *)(fde + 1) + *fde,
++		                                      fde_pointer_type(cie), 0, 0);
++		header->table[n].fde = (unsigned long)fde;
++		++n;
++	}
++	WARN_ON(n != header->fde_count);
++
++	sort(header->table,
++	     n,
++	     sizeof(*header->table),
++	     cmp_eh_frame_hdr_table_entries,
++	     swap_eh_frame_hdr_table_entries);
++
++	table->hdrsz = hdrSize;
++	smp_wmb();
++	table->header = (const void *)header;
++}
++
++static void *__init balloc(unsigned long sz)
++{
++	return __alloc_bootmem_nopanic(sz,
++	                               sizeof(unsigned int),
++	                               __pa(MAX_DMA_ADDRESS));
++}
++
++void __init unwind_setup(void)
++{
++	setup_unwind_table(&root_table, balloc);
++}
++
++#ifdef CONFIG_MODULES
++
++static struct unwind_table *last_table;
++
++/* Must be called with module_mutex held. */
++void *unwind_add_table(struct module *module,
++                       const void *table_start,
++                       unsigned long table_size)
++{
++	struct unwind_table *table;
++
++	if (table_size <= 0)
++		return NULL;
++
++	table = kmalloc(sizeof(*table), GFP_KERNEL);
++	if (!table)
++		return NULL;
++
++	init_unwind_table(table, module->name,
++	                  module->module_core, module->core_size,
++	                  module->module_init, module->init_size,
++	                  table_start, table_size,
++	                  NULL, 0);
++
++	if (last_table)
++		last_table->link = table;
++	else
++		root_table.link = table;
++	last_table = table;
++
++	return table;
++}
++
++struct unlink_table_info
++{
++	struct unwind_table *table;
++	int init_only;
++};
++
++static int unlink_table(void *arg)
++{
++	struct unlink_table_info *info = arg;
++	struct unwind_table *table = info->table, *prev;
++
++	for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
++		;
++
++	if (prev->link) {
++		if (info->init_only) {
++			table->init.pc = 0;
++			table->init.range = 0;
++			info->table = NULL;
++		} else {
++			prev->link = table->link;
++			if (!prev->link)
++				last_table = prev;
++		}
++	} else
++		info->table = NULL;
++
++	return 0;
++}
++
++/* Must be called with module_mutex held. */
++void unwind_remove_table(void *handle, int init_only)
++{
++	struct unwind_table *table = handle;
++	struct unlink_table_info info;
++
++	if (!table || table == &root_table)
++		return;
++
++	if (init_only && table == last_table) {
++		table->init.pc = 0;
++		table->init.range = 0;
++		return;
++	}
++
++	info.table = table;
++	info.init_only = init_only;
++	stop_machine_run(unlink_table, &info, NR_CPUS);
++
++	if (info.table)
++		kfree(table);
++}
++
++#endif /* CONFIG_MODULES */
++
++static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
++{
++	const u8 *cur = *pcur;
++	uleb128_t value;
++	unsigned shift;
++
++	for (shift = 0, value = 0; cur < end; shift += 7) {
++		if (shift + 7 > 8 * sizeof(value)
++		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++			cur = end + 1;
++			break;
++		}
++		value |= (uleb128_t)(*cur & 0x7f) << shift;
++		if (!(*cur++ & 0x80))
++			break;
++	}
++	*pcur = cur;
++
++	return value;
++}
++
++static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
++{
++	const u8 *cur = *pcur;
++	sleb128_t value;
++	unsigned shift;
++
++	for (shift = 0, value = 0; cur < end; shift += 7) {
++		if (shift + 7 > 8 * sizeof(value)
++		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++			cur = end + 1;
++			break;
++		}
++		value |= (sleb128_t)(*cur & 0x7f) << shift;
++		if (!(*cur & 0x80)) {
++			value |= -(*cur++ & 0x40) << shift;
++			break;
++		}
++	}
++	*pcur = cur;
++
++	return value;
++}
++
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
++{
++	const u32 *cie;
++
++	if (!*fde || (*fde & (sizeof(*fde) - 1)))
++		return &bad_cie;
++	if (!fde[1])
++		return &not_fde; /* this is a CIE */
++	if ((fde[1] & (sizeof(*fde) - 1))
++	    || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
++		return NULL; /* this is not a valid FDE */
++	cie = fde + 1 - fde[1] / sizeof(*fde);
++	if (*cie <= sizeof(*cie) + 4
++	    || *cie >= fde[1] - sizeof(*fde)
++	    || (*cie & (sizeof(*cie) - 1))
++	    || cie[1])
++		return NULL; /* this is not a (valid) CIE */
++	return cie;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++                                  const void *end,
++                                  signed ptrType,
++                                  unsigned long text_base,
++                                  unsigned long data_base)
++{
++	unsigned long value = 0;
++	union {
++		const u8 *p8;
++		const u16 *p16u;
++		const s16 *p16s;
++		const u32 *p32u;
++		const s32 *p32s;
++		const unsigned long *pul;
++	} ptr;
++
++	if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
++		dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
++		return 0;
++	}
++	ptr.p8 = *pLoc;
++	switch(ptrType & DW_EH_PE_FORM) {
++	case DW_EH_PE_data2:
++		if (end < (const void *)(ptr.p16u + 1)) {
++			dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		if(ptrType & DW_EH_PE_signed)
++			value = get_unaligned(ptr.p16s++);
++		else
++			value = get_unaligned(ptr.p16u++);
++		break;
++	case DW_EH_PE_data4:
++#ifdef CONFIG_64BIT
++		if (end < (const void *)(ptr.p32u + 1)) {
++			dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		if(ptrType & DW_EH_PE_signed)
++			value = get_unaligned(ptr.p32s++);
++		else
++			value = get_unaligned(ptr.p32u++);
++		break;
++	case DW_EH_PE_data8:
++		BUILD_BUG_ON(sizeof(u64) != sizeof(value));
++#else
++		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
++#endif
++	case DW_EH_PE_native:
++		if (end < (const void *)(ptr.pul + 1)) {
++			dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		value = get_unaligned(ptr.pul++);
++		break;
++	case DW_EH_PE_leb128:
++		BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
++		value = ptrType & DW_EH_PE_signed
++		        ? get_sleb128(&ptr.p8, end)
++		        : get_uleb128(&ptr.p8, end);
++		if ((const void *)ptr.p8 > end) {
++			dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		break;
++	default:
++		dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
++		        ptrType, ptr.p8, end);
++		return 0;
++	}
++	switch(ptrType & DW_EH_PE_ADJUST) {
++	case DW_EH_PE_abs:
++		break;
++	case DW_EH_PE_pcrel:
++		value += (unsigned long)*pLoc;
++		break;
++	case DW_EH_PE_textrel:
++		if (likely(text_base)) {
++			value += text_base;
++			break;
++		}
++		dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
++		        ptrType, *pLoc, end);
++		return 0;
++	case DW_EH_PE_datarel:
++		if (likely(data_base)) {
++			value += data_base;
++			break;
++		}
++		dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
++		        ptrType, *pLoc, end);
++		return 0;
++	default:
++		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
++		        ptrType, *pLoc, end);
++		return 0;
++	}
++	if ((ptrType & DW_EH_PE_indirect)
++	    && probe_kernel_address((unsigned long *)value, value)) {
++		dprintk(1, "Cannot read indirect value %lx (%p,%p).",
++		        value, *pLoc, end);
++		return 0;
++	}
++	*pLoc = ptr.p8;
++
++	return value;
++}
++
++static signed fde_pointer_type(const u32 *cie)
++{
++	const u8 *ptr = (const u8 *)(cie + 2);
++	unsigned version = *ptr;
++
++	if (version != 1)
++		return -1; /* unsupported */
++	if (*++ptr) {
++		const char *aug;
++		const u8 *end = (const u8 *)(cie + 1) + *cie;
++		uleb128_t len;
++
++		/* check if augmentation size is first (and thus present) */
++		if (*ptr != 'z')
++			return -1;
++		/* check if augmentation string is nul-terminated */
++		if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
++			return -1;
++		++ptr; /* skip terminator */
++		get_uleb128(&ptr, end); /* skip code alignment */
++		get_sleb128(&ptr, end); /* skip data alignment */
++		/* skip return address column */
++		version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
++		len = get_uleb128(&ptr, end); /* augmentation length */
++		if (ptr + len < ptr || ptr + len > end)
++			return -1;
++		end = ptr + len;
++		while (*++aug) {
++			if (ptr >= end)
++				return -1;
++			switch(*aug) {
++			case 'L':
++				++ptr;
++				break;
++			case 'P': {
++					signed ptrType = *ptr++;
++
++					if (!read_pointer(&ptr, end, ptrType, 0, 0)
++					    || ptr > end)
++						return -1;
++				}
++				break;
++			case 'R':
++				return *ptr;
++			default:
++				return -1;
++			}
++		}
++	}
++	return DW_EH_PE_native|DW_EH_PE_abs;
++}
++
++static int advance_loc(unsigned long delta, struct unwind_state *state)
++{
++	state->loc += delta * state->codeAlign;
++
++	return delta > 0;
++}
++
++static void set_rule(uleb128_t reg,
++                     enum item_location where,
++                     uleb128_t value,
++                     struct unwind_state *state)
++{
++	if (reg < ARRAY_SIZE(state->regs)) {
++		state->regs[reg].where = where;
++		state->regs[reg].value = value;
++	}
++}
++
++static int processCFI(const u8 *start,
++                      const u8 *end,
++                      unsigned long targetLoc,
++                      signed ptrType,
++                      struct unwind_state *state)
++{
++	union {
++		const u8 *p8;
++		const u16 *p16;
++		const u32 *p32;
++	} ptr;
++	int result = 1;
++
++	if (start != state->cieStart) {
++		state->loc = state->org;
++		result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
++		if (targetLoc == 0 && state->label == NULL)
++			return result;
++	}
++	for (ptr.p8 = start; result && ptr.p8 < end; ) {
++		switch(*ptr.p8 >> 6) {
++			uleb128_t value;
++
++		case 0:
++			switch(*ptr.p8++) {
++			case DW_CFA_nop:
++				break;
++			case DW_CFA_set_loc:
++				state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
++				if (state->loc == 0)
++					result = 0;
++				break;
++			case DW_CFA_advance_loc1:
++				result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
++				break;
++			case DW_CFA_advance_loc2:
++				result = ptr.p8 <= end + 2
++				         && advance_loc(*ptr.p16++, state);
++				break;
++			case DW_CFA_advance_loc4:
++				result = ptr.p8 <= end + 4
++				         && advance_loc(*ptr.p32++, state);
++				break;
++			case DW_CFA_offset_extended:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_val_offset:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_offset_extended_sf:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_val_offset_sf:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_restore_extended:
++			case DW_CFA_undefined:
++			case DW_CFA_same_value:
++				set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
++				break;
++			case DW_CFA_register:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value,
++				         Register,
++				         get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_remember_state:
++				if (ptr.p8 == state->label) {
++					state->label = NULL;
++					return 1;
++				}
++				if (state->stackDepth >= MAX_STACK_DEPTH) {
++					dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
++					return 0;
++				}
++				state->stack[state->stackDepth++] = ptr.p8;
++				break;
++			case DW_CFA_restore_state:
++				if (state->stackDepth) {
++					const uleb128_t loc = state->loc;
++					const u8 *label = state->label;
++
++					state->label = state->stack[state->stackDepth - 1];
++					memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
++					memset(state->regs, 0, sizeof(state->regs));
++					state->stackDepth = 0;
++					result = processCFI(start, end, 0, ptrType, state);
++					state->loc = loc;
++					state->label = label;
++				} else {
++					dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
++					return 0;
++				}
++				break;
++			case DW_CFA_def_cfa:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				/*nobreak*/
++			case DW_CFA_def_cfa_offset:
++				state->cfa.offs = get_uleb128(&ptr.p8, end);
++				break;
++			case DW_CFA_def_cfa_sf:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				/*nobreak*/
++			case DW_CFA_def_cfa_offset_sf:
++				state->cfa.offs = get_sleb128(&ptr.p8, end)
++				                  * state->dataAlign;
++				break;
++			case DW_CFA_def_cfa_register:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				break;
++			/*todo case DW_CFA_def_cfa_expression: */
++			/*todo case DW_CFA_expression: */
++			/*todo case DW_CFA_val_expression: */
++			case DW_CFA_GNU_args_size:
++				get_uleb128(&ptr.p8, end);
++				break;
++			case DW_CFA_GNU_negative_offset_extended:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value,
++				         Memory,
++				         (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_GNU_window_save:
++			default:
++				dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
++				result = 0;
++				break;
++			}
++			break;
++		case 1:
++			result = advance_loc(*ptr.p8++ & 0x3f, state);
++			break;
++		case 2:
++			value = *ptr.p8++ & 0x3f;
++			set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++			break;
++		case 3:
++			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
++			break;
++		}
++		if (ptr.p8 > end) {
++			dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
++			result = 0;
++		}
++		if (result && targetLoc != 0 && targetLoc < state->loc)
++			return 1;
++	}
++
++	if (result && ptr.p8 < end)
++		dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
++
++	return result
++	   && ptr.p8 == end
++	   && (targetLoc == 0
++	    || (/*todo While in theory this should apply, gcc in practice omits
++	          everything past the function prolog, and hence the location
++	          never reaches the end of the function.
++	        targetLoc < state->loc &&*/ state->label == NULL));
++}
++
++/* Unwind to previous frame.  Returns 0 if successful, negative
++ * number in case of an error. */
++int unwind(struct unwind_frame_info *frame)
++{
++#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
++	const u32 *fde = NULL, *cie = NULL;
++	const u8 *ptr = NULL, *end = NULL;
++	unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
++	unsigned long startLoc = 0, endLoc = 0, cfa;
++	unsigned i;
++	signed ptrType = -1;
++	uleb128_t retAddrReg = 0;
++	const struct unwind_table *table;
++	struct unwind_state state;
++
++	if (UNW_PC(frame) == 0)
++		return -EINVAL;
++	if ((table = find_table(pc)) != NULL
++	    && !(table->size & (sizeof(*fde) - 1))) {
++		const u8 *hdr = table->header;
++		unsigned long tableSize;
++
++		smp_rmb();
++		if (hdr && hdr[0] == 1) {
++			switch(hdr[3] & DW_EH_PE_FORM) {
++			case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
++			case DW_EH_PE_data2: tableSize = 2; break;
++			case DW_EH_PE_data4: tableSize = 4; break;
++			case DW_EH_PE_data8: tableSize = 8; break;
++			default: tableSize = 0; break;
++			}
++			ptr = hdr + 4;
++			end = hdr + table->hdrsz;
++			if (tableSize
++			    && read_pointer(&ptr, end, hdr[1], 0, 0)
++			       == (unsigned long)table->address
++			    && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
++			    && i == (end - ptr) / (2 * tableSize)
++			    && !((end - ptr) % (2 * tableSize))) {
++				do {
++					const u8 *cur = ptr + (i / 2) * (2 * tableSize);
++
++					startLoc = read_pointer(&cur,
++					                        cur + tableSize,
++					                        hdr[3], 0,
++					                        (unsigned long)hdr);
++					if (pc < startLoc)
++						i /= 2;
++					else {
++						ptr = cur - tableSize;
++						i = (i + 1) / 2;
++					}
++				} while (startLoc && i > 1);
++				if (i == 1
++				    && (startLoc = read_pointer(&ptr,
++				                                ptr + tableSize,
++				                                hdr[3], 0,
++				                                (unsigned long)hdr)) != 0
++				    && pc >= startLoc)
++					fde = (void *)read_pointer(&ptr,
++					                           ptr + tableSize,
++					                           hdr[3], 0,
++					                           (unsigned long)hdr);
++			}
++		}
++		if(hdr && !fde)
++			dprintk(3, "Binary lookup for %lx failed.", pc);
++
++		if (fde != NULL) {
++			cie = cie_for_fde(fde, table);
++			ptr = (const u8 *)(fde + 2);
++			if(cie != NULL
++			   && cie != &bad_cie
++			   && cie != &not_fde
++			   && (ptrType = fde_pointer_type(cie)) >= 0
++			   && read_pointer(&ptr,
++			                   (const u8 *)(fde + 1) + *fde,
++			                   ptrType, 0, 0) == startLoc) {
++				if (!(ptrType & DW_EH_PE_indirect))
++					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++				endLoc = startLoc
++				         + read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if(pc >= endLoc)
++					fde = NULL;
++			} else
++				fde = NULL;
++			if(!fde)
++				dprintk(1, "Binary lookup result for %lx discarded.", pc);
++		}
++		if (fde == NULL) {
++			for (fde = table->address, tableSize = table->size;
++			     cie = NULL, tableSize > sizeof(*fde)
++			     && tableSize - sizeof(*fde) >= *fde;
++			     tableSize -= sizeof(*fde) + *fde,
++			     fde += 1 + *fde / sizeof(*fde)) {
++				cie = cie_for_fde(fde, table);
++				if (cie == &bad_cie) {
++					cie = NULL;
++					break;
++				}
++				if (cie == NULL
++				    || cie == &not_fde
++				    || (ptrType = fde_pointer_type(cie)) < 0)
++					continue;
++				ptr = (const u8 *)(fde + 2);
++				startLoc = read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if (!startLoc)
++					continue;
++				if (!(ptrType & DW_EH_PE_indirect))
++					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++				endLoc = startLoc
++				         + read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if (pc >= startLoc && pc < endLoc)
++					break;
++			}
++			if(!fde)
++				dprintk(3, "Linear lookup for %lx failed.", pc);
++		}
++	}
++	if (cie != NULL) {
++		memset(&state, 0, sizeof(state));
++		state.cieEnd = ptr; /* keep here temporarily */
++		ptr = (const u8 *)(cie + 2);
++		end = (const u8 *)(cie + 1) + *cie;
++		frame->call_frame = 1;
++		if ((state.version = *ptr) != 1)
++			cie = NULL; /* unsupported version */
++		else if (*++ptr) {
++			/* check if augmentation size is first (and thus present) */
++			if (*ptr == 'z') {
++				while (++ptr < end && *ptr) {
++					switch(*ptr) {
++					/* check for ignorable (or already handled)
++					 * nul-terminated augmentation string */
++					case 'L':
++					case 'P':
++					case 'R':
++						continue;
++					case 'S':
++						frame->call_frame = 0;
++						continue;
++					default:
++						break;
++					}
++					break;
++				}
++			}
++			if (ptr >= end || *ptr)
++				cie = NULL;
++		}
++		if(!cie)
++			dprintk(1, "CIE unusable (%p,%p).", ptr, end);
++		++ptr;
++	}
++	if (cie != NULL) {
++		/* get code alignment factor */
++		state.codeAlign = get_uleb128(&ptr, end);
++		/* get data alignment factor */
++		state.dataAlign = get_sleb128(&ptr, end);
++		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
++			cie = NULL;
++		else if (UNW_PC(frame) % state.codeAlign
++		         || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++			dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
++			        UNW_PC(frame), UNW_SP(frame));
++			return -EPERM;
++		} else {
++			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
++			/* skip augmentation */
++			if (((const char *)(cie + 2))[1] == 'z') {
++				uleb128_t augSize = get_uleb128(&ptr, end);
++
++				ptr += augSize;
++			}
++			if (ptr > end
++			   || retAddrReg >= ARRAY_SIZE(reg_info)
++			   || REG_INVALID(retAddrReg)
++			   || reg_info[retAddrReg].width != sizeof(unsigned long))
++				cie = NULL;
++		}
++		if(!cie)
++			dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
++	}
++	if (cie != NULL) {
++		state.cieStart = ptr;
++		ptr = state.cieEnd;
++		state.cieEnd = end;
++		end = (const u8 *)(fde + 1) + *fde;
++		/* skip augmentation */
++		if (((const char *)(cie + 2))[1] == 'z') {
++			uleb128_t augSize = get_uleb128(&ptr, end);
++
++			if ((ptr += augSize) > end)
++				fde = NULL;
++		}
++		if(!fde)
++			dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
++	}
++	if (cie == NULL || fde == NULL) {
++#ifdef CONFIG_FRAME_POINTER
++		unsigned long top, bottom;
++
++		if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
++			return -EPERM;
++		top = STACK_TOP(frame->task);
++		bottom = STACK_BOTTOM(frame->task);
++# if FRAME_RETADDR_OFFSET < 0
++		if (UNW_SP(frame) < top
++		    && UNW_FP(frame) <= UNW_SP(frame)
++		    && bottom < UNW_FP(frame)
++# else
++		if (UNW_SP(frame) > top
++		    && UNW_FP(frame) >= UNW_SP(frame)
++		    && bottom > UNW_FP(frame)
++# endif
++		   && !((UNW_SP(frame) | UNW_FP(frame))
++		        & (sizeof(unsigned long) - 1))) {
++			unsigned long link;
++
++			if (!probe_kernel_address(
++			                (unsigned long *)(UNW_FP(frame)
++			                                  + FRAME_LINK_OFFSET),
++						  link)
++# if FRAME_RETADDR_OFFSET < 0
++			   && link > bottom && link < UNW_FP(frame)
++# else
++			   && link > UNW_FP(frame) && link < bottom
++# endif
++			   && !(link & (sizeof(link) - 1))
++			   && !probe_kernel_address(
++			                  (unsigned long *)(UNW_FP(frame)
++			                                    + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
++				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
++# if FRAME_RETADDR_OFFSET < 0
++					-
++# else
++					+
++# endif
++					  sizeof(UNW_PC(frame));
++				UNW_FP(frame) = link;
++				return 0;
++			}
++		}
++#endif
++		return -ENXIO;
++	}
++	state.org = startLoc;
++	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
++	/* process instructions */
++	if (!processCFI(ptr, end, pc, ptrType, &state)
++	   || state.loc > endLoc
++	   || state.regs[retAddrReg].where == Nowhere
++	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
++	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
++	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
++	   || state.cfa.offs % sizeof(unsigned long)) {
++		dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
++		return -EIO;
++	}
++	/* update frame */
++#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
++	if(frame->call_frame
++	   && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
++		frame->call_frame = 0;
++#endif
++	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
++	startLoc = min((unsigned long)UNW_SP(frame), cfa);
++	endLoc = max((unsigned long)UNW_SP(frame), cfa);
++	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
++		startLoc = min(STACK_LIMIT(cfa), cfa);
++		endLoc = max(STACK_LIMIT(cfa), cfa);
++	}
++#ifndef CONFIG_64BIT
++# define CASES CASE(8); CASE(16); CASE(32)
++#else
++# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
++#endif
++	pc = UNW_PC(frame);
++	sp = UNW_SP(frame);
++	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++		if (REG_INVALID(i)) {
++			if (state.regs[i].where == Nowhere)
++				continue;
++			dprintk(1, "Cannot restore register %u (%d).",
++			        i, state.regs[i].where);
++			return -EIO;
++		}
++		switch(state.regs[i].where) {
++		default:
++			break;
++		case Register:
++			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
++			   || REG_INVALID(state.regs[i].value)
++			   || reg_info[i].width > reg_info[state.regs[i].value].width) {
++				dprintk(1, "Cannot restore register %u from register %lu.",
++				        i, state.regs[i].value);
++				return -EIO;
++			}
++			switch(reg_info[state.regs[i].value].width) {
++#define CASE(n) \
++			case sizeof(u##n): \
++				state.regs[i].value = FRAME_REG(state.regs[i].value, \
++				                                const u##n); \
++				break
++			CASES;
++#undef CASE
++			default:
++				dprintk(1, "Unsupported register size %u (%lu).",
++				        reg_info[state.regs[i].value].width,
++				        state.regs[i].value);
++				return -EIO;
++			}
++			break;
++		}
++	}
++	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++		if (REG_INVALID(i))
++			continue;
++		switch(state.regs[i].where) {
++		case Nowhere:
++			if (reg_info[i].width != sizeof(UNW_SP(frame))
++			   || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
++			      != &UNW_SP(frame))
++				continue;
++			UNW_SP(frame) = cfa;
++			break;
++		case Register:
++			switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++				FRAME_REG(i, u##n) = state.regs[i].value; \
++				break
++			CASES;
++#undef CASE
++			default:
++				dprintk(1, "Unsupported register size %u (%u).",
++				        reg_info[i].width, i);
++				return -EIO;
++			}
++			break;
++		case Value:
++			if (reg_info[i].width != sizeof(unsigned long)) {
++				dprintk(1, "Unsupported value size %u (%u).",
++				        reg_info[i].width, i);
++				return -EIO;
++			}
++			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
++			                                    * state.dataAlign;
++			break;
++		case Memory: {
++				unsigned long addr = cfa + state.regs[i].value
++				                           * state.dataAlign;
++
++				if ((state.regs[i].value * state.dataAlign)
++				    % sizeof(unsigned long)
++				    || addr < startLoc
++				    || addr + sizeof(unsigned long) < addr
++				    || addr + sizeof(unsigned long) > endLoc) {
++					dprintk(1, "Bad memory location %lx (%lx).",
++					        addr, state.regs[i].value);
++					return -EIO;
++				}
++				switch(reg_info[i].width) {
++#define CASE(n)     case sizeof(u##n): \
++					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
++					break
++				CASES;
++#undef CASE
++				default:
++					dprintk(1, "Unsupported memory size %u (%u).",
++					        reg_info[i].width, i);
++					return -EIO;
++				}
++			}
++			break;
++		}
++	}
++
++	if (UNW_PC(frame) % state.codeAlign
++	    || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++		dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
++		        UNW_PC(frame), UNW_SP(frame));
++		return -EIO;
++	}
++	if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
++		dprintk(1, "No progress (%lx,%lx).", pc, sp);
++		return -EIO;
++	}
++
++	return 0;
++#undef CASES
++#undef FRAME_REG
++}
++EXPORT_SYMBOL(unwind);
++
++int unwind_init_frame_info(struct unwind_frame_info *info,
++                           struct task_struct *tsk,
++                           /*const*/ struct pt_regs *regs)
++{
++	info->task = tsk;
++	info->call_frame = 0;
++	arch_unw_init_frame_info(info, regs);
++
++	return 0;
++}
++EXPORT_SYMBOL(unwind_init_frame_info);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++int unwind_init_blocked(struct unwind_frame_info *info,
++                        struct task_struct *tsk)
++{
++	info->task = tsk;
++	info->call_frame = 0;
++	arch_unw_init_blocked(info);
++
++	return 0;
++}
++EXPORT_SYMBOL(unwind_init_blocked);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++int unwind_init_running(struct unwind_frame_info *info,
++                        asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                   void *arg),
++                        void *arg)
++{
++	info->task = current;
++	info->call_frame = 0;
++
++	return arch_unwind_init_running(info, callback, arg);
++}
++EXPORT_SYMBOL(unwind_init_running);
++
+Index: linux-2.6.20.3/lib/Kconfig.debug
+===================================================================
+--- linux-2.6.20.3.orig/lib/Kconfig.debug	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/lib/Kconfig.debug	2007-08-29 15:11:26.000000000 +0200
+@@ -354,6 +354,24 @@
+ 	  some architectures or if you use external debuggers.
+ 	  If you don't debug the kernel, you can say N.
+ 
++config UNWIND_INFO
++	bool "Compile the kernel with frame unwind information"
++	depends on !IA64 && !PARISC && !ARM
++	depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
++	help
++	  If you say Y here the resulting kernel image will be slightly larger
++	  but not slower, and it will give very useful debugging information.
++	  If you don't debug the kernel, you can say N, but we may not be able
++	  to solve problems without frame unwind information or frame pointers.
++
++config STACK_UNWIND
++	bool "Stack unwind support"
++	depends on UNWIND_INFO
++	depends on X86
++	help
++	  This enables more precise stack traces, omitting all unrelated
++	  occurrences of pointers into kernel code from the dump.
++
+ config FORCED_INLINING
+ 	bool "Force gcc to inline functions marked 'inline'"
+ 	depends on DEBUG_KERNEL
+@@ -400,6 +418,9 @@
+ config FAULT_INJECTION
+ 	bool "Fault-injection framework"
+ 	depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
++	# could support fp on X86_32 here too, but let's not
++	select UNWIND_INFO if X86
++	select STACK_UNWIND if X86
+ 	select STACKTRACE
+ 	select FRAME_POINTER
+ 	help
+Index: linux-2.6.20.3/arch/x86_64/kernel/traps.c
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/traps.c	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/traps.c	2007-08-29 15:10:06.000000000 +0200
+@@ -110,6 +110,11 @@
+ }
+ 
+ int kstack_depth_to_print = 12;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+ 
+ #ifdef CONFIG_KALLSYMS
+ void printk_address(unsigned long address)
+@@ -212,6 +217,33 @@
+ 	return NULL;
+ }
+ 
++struct ops_and_data {
++	struct stacktrace_ops *ops;
++	void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
++{
++	struct ops_and_data *oad = (struct ops_and_data *)context;
++	int n = 0;
++	unsigned long sp = UNW_SP(info);
++
++	if (arch_unw_user_mode(info))
++		return -1;
++	while (unwind(info) == 0 && UNW_PC(info)) {
++		n++;
++		oad->ops->address(oad->data, UNW_PC(info));
++		if (arch_unw_user_mode(info))
++			break;
++		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++		    && sp > UNW_SP(info))
++			break;
++		sp = UNW_SP(info);
++		touch_nmi_watchdog();
++	}
++	return n;
++}
++
+ #define MSG(txt) ops->warning(data, txt)
+ 
+ /*
+@@ -239,6 +271,40 @@
+ 	if (!tsk)
+ 		tsk = current;
+ 
++	if (call_trace >= 0) {
++		int unw_ret = 0;
++		struct unwind_frame_info info;
++		struct ops_and_data oad = { .ops = ops, .data = data };
++
++		if (regs) {
++			if (unwind_init_frame_info(&info, tsk, regs) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		} else if (tsk == current)
++			unw_ret = unwind_init_running(&info, dump_trace_unwind,
++						      &oad);
++		else {
++			if (unwind_init_blocked(&info, tsk) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		}
++		if (unw_ret > 0) {
++			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++				ops->warning_symbol(data,
++					     "DWARF2 unwinder stuck at %s",
++					     UNW_PC(&info));
++				if ((long)UNW_SP(&info) < 0) {
++					MSG("Leftover inexact backtrace:");
++					stack = (unsigned long *)UNW_SP(&info);
++					if (!stack)
++						goto out;
++				} else
++					MSG("Full inexact backtrace again:");
++			} else if (call_trace >= 1)
++				goto out;
++			else
++				MSG("Full inexact backtrace again:");
++		} else
++			MSG("Inexact backtrace:");
++	}
+ 	if (!stack) {
+ 		unsigned long dummy;
+ 		stack = &dummy;
+@@ -322,6 +388,7 @@
+ 	tinfo = task_thread_info(tsk);
+ 	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++out:
+ 	put_cpu();
+ }
+ EXPORT_SYMBOL(dump_trace);
+@@ -1122,3 +1189,21 @@
+ 	return 0;
+ }
+ early_param("kstack", kstack_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++	if (!s)
++		return -EINVAL;
++	if (strcmp(s, "old") == 0)
++		call_trace = -1;
++	else if (strcmp(s, "both") == 0)
++		call_trace = 0;
++	else if (strcmp(s, "newfallback") == 0)
++		call_trace = 1;
++	else if (strcmp(s, "new") == 0)
++		call_trace = 2;
++	return 0;
++}
++early_param("call_trace", call_trace_setup);
++#endif
+Index: linux-2.6.20.3/arch/i386/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/i386/kernel/Makefile	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/i386/kernel/Makefile	2007-08-29 15:10:06.000000000 +0200
+@@ -39,6 +39,7 @@
+ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+ obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
+ obj-$(CONFIG_K8_NB)		+= k8.o
++obj-$(CONFIG_STACK_UNWIND)	+= unwind.o
+ 
+ # Make sure this is linked after any other paravirt_ops structs: see head.S
+ obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+Index: linux-2.6.20.3/arch/i386/kernel/unwind.S
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/arch/i386/kernel/unwind.S	2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,36 @@
++/* Assembler support code for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/ptrace-abi.h>
++#include <asm/segment.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++	CFI_STARTPROC
++	movl	4(%esp), %edx
++	movl	(%esp), %ecx
++	leal	4(%esp), %eax
++	movl	%ebx, PT_EBX(%edx)
++	xorl	%ebx, %ebx
++	movl	%ebx, PT_ECX(%edx)
++	movl	%ebx, PT_EDX(%edx)
++	movl	%esi, PT_ESI(%edx)
++	movl	%edi, PT_EDI(%edx)
++	movl	%ebp, PT_EBP(%edx)
++	movl	%ebx, PT_EAX(%edx)
++	movl	$__USER_DS, PT_DS(%edx)
++	movl	$__USER_DS, PT_ES(%edx)
++	movl	$0, PT_FS(%edx)
++	movl	%ebx, PT_ORIG_EAX(%edx)
++	movl	%ecx, PT_EIP(%edx)
++	movl	12(%esp), %ecx
++	movl	$__KERNEL_CS, PT_CS(%edx)
++	movl	%ebx, PT_EFLAGS(%edx)
++	movl	%eax, PT_OLDESP(%edx)
++	movl	8(%esp), %eax
++	movl	%ecx, 8(%esp)
++	movl	PT_EBX(%edx), %ebx
++	movl	$__KERNEL_DS, PT_OLDSS(%edx)
++	jmpl	*%eax
++	CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
+Index: linux-2.6.20.3/arch/x86_64/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/Makefile	2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/Makefile	2007-08-29 15:10:06.000000000 +0200
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_X86_VSMP)		+= vsmp.o
+ obj-$(CONFIG_K8_NB)		+= k8.o
+ obj-$(CONFIG_AUDIT)		+= audit.o
++obj-$(CONFIG_STACK_UNWIND)	+= unwind.o
+ 
+ obj-$(CONFIG_MODULES)		+= module.o
+ obj-$(CONFIG_PCI)		+= early-quirks.o
+Index: linux-2.6.20.3/arch/x86_64/kernel/unwind.S
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/arch/x86_64/kernel/unwind.S	2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,38 @@
++/* Assembler support for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/segment.h>
++#include <asm/ptrace.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++	CFI_STARTPROC
++	movq	%r15, R15(%rdi)
++	movq	%r14, R14(%rdi)
++	xchgq	%rsi, %rdx
++	movq	%r13, R13(%rdi)
++	movq	%r12, R12(%rdi)
++	xorl	%eax, %eax
++	movq	%rbp, RBP(%rdi)
++	movq	%rbx, RBX(%rdi)
++	movq	(%rsp), %rcx
++	movq	%rax, R11(%rdi)
++	movq	%rax, R10(%rdi)
++	movq	%rax, R9(%rdi)
++	movq	%rax, R8(%rdi)
++	movq	%rax, RAX(%rdi)
++	movq	%rax, RCX(%rdi)
++	movq	%rax, RDX(%rdi)
++	movq	%rax, RSI(%rdi)
++	movq	%rax, RDI(%rdi)
++	movq	%rax, ORIG_RAX(%rdi)
++	movq	%rcx, RIP(%rdi)
++	leaq	8(%rsp), %rcx
++	movq	$__KERNEL_CS, CS(%rdi)
++	movq	%rax, EFLAGS(%rdi)
++	movq	%rcx, RSP(%rdi)
++	movq	$__KERNEL_DS, SS(%rdi)
++	jmpq	*%rdx
++	CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
++

Added: trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch (added)
+++ trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,45 @@
+Index: linux-2.6.20.3/mm/filemap.c
+===================================================================
+--- linux-2.6.20.3.orig/mm/filemap.c	2007-09-10 14:17:43.000000000 +0200
++++ linux-2.6.20.3/mm/filemap.c	2007-09-10 14:23:31.000000000 +0200
+@@ -2099,21 +2099,27 @@
+ 		/* Limit the size of the copy to the caller's write size */
+ 		bytes = min(bytes, count);
+ 
+-		/*
+-		 * Limit the size of the copy to that of the current segment,
+-		 * because fault_in_pages_readable() doesn't know how to walk
+-		 * segments.
++		/* We only need to worry about prefaulting when writes are from
++		 * user-space.  NFSd uses vfs_writev with several non-aligned
++		 * segments in the vector, and limiting to one segment at a time
++		 * is a noticeable performance hit for re-write
++		 */
+-		bytes = min(bytes, cur_iov->iov_len - iov_base);
+-
+-		/*
+-		 * Bring in the user page that we will copy from _first_.
+-		 * Otherwise there's a nasty deadlock on copying from the
+-		 * same page as we're writing to, without it being marked
+-		 * up-to-date.
+-		 */
+-		fault_in_pages_readable(buf, bytes);
++		if (!segment_eq(get_fs(), KERNEL_DS)) {
++			/*
++			 * Limit the size of the copy to that of the current
++			 * segment, because fault_in_pages_readable() doesn't
++			 * know how to walk segments.
++			 */
++			bytes = min(bytes, cur_iov->iov_len - iov_base);
+ 
++			/*
++			 * Bring in the user page that we will copy from
++			 * _first_.  Otherwise there's a nasty deadlock on
++			 * copying from the same page as we're writing to,
++			 * without it being marked up-to-date.
++			 */
++			fault_in_pages_readable(buf, bytes);
++		}
+ 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+ 		if (!page) {
+ 			status = -ENOMEM;

Added: trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch (added)
+++ trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,743 @@
+Index: linux-2.6.22.18.patch.lustre.1.6/include/linux/jbd.h
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/include/linux/jbd.h	2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/include/linux/jbd.h	2008-02-12 19:15:28.000000000 +0100
+@@ -428,6 +428,16 @@ struct handle_s
+ };
+ 
+ 
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++	unsigned long		cs_chp_time;
++	unsigned long		cs_forced_to_close;
++	unsigned long		cs_written;
++	unsigned long		cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism.  It
+  * tracks a compound transaction through its various states:
+  *
+@@ -565,6 +575,21 @@ struct transaction_s
+ 	spinlock_t		t_handle_lock;
+ 
+ 	/*
++	 * Longest time some handle had to wait for running transaction
++	 */
++	unsigned long		t_max_wait;
++
++	/*
++	 * When transaction started
++	 */
++	unsigned long		t_start;
++
++	/*
++	 * Checkpointing stats [j_checkpoint_sem]
++	 */
++	struct transaction_chp_stats_s t_chp_stats;
++
++	/*
+ 	 * Number of outstanding updates running on this transaction
+ 	 * [t_handle_lock]
+ 	 */
+@@ -604,6 +629,57 @@ struct transaction_s
+ 	struct list_head	t_jcb;
+ };
+ 
++struct transaction_run_stats_s {
++	unsigned long		rs_wait;
++	unsigned long		rs_running;
++	unsigned long		rs_locked;
++	unsigned long		rs_flushing;
++	unsigned long		rs_logging;
++
++	unsigned long		rs_handle_count;
++	unsigned long		rs_blocks;
++	unsigned long		rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++	int 			ts_type;
++	unsigned long		ts_tid;
++	union {
++		struct transaction_run_stats_s run;
++		struct transaction_chp_stats_s chp;
++	} u;
++};
++
++#define JBD_STATS_RUN		1
++#define JBD_STATS_CHECKPOINT	2
++
++#define ts_wait			u.run.rs_wait
++#define ts_running		u.run.rs_running
++#define ts_locked		u.run.rs_locked
++#define ts_flushing		u.run.rs_flushing
++#define ts_logging		u.run.rs_logging
++#define ts_handle_count		u.run.rs_handle_count
++#define ts_blocks		u.run.rs_blocks
++#define ts_blocks_logged	u.run.rs_blocks_logged
++
++#define ts_chp_time		u.chp.cs_chp_time
++#define ts_forced_to_close	u.chp.cs_forced_to_close
++#define ts_written		u.chp.cs_written
++#define ts_dropped		u.chp.cs_dropped
++
++#define CURRENT_MSECS		(jiffies_to_msecs(jiffies))
++
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++	if (unlikely(start > end))
++		end = end + (~0UL - start);
++	else
++		end -= start;
++	return end;
++}
++
+ /**
+  * struct journal_s - The journal_s type is the concrete type associated with
+  *     journal_t.
+@@ -857,6 +933,16 @@ struct journal_s
+ 	pid_t			j_last_sync_writer;
+ 
+ 	/*
++	 *
++	 */
++	struct transaction_stats_s *j_history;
++	int			j_history_max;
++	int			j_history_cur;
++	spinlock_t		j_history_lock;
++	struct proc_dir_entry	*j_proc_entry;
++	struct transaction_stats_s j_stats;
++
++	/*
+ 	 * An opaque pointer to fs-private information.  ext3 puts its
+ 	 * superblock pointer here
+ 	 */
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/transaction.c	2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/transaction.c	2008-02-12 19:15:28.000000000 +0100
+@@ -61,6 +61,8 @@ get_transaction(journal_t *journal, tran
+ 
+ 	J_ASSERT(journal->j_running_transaction == NULL);
+ 	journal->j_running_transaction = transaction;
++	transaction->t_max_wait = 0;
++	transaction->t_start = CURRENT_MSECS;
+ 
+ 	return transaction;
+ }
+@@ -87,6 +89,7 @@ static int start_this_handle(journal_t *
+ 	int nblocks = handle->h_buffer_credits;
+ 	transaction_t *new_transaction = NULL;
+ 	int ret = 0;
++	unsigned long ts = CURRENT_MSECS;
+ 
+ 	if (nblocks > journal->j_max_transaction_buffers) {
+ 		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -220,6 +223,12 @@ repeat_locked:
+ 	/* OK, account for the buffers that this operation expects to
+ 	 * use and add the handle to the running transaction. */
+ 
++	if (time_after(transaction->t_start, ts)) {
++		ts = jbd_time_diff(ts, transaction->t_start);
++		if (ts > transaction->t_max_wait)
++			transaction->t_max_wait= ts;
++	}
++
+ 	handle->h_transaction = transaction;
+ 	transaction->t_outstanding_credits += nblocks;
+ 	transaction->t_updates++;
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/journal.c	2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/journal.c	2008-02-12 19:22:43.000000000 +0100
+@@ -35,6 +35,7 @@
+ #include <linux/kthread.h>
+ #include <linux/poison.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+@@ -643,6 +644,300 @@ struct journal_head *journal_get_descrip
+ 	return journal_add_journal_head(bh);
+ }
+ 
++struct jbd_stats_proc_session {
++	journal_t *journal;
++	struct transaction_stats_s *stats;
++	int start;
++	int max;
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++					struct transaction_stats_s *ts,
++					int first)
++{
++	if (ts == s->stats + s->max)
++		ts = s->stats;
++	if (!first && ts == s->stats + s->start)
++		return NULL;
++	while (ts->ts_type == 0) {
++		ts++;
++		if (ts == s->stats + s->max)
++			ts = s->stats;
++		if (ts == s->stats + s->start)
++			return NULL;
++	}
++	return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++	struct jbd_stats_proc_session *s = seq->private;
++	struct transaction_stats_s *ts;
++	int l = *pos;
++
++	if (l == 0)
++		return SEQ_START_TOKEN;
++	ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++	if (!ts)
++		return NULL;
++	while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++	return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	struct jbd_stats_proc_session *s = seq->private;
++	struct transaction_stats_s *ts = v;
++
++	++*pos;
++	if (v == SEQ_START_TOKEN)
++		return jbd_history_skip_empty(s, s->stats + s->start, 1);
++	else
++		return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++	struct transaction_stats_s *ts = v;
++	if (v == SEQ_START_TOKEN) {
++		seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++				"%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++				"wait", "run", "lock", "flush", "log", "hndls",
++				"block", "inlog", "ctime", "write", "drop",
++				"close");
++		return 0;
++	}
++	if (ts->ts_type == JBD_STATS_RUN)
++		seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++				"%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++				ts->ts_wait, ts->ts_running, ts->ts_locked,
++				ts->ts_flushing, ts->ts_logging,
++				ts->ts_handle_count, ts->ts_blocks,
++				ts->ts_blocks_logged);
++	else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++		seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++				"C", ts->ts_tid, " ", ts->ts_chp_time,
++				ts->ts_written, ts->ts_dropped,
++				ts->ts_forced_to_close);
++	else
++		J_ASSERT(0);
++	return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++	.start  = jbd_seq_history_start,
++	.next   = jbd_seq_history_next,
++	.stop   = jbd_seq_history_stop,
++	.show   = jbd_seq_history_show,
++};
++
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -EIO;
++	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {
++		kfree(s);
++		return -EIO;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, journal->j_history, size);
++	s->max = journal->j_history_max;
++	s->start = journal->j_history_cur % s->max;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_history_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = (struct seq_file *)file->private_data;
++	struct jbd_stats_proc_session *s = seq->private;
++	kfree(s->stats);
++	kfree(s);
++	return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++	.owner		= THIS_MODULE,
++	.open           = jbd_seq_history_open,
++	.read           = seq_read,
++	.llseek         = seq_lseek,
++	.release        = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++	return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++	return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++	struct jbd_stats_proc_session *s = seq->private;
++	if (v != SEQ_START_TOKEN)
++		return 0;
++	seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++			s->stats->ts_tid,
++			s->journal->j_max_transaction_buffers);
++	if (s->stats->ts_tid == 0)
++		return 0;
++	seq_printf(seq, "average: \n  %lums waiting for transaction\n",
++			s->stats->ts_wait / s->stats->ts_tid);
++	seq_printf(seq, "  %lums running transaction\n",
++			s->stats->ts_running / s->stats->ts_tid);
++	seq_printf(seq, "  %lums transaction was being locked\n",
++			s->stats->ts_locked / s->stats->ts_tid);
++	seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
++			s->stats->ts_flushing / s->stats->ts_tid);
++	seq_printf(seq, "  %lums logging transaction\n",
++			s->stats->ts_logging / s->stats->ts_tid);
++	seq_printf(seq, "  %lu handles per transaction\n",
++			s->stats->ts_handle_count / s->stats->ts_tid);
++	seq_printf(seq, "  %lu blocks per transaction\n",
++			s->stats->ts_blocks / s->stats->ts_tid);
++	seq_printf(seq, "  %lu logged blocks per transaction\n",
++			s->stats->ts_blocks_logged / s->stats->ts_tid);
++	return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++	.start  = jbd_seq_info_start,
++	.next   = jbd_seq_info_next,
++	.stop   = jbd_seq_info_stop,
++	.show   = jbd_seq_info_show,
++};
++
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++	journal_t *journal = PDE(inode)->data;
++	struct jbd_stats_proc_session *s;
++	int rc, size;
++
++	s = kmalloc(sizeof(*s), GFP_KERNEL);
++	if (s == NULL)
++		return -EIO;
++	size = sizeof(struct transaction_stats_s);
++	s->stats = kmalloc(size, GFP_KERNEL);
++	if (s->stats == NULL) {
++		kfree(s);
++		return -EIO;
++	}
++	spin_lock(&journal->j_history_lock);
++	memcpy(s->stats, &journal->j_stats, size);
++	s->journal = journal;
++	spin_unlock(&journal->j_history_lock);
++
++	rc = seq_open(file, &jbd_seq_info_ops);
++	if (rc == 0) {
++		struct seq_file *m = (struct seq_file *)file->private_data;
++		m->private = s;
++	} else {
++		kfree(s->stats);
++		kfree(s);
++	}
++	return rc;
++
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = (struct seq_file *)file->private_data;
++	struct jbd_stats_proc_session *s = seq->private;
++	kfree(s->stats);
++	kfree(s);
++	return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++	.owner		= THIS_MODULE,
++	.open           = jbd_seq_info_open,
++	.read           = seq_read,
++	.llseek         = seq_lseek,
++	.release        = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++static void jbd_stats_proc_init(journal_t *journal)
++{
++	char name[64];
++
++	snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
++	journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++	if (journal->j_proc_entry) {
++		struct proc_dir_entry *p;
++		p = create_proc_entry("history", S_IRUGO,
++				journal->j_proc_entry);
++		if (p) {
++			p->proc_fops = &jbd_seq_history_fops;
++			p->data = journal;
++			p = create_proc_entry("info", S_IRUGO,
++						journal->j_proc_entry);
++			if (p) {
++				p->proc_fops = &jbd_seq_info_fops;
++				p->data = journal;
++			}
++		}
++	}
++}
++
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++	char name[64];
++
++	snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
++	remove_proc_entry("info", journal->j_proc_entry);
++	remove_proc_entry("history", journal->j_proc_entry);
++	remove_proc_entry(name, proc_jbd_stats);
++}
++
++static void journal_init_stats(journal_t *journal)
++{
++	int size;
++
++	if (proc_jbd_stats == NULL)
++		return;
++
++	journal->j_history_max = 100;
++	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++	journal->j_history = kmalloc(size, GFP_KERNEL);
++	if (journal->j_history == NULL) {
++		journal->j_history_max = 0;
++		return;
++	}
++	memset(journal->j_history, 0, size);
++	spin_lock_init(&journal->j_history_lock);
++}
++
+ /*
+  * Management for journal control blocks: functions to create and
+  * destroy journal_t structures, and to initialise and read existing
+@@ -685,6 +980,9 @@ static journal_t * journal_init_common (
+ 		kfree(journal);
+ 		goto fail;
+ 	}
++
++	journal_init_stats(journal);
++
+ 	return journal;
+ fail:
+ 	return NULL;
+@@ -739,6 +1037,7 @@ journal_t * journal_init_dev(struct bloc
+ 	journal->j_fs_dev = fs_dev;
+ 	journal->j_blk_offset = start;
+ 	journal->j_maxlen = len;
++	jbd_stats_proc_init(journal);
+ 
+ 	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+ 	J_ASSERT(bh != NULL);
+@@ -777,6 +1076,7 @@ journal_t * journal_init_inode (struct i
+ 
+ 	journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ 	journal->j_blocksize = inode->i_sb->s_blocksize;
++	jbd_stats_proc_init(journal);
+ 
+ 	/* journal descriptor can store up to n blocks -bzzz */
+ 	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+@@ -1164,6 +1464,8 @@ void journal_destroy(journal_t *journal)
+ 		brelse(journal->j_sb_buffer);
+ 	}
+ 
++	if (journal->j_proc_entry)
++		jbd_stats_proc_exit(journal);
+ 	if (journal->j_inode)
+ 		iput(journal->j_inode);
+ 	if (journal->j_revoke)
+@@ -2003,6 +2305,28 @@ static void __exit remove_jbd_proc_entry
+ 
+ #endif
+ 
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++	proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++	if (proc_jbd_stats)
++		remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ struct kmem_cache *jbd_handle_cache;
+ 
+ static int __init journal_init_handle_cache(void)
+@@ -2060,6 +2384,7 @@ static int __init journal_init(void)
+ 	if (ret != 0)
+ 		journal_destroy_caches();
+ 	create_jbd_proc_entry();
++	create_jbd_stats_proc_entry();
+ 	return ret;
+ }
+ 
+@@ -2071,6 +2396,7 @@ static void __exit journal_exit(void)
+ 		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+ 	remove_jbd_proc_entry();
++	remove_jbd_stats_proc_entry();
+ 	journal_destroy_caches();
+ }
+ 
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/checkpoint.c	2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/checkpoint.c	2008-02-12 19:15:28.000000000 +0100
+@@ -232,7 +232,7 @@ __flush_batch(journal_t *journal, struct
+  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
+  */
+ static int __process_buffer(journal_t *journal, struct journal_head *jh,
+-			struct buffer_head **bhs, int *batch_count)
++			struct buffer_head **bhs, int *batch_count, transaction_t *transaction)
+ {
+ 	struct buffer_head *bh = jh2bh(jh);
+ 	int ret = 0;
+@@ -250,6 +250,7 @@ static int __process_buffer(journal_t *j
+ 		transaction_t *t = jh->b_transaction;
+ 		tid_t tid = t->t_tid;
+ 
++		transaction->t_chp_stats.cs_forced_to_close++;
+ 		spin_unlock(&journal->j_list_lock);
+ 		jbd_unlock_bh_state(bh);
+ 		log_start_commit(journal, tid);
+@@ -279,6 +280,7 @@ static int __process_buffer(journal_t *j
+ 		bhs[*batch_count] = bh;
+ 		__buffer_relink_io(jh);
+ 		jbd_unlock_bh_state(bh);
++		transaction->t_chp_stats.cs_written++;
+ 		(*batch_count)++;
+ 		if (*batch_count == NR_BATCH) {
+ 			spin_unlock(&journal->j_list_lock);
+@@ -322,6 +324,8 @@ int log_do_checkpoint(journal_t *journal
+ 	if (!journal->j_checkpoint_transactions)
+ 		goto out;
+ 	transaction = journal->j_checkpoint_transactions;
++	if (transaction->t_chp_stats.cs_chp_time == 0)
++		transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+ 	this_tid = transaction->t_tid;
+ restart:
+ 	/*
+@@ -346,7 +350,8 @@ restart:
+ 				retry = 1;
+ 				break;
+ 			}
+-			retry = __process_buffer(journal, jh, bhs,&batch_count);
++			retry = __process_buffer(journal, jh, bhs,&batch_count,
++						 transaction);
+ 			if (!retry && lock_need_resched(&journal->j_list_lock)){
+ 				spin_unlock(&journal->j_list_lock);
+ 				retry = 1;
+@@ -668,6 +673,8 @@ void __journal_insert_checkpoint(struct 
+ 
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++	struct transaction_stats_s stats;
++
+ 	assert_spin_locked(&journal->j_list_lock);
+ 	if (transaction->t_cpnext) {
+ 		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -694,5 +701,25 @@ void __journal_drop_transaction(journal_
+ 	J_ASSERT(journal->j_running_transaction != transaction);
+ 
+ 	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++	/*
++	 * File the transaction for history
++	 */
++	if (transaction->t_chp_stats.cs_written != 0 ||
++			transaction->t_chp_stats.cs_chp_time != 0) {
++		stats.ts_type = JBD_STATS_CHECKPOINT;
++		stats.ts_tid = transaction->t_tid;
++		stats.u.chp = transaction->t_chp_stats;
++		if (stats.ts_chp_time)
++			stats.ts_chp_time =
++				jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++		spin_lock(&journal->j_history_lock);
++		memcpy(journal->j_history + journal->j_history_cur, &stats,
++				sizeof(stats));
++		if (++journal->j_history_cur == journal->j_history_max)
++			journal->j_history_cur = 0;
++		spin_unlock(&journal->j_history_lock);
++	}
++
+ 	kfree(transaction);
+ }
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/commit.c	2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/commit.c	2008-02-12 19:15:28.000000000 +0100
+@@ -13,6 +13,7 @@
+  * part of the ext2fs journaling system.
+  */
+ 
++#include <linux/jiffies.h>
+ #include <linux/time.h>
+ #include <linux/fs.h>
+ #include <linux/jbd.h>
+@@ -21,6 +22,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ 
++
+ /*
+  * Default IO end handler for temporary BJ_IO buffer_heads.
+  */
+@@ -282,6 +284,7 @@ write_out_data:
+  */
+ void journal_commit_transaction(journal_t *journal)
+ {
++	struct transaction_stats_s stats;
+ 	transaction_t *commit_transaction;
+ 	struct journal_head *jh, *new_jh, *descriptor;
+ 	struct buffer_head **wbuf = journal->j_wbuf;
+@@ -328,6 +331,11 @@ void journal_commit_transaction(journal_
+ 	spin_lock(&journal->j_state_lock);
+ 	commit_transaction->t_state = T_LOCKED;
+ 
++	stats.ts_wait = commit_transaction->t_max_wait;
++	stats.ts_locked = CURRENT_MSECS;
++	stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++						stats.ts_locked);
++
+ 	spin_lock(&commit_transaction->t_handle_lock);
+ 	while (commit_transaction->t_updates) {
+ 		DEFINE_WAIT(wait);
+@@ -398,6 +406,9 @@ void journal_commit_transaction(journal_
+ 	 */
+ 	journal_switch_revoke_table(journal);
+ 
++	stats.ts_flushing = CURRENT_MSECS;
++	stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+ 	commit_transaction->t_state = T_FLUSH;
+ 	journal->j_committing_transaction = commit_transaction;
+ 	journal->j_running_transaction = NULL;
+@@ -489,6 +500,11 @@ void journal_commit_transaction(journal_
+ 	 */
+ 	commit_transaction->t_state = T_COMMIT;
+ 
++	stats.ts_logging = CURRENT_MSECS;
++	stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++	stats.ts_blocks = commit_transaction->t_outstanding_credits;
++	stats.ts_blocks_logged = 0;
++
+ 	descriptor = NULL;
+ 	bufs = 0;
+ 	while (commit_transaction->t_buffers) {
+@@ -637,6 +653,7 @@ start_journal_io:
+ 				submit_bh(WRITE, bh);
+ 			}
+ 			cond_resched();
++			stats.ts_blocks_logged += bufs;
+ 
+ 			/* Force a new descriptor to be generated next
+                            time round the loop. */
+@@ -831,6 +848,7 @@ restart_loop:
+ 		cp_transaction = jh->b_cp_transaction;
+ 		if (cp_transaction) {
+ 			JBUFFER_TRACE(jh, "remove from old cp transaction");
++			cp_transaction->t_chp_stats.cs_dropped++;
+ 			__journal_remove_checkpoint(jh);
+ 		}
+ 
+@@ -905,6 +923,36 @@ restart_loop:
+ 
+ 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ 
++	commit_transaction->t_start = CURRENT_MSECS;
++	stats.ts_logging = jbd_time_diff(stats.ts_logging,
++					 commit_transaction->t_start);
++
++	/*
++	 * File the transaction for history
++	 */
++	stats.ts_type = JBD_STATS_RUN;
++	stats.ts_tid = commit_transaction->t_tid;
++	stats.ts_handle_count = commit_transaction->t_handle_count;
++	spin_lock(&journal->j_history_lock);
++	memcpy(journal->j_history + journal->j_history_cur, &stats,
++			sizeof(stats));
++	if (++journal->j_history_cur == journal->j_history_max)
++		journal->j_history_cur = 0;
++
++	/*
++	 * Calculate overall stats
++	 */
++	journal->j_stats.ts_tid++;
++	journal->j_stats.ts_wait += stats.ts_wait;
++	journal->j_stats.ts_running += stats.ts_running;
++	journal->j_stats.ts_locked += stats.ts_locked;
++	journal->j_stats.ts_flushing += stats.ts_flushing;
++	journal->j_stats.ts_logging += stats.ts_logging;
++	journal->j_stats.ts_handle_count += stats.ts_handle_count;
++	journal->j_stats.ts_blocks += stats.ts_blocks;
++	journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++	spin_unlock(&journal->j_history_lock);
++
+ 	commit_transaction->t_state = T_FINISHED;
+ 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
+ 	journal->j_commit_sequence = commit_transaction->t_tid;

Modified: trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch (original)
+++ trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch Wed Feb 13 12:23:22 2008
@@ -1,8 +1,8 @@
-Index: linux-2.6.22-rc4/drivers/scsi/Kconfig
+Index: linux-2.6.22/drivers/scsi/Kconfig
 ===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/Kconfig	2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/Kconfig	2007-06-11 20:24:20.000000000 +0200
-@@ -75,6 +75,14 @@
+--- linux-2.6.22.orig/drivers/scsi/Kconfig	2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/Kconfig	2007-09-10 16:19:56.000000000 +0200
+@@ -76,6 +76,14 @@
  	  In this case, do not compile the driver for your SCSI host adapter
  	  (below) as a module either.
  
@@ -17,10 +17,10 @@
  config CHR_DEV_ST
  	tristate "SCSI tape support"
  	depends on SCSI
-Index: linux-2.6.22-rc4/drivers/scsi/sd.c
+Index: linux-2.6.22/drivers/scsi/sd.c
 ===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/sd.c	2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/sd.c	2007-06-11 20:33:35.000000000 +0200
+--- linux-2.6.22.orig/drivers/scsi/sd.c	2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/sd.c	2007-09-10 16:19:56.000000000 +0200
 @@ -244,6 +244,38 @@
  	.issue_flush		= sd_issue_flush,
  };
@@ -431,10 +431,10 @@
  	scsi_unregister_driver(&sd_template.gendrv);
  	class_unregister(&sd_disk_class);
  
-Index: linux-2.6.22-rc4/drivers/scsi/scsi_proc.c
+Index: linux-2.6.22/drivers/scsi/scsi_proc.c
 ===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/scsi_proc.c	2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/scsi_proc.c	2007-06-11 20:24:20.000000000 +0200
+--- linux-2.6.22.orig/drivers/scsi/scsi_proc.c	2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/scsi_proc.c	2007-09-10 16:19:56.000000000 +0200
 @@ -40,7 +40,8 @@
  /* 4K page size, but our output routines, use some slack for overruns */
  #define PROC_BLOCK_SIZE (3*1024)

Modified: trunk/lustre/kernel_patches/series/2.6.20-vanilla.series
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/series/2.6.20-vanilla.series?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/series/2.6.20-vanilla.series (original)
+++ trunk/lustre/kernel_patches/series/2.6.20-vanilla.series Wed Feb 13 12:23:22 2008
@@ -11,3 +11,5 @@
 export-show_task-2.6.18-vanilla.patch
 sd_iostats-2.6.20.patch 
 LDISKFS_SUPER_MAGIC-2.6.20.patch
+2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
+generic_file_buffered_write_backport_2.6.20.patch

Modified: trunk/lustre/kernel_patches/series/2.6.22-vanilla.series
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/series/2.6.22-vanilla.series?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/series/2.6.22-vanilla.series (original)
+++ trunk/lustre/kernel_patches/series/2.6.22-vanilla.series Wed Feb 13 12:23:22 2008
@@ -11,3 +11,4 @@
 export-show_task-2.6.18-vanilla.patch
 sd_iostats-2.6.22.patch
 LDISKFS_SUPER_MAGIC-2.6.20.patch
+jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch




More information about the Pkg-lustre-svn-commit mailing list