[Pkg-lustre-svn-commit] r435 - in /trunk/lustre/kernel_patches: patches/ series/
goswin-guest at users.alioth.debian.org
goswin-guest at users.alioth.debian.org
Wed Feb 13 12:23:22 UTC 2008
Author: goswin-guest
Date: Wed Feb 13 12:23:22 2008
New Revision: 435
URL: http://svn.debian.org/wsvn/pkg-lustre/?sc=1&rev=435
Log:
Bring kernel patches in sync.
Added:
trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch
trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch
Modified:
trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch
trunk/lustre/kernel_patches/series/2.6.20-vanilla.series
trunk/lustre/kernel_patches/series/2.6.22-vanilla.series
Added: trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch (added)
+++ trunk/lustre/kernel_patches/patches/2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,2143 @@
+x86: Readd dwarf2 unwinder
+
+From: Jan Beulich <jbeulich at novell.com>
+
+The dwarf2 unwinder uses information generated by the compiler
+to do exact backtraces without frame pointers. Enabled for i386 and x86-64.
+
+AK: Readded by me, but all the real work was done by Jan
+AK: I just did some cleanup
+
+TBD: add paranoid checks Linus wanted
+TBD: better description
+
+Signed-off-by: Andi Kleen <ak at suse.de>
+
+---
+ Makefile | 5
+ arch/i386/kernel/Makefile | 1
+ arch/i386/kernel/traps.c | 84 ++
+ arch/i386/kernel/unwind.S | 36 +
+ arch/x86_64/Makefile | 2
+ arch/x86_64/kernel/Makefile | 1
+ arch/x86_64/kernel/traps.c | 85 ++
+ arch/x86_64/kernel/unwind.S | 38 +
+ arch/x86_64/kernel/vmlinux.lds.S | 2
+ include/asm-generic/vmlinux.lds.h | 22
+ include/asm-i386/unwind.h | 91 ++
+ include/asm-x86_64/unwind.h | 96 ++
+ include/linux/unwind.h | 63 +
+ kernel/Makefile | 1
+ kernel/unwind.c | 1288 ++++++++++++++++++++++++++++++++++++++
+ lib/Kconfig.debug | 21
+ 16 files changed, 1834 insertions(+), 2 deletions(-)
+
+Index: linux-2.6.20.3/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/Makefile 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/Makefile 2007-08-29 15:10:06.000000000 +0200
+@@ -496,6 +496,11 @@
+ CFLAGS += -fomit-frame-pointer
+ endif
+
++ifdef CONFIG_UNWIND_INFO
++CFLAGS += -fasynchronous-unwind-tables
++LDFLAGS_vmlinux += --eh-frame-hdr
++endif
++
+ ifdef CONFIG_DEBUG_INFO
+ CFLAGS += -g
+ endif
+Index: linux-2.6.20.3/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.6.20.3.orig/arch/i386/kernel/traps.c 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/i386/kernel/traps.c 2007-08-29 15:14:28.000000000 +0200
+@@ -94,6 +94,12 @@
+ asmlinkage void machine_check(void);
+
+ int kstack_depth_to_print = 24;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
++
+ ATOMIC_NOTIFIER_HEAD(i386die_chain);
+
+ int register_die_notifier(struct notifier_block *nb)
+@@ -112,7 +118,7 @@
+ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+ return p > (void *)tinfo &&
+- p < (void *)tinfo + THREAD_SIZE - 3;
++ p < (void *)tinfo + THREAD_SIZE - 3;
+ }
+
+ static inline unsigned long print_context_stack(struct thread_info *tinfo,
+@@ -124,7 +130,7 @@
+ #ifdef CONFIG_FRAME_POINTER
+ while (valid_stack_ptr(tinfo, (void *)ebp)) {
+ unsigned long new_ebp;
+- addr = *(unsigned long *)(ebp + 4);
++ addr = *(unsigned long *)(ebp + 4);
+ ops->address(data, addr);
+ /*
+ * break out of recursive entries (such as
+@@ -147,6 +153,34 @@
+ return ebp;
+ }
+
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
++static asmlinkage int
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
++{
++ struct ops_and_data *oad = (struct ops_and_data *)data;
++ int n = 0;
++ unsigned long sp = UNW_SP(info);
++
++ if (arch_unw_user_mode(info))
++ return -1;
++ while (unwind(info) == 0 && UNW_PC(info)) {
++ n++;
++ oad->ops->address(oad->data, UNW_PC(info));
++ if (arch_unw_user_mode(info))
++ break;
++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++ && sp > UNW_SP(info))
++ break;
++ sp = UNW_SP(info);
++ touch_nmi_watchdog();
++ }
++ return n;
++}
++
+ #define MSG(msg) ops->warning(data, msg)
+
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+@@ -158,6 +192,41 @@
+ if (!task)
+ task = current;
+
++ if (call_trace >= 0) {
++ int unw_ret = 0;
++ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
++
++ if (regs) {
++ if (unwind_init_frame_info(&info, task, regs) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ } else if (task == current)
++ unw_ret = unwind_init_running(&info, dump_trace_unwind,
++ &oad);
++ else {
++ if (unwind_init_blocked(&info, task) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ }
++ if (unw_ret > 0) {
++ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++ ops->warning_symbol(data,
++ "DWARF2 unwinder stuck at %s",
++ UNW_PC(&info));
++ if (UNW_SP(&info) >= PAGE_OFFSET) {
++ MSG("Leftover inexact backtrace:");
++ stack = (void *)UNW_SP(&info);
++ if (!stack)
++ return;
++ ebp = UNW_FP(&info);
++ } else
++ MSG("Full inexact backtrace again:");
++ } else if (call_trace >= 1)
++ return;
++ else
++ MSG("Full inexact backtrace again:");
++ } else
++ MSG("Inexact backtrace:");
++ }
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+@@ -983,7 +1052,7 @@
+ long error_code)
+ {
+ if (cpu_has_xmm) {
+- /* Handle SIMD FPU exceptions on PIII+ processors. */
++ /* Handle SIMD FPU exceptions on PIII+ processors. */
+ ignore_fpu_irq = 1;
+ simd_math_error((void __user *)regs->eip);
+ } else {
+@@ -1191,3 +1260,21 @@
+ return 1;
+ }
+ __setup("kstack=", kstack_setup);
++
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 1;
++}
++__setup("call_trace=", call_trace_setup);
++#endif
++
+Index: linux-2.6.20.3/arch/x86_64/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/Makefile 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/Makefile 2007-08-29 15:10:06.000000000 +0200
+@@ -45,7 +45,9 @@
+ # actually it makes the kernel smaller too.
+ cflags-y += -fno-reorder-blocks
+ cflags-y += -Wno-sign-compare
++ifneq ($(CONFIG_UNWIND_INFO),y)
+ cflags-y += -fno-asynchronous-unwind-tables
++endif
+ ifneq ($(CONFIG_DEBUG_INFO),y)
+ # -fweb shrinks the kernel a bit, but the difference is very small
+ # it also messes up debugging, so don't use it for now.
+Index: linux-2.6.20.3/arch/x86_64/kernel/vmlinux.lds.S
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/vmlinux.lds.S 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/vmlinux.lds.S 2007-08-29 15:10:06.000000000 +0200
+@@ -221,7 +221,9 @@
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.exitcall.exit)
++#ifndef CONFIG_UNWIND_INFO
+ *(.eh_frame)
++#endif
+ }
+
+ STABS_DEBUG
+Index: linux-2.6.20.3/include/asm-generic/vmlinux.lds.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-generic/vmlinux.lds.h 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-generic/vmlinux.lds.h 2007-08-29 15:10:06.000000000 +0200
+@@ -122,6 +122,8 @@
+ *(__ksymtab_strings) \
+ } \
+ \
++ EH_FRAME \
++ \
+ /* Built-in module parameters. */ \
+ __param : AT(ADDR(__param) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___param) = .; \
+@@ -161,6 +163,26 @@
+ *(.kprobes.text) \
+ VMLINUX_SYMBOL(__kprobes_text_end) = .;
+
++#ifdef CONFIG_STACK_UNWIND
++#define EH_FRAME \
++ /* Unwind data binary search table */ \
++ . = ALIGN(8); \
++ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \
++ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \
++ *(.eh_frame_hdr) \
++ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \
++ } \
++ /* Unwind data */ \
++ . = ALIGN(8); \
++ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \
++ VMLINUX_SYMBOL(__start_unwind) = .; \
++ *(.eh_frame) \
++ VMLINUX_SYMBOL(__end_unwind) = .; \
++ }
++#else
++#define EH_FRAME
++#endif
++
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to
+ the beginning of the section so we begin them at 0. */
+Index: linux-2.6.20.3/include/asm-i386/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-i386/unwind.h 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-i386/unwind.h 2007-08-29 15:10:06.000000000 +0200
+@@ -1,6 +1,95 @@
+ #ifndef _ASM_I386_UNWIND_H
+ #define _ASM_I386_UNWIND_H
+
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/fixmap.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++
++struct unwind_frame_info
++{
++ struct pt_regs regs;
++ struct task_struct *task;
++ unsigned call_frame:1;
++};
++
++#define UNW_PC(frame) (frame)->regs.eip
++#define UNW_SP(frame) (frame)->regs.esp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.ebp
++#define FRAME_RETADDR_OFFSET 4
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0)
++#define STACK_TOP(tsk) ((tsk)->thread.esp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0)
++#endif
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(eax), \
++ PTREGS_INFO(ecx), \
++ PTREGS_INFO(edx), \
++ PTREGS_INFO(ebx), \
++ PTREGS_INFO(esp), \
++ PTREGS_INFO(ebp), \
++ PTREGS_INFO(esi), \
++ PTREGS_INFO(edi), \
++ PTREGS_INFO(eip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++ ((raItem).where == Memory && \
++ !((raItem).value * (dataAlign) + 4))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++ /*const*/ struct pt_regs *regs)
++{
++ if (user_mode_vm(regs))
++ info->regs = *regs;
++ else {
++ memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
++ info->regs.esp = (unsigned long)&regs->esp;
++ info->regs.xss = __KERNEL_DS;
++ }
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.eip = info->task->thread.eip;
++ info->regs.xcs = __KERNEL_CS;
++ __get_user(info->regs.ebp, (long *)info->task->thread.esp);
++ info->regs.esp = info->task->thread.esp;
++ info->regs.xss = __KERNEL_DS;
++ info->regs.xds = __USER_DS;
++ info->regs.xes = __USER_DS;
++ info->regs.xfs = __KERNEL_PERCPU;
++}
++
++extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
++{
++ return user_mode_vm(&info->regs)
++ || info->regs.eip < PAGE_OFFSET
++ || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
++ && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
++ || info->regs.esp < PAGE_OFFSET;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0)
+ #define UNW_SP(frame) ((void)(frame), 0)
+ #define UNW_FP(frame) ((void)(frame), 0)
+@@ -10,4 +99,6 @@
+ return 0;
+ }
+
++#endif
++
+ #endif /* _ASM_I386_UNWIND_H */
+Index: linux-2.6.20.3/include/asm-x86_64/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/asm-x86_64/unwind.h 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/asm-x86_64/unwind.h 2007-08-29 15:10:06.000000000 +0200
+@@ -1,6 +1,100 @@
+ #ifndef _ASM_X86_64_UNWIND_H
+ #define _ASM_X86_64_UNWIND_H
+
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <asm/vsyscall.h>
++
++struct unwind_frame_info
++{
++ struct pt_regs regs;
++ struct task_struct *task;
++ unsigned call_frame:1;
++};
++
++#define UNW_PC(frame) (frame)->regs.rip
++#define UNW_SP(frame) (frame)->regs.rsp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.rbp
++#define FRAME_RETADDR_OFFSET 8
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
++#define STACK_TOP(tsk) ((tsk)->thread.rsp0)
++#endif
++/* Might need to account for the special exception and interrupt handling
++ stacks here, since normally
++ EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
++ but the construct is needed only for getting across the stack switch to
++ the interrupt stack - thus considering the IRQ stack itself is unnecessary,
++ and the overhead of comparing against all exception handling stacks seems
++ not desirable. */
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(rax), \
++ PTREGS_INFO(rdx), \
++ PTREGS_INFO(rcx), \
++ PTREGS_INFO(rbx), \
++ PTREGS_INFO(rsi), \
++ PTREGS_INFO(rdi), \
++ PTREGS_INFO(rbp), \
++ PTREGS_INFO(rsp), \
++ PTREGS_INFO(r8), \
++ PTREGS_INFO(r9), \
++ PTREGS_INFO(r10), \
++ PTREGS_INFO(r11), \
++ PTREGS_INFO(r12), \
++ PTREGS_INFO(r13), \
++ PTREGS_INFO(r14), \
++ PTREGS_INFO(r15), \
++ PTREGS_INFO(rip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++ ((raItem).where == Memory && \
++ !((raItem).value * (dataAlign) + 8))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++ /*const*/ struct pt_regs *regs)
++{
++ info->regs = *regs;
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++ extern const char thread_return[];
++
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.rip = (unsigned long)thread_return;
++ info->regs.cs = __KERNEL_CS;
++ __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
++ info->regs.rsp = info->task->thread.rsp;
++ info->regs.ss = __KERNEL_DS;
++}
++
++extern int arch_unwind_init_running(struct unwind_frame_info *,
++ int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
++{
++ return user_mode(&info->regs)
++ || (long)info->regs.rip >= 0
++ || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
++ || (long)info->regs.rsp >= 0;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0UL)
+ #define UNW_SP(frame) ((void)(frame), 0UL)
+
+@@ -9,4 +103,6 @@
+ return 0;
+ }
+
++#endif
++
+ #endif /* _ASM_X86_64_UNWIND_H */
+Index: linux-2.6.20.3/include/linux/unwind.h
+===================================================================
+--- linux-2.6.20.3.orig/include/linux/unwind.h 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/include/linux/unwind.h 2007-08-29 15:10:06.000000000 +0200
+@@ -14,6 +14,63 @@
+
+ struct module;
+
++#ifdef CONFIG_STACK_UNWIND
++
++#include <asm/unwind.h>
++
++#ifndef ARCH_UNWIND_SECTION_NAME
++#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
++#endif
++
++/*
++ * Initialize unwind support.
++ */
++extern void unwind_init(void);
++extern void unwind_setup(void);
++
++#ifdef CONFIG_MODULES
++
++extern void *unwind_add_table(struct module *,
++ const void *table_start,
++ unsigned long table_size);
++
++extern void unwind_remove_table(void *handle, int init_only);
++
++#endif
++
++extern int unwind_init_frame_info(struct unwind_frame_info *,
++ struct task_struct *,
++ /*const*/ struct pt_regs *);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++extern int unwind_init_blocked(struct unwind_frame_info *,
++ struct task_struct *);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++extern int unwind_init_running(struct unwind_frame_info *,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++/*
++ * Unwind to previous to frame. Returns 0 if successful, negative
++ * number in case of an error.
++ */
++extern int unwind(struct unwind_frame_info *);
++
++/*
++ * Unwind until the return pointer is in user-land (or until an error
++ * occurs). Returns 0 if successful, negative number in case of
++ * error.
++ */
++extern int unwind_to_user(struct unwind_frame_info *);
++
++#else
++
+ struct unwind_frame_info {};
+
+ static inline void unwind_init(void) {}
+@@ -28,12 +85,12 @@
+ return NULL;
+ }
+
++#endif
++
+ static inline void unwind_remove_table(void *handle, int init_only)
+ {
+ }
+
+-#endif
+-
+ static inline int unwind_init_frame_info(struct unwind_frame_info *info,
+ struct task_struct *tsk,
+ const struct pt_regs *regs)
+@@ -65,4 +122,6 @@
+ return -ENOSYS;
+ }
+
++#endif
++
+ #endif /* _LINUX_UNWIND_H */
+Index: linux-2.6.20.3/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/kernel/Makefile 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/kernel/Makefile 2007-08-29 15:10:06.000000000 +0200
+@@ -31,6 +31,7 @@
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_KALLSYMS) += kallsyms.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+ obj-$(CONFIG_PM) += power/
+ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+ obj-$(CONFIG_KEXEC) += kexec.o
+Index: linux-2.6.20.3/kernel/unwind.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/kernel/unwind.c 2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,1288 @@
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich at novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ *
++ * A simple API for unwinding kernel stacks. This is used for
++ * debugging and error reporting purposes. The kernel doesn't need
++ * full-blown stack unwinding with all the bells and whistles, so there
++ * is not much point in implementing the full Dwarf2 unwind API.
++ */
++
++#include <linux/unwind.h>
++#include <linux/module.h>
++#include <linux/bootmem.h>
++#include <linux/sort.h>
++#include <linux/stop_machine.h>
++#include <linux/uaccess.h>
++#include <asm/sections.h>
++#include <asm/uaccess.h>
++#include <asm/unaligned.h>
++
++extern const char __start_unwind[], __end_unwind[];
++extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
++
++#define MAX_STACK_DEPTH 8
++
++#define EXTRA_INFO(f) { \
++ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
++ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
++ + offsetof(struct unwind_frame_info, f) \
++ / FIELD_SIZEOF(struct unwind_frame_info, f), \
++ FIELD_SIZEOF(struct unwind_frame_info, f) \
++ }
++#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
++
++static const struct {
++ unsigned offs:BITS_PER_LONG / 2;
++ unsigned width:BITS_PER_LONG / 2;
++} reg_info[] = {
++ UNW_REGISTER_INFO
++};
++
++#undef PTREGS_INFO
++#undef EXTRA_INFO
++
++#ifndef REG_INVALID
++#define REG_INVALID(r) (reg_info[r].width == 0)
++#endif
++
++#define DW_CFA_nop 0x00
++#define DW_CFA_set_loc 0x01
++#define DW_CFA_advance_loc1 0x02
++#define DW_CFA_advance_loc2 0x03
++#define DW_CFA_advance_loc4 0x04
++#define DW_CFA_offset_extended 0x05
++#define DW_CFA_restore_extended 0x06
++#define DW_CFA_undefined 0x07
++#define DW_CFA_same_value 0x08
++#define DW_CFA_register 0x09
++#define DW_CFA_remember_state 0x0a
++#define DW_CFA_restore_state 0x0b
++#define DW_CFA_def_cfa 0x0c
++#define DW_CFA_def_cfa_register 0x0d
++#define DW_CFA_def_cfa_offset 0x0e
++#define DW_CFA_def_cfa_expression 0x0f
++#define DW_CFA_expression 0x10
++#define DW_CFA_offset_extended_sf 0x11
++#define DW_CFA_def_cfa_sf 0x12
++#define DW_CFA_def_cfa_offset_sf 0x13
++#define DW_CFA_val_offset 0x14
++#define DW_CFA_val_offset_sf 0x15
++#define DW_CFA_val_expression 0x16
++#define DW_CFA_lo_user 0x1c
++#define DW_CFA_GNU_window_save 0x2d
++#define DW_CFA_GNU_args_size 0x2e
++#define DW_CFA_GNU_negative_offset_extended 0x2f
++#define DW_CFA_hi_user 0x3f
++
++#define DW_EH_PE_FORM 0x07
++#define DW_EH_PE_native 0x00
++#define DW_EH_PE_leb128 0x01
++#define DW_EH_PE_data2 0x02
++#define DW_EH_PE_data4 0x03
++#define DW_EH_PE_data8 0x04
++#define DW_EH_PE_signed 0x08
++#define DW_EH_PE_ADJUST 0x70
++#define DW_EH_PE_abs 0x00
++#define DW_EH_PE_pcrel 0x10
++#define DW_EH_PE_textrel 0x20
++#define DW_EH_PE_datarel 0x30
++#define DW_EH_PE_funcrel 0x40
++#define DW_EH_PE_aligned 0x50
++#define DW_EH_PE_indirect 0x80
++#define DW_EH_PE_omit 0xff
++
++typedef unsigned long uleb128_t;
++typedef signed long sleb128_t;
++#define sleb128abs __builtin_labs
++
++static struct unwind_table {
++ struct {
++ unsigned long pc;
++ unsigned long range;
++ } core, init;
++ const void *address;
++ unsigned long size;
++ const unsigned char *header;
++ unsigned long hdrsz;
++ struct unwind_table *link;
++ const char *name;
++} root_table;
++
++struct unwind_item {
++ enum item_location {
++ Nowhere,
++ Memory,
++ Register,
++ Value
++ } where;
++ uleb128_t value;
++};
++
++struct unwind_state {
++ uleb128_t loc, org;
++ const u8 *cieStart, *cieEnd;
++ uleb128_t codeAlign;
++ sleb128_t dataAlign;
++ struct cfa {
++ uleb128_t reg, offs;
++ } cfa;
++ struct unwind_item regs[ARRAY_SIZE(reg_info)];
++ unsigned stackDepth:8;
++ unsigned version:8;
++ const u8 *label;
++ const u8 *stack[MAX_STACK_DEPTH];
++};
++
++static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
++
++static unsigned unwind_debug;
++static int __init unwind_debug_setup(char *s)
++{
++ unwind_debug = simple_strtoul(s, NULL, 0);
++ return 1;
++}
++__setup("unwind_debug=", unwind_debug_setup);
++#define dprintk(lvl, fmt, args...) \
++ ((void)(lvl > unwind_debug \
++ || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
++
++static struct unwind_table *find_table(unsigned long pc)
++{
++ struct unwind_table *table;
++
++ for (table = &root_table; table; table = table->link)
++ if ((pc >= table->core.pc
++ && pc < table->core.pc + table->core.range)
++ || (pc >= table->init.pc
++ && pc < table->init.pc + table->init.range))
++ break;
++
++ return table;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++ const void *end,
++ signed ptrType,
++ unsigned long text_base,
++ unsigned long data_base);
++
++static void init_unwind_table(struct unwind_table *table,
++ const char *name,
++ const void *core_start,
++ unsigned long core_size,
++ const void *init_start,
++ unsigned long init_size,
++ const void *table_start,
++ unsigned long table_size,
++ const u8 *header_start,
++ unsigned long header_size)
++{
++ const u8 *ptr = header_start + 4;
++ const u8 *end = header_start + header_size;
++
++ table->core.pc = (unsigned long)core_start;
++ table->core.range = core_size;
++ table->init.pc = (unsigned long)init_start;
++ table->init.range = init_size;
++ table->address = table_start;
++ table->size = table_size;
++ /* See if the linker provided table looks valid. */
++ if (header_size <= 4
++ || header_start[0] != 1
++ || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
++ != table_start
++ || !read_pointer(&ptr, end, header_start[2], 0, 0)
++ || !read_pointer(&ptr, end, header_start[3], 0,
++ (unsigned long)header_start)
++ || !read_pointer(&ptr, end, header_start[3], 0,
++ (unsigned long)header_start))
++ header_start = NULL;
++ table->hdrsz = header_size;
++ smp_wmb();
++ table->header = header_start;
++ table->link = NULL;
++ table->name = name;
++}
++
++void __init unwind_init(void)
++{
++ init_unwind_table(&root_table, "kernel",
++ _text, _end - _text,
++ NULL, 0,
++ __start_unwind, __end_unwind - __start_unwind,
++ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
++}
++
++static const u32 bad_cie, not_fde;
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
++static signed fde_pointer_type(const u32 *cie);
++
++struct eh_frame_hdr_table_entry {
++ unsigned long start, fde;
++};
++
++static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
++{
++ const struct eh_frame_hdr_table_entry *e1 = p1;
++ const struct eh_frame_hdr_table_entry *e2 = p2;
++
++ return (e1->start > e2->start) - (e1->start < e2->start);
++}
++
++static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
++{
++ struct eh_frame_hdr_table_entry *e1 = p1;
++ struct eh_frame_hdr_table_entry *e2 = p2;
++ unsigned long v;
++
++ v = e1->start;
++ e1->start = e2->start;
++ e2->start = v;
++ v = e1->fde;
++ e1->fde = e2->fde;
++ e2->fde = v;
++}
++
++static void __init setup_unwind_table(struct unwind_table *table,
++ void *(*alloc)(unsigned long))
++{
++ const u8 *ptr;
++ unsigned long tableSize = table->size, hdrSize;
++ unsigned n;
++ const u32 *fde;
++ struct {
++ u8 version;
++ u8 eh_frame_ptr_enc;
++ u8 fde_count_enc;
++ u8 table_enc;
++ unsigned long eh_frame_ptr;
++ unsigned int fde_count;
++ struct eh_frame_hdr_table_entry table[];
++ } __attribute__((__packed__)) *header;
++
++ if (table->header)
++ return;
++
++ if (table->hdrsz)
++ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
++ table->name);
++
++ if (tableSize & (sizeof(*fde) - 1))
++ return;
++
++ for (fde = table->address, n = 0;
++ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
++ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++ const u32 *cie = cie_for_fde(fde, table);
++ signed ptrType;
++
++ if (cie == &not_fde)
++ continue;
++ if (cie == NULL
++ || cie == &bad_cie
++ || (ptrType = fde_pointer_type(cie)) < 0)
++ return;
++ ptr = (const u8 *)(fde + 2);
++ if (!read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0))
++ return;
++ ++n;
++ }
++
++ if (tableSize || !n)
++ return;
++
++ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
++ + 2 * n * sizeof(unsigned long);
++ dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
++ header = alloc(hdrSize);
++ if (!header)
++ return;
++ header->version = 1;
++ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
++ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
++ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
++ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
++ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
++ % __alignof(typeof(header->fde_count)));
++ header->fde_count = n;
++
++ BUILD_BUG_ON(offsetof(typeof(*header), table)
++ % __alignof(typeof(*header->table)));
++ for (fde = table->address, tableSize = table->size, n = 0;
++ tableSize;
++ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
++
++ if (!fde[1])
++ continue; /* this is a CIE */
++ ptr = (const u8 *)(fde + 2);
++ header->table[n].start = read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ fde_pointer_type(cie), 0, 0);
++ header->table[n].fde = (unsigned long)fde;
++ ++n;
++ }
++ WARN_ON(n != header->fde_count);
++
++ sort(header->table,
++ n,
++ sizeof(*header->table),
++ cmp_eh_frame_hdr_table_entries,
++ swap_eh_frame_hdr_table_entries);
++
++ table->hdrsz = hdrSize;
++ smp_wmb();
++ table->header = (const void *)header;
++}
++
++static void *__init balloc(unsigned long sz)
++{
++ return __alloc_bootmem_nopanic(sz,
++ sizeof(unsigned int),
++ __pa(MAX_DMA_ADDRESS));
++}
++
++void __init unwind_setup(void)
++{
++ setup_unwind_table(&root_table, balloc);
++}
++
++#ifdef CONFIG_MODULES
++
++static struct unwind_table *last_table;
++
++/* Must be called with module_mutex held. */
++void *unwind_add_table(struct module *module,
++ const void *table_start,
++ unsigned long table_size)
++{
++ struct unwind_table *table;
++
++ if (table_size <= 0)
++ return NULL;
++
++ table = kmalloc(sizeof(*table), GFP_KERNEL);
++ if (!table)
++ return NULL;
++
++ init_unwind_table(table, module->name,
++ module->module_core, module->core_size,
++ module->module_init, module->init_size,
++ table_start, table_size,
++ NULL, 0);
++
++ if (last_table)
++ last_table->link = table;
++ else
++ root_table.link = table;
++ last_table = table;
++
++ return table;
++}
++
++struct unlink_table_info
++{
++ struct unwind_table *table;
++ int init_only;
++};
++
++static int unlink_table(void *arg)
++{
++ struct unlink_table_info *info = arg;
++ struct unwind_table *table = info->table, *prev;
++
++ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
++ ;
++
++ if (prev->link) {
++ if (info->init_only) {
++ table->init.pc = 0;
++ table->init.range = 0;
++ info->table = NULL;
++ } else {
++ prev->link = table->link;
++ if (!prev->link)
++ last_table = prev;
++ }
++ } else
++ info->table = NULL;
++
++ return 0;
++}
++
++/* Must be called with module_mutex held. */
++void unwind_remove_table(void *handle, int init_only)
++{
++ struct unwind_table *table = handle;
++ struct unlink_table_info info;
++
++ if (!table || table == &root_table)
++ return;
++
++ if (init_only && table == last_table) {
++ table->init.pc = 0;
++ table->init.range = 0;
++ return;
++ }
++
++ info.table = table;
++ info.init_only = init_only;
++ stop_machine_run(unlink_table, &info, NR_CPUS);
++
++ if (info.table)
++ kfree(table);
++}
++
++#endif /* CONFIG_MODULES */
++
++static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
++{
++ const u8 *cur = *pcur;
++ uleb128_t value;
++ unsigned shift;
++
++ for (shift = 0, value = 0; cur < end; shift += 7) {
++ if (shift + 7 > 8 * sizeof(value)
++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++ cur = end + 1;
++ break;
++ }
++ value |= (uleb128_t)(*cur & 0x7f) << shift;
++ if (!(*cur++ & 0x80))
++ break;
++ }
++ *pcur = cur;
++
++ return value;
++}
++
++static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
++{
++ const u8 *cur = *pcur;
++ sleb128_t value;
++ unsigned shift;
++
++ for (shift = 0, value = 0; cur < end; shift += 7) {
++ if (shift + 7 > 8 * sizeof(value)
++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++ cur = end + 1;
++ break;
++ }
++ value |= (sleb128_t)(*cur & 0x7f) << shift;
++ if (!(*cur & 0x80)) {
++ value |= -(*cur++ & 0x40) << shift;
++ break;
++ }
++ }
++ *pcur = cur;
++
++ return value;
++}
++
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
++{
++ const u32 *cie;
++
++ if (!*fde || (*fde & (sizeof(*fde) - 1)))
++ return &bad_cie;
++ if (!fde[1])
++ return &not_fde; /* this is a CIE */
++ if ((fde[1] & (sizeof(*fde) - 1))
++ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
++ return NULL; /* this is not a valid FDE */
++ cie = fde + 1 - fde[1] / sizeof(*fde);
++ if (*cie <= sizeof(*cie) + 4
++ || *cie >= fde[1] - sizeof(*fde)
++ || (*cie & (sizeof(*cie) - 1))
++ || cie[1])
++ return NULL; /* this is not a (valid) CIE */
++ return cie;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++ const void *end,
++ signed ptrType,
++ unsigned long text_base,
++ unsigned long data_base)
++{
++ unsigned long value = 0;
++ union {
++ const u8 *p8;
++ const u16 *p16u;
++ const s16 *p16s;
++ const u32 *p32u;
++ const s32 *p32s;
++ const unsigned long *pul;
++ } ptr;
++
++ if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
++ dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
++ return 0;
++ }
++ ptr.p8 = *pLoc;
++ switch(ptrType & DW_EH_PE_FORM) {
++ case DW_EH_PE_data2:
++ if (end < (const void *)(ptr.p16u + 1)) {
++ dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ if(ptrType & DW_EH_PE_signed)
++ value = get_unaligned(ptr.p16s++);
++ else
++ value = get_unaligned(ptr.p16u++);
++ break;
++ case DW_EH_PE_data4:
++#ifdef CONFIG_64BIT
++ if (end < (const void *)(ptr.p32u + 1)) {
++ dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ if(ptrType & DW_EH_PE_signed)
++ value = get_unaligned(ptr.p32s++);
++ else
++ value = get_unaligned(ptr.p32u++);
++ break;
++ case DW_EH_PE_data8:
++ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
++#else
++ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
++#endif
++ case DW_EH_PE_native:
++ if (end < (const void *)(ptr.pul + 1)) {
++ dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ value = get_unaligned(ptr.pul++);
++ break;
++ case DW_EH_PE_leb128:
++ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
++ value = ptrType & DW_EH_PE_signed
++ ? get_sleb128(&ptr.p8, end)
++ : get_uleb128(&ptr.p8, end);
++ if ((const void *)ptr.p8 > end) {
++ dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ break;
++ default:
++ dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
++ ptrType, ptr.p8, end);
++ return 0;
++ }
++ switch(ptrType & DW_EH_PE_ADJUST) {
++ case DW_EH_PE_abs:
++ break;
++ case DW_EH_PE_pcrel:
++ value += (unsigned long)*pLoc;
++ break;
++ case DW_EH_PE_textrel:
++ if (likely(text_base)) {
++ value += text_base;
++ break;
++ }
++ dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
++ ptrType, *pLoc, end);
++ return 0;
++ case DW_EH_PE_datarel:
++ if (likely(data_base)) {
++ value += data_base;
++ break;
++ }
++ dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
++ ptrType, *pLoc, end);
++ return 0;
++ default:
++ dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
++ ptrType, *pLoc, end);
++ return 0;
++ }
++ if ((ptrType & DW_EH_PE_indirect)
++ && probe_kernel_address((unsigned long *)value, value)) {
++ dprintk(1, "Cannot read indirect value %lx (%p,%p).",
++ value, *pLoc, end);
++ return 0;
++ }
++ *pLoc = ptr.p8;
++
++ return value;
++}
++
++static signed fde_pointer_type(const u32 *cie)
++{
++ const u8 *ptr = (const u8 *)(cie + 2);
++ unsigned version = *ptr;
++
++ if (version != 1)
++ return -1; /* unsupported */
++ if (*++ptr) {
++ const char *aug;
++ const u8 *end = (const u8 *)(cie + 1) + *cie;
++ uleb128_t len;
++
++ /* check if augmentation size is first (and thus present) */
++ if (*ptr != 'z')
++ return -1;
++ /* check if augmentation string is nul-terminated */
++ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
++ return -1;
++ ++ptr; /* skip terminator */
++ get_uleb128(&ptr, end); /* skip code alignment */
++ get_sleb128(&ptr, end); /* skip data alignment */
++ /* skip return address column */
++ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
++ len = get_uleb128(&ptr, end); /* augmentation length */
++ if (ptr + len < ptr || ptr + len > end)
++ return -1;
++ end = ptr + len;
++ while (*++aug) {
++ if (ptr >= end)
++ return -1;
++ switch(*aug) {
++ case 'L':
++ ++ptr;
++ break;
++ case 'P': {
++ signed ptrType = *ptr++;
++
++ if (!read_pointer(&ptr, end, ptrType, 0, 0)
++ || ptr > end)
++ return -1;
++ }
++ break;
++ case 'R':
++ return *ptr;
++ default:
++ return -1;
++ }
++ }
++ }
++ return DW_EH_PE_native|DW_EH_PE_abs;
++}
++
++static int advance_loc(unsigned long delta, struct unwind_state *state)
++{
++ state->loc += delta * state->codeAlign;
++
++ return delta > 0;
++}
++
++static void set_rule(uleb128_t reg,
++ enum item_location where,
++ uleb128_t value,
++ struct unwind_state *state)
++{
++ if (reg < ARRAY_SIZE(state->regs)) {
++ state->regs[reg].where = where;
++ state->regs[reg].value = value;
++ }
++}
++
++static int processCFI(const u8 *start,
++ const u8 *end,
++ unsigned long targetLoc,
++ signed ptrType,
++ struct unwind_state *state)
++{
++ union {
++ const u8 *p8;
++ const u16 *p16;
++ const u32 *p32;
++ } ptr;
++ int result = 1;
++
++ if (start != state->cieStart) {
++ state->loc = state->org;
++ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
++ if (targetLoc == 0 && state->label == NULL)
++ return result;
++ }
++ for (ptr.p8 = start; result && ptr.p8 < end; ) {
++ switch(*ptr.p8 >> 6) {
++ uleb128_t value;
++
++ case 0:
++ switch(*ptr.p8++) {
++ case DW_CFA_nop:
++ break;
++ case DW_CFA_set_loc:
++ state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
++ if (state->loc == 0)
++ result = 0;
++ break;
++ case DW_CFA_advance_loc1:
++ result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
++ break;
++ case DW_CFA_advance_loc2:
++ result = ptr.p8 <= end + 2
++ && advance_loc(*ptr.p16++, state);
++ break;
++ case DW_CFA_advance_loc4:
++ result = ptr.p8 <= end + 4
++ && advance_loc(*ptr.p32++, state);
++ break;
++ case DW_CFA_offset_extended:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_val_offset:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_offset_extended_sf:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_val_offset_sf:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_restore_extended:
++ case DW_CFA_undefined:
++ case DW_CFA_same_value:
++ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
++ break;
++ case DW_CFA_register:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value,
++ Register,
++ get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_remember_state:
++ if (ptr.p8 == state->label) {
++ state->label = NULL;
++ return 1;
++ }
++ if (state->stackDepth >= MAX_STACK_DEPTH) {
++ dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ state->stack[state->stackDepth++] = ptr.p8;
++ break;
++ case DW_CFA_restore_state:
++ if (state->stackDepth) {
++ const uleb128_t loc = state->loc;
++ const u8 *label = state->label;
++
++ state->label = state->stack[state->stackDepth - 1];
++ memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
++ memset(state->regs, 0, sizeof(state->regs));
++ state->stackDepth = 0;
++ result = processCFI(start, end, 0, ptrType, state);
++ state->loc = loc;
++ state->label = label;
++ } else {
++ dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ break;
++ case DW_CFA_def_cfa:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ /*nobreak*/
++ case DW_CFA_def_cfa_offset:
++ state->cfa.offs = get_uleb128(&ptr.p8, end);
++ break;
++ case DW_CFA_def_cfa_sf:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ /*nobreak*/
++ case DW_CFA_def_cfa_offset_sf:
++ state->cfa.offs = get_sleb128(&ptr.p8, end)
++ * state->dataAlign;
++ break;
++ case DW_CFA_def_cfa_register:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ break;
++ /*todo case DW_CFA_def_cfa_expression: */
++ /*todo case DW_CFA_expression: */
++ /*todo case DW_CFA_val_expression: */
++ case DW_CFA_GNU_args_size:
++ get_uleb128(&ptr.p8, end);
++ break;
++ case DW_CFA_GNU_negative_offset_extended:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value,
++ Memory,
++ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_GNU_window_save:
++ default:
++ dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
++ result = 0;
++ break;
++ }
++ break;
++ case 1:
++ result = advance_loc(*ptr.p8++ & 0x3f, state);
++ break;
++ case 2:
++ value = *ptr.p8++ & 0x3f;
++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++ break;
++ case 3:
++ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
++ break;
++ }
++ if (ptr.p8 > end) {
++ dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
++ result = 0;
++ }
++ if (result && targetLoc != 0 && targetLoc < state->loc)
++ return 1;
++ }
++
++ if (result && ptr.p8 < end)
++ dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
++
++ return result
++ && ptr.p8 == end
++ && (targetLoc == 0
++ || (/*todo While in theory this should apply, gcc in practice omits
++ everything past the function prolog, and hence the location
++ never reaches the end of the function.
++ targetLoc < state->loc &&*/ state->label == NULL));
++}
++
++/* Unwind to previous to frame. Returns 0 if successful, negative
++ * number in case of an error. */
++int unwind(struct unwind_frame_info *frame)
++{
++#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
++ const u32 *fde = NULL, *cie = NULL;
++ const u8 *ptr = NULL, *end = NULL;
++ unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
++ unsigned long startLoc = 0, endLoc = 0, cfa;
++ unsigned i;
++ signed ptrType = -1;
++ uleb128_t retAddrReg = 0;
++ const struct unwind_table *table;
++ struct unwind_state state;
++
++ if (UNW_PC(frame) == 0)
++ return -EINVAL;
++ if ((table = find_table(pc)) != NULL
++ && !(table->size & (sizeof(*fde) - 1))) {
++ const u8 *hdr = table->header;
++ unsigned long tableSize;
++
++ smp_rmb();
++ if (hdr && hdr[0] == 1) {
++ switch(hdr[3] & DW_EH_PE_FORM) {
++ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
++ case DW_EH_PE_data2: tableSize = 2; break;
++ case DW_EH_PE_data4: tableSize = 4; break;
++ case DW_EH_PE_data8: tableSize = 8; break;
++ default: tableSize = 0; break;
++ }
++ ptr = hdr + 4;
++ end = hdr + table->hdrsz;
++ if (tableSize
++ && read_pointer(&ptr, end, hdr[1], 0, 0)
++ == (unsigned long)table->address
++ && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
++ && i == (end - ptr) / (2 * tableSize)
++ && !((end - ptr) % (2 * tableSize))) {
++ do {
++ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
++
++ startLoc = read_pointer(&cur,
++ cur + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr);
++ if (pc < startLoc)
++ i /= 2;
++ else {
++ ptr = cur - tableSize;
++ i = (i + 1) / 2;
++ }
++ } while (startLoc && i > 1);
++ if (i == 1
++ && (startLoc = read_pointer(&ptr,
++ ptr + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr)) != 0
++ && pc >= startLoc)
++ fde = (void *)read_pointer(&ptr,
++ ptr + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr);
++ }
++ }
++ if(hdr && !fde)
++ dprintk(3, "Binary lookup for %lx failed.", pc);
++
++ if (fde != NULL) {
++ cie = cie_for_fde(fde, table);
++ ptr = (const u8 *)(fde + 2);
++ if(cie != NULL
++ && cie != &bad_cie
++ && cie != &not_fde
++ && (ptrType = fde_pointer_type(cie)) >= 0
++ && read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0) == startLoc) {
++ if (!(ptrType & DW_EH_PE_indirect))
++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++ endLoc = startLoc
++ + read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if(pc >= endLoc)
++ fde = NULL;
++ } else
++ fde = NULL;
++ if(!fde)
++ dprintk(1, "Binary lookup result for %lx discarded.", pc);
++ }
++ if (fde == NULL) {
++ for (fde = table->address, tableSize = table->size;
++ cie = NULL, tableSize > sizeof(*fde)
++ && tableSize - sizeof(*fde) >= *fde;
++ tableSize -= sizeof(*fde) + *fde,
++ fde += 1 + *fde / sizeof(*fde)) {
++ cie = cie_for_fde(fde, table);
++ if (cie == &bad_cie) {
++ cie = NULL;
++ break;
++ }
++ if (cie == NULL
++ || cie == &not_fde
++ || (ptrType = fde_pointer_type(cie)) < 0)
++ continue;
++ ptr = (const u8 *)(fde + 2);
++ startLoc = read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if (!startLoc)
++ continue;
++ if (!(ptrType & DW_EH_PE_indirect))
++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++ endLoc = startLoc
++ + read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if (pc >= startLoc && pc < endLoc)
++ break;
++ }
++ if(!fde)
++ dprintk(3, "Linear lookup for %lx failed.", pc);
++ }
++ }
++ if (cie != NULL) {
++ memset(&state, 0, sizeof(state));
++ state.cieEnd = ptr; /* keep here temporarily */
++ ptr = (const u8 *)(cie + 2);
++ end = (const u8 *)(cie + 1) + *cie;
++ frame->call_frame = 1;
++ if ((state.version = *ptr) != 1)
++ cie = NULL; /* unsupported version */
++ else if (*++ptr) {
++ /* check if augmentation size is first (and thus present) */
++ if (*ptr == 'z') {
++ while (++ptr < end && *ptr) {
++ switch(*ptr) {
++ /* check for ignorable (or already handled)
++ * nul-terminated augmentation string */
++ case 'L':
++ case 'P':
++ case 'R':
++ continue;
++ case 'S':
++ frame->call_frame = 0;
++ continue;
++ default:
++ break;
++ }
++ break;
++ }
++ }
++ if (ptr >= end || *ptr)
++ cie = NULL;
++ }
++ if(!cie)
++ dprintk(1, "CIE unusable (%p,%p).", ptr, end);
++ ++ptr;
++ }
++ if (cie != NULL) {
++ /* get code alignment factor */
++ state.codeAlign = get_uleb128(&ptr, end);
++ /* get data alignment factor */
++ state.dataAlign = get_sleb128(&ptr, end);
++ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
++ cie = NULL;
++ else if (UNW_PC(frame) % state.codeAlign
++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++ dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
++ UNW_PC(frame), UNW_SP(frame));
++ return -EPERM;
++ } else {
++ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
++ /* skip augmentation */
++ if (((const char *)(cie + 2))[1] == 'z') {
++ uleb128_t augSize = get_uleb128(&ptr, end);
++
++ ptr += augSize;
++ }
++ if (ptr > end
++ || retAddrReg >= ARRAY_SIZE(reg_info)
++ || REG_INVALID(retAddrReg)
++ || reg_info[retAddrReg].width != sizeof(unsigned long))
++ cie = NULL;
++ }
++ if(!cie)
++ dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
++ }
++ if (cie != NULL) {
++ state.cieStart = ptr;
++ ptr = state.cieEnd;
++ state.cieEnd = end;
++ end = (const u8 *)(fde + 1) + *fde;
++ /* skip augmentation */
++ if (((const char *)(cie + 2))[1] == 'z') {
++ uleb128_t augSize = get_uleb128(&ptr, end);
++
++ if ((ptr += augSize) > end)
++ fde = NULL;
++ }
++ if(!fde)
++ dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
++ }
++ if (cie == NULL || fde == NULL) {
++#ifdef CONFIG_FRAME_POINTER
++ unsigned long top, bottom;
++
++ if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
++ return -EPERM;
++ top = STACK_TOP(frame->task);
++ bottom = STACK_BOTTOM(frame->task);
++# if FRAME_RETADDR_OFFSET < 0
++ if (UNW_SP(frame) < top
++ && UNW_FP(frame) <= UNW_SP(frame)
++ && bottom < UNW_FP(frame)
++# else
++ if (UNW_SP(frame) > top
++ && UNW_FP(frame) >= UNW_SP(frame)
++ && bottom > UNW_FP(frame)
++# endif
++ && !((UNW_SP(frame) | UNW_FP(frame))
++ & (sizeof(unsigned long) - 1))) {
++ unsigned long link;
++
++ if (!probe_kernel_address(
++ (unsigned long *)(UNW_FP(frame)
++ + FRAME_LINK_OFFSET),
++ link)
++# if FRAME_RETADDR_OFFSET < 0
++ && link > bottom && link < UNW_FP(frame)
++# else
++ && link > UNW_FP(frame) && link < bottom
++# endif
++ && !(link & (sizeof(link) - 1))
++ && !probe_kernel_address(
++ (unsigned long *)(UNW_FP(frame)
++ + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
++ UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
++# if FRAME_RETADDR_OFFSET < 0
++ -
++# else
++ +
++# endif
++ sizeof(UNW_PC(frame));
++ UNW_FP(frame) = link;
++ return 0;
++ }
++ }
++#endif
++ return -ENXIO;
++ }
++ state.org = startLoc;
++ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
++ /* process instructions */
++ if (!processCFI(ptr, end, pc, ptrType, &state)
++ || state.loc > endLoc
++ || state.regs[retAddrReg].where == Nowhere
++ || state.cfa.reg >= ARRAY_SIZE(reg_info)
++ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
++ || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
++ || state.cfa.offs % sizeof(unsigned long)) {
++ dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
++ return -EIO;
++ }
++ /* update frame */
++#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
++ if(frame->call_frame
++ && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
++ frame->call_frame = 0;
++#endif
++ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
++ startLoc = min((unsigned long)UNW_SP(frame), cfa);
++ endLoc = max((unsigned long)UNW_SP(frame), cfa);
++ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
++ startLoc = min(STACK_LIMIT(cfa), cfa);
++ endLoc = max(STACK_LIMIT(cfa), cfa);
++ }
++#ifndef CONFIG_64BIT
++# define CASES CASE(8); CASE(16); CASE(32)
++#else
++# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
++#endif
++ pc = UNW_PC(frame);
++ sp = UNW_SP(frame);
++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++ if (REG_INVALID(i)) {
++ if (state.regs[i].where == Nowhere)
++ continue;
++ dprintk(1, "Cannot restore register %u (%d).",
++ i, state.regs[i].where);
++ return -EIO;
++ }
++ switch(state.regs[i].where) {
++ default:
++ break;
++ case Register:
++ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
++ || REG_INVALID(state.regs[i].value)
++ || reg_info[i].width > reg_info[state.regs[i].value].width) {
++ dprintk(1, "Cannot restore register %u from register %lu.",
++ i, state.regs[i].value);
++ return -EIO;
++ }
++ switch(reg_info[state.regs[i].value].width) {
++#define CASE(n) \
++ case sizeof(u##n): \
++ state.regs[i].value = FRAME_REG(state.regs[i].value, \
++ const u##n); \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported register size %u (%lu).",
++ reg_info[state.regs[i].value].width,
++ state.regs[i].value);
++ return -EIO;
++ }
++ break;
++ }
++ }
++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++ if (REG_INVALID(i))
++ continue;
++ switch(state.regs[i].where) {
++ case Nowhere:
++ if (reg_info[i].width != sizeof(UNW_SP(frame))
++ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
++ != &UNW_SP(frame))
++ continue;
++ UNW_SP(frame) = cfa;
++ break;
++ case Register:
++ switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++ FRAME_REG(i, u##n) = state.regs[i].value; \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported register size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ break;
++ case Value:
++ if (reg_info[i].width != sizeof(unsigned long)) {
++ dprintk(1, "Unsupported value size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
++ * state.dataAlign;
++ break;
++ case Memory: {
++ unsigned long addr = cfa + state.regs[i].value
++ * state.dataAlign;
++
++ if ((state.regs[i].value * state.dataAlign)
++ % sizeof(unsigned long)
++ || addr < startLoc
++ || addr + sizeof(unsigned long) < addr
++ || addr + sizeof(unsigned long) > endLoc) {
++ dprintk(1, "Bad memory location %lx (%lx).",
++ addr, state.regs[i].value);
++ return -EIO;
++ }
++ switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++ probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported memory size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ }
++ break;
++ }
++ }
++
++ if (UNW_PC(frame) % state.codeAlign
++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++ dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
++ UNW_PC(frame), UNW_SP(frame));
++ return -EIO;
++ }
++ if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
++ dprintk(1, "No progress (%lx,%lx).", pc, sp);
++ return -EIO;
++ }
++
++ return 0;
++#undef CASES
++#undef FRAME_REG
++}
++EXPORT_SYMBOL(unwind);
++
++int unwind_init_frame_info(struct unwind_frame_info *info,
++ struct task_struct *tsk,
++ /*const*/ struct pt_regs *regs)
++{
++ info->task = tsk;
++ info->call_frame = 0;
++ arch_unw_init_frame_info(info, regs);
++
++ return 0;
++}
++EXPORT_SYMBOL(unwind_init_frame_info);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++int unwind_init_blocked(struct unwind_frame_info *info,
++ struct task_struct *tsk)
++{
++ info->task = tsk;
++ info->call_frame = 0;
++ arch_unw_init_blocked(info);
++
++ return 0;
++}
++EXPORT_SYMBOL(unwind_init_blocked);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++int unwind_init_running(struct unwind_frame_info *info,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg)
++{
++ info->task = current;
++ info->call_frame = 0;
++
++ return arch_unwind_init_running(info, callback, arg);
++}
++EXPORT_SYMBOL(unwind_init_running);
++
+Index: linux-2.6.20.3/lib/Kconfig.debug
+===================================================================
+--- linux-2.6.20.3.orig/lib/Kconfig.debug 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/lib/Kconfig.debug 2007-08-29 15:11:26.000000000 +0200
+@@ -354,6 +354,24 @@
+ some architectures or if you use external debuggers.
+ If you don't debug the kernel, you can say N.
+
++config UNWIND_INFO
++ bool "Compile the kernel with frame unwind information"
++ depends on !IA64 && !PARISC && !ARM
++ depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
++ help
++ If you say Y here the resulting kernel image will be slightly larger
++ but not slower, and it will give very useful debugging information.
++ If you don't debug the kernel, you can say N, but we may not be able
++ to solve problems without frame unwind information or frame pointers.
++
++config STACK_UNWIND
++ bool "Stack unwind support"
++ depends on UNWIND_INFO
++ depends on X86
++ help
++ This enables more precise stack traces, omitting all unrelated
++ occurrences of pointers into kernel code from the dump.
++
+ config FORCED_INLINING
+ bool "Force gcc to inline functions marked 'inline'"
+ depends on DEBUG_KERNEL
+@@ -400,6 +418,9 @@
+ config FAULT_INJECTION
+ bool "Fault-injection framework"
+ depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
++ # could support fp on X86_32 here too, but let's not
++ select UNWIND_INFO if X86
++ select STACK_UNWIND if X86
+ select STACKTRACE
+ select FRAME_POINTER
+ help
+Index: linux-2.6.20.3/arch/x86_64/kernel/traps.c
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/traps.c 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/traps.c 2007-08-29 15:10:06.000000000 +0200
+@@ -110,6 +110,11 @@
+ }
+
+ int kstack_depth_to_print = 12;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+
+ #ifdef CONFIG_KALLSYMS
+ void printk_address(unsigned long address)
+@@ -212,6 +217,33 @@
+ return NULL;
+ }
+
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
++{
++ struct ops_and_data *oad = (struct ops_and_data *)context;
++ int n = 0;
++ unsigned long sp = UNW_SP(info);
++
++ if (arch_unw_user_mode(info))
++ return -1;
++ while (unwind(info) == 0 && UNW_PC(info)) {
++ n++;
++ oad->ops->address(oad->data, UNW_PC(info));
++ if (arch_unw_user_mode(info))
++ break;
++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++ && sp > UNW_SP(info))
++ break;
++ sp = UNW_SP(info);
++ touch_nmi_watchdog();
++ }
++ return n;
++}
++
+ #define MSG(txt) ops->warning(data, txt)
+
+ /*
+@@ -239,6 +271,40 @@
+ if (!tsk)
+ tsk = current;
+
++ if (call_trace >= 0) {
++ int unw_ret = 0;
++ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
++
++ if (regs) {
++ if (unwind_init_frame_info(&info, tsk, regs) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ } else if (tsk == current)
++ unw_ret = unwind_init_running(&info, dump_trace_unwind,
++ &oad);
++ else {
++ if (unwind_init_blocked(&info, tsk) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ }
++ if (unw_ret > 0) {
++ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++ ops->warning_symbol(data,
++ "DWARF2 unwinder stuck at %s",
++ UNW_PC(&info));
++ if ((long)UNW_SP(&info) < 0) {
++ MSG("Leftover inexact backtrace:");
++ stack = (unsigned long *)UNW_SP(&info);
++ if (!stack)
++ goto out;
++ } else
++ MSG("Full inexact backtrace again:");
++ } else if (call_trace >= 1)
++ goto out;
++ else
++ MSG("Full inexact backtrace again:");
++ } else
++ MSG("Inexact backtrace:");
++ }
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+@@ -322,6 +388,7 @@
+ tinfo = task_thread_info(tsk);
+ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++out:
+ put_cpu();
+ }
+ EXPORT_SYMBOL(dump_trace);
+@@ -1122,3 +1189,21 @@
+ return 0;
+ }
+ early_param("kstack", kstack_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (!s)
++ return -EINVAL;
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 0;
++}
++early_param("call_trace", call_trace_setup);
++#endif
+Index: linux-2.6.20.3/arch/i386/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/i386/kernel/Makefile 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/i386/kernel/Makefile 2007-08-29 15:10:06.000000000 +0200
+@@ -39,6 +39,7 @@
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+ obj-$(CONFIG_HPET_TIMER) += hpet.o
+ obj-$(CONFIG_K8_NB) += k8.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+
+ # Make sure this is linked after any other paravirt_ops structs: see head.S
+ obj-$(CONFIG_PARAVIRT) += paravirt.o
+Index: linux-2.6.20.3/arch/i386/kernel/unwind.S
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/arch/i386/kernel/unwind.S 2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,36 @@
++/* Assembler support code for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/ptrace-abi.h>
++#include <asm/segment.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++ CFI_STARTPROC
++ movl 4(%esp), %edx
++ movl (%esp), %ecx
++ leal 4(%esp), %eax
++ movl %ebx, PT_EBX(%edx)
++ xorl %ebx, %ebx
++ movl %ebx, PT_ECX(%edx)
++ movl %ebx, PT_EDX(%edx)
++ movl %esi, PT_ESI(%edx)
++ movl %edi, PT_EDI(%edx)
++ movl %ebp, PT_EBP(%edx)
++ movl %ebx, PT_EAX(%edx)
++ movl $__USER_DS, PT_DS(%edx)
++ movl $__USER_DS, PT_ES(%edx)
++ movl $0, PT_FS(%edx)
++ movl %ebx, PT_ORIG_EAX(%edx)
++ movl %ecx, PT_EIP(%edx)
++ movl 12(%esp), %ecx
++ movl $__KERNEL_CS, PT_CS(%edx)
++ movl %ebx, PT_EFLAGS(%edx)
++ movl %eax, PT_OLDESP(%edx)
++ movl 8(%esp), %eax
++ movl %ecx, 8(%esp)
++ movl PT_EBX(%edx), %ebx
++ movl $__KERNEL_DS, PT_OLDSS(%edx)
++ jmpl *%eax
++ CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
+Index: linux-2.6.20.3/arch/x86_64/kernel/Makefile
+===================================================================
+--- linux-2.6.20.3.orig/arch/x86_64/kernel/Makefile 2007-08-29 15:00:54.000000000 +0200
++++ linux-2.6.20.3/arch/x86_64/kernel/Makefile 2007-08-29 15:10:06.000000000 +0200
+@@ -37,6 +37,7 @@
+ obj-$(CONFIG_X86_VSMP) += vsmp.o
+ obj-$(CONFIG_K8_NB) += k8.o
+ obj-$(CONFIG_AUDIT) += audit.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_PCI) += early-quirks.o
+Index: linux-2.6.20.3/arch/x86_64/kernel/unwind.S
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ linux-2.6.20.3/arch/x86_64/kernel/unwind.S 2007-08-29 15:10:06.000000000 +0200
+@@ -0,0 +1,38 @@
++/* Assembler support for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/segment.h>
++#include <asm/ptrace.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++ CFI_STARTPROC
++ movq %r15, R15(%rdi)
++ movq %r14, R14(%rdi)
++ xchgq %rsi, %rdx
++ movq %r13, R13(%rdi)
++ movq %r12, R12(%rdi)
++ xorl %eax, %eax
++ movq %rbp, RBP(%rdi)
++ movq %rbx, RBX(%rdi)
++ movq (%rsp), %rcx
++ movq %rax, R11(%rdi)
++ movq %rax, R10(%rdi)
++ movq %rax, R9(%rdi)
++ movq %rax, R8(%rdi)
++ movq %rax, RAX(%rdi)
++ movq %rax, RCX(%rdi)
++ movq %rax, RDX(%rdi)
++ movq %rax, RSI(%rdi)
++ movq %rax, RDI(%rdi)
++ movq %rax, ORIG_RAX(%rdi)
++ movq %rcx, RIP(%rdi)
++ leaq 8(%rsp), %rcx
++ movq $__KERNEL_CS, CS(%rdi)
++ movq %rax, EFLAGS(%rdi)
++ movq %rcx, RSP(%rdi)
++ movq $__KERNEL_DS, SS(%rdi)
++ jmpq *%rdx
++ CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
++
Added: trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch (added)
+++ trunk/lustre/kernel_patches/patches/generic_file_buffered_write_backport_2.6.20.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,45 @@
+Index: linux-2.6.20.3/mm/filemap.c
+===================================================================
+--- linux-2.6.20.3.orig/mm/filemap.c 2007-09-10 14:17:43.000000000 +0200
++++ linux-2.6.20.3/mm/filemap.c 2007-09-10 14:23:31.000000000 +0200
+@@ -2099,21 +2099,27 @@
+ /* Limit the size of the copy to the caller's write size */
+ bytes = min(bytes, count);
+
+- /*
+- * Limit the size of the copy to that of the current segment,
+- * because fault_in_pages_readable() doesn't know how to walk
+- * segments.
++ /* We only need to worry about prefaulting when writes are from
++ * user-space. NFSd uses vfs_writev with several non-aligned
++ * segments in the vector, and limiting to one segment at a time is
++ * a noticeable performance hit for re-write
+ */
+- bytes = min(bytes, cur_iov->iov_len - iov_base);
+-
+- /*
+- * Bring in the user page that we will copy from _first_.
+- * Otherwise there's a nasty deadlock on copying from the
+- * same page as we're writing to, without it being marked
+- * up-to-date.
+- */
+- fault_in_pages_readable(buf, bytes);
++ if (!segment_eq(get_fs(), KERNEL_DS)) {
++ /*
++ * Limit the size of the copy to that of the current
++ * segment, because fault_in_pages_readable() doesn't
++ * know how to walk segments.
++ */
++ bytes = min(bytes, cur_iov->iov_len - iov_base);
+
++ /*
++ * Bring in the user page that we will copy from
++ * _first_. Otherwise there's a nasty deadlock on
++ * copying from the same page as we're writing to,
++ * without it being marked up-to-date.
++ */
++ fault_in_pages_readable(buf, bytes);
++ }
+ page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+ if (!page) {
+ status = -ENOMEM;
Added: trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch?rev=435&op=file
==============================================================================
--- trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch (added)
+++ trunk/lustre/kernel_patches/patches/jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch Wed Feb 13 12:23:22 2008
@@ -1,0 +1,743 @@
+Index: linux-2.6.22.18.patch.lustre.1.6/include/linux/jbd.h
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/include/linux/jbd.h 2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/include/linux/jbd.h 2008-02-12 19:15:28.000000000 +0100
+@@ -428,6 +428,16 @@ struct handle_s
+ };
+
+
++/*
++ * Some stats for checkpoint phase
++ */
++struct transaction_chp_stats_s {
++ unsigned long cs_chp_time;
++ unsigned long cs_forced_to_close;
++ unsigned long cs_written;
++ unsigned long cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism. It
+ * tracks a compound transaction through its various states:
+ *
+@@ -565,6 +575,21 @@ struct transaction_s
+ spinlock_t t_handle_lock;
+
+ /*
++ * Longest time some handle had to wait for running transaction
++ */
++ unsigned long t_max_wait;
++
++ /*
++ * When transaction started
++ */
++ unsigned long t_start;
++
++ /*
++ * Checkpointing stats [j_checkpoint_sem]
++ */
++ struct transaction_chp_stats_s t_chp_stats;
++
++ /*
+ * Number of outstanding updates running on this transaction
+ * [t_handle_lock]
+ */
+@@ -604,6 +629,57 @@ struct transaction_s
+ struct list_head t_jcb;
+ };
+
++struct transaction_run_stats_s {
++ unsigned long rs_wait;
++ unsigned long rs_running;
++ unsigned long rs_locked;
++ unsigned long rs_flushing;
++ unsigned long rs_logging;
++
++ unsigned long rs_handle_count;
++ unsigned long rs_blocks;
++ unsigned long rs_blocks_logged;
++};
++
++struct transaction_stats_s
++{
++ int ts_type;
++ unsigned long ts_tid;
++ union {
++ struct transaction_run_stats_s run;
++ struct transaction_chp_stats_s chp;
++ } u;
++};
++
++#define JBD_STATS_RUN 1
++#define JBD_STATS_CHECKPOINT 2
++
++#define ts_wait u.run.rs_wait
++#define ts_running u.run.rs_running
++#define ts_locked u.run.rs_locked
++#define ts_flushing u.run.rs_flushing
++#define ts_logging u.run.rs_logging
++#define ts_handle_count u.run.rs_handle_count
++#define ts_blocks u.run.rs_blocks
++#define ts_blocks_logged u.run.rs_blocks_logged
++
++#define ts_chp_time u.chp.cs_chp_time
++#define ts_forced_to_close u.chp.cs_forced_to_close
++#define ts_written u.chp.cs_written
++#define ts_dropped u.chp.cs_dropped
++
++#define CURRENT_MSECS (jiffies_to_msecs(jiffies))
++
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++ if (unlikely(start > end))
++ end = end + (~0UL - start);
++ else
++ end -= start;
++ return end;
++}
++
+ /**
+ * struct journal_s - The journal_s type is the concrete type associated with
+ * journal_t.
+@@ -857,6 +933,16 @@ struct journal_s
+ pid_t j_last_sync_writer;
+
+ /*
++	 * Transaction statistics history for /proc [j_history_lock]
++ */
++ struct transaction_stats_s *j_history;
++ int j_history_max;
++ int j_history_cur;
++ spinlock_t j_history_lock;
++ struct proc_dir_entry *j_proc_entry;
++ struct transaction_stats_s j_stats;
++
++ /*
+ * An opaque pointer to fs-private information. ext3 puts its
+ * superblock pointer here
+ */
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/transaction.c 2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/transaction.c 2008-02-12 19:15:28.000000000 +0100
+@@ -61,6 +61,8 @@ get_transaction(journal_t *journal, tran
+
+ J_ASSERT(journal->j_running_transaction == NULL);
+ journal->j_running_transaction = transaction;
++ transaction->t_max_wait = 0;
++ transaction->t_start = CURRENT_MSECS;
+
+ return transaction;
+ }
+@@ -87,6 +89,7 @@ static int start_this_handle(journal_t *
+ int nblocks = handle->h_buffer_credits;
+ transaction_t *new_transaction = NULL;
+ int ret = 0;
++ unsigned long ts = CURRENT_MSECS;
+
+ if (nblocks > journal->j_max_transaction_buffers) {
+ printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -220,6 +223,12 @@ repeat_locked:
+ /* OK, account for the buffers that this operation expects to
+ * use and add the handle to the running transaction. */
+
++ if (time_after(transaction->t_start, ts)) {
++ ts = jbd_time_diff(ts, transaction->t_start);
++ if (ts > transaction->t_max_wait)
++ transaction->t_max_wait= ts;
++ }
++
+ handle->h_transaction = transaction;
+ transaction->t_outstanding_credits += nblocks;
+ transaction->t_updates++;
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/journal.c 2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/journal.c 2008-02-12 19:22:43.000000000 +0100
+@@ -35,6 +35,7 @@
+ #include <linux/kthread.h>
+ #include <linux/poison.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+@@ -643,6 +644,300 @@ struct journal_head *journal_get_descrip
+ return journal_add_journal_head(bh);
+ }
+
++struct jbd_stats_proc_session {
++ journal_t *journal;
++ struct transaction_stats_s *stats;
++ int start;
++ int max;
++};
++
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++ struct transaction_stats_s *ts,
++ int first)
++{
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (!first && ts == s->stats + s->start)
++ return NULL;
++ while (ts->ts_type == 0) {
++ ts++;
++ if (ts == s->stats + s->max)
++ ts = s->stats;
++ if (ts == s->stats + s->start)
++ return NULL;
++ }
++ return ts;
++
++}
++
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts;
++ int l = *pos;
++
++ if (l == 0)
++ return SEQ_START_TOKEN;
++ ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++ if (!ts)
++ return NULL;
++ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
++ return ts;
++}
++
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ struct transaction_stats_s *ts = v;
++
++ ++*pos;
++ if (v == SEQ_START_TOKEN)
++ return jbd_history_skip_empty(s, s->stats + s->start, 1);
++ else
++ return jbd_history_skip_empty(s, ++ts, 0);
++}
++
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++ struct transaction_stats_s *ts = v;
++ if (v == SEQ_START_TOKEN) {
++ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++ "wait", "run", "lock", "flush", "log", "hndls",
++ "block", "inlog", "ctime", "write", "drop",
++ "close");
++ return 0;
++ }
++ if (ts->ts_type == JBD_STATS_RUN)
++ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
++ ts->ts_wait, ts->ts_running, ts->ts_locked,
++ ts->ts_flushing, ts->ts_logging,
++ ts->ts_handle_count, ts->ts_blocks,
++ ts->ts_blocks_logged);
++ else if (ts->ts_type == JBD_STATS_CHECKPOINT)
++ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++ "C", ts->ts_tid, " ", ts->ts_chp_time,
++ ts->ts_written, ts->ts_dropped,
++ ts->ts_forced_to_close);
++ else
++ J_ASSERT(0);
++ return 0;
++}
++
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_history_ops = {
++ .start = jbd_seq_history_start,
++ .next = jbd_seq_history_next,
++ .stop = jbd_seq_history_stop,
++ .show = jbd_seq_history_show,
++};
++
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++ journal_t *journal = PDE(inode)->data;
++ struct jbd_stats_proc_session *s;
++ int rc, size;
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL)
++ return -EIO;
++ size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++ s->stats = kmalloc(size, GFP_KERNEL);
++ if (s->stats == NULL) {
++ kfree(s);
++ return -EIO;
++ }
++ spin_lock(&journal->j_history_lock);
++ memcpy(s->stats, journal->j_history, size);
++ s->max = journal->j_history_max;
++ s->start = journal->j_history_cur % s->max;
++ spin_unlock(&journal->j_history_lock);
++
++ rc = seq_open(file, &jbd_seq_history_ops);
++ if (rc == 0) {
++ struct seq_file *m = (struct seq_file *)file->private_data;
++ m->private = s;
++ } else {
++ kfree(s->stats);
++ kfree(s);
++ }
++ return rc;
++
++}
++
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_history_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_history_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = jbd_seq_history_release,
++};
++
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++ return *pos ? NULL : SEQ_START_TOKEN;
++}
++
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ return NULL;
++}
++
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++ struct jbd_stats_proc_session *s = seq->private;
++ if (v != SEQ_START_TOKEN)
++ return 0;
++ seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++ s->stats->ts_tid,
++ s->journal->j_max_transaction_buffers);
++ if (s->stats->ts_tid == 0)
++ return 0;
++ seq_printf(seq, "average: \n %lums waiting for transaction\n",
++ s->stats->ts_wait / s->stats->ts_tid);
++ seq_printf(seq, " %lums running transaction\n",
++ s->stats->ts_running / s->stats->ts_tid);
++ seq_printf(seq, " %lums transaction was being locked\n",
++ s->stats->ts_locked / s->stats->ts_tid);
++ seq_printf(seq, " %lums flushing data (in ordered mode)\n",
++ s->stats->ts_flushing / s->stats->ts_tid);
++ seq_printf(seq, " %lums logging transaction\n",
++ s->stats->ts_logging / s->stats->ts_tid);
++ seq_printf(seq, " %lu handles per transaction\n",
++ s->stats->ts_handle_count / s->stats->ts_tid);
++ seq_printf(seq, " %lu blocks per transaction\n",
++ s->stats->ts_blocks / s->stats->ts_tid);
++ seq_printf(seq, " %lu logged blocks per transaction\n",
++ s->stats->ts_blocks_logged / s->stats->ts_tid);
++ return 0;
++}
++
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations jbd_seq_info_ops = {
++ .start = jbd_seq_info_start,
++ .next = jbd_seq_info_next,
++ .stop = jbd_seq_info_stop,
++ .show = jbd_seq_info_show,
++};
++
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++ journal_t *journal = PDE(inode)->data;
++ struct jbd_stats_proc_session *s;
++ int rc, size;
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL)
++ return -EIO;
++ size = sizeof(struct transaction_stats_s);
++ s->stats = kmalloc(size, GFP_KERNEL);
++ if (s->stats == NULL) {
++ kfree(s);
++ return -EIO;
++ }
++ spin_lock(&journal->j_history_lock);
++ memcpy(s->stats, &journal->j_stats, size);
++ s->journal = journal;
++ spin_unlock(&journal->j_history_lock);
++
++ rc = seq_open(file, &jbd_seq_info_ops);
++ if (rc == 0) {
++ struct seq_file *m = (struct seq_file *)file->private_data;
++ m->private = s;
++ } else {
++ kfree(s->stats);
++ kfree(s);
++ }
++ return rc;
++
++}
++
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = (struct seq_file *)file->private_data;
++ struct jbd_stats_proc_session *s = seq->private;
++ kfree(s->stats);
++ kfree(s);
++ return seq_release(inode, file);
++}
++
++static struct file_operations jbd_seq_info_fops = {
++ .owner = THIS_MODULE,
++ .open = jbd_seq_info_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = jbd_seq_info_release,
++};
++
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++static void jbd_stats_proc_init(journal_t *journal)
++{
++ char name[64];
++
++ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
++ journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++ if (journal->j_proc_entry) {
++ struct proc_dir_entry *p;
++ p = create_proc_entry("history", S_IRUGO,
++ journal->j_proc_entry);
++ if (p) {
++ p->proc_fops = &jbd_seq_history_fops;
++ p->data = journal;
++ p = create_proc_entry("info", S_IRUGO,
++ journal->j_proc_entry);
++ if (p) {
++ p->proc_fops = &jbd_seq_info_fops;
++ p->data = journal;
++ }
++ }
++ }
++}
++
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++ char name[64];
++
++ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
++ remove_proc_entry("info", journal->j_proc_entry);
++ remove_proc_entry("history", journal->j_proc_entry);
++ remove_proc_entry(name, proc_jbd_stats);
++}
++
++static void journal_init_stats(journal_t *journal)
++{
++ int size;
++
++ if (proc_jbd_stats == NULL)
++ return;
++
++ journal->j_history_max = 100;
++ size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++ journal->j_history = kmalloc(size, GFP_KERNEL);
++ if (journal->j_history == NULL) {
++ journal->j_history_max = 0;
++ return;
++ }
++ memset(journal->j_history, 0, size);
++ spin_lock_init(&journal->j_history_lock);
++}
++
+ /*
+ * Management for journal control blocks: functions to create and
+ * destroy journal_t structures, and to initialise and read existing
+@@ -685,6 +980,9 @@ static journal_t * journal_init_common (
+ kfree(journal);
+ goto fail;
+ }
++
++ journal_init_stats(journal);
++
+ return journal;
+ fail:
+ return NULL;
+@@ -739,6 +1037,7 @@ journal_t * journal_init_dev(struct bloc
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
++ jbd_stats_proc_init(journal);
+
+ bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+ J_ASSERT(bh != NULL);
+@@ -777,6 +1076,7 @@ journal_t * journal_init_inode (struct i
+
+ journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ journal->j_blocksize = inode->i_sb->s_blocksize;
++ jbd_stats_proc_init(journal);
+
+ /* journal descriptor can store up to n blocks -bzzz */
+ n = journal->j_blocksize / sizeof(journal_block_tag_t);
+@@ -1164,6 +1464,8 @@ void journal_destroy(journal_t *journal)
+ brelse(journal->j_sb_buffer);
+ }
+
++ if (journal->j_proc_entry)
++ jbd_stats_proc_exit(journal);
+ if (journal->j_inode)
+ iput(journal->j_inode);
+ if (journal->j_revoke)
+@@ -2003,6 +2305,28 @@ static void __exit remove_jbd_proc_entry
+
+ #endif
+
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++static void __init create_jbd_stats_proc_entry(void)
++{
++ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++ if (proc_jbd_stats)
++ remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ struct kmem_cache *jbd_handle_cache;
+
+ static int __init journal_init_handle_cache(void)
+@@ -2060,6 +2384,7 @@ static int __init journal_init(void)
+ if (ret != 0)
+ journal_destroy_caches();
+ create_jbd_proc_entry();
++ create_jbd_stats_proc_entry();
+ return ret;
+ }
+
+@@ -2071,6 +2396,7 @@ static void __exit journal_exit(void)
+ printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+ remove_jbd_proc_entry();
++ remove_jbd_stats_proc_entry();
+ journal_destroy_caches();
+ }
+
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/checkpoint.c 2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/checkpoint.c 2008-02-12 19:15:28.000000000 +0100
+@@ -232,7 +232,7 @@ __flush_batch(journal_t *journal, struct
+ * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
+ */
+ static int __process_buffer(journal_t *journal, struct journal_head *jh,
+- struct buffer_head **bhs, int *batch_count)
++ struct buffer_head **bhs, int *batch_count, transaction_t *transaction)
+ {
+ struct buffer_head *bh = jh2bh(jh);
+ int ret = 0;
+@@ -250,6 +250,7 @@ static int __process_buffer(journal_t *j
+ transaction_t *t = jh->b_transaction;
+ tid_t tid = t->t_tid;
+
++ transaction->t_chp_stats.cs_forced_to_close++;
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ log_start_commit(journal, tid);
+@@ -279,6 +280,7 @@ static int __process_buffer(journal_t *j
+ bhs[*batch_count] = bh;
+ __buffer_relink_io(jh);
+ jbd_unlock_bh_state(bh);
++ transaction->t_chp_stats.cs_written++;
+ (*batch_count)++;
+ if (*batch_count == NR_BATCH) {
+ spin_unlock(&journal->j_list_lock);
+@@ -322,6 +324,8 @@ int log_do_checkpoint(journal_t *journal
+ if (!journal->j_checkpoint_transactions)
+ goto out;
+ transaction = journal->j_checkpoint_transactions;
++ if (transaction->t_chp_stats.cs_chp_time == 0)
++ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+ this_tid = transaction->t_tid;
+ restart:
+ /*
+@@ -346,7 +350,8 @@ restart:
+ retry = 1;
+ break;
+ }
+- retry = __process_buffer(journal, jh, bhs,&batch_count);
++ retry = __process_buffer(journal, jh, bhs,&batch_count,
++ transaction);
+ if (!retry && lock_need_resched(&journal->j_list_lock)){
+ spin_unlock(&journal->j_list_lock);
+ retry = 1;
+@@ -668,6 +673,8 @@ void __journal_insert_checkpoint(struct
+
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++ struct transaction_stats_s stats;
++
+ assert_spin_locked(&journal->j_list_lock);
+ if (transaction->t_cpnext) {
+ transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -694,5 +701,25 @@ void __journal_drop_transaction(journal_
+ J_ASSERT(journal->j_running_transaction != transaction);
+
+ jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++ /*
++ * File the transaction for history
++ */
++ if (transaction->t_chp_stats.cs_written != 0 ||
++ transaction->t_chp_stats.cs_chp_time != 0) {
++ stats.ts_type = JBD_STATS_CHECKPOINT;
++ stats.ts_tid = transaction->t_tid;
++ stats.u.chp = transaction->t_chp_stats;
++ if (stats.ts_chp_time)
++ stats.ts_chp_time =
++ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++ spin_unlock(&journal->j_history_lock);
++ }
++
+ kfree(transaction);
+ }
+Index: linux-2.6.22.18.patch.lustre.1.6/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.22.18.patch.lustre.1.6.orig/fs/jbd/commit.c 2008-02-12 19:08:14.000000000 +0100
++++ linux-2.6.22.18.patch.lustre.1.6/fs/jbd/commit.c 2008-02-12 19:15:28.000000000 +0100
+@@ -13,6 +13,7 @@
+ * part of the ext2fs journaling system.
+ */
+
++#include <linux/jiffies.h>
+ #include <linux/time.h>
+ #include <linux/fs.h>
+ #include <linux/jbd.h>
+@@ -21,6 +22,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+
++
+ /*
+ * Default IO end handler for temporary BJ_IO buffer_heads.
+ */
+@@ -282,6 +284,7 @@ write_out_data:
+ */
+ void journal_commit_transaction(journal_t *journal)
+ {
++ struct transaction_stats_s stats;
+ transaction_t *commit_transaction;
+ struct journal_head *jh, *new_jh, *descriptor;
+ struct buffer_head **wbuf = journal->j_wbuf;
+@@ -328,6 +331,11 @@ void journal_commit_transaction(journal_
+ spin_lock(&journal->j_state_lock);
+ commit_transaction->t_state = T_LOCKED;
+
++ stats.ts_wait = commit_transaction->t_max_wait;
++ stats.ts_locked = CURRENT_MSECS;
++ stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++ stats.ts_locked);
++
+ spin_lock(&commit_transaction->t_handle_lock);
+ while (commit_transaction->t_updates) {
+ DEFINE_WAIT(wait);
+@@ -398,6 +406,9 @@ void journal_commit_transaction(journal_
+ */
+ journal_switch_revoke_table(journal);
+
++ stats.ts_flushing = CURRENT_MSECS;
++ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+ commit_transaction->t_state = T_FLUSH;
+ journal->j_committing_transaction = commit_transaction;
+ journal->j_running_transaction = NULL;
+@@ -489,6 +500,11 @@ void journal_commit_transaction(journal_
+ */
+ commit_transaction->t_state = T_COMMIT;
+
++ stats.ts_logging = CURRENT_MSECS;
++ stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++ stats.ts_blocks = commit_transaction->t_outstanding_credits;
++ stats.ts_blocks_logged = 0;
++
+ descriptor = NULL;
+ bufs = 0;
+ while (commit_transaction->t_buffers) {
+@@ -637,6 +653,7 @@ start_journal_io:
+ submit_bh(WRITE, bh);
+ }
+ cond_resched();
++ stats.ts_blocks_logged += bufs;
+
+ /* Force a new descriptor to be generated next
+ time round the loop. */
+@@ -831,6 +848,7 @@ restart_loop:
+ cp_transaction = jh->b_cp_transaction;
+ if (cp_transaction) {
+ JBUFFER_TRACE(jh, "remove from old cp transaction");
++ cp_transaction->t_chp_stats.cs_dropped++;
+ __journal_remove_checkpoint(jh);
+ }
+
+@@ -905,6 +923,36 @@ restart_loop:
+
+ J_ASSERT(commit_transaction->t_state == T_COMMIT);
+
++ commit_transaction->t_start = CURRENT_MSECS;
++ stats.ts_logging = jbd_time_diff(stats.ts_logging,
++ commit_transaction->t_start);
++
++ /*
++ * File the transaction for history
++ */
++ stats.ts_type = JBD_STATS_RUN;
++ stats.ts_tid = commit_transaction->t_tid;
++ stats.ts_handle_count = commit_transaction->t_handle_count;
++ spin_lock(&journal->j_history_lock);
++ memcpy(journal->j_history + journal->j_history_cur, &stats,
++ sizeof(stats));
++ if (++journal->j_history_cur == journal->j_history_max)
++ journal->j_history_cur = 0;
++
++ /*
++ * Calculate overall stats
++ */
++ journal->j_stats.ts_tid++;
++ journal->j_stats.ts_wait += stats.ts_wait;
++ journal->j_stats.ts_running += stats.ts_running;
++ journal->j_stats.ts_locked += stats.ts_locked;
++ journal->j_stats.ts_flushing += stats.ts_flushing;
++ journal->j_stats.ts_logging += stats.ts_logging;
++ journal->j_stats.ts_handle_count += stats.ts_handle_count;
++ journal->j_stats.ts_blocks += stats.ts_blocks;
++ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++ spin_unlock(&journal->j_history_lock);
++
+ commit_transaction->t_state = T_FINISHED;
+ J_ASSERT(commit_transaction == journal->j_committing_transaction);
+ journal->j_commit_sequence = commit_transaction->t_tid;
Modified: trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch (original)
+++ trunk/lustre/kernel_patches/patches/sd_iostats-2.6.22.patch Wed Feb 13 12:23:22 2008
@@ -1,8 +1,8 @@
-Index: linux-2.6.22-rc4/drivers/scsi/Kconfig
+Index: linux-2.6.22/drivers/scsi/Kconfig
===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/Kconfig 2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/Kconfig 2007-06-11 20:24:20.000000000 +0200
-@@ -75,6 +75,14 @@
+--- linux-2.6.22.orig/drivers/scsi/Kconfig 2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/Kconfig 2007-09-10 16:19:56.000000000 +0200
+@@ -76,6 +76,14 @@
In this case, do not compile the driver for your SCSI host adapter
(below) as a module either.
@@ -17,10 +17,10 @@
config CHR_DEV_ST
tristate "SCSI tape support"
depends on SCSI
-Index: linux-2.6.22-rc4/drivers/scsi/sd.c
+Index: linux-2.6.22/drivers/scsi/sd.c
===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/sd.c 2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/sd.c 2007-06-11 20:33:35.000000000 +0200
+--- linux-2.6.22.orig/drivers/scsi/sd.c 2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/sd.c 2007-09-10 16:19:56.000000000 +0200
@@ -244,6 +244,38 @@
.issue_flush = sd_issue_flush,
};
@@ -431,10 +431,10 @@
scsi_unregister_driver(&sd_template.gendrv);
class_unregister(&sd_disk_class);
-Index: linux-2.6.22-rc4/drivers/scsi/scsi_proc.c
+Index: linux-2.6.22/drivers/scsi/scsi_proc.c
===================================================================
---- linux-2.6.22-rc4.orig/drivers/scsi/scsi_proc.c 2007-06-11 20:23:32.000000000 +0200
-+++ linux-2.6.22-rc4/drivers/scsi/scsi_proc.c 2007-06-11 20:24:20.000000000 +0200
+--- linux-2.6.22.orig/drivers/scsi/scsi_proc.c 2007-09-10 16:19:54.000000000 +0200
++++ linux-2.6.22/drivers/scsi/scsi_proc.c 2007-09-10 16:19:56.000000000 +0200
@@ -40,7 +40,8 @@
/* 4K page size, but our output routines, use some slack for overruns */
#define PROC_BLOCK_SIZE (3*1024)
Modified: trunk/lustre/kernel_patches/series/2.6.20-vanilla.series
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/series/2.6.20-vanilla.series?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/series/2.6.20-vanilla.series (original)
+++ trunk/lustre/kernel_patches/series/2.6.20-vanilla.series Wed Feb 13 12:23:22 2008
@@ -11,3 +11,5 @@
export-show_task-2.6.18-vanilla.patch
sd_iostats-2.6.20.patch
LDISKFS_SUPER_MAGIC-2.6.20.patch
+2.6.20_backported_from_2.6.22-rc4-x86_64-mm-unwinder.patch
+generic_file_buffered_write_backport_2.6.20.patch
Modified: trunk/lustre/kernel_patches/series/2.6.22-vanilla.series
URL: http://svn.debian.org/wsvn/pkg-lustre/trunk/lustre/kernel_patches/series/2.6.22-vanilla.series?rev=435&op=diff
==============================================================================
--- trunk/lustre/kernel_patches/series/2.6.22-vanilla.series (original)
+++ trunk/lustre/kernel_patches/series/2.6.22-vanilla.series Wed Feb 13 12:23:22 2008
@@ -11,3 +11,4 @@
export-show_task-2.6.18-vanilla.patch
sd_iostats-2.6.22.patch
LDISKFS_SUPER_MAGIC-2.6.20.patch
+jbd-stats-2.6.22-vanilla-from-l1642_2.6-rhel5.patch
More information about the Pkg-lustre-svn-commit
mailing list