debian/0000775000000000000000000000000012154641757007204 5ustar debian/libxenomai1.dirs0000664000000000000000000000007411373562011012263 0ustar etc/modprobe.d etc/udev/rules.d usr/share/lintian/overrides debian/libxenomai1.udev0000644000000000000000000000026512154641601012266 0ustar # real-time heap device (Xenomai:rtheap) KERNEL=="rtheap", MODE="0660", GROUP="xenomai" # real-time pipe devices (Xenomai:rtpipe) KERNEL=="rtp[0-9]*", MODE="0660", GROUP="xenomai" debian/patches/0000775000000000000000000000000012142752772010630 5ustar debian/patches/series0000664000000000000000000000007612142753004012035 0ustar 01_support_debian_kernel_2.6.32.patch 02_fix_powerpcspe.patch debian/patches/02_fix_powerpcspe.patch0000664000000000000000000000276112142752744015214 0ustar --- xenomai-2.6.2.1.orig/ksrc/arch/powerpc/fpu.S +++ xenomai-2.6.2.1/ksrc/arch/powerpc/fpu.S @@ -64,6 +64,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601) /* r3 = &thread_struct (tcb->fpup) */ _GLOBAL(rthal_save_fpu) +#ifndef __NO_FPRS__ mfmsr r5 ori r5,r5,MSR_FP SYNC @@ -72,6 +73,7 @@ _GLOBAL(rthal_save_fpu) SAVE_32FPRS(0,r3) mffs fr0 stfd fr0,__FPSCR(r3) +#endif blr /* r3 = &thread_struct */ @@ -85,6 +87,7 @@ _GLOBAL(rthal_init_fpu) /* r3 = &thread_struct (tcb->fpup) */ _GLOBAL(rthal_restore_fpu) +#ifndef __NO_FPRS__ mfmsr r5 ori r5,r5,MSR_FP SYNC @@ -93,4 +96,5 @@ _GLOBAL(rthal_restore_fpu) lfd fr0,__FPSCR(r3) MTFSF_L(fr0) REST_32FPRS(0,r3) +#endif blr --- xenomai-2.6.2.1.orig/include/asm-powerpc/fptest.h +++ xenomai-2.6.2.1/include/asm-powerpc/fptest.h @@ -38,6 +38,7 @@ static inline void fp_linux_end(void) static inline void fp_regs_set(unsigned val) { +#ifndef __NO_FPRS__ uint64_t fpval = val; __asm__ __volatile__("lfd 0, %0\n" " fmr 1, 0\n" @@ -71,6 +72,7 @@ static inline void fp_regs_set(unsigned " fmr 29, 0\n" " fmr 30, 0\n" " fmr 31, 0\n"::"m"(fpval)); +#endif } #define FPTEST_REGVAL(n) { \ @@ -82,6 +84,7 @@ static inline void fp_regs_set(unsigned static inline unsigned fp_regs_check(unsigned val) { unsigned i, result = val; +#ifndef __NO_FPRS__ uint32_t e[32]; FPTEST_REGVAL(0); @@ -123,6 +126,7 @@ static inline unsigned fp_regs_check(uns result = e[i]; } +#endif return result; } debian/patches/01_support_debian_kernel_2.6.32.patch0000664000000000000000000247351411440435310017341 0ustar Description: Support for Debian kernel 2.6.32 This patch is necessary for Xenomai to support the Debian kernel 2.6.32 in squeeze. Author: Roland Stigge --- /dev/null +++ xenomai-2.5.4/ksrc/arch/x86/patches/adeos-ipipe-2.6.32-x86-2.7-01.patch @@ -0,0 +1,11490 @@ +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index fbc161d..966de95 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -480,6 +480,7 @@ config SCHED_OMIT_FRAME_POINTER + + menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" ++ depends on !IPIPE + ---help--- + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. 
+@@ -531,6 +532,7 @@ source "arch/x86/lguest/Kconfig" + + config PARAVIRT + bool "Enable paravirtualization code" ++ depends on !IPIPE + ---help--- + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly +@@ -750,6 +752,8 @@ config SCHED_MC + + source "kernel/Kconfig.preempt" + ++source "kernel/ipipe/Kconfig" ++ + config X86_UP_APIC + bool "Local APIC support on uniprocessors" + depends on X86_32 && !SMP && !X86_32_NON_STANDARD +diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h +index 474d80d..0b33b55 100644 +--- a/arch/x86/include/asm/apic.h ++++ b/arch/x86/include/asm/apic.h +@@ -404,7 +404,13 @@ static inline u32 safe_apic_wait_icr_idle(void) + } + + ++#ifdef CONFIG_IPIPE ++#define ack_APIC_irq() do { } while(0) ++static inline void __ack_APIC_irq(void) ++#else /* !CONFIG_IPIPE */ ++#define __ack_APIC_irq() ack_APIC_irq() + static inline void ack_APIC_irq(void) ++#endif /* CONFIG_IPIPE */ + { + #ifdef CONFIG_X86_LOCAL_APIC + /* +diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h +index 3b62da9..855534f 100644 +--- a/arch/x86/include/asm/apicdef.h ++++ b/arch/x86/include/asm/apicdef.h +@@ -143,6 +143,7 @@ + # define MAX_LOCAL_APIC 32768 + #endif + ++#ifndef __ASSEMBLY__ + /* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. +@@ -418,4 +419,7 @@ struct local_apic { + #else + #define BAD_APICID 0xFFFFu + #endif ++ ++#endif /* !__ASSEMBLY__ */ ++ + #endif /* _ASM_X86_APICDEF_H */ +diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h +index f5693c8..b45303a 100644 +--- a/arch/x86/include/asm/entry_arch.h ++++ b/arch/x86/include/asm/entry_arch.h +@@ -22,6 +22,7 @@ BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, + smp_invalidate_interrupt) + BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, + smp_invalidate_interrupt) ++#ifndef CONFIG_IPIPE + BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, + smp_invalidate_interrupt) + BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, +@@ -32,6 +33,7 @@ BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, + smp_invalidate_interrupt) + BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, + smp_invalidate_interrupt) ++#endif /* !CONFIG_IPIPE */ + #endif + + BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) +diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h +index ba180d9..6a7c6bc 100644 +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -35,6 +35,13 @@ extern void spurious_interrupt(void); + extern void thermal_interrupt(void); + extern void reschedule_interrupt(void); + extern void mce_self_interrupt(void); ++#ifdef CONFIG_IPIPE ++void ipipe_ipi0(void); ++void ipipe_ipi1(void); ++void ipipe_ipi2(void); ++void ipipe_ipi3(void); ++void ipipe_ipiX(void); ++#endif + + extern void invalidate_interrupt(void); + extern void invalidate_interrupt0(void); +@@ -115,6 +122,7 @@ extern void smp_invalidate_interrupt(struct pt_regs *); + #else + extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); + #endif ++extern asmlinkage void smp_reboot_interrupt(void); + #endif + + extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); +diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h +index 0b20bbb..b8a7638 100644 +--- a/arch/x86/include/asm/i387.h 
++++ b/arch/x86/include/asm/i387.h +@@ -289,11 +289,14 @@ static inline void __clear_fpu(struct task_struct *tsk) + static inline void kernel_fpu_begin(void) + { + struct thread_info *me = current_thread_info(); ++ unsigned long flags; + preempt_disable(); ++ local_irq_save_hw_cond(flags); + if (me->status & TS_USEDFPU) + __save_init_fpu(me->task); + else + clts(); ++ local_irq_restore_hw_cond(flags); + } + + static inline void kernel_fpu_end(void) +diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h +index 58d7091..ac8bd15 100644 +--- a/arch/x86/include/asm/i8259.h ++++ b/arch/x86/include/asm/i8259.h +@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; + #define SLAVE_ICW4_DEFAULT 0x01 + #define PIC_ICW4_AEOI 2 + +-extern spinlock_t i8259A_lock; ++extern ipipe_spinlock_t i8259A_lock; + + extern void init_8259A(int auto_eoi); + extern void enable_8259A_irq(unsigned int irq); +diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h +index 0b72282..6574056 100644 +--- a/arch/x86/include/asm/ipi.h ++++ b/arch/x86/include/asm/ipi.h +@@ -68,6 +68,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest + * to the APIC. + */ + unsigned int cfg; ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); + + /* + * Wait for idle. +@@ -83,6 +86,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); ++ ++ local_irq_restore_hw(flags); + } + + /* +diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h +new file mode 100644 +index 0000000..521885c +--- /dev/null ++++ b/arch/x86/include/asm/ipipe.h +@@ -0,0 +1,158 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_H ++#define __X86_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#ifndef IPIPE_ARCH_STRING ++#define IPIPE_ARCH_STRING "2.7-02" ++#define IPIPE_MAJOR_NUMBER 2 ++#define IPIPE_MINOR_NUMBER 7 ++#define IPIPE_PATCH_NUMBER 2 ++#endif ++ ++DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs); ++ ++DECLARE_PER_CPU(unsigned long, __ipipe_cr2); ++ ++static inline unsigned __ipipe_get_irq_vector(int irq) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ unsigned __ipipe_get_ioapic_irq_vector(int irq); ++ return __ipipe_get_ioapic_irq_vector(irq); ++#elif defined(CONFIG_X86_LOCAL_APIC) ++ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? 
++ ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR; ++#else ++ return irq + IRQ0_VECTOR; ++#endif ++} ++ ++#ifdef CONFIG_X86_32 ++# include "ipipe_32.h" ++#else ++# include "ipipe_64.h" ++#endif ++ ++/* ++ * The logical processor id and the current Linux task are read from the PDA, ++ * so this is always safe, regardless of the underlying stack. ++ */ ++#define ipipe_processor_id() raw_smp_processor_id() ++#define ipipe_safe_current() current ++ ++#define prepare_arch_switch(next) \ ++do { \ ++ ipipe_schedule_notify(current, next); \ ++ local_irq_disable_hw(); \ ++} while(0) ++ ++#define task_hijacked(p) \ ++ ({ int x = __ipipe_root_domain_p; \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \ ++ if (x) local_irq_enable_hw(); !x; }) ++ ++struct ipipe_domain; ++ ++struct ipipe_sysinfo { ++ ++ int ncpus; /* Number of CPUs on board */ ++ u64 cpufreq; /* CPU frequency (in Hz) */ ++ ++ /* Arch-dependent block */ ++ ++ struct { ++ unsigned tmirq; /* Timer tick IRQ */ ++ u64 tmfreq; /* Timer frequency */ ++ } archdep; ++}; ++ ++/* Private interface -- Internal use only */ ++ ++#define __ipipe_check_platform() do { } while(0) ++#define __ipipe_init_platform() do { } while(0) ++#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq) ++#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq) ++ ++#ifdef CONFIG_SMP ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); ++#else ++#define __ipipe_hook_critical_ipi(ipd) do { } while(0) ++#endif ++ ++#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0) ++ ++void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_enable_pipeline(void); ++ ++void __ipipe_do_critical_sync(unsigned irq, void *cookie); ++ ++void __ipipe_serial_debug(const char *fmt, ...); ++ ++extern int __ipipe_tick_irq; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++#define ipipe_update_tick_evtdev(evtdev) \ ++ do { \ ++ if (strcmp((evtdev)->name, "lapic") == 0) \ ++ __ipipe_tick_irq = \ ++ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \ ++ else \ ++ __ipipe_tick_irq = 0; \ ++ } while (0) ++#else ++#define ipipe_update_tick_evtdev(evtdev) \ ++ __ipipe_tick_irq = 0 ++#endif ++ ++int __ipipe_check_lapic(void); ++ ++int __ipipe_check_tickdev(const char *devname); ++ ++#define __ipipe_syscall_watched_p(p, sc) \ ++ (ipipe_notifier_enabled_p(p) || (unsigned long)sc >= NR_syscalls) ++ ++#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF) ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_update_tick_evtdev(evtdev) do { } while (0) ++#define task_hijacked(p) 0 ++ ++#endif /* CONFIG_IPIPE */ ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++#define __ipipe_move_root_irq(irq) \ ++ do { \ ++ if (irq < NR_IRQS) { \ ++ struct irq_chip *chip = irq_to_desc(irq)->chip; \ ++ if (chip->move) \ ++ chip->move(irq); \ ++ } \ ++ } while (0) ++#else /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++#define __ipipe_move_root_irq(irq) do { } while (0) ++#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++ ++#endif /* !__X86_IPIPE_H */ +diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h +new file mode 100644 +index 0000000..8d1f4b5 +--- /dev/null ++++ b/arch/x86/include/asm/ipipe_32.h +@@ -0,0 +1,156 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe_32.h ++ * ++ * Copyright (C) 2002-2005 Philippe Gerum. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_32_H ++#define __X86_IPIPE_32_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t)) ++#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; }) ++ ++#define ipipe_tsc2ns(t) \ ++({ \ ++ unsigned long long delta = (t)*1000; \ ++ do_div(delta, cpu_khz/1000+1); \ ++ (unsigned long)delta; \ ++}) ++ ++#define ipipe_tsc2us(t) \ ++({ \ ++ unsigned long long delta = (t); \ ++ do_div(delta, cpu_khz/1000+1); \ ++ (unsigned long)delta; \ ++}) ++ ++/* Private interface -- Internal use only */ ++ ++int __ipipe_handle_irq(struct pt_regs *regs); ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrl %1, %0":"=r"(ul) ++ : "r"(ul)); ++ return ul; ++} ++ ++struct irq_desc; ++ ++void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); ++ ++void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); ++ ++static inline void __ipipe_call_root_xirq_handler(unsigned irq, ++ ipipe_irq_handler_t handler) ++{ ++ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); ++ ++ regs->orig_ax = ~__ipipe_get_irq_vector(irq); ++ ++ __asm__ __volatile__("pushfl\n\t" ++ "pushl %%cs\n\t" ++ "pushl $__xirq_end\n\t" ++ "pushl %%eax\n\t" ++ "pushl %%gs\n\t" ++ "pushl %%fs\n\t" ++ "pushl %%es\n\t" ++ "pushl %%ds\n\t" ++ "pushl %%eax\n\t" ++ "pushl %%ebp\n\t" ++ "pushl %%edi\n\t" ++ "pushl %%esi\n\t" ++ "pushl %%edx\n\t" ++ "pushl %%ecx\n\t" ++ "pushl %%ebx\n\t" ++ "movl %2,%%eax\n\t" ++ "call *%1\n\t" ++ "jmp ret_from_intr\n\t" ++ "__xirq_end: cli\n" ++ : /* no output */ ++ : "a" (~irq), "r" (handler), "rm" (regs)); ++} ++ ++void irq_enter(void); ++void irq_exit(void); ++ ++static inline void __ipipe_call_root_virq_handler(unsigned irq, ++ ipipe_irq_handler_t handler, ++ void *cookie) ++{ ++ irq_enter(); ++ __asm__ __volatile__("pushfl\n\t" ++ "pushl %%cs\n\t" ++ "pushl $__virq_end\n\t" ++ "pushl $-1\n\t" ++ "pushl %%gs\n\t" ++ "pushl %%fs\n\t" ++ "pushl %%es\n\t" ++ "pushl %%ds\n\t" ++ "pushl %%eax\n\t" ++ "pushl %%ebp\n\t" ++ "pushl %%edi\n\t" ++ "pushl %%esi\n\t" ++ "pushl %%edx\n\t" ++ "pushl %%ecx\n\t" ++ "pushl %%ebx\n\t" ++ "pushl %2\n\t" ++ "pushl %%eax\n\t" ++ "call *%1\n\t" ++ "addl $8,%%esp\n" ++ : /* no output */ ++ : "a" (irq), "r" (handler), "d" (cookie)); ++ irq_exit(); ++ __asm__ __volatile__("jmp ret_from_intr\n\t" ++ "__virq_end: cli\n" ++ : /* no output */ ++ : /* no input */); ++} ++ ++/* ++ * When running handlers, enable hw interrupts for all domains but the ++ * one heading the pipeline, so that IRQs can never be significantly ++ * deferred for the latter. 
++ */ ++#define __ipipe_run_isr(ipd, irq) \ ++do { \ ++ if (!__ipipe_pipeline_head_p(ipd)) \ ++ local_irq_enable_hw(); \ ++ if (ipd == ipipe_root_domain) { \ ++ if (likely(!ipipe_virtual_irq_p(irq))) \ ++ __ipipe_call_root_xirq_handler(irq, \ ++ ipd->irqs[irq].handler); \ ++ else \ ++ __ipipe_call_root_virq_handler(irq, \ ++ ipd->irqs[irq].handler, \ ++ ipd->irqs[irq].cookie); \ ++ } else { \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ ++ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ } \ ++ local_irq_disable_hw(); \ ++} while(0) ++ ++#endif /* !__X86_IPIPE_32_H */ +diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h +new file mode 100644 +index 0000000..bc427b8 +--- /dev/null ++++ b/arch/x86/include/asm/ipipe_64.h +@@ -0,0 +1,161 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe_64.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_64_H ++#define __X86_IPIPE_64_H ++ ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_SMP ++#include ++#include ++#endif ++ ++#define ipipe_read_tsc(t) do { \ ++ unsigned int __a,__d; \ ++ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ ++ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ ++} while(0) ++ ++extern unsigned cpu_khz; ++#define ipipe_cpu_freq() ({ unsigned long __freq = (1000UL * cpu_khz); __freq; }) ++#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL)) ++#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL)) ++ ++/* Private interface -- Internal use only */ ++ ++int __ipipe_handle_irq(struct pt_regs *regs); ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrq %1, %0":"=r"(ul) ++ : "rm"(ul)); ++ return ul; ++} ++ ++struct irq_desc; ++ ++void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); ++ ++void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); ++ ++static inline void __ipipe_call_root_xirq_handler(unsigned irq, ++ void (*handler)(unsigned, void *)) ++{ ++ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); ++ ++ regs->orig_ax = ~__ipipe_get_irq_vector(irq); ++ ++ __asm__ __volatile__("movq %%rsp, %%rax\n\t" ++ "pushq $0\n\t" ++ "pushq %%rax\n\t" ++ "pushfq\n\t" ++ "pushq %[kernel_cs]\n\t" ++ "pushq $__xirq_end\n\t" ++ "pushq %[vector]\n\t" ++ "subq $9*8,%%rsp\n\t" ++ "movq %%rdi,8*8(%%rsp)\n\t" ++ "movq %%rsi,7*8(%%rsp)\n\t" ++ "movq %%rdx,6*8(%%rsp)\n\t" ++ "movq %%rcx,5*8(%%rsp)\n\t" ++ "movq %%rax,4*8(%%rsp)\n\t" ++ "movq %%r8,3*8(%%rsp)\n\t" ++ "movq %%r9,2*8(%%rsp)\n\t" ++ "movq %%r10,1*8(%%rsp)\n\t" ++ "movq %%r11,(%%rsp)\n\t" ++ "call *%[handler]\n\t" ++ "cli\n\t" ++ "jmp 
exit_intr\n\t" ++ "__xirq_end: cli\n" ++ : /* no output */ ++ : [kernel_cs] "i" (__KERNEL_CS), ++ [vector] "rm" (regs->orig_ax), ++ [handler] "r" (handler), "D" (regs) ++ : "rax"); ++} ++ ++void irq_enter(void); ++void irq_exit(void); ++ ++static inline void __ipipe_call_root_virq_handler(unsigned irq, ++ void (*handler)(unsigned, void *), ++ void *cookie) ++{ ++ irq_enter(); ++ __asm__ __volatile__("movq %%rsp, %%rax\n\t" ++ "pushq $0\n\t" ++ "pushq %%rax\n\t" ++ "pushfq\n\t" ++ "pushq %[kernel_cs]\n\t" ++ "pushq $__virq_end\n\t" ++ "pushq $-1\n\t" ++ "subq $9*8,%%rsp\n\t" ++ "movq %%rdi,8*8(%%rsp)\n\t" ++ "movq %%rsi,7*8(%%rsp)\n\t" ++ "movq %%rdx,6*8(%%rsp)\n\t" ++ "movq %%rcx,5*8(%%rsp)\n\t" ++ "movq %%rax,4*8(%%rsp)\n\t" ++ "movq %%r8,3*8(%%rsp)\n\t" ++ "movq %%r9,2*8(%%rsp)\n\t" ++ "movq %%r10,1*8(%%rsp)\n\t" ++ "movq %%r11,(%%rsp)\n\t" ++ "call *%[handler]\n\t" ++ : /* no output */ ++ : [kernel_cs] "i" (__KERNEL_CS), ++ [handler] "r" (handler), "D" (irq), "S" (cookie) ++ : "rax"); ++ irq_exit(); ++ __asm__ __volatile__("cli\n\t" ++ "jmp exit_intr\n\t" ++ "__virq_end: cli\n" ++ : /* no output */ ++ : /* no input */); ++} ++ ++/* ++ * When running handlers, enable hw interrupts for all domains but the ++ * one heading the pipeline, so that IRQs can never be significantly ++ * deferred for the latter. ++ */ ++#define __ipipe_run_isr(ipd, irq) \ ++ do { \ ++ if (!__ipipe_pipeline_head_p(ipd)) \ ++ local_irq_enable_hw(); \ ++ if (ipd == ipipe_root_domain) { \ ++ if (likely(!ipipe_virtual_irq_p(irq))) \ ++ __ipipe_call_root_xirq_handler( \ ++ irq, (ipd)->irqs[irq].handler); \ ++ else \ ++ __ipipe_call_root_virq_handler( \ ++ irq, (ipd)->irqs[irq].handler, \ ++ (ipd)->irqs[irq].cookie); \ ++ } else { \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ ++ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ } \ ++ local_irq_disable_hw(); \ ++ } while(0) ++ ++#endif /* !__X86_IPIPE_64_H */ +diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h +new file mode 100644 +index 0000000..ca6596b +--- /dev/null ++++ b/arch/x86/include/asm/ipipe_base.h +@@ -0,0 +1,211 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe_base.h ++ * ++ * Copyright (C) 2007-2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_BASE_H ++#define __X86_IPIPE_BASE_H ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_X86_32 ++#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */ ++#else ++#define IPIPE_NR_FAULTS 32 ++#endif ++ ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) ++/* ++ * System interrupts are mapped beyond the last defined external IRQ ++ * number. 
++ */ ++#define IPIPE_NR_XIRQS (NR_IRQS + 32) ++#define IPIPE_FIRST_APIC_IRQ NR_IRQS ++#define IPIPE_SERVICE_VECTOR0 (INVALIDATE_TLB_VECTOR_END + 1) ++#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0) ++#define IPIPE_SERVICE_VECTOR1 (INVALIDATE_TLB_VECTOR_END + 2) ++#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1) ++#define IPIPE_SERVICE_VECTOR2 (INVALIDATE_TLB_VECTOR_END + 3) ++#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2) ++#define IPIPE_SERVICE_VECTOR3 (INVALIDATE_TLB_VECTOR_END + 4) ++#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3) ++#ifdef CONFIG_SMP ++#define IPIPE_CRITICAL_VECTOR (INVALIDATE_TLB_VECTOR_END + 5) ++#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR) ++#endif ++#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR) ++#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ) ++#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ ++#define IPIPE_NR_XIRQS NR_IRQS ++#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ ++ ++/* Pseudo-vectors used for kernel events */ ++#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS ++#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) ++#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) ++#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) ++#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) ++#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) ++#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) ++#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) ++#define IPIPE_EVENT_RETURN (IPIPE_FIRST_EVENT + 7) ++#define IPIPE_LAST_EVENT IPIPE_EVENT_RETURN ++#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) ++ ++#define ex_do_divide_error 0 ++#define ex_do_debug 1 ++/* NMI not pipelined. */ ++#define ex_do_int3 3 ++#define ex_do_overflow 4 ++#define ex_do_bounds 5 ++#define ex_do_invalid_op 6 ++#define ex_do_device_not_available 7 ++/* Double fault not pipelined. 
*/ ++#define ex_do_coprocessor_segment_overrun 9 ++#define ex_do_invalid_TSS 10 ++#define ex_do_segment_not_present 11 ++#define ex_do_stack_segment 12 ++#define ex_do_general_protection 13 ++#define ex_do_page_fault 14 ++#define ex_do_spurious_interrupt_bug 15 ++#define ex_do_coprocessor_error 16 ++#define ex_do_alignment_check 17 ++#define ex_machine_check_vector 18 ++#define ex_reserved ex_machine_check_vector ++#define ex_do_simd_coprocessor_error 19 ++#define ex_do_iret_error 32 ++ ++#ifndef __ASSEMBLY__ ++ ++#ifdef CONFIG_SMP ++ ++#include ++ ++#ifdef CONFIG_X86_32 ++#define GET_ROOT_STATUS_ADDR \ ++ "pushfl; cli;" \ ++ "movl %%fs:per_cpu__this_cpu_off, %%eax;" \ ++ "lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;" ++#define PUT_ROOT_STATUS_ADDR "popfl;" ++#define TEST_AND_SET_ROOT_STATUS \ ++ "btsl $0,(%%eax);" ++#define TEST_ROOT_STATUS \ ++ "btl $0,(%%eax);" ++#define ROOT_TEST_CLOBBER_LIST "eax" ++#else /* CONFIG_X86_64 */ ++#define GET_ROOT_STATUS_ADDR \ ++ "pushfq; cli;" \ ++ "movq %%gs:per_cpu__this_cpu_off, %%rax;" \ ++ "lea per_cpu__ipipe_percpu_darray(%%rax), %%rax;" ++#define PUT_ROOT_STATUS_ADDR "popfq;" ++#define TEST_AND_SET_ROOT_STATUS \ ++ "btsl $0,(%%rax);" ++#define TEST_ROOT_STATUS \ ++ "btl $0,(%%rax);" ++#define ROOT_TEST_CLOBBER_LIST "rax" ++#endif /* CONFIG_X86_64 */ ++ ++static inline void __ipipe_stall_root(void) ++{ ++ __asm__ __volatile__(GET_ROOT_STATUS_ADDR ++ LOCK_PREFIX ++ TEST_AND_SET_ROOT_STATUS ++ PUT_ROOT_STATUS_ADDR ++ : : : ROOT_TEST_CLOBBER_LIST, "memory"); ++} ++ ++static inline unsigned long __ipipe_test_and_stall_root(void) ++{ ++ int oldbit; ++ ++ __asm__ __volatile__(GET_ROOT_STATUS_ADDR ++ LOCK_PREFIX ++ TEST_AND_SET_ROOT_STATUS ++ "sbbl %0,%0;" ++ PUT_ROOT_STATUS_ADDR ++ :"=r" (oldbit) ++ : : ROOT_TEST_CLOBBER_LIST, "memory"); ++ return oldbit; ++} ++ ++static inline unsigned long __ipipe_test_root(void) ++{ ++ int oldbit; ++ ++ __asm__ __volatile__(GET_ROOT_STATUS_ADDR ++ TEST_ROOT_STATUS ++ "sbbl %0,%0;" ++ PUT_ROOT_STATUS_ADDR ++ :"=r" (oldbit) ++ : : ROOT_TEST_CLOBBER_LIST); ++ return oldbit; ++} ++ ++#else /* !CONFIG_SMP */ ++ ++#if __GNUC__ >= 4 ++/* Alias to ipipe_root_cpudom_var(status) */ ++extern unsigned long __ipipe_root_status; ++#else ++extern unsigned long *const __ipipe_root_status_addr; ++#define __ipipe_root_status (*__ipipe_root_status_addr) ++#endif ++ ++static inline void __ipipe_stall_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ __asm__ __volatile__("btsl $0,%0;" ++ :"+m" (*p) : : "memory"); ++} ++ ++static inline unsigned long __ipipe_test_and_stall_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ int oldbit; ++ ++ __asm__ __volatile__("btsl $0,%1;" ++ "sbbl %0,%0;" ++ :"=r" (oldbit), "+m" (*p) ++ : : "memory"); ++ return oldbit; ++} ++ ++static inline unsigned long __ipipe_test_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ int oldbit; ++ ++ __asm__ __volatile__("btl $0,%1;" ++ "sbbl %0,%0;" ++ :"=r" (oldbit) ++ :"m" (*p)); ++ return oldbit; ++} ++ ++#endif /* !CONFIG_SMP */ ++ ++void __ipipe_halt_root(void); ++ ++void __ipipe_serial_debug(const char *fmt, ...); ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* !__X86_IPIPE_BASE_H */ +diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h +index 6e90a04..6178f92 100644 +--- a/arch/x86/include/asm/irq_vectors.h ++++ b/arch/x86/include/asm/irq_vectors.h +@@ -91,10 +91,17 @@ + #define THRESHOLD_APIC_VECTOR 0xf9 + #define REBOOT_VECTOR 0xf8 + ++#ifdef CONFIG_IPIPE ++/* 
f0-f2 used for TLB flush, f3-f7 reserved for the I-pipe */ ++#define INVALIDATE_TLB_VECTOR_END 0xf2 ++#define INVALIDATE_TLB_VECTOR_START 0xf0 ++#define NUM_INVALIDATE_TLB_VECTORS 3 ++#else /* !CONFIG_IPIPE */ + /* f0-f7 used for spreading out TLB flushes: */ + #define INVALIDATE_TLB_VECTOR_END 0xf7 + #define INVALIDATE_TLB_VECTOR_START 0xf0 + #define NUM_INVALIDATE_TLB_VECTORS 8 ++#endif + + /* + * Local APIC timer IRQ vector is on a different priority level, +@@ -120,6 +127,9 @@ + */ + #define MCE_SELF_VECTOR 0xeb + ++/* I-pipe: Lowest number of vectors above */ ++#define FIRST_SYSTEM_VECTOR 0xea ++ + /* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority +diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h +index 9e2b952..d2c4d2a 100644 +--- a/arch/x86/include/asm/irqflags.h ++++ b/arch/x86/include/asm/irqflags.h +@@ -4,6 +4,10 @@ + #include + + #ifndef __ASSEMBLY__ ++ ++#include ++#include ++ + /* + * Interrupt control: + */ +@@ -12,6 +16,10 @@ static inline unsigned long native_save_fl(void) + { + unsigned long flags; + ++#ifdef CONFIG_IPIPE ++ flags = (!__ipipe_test_root()) << 9; ++ barrier(); ++#else + /* + * "=rm" is safe here, because "pop" adjusts the stack before + * it evaluates its effective address -- this is part of the +@@ -22,31 +30,53 @@ static inline unsigned long native_save_fl(void) + : "=rm" (flags) + : /* no input */ + : "memory"); ++#endif + + return flags; + } + + static inline void native_restore_fl(unsigned long flags) + { ++#ifdef CONFIG_IPIPE ++ barrier(); ++ __ipipe_restore_root(!(flags & X86_EFLAGS_IF)); ++#else + asm volatile("push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc"); ++#endif + } + + static inline void native_irq_disable(void) + { ++#ifdef CONFIG_IPIPE ++ ipipe_check_context(ipipe_root_domain); ++ __ipipe_stall_root(); ++ barrier(); ++#else + asm volatile("cli": : :"memory"); ++#endif + } + + static inline void native_irq_enable(void) + { ++#ifdef CONFIG_IPIPE ++ barrier(); ++ __ipipe_unstall_root(); ++#else + asm volatile("sti": : :"memory"); ++#endif + } + + static inline void native_safe_halt(void) + { ++#ifdef CONFIG_IPIPE ++ barrier(); ++ __ipipe_halt_root(); ++#else + asm volatile("sti; hlt": : :"memory"); ++#endif + } + + static inline void native_halt(void) +@@ -71,6 +101,71 @@ static inline void raw_local_irq_restore(unsigned long flags) + native_restore_fl(flags); + } + ++static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real) ++{ ++ /* ++ * Merge virtual and real interrupt mask bits into a single ++ * (32bit) word. 
++ */ ++ return (real & ~(1L << 31)) | ((virt != 0) << 31); ++} ++ ++static inline int raw_demangle_irq_bits(unsigned long *x) ++{ ++ int virt = (*x & (1L << 31)) != 0; ++ *x &= ~(1L << 31); ++ return virt; ++} ++ ++#define local_irq_save_hw_notrace(x) \ ++ __asm__ __volatile__("pushf ; pop %0 ; cli":"=g" (x): /* no input */ :"memory") ++#define local_irq_restore_hw_notrace(x) \ ++ __asm__ __volatile__("push %0 ; popf": /* no output */ :"g" (x):"memory", "cc") ++ ++#define local_save_flags_hw(x) __asm__ __volatile__("pushf ; pop %0":"=g" (x): /* no input */) ++ ++#define irqs_disabled_hw() \ ++ ({ \ ++ unsigned long x; \ ++ local_save_flags_hw(x); \ ++ !((x) & X86_EFLAGS_IF); \ ++ }) ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define local_irq_disable_hw() do { \ ++ if (!irqs_disabled_hw()) { \ ++ local_irq_disable_hw_notrace(); \ ++ ipipe_trace_begin(0x80000000); \ ++ } \ ++ } while (0) ++#define local_irq_enable_hw() do { \ ++ if (irqs_disabled_hw()) { \ ++ ipipe_trace_end(0x80000000); \ ++ local_irq_enable_hw_notrace(); \ ++ } \ ++ } while (0) ++#define local_irq_save_hw(x) do { \ ++ local_save_flags_hw(x); \ ++ if ((x) & X86_EFLAGS_IF) { \ ++ local_irq_disable_hw_notrace(); \ ++ ipipe_trace_begin(0x80000001); \ ++ } \ ++ } while (0) ++#define local_irq_restore_hw(x) do { \ ++ if ((x) & X86_EFLAGS_IF) \ ++ ipipe_trace_end(0x80000001); \ ++ local_irq_restore_hw_notrace(x); \ ++ } while (0) ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ ++#define local_irq_save_hw(x) local_irq_save_hw_notrace(x) ++#define local_irq_restore_hw(x) local_irq_restore_hw_notrace(x) ++#define local_irq_enable_hw() local_irq_enable_hw_notrace() ++#define local_irq_disable_hw() local_irq_disable_hw_notrace() ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++#define local_irq_disable_hw_notrace() __asm__ __volatile__("cli": : :"memory") ++#define local_irq_enable_hw_notrace() __asm__ __volatile__("sti": : :"memory") ++ + static inline void raw_local_irq_disable(void) + { + native_irq_disable(); +@@ -104,16 +199,40 @@ static inline void halt(void) + */ + static inline unsigned long __raw_local_irq_save(void) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags = (!__ipipe_test_and_stall_root()) << 9; ++ barrier(); ++#else + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); ++#endif + + return flags; + } + #else + +-#define ENABLE_INTERRUPTS(x) sti +-#define DISABLE_INTERRUPTS(x) cli ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_X86_32 ++#define DISABLE_INTERRUPTS(clobbers) PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti ++#define ENABLE_INTERRUPTS(clobbers) call __ipipe_unstall_root ++#else /* CONFIG_X86_64 */ ++/* Not worth virtualizing in x86_64 mode. 
*/ ++#define DISABLE_INTERRUPTS(clobbers) cli ++#define ENABLE_INTERRUPTS(clobbers) sti ++#endif /* CONFIG_X86_64 */ ++#define ENABLE_INTERRUPTS_HW_COND sti ++#define DISABLE_INTERRUPTS_HW_COND cli ++#define DISABLE_INTERRUPTS_HW(clobbers) cli ++#define ENABLE_INTERRUPTS_HW(clobbers) sti ++#else /* !CONFIG_IPIPE */ ++#define ENABLE_INTERRUPTS(x) sti ++#define DISABLE_INTERRUPTS(x) cli ++#define ENABLE_INTERRUPTS_HW_COND ++#define DISABLE_INTERRUPTS_HW_COND ++#define DISABLE_INTERRUPTS_HW(clobbers) DISABLE_INTERRUPTS(clobbers) ++#define ENABLE_INTERRUPTS_HW(clobbers) ENABLE_INTERRUPTS(clobbers) ++#endif /* !CONFIG_IPIPE */ + + #ifdef CONFIG_X86_64 + #define SWAPGS swapgs +@@ -156,8 +275,10 @@ static inline unsigned long __raw_local_irq_save(void) + #define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +-#define raw_local_irq_save(flags) \ +- do { (flags) = __raw_local_irq_save(); } while (0) ++#define raw_local_irq_save(flags) do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ (flags) = __raw_local_irq_save(); \ ++ } while (0) + + static inline int raw_irqs_disabled_flags(unsigned long flags) + { +@@ -189,7 +310,10 @@ static inline int raw_irqs_disabled(void) + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ ++ pushfl; \ ++ sti; \ + call lockdep_sys_exit; \ ++ popfl; \ + popl %edx; \ + popl %ecx; \ + popl %eax; +diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h +index 4a2d4e0..1ee45d4 100644 +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -30,11 +30,14 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) + #endif + } + +-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, +- struct task_struct *tsk) ++static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) + { + unsigned cpu = smp_processor_id(); + ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++ WARN_ON_ONCE(!irqs_disabled_hw()); ++#endif + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); +@@ -70,10 +73,23 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + #endif + } + ++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) ++{ ++ unsigned long flags; ++ local_irq_save_hw_cond(flags); ++ __switch_mm(prev, next, tsk); ++ local_irq_restore_hw_cond(flags); ++} ++ ++#define ipipe_mm_switch_protect(flags) local_irq_save_hw_cond(flags) ++#define ipipe_mm_switch_unprotect(flags) \ ++ local_irq_restore_hw_cond(flags) ++ + #define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm((prev), (next)); \ +- switch_mm((prev), (next), NULL); \ ++ __switch_mm((prev), (next), NULL); \ + } while (0); + + #ifdef CONFIG_X86_32 +diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h +index 139d4c1..3914d19 100644 +--- a/arch/x86/include/asm/nmi.h ++++ b/arch/x86/include/asm/nmi.h +@@ -29,7 +29,7 @@ extern void setup_apic_nmi_watchdog(void *); + extern void stop_apic_nmi_watchdog(void *); + extern void disable_timer_nmi_watchdog(void); + extern void enable_timer_nmi_watchdog(void); +-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); ++extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason); + extern void cpu_nmi_set_wd_enabled(void); + + extern atomic_t nmi_active; +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h 
+index 13b1885..3e80c19 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -435,6 +435,7 @@ struct thread_struct { + unsigned short ds; + unsigned short fsindex; + unsigned short gsindex; ++ unsigned long rip; + #endif + #ifdef CONFIG_X86_32 + unsigned long ip; +diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h +index f08f973..093687e 100644 +--- a/arch/x86/include/asm/system.h ++++ b/arch/x86/include/asm/system.h +@@ -126,8 +126,10 @@ do { \ + #define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ ++ "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ +- "call __switch_to\n\t" \ ++ "pushq %P[threadrip](%[next])\n\t" /* restore RIP */ \ ++ "jmp __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ +@@ -141,6 +143,7 @@ do { \ + __switch_canary_oparam \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ ++ [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [_tif_fork] "i" (_TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ +@@ -305,8 +308,13 @@ static inline void native_wbinvd(void) + #else + #define read_cr0() (native_read_cr0()) + #define write_cr0(x) (native_write_cr0(x)) ++#ifdef CONFIG_IPIPE ++#define read_cr2() __raw_get_cpu_var(__ipipe_cr2) ++#define write_cr2(x) __raw_get_cpu_var(__ipipe_cr2) = (x) ++#else /* !CONFIG_IPIPE */ + #define read_cr2() (native_read_cr2()) + #define write_cr2(x) (native_write_cr2(x)) ++#endif /* !CONFIG_IPIPE */ + #define read_cr3() (native_read_cr3()) + #define write_cr3(x) (native_write_cr3(x)) + #define read_cr4() (native_read_cr4()) +diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h +index 4da91ad..25e346e 100644 +--- a/arch/x86/include/asm/traps.h ++++ b/arch/x86/include/asm/traps.h +@@ -82,8 +82,8 @@ extern int panic_on_unrecovered_nmi; + void math_error(void __user *); + void math_emulate(struct math_emu_info *); + #ifndef CONFIG_X86_32 +-asmlinkage void smp_thermal_interrupt(void); + asmlinkage void mce_threshold_interrupt(void); + #endif ++asmlinkage void smp_thermal_interrupt(void); + + #endif /* _ASM_X86_TRAPS_H */ +diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile +index d8e5d0c..847cc01 100644 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -85,6 +85,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o + obj-$(CONFIG_KGDB) += kgdb.o + obj-$(CONFIG_VM86) += vm86_32.o + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ++obj-$(CONFIG_IPIPE) += ipipe.o + + obj-$(CONFIG_HPET_TIMER) += hpet.o + +diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c +index 168e172..948c033 100644 +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -446,7 +446,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: +@@ -466,7 +466,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, + break; + } + +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + /* +@@ -982,7 +982,7 @@ void lapic_shutdown(void) + if (!cpu_has_apic && !apic_from_smp_config()) + return; + +- 
local_irq_save(flags); ++ local_irq_save_hw(flags); + + #ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) +@@ -992,7 +992,7 @@ void lapic_shutdown(void) + disable_local_APIC(); + + +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + /* +@@ -1166,6 +1166,10 @@ static void __cpuinit lapic_setup_esr(void) + oldvalue, value); + } + ++int __ipipe_check_lapic(void) ++{ ++ return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY); ++} + + /** + * setup_local_APIC - setup the local APIC +@@ -1229,7 +1233,7 @@ void __cpuinit setup_local_APIC(void) + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) +- ack_APIC_irq(); ++ __ack_APIC_irq(); + + inc_irq_stat(irq_spurious_count); + +@@ -2006,13 +2010,13 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); + #endif + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + disable_local_APIC(); + + if (intr_remapping_enabled) + disable_intr_remapping(); + +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + return 0; + } + +@@ -2027,7 +2031,7 @@ static int lapic_resume(struct sys_device *dev) + if (!apic_pm_state.active) + return 0; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + if (intr_remapping_enabled) { + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { +@@ -2093,7 +2097,7 @@ static int lapic_resume(struct sys_device *dev) + free_ioapic_entries(ioapic_entries); + } + restore: +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return ret; + } +diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c +index 873f81f..aada533 100644 +--- a/arch/x86/kernel/apic/apic_flat_64.c ++++ b/arch/x86/kernel/apic/apic_flat_64.c +@@ -72,9 +72,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) + { + unsigned long flags; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c +index dc4f486..2ed892b 100644 +--- a/arch/x86/kernel/apic/io_apic.c ++++ b/arch/x86/kernel/apic/io_apic.c +@@ -75,8 +75,11 @@ + */ + int sis_apic_bug = -1; + +-static DEFINE_SPINLOCK(ioapic_lock); +-static DEFINE_SPINLOCK(vector_lock); ++static IPIPE_DEFINE_SPINLOCK(ioapic_lock); ++static IPIPE_DEFINE_SPINLOCK(vector_lock); ++#ifdef CONFIG_IPIPE ++unsigned long bugous_edge_irq_triggers[(NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG]; ++#endif + + /* + * # of IRQ routing registers +@@ -417,6 +420,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned + writel(value, &io_apic->data); + } + ++#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP) ++ + static bool io_apic_level_ack_pending(struct irq_cfg *cfg) + { + struct irq_pin_list *entry; +@@ -440,6 +445,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) + return false; + } + ++#endif /* !CONFIG_IPIPE || CONFIG_SMP */ ++ + union entry_union { + struct { u32 w1, w2; }; + struct IO_APIC_route_entry entry; +@@ -615,6 +622,7 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) + BUG_ON(!cfg); + + spin_lock_irqsave(&ioapic_lock, flags); ++ ipipe_irq_lock(desc->irq); + __mask_IO_APIC_irq(cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); 
+ } +@@ -625,7 +633,13 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); ++#ifdef CONFIG_IPIPE ++ if (test_and_clear_bit(desc->irq, &bugous_edge_irq_triggers[0])) ++ __unmask_and_level_IO_APIC_irq(cfg); ++ else ++#endif + __unmask_IO_APIC_irq(cfg); ++ ipipe_irq_unlock(desc->irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -2250,6 +2264,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) + } + cfg = irq_cfg(irq); + __unmask_IO_APIC_irq(cfg); ++ ipipe_irq_unlock(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +@@ -2529,23 +2544,61 @@ static void irq_complete_move(struct irq_desc **descp) + static inline void irq_complete_move(struct irq_desc **descp) {} + #endif + ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ ++#ifdef CONFIG_INTR_REMAP ++static void eoi_ioapic_irq(struct irq_desc *desc); ++#else /* !CONFIG_INTR_REMAP */ ++static inline void eoi_ioapic_irq(struct irq_desc *desc) {} ++#endif /* !CONFIG_INTR_REMAP */ ++ ++static void move_apic_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg; ++ ++ if (desc->handle_irq == &handle_edge_irq) { ++ spin_lock(&desc->lock); ++ irq_complete_move(&desc); ++ move_native_irq(irq); ++ spin_unlock(&desc->lock); ++ } else if (desc->handle_irq == &handle_fasteoi_irq) { ++ spin_lock(&desc->lock); ++ irq_complete_move(&desc); ++ if (irq_remapped(irq)) ++ eoi_ioapic_irq(desc); ++ if (unlikely(desc->status & IRQ_MOVE_PENDING)) { ++ cfg = desc->chip_data; ++ if (!io_apic_level_ack_pending(cfg)) ++ move_masked_irq(irq); ++ unmask_IO_APIC_irq_desc(desc); ++ } ++ spin_unlock(&desc->lock); ++ } else ++ WARN_ON_ONCE(1); ++} ++#endif /* CONFIG_IPIPE && CONFIG_SMP */ ++ + static void ack_apic_edge(unsigned int irq) + { ++#ifndef CONFIG_IPIPE + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); + move_native_irq(irq); +- ack_APIC_irq(); ++#endif /* CONFIG_IPIPE */ ++ __ack_APIC_irq(); + } + + atomic_t irq_mis_count; + + static void ack_apic_level(unsigned int irq) + { +- struct irq_desc *desc = irq_to_desc(irq); + unsigned long v; + int i; + struct irq_cfg *cfg; ++#ifndef CONFIG_IPIPE ++ struct irq_desc *desc = irq_to_desc(irq); + int do_unmask_irq = 0; + + irq_complete_move(&desc); +@@ -2628,6 +2681,26 @@ static void ack_apic_level(unsigned int irq) + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } ++#else /* CONFIG_IPIPE */ ++ /* ++ * Prevent low priority IRQs grabbed by high priority domains ++ * from being delayed, waiting for a high priority interrupt ++ * handler running in a low priority domain to complete. ++ */ ++ cfg = irq_cfg(irq); ++ i = cfg->vector; ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ spin_lock(&ioapic_lock); ++ if (unlikely(!(v & (1 << (i & 0x1f))))) { ++ /* IO-APIC erratum: see comment above. 
*/ ++ atomic_inc(&irq_mis_count); ++ __mask_and_edge_IO_APIC_irq(cfg); ++ set_bit(irq, &bugous_edge_irq_triggers[0]); ++ } else ++ __mask_IO_APIC_irq(cfg); ++ spin_unlock(&ioapic_lock); ++ __ack_APIC_irq(); ++#endif /* CONFIG_IPIPE */ + } + + #ifdef CONFIG_INTR_REMAP +@@ -2656,14 +2729,14 @@ eoi_ioapic_irq(struct irq_desc *desc) + + static void ir_ack_apic_edge(unsigned int irq) + { +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + static void ir_ack_apic_level(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + +- ack_APIC_irq(); ++ __ack_APIC_irq(); + eoi_ioapic_irq(desc); + } + #endif /* CONFIG_INTR_REMAP */ +@@ -2677,6 +2750,9 @@ static struct irq_chip ioapic_chip __read_mostly = { + .eoi = ack_apic_level, + #ifdef CONFIG_SMP + .set_affinity = set_ioapic_affinity_irq, ++#ifdef CONFIG_IPIPE ++ .move = move_apic_irq, ++#endif + #endif + .retrigger = ioapic_retrigger_irq, + }; +@@ -2691,6 +2767,9 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { + .eoi = ir_ack_apic_level, + #ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, ++#ifdef CONFIG_IPIPE ++ .move = move_apic_irq, ++#endif + #endif + #endif + .retrigger = ioapic_retrigger_irq, +@@ -2736,23 +2815,29 @@ static inline void init_IO_APIC_traps(void) + + static void mask_lapic_irq(unsigned int irq) + { +- unsigned long v; ++ unsigned long v, flags; + ++ local_irq_save_hw_cond(flags); ++ ipipe_irq_lock(irq); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ local_irq_restore_hw_cond(flags); + } + + static void unmask_lapic_irq(unsigned int irq) + { +- unsigned long v; ++ unsigned long v, flags; + ++ local_irq_save_hw_cond(flags); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++ ipipe_irq_unlock(irq); ++ local_irq_restore_hw_cond(flags); + } + + static void ack_lapic_irq(unsigned int irq) + { +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + static struct irq_chip lapic_chip __read_mostly = { +@@ -2760,6 +2845,9 @@ static struct irq_chip lapic_chip __read_mostly = { + .mask = mask_lapic_irq, + .unmask = unmask_lapic_irq, + .ack = ack_lapic_irq, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .move = move_apic_irq, ++#endif + }; + + static void lapic_register_intr(int irq, struct irq_desc *desc) +@@ -3007,6 +3095,10 @@ static inline void __init check_timer(void) + "...trying to set up timer as Virtual Wire IRQ...\n"); + + lapic_register_intr(0, desc); ++#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64) ++ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq; ++ irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq; ++#endif + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + +@@ -3404,6 +3496,9 @@ static struct irq_chip msi_chip = { + .ack = ack_apic_edge, + #ifdef CONFIG_SMP + .set_affinity = set_msi_irq_affinity, ++#ifdef CONFIG_IPIPE ++ .move = move_apic_irq, ++#endif + #endif + .retrigger = ioapic_retrigger_irq, + }; +@@ -3416,6 +3511,9 @@ static struct irq_chip msi_ir_chip = { + .ack = ir_ack_apic_edge, + #ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, ++#ifdef CONFIG_IPIPE ++ .move = move_apic_irq, ++#endif + #endif + #endif + .retrigger = ioapic_retrigger_irq, +@@ -3704,6 +3802,9 @@ static struct irq_chip ht_irq_chip = { + .ack = ack_apic_edge, + #ifdef CONFIG_SMP + .set_affinity = set_ht_irq_affinity, ++#ifdef CONFIG_IPIPE ++ .move = move_apic_irq, ++#endif + #endif + .retrigger = ioapic_retrigger_irq, + }; +@@ -4075,6 +4176,14 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int 
*polarity) + return 0; + } + ++#ifdef CONFIG_IPIPE ++unsigned __ipipe_get_ioapic_irq_vector(int irq) ++{ ++ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? ++ ipipe_apic_irq_vector(irq) : irq_cfg(irq)->vector; ++} ++#endif /* CONFIG_IPIPE */ ++ + /* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online +diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c +index 08385e0..f5ad117 100644 +--- a/arch/x86/kernel/apic/ipi.c ++++ b/arch/x86/kernel/apic/ipi.c +@@ -29,12 +29,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, +@@ -46,14 +46,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + + /* See Hack comment above */ + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, +@@ -68,12 +68,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + * should be modified to do 1 message per cluster ID - mbligh + */ + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, +@@ -85,7 +85,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + + /* See Hack comment above */ + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; +@@ -93,7 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + #ifdef CONFIG_X86_32 +@@ -109,10 +109,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) + if (WARN_ONCE(!mask, "empty IPI mask")) + return; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + void default_send_IPI_allbutself(int vector) +diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c +index 7ff61d6..a72056e 100644 +--- a/arch/x86/kernel/apic/nmi.c ++++ b/arch/x86/kernel/apic/nmi.c +@@ -59,6 +59,10 @@ static unsigned int nmi_hz = HZ; + static DEFINE_PER_CPU(short, wd_enabled); + static int endflag __initdata; + ++static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason); ++int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = 
&default_nmi_watchdog_tick; ++EXPORT_SYMBOL(nmi_watchdog_tick); ++ + static inline unsigned int get_nmi_count(int cpu) + { + return per_cpu(irq_stat, cpu).__nmi_count; +@@ -387,7 +391,7 @@ void touch_nmi_watchdog(void) + EXPORT_SYMBOL(touch_nmi_watchdog); + + notrace __kprobes int +-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) ++default_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) + { + /* + * Since current_thread_info()-> is always on the stack, and we +diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c +index a5371ec..442f45c 100644 +--- a/arch/x86/kernel/apic/x2apic_cluster.c ++++ b/arch/x86/kernel/apic/x2apic_cluster.c +@@ -61,13 +61,13 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void +@@ -79,7 +79,7 @@ static void + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; +@@ -87,7 +87,7 @@ static void + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void x2apic_send_IPI_allbutself(int vector) +@@ -98,7 +98,7 @@ static void x2apic_send_IPI_allbutself(int vector) + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; +@@ -106,7 +106,7 @@ static void x2apic_send_IPI_allbutself(int vector) + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void x2apic_send_IPI_all(int vector) +diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c +index a8989aa..fba85fa 100644 +--- a/arch/x86/kernel/apic/x2apic_phys.c ++++ b/arch/x86/kernel/apic/x2apic_phys.c +@@ -62,12 +62,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void +@@ -79,14 +79,14 @@ static void + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void x2apic_send_IPI_allbutself(int vector) +@@ -97,14 +97,14 @@ static void x2apic_send_IPI_allbutself(int vector) + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + static void x2apic_send_IPI_all(int vector) +diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c +index 
228d982..c249555 100644 +--- a/arch/x86/kernel/cpu/mtrr/cyrix.c ++++ b/arch/x86/kernel/cpu/mtrr/cyrix.c +@@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ +@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; +@@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) + { + unsigned char arr, arr_type, arr_size; ++ unsigned long flags; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +@@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, + } + } + ++ local_irq_save_hw(flags); ++ + prepare_set(); + + base <<= PAGE_SHIFT; +@@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); ++ ++ local_irq_restore_hw(flags); + } + + typedef struct { +@@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; + + static void cyrix_set_all(void) + { ++ unsigned long flags; + int i; + ++ local_irq_save_hw(flags); + prepare_set(); + + /* the CCRs are not contiguous */ +@@ -263,6 +270,7 @@ static void cyrix_set_all(void) + } + + post_set(); ++ local_irq_restore_hw(flags); + } + + static struct mtrr_ops cyrix_mtrr_ops = { +diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c +index 55da0c5..5594a98 100644 +--- a/arch/x86/kernel/cpu/mtrr/generic.c ++++ b/arch/x86/kernel/cpu/mtrr/generic.c +@@ -635,7 +635,7 @@ static void generic_set_all(void) + unsigned long mask, count; + unsigned long flags; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + prepare_set(); + + /* Actually set the state */ +@@ -645,7 +645,7 @@ static void generic_set_all(void) + pat_init(); + + post_set(); +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { +@@ -669,12 +669,12 @@ static void generic_set_all(void) + static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) + { +- unsigned long flags; ++ unsigned long flags, _flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + +- local_irq_save(flags); ++ local_irq_save_full(flags, _flags); + prepare_set(); + + if (size == 0) { +@@ -695,7 +695,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, + } + + post_set(); +- local_irq_restore(flags); ++ local_irq_restore_full(flags, _flags); + } + + int generic_validate_add_page(unsigned long base, unsigned long size, +diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c +index 2d8a371..8c6afa5 100644 +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -327,6 +327,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) + local_irq_enable(); + do_exit(SIGBUS); + } ++EXPORT_SYMBOL_GPL(die_nmi); + + static int __init oops_setup(char *s) + { +diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c +index f7dd2a7..37b2338 100644 +--- 
a/arch/x86/kernel/dumpstack_32.c ++++ b/arch/x86/kernel/dumpstack_32.c +@@ -108,6 +108,9 @@ void show_registers(struct pt_regs *regs) + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", + TASK_COMM_LEN, current->comm, task_pid_nr(current), + current_thread_info(), current, task_thread_info(current)); ++#ifdef CONFIG_IPIPE ++ printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name); ++#endif /* CONFIG_IPIPE */ + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. +diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c +index a071e6b..de2dde2 100644 +--- a/arch/x86/kernel/dumpstack_64.c ++++ b/arch/x86/kernel/dumpstack_64.c +@@ -254,6 +254,11 @@ void show_registers(struct pt_regs *regs) + sp = regs->sp; + printk("CPU %d ", cpu); + __show_regs(regs, 1); ++#ifdef CONFIG_IPIPE ++ if (ipipe_current_domain != ipipe_root_domain) ++ printk("I-pipe domain %s\n", ipipe_current_domain->name); ++ else ++#endif /* CONFIG_IPIPE */ + printk("Process %s (pid: %d, threadinfo %p, task %p)\n", + cur->comm, cur->pid, task_thread_info(cur), cur); + +diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S +index c097e7d..06bf0c7 100644 +--- a/arch/x86/kernel/entry_32.S ++++ b/arch/x86/kernel/entry_32.S +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -79,8 +80,61 @@ + + #define nr_syscalls ((syscall_table_size)/4) + ++#ifdef CONFIG_IPIPE ++#define EMULATE_ROOT_IRET(bypass) \ ++ call __ipipe_unstall_iret_root ; \ ++ TRACE_IRQS_ON ; \ ++ bypass: \ ++ movl PT_EAX(%esp),%eax ++#define TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,%eax ++#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \ ++ movl %esp,%eax ; \ ++ call __ipipe_syscall_root ; \ ++ testl %eax,%eax ; \ ++ js bypass1 ; \ ++ jne bypass2 ; \ ++ movl PT_ORIG_EAX(%esp),%eax ++#define PUSH_XCODE(v) pushl $ ex_ ## v ++#define PUSH_XVEC(v) pushl $ ex_ ## v ++#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \ ++ call __ipipe_handle_exception ; \ ++ testl %eax,%eax ; \ ++ jnz restore_ret ++#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \ ++ movl %ecx, %ds ; \ ++ movl %ecx, %es ; \ ++ movl %esp, %eax ; \ ++ movl $ex_ ## code,%edx ; \ ++ call __ipipe_divert_exception ; \ ++ testl %eax,%eax ; \ ++ jnz restore_ret ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++# define IPIPE_TRACE_IRQ_ENTER \ ++ lea PT_EIP-4(%esp), %ebp; \ ++ movl PT_ORIG_EAX(%esp), %eax; \ ++ call ipipe_trace_begin ++# define IPIPE_TRACE_IRQ_EXIT \ ++ pushl %eax; \ ++ movl PT_ORIG_EAX+4(%esp), %eax; \ ++ call ipipe_trace_end; \ ++ popl %eax ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ ++#define IPIPE_TRACE_IRQ_ENTER ++#define IPIPE_TRACE_IRQ_EXIT ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++#else /* !CONFIG_IPIPE */ ++#define EMULATE_ROOT_IRET(bypass) ++#define TEST_PREEMPTIBLE(regs) testl $X86_EFLAGS_IF,PT_EFLAGS(regs) ++#define CATCH_ROOT_SYSCALL(bypass1,bypass2) ++#define PUSH_XCODE(v) pushl $v ++#define PUSH_XVEC(v) pushl v ++#define HANDLE_EXCEPTION(code) call *%code ++#define DIVERT_EXCEPTION(code) ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_PREEMPT +-#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF ++#define preempt_stop(clobbers) DISABLE_INTERRUPTS_HW(clobbers); TRACE_IRQS_OFF + #else + #define preempt_stop(clobbers) + #define resume_kernel restore_all +@@ -318,6 +372,7 @@ + .endm + + ENTRY(ret_from_fork) ++ ENABLE_INTERRUPTS_HW_COND + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 +@@ 
-345,7 +400,7 @@ END(ret_from_fork) + RING0_PTREGS_FRAME + ret_from_exception: + preempt_stop(CLBR_ANY) +-ret_from_intr: ++ENTRY(ret_from_intr) + GET_THREAD_INFO(%ebp) + check_userspace: + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS +@@ -369,14 +424,13 @@ END(ret_from_exception) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +- DISABLE_INTERRUPTS(CLBR_ANY) + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_all + need_resched: + movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl + jz restore_all +- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? ++ TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ? + jz restore_all + call preempt_schedule_irq + jmp need_resched +@@ -424,7 +478,7 @@ sysenter_past_esp: + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL +- ENABLE_INTERRUPTS(CLBR_NONE) ++ ENABLE_INTERRUPTS_HW(CLBR_NONE) + + /* + * Load the potential sixth argument from user stack. +@@ -440,6 +494,7 @@ sysenter_past_esp: + .previous + + GET_THREAD_INFO(%ebp) ++ CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_out) + + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz sysenter_audit +@@ -448,6 +503,7 @@ sysenter_do_call: + jae syscall_badsys + call *sys_call_table(,%eax,4) + movl %eax,PT_EAX(%esp) ++sysenter_tail: + LOCKDEP_SYS_EXIT + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF +@@ -456,10 +512,13 @@ sysenter_do_call: + jne sysexit_audit + sysenter_exit: + /* if something modifies registers it must also disable sysexit */ ++ EMULATE_ROOT_IRET(sysenter_out) + movl PT_EIP(%esp), %edx + movl PT_OLDESP(%esp), %ecx + xorl %ebp,%ebp +- TRACE_IRQS_ON ++#ifndef CONFIG_IPIPE ++ TRACE_IRQS_ON ++#endif + 1: mov PT_FS(%esp), %fs + PTGS_TO_GS + ENABLE_INTERRUPTS_SYSEXIT +@@ -520,6 +579,7 @@ ENTRY(system_call) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + GET_THREAD_INFO(%ebp) ++ CATCH_ROOT_SYSCALL(syscall_exit,restore_ret) + # system call tracing in operation / emulation + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz syscall_trace_entry +@@ -552,6 +612,10 @@ restore_all_notrace: + CFI_REMEMBER_STATE + je ldt_ss # returning to user-space with LDT SS + restore_nocheck: ++#ifdef CONFIG_IPIPE ++ call __ipipe_unstall_iret_root ++#endif /* CONFIG_IPIPE */ ++restore_ret: + RESTORE_REGS 4 # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + irq_return: +@@ -559,7 +623,7 @@ irq_return: + .section .fixup,"ax" + ENTRY(iret_exc) + pushl $0 # no error code +- pushl $do_iret_error ++ PUSH_XCODE(do_iret_error) + jmp error_code + .previous + .section __ex_table,"a" +@@ -613,7 +677,7 @@ ldt_ss: + /* Disable interrupts, but do not irqtrace this section: we + * will soon execute iret and the tracer was already set to + * the irqstate after the iret */ +- DISABLE_INTERRUPTS(CLBR_EAX) ++ DISABLE_INTERRUPTS_HW(CLBR_EAX) + lss (%esp), %esp /* switch to espfix segment */ + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck +@@ -627,6 +691,7 @@ work_pending: + testb $_TIF_NEED_RESCHED, %cl + jz work_notifysig + work_resched: ++ ENABLE_INTERRUPTS_HW_COND + call schedule + LOCKDEP_SYS_EXIT + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt +@@ -799,6 +864,48 @@ END(irq_entries_start) + END(interrupt) + .previous + ++#ifdef CONFIG_IPIPE ++ .p2align CONFIG_X86_L1_CACHE_SHIFT ++common_interrupt: ++ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ ++ SAVE_ALL ++ IPIPE_TRACE_IRQ_ENTER ++ movl %esp, %eax ++ call __ipipe_handle_irq ++ IPIPE_TRACE_IRQ_EXIT ++ testl %eax,%eax ++ jnz ret_from_intr ++ jmp restore_ret ++ 
CFI_ENDPROC ++ ++#define BUILD_INTERRUPT3(name, nr, fn) \ ++ENTRY(name) \ ++ RING0_INT_FRAME; \ ++ pushl $~(nr); \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ SAVE_ALL; \ ++ IPIPE_TRACE_IRQ_ENTER; \ ++ movl %esp, %eax; \ ++ call __ipipe_handle_irq; \ ++ IPIPE_TRACE_IRQ_EXIT; \ ++ testl %eax,%eax; \ ++ jnz ret_from_intr; \ ++ jmp restore_ret; \ ++ CFI_ENDPROC ++ ++#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0) ++ BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1) ++ BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2) ++ BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3) ++#ifdef CONFIG_SMP ++ BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR) ++#endif ++#endif ++ ++#else /* !CONFIG_IPIPE */ + /* + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: +@@ -829,6 +936,8 @@ ENDPROC(name) + + #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + ++#endif /* !CONFIG_IPIPE */ ++ + /* The include is where all of the SMP etc. interrupts come from */ + #include + +@@ -836,7 +945,7 @@ ENTRY(coprocessor_error) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_coprocessor_error ++ PUSH_XCODE(do_coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -846,7 +955,7 @@ ENTRY(simd_coprocessor_error) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_simd_coprocessor_error ++ PUSH_XCODE(do_simd_coprocessor_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -856,7 +965,7 @@ ENTRY(device_not_available) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_device_not_available ++ PUSH_XCODE(do_device_not_available) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -881,7 +990,7 @@ ENTRY(overflow) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_overflow ++ PUSH_XCODE(do_overflow) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -891,7 +1000,7 @@ ENTRY(bounds) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_bounds ++ PUSH_XCODE(do_bounds) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -901,7 +1010,7 @@ ENTRY(invalid_op) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_invalid_op ++ PUSH_XCODE(do_invalid_op) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -911,7 +1020,7 @@ ENTRY(coprocessor_segment_overrun) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_coprocessor_segment_overrun ++ PUSH_XCODE(do_coprocessor_segment_overrun) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -919,7 +1028,7 @@ END(coprocessor_segment_overrun) + + ENTRY(invalid_TSS) + RING0_EC_FRAME +- pushl $do_invalid_TSS ++ PUSH_XCODE(do_invalid_TSS) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -927,7 +1036,7 @@ END(invalid_TSS) + + ENTRY(segment_not_present) + RING0_EC_FRAME +- pushl $do_segment_not_present ++ PUSH_XCODE(do_segment_not_present) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -935,7 +1044,7 @@ END(segment_not_present) + + ENTRY(stack_segment) + RING0_EC_FRAME +- pushl $do_stack_segment ++ PUSH_XCODE(do_stack_segment) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -943,7 +1052,7 @@ END(stack_segment) + + ENTRY(alignment_check) + RING0_EC_FRAME +- pushl $do_alignment_check ++ PUSH_XCODE(do_alignment_check) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + 
CFI_ENDPROC +@@ -953,7 +1062,7 @@ ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_divide_error ++ PUSH_XCODE(do_divide_error) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -964,7 +1073,7 @@ ENTRY(machine_check) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl machine_check_vector ++ PUSH_XVEC(machine_check_vector) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -975,7 +1084,7 @@ ENTRY(spurious_interrupt_bug) + RING0_INT_FRAME + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_spurious_interrupt_bug ++ PUSH_XCODE(do_spurious_interrupt_bug) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +@@ -1210,7 +1319,7 @@ syscall_table_size=(.-sys_call_table) + + ENTRY(page_fault) + RING0_EC_FRAME +- pushl $do_page_fault ++ PUSH_XCODE(do_page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +@@ -1260,7 +1369,7 @@ error_code: + movl %ecx, %es + TRACE_IRQS_OFF + movl %esp,%eax # pt_regs pointer +- call *%edi ++ HANDLE_EXCEPTION(edi) + jmp ret_from_exception + CFI_ENDPROC + END(page_fault) +@@ -1304,6 +1413,7 @@ debug_stack_correct: + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF ++ DIVERT_EXCEPTION(do_debug) + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug +@@ -1404,6 +1514,7 @@ ENTRY(int3) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF ++ DIVERT_EXCEPTION(do_int3) + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 +@@ -1413,7 +1524,7 @@ END(int3) + + ENTRY(general_protection) + RING0_EC_FRAME +- pushl $do_general_protection ++ PUSH_XCODE(do_general_protection) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S +index b5c061f..ce29b45 100644 +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -61,6 +62,13 @@ + #define __AUDIT_ARCH_LE 0x40000000 + + .code64 ++ ++#ifdef CONFIG_IPIPE ++#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq ++#else /* !CONFIG_IPIPE */ ++#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq ++#endif /* !CONFIG_IPIPE */ ++ + #ifdef CONFIG_FUNCTION_TRACER + #ifdef CONFIG_DYNAMIC_FTRACE + ENTRY(mcount) +@@ -336,7 +344,10 @@ ENTRY(save_args) + /* + * We entered an interrupt context - irqs are off: + */ +-2: TRACE_IRQS_OFF ++2: ++#ifndef CONFIG_IPIPE ++ TRACE_IRQS_OFF ++#endif + ret + CFI_ENDPROC + END(save_args) +@@ -402,6 +413,7 @@ ENTRY(ret_from_fork) + CFI_ADJUST_CFA_OFFSET 8 + popf # reset kernel eflags + CFI_ADJUST_CFA_OFFSET -8 ++ ENABLE_INTERRUPTS_HW_COND + + call schedule_tail # rdi: 'prev' task parameter + +@@ -477,6 +489,17 @@ ENTRY(system_call_after_swapgs) + movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET ++#ifdef CONFIG_IPIPE ++ pushq %rdi ++ pushq %rax ++ leaq -(ARGOFFSET-16)(%rsp),%rdi # regs for handler ++ call __ipipe_syscall_root_thunk ++ testl %eax, %eax ++ popq %rax ++ popq %rdi ++ js ret_from_sys_call ++ jnz sysret_fastexit ++#endif + GET_THREAD_INFO(%rcx) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) + jnz tracesys +@@ -506,6 +529,7 @@ sysret_check: + * sysretq will re-enable interrupts: + */ + TRACE_IRQS_ON ++sysret_fastexit: + movq RIP-ARGOFFSET(%rsp),%rcx + CFI_REGISTER rip,rcx + RESTORE_ARGS 0,-ARG_SKIP,1 +@@ -517,6 +541,8 @@ sysret_check: + /* Handle reschedules */ + /* edx: work, edi: workmask */ + 
sysret_careful: ++ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx ++ jnz ret_from_sys_call_trace + bt $TIF_NEED_RESCHED,%edx + jnc sysret_signal + TRACE_IRQS_ON +@@ -528,6 +554,16 @@ sysret_careful: + CFI_ADJUST_CFA_OFFSET -8 + jmp sysret_check + ++ret_from_sys_call_trace: ++ TRACE_IRQS_ON ++ sti ++ SAVE_REST ++ FIXUP_TOP_OF_STACK %rdi ++ movq %rsp,%rdi ++ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ ++ RESTORE_REST ++ jmp int_ret_from_sys_call ++ + /* Handle a signal */ + sysret_signal: + TRACE_IRQS_ON +@@ -800,7 +836,29 @@ END(interrupt) + CFI_ADJUST_CFA_OFFSET 10*8 + call save_args + PARTIAL_FRAME 0 ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ pushq %rbp ++ leaq RIP-8(%rdi), %rbp # make interrupted address show up in trace ++ pushq %rdi ++ movq ORIG_RAX(%rdi), %rdi # IRQ number ++ notq %rdi # ...is inverted, fix up ++ call ipipe_trace_begin ++ popq %rdi ++ popq %rbp ++ ++ call \func ++ ++ pushq %rbp ++ pushq %rax ++ movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi ++ leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp ++ notq %rdi ++ call ipipe_trace_end ++ popq %rax ++ popq %rbp ++#else + call \func ++#endif + .endm + + /* +@@ -809,9 +867,24 @@ END(interrupt) + */ + .p2align CONFIG_X86_L1_CACHE_SHIFT + common_interrupt: ++#ifdef CONFIG_IPIPE ++ XCPT_FRAME ++ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ ++ interrupt __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ decl PER_CPU_VAR(irq_count) ++ leaveq ++ CFI_DEF_CFA_REGISTER rsp ++ CFI_ADJUST_CFA_OFFSET -8 ++ testl $3,CS-ARGOFFSET(%rsp) ++ jz restore_args ++ jmp retint_swapgs_notrace ++#else /* !CONFIG_IPIPE */ + XCPT_FRAME + addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ + interrupt do_IRQ ++#endif /* !CONFIG_IPIPE */ + /* 0(%rsp): old_rsp-ARGOFFSET */ + ret_from_intr: + DISABLE_INTERRUPTS(CLBR_NONE) +@@ -820,7 +893,7 @@ ret_from_intr: + leaveq + CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 +-exit_intr: ++ENTRY(exit_intr) + GET_THREAD_INFO(%rcx) + testl $3,CS-ARGOFFSET(%rsp) + je retint_kernel +@@ -840,20 +913,20 @@ retint_check: + jnz retint_careful + + retint_swapgs: /* return to user-space */ ++ TRACE_IRQS_IRETQ + /* + * The iretq could re-enable interrupts: + */ +- DISABLE_INTERRUPTS(CLBR_ANY) +- TRACE_IRQS_IRETQ ++retint_swapgs_notrace: + SWAPGS ++retint_noswapgs: + jmp restore_args + + retint_restore_args: /* return to kernel space */ +- DISABLE_INTERRUPTS(CLBR_ANY) ++ TRACE_IRQS_IRETQ + /* + * The iretq could re-enable interrupts: + */ +- TRACE_IRQS_IRETQ + restore_args: + RESTORE_ARGS 0,8,0 + +@@ -935,7 +1008,15 @@ ENTRY(retint_kernel) + jnc retint_restore_args + bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ + jnc retint_restore_args +- call preempt_schedule_irq ++#ifdef CONFIG_IPIPE ++ /* ++ * We may have preempted call_softirq before __do_softirq raised or ++ * after it lowered the preemption counter. ++ */ ++ cmpl $0,PER_CPU_VAR(irq_count) ++ jge retint_restore_args ++#endif ++ PREEMPT_SCHEDULE_IRQ + jmp exit_intr + #endif + +@@ -945,16 +1026,31 @@ END(common_interrupt) + /* + * APIC interrupts. 
+ */ +-.macro apicinterrupt num sym do_sym ++ .macro apicinterrupt num sym do_sym + ENTRY(\sym) + INTR_FRAME + pushq $~(\num) + CFI_ADJUST_CFA_OFFSET 8 ++#ifdef CONFIG_IPIPE ++ interrupt __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ decl PER_CPU_VAR(irq_count) ++ leaveq ++ CFI_DEF_CFA_REGISTER rsp ++ CFI_ADJUST_CFA_OFFSET -8 ++ testl $3,CS-ARGOFFSET(%rsp) ++ jz restore_args ++ jmp retint_swapgs_notrace ++ CFI_ENDPROC ++ .endm ++#else /* !CONFIG_IPIPE */ + interrupt \do_sym + jmp ret_from_intr + CFI_ENDPROC + END(\sym) + .endm ++#endif /* !CONFIG_IPIPE */ + + #ifdef CONFIG_SMP + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ +@@ -979,6 +1075,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ + invalidate_interrupt1 smp_invalidate_interrupt + apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ + invalidate_interrupt2 smp_invalidate_interrupt ++#ifndef CONFIG_IPIPE + apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ + invalidate_interrupt3 smp_invalidate_interrupt + apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ +@@ -989,6 +1086,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ + invalidate_interrupt6 smp_invalidate_interrupt + apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ + invalidate_interrupt7 smp_invalidate_interrupt ++#endif /* !CONFIG_IPIPE */ + #endif + + apicinterrupt THRESHOLD_APIC_VECTOR \ +@@ -1023,7 +1121,7 @@ apicinterrupt LOCAL_PENDING_VECTOR \ + /* + * Exception entry points. + */ +-.macro zeroentry sym do_sym ++.macro zeroentry sym do_sym ex_code + ENTRY(\sym) + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME +@@ -1034,13 +1132,26 @@ ENTRY(\sym) + DEFAULT_FRAME 0 + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ ++#ifdef CONFIG_IPIPE ++ movq $\ex_code,%rdx ++ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ ++ testl %eax, %eax ++ jz error_exit ++ movl %ebx,%eax ++ RESTORE_REST ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ testl %eax,%eax ++ jne retint_noswapgs ++ jmp retint_swapgs_notrace ++#else /* !CONFIG_IPIPE */ + call \do_sym ++#endif /* !CONFIG_IPIPE */ + jmp error_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) + .endm + +-.macro paranoidzeroentry sym do_sym ++.macro paranoidzeroentry sym do_sym ex_code=0 + ENTRY(\sym) + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME +@@ -1050,14 +1161,27 @@ ENTRY(\sym) + call save_paranoid + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ ++#ifdef CONFIG_IPIPE ++ .if \ex_code ++ movq $\ex_code,%rsi ++ call __ipipe_divert_exception /* handle(regs, ex_code) */ ++ testl %eax,%eax ++ jnz 1f ++ movq %rsp,%rdi ++ .endif ++#endif + xorl %esi,%esi /* no error code */ + call \do_sym ++#ifdef CONFIG_IPIPE ++ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ ++1: ++#endif + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) + .endm + +-.macro paranoidzeroentry_ist sym do_sym ist ++.macro paranoidzeroentry_ist sym do_sym ist ex_code=0 + ENTRY(\sym) + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME +@@ -1067,17 +1191,30 @@ ENTRY(\sym) + call save_paranoid + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ ++#ifdef CONFIG_IPIPE ++ .if \ex_code ++ movq $\ex_code,%rsi ++ call __ipipe_divert_exception /* handle(regs, ex_code) */ ++ testl %eax,%eax ++ jnz 1f ++ movq %rsp,%rdi ++ .endif ++#endif + xorl %esi,%esi /* no error code */ + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + call \do_sym + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) ++#ifdef CONFIG_IPIPE ++ xorl %eax,%eax /* tell paranoid_exit to propagate the exception 
*/ ++1: ++#endif + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) + .endm + +-.macro errorentry sym do_sym ++.macro errorentry sym do_sym ex_code + ENTRY(\sym) + XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME +@@ -1088,14 +1225,27 @@ ENTRY(\sym) + movq %rsp,%rdi /* pt_regs pointer */ + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ ++#ifdef CONFIG_IPIPE ++ movq $\ex_code,%rdx ++ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ ++ testl %eax, %eax ++ jz error_exit ++ movl %ebx,%eax ++ RESTORE_REST ++ DISABLE_INTERRUPTS(CLBR_NONE) ++ testl %eax,%eax ++ jne retint_noswapgs ++ jmp retint_swapgs_notrace ++#else /* !CONFIG_IPIPE */ + call \do_sym ++#endif /* !CONFIG_IPIPE */ + jmp error_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) + .endm + + /* error code is on the stack already */ +-.macro paranoiderrorentry sym do_sym ++.macro paranoiderrorentry sym do_sym ex_code=0 + ENTRY(\sym) + XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME +@@ -1105,27 +1255,40 @@ ENTRY(\sym) + DEFAULT_FRAME 0 + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ ++#ifdef CONFIG_IPIPE ++ .if \ex_code ++ movq $\ex_code,%rsi ++ call __ipipe_divert_exception /* handle(regs, ex_code) */ ++ testl %eax,%eax ++ jnz 1f ++ movq %rsp,%rdi ++ .endif ++#endif + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + call \do_sym ++#ifdef CONFIG_IPIPE ++ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ ++1: ++#endif + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) + .endm + +-zeroentry divide_error do_divide_error +-zeroentry overflow do_overflow +-zeroentry bounds do_bounds +-zeroentry invalid_op do_invalid_op +-zeroentry device_not_available do_device_not_available ++zeroentry divide_error do_divide_error ex_do_divide_error ++zeroentry overflow do_overflow ex_do_overflow ++zeroentry bounds do_bounds ex_do_bounds ++zeroentry invalid_op do_invalid_op ex_do_invalid_op ++zeroentry device_not_available do_device_not_available ex_do_device_not_available + paranoiderrorentry double_fault do_double_fault +-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun +-errorentry invalid_TSS do_invalid_TSS +-errorentry segment_not_present do_segment_not_present +-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug +-zeroentry coprocessor_error do_coprocessor_error +-errorentry alignment_check do_alignment_check +-zeroentry simd_coprocessor_error do_simd_coprocessor_error ++zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun ++errorentry invalid_TSS do_invalid_TSS ex_do_invalid_TSS ++errorentry segment_not_present do_segment_not_present ex_do_segment_not_present ++zeroentry spurious_interrupt_bug do_spurious_interrupt_bug ex_do_spurious_interrupt_bug ++zeroentry coprocessor_error do_coprocessor_error ex_do_coprocessor_error ++errorentry alignment_check do_alignment_check ex_do_alignment_check ++zeroentry simd_coprocessor_error do_simd_coprocessor_error ex_do_simd_coprocessor_error + + /* Reload gs selector with exception handling */ + /* edi: new selector */ +@@ -1255,14 +1418,18 @@ ENTRY(call_softirq) + CFI_REL_OFFSET rbp,0 + mov %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp ++ DISABLE_INTERRUPTS_HW_COND + incl PER_CPU_VAR(irq_count) + cmove PER_CPU_VAR(irq_stack_ptr),%rsp ++ ENABLE_INTERRUPTS_HW_COND + push %rbp # backlink for old unwinder + call __do_softirq ++ DISABLE_INTERRUPTS_HW_COND + 
leaveq + CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 + decl PER_CPU_VAR(irq_count) ++ ENABLE_INTERRUPTS_HW_COND + ret + CFI_ENDPROC + END(call_softirq) +@@ -1371,16 +1538,16 @@ END(xen_failsafe_callback) + */ + .pushsection .kprobes.text, "ax" + +-paranoidzeroentry_ist debug do_debug DEBUG_STACK +-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ++paranoidzeroentry_ist debug do_debug DEBUG_STACK ex_do_debug ++paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ex_do_int3 + paranoiderrorentry stack_segment do_stack_segment + #ifdef CONFIG_XEN + zeroentry xen_debug do_debug + zeroentry xen_int3 do_int3 + errorentry xen_stack_segment do_stack_segment + #endif +-errorentry general_protection do_general_protection +-errorentry page_fault do_page_fault ++errorentry general_protection do_general_protection ex_do_general_protection ++errorentry page_fault do_page_fault ex_do_page_fault + #ifdef CONFIG_X86_MCE + paranoidzeroentry machine_check *machine_check_vector(%rip) + #endif +@@ -1403,8 +1570,13 @@ ENTRY(paranoid_exit) + INTR_FRAME + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF ++paranoid_notrace: + testl %ebx,%ebx /* swapgs needed? */ + jnz paranoid_restore ++#ifdef CONFIG_IPIPE ++ testl %eax,%eax ++ jnz paranoid_swapgs ++#endif + testl $3,CS(%rsp) + jnz paranoid_userspace + paranoid_swapgs: +diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c +index 23c1679..1c00022 100644 +--- a/arch/x86/kernel/i8253.c ++++ b/arch/x86/kernel/i8253.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -130,6 +131,12 @@ static cycle_t pit_read(struct clocksource *cs) + int count; + u32 jifs; + ++#ifdef CONFIG_IPIPE ++ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) ++ /* We don't really own the PIT. */ ++ return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count; ++#endif /* CONFIG_IPIPE */ ++ + spin_lock_irqsave(&i8253_lock, flags); + /* + * Although our caller may have the read side of xtime_lock, +diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c +index df89102..cfb29a2 100644 +--- a/arch/x86/kernel/i8259.c ++++ b/arch/x86/kernel/i8259.c +@@ -32,7 +32,7 @@ + */ + + static int i8259A_auto_eoi; +-DEFINE_SPINLOCK(i8259A_lock); ++IPIPE_DEFINE_SPINLOCK(i8259A_lock); + static void mask_and_ack_8259A(unsigned int); + + struct irq_chip i8259A_chip = { +@@ -69,6 +69,7 @@ void disable_8259A_irq(unsigned int irq) + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); ++ ipipe_irq_lock(irq); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); +@@ -79,15 +80,18 @@ void disable_8259A_irq(unsigned int irq) + + void enable_8259A_irq(unsigned int irq) + { +- unsigned int mask = ~(1 << irq); ++ unsigned int mask = (1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); +- cached_irq_mask &= mask; +- if (irq & 8) +- outb(cached_slave_mask, PIC_SLAVE_IMR); +- else +- outb(cached_master_mask, PIC_MASTER_IMR); ++ if (cached_irq_mask & mask) { ++ cached_irq_mask &= ~mask; ++ if (irq & 8) ++ outb(cached_slave_mask, PIC_SLAVE_IMR); ++ else ++ outb(cached_master_mask, PIC_MASTER_IMR); ++ ipipe_irq_unlock(irq); ++ } + spin_unlock_irqrestore(&i8259A_lock, flags); + } + +@@ -168,6 +172,18 @@ static void mask_and_ack_8259A(unsigned int irq) + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; ++#ifdef CONFIG_IPIPE ++ if (irq == 0) { ++ /* ++ * Fast timer ack -- don't mask (unless supposedly ++ * spurious). We trace outb's in order to detect ++ * broken hardware inducing large delays. 
++ */ ++ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. */ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ return; ++ } ++#endif /* CONFIG_IPIPE */ + cached_irq_mask |= irqmask; + + handle_real_irq: +diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c +new file mode 100644 +index 0000000..ae67343 +--- /dev/null ++++ b/arch/x86/kernel/ipipe.c +@@ -0,0 +1,1074 @@ ++/* -*- linux-c -*- ++ * linux/arch/x86/kernel/ipipe.c ++ * ++ * Copyright (C) 2002-2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Architecture-dependent I-PIPE support for x86. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_IO_APIC ++#include ++#endif /* CONFIG_X86_IO_APIC */ ++#include ++#endif /* CONFIG_X86_LOCAL_APIC */ ++#include ++ ++int __ipipe_tick_irq = 0; /* Legacy timer */ ++ ++DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs); ++ ++DEFINE_PER_CPU(unsigned long, __ipipe_cr2); ++EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2); ++ ++#ifdef CONFIG_SMP ++ ++static cpumask_t __ipipe_cpu_sync_map; ++ ++static cpumask_t __ipipe_cpu_lock_map; ++ ++static unsigned long __ipipe_critical_lock; ++ ++static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); ++ ++static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); ++ ++static void (*__ipipe_cpu_sync) (void); ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline ++ * just like if it has been actually received from a hw source. Also ++ * works for virtual interrupts. 
++ */ ++int ipipe_trigger_irq(unsigned int irq) ++{ ++ struct pt_regs regs; ++ unsigned long flags; ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ if (ipipe_virtual_irq_p(irq)) { ++ if (!test_bit(irq - IPIPE_VIRQ_BASE, ++ &__ipipe_virtual_irq_map)) ++ return -EINVAL; ++ } else if (irq_to_desc(irq) == NULL) ++ return -EINVAL; ++#endif ++ local_irq_save_hw(flags); ++ regs.flags = flags; ++ regs.orig_ax = irq; /* Positive value - IRQ won't be acked */ ++ regs.cs = __KERNEL_CS; ++ __ipipe_handle_irq(®s); ++ local_irq_restore_hw(flags); ++ ++ return 1; ++} ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *info) ++{ ++ info->ncpus = num_online_cpus(); ++ info->cpufreq = ipipe_cpu_freq(); ++ info->archdep.tmirq = __ipipe_tick_irq; ++#ifdef CONFIG_X86_TSC ++ info->archdep.tmfreq = ipipe_cpu_freq(); ++#else /* !CONFIG_X86_TSC */ ++ info->archdep.tmfreq = CLOCK_TICK_RATE; ++#endif /* CONFIG_X86_TSC */ ++ ++ return 0; ++} ++ ++#ifdef CONFIG_X86_UV ++asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs); ++#endif ++#ifdef CONFIG_X86_MCE_THRESHOLD ++asmlinkage void smp_threshold_interrupt(void); ++#endif ++#ifdef CONFIG_X86_NEW_MCE ++asmlinkage void smp_mce_self_interrupt(void); ++#endif ++ ++static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->ipipe_ack(irq, desc); ++} ++ ++void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) ++{ ++ irq_to_desc(irq)->status &= ~IRQ_DISABLED; ++} ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc) ++{ ++ __ack_APIC_irq(); ++} ++ ++static void __ipipe_null_handler(unsigned irq, void *cookie) ++{ ++} ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw ++ interrupts are off, and secondary CPUs are still lost in space. */ ++ ++void __init __ipipe_enable_pipeline(void) ++{ ++ unsigned int vector, irq; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++ /* Map the APIC system vectors. 
*/ ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), ++ (ipipe_irq_handler_t)&smp_apic_timer_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), ++ (ipipe_irq_handler_t)&smp_spurious_interrupt, ++ NULL, ++ &__ipipe_noack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(ERROR_APIC_VECTOR), ++ (ipipe_irq_handler_t)&smp_error_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0), ++ &__ipipe_null_handler, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1), ++ &__ipipe_null_handler, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2), ++ &__ipipe_null_handler, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3), ++ &__ipipe_null_handler, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), ++ (ipipe_irq_handler_t)&smp_thermal_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#endif /* CONFIG_X86_THERMAL_VECTOR */ ++ ++#ifdef CONFIG_X86_MCE_THRESHOLD ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR), ++ (ipipe_irq_handler_t)&smp_threshold_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#endif /* CONFIG_X86_MCE_THRESHOLD */ ++ ++#ifdef CONFIG_X86_NEW_MCE ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(MCE_SELF_VECTOR), ++ (ipipe_irq_handler_t)&smp_mce_self_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#endif /* CONFIG_X86_MCE_THRESHOLD */ ++ ++#ifdef CONFIG_X86_UV ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(UV_BAU_MESSAGE), ++ (ipipe_irq_handler_t)&uv_bau_message_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#endif /* CONFIG_X86_UV */ ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(GENERIC_INTERRUPT_VECTOR), ++ (ipipe_irq_handler_t)&smp_generic_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++#ifdef CONFIG_PERF_COUNTERS ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(LOCAL_PENDING_VECTOR), ++ (ipipe_irq_handler_t)&perf_pending_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#endif /* CONFIG_PERF_COUNTERS */ ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_SMP ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(RESCHEDULE_VECTOR), ++ (ipipe_irq_handler_t)&smp_reschedule_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ for (vector = INVALIDATE_TLB_VECTOR_START; ++ vector <= INVALIDATE_TLB_VECTOR_END; ++vector) ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(vector), ++ (ipipe_irq_handler_t)&smp_invalidate_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), ++ (ipipe_irq_handler_t)&smp_call_function_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ 
IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR), ++ (ipipe_irq_handler_t)&smp_call_function_single_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ IRQ_MOVE_CLEANUP_VECTOR, ++ (ipipe_irq_handler_t)&smp_irq_move_cleanup_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(REBOOT_VECTOR), ++ (ipipe_irq_handler_t)&smp_reboot_interrupt, ++ NULL, ++ &__ipipe_ack_apic, ++ IPIPE_STDROOT_MASK); ++#else ++ (void)vector; ++#endif /* CONFIG_SMP */ ++ ++ /* Finally, virtualize the remaining ISA and IO-APIC ++ * interrupts. Interrupts which have already been virtualized ++ * will just beget a silent -EPERM error since ++ * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */ ++ ++ for (irq = 0; irq < NR_IRQS; irq++) ++ /* ++ * Fails for IPIPE_CRITICAL_IPI and IRQ_MOVE_CLEANUP_VECTOR, ++ * but that's ok. ++ */ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ irq, ++ (ipipe_irq_handler_t)&do_IRQ, ++ NULL, ++ &__ipipe_ack_irq, ++ IPIPE_STDROOT_MASK); ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ /* Eventually allow these vectors to be reprogrammed. */ ++ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK; ++ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK; ++ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK; ++ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK; ++#endif /* CONFIG_X86_LOCAL_APIC */ ++} ++ ++#ifdef CONFIG_SMP ++ ++cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask) ++{ ++ cpumask_t oldmask; ++ ++ if (irq_to_desc(irq)->chip->set_affinity == NULL) ++ return CPU_MASK_NONE; ++ ++ if (cpus_empty(cpumask)) ++ return CPU_MASK_NONE; /* Return mask value -- no change. */ ++ ++ cpus_and(cpumask, cpumask, cpu_online_map); ++ if (cpus_empty(cpumask)) ++ return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */ ++ ++ cpumask_copy(&oldmask, irq_to_desc(irq)->affinity); ++ irq_to_desc(irq)->chip->set_affinity(irq, &cpumask); ++ ++ return oldmask; ++} ++ ++int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ int self; ++ ++ if (ipi != IPIPE_SERVICE_IPI0 && ++ ipi != IPIPE_SERVICE_IPI1 && ++ ipi != IPIPE_SERVICE_IPI2 && ++ ipi != IPIPE_SERVICE_IPI3) ++ return -EINVAL; ++ ++ local_irq_save_hw(flags); ++ ++ self = cpu_isset(ipipe_processor_id(),cpumask); ++ cpu_clear(ipipe_processor_id(), cpumask); ++ ++ if (!cpus_empty(cpumask)) ++ apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi)); ++ ++ if (self) ++ ipipe_trigger_irq(ipi); ++ ++ local_irq_restore_hw(flags); ++ ++ return 0; ++} ++ ++/* Always called with hw interrupts off. */ ++ ++void __ipipe_do_critical_sync(unsigned irq, void *cookie) ++{ ++ int cpu = ipipe_processor_id(); ++ ++ cpu_set(cpu, __ipipe_cpu_sync_map); ++ ++ /* Now we are in sync with the lock requestor running on another ++ CPU. Enter a spinning wait until he releases the global ++ lock. */ ++ spin_lock(&__ipipe_cpu_barrier); ++ ++ /* Got it. Now get out. */ ++ ++ if (__ipipe_cpu_sync) ++ /* Call the sync routine if any. 
*/ ++ __ipipe_cpu_sync(); ++ ++ spin_unlock(&__ipipe_cpu_barrier); ++ ++ cpu_clear(cpu, __ipipe_cpu_sync_map); ++} ++ ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) ++{ ++ ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic; ++ ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync; ++ ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL; ++ /* Immediately handle in the current domain but *never* pass */ ++ ipd->irqs[IPIPE_CRITICAL_IPI].control = ++ IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * ipipe_critical_enter() -- Grab the superlock excluding all CPUs but ++ * the current one from a critical section. This lock is used when we ++ * must enforce a global critical section for a single CPU in a ++ * possibly SMP system whichever context the CPUs are running. ++ */ ++unsigned long ipipe_critical_enter(void (*syncfn) (void)) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ ++#ifdef CONFIG_SMP ++ if (unlikely(num_online_cpus() == 1)) ++ return flags; ++ ++ { ++ int cpu = ipipe_processor_id(); ++ cpumask_t lock_map; ++ ++ if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) { ++ while (test_and_set_bit(0, &__ipipe_critical_lock)) { ++ int n = 0; ++ do { ++ cpu_relax(); ++ } while (++n < cpu); ++ } ++ ++ spin_lock(&__ipipe_cpu_barrier); ++ ++ __ipipe_cpu_sync = syncfn; ++ ++ /* Send the sync IPI to all processors but the current one. */ ++ apic->send_IPI_allbutself(IPIPE_CRITICAL_VECTOR); ++ ++ cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map); ++ ++ while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) ++ cpu_relax(); ++ } ++ ++ atomic_inc(&__ipipe_critical_count); ++ } ++#endif /* CONFIG_SMP */ ++ ++ return flags; ++} ++ ++/* ipipe_critical_exit() -- Release the superlock. */ ++ ++void ipipe_critical_exit(unsigned long flags) ++{ ++#ifdef CONFIG_SMP ++ if (num_online_cpus() == 1) ++ goto out; ++ ++ if (atomic_dec_and_test(&__ipipe_critical_count)) { ++ spin_unlock(&__ipipe_cpu_barrier); ++ ++ while (!cpus_empty(__ipipe_cpu_sync_map)) ++ cpu_relax(); ++ ++ cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map); ++ clear_bit(0, &__ipipe_critical_lock); ++ smp_mb__after_clear_bit(); ++ } ++out: ++#endif /* CONFIG_SMP */ ++ ++ local_irq_restore_hw(flags); ++} ++ ++static inline void __fixup_if(int s, struct pt_regs *regs) ++{ ++ /* ++ * Have the saved hw state look like the domain stall bit, so ++ * that __ipipe_unstall_iret_root() restores the proper ++ * pipeline state for the root stage upon exit. ++ */ ++ if (s) ++ regs->flags &= ~X86_EFLAGS_IF; ++ else ++ regs->flags |= X86_EFLAGS_IF; ++} ++ ++#ifdef CONFIG_X86_32 ++ ++/* ++ * Check the stall bit of the root domain to make sure the existing ++ * preemption opportunity upon in-kernel resumption could be ++ * exploited. In case a rescheduling could take place, the root stage ++ * is stalled before the hw interrupts are re-enabled. This routine ++ * must be called with hw interrupts off. ++ */ ++ ++asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs) ++{ ++ if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) ++ /* Root stage is stalled: rescheduling denied. */ ++ return 0; ++ ++ __ipipe_stall_root(); ++ trace_hardirqs_off(); ++ local_irq_enable_hw_notrace(); ++ ++ return 1; /* Ok, may reschedule now. */ ++} ++ ++asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ /* Emulate IRET's handling of the interrupt flag. 
*/ ++ ++ local_irq_disable_hw(); ++ ++ p = ipipe_root_cpudom_ptr(); ++ ++ /* ++ * Restore the software state as it used to be on kernel ++ * entry. CAUTION: NMIs must *not* return through this ++ * emulation. ++ */ ++ if (raw_irqs_disabled_flags(regs.flags)) { ++ if (!__test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) ++ trace_hardirqs_off(); ++ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) ++ regs.flags |= X86_EFLAGS_IF; ++ } else { ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++ /* ++ * We could have received and logged interrupts while ++ * stalled in the syscall path: play the log now to ++ * release any pending event. The SYNC_BIT prevents ++ * infinite recursion in case of flooding. ++ */ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ } ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ipipe_trace_end(0x8000000D); ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++} ++ ++#else /* !CONFIG_X86_32 */ ++ ++#ifdef CONFIG_PREEMPT ++ ++asmlinkage void preempt_schedule_irq(void); ++ ++void __ipipe_preempt_schedule_irq(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ /* ++ * We have no IRQ state fixup on entry to exceptions in ++ * x86_64, so we have to stall the root stage before ++ * rescheduling. ++ */ ++ BUG_ON(!irqs_disabled_hw()); ++ local_irq_save(flags); ++ local_irq_enable_hw(); ++ preempt_schedule_irq(); /* Ok, may reschedule now. */ ++ local_irq_disable_hw(); ++ ++ /* ++ * Flush any pending interrupt that may have been logged after ++ * preempt_schedule_irq() stalled the root stage before ++ * returning to us, and now. ++ */ ++ p = ipipe_root_cpudom_ptr(); ++ if (unlikely(__ipipe_ipending_p(p))) { ++ add_preempt_count(PREEMPT_ACTIVE); ++ trace_hardirqs_on(); ++ clear_bit(IPIPE_STALL_FLAG, &p->status); ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ sub_preempt_count(PREEMPT_ACTIVE); ++ } ++ ++ __local_irq_restore_nosync(flags); ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++#endif /* !CONFIG_X86_32 */ ++ ++void __ipipe_halt_root(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ /* Emulate sti+hlt sequence over the root domain. */ ++ ++ local_irq_disable_hw(); ++ ++ p = ipipe_root_cpudom_ptr(); ++ ++ trace_hardirqs_on(); ++ clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) { ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ local_irq_enable_hw(); ++ } else { ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ipipe_trace_end(0x8000000E); ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ asm volatile("sti; hlt": : :"memory"); ++ } ++} ++ ++static void do_machine_check_vector(struct pt_regs *regs, long error_code) ++{ ++#ifdef CONFIG_X86_MCE ++#ifdef CONFIG_X86_32 ++ extern void (*machine_check_vector)(struct pt_regs *, long error_code); ++ machine_check_vector(regs, error_code); ++#else ++ do_machine_check(regs, error_code); ++#endif ++#endif /* CONFIG_X86_MCE */ ++} ++ ++/* Work around genksyms's issue with over-qualification in decls. 
*/ ++ ++typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long); ++ ++typedef __ipipe_exhandler *__ipipe_exptr; ++ ++static __ipipe_exptr __ipipe_std_extable[] = { ++ ++ [ex_do_divide_error] = &do_divide_error, ++ [ex_do_overflow] = &do_overflow, ++ [ex_do_bounds] = &do_bounds, ++ [ex_do_invalid_op] = &do_invalid_op, ++ [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun, ++ [ex_do_invalid_TSS] = &do_invalid_TSS, ++ [ex_do_segment_not_present] = &do_segment_not_present, ++ [ex_do_stack_segment] = &do_stack_segment, ++ [ex_do_general_protection] = do_general_protection, ++ [ex_do_page_fault] = (__ipipe_exptr)&do_page_fault, ++ [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug, ++ [ex_do_coprocessor_error] = &do_coprocessor_error, ++ [ex_do_alignment_check] = &do_alignment_check, ++ [ex_machine_check_vector] = &do_machine_check_vector, ++ [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error, ++ [ex_do_device_not_available] = &do_device_not_available, ++#ifdef CONFIG_X86_32 ++ [ex_do_iret_error] = &do_iret_error, ++#endif ++}; ++ ++#ifdef CONFIG_KGDB ++#include ++ ++static int __ipipe_xlate_signo[] = { ++ ++ [ex_do_divide_error] = SIGFPE, ++ [ex_do_debug] = SIGTRAP, ++ [2] = -1, ++ [ex_do_int3] = SIGTRAP, ++ [ex_do_overflow] = SIGSEGV, ++ [ex_do_bounds] = SIGSEGV, ++ [ex_do_invalid_op] = SIGILL, ++ [ex_do_device_not_available] = -1, ++ [8] = -1, ++ [ex_do_coprocessor_segment_overrun] = SIGFPE, ++ [ex_do_invalid_TSS] = SIGSEGV, ++ [ex_do_segment_not_present] = SIGBUS, ++ [ex_do_stack_segment] = SIGBUS, ++ [ex_do_general_protection] = SIGSEGV, ++ [ex_do_page_fault] = SIGSEGV, ++ [ex_do_spurious_interrupt_bug] = -1, ++ [ex_do_coprocessor_error] = -1, ++ [ex_do_alignment_check] = SIGBUS, ++ [ex_machine_check_vector] = -1, ++ [ex_do_simd_coprocessor_error] = -1, ++ [20 ... 31] = -1, ++#ifdef CONFIG_X86_32 ++ [ex_do_iret_error] = SIGSEGV, ++#endif ++}; ++#endif /* CONFIG_KGDB */ ++ ++int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector) ++{ ++ bool root_entry = false; ++ unsigned long flags = 0; ++ unsigned long cr2 = 0; ++ ++ if (ipipe_root_domain_p) { ++ root_entry = true; ++ ++ local_save_flags(flags); ++ /* ++ * Replicate hw interrupt state into the virtual mask ++ * before calling the I-pipe event handler over the ++ * root domain. Also required later when calling the ++ * Linux exception handler. ++ */ ++ if (irqs_disabled_hw()) ++ local_irq_disable(); ++ } ++#ifdef CONFIG_KGDB ++ /* catch exception KGDB is interested in over non-root domains */ ++ else if (__ipipe_xlate_signo[vector] >= 0 && ++ !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], ++ error_code, regs)) ++ return 1; ++#endif /* CONFIG_KGDB */ ++ ++ if (vector == ex_do_page_fault) ++ cr2 = native_read_cr2(); ++ ++ if (unlikely(ipipe_trap_notify(vector, regs))) { ++ if (root_entry) ++ local_irq_restore_nosync(flags); ++ return 1; ++ } ++ ++ if (likely(ipipe_root_domain_p)) { ++ /* ++ * In case we faulted in the iret path, regs.flags do not ++ * match the root domain state. The fault handler or the ++ * low-level return code may evaluate it. Fix this up, either ++ * by the root state sampled on entry or, if we migrated to ++ * root, with the current state. ++ */ ++ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : ++ raw_irqs_disabled(), regs); ++ } else { ++ /* Detect unhandled faults over non-root domains. */ ++ struct ipipe_domain *ipd = ipipe_current_domain; ++ ++ /* Switch to root so that Linux can handle the fault cleanly. 
*/ ++ __ipipe_current_domain = ipipe_root_domain; ++ ++ ipipe_trace_panic_freeze(); ++ ++ /* Always warn about user land and unfixable faults. */ ++ if ((error_code & 4) || !search_exception_tables(instruction_pointer(regs))) { ++ printk(KERN_ERR "BUG: Unhandled exception over domain" ++ " %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++#ifdef CONFIG_IPIPE_DEBUG ++ /* Also report fixable ones when debugging is enabled. */ ++ } else { ++ printk(KERN_WARNING "WARNING: Fixable exception over " ++ "domain %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++#endif /* CONFIG_IPIPE_DEBUG */ ++ } ++ } ++ ++ if (vector == ex_do_page_fault) ++ write_cr2(cr2); ++ ++ __ipipe_std_extable[vector](regs, error_code); ++ ++ /* ++ * Relevant for 64-bit: Restore root domain state as the low-level ++ * return code will not align it to regs.flags. ++ */ ++ if (root_entry) ++ local_irq_restore_nosync(flags); ++ ++ return 0; ++} ++ ++int __ipipe_divert_exception(struct pt_regs *regs, int vector) ++{ ++ bool root_entry = false; ++ unsigned long flags = 0; ++ ++ if (ipipe_root_domain_p) { ++ root_entry = true; ++ ++ local_save_flags(flags); ++ ++ if (irqs_disabled_hw()) { ++ /* ++ * Same root state handling as in ++ * __ipipe_handle_exception. ++ */ ++ local_irq_disable(); ++ } ++ } ++#ifdef CONFIG_KGDB ++ /* catch int1 and int3 over non-root domains */ ++ else { ++#ifdef CONFIG_X86_32 ++ if (vector != ex_do_device_not_available) ++#endif ++ { ++ unsigned int condition = 0; ++ ++ if (vector == 1) ++ get_debugreg(condition, 6); ++ if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs)) ++ return 1; ++ } ++ } ++#endif /* CONFIG_KGDB */ ++ ++ if (unlikely(ipipe_trap_notify(vector, regs))) { ++ if (root_entry) ++ local_irq_restore_nosync(flags); ++ return 1; ++ } ++ ++ /* see __ipipe_handle_exception */ ++ if (likely(ipipe_root_domain_p)) ++ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : ++ raw_irqs_disabled(), regs); ++ /* ++ * No need to restore root state in the 64-bit case, the Linux handler ++ * and the return code will take care of it. ++ */ ++ ++ return 0; ++} ++ ++int __ipipe_syscall_root(struct pt_regs *regs) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int ret; ++ ++ /* ++ * This routine either returns: ++ * 0 -- if the syscall is to be passed to Linux; ++ * >0 -- if the syscall should not be passed to Linux, and no ++ * tail work should be performed; ++ * <0 -- if the syscall should not be passed to Linux but the ++ * tail work has to be performed (for handling signals etc). ++ */ ++ ++ if (!__ipipe_syscall_watched_p(current, regs->orig_ax) || ++ !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL)) ++ return 0; ++ ++ ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs); ++ ++ local_irq_save_hw(flags); ++ ++ if (current->ipipe_flags & PF_EVTRET) { ++ current->ipipe_flags &= ~PF_EVTRET; ++ __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs); ++ } ++ ++ if (!ipipe_root_domain_p) { ++#ifdef CONFIG_X86_32 ++ local_irq_restore_hw(flags); ++#endif ++ return 1; ++ } ++ ++ p = ipipe_root_cpudom_ptr(); ++#ifdef CONFIG_X86_32 ++ /* ++ * Fix-up only required on 32-bit as only here the IRET return code ++ * will evaluate the flags. ++ */ ++ __fixup_if(test_bit(IPIPE_STALL_FLAG, &p->status), regs); ++#endif ++ /* ++ * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is ++ * tested. 
++ */ ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOVIRT); ++#ifdef CONFIG_X86_64 ++ if (!ret) ++#endif ++ local_irq_restore_hw(flags); ++ ++ return -ret; ++} ++ ++/* ++ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic ++ * interrupt protection log is maintained here for each domain. Hw ++ * interrupts are off on entry. ++ */ ++int __ipipe_handle_irq(struct pt_regs *regs) ++{ ++ struct ipipe_domain *this_domain, *next_domain; ++ unsigned int vector = regs->orig_ax, irq; ++ struct list_head *head, *pos; ++ struct pt_regs *tick_regs; ++ int m_ack; ++ ++ if ((long)regs->orig_ax < 0) { ++ vector = ~vector; ++#ifdef CONFIG_X86_LOCAL_APIC ++ if (vector >= FIRST_SYSTEM_VECTOR) ++ irq = ipipe_apic_vector_irq(vector); ++#ifdef CONFIG_SMP ++ else if (vector == IRQ_MOVE_CLEANUP_VECTOR) ++ irq = vector; ++#endif /* CONFIG_SMP */ ++ else ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ irq = __get_cpu_var(vector_irq)[vector]; ++ m_ack = 0; ++ } else { /* This is a self-triggered one. */ ++ irq = vector; ++ m_ack = 1; ++ } ++ ++ this_domain = ipipe_current_domain; ++ ++ if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control)) ++ head = &this_domain->p_link; ++ else { ++ head = __ipipe_pipeline.next; ++ next_domain = list_entry(head, struct ipipe_domain, p_link); ++ if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { ++ if (!m_ack && next_domain->irqs[irq].acknowledge) ++ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); ++ __ipipe_dispatch_wired(next_domain, irq); ++ goto finalize_nosync; ++ } ++ } ++ ++ /* Ack the interrupt. */ ++ ++ pos = head; ++ ++ while (pos != &__ipipe_pipeline) { ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { ++ __ipipe_set_irq_pending(next_domain, irq); ++ if (!m_ack && next_domain->irqs[irq].acknowledge) { ++ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); ++ m_ack = 1; ++ } ++ } ++ if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) ++ break; ++ pos = next_domain->p_link.next; ++ } ++ ++ /* ++ * If the interrupt preempted the head domain, then do not ++ * even try to walk the pipeline, unless an interrupt is ++ * pending for it. ++ */ ++ if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) && ++ !__ipipe_ipending_p(ipipe_head_cpudom_ptr())) ++ goto finalize_nosync; ++ ++ /* ++ * Now walk the pipeline, yielding control to the highest ++ * priority domain that has pending interrupt(s) or ++ * immediately to the current domain if the interrupt has been ++ * marked as 'sticky'. This search does not go beyond the ++ * current domain in the pipeline. ++ */ ++ ++ __ipipe_walk_pipeline(head); ++ ++finalize_nosync: ++ ++ /* ++ * Given our deferred dispatching model for regular IRQs, we ++ * only record CPU regs for the last timer interrupt, so that ++ * the timer handler charges CPU times properly. It is assumed ++ * that other interrupt handlers don't actually care for such ++ * information. 
++ */ ++ ++ if (irq == __ipipe_tick_irq) { ++ tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs); ++ tick_regs->flags = regs->flags; ++ tick_regs->cs = regs->cs; ++ tick_regs->ip = regs->ip; ++ tick_regs->bp = regs->bp; ++#ifdef CONFIG_X86_64 ++ tick_regs->ss = regs->ss; ++ tick_regs->sp = regs->sp; ++#endif ++ if (!__ipipe_root_domain_p) ++ tick_regs->flags &= ~X86_EFLAGS_IF; ++ } ++ ++ if (user_mode(regs) && (current->ipipe_flags & PF_EVTRET) != 0) { ++ current->ipipe_flags &= ~PF_EVTRET; ++ __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs); ++ } ++ ++ if (!__ipipe_root_domain_p || ++ test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) ++ return 0; ++ ++#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) ++ /* ++ * Prevent a spurious rescheduling from being triggered on ++ * preemptible kernels along the way out through ++ * ret_from_intr. ++ */ ++ if ((long)regs->orig_ax < 0) ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++#endif /* CONFIG_SMP */ ++ ++ return 1; ++} ++ ++int __ipipe_check_tickdev(const char *devname) ++{ ++#ifdef CONFIG_X86_LOCAL_APIC ++ if (!strcmp(devname, "lapic")) ++ return __ipipe_check_lapic(); ++#endif ++ ++ return 1; ++} ++ ++EXPORT_SYMBOL(__ipipe_tick_irq); ++ ++EXPORT_SYMBOL_GPL(irq_to_desc); ++struct task_struct *__switch_to(struct task_struct *prev_p, ++ struct task_struct *next_p); ++EXPORT_SYMBOL_GPL(__switch_to); ++EXPORT_SYMBOL_GPL(show_stack); ++ ++EXPORT_PER_CPU_SYMBOL_GPL(init_tss); ++#ifdef CONFIG_SMP ++EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate); ++#endif /* CONFIG_SMP */ ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++EXPORT_SYMBOL(tasklist_lock); ++#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ ++ ++#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64) ++EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); ++#endif ++ ++EXPORT_SYMBOL(__ipipe_halt_root); +diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c +index 04bbd52..76d2fa3 100644 +--- a/arch/x86/kernel/irq.c ++++ b/arch/x86/kernel/irq.c +@@ -38,7 +38,7 @@ void ack_bad_irq(unsigned int irq) + * completely. 
+ * But only ack when the APIC is enabled -AK + */ +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + #define irq_stats(x) (&per_cpu(irq_stat, x)) +@@ -231,11 +231,12 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) + unsigned vector = ~regs->orig_ax; + unsigned irq; + ++ irq = __get_cpu_var(vector_irq)[vector]; ++ __ipipe_move_root_irq(irq); ++ + exit_idle(); + irq_enter(); + +- irq = __get_cpu_var(vector_irq)[vector]; +- + if (!handle_irq(irq, regs)) { + ack_APIC_irq(); + +diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c +index 40f3077..e3604ee 100644 +--- a/arch/x86/kernel/irqinit.c ++++ b/arch/x86/kernel/irqinit.c +@@ -159,11 +159,13 @@ static void __init smp_intr_init(void) + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); ++#ifndef CONFIG_IPIPE + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); ++#endif + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +@@ -178,6 +180,10 @@ static void __init smp_intr_init(void) + + /* IPI used for rebooting/stopping */ + alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); ++#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) ++ /* IPI for critical lock */ ++ alloc_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX); ++#endif + #endif + #endif /* CONFIG_SMP */ + } +@@ -212,6 +218,12 @@ static void __init apic_intr_init(void) + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); + # endif + ++#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) ++ alloc_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0); ++ alloc_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1); ++ alloc_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2); ++ alloc_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3); ++#endif + #endif + } + +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 5fd5b07..09a5fa4 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -35,7 +35,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); ++ } else { ++#ifdef CONFIG_IPIPE ++ dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, ++ GFP_KERNEL); ++ if (!dst->thread.xstate) ++ return -ENOMEM; ++#endif + } ++ + return 0; + } + +@@ -61,6 +69,10 @@ void arch_task_cache_init(void) + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC | SLAB_NOTRACK, NULL); ++#ifdef CONFIG_IPIPE ++ current->thread.xstate = kmem_cache_alloc(task_xstate_cachep, ++ GFP_KERNEL); ++#endif + } + + /* +@@ -309,7 +321,7 @@ EXPORT_SYMBOL(default_idle); + + void stop_this_cpu(void *dummy) + { +- local_irq_disable(); ++ local_irq_disable_hw(); + /* + * Remove this CPU: + */ +@@ -536,6 +548,11 @@ static void c1e_idle(void) + + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) + { ++#ifdef CONFIG_IPIPE ++#define default_to_mwait force_mwait ++#else ++#define default_to_mwait 1 ++#endif + #ifdef CONFIG_SMP + if (pm_idle == poll_idle && 
smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," +@@ -545,7 +562,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) + if (pm_idle) + return; + +- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { ++ if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { + /* + * One CPU supports mwait => All CPUs supports mwait + */ +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 4cf7956..fff349c 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -305,10 +305,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) + regs->cs = __USER_CS; + regs->ip = new_ip; + regs->sp = new_sp; ++#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */ + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); ++#endif + } + EXPORT_SYMBOL_GPL(start_thread); + +@@ -345,7 +347,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + bool preload_fpu; + +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index 868fdb4..ddf025d 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -58,6 +58,8 @@ asmlinkage extern void ret_from_fork(void); + DEFINE_PER_CPU(unsigned long, old_rsp); + static DEFINE_PER_CPU(unsigned char, is_idle); + ++asmlinkage extern void thread_return(void); ++ + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; + + static ATOMIC_NOTIFIER_HEAD(idle_notifier); +@@ -292,6 +294,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); + p->thread.usersp = me->thread.usersp; ++ p->thread.rip = (unsigned long) thread_return; + + set_tsk_thread_flag(p, TIF_FORK); + +@@ -357,10 +360,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) + regs->ss = __USER_DS; + regs->flags = 0x200; + set_fs(USER_DS); ++#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */ + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); ++#endif + } + EXPORT_SYMBOL_GPL(start_thread); + +@@ -379,7 +384,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + { + struct thread_struct *prev = &prev_p->thread; + struct thread_struct *next = &next_p->thread; +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + struct tss_struct *tss = &per_cpu(init_tss, cpu); + unsigned fsindex, gsindex; + bool preload_fpu; +diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c +index ec1de97..a3f5bd6 100644 +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -184,9 +184,9 @@ static void native_smp_send_stop(void) + udelay(1); + } + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + disable_local_APIC(); +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + /* +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 28e963d..9eee566 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void) + /* + * Activate a secondary processor. 
+ */ +-notrace static void __cpuinit start_secondary(void *unused) ++static void __cpuinit start_secondary(void *unused) + { + /* + * Don't put *anything* before cpu_init(), SMP booting is too +@@ -837,7 +837,7 @@ do_rest: + int __cpuinit native_cpu_up(unsigned int cpu) + { + int apicid = apic->cpu_present_to_apicid(cpu); +- unsigned long flags; ++ unsigned long flags, _flags; + int err; + + WARN_ON(irqs_disabled()); +@@ -889,9 +889,9 @@ int __cpuinit native_cpu_up(unsigned int cpu) + * Check TSC synchronization with the AP (keep irqs disabled + * while doing so): + */ +- local_irq_save(flags); ++ local_irq_save_full(flags, _flags); + check_tsc_sync_source(cpu); +- local_irq_restore(flags); ++ local_irq_restore_full(flags, _flags); + + while (!cpu_online(cpu)) { + cpu_relax(); +diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c +index be25734..2b61ebd 100644 +--- a/arch/x86/kernel/time.c ++++ b/arch/x86/kernel/time.c +@@ -70,11 +70,12 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) + * manually to deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ +- spin_lock(&i8259A_lock); ++ unsigned long flags; ++ spin_lock_irqsave_cond(&i8259A_lock,flags); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); +- spin_unlock(&i8259A_lock); ++ spin_unlock_irqrestore_cond(&i8259A_lock,flags); + } + + global_clock_event->event_handler(global_clock_event); +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index 7e37dce..38ff3e2 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -805,6 +805,7 @@ void __math_state_restore(void) + */ + if (unlikely(restore_fpu_checking(tsk))) { + stts(); ++ local_irq_enable_hw_cond(); + force_sig(SIGSEGV, tsk); + return; + } +@@ -827,6 +828,7 @@ asmlinkage void math_state_restore(void) + { + struct thread_info *thread = current_thread_info(); + struct task_struct *tsk = thread->task; ++ unsigned long flags; + + if (!tsk_used_math(tsk)) { + local_irq_enable(); +@@ -843,9 +845,11 @@ asmlinkage void math_state_restore(void) + local_irq_disable(); + } + ++ local_irq_save_hw_cond(flags); + clts(); /* Allow maths ops (or we recurse) */ + + __math_state_restore(); ++ local_irq_restore_hw_cond(flags); + } + EXPORT_SYMBOL_GPL(math_state_restore); + +diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c +index 9c4e625..f0f25ab 100644 +--- a/arch/x86/kernel/vm86_32.c ++++ b/arch/x86/kernel/vm86_32.c +@@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) + do_exit(SIGSEGV); + } + ++ local_irq_disable_hw_cond(); + tss = &per_cpu(init_tss, get_cpu()); + current->thread.sp0 = current->thread.saved_sp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, ¤t->thread); + current->thread.saved_sp0 = 0; + put_cpu(); ++ local_irq_enable_hw_cond(); + + ret = KVM86->regs32; + +@@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk + tsk->thread.saved_fs = info->regs32->fs; + tsk->thread.saved_gs = get_user_gs(info->regs32); + ++ local_irq_disable_hw_cond(); + tss = &per_cpu(init_tss, get_cpu()); + tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; + load_sp0(tss, &tsk->thread); + put_cpu(); ++ local_irq_enable_hw_cond(); + + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) +diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c +index c9f2d9b..78d780a 100644 +--- 
a/arch/x86/lib/mmx_32.c ++++ b/arch/x86/lib/mmx_32.c +@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) + void *p; + int i; + +- if (unlikely(in_interrupt())) ++ if (unlikely(!ipipe_root_domain_p || in_interrupt())) + return __memcpy(to, from, len); + + p = to; +diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S +index bf9a7d5..98609ae 100644 +--- a/arch/x86/lib/thunk_64.S ++++ b/arch/x86/lib/thunk_64.S +@@ -65,6 +65,10 @@ + thunk lockdep_sys_exit_thunk,lockdep_sys_exit + #endif + ++#ifdef CONFIG_IPIPE ++ thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root ++#endif ++ + /* SAVE_ARGS below is used only for the .cfi directives it contains. */ + CFI_STARTPROC + SAVE_ARGS +diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c +index f4cee90..d678a7c 100644 +--- a/arch/x86/mm/fault.c ++++ b/arch/x86/mm/fault.c +@@ -1,3 +1,4 @@ ++ + /* + * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. +@@ -323,43 +324,9 @@ out: + + #else /* CONFIG_X86_64: */ + +-void vmalloc_sync_all(void) +-{ +- unsigned long address; +- +- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; +- address += PGDIR_SIZE) { +- +- const pgd_t *pgd_ref = pgd_offset_k(address); +- unsigned long flags; +- struct page *page; +- +- if (pgd_none(*pgd_ref)) +- continue; +- +- spin_lock_irqsave(&pgd_lock, flags); +- list_for_each_entry(page, &pgd_list, lru) { +- pgd_t *pgd; +- pgd = (pgd_t *)page_address(page) + pgd_index(address); +- if (pgd_none(*pgd)) +- set_pgd(pgd, *pgd_ref); +- else +- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); +- } +- spin_unlock_irqrestore(&pgd_lock, flags); +- } +-} +- +-/* +- * 64-bit: +- * +- * Handle a fault on the vmalloc area +- * +- * This assumes no large pages in there. +- */ +-static noinline int vmalloc_fault(unsigned long address) ++static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address) + { +- pgd_t *pgd, *pgd_ref; ++ pgd_t *pgd_ref; + pud_t *pud, *pud_ref; + pmd_t *pmd, *pmd_ref; + pte_t *pte, *pte_ref; +@@ -373,7 +340,6 @@ static noinline int vmalloc_fault(unsigned long address) + * happen within a race in page table update. In the later + * case just flush: + */ +- pgd = pgd_offset(current->active_mm, address); + pgd_ref = pgd_offset_k(address); + if (pgd_none(*pgd_ref)) + return -1; +@@ -421,6 +387,46 @@ static noinline int vmalloc_fault(unsigned long address) + return 0; + } + ++void vmalloc_sync_all(void) ++{ ++ unsigned long address; ++ ++ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; ++ address += PGDIR_SIZE) { ++ ++ const pgd_t *pgd_ref = pgd_offset_k(address); ++ unsigned long flags; ++ struct page *page; ++ ++ if (pgd_none(*pgd_ref)) ++ continue; ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd_t *pgd; ++ pgd = (pgd_t *)page_address(page) + pgd_index(address); ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ else ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ } ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++} ++ ++/* ++ * 64-bit: ++ * ++ * Handle a fault on the vmalloc area ++ * ++ * This assumes no large pages in there. 
++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ pgd_t *pgd = pgd = pgd_offset(current->active_mm, address); ++ return vmalloc_sync_one(pgd, address); ++} ++ + static const char errata93_warning[] = + KERN_ERR + "******* Your BIOS seems to not contain a fix for K8 errata #93\n" +@@ -958,6 +964,9 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) + /* Get the faulting address: */ + address = read_cr2(); + ++ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) ++ local_irq_enable_hw_cond(); ++ + /* + * Detect and handle instructions that would cause a page fault for + * both a tracked kernel page and a userspace page. +@@ -1137,3 +1146,43 @@ good_area: + + up_read(&mm->mmap_sem); + } ++ ++#ifdef CONFIG_IPIPE ++void __ipipe_pin_range_globally(unsigned long start, unsigned long end) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long next, addr = start; ++ ++ do { ++ unsigned long flags; ++ struct page *page; ++ ++ next = pgd_addr_end(addr, end); ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) ++ vmalloc_sync_one(page_address(page), addr); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ ++ } while (addr = next, addr != end); ++#else ++ unsigned long next, addr = start; ++ int ret = 0; ++ ++ do { ++ struct page *page; ++ ++ next = pgd_addr_end(addr, end); ++ spin_lock(&pgd_lock); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd_t *pgd; ++ pgd = (pgd_t *)page_address(page) + pgd_index(addr); ++ ret = vmalloc_sync_one(pgd, addr); ++ if (ret) ++ break; ++ } ++ spin_unlock(&pgd_lock); ++ addr = next; ++ } while (!ret && addr != end); ++#endif ++} ++#endif /* CONFIG_IPIPE */ +diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c +index 36fe08e..32adecd 100644 +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -57,11 +57,15 @@ static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; + */ + void leave_mm(int cpu) + { ++ unsigned long flags; ++ + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + BUG(); ++ local_irq_save_hw_cond(flags); + cpumask_clear_cpu(cpu, + mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); + load_cr3(swapper_pg_dir); ++ local_irq_restore_hw_cond(flags); + } + EXPORT_SYMBOL_GPL(leave_mm); + +@@ -192,6 +196,9 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, + apic->send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); + ++#ifdef CONFIG_IPIPE ++ WARN_ON_ONCE(irqs_disabled_hw()); ++#endif + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) + cpu_relax(); + } +diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c +index 737a1c4..15e81de 100644 +--- a/drivers/pci/htirq.c ++++ b/drivers/pci/htirq.c +@@ -21,7 +21,7 @@ + * With multiple simultaneous hypertransport irq devices it might pay + * to make this more fine grained. But start with simple, stupid, and correct. + */ +-static DEFINE_SPINLOCK(ht_irq_lock); ++static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); + + struct ht_irq_cfg { + struct pci_dev *dev; +diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c +index 5ed1b82..d57ad7d 100644 +--- a/drivers/serial/8250.c ++++ b/drivers/serial/8250.c +@@ -3016,6 +3016,53 @@ static int serial8250_resume(struct platform_device *dev) + return 0; + } + ++#if defined(CONFIG_IPIPE_DEBUG) && defined(CONFIG_SERIAL_8250_CONSOLE) ++ ++#include ++ ++void __weak __ipipe_serial_debug(const char *fmt, ...) 
++{ ++ struct uart_8250_port *up = &serial8250_ports[0]; ++ unsigned int ier, count; ++ unsigned long flags; ++ char buf[128]; ++ va_list ap; ++ ++ va_start(ap, fmt); ++ vsprintf(buf, fmt, ap); ++ va_end(ap); ++ count = strlen(buf); ++ ++ touch_nmi_watchdog(); ++ ++ local_irq_save_hw(flags); ++ ++ /* ++ * First save the IER then disable the interrupts ++ */ ++ ier = serial_in(up, UART_IER); ++ ++ if (up->capabilities & UART_CAP_UUE) ++ serial_out(up, UART_IER, UART_IER_UUE); ++ else ++ serial_out(up, UART_IER, 0); ++ ++ uart_console_write(&up->port, buf, count, serial8250_console_putchar); ++ ++ /* ++ * Finally, wait for transmitter to become empty ++ * and restore the IER ++ */ ++ wait_for_xmitr(up, BOTH_EMPTY); ++ serial_out(up, UART_IER, ier); ++ ++ local_irq_restore_hw(flags); ++} ++ ++EXPORT_SYMBOL(__ipipe_serial_debug); ++ ++#endif ++ + static struct platform_driver serial8250_isa_driver = { + .probe = serial8250_probe, + .remove = __devexit_p(serial8250_remove), +diff --git a/fs/exec.c b/fs/exec.c +index 56da15f..ab11e6b 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -715,6 +715,7 @@ static int exec_mmap(struct mm_struct *mm) + { + struct task_struct *tsk; + struct mm_struct * old_mm, *active_mm; ++ unsigned long flags; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; +@@ -737,8 +738,10 @@ static int exec_mmap(struct mm_struct *mm) + task_lock(tsk); + active_mm = tsk->active_mm; + tsk->mm = mm; ++ ipipe_mm_switch_protect(flags); + tsk->active_mm = mm; + activate_mm(active_mm, mm); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + arch_pick_mmap_layout(mm); + if (old_mm) { +diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h +index c99c64d..5d01b93 100644 +--- a/include/asm-generic/atomic.h ++++ b/include/asm-generic/atomic.h +@@ -60,11 +60,11 @@ static inline int atomic_add_return(int i, atomic_t *v) + unsigned long flags; + int temp; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + temp = v->counter; + temp += i; + v->counter = temp; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return temp; + } +@@ -82,11 +82,11 @@ static inline int atomic_sub_return(int i, atomic_t *v) + unsigned long flags; + int temp; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + temp = v->counter; + temp -= i; + v->counter = temp; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return temp; + } +@@ -139,9 +139,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) + unsigned long flags; + + mask = ~mask; +- local_irq_save(flags); ++ local_irq_save_hw(flags); + *addr &= mask; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h +index c894646..8d42ffe 100644 +--- a/include/asm-generic/bitops/atomic.h ++++ b/include/asm-generic/bitops/atomic.h +@@ -21,20 +21,20 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; + * this is the substitute */ + #define _atomic_spin_lock_irqsave(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ +- local_irq_save(f); \ ++ local_irq_save_hw(f); \ + __raw_spin_lock(s); \ + } while(0) + + #define _atomic_spin_unlock_irqrestore(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ + __raw_spin_unlock(s); \ +- local_irq_restore(f); \ ++ local_irq_restore_hw(f); \ + } while(0) + + + #else +-# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } 
while (0) +-# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) ++# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0) ++# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0) + #endif + + /* +diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h +index b2ba2fc..ed01ab9 100644 +--- a/include/asm-generic/cmpxchg-local.h ++++ b/include/asm-generic/cmpxchg-local.h +@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) +@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, + default: + wrong_size_cmpxchg(ptr); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + return prev; + } + +@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, + u64 prev; + unsigned long flags; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + return prev; + } + +diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h +index 90079c3..65e872e 100644 +--- a/include/asm-generic/percpu.h ++++ b/include/asm-generic/percpu.h +@@ -56,6 +56,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; + #define __raw_get_cpu_var(var) \ + (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + ++#ifdef CONFIG_IPIPE ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++extern int __ipipe_check_percpu_access(void); ++#define __ipipe_local_cpu_offset \ ++ ({ \ ++ WARN_ON_ONCE(__ipipe_check_percpu_access()); \ ++ __my_cpu_offset; \ ++ }) ++#else ++#define __ipipe_local_cpu_offset __my_cpu_offset ++#endif ++#define __ipipe_get_cpu_var(var) \ ++ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __ipipe_local_cpu_offset)) ++#endif /* CONFIG_IPIPE */ + + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA + extern void setup_per_cpu_areas(void); +@@ -66,6 +80,7 @@ extern void setup_per_cpu_areas(void); + #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) + #define __get_cpu_var(var) per_cpu_var(var) + #define __raw_get_cpu_var(var) per_cpu_var(var) ++#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var) + + #endif /* SMP */ + +diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h +index 6d527ee..c997ef1 100644 +--- a/include/linux/hardirq.h ++++ b/include/linux/hardirq.h +@@ -183,24 +183,28 @@ extern void irq_enter(void); + */ + extern void irq_exit(void); + +-#define nmi_enter() \ +- do { \ +- ftrace_nmi_enter(); \ +- BUG_ON(in_nmi()); \ +- add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ +- lockdep_off(); \ +- rcu_nmi_enter(); \ +- trace_hardirq_enter(); \ ++#define nmi_enter() \ ++ do { \ ++ if (likely(!ipipe_test_foreign_stack())) { \ ++ ftrace_nmi_enter(); \ ++ BUG_ON(in_nmi()); \ ++ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ lockdep_off(); \ ++ rcu_nmi_enter(); \ ++ trace_hardirq_enter(); \ ++ } \ + } while (0) + +-#define nmi_exit() \ +- do { \ +- trace_hardirq_exit(); \ +- rcu_nmi_exit(); \ +- lockdep_on(); \ +- BUG_ON(!in_nmi()); \ +- sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ +- ftrace_nmi_exit(); \ ++#define nmi_exit() \ ++ do { \ ++ if (likely(!ipipe_test_foreign_stack())) { \ ++ trace_hardirq_exit(); \ ++ rcu_nmi_exit(); \ ++ 
lockdep_on(); \ ++ BUG_ON(!in_nmi()); \ ++ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ ftrace_nmi_exit(); \ ++ } \ + } while (0) + + #endif /* LINUX_HARDIRQ_H */ +diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h +new file mode 100644 +index 0000000..4144c79 +--- /dev/null ++++ b/include/linux/ipipe.h +@@ -0,0 +1,724 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe.h ++ * ++ * Copyright (C) 2002-2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_H ++#define __LINUX_IPIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++#include ++#include ++ ++static inline int ipipe_disable_context_check(int cpu) ++{ ++ return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0); ++} ++ ++static inline void ipipe_restore_context_check(int cpu, int old_state) ++{ ++ per_cpu(ipipe_percpu_context_check, cpu) = old_state; ++} ++ ++static inline void ipipe_context_check_off(void) ++{ ++ int cpu; ++ for_each_online_cpu(cpu) ++ per_cpu(ipipe_percpu_context_check, cpu) = 0; ++} ++ ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++static inline int ipipe_disable_context_check(int cpu) ++{ ++ return 0; ++} ++ ++static inline void ipipe_restore_context_check(int cpu, int old_state) { } ++ ++static inline void ipipe_context_check_off(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#ifdef CONFIG_IPIPE ++ ++#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING ++#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ ++ (IPIPE_MINOR_NUMBER << 8) | \ ++ (IPIPE_PATCH_NUMBER)) ++ ++#ifndef BROKEN_BUILTIN_RETURN_ADDRESS ++#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) ++#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) ++#endif /* !BUILTIN_RETURN_ADDRESS */ ++ ++#define IPIPE_ROOT_PRIO 100 ++#define IPIPE_ROOT_ID 0 ++#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ ++ ++#define IPIPE_RESET_TIMER 0x1 ++#define IPIPE_GRAB_TIMER 0x2 ++ ++/* Global domain flags */ ++#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ ++#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ ++ ++/* Interrupt control bits */ ++#define IPIPE_HANDLE_FLAG 0 ++#define IPIPE_PASS_FLAG 1 ++#define IPIPE_ENABLE_FLAG 2 ++#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG ++#define IPIPE_STICKY_FLAG 3 ++#define IPIPE_SYSTEM_FLAG 4 ++#define IPIPE_LOCK_FLAG 5 ++#define IPIPE_WIRED_FLAG 6 ++#define IPIPE_EXCLUSIVE_FLAG 7 ++ ++#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) ++#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) ++#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) ++#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK ++#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) 
++#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) ++#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) ++#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) ++#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) ++ ++#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) ++#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) ++ ++#define IPIPE_EVENT_SELF 0x80000000 ++ ++#define IPIPE_NR_CPUS NR_CPUS ++ ++/* This accessor assumes hw IRQs are off on SMP; allows assignment. */ ++#define __ipipe_current_domain __ipipe_get_cpu_var(ipipe_percpu_domain) ++/* This read-only accessor makes sure that hw IRQs are off on SMP. */ ++#define ipipe_current_domain \ ++ ({ \ ++ struct ipipe_domain *__ipd__; \ ++ unsigned long __flags__; \ ++ local_irq_save_hw_smp(__flags__); \ ++ __ipd__ = __ipipe_current_domain; \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ipd__; \ ++ }) ++ ++#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ ++ (irq) < IPIPE_NR_IRQS) ++ ++#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) ++ ++struct irq_desc; ++ ++typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc); ++ ++typedef int (*ipipe_event_handler_t)(unsigned event, ++ struct ipipe_domain *from, ++ void *data); ++struct ipipe_domain { ++ ++ int slot; /* Slot number in percpu domain data array. */ ++ struct list_head p_link; /* Link in pipeline */ ++ ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ ++ unsigned long long evself; /* Self-monitored event bits. */ ++ ++ struct irqdesc { ++ unsigned long control; ++ ipipe_irq_ackfn_t acknowledge; ++ ipipe_irq_handler_t handler; ++ void *cookie; ++ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; ++ ++ int priority; ++ void *pdd; ++ unsigned long flags; ++ unsigned domid; ++ const char *name; ++ struct mutex mutex; ++}; ++ ++#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ ++ ++struct ipipe_domain_attr { ++ ++ unsigned domid; /* Domain identifier -- Magic value set by caller */ ++ const char *name; /* Domain name -- Warning: won't be dup'ed! 
*/ ++ int priority; /* Priority in interrupt pipeline */ ++ void (*entry) (void); /* Domain entry point */ ++ void *pdd; /* Per-domain (opaque) data pointer */ ++}; ++ ++#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie ++#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler ++#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq] ++ ++extern unsigned __ipipe_printk_virq; ++ ++extern unsigned long __ipipe_virtual_irq_map; ++ ++extern struct list_head __ipipe_pipeline; ++ ++extern int __ipipe_event_monitors[]; ++ ++/* Private interface */ ++ ++void ipipe_init_early(void); ++ ++void ipipe_init(void); ++ ++#ifdef CONFIG_PROC_FS ++void ipipe_init_proc(void); ++ ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_init_tracer(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++#define __ipipe_init_tracer() do { } while(0) ++#endif /* CONFIG_IPIPE_TRACE */ ++ ++#else /* !CONFIG_PROC_FS */ ++#define ipipe_init_proc() do { } while(0) ++#endif /* CONFIG_PROC_FS */ ++ ++void __ipipe_init_stage(struct ipipe_domain *ipd); ++ ++void __ipipe_cleanup_domain(struct ipipe_domain *ipd); ++ ++void __ipipe_add_domain_proc(struct ipipe_domain *ipd); ++ ++void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); ++ ++void __ipipe_flush_printk(unsigned irq, void *cookie); ++ ++void __ipipe_walk_pipeline(struct list_head *pos); ++ ++void __ipipe_pend_irq(unsigned irq, struct list_head *head); ++ ++int __ipipe_dispatch_event(unsigned event, void *data); ++ ++void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq); ++ ++void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); ++ ++void __ipipe_sync_stage(int dovirt); ++ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq); ++ ++void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_pin_range_globally(unsigned long start, unsigned long end); ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_irq_lock(unsigned irq) ++{ ++ __ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_irq_unlock(unsigned irq) ++{ ++ __ipipe_unlock_irq(__ipipe_current_domain, irq); ++} ++ ++#ifndef __ipipe_sync_pipeline ++#define __ipipe_sync_pipeline(dovirt) __ipipe_sync_stage(dovirt) ++#endif ++ ++#ifndef __ipipe_run_irqtail ++#define __ipipe_run_irqtail() do { } while(0) ++#endif ++ ++#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) ++ ++#define __ipipe_ipending_p(p) ((p)->irqpend_himap != 0) ++ ++/* ++ * Keep the following as a macro, so that client code could check for ++ * the support of the invariant pipeline head optimization. 
++ */ ++#define __ipipe_pipeline_head() \ ++ list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link) ++ ++#define local_irq_enable_hw_cond() local_irq_enable_hw() ++#define local_irq_disable_hw_cond() local_irq_disable_hw() ++#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) ++#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) ++ ++#ifdef CONFIG_SMP ++cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask); ++int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask); ++#define local_irq_save_hw_smp(flags) local_irq_save_hw(flags) ++#define local_irq_restore_hw_smp(flags) local_irq_restore_hw(flags) ++#else /* !CONFIG_SMP */ ++#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_smp(flags) do { } while(0) ++#endif /* CONFIG_SMP */ ++ ++#define local_irq_save_full(vflags, rflags) \ ++ do { \ ++ local_irq_save(vflags); \ ++ local_irq_save_hw(rflags); \ ++ } while(0) ++ ++#define local_irq_restore_full(vflags, rflags) \ ++ do { \ ++ local_irq_restore_hw(rflags); \ ++ local_irq_restore(vflags); \ ++ } while(0) ++ ++static inline void __local_irq_restore_nosync(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr(); ++ ++ if (raw_irqs_disabled_flags(x)) { ++ set_bit(IPIPE_STALL_FLAG, &p->status); ++ trace_hardirqs_off(); ++ } else { ++ trace_hardirqs_on(); ++ clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++} ++ ++static inline void local_irq_restore_nosync(unsigned long x) ++{ ++ unsigned long flags; ++ local_irq_save_hw_smp(flags); ++ __local_irq_restore_nosync(x); ++ local_irq_restore_hw_smp(flags); ++} ++ ++#define __ipipe_root_domain_p (__ipipe_current_domain == ipipe_root_domain) ++#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) ++ ++/* This has to be called with hw IRQs off. */ ++#define __ipipe_head_domain_p __ipipe_pipeline_head_p(__ipipe_current_domain) ++ ++static inline int __ipipe_event_monitored_p(int ev) ++{ ++ if (__ipipe_event_monitors[ev] > 0) ++ return 1; ++ ++ return (ipipe_current_domain->evself & (1LL << ev)) != 0; ++} ++ ++/* ++ * : Backward compat is kept for now, with client domains ++ * storing the notifier enabled bit in the main flags of a ++ * task struct. This is clearly deprecated: at some point, ++ * this kludge will vanish. Fix the relevant code using ++ * ipipe_enable/disable_notifier() instead. You have been ++ * warned. 
++ */ ++#define ipipe_notifier_enabled_p(p) \ ++ (((p)->flags|(p)->ipipe_flags) & PF_EVNOTIFY) ++ ++#define ipipe_sigwake_notify(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p); \ ++ } while (0) ++ ++#define ipipe_exit_notify(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_EXIT, p); \ ++ } while (0) ++ ++#define ipipe_setsched_notify(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \ ++ } while (0) ++ ++#define ipipe_schedule_notify(prev, next) \ ++do { \ ++ if ((ipipe_notifier_enabled_p(next) || \ ++ ipipe_notifier_enabled_p(prev)) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE, next); \ ++} while (0) ++ ++#define ipipe_trap_notify(ex, regs) \ ++ ({ \ ++ unsigned long __flags__; \ ++ int __ret__ = 0; \ ++ local_irq_save_hw_smp(__flags__); \ ++ if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \ ++ ipipe_notifier_enabled_p(current)) && \ ++ __ipipe_event_monitored_p(ex)) { \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ret__ = __ipipe_dispatch_event(ex, regs); \ ++ } else \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ret__; \ ++ }) ++ ++#define ipipe_init_notify(p) \ ++ do { \ ++ if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_INIT, p); \ ++ } while (0) ++ ++#define ipipe_cleanup_notify(mm) \ ++ do { \ ++ if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); \ ++ } while (0) ++ ++/* Public interface */ ++ ++int ipipe_register_domain(struct ipipe_domain *ipd, ++ struct ipipe_domain_attr *attr); ++ ++int ipipe_unregister_domain(struct ipipe_domain *ipd); ++ ++void ipipe_suspend_domain(void); ++ ++int ipipe_virtualize_irq(struct ipipe_domain *ipd, ++ unsigned irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t acknowledge, ++ unsigned modemask); ++ ++int ipipe_control_irq(unsigned irq, ++ unsigned clrmask, ++ unsigned setmask); ++ ++unsigned ipipe_alloc_virq(void); ++ ++int ipipe_free_virq(unsigned virq); ++ ++int ipipe_trigger_irq(unsigned irq); ++ ++static inline void __ipipe_propagate_irq(unsigned irq) ++{ ++ struct list_head *next = __ipipe_current_domain->p_link.next; ++ if (next == &ipipe_root.p_link) { ++ /* Fast path: root must handle all interrupts. 
*/ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++ return; ++ } ++ __ipipe_pend_irq(irq, next); ++} ++ ++static inline void __ipipe_schedule_irq(unsigned irq) ++{ ++ __ipipe_pend_irq(irq, &__ipipe_current_domain->p_link); ++} ++ ++static inline void __ipipe_schedule_irq_head(unsigned irq) ++{ ++ __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq); ++} ++ ++static inline void __ipipe_schedule_irq_root(unsigned irq) ++{ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++} ++ ++static inline void ipipe_propagate_irq(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_propagate_irq(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq_head(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq_head(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq_root(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq_root(irq); ++ local_irq_restore_hw(flags); ++} ++ ++void ipipe_stall_pipeline_from(struct ipipe_domain *ipd); ++ ++unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); ++ ++unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); ++ ++static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ ipipe_test_and_unstall_pipeline_from(ipd); ++} ++ ++void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, ++ unsigned long x); ++ ++static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) ++{ ++ return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline void ipipe_stall_pipeline_head(void) ++{ ++ local_irq_disable_hw(); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); ++} ++ ++static inline unsigned long ipipe_test_and_stall_pipeline_head(void) ++{ ++ local_irq_disable_hw(); ++ return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); ++} ++ ++void ipipe_unstall_pipeline_head(void); ++ ++void __ipipe_restore_pipeline_head(unsigned long x); ++ ++static inline void ipipe_restore_pipeline_head(unsigned long x) ++{ ++ /* On some archs, __test_and_set_bit() might return different ++ * truth value than test_bit(), so we test the exclusive OR of ++ * both statuses, assuming that the lowest bit is always set in ++ * the truth value (if this is wrong, the failed optimization will ++ * be caught in __ipipe_restore_pipeline_head() if ++ * CONFIG_DEBUG_KERNEL is set). 
*/ ++ if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1) ++ __ipipe_restore_pipeline_head(x); ++} ++ ++#define ipipe_unstall_pipeline() \ ++ ipipe_unstall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_and_unstall_pipeline() \ ++ ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_pipeline() \ ++ ipipe_test_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_and_stall_pipeline() \ ++ ipipe_test_and_stall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_stall_pipeline() \ ++ ipipe_stall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_restore_pipeline(x) \ ++ ipipe_restore_pipeline_from(ipipe_current_domain, (x)) ++ ++void ipipe_init_attr(struct ipipe_domain_attr *attr); ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); ++ ++unsigned long ipipe_critical_enter(void (*syncfn) (void)); ++ ++void ipipe_critical_exit(unsigned long flags); ++ ++static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) ++{ ++ set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); ++} ++ ++static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) ++{ ++ clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); ++} ++ ++static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) ++{ ++ /* Must be called hw interrupts off. */ ++ __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) ++{ ++ /* Must be called hw interrupts off. */ ++ __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline int ipipe_test_foreign_stack(void) ++{ ++ /* Must be called hw interrupts off. */ ++ return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)); ++} ++ ++#ifndef ipipe_safe_current ++#define ipipe_safe_current() \ ++({ \ ++ struct task_struct *p; \ ++ unsigned long flags; \ ++ local_irq_save_hw_smp(flags); \ ++ p = ipipe_test_foreign_stack() ? 
&init_task : current; \ ++ local_irq_restore_hw_smp(flags); \ ++ p; \ ++}) ++#endif ++ ++ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, ++ unsigned event, ++ ipipe_event_handler_t handler); ++ ++cpumask_t ipipe_set_irq_affinity(unsigned irq, ++ cpumask_t cpumask); ++ ++int ipipe_send_ipi(unsigned ipi, ++ cpumask_t cpumask); ++ ++int ipipe_setscheduler_root(struct task_struct *p, ++ int policy, ++ int prio); ++ ++int ipipe_reenter_root(struct task_struct *prev, ++ int policy, ++ int prio); ++ ++int ipipe_alloc_ptdkey(void); ++ ++int ipipe_free_ptdkey(int key); ++ ++int ipipe_set_ptd(int key, ++ void *value); ++ ++void *ipipe_get_ptd(int key); ++ ++int ipipe_disable_ondemand_mappings(struct task_struct *tsk); ++ ++static inline void ipipe_nmi_enter(void) ++{ ++ int cpu = ipipe_processor_id(); ++ ++ per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ per_cpu(ipipe_saved_context_check_state, cpu) = ++ ipipe_disable_context_check(cpu); ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++} ++ ++static inline void ipipe_nmi_exit(void) ++{ ++ int cpu = ipipe_processor_id(); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ipipe_restore_context_check ++ (cpu, per_cpu(ipipe_saved_context_check_state, cpu)); ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++ if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu))) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++} ++ ++#define ipipe_enable_notifier(p) \ ++ do { \ ++ (p)->ipipe_flags |= PF_EVNOTIFY; \ ++ } while (0) ++ ++#define ipipe_disable_notifier(p) \ ++ do { \ ++ (p)->ipipe_flags &= ~(PF_EVNOTIFY|PF_EVTRET); \ ++ } while (0) ++ ++/* hw IRQs off. */ ++#define ipipe_return_notify(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_RETURN)) \ ++ (p)->ipipe_flags |= PF_EVTRET; \ ++ } while (0) ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_init_early() do { } while(0) ++#define ipipe_init() do { } while(0) ++#define ipipe_suspend_domain() do { } while(0) ++#define ipipe_sigwake_notify(p) do { } while(0) ++#define ipipe_setsched_notify(p) do { } while(0) ++#define ipipe_init_notify(p) do { } while(0) ++#define ipipe_exit_notify(p) do { } while(0) ++#define ipipe_cleanup_notify(mm) do { } while(0) ++#define ipipe_trap_notify(t,r) 0 ++#define ipipe_init_proc() do { } while(0) ++ ++static inline void __ipipe_pin_range_globally(unsigned long start, ++ unsigned long end) ++{ ++} ++ ++static inline int ipipe_test_foreign_stack(void) ++{ ++ return 0; ++} ++ ++#define local_irq_enable_hw_cond() do { } while(0) ++#define local_irq_disable_hw_cond() do { } while(0) ++#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_cond(flags) do { } while(0) ++#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_smp(flags) do { } while(0) ++ ++#define ipipe_irq_lock(irq) do { } while(0) ++#define ipipe_irq_unlock(irq) do { } while(0) ++ ++#define __ipipe_root_domain_p 1 ++#define ipipe_root_domain_p 1 ++#define ipipe_safe_current current ++#define ipipe_processor_id() smp_processor_id() ++ ++#define ipipe_nmi_enter() do { } while (0) ++#define ipipe_nmi_exit() do { } while (0) ++ ++#define local_irq_disable_head() local_irq_disable() ++ ++#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0) ++#define local_irq_restore_full(vflags, 
rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0) ++#define local_irq_restore_nosync(vflags) local_irq_restore(vflags) ++ ++#define __ipipe_pipeline_head_p(ipd) 1 ++ ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_H */ +diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h +new file mode 100644 +index 0000000..9d8c7c9 +--- /dev/null ++++ b/include/linux/ipipe_base.h +@@ -0,0 +1,119 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_base.h ++ * ++ * Copyright (C) 2002-2007 Philippe Gerum. ++ * 2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_BASE_H ++#define __LINUX_IPIPE_BASE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++ ++#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) ++/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ ++#define IPIPE_NR_VIRQS BITS_PER_LONG ++/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ ++#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) ++/* Total number of IRQ slots */ ++#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) ++ ++#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) ++#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG ++/* ++ * We need a 3-level mapping. This allows us to handle up to 32k IRQ ++ * vectors on 32bit machines, 256k on 64bit ones. ++ */ ++#define __IPIPE_3LEVEL_IRQMAP 1 ++#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG) ++#else ++/* ++ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ ++ * vectors on 32bit machines, 4096 on 64bit ones. 
++ */ ++#define __IPIPE_2LEVEL_IRQMAP 1 ++#endif ++ ++#define IPIPE_IRQ_DOALL 0 ++#define IPIPE_IRQ_DOVIRT 1 ++ ++/* Per-cpu pipeline status */ ++#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ ++#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */ ++#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */ ++ ++#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) ++#define IPIPE_SYNC_MASK (1L << IPIPE_SYNC_FLAG) ++#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG) ++ ++typedef void (*ipipe_irq_handler_t)(unsigned int irq, ++ void *cookie); ++ ++extern struct ipipe_domain ipipe_root; ++ ++#define ipipe_root_domain (&ipipe_root) ++ ++void __ipipe_unstall_root(void); ++ ++void __ipipe_restore_root(unsigned long x); ++ ++#define ipipe_preempt_disable(flags) \ ++ do { \ ++ local_irq_save_hw(flags); \ ++ if (__ipipe_root_domain_p) \ ++ preempt_disable(); \ ++ } while (0) ++ ++#define ipipe_preempt_enable(flags) \ ++ do { \ ++ if (__ipipe_root_domain_p) { \ ++ preempt_enable_no_resched(); \ ++ local_irq_restore_hw(flags); \ ++ preempt_check_resched(); \ ++ } else \ ++ local_irq_restore_hw(flags); \ ++ } while (0) ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++void ipipe_check_context(struct ipipe_domain *border_ipd); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { } ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++/* Generic features */ ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++#define __IPIPE_FEATURE_REQUEST_TICKDEV 1 ++#endif ++#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1 ++#define __IPIPE_FEATURE_FASTPEND_IRQ 1 ++#define __IPIPE_FEATURE_TRACE_EVENT 1 ++#define __IPIPE_FEATURE_ENABLE_NOTIFIER 1 ++ ++#else /* !CONFIG_IPIPE */ ++#define ipipe_preempt_disable(flags) do { \ ++ preempt_disable(); \ ++ (void)(flags); \ ++ } while (0) ++#define ipipe_preempt_enable(flags) preempt_enable() ++#define ipipe_check_context(ipd) do { } while(0) ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_BASE_H */ +diff --git a/include/linux/ipipe_compat.h b/include/linux/ipipe_compat.h +new file mode 100644 +index 0000000..50a245c +--- /dev/null ++++ b/include/linux/ipipe_compat.h +@@ -0,0 +1,54 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_compat.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_COMPAT_H ++#define __LINUX_IPIPE_COMPAT_H ++ ++#ifdef CONFIG_IPIPE_COMPAT ++/* ++ * OBSOLETE: defined only for backward compatibility. Will be removed ++ * in future releases, please update client code accordingly. 
++ */ ++ ++#ifdef CONFIG_SMP ++#define ipipe_declare_cpuid int cpuid ++#define ipipe_load_cpuid() do { \ ++ cpuid = ipipe_processor_id(); \ ++ } while(0) ++#define ipipe_lock_cpu(flags) do { \ ++ local_irq_save_hw(flags); \ ++ cpuid = ipipe_processor_id(); \ ++ } while(0) ++#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) ++#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) ++#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) ++#else /* !CONFIG_SMP */ ++#define ipipe_declare_cpuid const int cpuid = 0 ++#define ipipe_load_cpuid() do { } while(0) ++#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) ++#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) ++#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) ++#define ipipe_put_cpu(flags) do { } while(0) ++#endif /* CONFIG_SMP */ ++ ++#endif /* CONFIG_IPIPE_COMPAT */ ++ ++#endif /* !__LINUX_IPIPE_COMPAT_H */ +diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h +new file mode 100644 +index 0000000..b751d54 +--- /dev/null ++++ b/include/linux/ipipe_lock.h +@@ -0,0 +1,144 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_lock.h ++ * ++ * Copyright (C) 2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_LOCK_H ++#define __LINUX_IPIPE_LOCK_H ++ ++typedef struct { ++ raw_spinlock_t bare_lock; ++} __ipipe_spinlock_t; ++ ++#define ipipe_lock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) ++ ++#define common_lock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t *) ++ ++#define bare_lock(lock) (&((__ipipe_spinlock_t *)(lock))->bare_lock) ++#define std_lock(lock) ((spinlock_t *)(lock)) ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++ ++extern int __bad_spinlock_type(void); ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ (flags) = _spin_lock_irqsave(std_lock(lock)); \ ++ else __bad_spinlock_type(); \ ++ } while (0) ++ ++#else /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin_lock_irqsave(std_lock(lock), flags); \ ++ } while (0) ++ ++#endif /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(bare_lock(lock), flags); \ ++ else if (common_lock_p(lock)) \ ++ _spin_unlock_irqrestore(std_lock(lock), flags); \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __raw_spin##op(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin##op(std_lock(lock)); \ ++ } while (0) ++ ++#define PICK_SPINOP_IRQ(op, lock) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __ipipe_spin##op##_irq(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin##op##_irq(std_lock(lock)); \ ++ } while (0) ++ ++#define __raw_spin_lock_init(lock) \ ++ do { \ ++ IPIPE_DEFINE_SPINLOCK(__lock__); \ ++ *((ipipe_spinlock_t *)lock) = __lock__; \ ++ } while (0) ++ ++#ifdef CONFIG_IPIPE ++ ++#define ipipe_spinlock_t __ipipe_spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED ++#define IPIPE_DECLARE_SPINLOCK(x) extern ipipe_spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED \ ++ (__ipipe_spinlock_t) { .bare_lock = __RAW_SPIN_LOCK_UNLOCKED } ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ spin_lock_irqsave(lock, flags) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock_irqrestore(lock, flags) ++ ++void __ipipe_spin_lock_irq(raw_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irq(raw_spinlock_t *lock); ++ ++unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, ++ unsigned long x); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_spinlock_t spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ spin_lock(lock); \ ++ } while(0) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock(lock) ++ ++#define __ipipe_spin_lock_irq(lock) do { } while (0) ++#define __ipipe_spin_unlock_irq(lock) do { } while (0) ++#define __ipipe_spin_lock_irqsave(lock) 0 ++#define __ipipe_spin_unlock_irqrestore(lock, x) 
do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_irqbegin(lock) do { } while (0) ++#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_LOCK_H */ +diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h +new file mode 100644 +index 0000000..f6727e3 +--- /dev/null ++++ b/include/linux/ipipe_percpu.h +@@ -0,0 +1,89 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_percpu.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_PERCPU_H ++#define __LINUX_IPIPE_PERCPU_H ++ ++#include ++#include ++ ++struct ipipe_domain; ++ ++struct ipipe_percpu_domain_data { ++ unsigned long status; /* <= Must be first in struct. */ ++ unsigned long irqpend_himap; ++#ifdef __IPIPE_3LEVEL_IRQMAP ++ unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ]; ++#endif ++ unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ]; ++ unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ]; ++ unsigned long irqall[IPIPE_NR_IRQS]; ++ u64 evsync; ++}; ++ ++/* ++ * CAREFUL: all accessors based on __raw_get_cpu_var() you may find in ++ * this file should be used only while hw interrupts are off, to ++ * prevent from CPU migration regardless of the running domain. 
++ */ ++#ifdef CONFIG_SMP ++#define ipipe_percpudom_ptr(ipd, cpu) \ ++ (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot]) ++#define ipipe_cpudom_ptr(ipd) \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot]) ++#else ++DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]); ++#define ipipe_percpudom_ptr(ipd, cpu) \ ++ (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]) ++#define ipipe_cpudom_ptr(ipd) \ ++ (__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]) ++#endif ++#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var) ++#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var) ++ ++#define IPIPE_ROOT_SLOT 0 ++#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1) ++ ++DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]); ++ ++DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); ++ ++DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++DECLARE_PER_CPU(int, ipipe_percpu_context_check); ++DECLARE_PER_CPU(int, ipipe_saved_context_check_state); ++#endif ++ ++#define ipipe_root_cpudom_ptr(var) \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT]) ++ ++#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var ++ ++#define ipipe_this_cpudom_var(var) \ ++ ipipe_cpudom_var(__ipipe_current_domain, var) ++ ++#define ipipe_head_cpudom_ptr() \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT]) ++ ++#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var ++ ++#endif /* !__LINUX_IPIPE_PERCPU_H */ +diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h +new file mode 100644 +index 0000000..4a1cb1b +--- /dev/null ++++ b/include/linux/ipipe_tickdev.h +@@ -0,0 +1,58 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_tickdev.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_TICKDEV_H ++#define __LINUX_IPIPE_TICKDEV_H ++ ++#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS) ++ ++#include ++ ++struct tick_device; ++ ++struct ipipe_tick_device { ++ ++ void (*emul_set_mode)(enum clock_event_mode, ++ struct clock_event_device *cdev); ++ int (*emul_set_tick)(unsigned long delta, ++ struct clock_event_device *cdev); ++ void (*real_set_mode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev); ++ int (*real_set_tick)(unsigned long delta, ++ struct clock_event_device *cdev); ++ struct tick_device *slave; ++ unsigned long real_max_delta_ns; ++ unsigned long real_mult; ++ int real_shift; ++}; ++ ++int ipipe_request_tickdev(const char *devname, ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ int cpu, unsigned long *tmfreq); ++ ++void ipipe_release_tickdev(int cpu); ++ ++#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */ ++ ++#endif /* !__LINUX_IPIPE_TICKDEV_H */ +diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h +new file mode 100644 +index 0000000..627b354 +--- /dev/null ++++ b/include/linux/ipipe_trace.h +@@ -0,0 +1,72 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_trace.h ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _LINUX_IPIPE_TRACE_H ++#define _LINUX_IPIPE_TRACE_H ++ ++#ifdef CONFIG_IPIPE_TRACE ++ ++#include ++ ++void ipipe_trace_begin(unsigned long v); ++void ipipe_trace_end(unsigned long v); ++void ipipe_trace_freeze(unsigned long v); ++void ipipe_trace_special(unsigned char special_id, unsigned long v); ++void ipipe_trace_pid(pid_t pid, short prio); ++void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); ++int ipipe_trace_max_reset(void); ++int ipipe_trace_frozen_reset(void); ++ ++#else /* !CONFIG_IPIPE_TRACE */ ++ ++#define ipipe_trace_begin(v) do { (void)(v); } while(0) ++#define ipipe_trace_end(v) do { (void)(v); } while(0) ++#define ipipe_trace_freeze(v) do { (void)(v); } while(0) ++#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) ++#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) ++#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) ++#define ipipe_trace_max_reset() do { } while(0) ++#define ipipe_trace_froze_reset() do { } while(0) ++ ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void); ++void ipipe_trace_panic_dump(void); ++#else ++static inline void ipipe_trace_panic_freeze(void) { } ++static inline void ipipe_trace_panic_dump(void) { } ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) ++#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) ++#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) ++#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) ++#else ++#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irqsoff() do { } while(0) ++#define ipipe_trace_irqson() do { } while(0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_TRACE_H */ +diff --git a/include/linux/irq.h b/include/linux/irq.h +index 9e5f45a..85642bc 100644 +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -124,6 +124,9 @@ struct irq_chip { + void (*end)(unsigned int irq); + int (*set_affinity)(unsigned int irq, + const struct cpumask *dest); ++#ifdef CONFIG_IPIPE ++ void (*move)(unsigned int irq); ++#endif /* CONFIG_IPIPE */ + int (*retrigger)(unsigned int irq); + int (*set_type)(unsigned int irq, unsigned int flow_type); + int (*set_wake)(unsigned int irq, unsigned int on); +@@ -173,6 +176,12 @@ struct irq_2_iommu; + * @name: flow handler name for /proc/interrupts output + */ + struct irq_desc { ++#ifdef CONFIG_IPIPE ++ void (*ipipe_ack)(unsigned int irq, ++ struct irq_desc *desc); ++ void (*ipipe_end)(unsigned int irq, ++ struct irq_desc *desc); ++#endif /* CONFIG_IPIPE */ + unsigned int irq; + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; +@@ -346,6 +355,10 @@ extern void + set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle, const char *name); + ++extern irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained); ++ + extern void + __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name); +@@ -357,6 +370,7 @@ static inline void __set_irq_handler_unlocked(int irq, + struct irq_desc *desc; + + desc = irq_to_desc(irq); ++ handler = __fixup_irq_handler(desc, handler, 0); + desc->handle_irq = handler; + } + +diff --git a/include/linux/kernel.h b/include/linux/kernel.h +index f4e3184..3b80b7b 100644 +--- 
a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -119,9 +120,12 @@ struct user; + + #ifdef CONFIG_PREEMPT_VOLUNTARY + extern int _cond_resched(void); +-# define might_resched() _cond_resched() ++# define might_resched() do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ _cond_resched(); \ ++ } while (0) + #else +-# define might_resched() do { } while (0) ++# define might_resched() ipipe_check_context(ipipe_root_domain) + #endif + + #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 72b1a10..80553be 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -9,13 +9,20 @@ + #include + #include + #include ++#include + + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) + extern void add_preempt_count(int val); + extern void sub_preempt_count(int val); + #else +-# define add_preempt_count(val) do { preempt_count() += (val); } while (0) +-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0) ++# define add_preempt_count(val) do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ preempt_count() += (val); \ ++ } while (0) ++# define sub_preempt_count(val) do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ preempt_count() -= (val); \ ++ } while (0) + #endif + + #define inc_preempt_count() add_preempt_count(1) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index b253434..02b5e27 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -61,6 +61,7 @@ struct sched_param { + #include + #include + #include ++#include + + #include + #include +@@ -195,6 +196,13 @@ extern unsigned long long time_sync_thresh; + #define TASK_DEAD 64 + #define TASK_WAKEKILL 128 + #define TASK_WAKING 256 ++#ifdef CONFIG_IPIPE ++#define TASK_ATOMICSWITCH 512 ++#define TASK_NOWAKEUP 1024 ++#else /* !CONFIG_IPIPE */ ++#define TASK_ATOMICSWITCH 0 ++#define TASK_NOWAKEUP 0 ++#endif /* CONFIG_IPIPE */ + + /* Convenience macros for the sake of set_task_state */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +@@ -302,6 +310,15 @@ extern void trap_init(void); + extern void update_process_times(int user); + extern void scheduler_tick(void); + ++#ifdef CONFIG_IPIPE ++void update_root_process_times(struct pt_regs *regs); ++#else /* !CONFIG_IPIPE */ ++static inline void update_root_process_times(struct pt_regs *regs) ++{ ++ update_process_times(user_mode(regs)); ++} ++#endif /* CONFIG_IPIPE */ ++ + extern void sched_show_task(struct task_struct *p); + + #ifdef CONFIG_DETECT_SOFTLOCKUP +@@ -349,8 +366,8 @@ extern signed long schedule_timeout(signed long timeout); + extern signed long schedule_timeout_interruptible(signed long timeout); + extern signed long schedule_timeout_killable(signed long timeout); + extern signed long schedule_timeout_uninterruptible(signed long timeout); +-asmlinkage void __schedule(void); +-asmlinkage void schedule(void); ++asmlinkage int __schedule(void); ++asmlinkage int schedule(void); + extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); + + struct nsproxy; +@@ -475,6 +492,9 @@ extern int get_dumpable(struct mm_struct *mm); + #endif + /* leave room for more dump flags */ + #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ ++#ifdef CONFIG_IPIPE ++#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ ++#endif + + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) + +@@ -1496,6 +1516,10 
@@ struct task_struct { + #endif + atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; ++#ifdef CONFIG_IPIPE ++ unsigned int ipipe_flags; ++ void *ptd[IPIPE_ROOT_NPTDKEYS]; ++#endif + + /* + * cache last used pipe for splice +@@ -1735,6 +1759,11 @@ extern cputime_t task_gtime(struct task_struct *p); + #define PF_EXITING 0x00000004 /* getting shut down */ + #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ + #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ ++#ifdef CONFIG_IPIPE ++#define PF_EVNOTIFY 0x00000020 /* Notify other domains about internal events */ ++#else ++#define PF_EVNOTIFY 0 ++#endif /* CONFIG_IPIPE */ + #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ + #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ + #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +@@ -1763,6 +1792,12 @@ extern cputime_t task_gtime(struct task_struct *p); + #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ + + /* ++ * p->ipipe_flags -- care for conflict with legacy PF_EVNOTIFY in main ++ * flags, until it moves there. ++ */ ++#define PF_EVTRET 0x1 ++ ++/* + * Only the _current_ task can read/write to tsk->flags, but other + * tasks can access tsk->flags in readonly mode for example + * with tsk_used_math (like during threaded core dumping). +diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h +index f0ca7a7..3096642 100644 +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -90,10 +90,12 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); + # include + #endif + ++#include ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key); +-# define spin_lock_init(lock) \ ++# define _spin_lock_init(lock) \ + do { \ + static struct lock_class_key __key; \ + \ +@@ -101,10 +103,12 @@ do { \ + } while (0) + + #else +-# define spin_lock_init(lock) \ ++# define _spin_lock_init(lock) \ + do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + #endif + ++# define spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key); +@@ -186,7 +190,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) + #define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) + +-#define spin_lock(lock) _spin_lock(lock) ++#define spin_lock(lock) PICK_SPINOP(_lock, lock) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) +@@ -208,7 +212,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- flags = _spin_lock_irqsave(lock); \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags); \ + } while (0) + #define read_lock_irqsave(lock, flags) \ + do { \ +@@ -240,7 +244,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- _spin_lock_irqsave(lock, flags); \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags); \ + } while (0) + #define read_lock_irqsave(lock, flags) \ + do { \ +@@ -257,23 +261,23 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + + #endif + +-#define spin_lock_irq(lock) _spin_lock_irq(lock) ++#define spin_lock_irq(lock) PICK_SPINOP_IRQ(_lock, lock) + #define 
spin_lock_bh(lock) _spin_lock_bh(lock) + #define read_lock_irq(lock) _read_lock_irq(lock) + #define read_lock_bh(lock) _read_lock_bh(lock) + #define write_lock_irq(lock) _write_lock_irq(lock) + #define write_lock_bh(lock) _write_lock_bh(lock) +-#define spin_unlock(lock) _spin_unlock(lock) ++#define spin_unlock(lock) PICK_SPINOP(_unlock, lock) + #define read_unlock(lock) _read_unlock(lock) + #define write_unlock(lock) _write_unlock(lock) +-#define spin_unlock_irq(lock) _spin_unlock_irq(lock) ++#define spin_unlock_irq(lock) PICK_SPINOP_IRQ(_unlock, lock) + #define read_unlock_irq(lock) _read_unlock_irq(lock) + #define write_unlock_irq(lock) _write_unlock_irq(lock) + + #define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- _spin_unlock_irqrestore(lock, flags); \ ++ PICK_SPINUNLOCK_IRQRESTORE(lock, flags); \ + } while (0) + #define spin_unlock_bh(lock) _spin_unlock_bh(lock) + +diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h +index 7a7e18f..190bc0a 100644 +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -229,7 +229,9 @@ static inline int __write_trylock(rwlock_t *lock) + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline void __read_lock(rwlock_t *lock) + { +@@ -250,7 +252,7 @@ static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +-#ifdef CONFIG_LOCKDEP ++#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + #else + _raw_spin_lock_flags(lock, &flags); +diff --git a/init/Kconfig b/init/Kconfig +index eb4b337..a73e078 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -78,6 +78,7 @@ config INIT_ENV_ARG_LIMIT + + config LOCALVERSION + string "Local version - append to kernel release" ++ default "-ipipe" + help + Append an extra string to the end of your kernel version. + This will show up when you type uname, for example. +diff --git a/init/main.c b/init/main.c +index bc109c7..4672e7d 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -530,7 +530,7 @@ asmlinkage void __init start_kernel(void) + + cgroup_init_early(); + +- local_irq_disable(); ++ local_irq_disable_hw(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + +@@ -565,6 +565,7 @@ asmlinkage void __init start_kernel(void) + pidhash_init(); + vfs_caches_init_early(); + sort_main_extable(); ++ ipipe_init_early(); + trap_init(); + mm_init(); + /* +@@ -593,6 +594,11 @@ asmlinkage void __init start_kernel(void) + softirq_init(); + timekeeping_init(); + time_init(); ++ /* ++ * We need to wait for the interrupt and time subsystems to be ++ * initialized before enabling the pipeline. 
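To make the lock-type dispatch introduced above concrete: with the ipipe_lock.h definitions and the PICK_SPINLOCK_IRQSAVE wiring of spin_lock_irqsave() shown earlier in this patch, the declared lock type alone decides between hard and virtual interrupt masking. A minimal, purely illustrative sketch (kernel-style C, not part of the patch; the helper name is hypothetical):

    static IPIPE_DEFINE_SPINLOCK(hard_lock); /* critical section runs with hw IRQs off */
    static DEFINE_SPINLOCK(soft_lock);       /* critical section only stalls the root stage */

    static void sample_critical_sections(void)
    {
            unsigned long flags;

            /* ipipe_lock_p() matches: expands to __ipipe_spin_lock_irqsave() */
            spin_lock_irqsave(&hard_lock, flags);
            /* ... data shared with a real-time (head) domain ... */
            spin_unlock_irqrestore(&hard_lock, flags);

            /* common_lock_p() matches: expands to the regular _spin_lock_irqsave() */
            spin_lock_irqsave(&soft_lock, flags);
            /* ... Linux-only data ... */
            spin_unlock_irqrestore(&soft_lock, flags);
    }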
++ */ ++ ipipe_init(); + profile_init(); + if (!irqs_disabled()) + printk(KERN_CRIT "start_kernel(): bug: interrupts were " +@@ -774,6 +780,7 @@ static void __init do_basic_setup(void) + init_tmpfs(); + driver_init(); + init_irq_proc(); ++ ipipe_init_proc(); + do_ctors(); + do_initcalls(); + } +diff --git a/kernel/Makefile b/kernel/Makefile +index d7c13d2..b6a84ee 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -83,6 +83,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o + obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o + obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o + obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_IPIPE) += ipipe/ + obj-$(CONFIG_SYSCTL) += utsname_sysctl.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o +diff --git a/kernel/exit.c b/kernel/exit.c +index f7864ac..f5c3129 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -963,6 +963,7 @@ NORET_TYPE void do_exit(long code) + acct_process(); + trace_sched_process_exit(tsk); + ++ ipipe_exit_notify(tsk); + exit_sem(tsk); + exit_files(tsk); + exit_fs(tsk); +diff --git a/kernel/fork.c b/kernel/fork.c +index 28b4874..4f16eb3 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -511,6 +511,7 @@ void mmput(struct mm_struct *mm) + exit_aio(mm); + ksm_exit(mm); + exit_mmap(mm); ++ ipipe_cleanup_notify(mm); + set_mm_exe_file(mm, NULL); + if (!list_empty(&mm->mmlist)) { + spin_lock(&mmlist_lock); +@@ -918,7 +919,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) + { + unsigned long new_flags = p->flags; + +- new_flags &= ~PF_SUPERPRIV; ++ new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY); + new_flags |= PF_FORKNOEXEC; + new_flags |= PF_STARTING; + p->flags = new_flags; +@@ -1301,6 +1302,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, + write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); + cgroup_post_fork(p); ++#ifdef CONFIG_IPIPE ++ p->ipipe_flags = 0; ++ memset(p->ptd, 0, sizeof(p->ptd)); ++#endif /* CONFIG_IPIPE */ + perf_event_fork(p); + return p; + +@@ -1698,11 +1703,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) + } + + if (new_mm) { ++ unsigned long flags; + mm = current->mm; + active_mm = current->active_mm; + current->mm = new_mm; ++ ipipe_mm_switch_protect(flags); + current->active_mm = new_mm; + activate_mm(active_mm, new_mm); ++ ipipe_mm_switch_unprotect(flags); + new_mm = mm; + } + +diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig +new file mode 100644 +index 0000000..de5e6a3 +--- /dev/null ++++ b/kernel/ipipe/Kconfig +@@ -0,0 +1,35 @@ ++config IPIPE ++ bool "Interrupt pipeline" ++ default y ++ ---help--- ++ Activate this option if you want the interrupt pipeline to be ++ compiled in. ++ ++config IPIPE_DOMAINS ++ int "Max domains" ++ depends on IPIPE ++ default 4 ++ ---help--- ++ The maximum number of I-pipe domains to run concurrently. ++ ++config IPIPE_COMPAT ++ bool "Maintain code compatibility with older releases" ++ depends on IPIPE ++ default y ++ ---help--- ++ Activate this option if you want the compatibility code to be ++ defined, so that older I-pipe clients may use obsolete ++ constructs. WARNING: obsolete code will be eventually ++ deprecated in future I-pipe releases, and removed from the ++ compatibility support as time passes. Please fix I-pipe ++ clients to get rid of such uses as soon as possible. 
++ ++config IPIPE_DELAYED_ATOMICSW ++ bool ++ depends on IPIPE ++ default n ++ ++config IPIPE_UNMASKED_CONTEXT_SWITCH ++ bool ++ depends on IPIPE ++ default n +diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug +new file mode 100644 +index 0000000..629c894 +--- /dev/null ++++ b/kernel/ipipe/Kconfig.debug +@@ -0,0 +1,97 @@ ++config IPIPE_DEBUG ++ bool "I-pipe debugging" ++ depends on IPIPE ++ ++config IPIPE_DEBUG_CONTEXT ++ bool "Check for illicit cross-domain calls" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ Enable this feature to arm checkpoints in the kernel that ++ verify the correct invocation context. On entry of critical ++ Linux services a warning is issued if the caller is not ++ running over the root domain. ++ ++config IPIPE_DEBUG_INTERNAL ++ bool "Enable internal debug checks" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ When this feature is enabled, I-pipe will perform internal ++ consistency checks of its subsystems, e.g. on per-cpu variable ++ access. ++ ++config IPIPE_TRACE ++ bool "Latency tracing" ++ depends on IPIPE_DEBUG ++ select FRAME_POINTER ++ select KALLSYMS ++ select PROC_FS ++ ---help--- ++ Activate this option if you want to use per-function tracing of ++ the kernel. The tracer will collect data via instrumentation ++ features like the one below or with the help of explicite calls ++ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the ++ in-kernel tracing API. The collected data and runtime control ++ is available via /proc/ipipe/trace/*. ++ ++if IPIPE_TRACE ++ ++config IPIPE_TRACE_ENABLE ++ bool "Enable tracing on boot" ++ default y ++ ---help--- ++ Disable this option if you want to arm the tracer after booting ++ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce ++ boot time on slow embedded devices due to the tracer overhead. ++ ++config IPIPE_TRACE_MCOUNT ++ bool "Instrument function entries" ++ default y ++ select FUNCTION_TRACER ++ select TRACING ++ select CONTEXT_SWITCH_TRACER ++ select FTRACE_MCOUNT_RECORD ++ select DYNAMIC_FTRACE ++ ---help--- ++ When enabled, records every kernel function entry in the tracer ++ log. While this slows down the system noticeably, it provides ++ the highest level of information about the flow of events. ++ However, it can be switch off in order to record only explicit ++ I-pipe trace points. ++ ++config IPIPE_TRACE_IRQSOFF ++ bool "Trace IRQs-off times" ++ default y ++ ---help--- ++ Activate this option if I-pipe shall trace the longest path ++ with hard-IRQs switched off. ++ ++config IPIPE_TRACE_SHIFT ++ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" ++ range 10 18 ++ default 14 ++ ---help--- ++ The number of trace points to hold tracing data for each ++ trace path, as a power of 2. ++ ++config IPIPE_TRACE_VMALLOC ++ bool "Use vmalloc'ed trace buffer" ++ default y if EMBEDDED ++ ---help--- ++ Instead of reserving static kernel data, the required buffer ++ is allocated via vmalloc during boot-up when this option is ++ enabled. This can help to start systems that are low on memory, ++ but it slightly degrades overall performance. Try this option ++ when a traced kernel hangs unexpectedly at boot time. ++ ++config IPIPE_TRACE_PANIC ++ bool "Enable panic back traces" ++ default y ++ ---help--- ++ Provides services to freeze and dump a back trace on panic ++ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions ++ as well as ordinary kernel oopses. You can control the number ++ of printed back trace points via /proc/ipipe/trace. 
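As a purely illustrative aside, the explicit tracing hooks mentioned in the help text above (declared in include/linux/ipipe_trace.h earlier in this patch) would typically be used as follows; the function and the anomaly check are hypothetical placeholders:

    #include <linux/ipipe_trace.h>

    static void traced_code_path(unsigned long tag)
    {
            ipipe_trace_begin(tag);            /* open a measured interval */
            /* ... code path under scrutiny ... */
            ipipe_trace_end(tag);              /* close it */

            if (0 /* replace with a real anomaly condition */)
                    ipipe_trace_freeze(tag);   /* snapshot the current back trace */
    }

The collected data is then read back and controlled through /proc/ipipe/trace/* as described in the help text above.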
++ ++endif +diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile +new file mode 100644 +index 0000000..6257dfa +--- /dev/null ++++ b/kernel/ipipe/Makefile +@@ -0,0 +1,3 @@ ++ ++obj-$(CONFIG_IPIPE) += core.o ++obj-$(CONFIG_IPIPE_TRACE) += tracer.o +diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c +new file mode 100644 +index 0000000..b6731a4 +--- /dev/null ++++ b/kernel/ipipe/core.c +@@ -0,0 +1,1955 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/core.c ++ * ++ * Copyright (C) 2002-2005 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Architecture-independent I-PIPE core support. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_PROC_FS ++#include ++#include ++#endif /* CONFIG_PROC_FS */ ++#include ++#include ++#include ++ ++static int __ipipe_ptd_key_count; ++ ++static unsigned long __ipipe_ptd_key_map; ++ ++static unsigned long __ipipe_domain_slot_map; ++ ++struct ipipe_domain ipipe_root; ++ ++#ifndef CONFIG_SMP ++/* ++ * Create an alias to the unique root status, so that arch-dep code ++ * may get simple and easy access to this percpu variable. We also ++ * create an array of pointers to the percpu domain data; this tends ++ * to produce a better code when reaching non-root domains. We make ++ * sure that the early boot code would be able to dereference the ++ * pointer to the root domain data safely by statically initializing ++ * its value (local_irq*() routines depend on this). ++ */ ++#if __GNUC__ >= 4 ++extern unsigned long __ipipe_root_status ++__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray))))); ++EXPORT_SYMBOL(__ipipe_root_status); ++#else /* __GNUC__ < 4 */ ++/* ++ * Work around a GCC 3.x issue making alias symbols unusable as ++ * constant initializers. ++ */ ++unsigned long *const __ipipe_root_status_addr = ++ &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status; ++EXPORT_SYMBOL(__ipipe_root_status_addr); ++#endif /* __GNUC__ < 4 */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) = ++{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) }; ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr); ++#endif /* !CONFIG_SMP */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) = ++{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. 
*/ ++ ++DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root }; ++ ++DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */ ++ ++static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock); ++ ++LIST_HEAD(__ipipe_pipeline); ++ ++unsigned long __ipipe_virtual_irq_map; ++ ++#ifdef CONFIG_PRINTK ++unsigned __ipipe_printk_virq; ++#endif /* CONFIG_PRINTK */ ++ ++int __ipipe_event_monitors[IPIPE_NR_EVENTS]; ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++ ++DECLARE_PER_CPU(struct tick_device, tick_cpu_device); ++ ++static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device); ++ ++int ipipe_request_tickdev(const char *devname, ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long delta, ++ struct clock_event_device *cdev), ++ int cpu, unsigned long *tmfreq) ++{ ++ struct ipipe_tick_device *itd; ++ struct tick_device *slave; ++ struct clock_event_device *evtdev; ++ unsigned long long freq; ++ unsigned long flags; ++ int status; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ itd = &per_cpu(ipipe_tick_cpu_device, cpu); ++ ++ if (itd->slave != NULL) { ++ status = -EBUSY; ++ goto out; ++ } ++ ++ slave = &per_cpu(tick_cpu_device, cpu); ++ ++ if (strcmp(slave->evtdev->name, devname)) { ++ /* ++ * No conflict so far with the current tick device, ++ * check whether the requested device is sane and has ++ * been blessed by the kernel. ++ */ ++ status = __ipipe_check_tickdev(devname) ? ++ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN; ++ goto out; ++ } ++ ++ /* ++ * Our caller asks for using the same clock event device for ++ * ticking than we do, let's create a tick emulation device to ++ * interpose on the set_next_event() method, so that we may ++ * both manage the device in oneshot mode. Only the tick ++ * emulation code will actually program the clockchip hardware ++ * for the next shot, though. ++ * ++ * CAUTION: we still have to grab the tick device even when it ++ * current runs in periodic mode, since the kernel may switch ++ * to oneshot dynamically (highres/no_hz tick mode). 
++ */ ++ ++ evtdev = slave->evtdev; ++ status = evtdev->mode; ++ ++ if (status == CLOCK_EVT_MODE_SHUTDOWN) ++ goto out; ++ ++ itd->slave = slave; ++ itd->emul_set_mode = emumode; ++ itd->emul_set_tick = emutick; ++ itd->real_set_mode = evtdev->set_mode; ++ itd->real_set_tick = evtdev->set_next_event; ++ itd->real_max_delta_ns = evtdev->max_delta_ns; ++ itd->real_mult = evtdev->mult; ++ itd->real_shift = evtdev->shift; ++ freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; ++ *tmfreq = (unsigned long)freq; ++ evtdev->set_mode = emumode; ++ evtdev->set_next_event = emutick; ++ evtdev->max_delta_ns = ULONG_MAX; ++ evtdev->mult = 1; ++ evtdev->shift = 0; ++out: ++ ipipe_critical_exit(flags); ++ ++ return status; ++} ++ ++void ipipe_release_tickdev(int cpu) ++{ ++ struct ipipe_tick_device *itd; ++ struct tick_device *slave; ++ struct clock_event_device *evtdev; ++ unsigned long flags; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ itd = &per_cpu(ipipe_tick_cpu_device, cpu); ++ ++ if (itd->slave != NULL) { ++ slave = &per_cpu(tick_cpu_device, cpu); ++ evtdev = slave->evtdev; ++ evtdev->set_mode = itd->real_set_mode; ++ evtdev->set_next_event = itd->real_set_tick; ++ evtdev->max_delta_ns = itd->real_max_delta_ns; ++ evtdev->mult = itd->real_mult; ++ evtdev->shift = itd->real_shift; ++ itd->slave = NULL; ++ } ++ ++ ipipe_critical_exit(flags); ++} ++ ++#endif /* CONFIG_GENERIC_CLOCKEVENTS */ ++ ++void __init ipipe_init_early(void) ++{ ++ struct ipipe_domain *ipd = &ipipe_root; ++ ++ /* ++ * Do the early init stuff. At this point, the kernel does not ++ * provide much services yet: be careful. ++ */ ++ __ipipe_check_platform(); /* Do platform dependent checks first. */ ++ ++ /* ++ * A lightweight registration code for the root domain. We are ++ * running on the boot CPU, hw interrupts are off, and ++ * secondary CPUs are still lost in space. ++ */ ++ ++ /* Reserve percpu data slot #0 for the root domain. */ ++ ipd->slot = 0; ++ set_bit(0, &__ipipe_domain_slot_map); ++ ++ ipd->name = "Linux"; ++ ipd->domid = IPIPE_ROOT_ID; ++ ipd->priority = IPIPE_ROOT_PRIO; ++ ++ __ipipe_init_stage(ipd); ++ ++ list_add_tail(&ipd->p_link, &__ipipe_pipeline); ++ ++ __ipipe_init_platform(); ++ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. */ ++ ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; ++ ipd->irqs[__ipipe_printk_virq].cookie = NULL; ++ ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; ++ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; ++#endif /* CONFIG_PRINTK */ ++} ++ ++void __init ipipe_init(void) ++{ ++ /* Now we may engage the pipeline. 
*/ ++ __ipipe_enable_pipeline(); ++ ++ printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", ++ IPIPE_VERSION_STRING); ++} ++ ++void __ipipe_init_stage(struct ipipe_domain *ipd) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long status; ++ int cpu, n; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ status = p->status; ++ memset(p, 0, sizeof(*p)); ++ p->status = status; ++ } ++ ++ for (n = 0; n < IPIPE_NR_IRQS; n++) { ++ ipd->irqs[n].acknowledge = NULL; ++ ipd->irqs[n].handler = NULL; ++ ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ ++ } ++ ++ for (n = 0; n < IPIPE_NR_EVENTS; n++) ++ ipd->evhand[n] = NULL; ++ ++ ipd->evself = 0LL; ++ mutex_init(&ipd->mutex); ++ ++ __ipipe_hook_critical_ipi(ipd); ++} ++ ++void __ipipe_cleanup_domain(struct ipipe_domain *ipd) ++{ ++ ipipe_unstall_pipeline_from(ipd); ++ ++#ifdef CONFIG_SMP ++ { ++ struct ipipe_percpu_domain_data *p; ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ while (__ipipe_ipending_p(p)) ++ cpu_relax(); ++ } ++ } ++#else ++ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL; ++#endif ++ ++ clear_bit(ipd->slot, &__ipipe_domain_slot_map); ++} ++ ++void __ipipe_unstall_root(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ local_irq_disable_hw(); ++ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++ /* This helps catching bad usage from assembly call sites. */ ++ BUG_ON(!__ipipe_root_domain_p); ++#endif ++ ++ p = ipipe_root_cpudom_ptr(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ ++ local_irq_enable_hw(); ++} ++ ++void __ipipe_restore_root(unsigned long x) ++{ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++ BUG_ON(!ipipe_root_domain_p); ++#endif ++ ++ if (x) ++ __ipipe_stall_root(); ++ else ++ __ipipe_unstall_root(); ++} ++ ++void ipipe_stall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags; ++ /* ++ * We have to prevent against race on updating the status ++ * variable _and_ CPU migration at the same time, so disable ++ * hw IRQs here. 
++ */ ++ local_irq_save_hw(flags); ++ ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (!__ipipe_pipeline_head_p(ipd)) ++ local_irq_restore_hw(flags); ++} ++ ++unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags, x; ++ ++ /* See ipipe_stall_pipeline_from() */ ++ local_irq_save_hw(flags); ++ ++ x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (!__ipipe_pipeline_head_p(ipd)) ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags, x; ++ struct list_head *pos; ++ ++ local_irq_save_hw(flags); ++ ++ x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (ipd == __ipipe_current_domain) ++ pos = &ipd->p_link; ++ else ++ pos = __ipipe_pipeline.next; ++ ++ __ipipe_walk_pipeline(pos); ++ ++ if (likely(__ipipe_pipeline_head_p(ipd))) ++ local_irq_enable_hw(); ++ else ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, ++ unsigned long x) ++{ ++ if (x) ++ ipipe_stall_pipeline_from(ipd); ++ else ++ ipipe_unstall_pipeline_from(ipd); ++} ++ ++void ipipe_unstall_pipeline_head(void) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); ++ struct ipipe_domain *head_domain; ++ ++ local_irq_disable_hw(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) { ++ head_domain = __ipipe_pipeline_head(); ++ if (likely(head_domain == __ipipe_current_domain)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ else ++ __ipipe_walk_pipeline(&head_domain->p_link); ++ } ++ ++ local_irq_enable_hw(); ++} ++ ++void __ipipe_restore_pipeline_head(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); ++ struct ipipe_domain *head_domain; ++ ++ local_irq_disable_hw(); ++ ++ if (x) { ++#ifdef CONFIG_DEBUG_KERNEL ++ static int warned; ++ if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { ++ /* ++ * Already stalled albeit ipipe_restore_pipeline_head() ++ * should have detected it? Send a warning once. 
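The stall/unstall primitives implemented above are normally used in pairs to mask a stage and later replay its interrupt log. A minimal sketch, illustrative only ('ipd' being any registered domain; the helper name is hypothetical):

    static void sample_stage_protection(struct ipipe_domain *ipd)
    {
            unsigned long s;

            s = ipipe_test_and_stall_pipeline_from(ipd); /* mask the stage, keep old state */
            /* ... interrupts logged for 'ipd' are deferred here ... */
            ipipe_restore_pipeline_from(ipd, s);         /* unstall and sync if it was clear */
    }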
++ */ ++ warned = 1; ++ printk(KERN_WARNING ++ "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); ++ dump_stack(); ++ } ++#else /* !CONFIG_DEBUG_KERNEL */ ++ set_bit(IPIPE_STALL_FLAG, &p->status); ++#endif /* CONFIG_DEBUG_KERNEL */ ++ } ++ else { ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (unlikely(__ipipe_ipending_p(p))) { ++ head_domain = __ipipe_pipeline_head(); ++ if (likely(head_domain == __ipipe_current_domain)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ else ++ __ipipe_walk_pipeline(&head_domain->p_link); ++ } ++ local_irq_enable_hw(); ++ } ++} ++ ++void __ipipe_spin_lock_irq(raw_spinlock_t *lock) ++{ ++ local_irq_disable_hw(); ++ __raw_spin_lock(lock); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++} ++ ++void __ipipe_spin_unlock_irq(raw_spinlock_t *lock) ++{ ++ __raw_spin_unlock(lock); ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_enable_hw(); ++} ++ ++unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock) ++{ ++ unsigned long flags; ++ int s; ++ ++ local_irq_save_hw(flags); ++ __raw_spin_lock(lock); ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ ++ return raw_mangle_irq_bits(s, flags); ++} ++ ++void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x) ++{ ++ __raw_spin_unlock(lock); ++ if (!raw_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_restore_hw(x); ++} ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) ++{ ++ __raw_spin_unlock(&lock->bare_lock); ++} ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x) ++{ ++ if (!raw_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_restore_hw(x); ++} ++ ++#ifdef __IPIPE_3LEVEL_IRQMAP ++ ++/* Must be called hw IRQs off. */ ++static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, ++ unsigned int irq) ++{ ++ __set_bit(irq, p->irqheld_map); ++ p->irqall[irq]++; ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); ++ int l0b, l1b; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ prefetchw(p); ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_lomap); ++ __set_bit(l1b, p->irqpend_mdmap); ++ __set_bit(l0b, &p->irqpend_himap); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b; ++ ++ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, ++ &ipd->irqs[irq].control))) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_lomap[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_mdmap); ++ if (p->irqpend_mdmap[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_himap); ++ } ++ } ++} ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, cpu; ++ ++ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, ++ &ipd->irqs[irq].control))) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_lomap); ++ set_bit(l1b, p->irqpend_mdmap); ++ set_bit(l0b, &p->irqpend_himap); ++ } ++ } ++} ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p, ++ int dovirt) ++{ ++ unsigned long l0m, l1m, l2m, himask, mdmask; ++ int l0b, l1b, l2b, vl0b, vl1b; ++ unsigned int irq; ++ ++ if (dovirt) { ++ /* ++ * All virtual IRQs are mapped by a single long word. ++ * There is exactly BITS_PER_LONG virqs, and they are ++ * always last in the interrupt map, starting at ++ * IPIPE_VIRQ_BASE. Therefore, we only need to test a ++ * single bit within the high and middle maps to check ++ * whether a virtual IRQ is pending (the computations ++ * below are constant). ++ */ ++ vl0b = IPIPE_VIRQ_BASE / (BITS_PER_LONG * BITS_PER_LONG); ++ himask = (1L << vl0b); ++ vl1b = IPIPE_VIRQ_BASE / BITS_PER_LONG; ++ mdmask = (1L << (vl1b & (BITS_PER_LONG-1))); ++ } else ++ himask = mdmask = ~0L; ++ ++ l0m = p->irqpend_himap & himask; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_mdmap[l0b] & mdmask; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; ++ l2m = p->irqpend_lomap[l1b]; ++ if (unlikely(l2m == 0)) ++ return -1; ++ ++ l2b = __ipipe_ffnz(l2m); ++ irq = l1b * BITS_PER_LONG + l2b; ++ ++ __clear_bit(irq, p->irqpend_lomap); ++ if (p->irqpend_lomap[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_mdmap); ++ if (p->irqpend_mdmap[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_himap); ++ } ++ ++ return irq; ++} ++ ++#else /* __IPIPE_2LEVEL_IRQMAP */ ++ ++/* Must be called hw IRQs off. */ ++static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, ++ unsigned int irq) ++{ ++ __set_bit(irq, p->irqheld_map); ++ p->irqall[irq]++; ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); ++ int l0b = irq / BITS_PER_LONG; ++ ++ prefetchw(p); ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_lomap); ++ __set_bit(l0b, &p->irqpend_himap); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG; ++ ++ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, ++ &ipd->irqs[irq].control))) ++ return; ++ ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_lomap[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_himap); ++ } ++} ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG, cpu; ++ ++ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, ++ &ipd->irqs[irq].control))) ++ return; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_lomap); ++ set_bit(l0b, &p->irqpend_himap); ++ } ++ } ++} ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p, ++ int dovirt) ++{ ++ unsigned long l0m, l1m, himask = ~0L; ++ int l0b, l1b; ++ ++ himask <<= dovirt ? IPIPE_VIRQ_BASE/BITS_PER_LONG : 0; ++ ++ l0m = p->irqpend_himap & himask; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_lomap[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m); ++ __clear_bit(l1b, &p->irqpend_lomap[l0b]); ++ if (p->irqpend_lomap[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_himap); ++ ++ return l0b * BITS_PER_LONG + l1b; ++} ++ ++#endif /* __IPIPE_2LEVEL_IRQMAP */ ++ ++/* ++ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must ++ * be called with local hw interrupts disabled. ++ */ ++void __ipipe_walk_pipeline(struct list_head *pos) ++{ ++ struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain; ++ struct ipipe_percpu_domain_data *p, *np; ++ ++ p = ipipe_cpudom_ptr(this_domain); ++ ++ while (pos != &__ipipe_pipeline) { ++ ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ np = ipipe_cpudom_ptr(next_domain); ++ ++ if (test_bit(IPIPE_STALL_FLAG, &np->status)) ++ break; /* Stalled stage -- do not go further. */ ++ ++ if (__ipipe_ipending_p(np)) { ++ if (next_domain == this_domain) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ else { ++ ++ p->evsync = 0; ++ __ipipe_current_domain = next_domain; ++ ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ ++ ++ if (__ipipe_current_domain == next_domain) ++ __ipipe_current_domain = this_domain; ++ /* ++ * Otherwise, something changed the current domain under our ++ * feet recycling the register set; do not override the new ++ * domain. ++ */ ++ ++ if (__ipipe_ipending_p(p) && ++ !test_bit(IPIPE_STALL_FLAG, &p->status)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ } ++ break; ++ } else if (next_domain == this_domain) ++ break; ++ ++ pos = next_domain->p_link.next; ++ } ++} ++ ++/* ++ * ipipe_suspend_domain() -- Suspend the current domain, switching to ++ * the next one which has pending work down the pipeline. 
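A short worked example of the two-level pending map used above may help. This is a standalone demonstration of the index arithmetic only (userspace C, assuming BITS_PER_LONG == 32); it is not kernel code from the patch:

    #include <stdio.h>

    int main(void)
    {
            unsigned int irq = 70;
            unsigned int l0b = irq / 32;   /* word index into irqpend_lomap[] */
            unsigned int l1b = irq % 32;   /* bit position inside that word   */

            /*
             * __ipipe_set_irq_pending() sets bit 'l1b' of irqpend_lomap[l0b]
             * and bit 'l0b' of the single irqpend_himap summary word;
             * __ipipe_next_irq() recovers the IRQ with two find-first-bit
             * scans and returns l0b * BITS_PER_LONG + l1b.
             */
            printf("irq %u -> himap bit %u, lomap[%u] bit %u -> irq %u\n",
                   irq, l0b, l0b, l1b, l0b * 32 + l1b);
            return 0;
    }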
++ */ ++void ipipe_suspend_domain(void) ++{ ++ struct ipipe_domain *this_domain, *next_domain; ++ struct ipipe_percpu_domain_data *p; ++ struct list_head *ln; ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ ++ this_domain = next_domain = __ipipe_current_domain; ++ p = ipipe_cpudom_ptr(this_domain); ++ p->status &= ~(IPIPE_STALL_MASK|IPIPE_SYNC_MASK); ++ ++ if (__ipipe_ipending_p(p)) ++ goto sync_stage; ++ ++ for (;;) { ++ ln = next_domain->p_link.next; ++ ++ if (ln == &__ipipe_pipeline) ++ break; ++ ++ next_domain = list_entry(ln, struct ipipe_domain, p_link); ++ p = ipipe_cpudom_ptr(next_domain); ++ ++ if (p->status & IPIPE_STALL_MASK) ++ break; ++ ++ if (!__ipipe_ipending_p(p)) ++ continue; ++ ++ __ipipe_current_domain = next_domain; ++sync_stage: ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ ++ if (__ipipe_current_domain != next_domain) ++ /* ++ * Something has changed the current domain under our ++ * feet, recycling the register set; take note. ++ */ ++ this_domain = __ipipe_current_domain; ++ } ++ ++ __ipipe_current_domain = this_domain; ++ ++ local_irq_restore_hw(flags); ++} ++ ++ ++/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. ++ * Virtual interrupts are handled in exactly the same way than their ++ * hw-generated counterparts wrt pipelining. ++ */ ++unsigned ipipe_alloc_virq(void) ++{ ++ unsigned long flags, irq = 0; ++ int ipos; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ if (__ipipe_virtual_irq_map != ~0) { ++ ipos = ffz(__ipipe_virtual_irq_map); ++ set_bit(ipos, &__ipipe_virtual_irq_map); ++ irq = ipos + IPIPE_VIRQ_BASE; ++ } ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return irq; ++} ++ ++/* ++ * ipipe_control_irq() -- Change modes of a pipelined interrupt for ++ * the current domain. ++ */ ++int ipipe_virtualize_irq(struct ipipe_domain *ipd, ++ unsigned irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t acknowledge, ++ unsigned modemask) ++{ ++ ipipe_irq_handler_t old_handler; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int err; ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) ++ return -EPERM; ++ ++ if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) ++ /* Silently unwire interrupts for non-heading domains. */ ++ modemask &= ~IPIPE_WIRED_MASK; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ old_handler = ipd->irqs[irq].handler; ++ ++ if (handler != NULL) { ++ if (handler == IPIPE_SAME_HANDLER) { ++ handler = old_handler; ++ cookie = ipd->irqs[irq].cookie; ++ ++ if (handler == NULL) { ++ err = -EINVAL; ++ goto unlock_and_exit; ++ } ++ } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && ++ old_handler != NULL) { ++ err = -EBUSY; ++ goto unlock_and_exit; ++ } ++ ++ /* Wired interrupts can only be delivered to domains ++ * always heading the pipeline, and using dynamic ++ * propagation. */ ++ ++ if ((modemask & IPIPE_WIRED_MASK) != 0) { ++ if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { ++ err = -EINVAL; ++ goto unlock_and_exit; ++ } ++ modemask |= (IPIPE_HANDLE_MASK); ++ } ++ ++ if ((modemask & IPIPE_STICKY_MASK) != 0) ++ modemask |= IPIPE_HANDLE_MASK; ++ } else ++ modemask &= ++ ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | ++ IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); ++ ++ if (acknowledge == NULL && !ipipe_virtual_irq_p(irq)) ++ /* ++ * Acknowledge handler unspecified for a hw interrupt: ++ * use the Linux-defined handler instead. 
++ */ ++ acknowledge = ipipe_root_domain->irqs[irq].acknowledge; ++ ++ ipd->irqs[irq].handler = handler; ++ ipd->irqs[irq].cookie = cookie; ++ ipd->irqs[irq].acknowledge = acknowledge; ++ ipd->irqs[irq].control = modemask; ++ ++ if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) { ++ desc = irq_to_desc(irq); ++ if (handler != NULL) { ++ if (desc) ++ __ipipe_enable_irqdesc(ipd, irq); ++ ++ if ((modemask & IPIPE_ENABLE_MASK) != 0) { ++ if (ipd != __ipipe_current_domain) { ++ /* ++ * IRQ enable/disable state is domain-sensitive, so we ++ * may not change it for another domain. What is ++ * allowed however is forcing some domain to handle an ++ * interrupt source, by passing the proper 'ipd' ++ * descriptor which thus may be different from ++ * __ipipe_current_domain. ++ */ ++ err = -EPERM; ++ goto unlock_and_exit; ++ } ++ if (desc) ++ __ipipe_enable_irq(irq); ++ } ++ } else if (old_handler != NULL && desc) ++ __ipipe_disable_irqdesc(ipd, irq); ++ } ++ ++ err = 0; ++ ++ unlock_and_exit: ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return err; ++} ++ ++/* ipipe_control_irq() -- Change modes of a pipelined interrupt for ++ * the current domain. */ ++ ++int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) ++{ ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ ipd = __ipipe_current_domain; ++ ++ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) { ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ return -EPERM; ++ } ++ ++ if (ipd->irqs[irq].handler == NULL) ++ setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); ++ ++ if ((setmask & IPIPE_STICKY_MASK) != 0) ++ setmask |= IPIPE_HANDLE_MASK; ++ ++ if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */ ++ clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); ++ ++ ipd->irqs[irq].control &= ~clrmask; ++ ipd->irqs[irq].control |= setmask; ++ ++ if ((setmask & IPIPE_ENABLE_MASK) != 0) ++ __ipipe_enable_irq(irq); ++ else if ((clrmask & IPIPE_ENABLE_MASK) != 0) ++ __ipipe_disable_irq(irq); ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return 0; ++} ++ ++/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ ++ ++int __ipipe_dispatch_event (unsigned event, void *data) ++{ ++ struct ipipe_domain *start_domain, *this_domain, *next_domain; ++ struct ipipe_percpu_domain_data *np; ++ ipipe_event_handler_t evhand; ++ struct list_head *pos, *npos; ++ unsigned long flags; ++ int propagate = 1; ++ ++ local_irq_save_hw(flags); ++ ++ start_domain = this_domain = __ipipe_current_domain; ++ ++ list_for_each_safe(pos, npos, &__ipipe_pipeline) { ++ /* ++ * Note: Domain migration may occur while running ++ * event or interrupt handlers, in which case the ++ * current register set is going to be recycled for a ++ * different domain than the initiating one. We do ++ * care for that, always tracking the current domain ++ * descriptor upon return from those handlers. ++ */ ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ np = ipipe_cpudom_ptr(next_domain); ++ ++ /* ++ * Keep a cached copy of the handler's address since ++ * ipipe_catch_event() may clear it under our feet. 
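For readers new to the interface, ipipe_virtualize_irq() as implemented above is what a client domain (for instance a real-time core) calls to take over an interrupt. The sketch below is illustrative only; 'my_domain', 'my_irq' and 'my_isr' are hypothetical, the handler signature follows ipipe_irq_handler_t, and the flag combination shown is the usual one for a pipeline-head domain (wired mode is silently dropped for non-head domains, as the code above shows):

    static void my_isr(unsigned irq, void *cookie)
    {
            /* runs on the head domain with hw interrupts off; keep it short */
    }

    static int grab_irq(struct ipipe_domain *my_domain, unsigned my_irq)
    {
            return ipipe_virtualize_irq(my_domain, my_irq,
                                        my_isr, NULL, /* handler, cookie */
                                        NULL,         /* default to the Linux acknowledge handler */
                                        IPIPE_WIRED_MASK | IPIPE_EXCLUSIVE_MASK |
                                        IPIPE_HANDLE_MASK);
    }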
++ */ ++ evhand = next_domain->evhand[event]; ++ ++ if (evhand != NULL) { ++ __ipipe_current_domain = next_domain; ++ np->evsync |= (1LL << event); ++ local_irq_restore_hw(flags); ++ propagate = !evhand(event, start_domain, data); ++ local_irq_save_hw(flags); ++ /* ++ * We may have a migration issue here, if the ++ * current task is migrated to another CPU on ++ * behalf of the invoked handler, usually when ++ * a syscall event is processed. However, ++ * ipipe_catch_event() will make sure that a ++ * CPU that clears a handler for any given ++ * event will not attempt to wait for itself ++ * to clear the evsync bit for that event, ++ * which practically plugs the hole, without ++ * resorting to a much more complex strategy. ++ */ ++ np->evsync &= ~(1LL << event); ++ if (__ipipe_current_domain != next_domain) ++ this_domain = __ipipe_current_domain; ++ } ++ ++ /* NEVER sync the root stage here. */ ++ if (next_domain != ipipe_root_domain && ++ __ipipe_ipending_p(np) && ++ !test_bit(IPIPE_STALL_FLAG, &np->status)) { ++ __ipipe_current_domain = next_domain; ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ if (__ipipe_current_domain != next_domain) ++ this_domain = __ipipe_current_domain; ++ } ++ ++ __ipipe_current_domain = this_domain; ++ ++ if (next_domain == this_domain || !propagate) ++ break; ++ } ++ ++ local_irq_restore_hw(flags); ++ ++ return !propagate; ++} ++ ++/* ++ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired ++ * interrupts are immediately and unconditionally delivered to the ++ * domain heading the pipeline upon receipt, and such domain must have ++ * been registered as an invariant head for the system (priority == ++ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is ++ * to get an extra-fast dispatching path for those IRQs, by relying on ++ * a straightforward logic based on assumptions that must always be ++ * true for invariant head domains. The following assumptions are ++ * made when dealing with such interrupts: ++ * ++ * 1- Wired interrupts are purely dynamic, i.e. the decision to ++ * propagate them down the pipeline must be done from the head domain ++ * ISR. ++ * 2- Wired interrupts cannot be shared or sticky. ++ * 3- The root domain cannot be an invariant pipeline head, in ++ * consequence of what the root domain cannot handle wired ++ * interrupts. ++ * 4- Wired interrupts must have a valid acknowledge handler for the ++ * head domain (if needed, see __ipipe_handle_irq). ++ * ++ * Called with hw interrupts off. ++ */ ++ ++void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); ++ ++ prefetchw(p); ++ ++ if (unlikely(test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control))) { ++ /* ++ * If we can't process this IRQ right now, we must ++ * mark it as held, so that it will get played during ++ * normal log sync when the corresponding interrupt ++ * source is eventually unlocked. ++ */ ++ __ipipe_set_irq_held(p, irq); ++ return; ++ } ++ ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { ++ __ipipe_set_irq_pending(head, irq); ++ return; ++ } ++ ++ __ipipe_dispatch_wired_nocheck(head, irq); ++} ++ ++void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); ++ struct ipipe_domain *old; ++ ++ prefetchw(p); ++ ++ old = __ipipe_current_domain; ++ __ipipe_current_domain = head; /* Switch to the head domain. 
*/ ++ ++ p->irqall[irq]++; ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */ ++ __ipipe_run_irqtail(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (__ipipe_current_domain == head) { ++ __ipipe_current_domain = old; ++ if (old == head) { ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ return; ++ } ++ } ++ ++ __ipipe_walk_pipeline(&head->p_link); ++} ++ ++/* ++ * __ipipe_sync_stage() -- Flush the pending IRQs for the current ++ * domain (and processor). This routine flushes the interrupt log ++ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for ++ * more on the deferred interrupt scheme). Every interrupt that ++ * occurred while the pipeline was stalled gets played. WARNING: ++ * callers on SMP boxen should always check for CPU migration on ++ * return of this routine. ++ * ++ * This routine must be called with hw interrupts off. ++ */ ++void __ipipe_sync_stage(int dovirt) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *ipd; ++ int cpu, irq; ++ ++ ipd = __ipipe_current_domain; ++ p = ipipe_cpudom_ptr(ipd); ++ ++ if (__test_and_set_bit(IPIPE_SYNC_FLAG, &p->status)) { ++#ifdef __IPIPE_FEATURE_NESTED_ROOTIRQS ++ /* ++ * Caution: some archs do not support this ++ * (mis)feature (e.g. x86_32). ++ */ ++ if (ipd != ipipe_root_domain) ++#endif ++ return; ++ } ++ ++ cpu = ipipe_processor_id(); ++ ++ for (;;) { ++ irq = __ipipe_next_irq(p, dovirt); ++ if (irq < 0) ++ break; ++ /* ++ * Make sure the compiler does not reorder ++ * wrongly, so that all updates to maps are ++ * done before the handler gets called. ++ */ ++ barrier(); ++ ++ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ continue; ++ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ smp_wmb(); ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_off(); ++ ++ __ipipe_run_isr(ipd, irq); ++ barrier(); ++ p = ipipe_cpudom_ptr(__ipipe_current_domain); ++#ifdef CONFIG_SMP ++ { ++ int newcpu = ipipe_processor_id(); ++ ++ if (newcpu != cpu) { /* Handle CPU migration. */ ++ /* ++ * We expect any domain to clear the SYNC bit each ++ * time it switches in a new task, so that preemptions ++ * and/or CPU migrations (in the SMP case) over the ++ * ISR do not lock out the log syncer for some ++ * indefinite amount of time. In the Linux case, ++ * schedule() handles this (see kernel/sched.c). For ++ * this reason, we don't bother clearing it here for ++ * the source CPU in the migration handling case, ++ * since it must have scheduled another task in by ++ * now. ++ */ ++ __set_bit(IPIPE_SYNC_FLAG, &p->status); ++ cpu = newcpu; ++ } ++ } ++#endif /* CONFIG_SMP */ ++#ifdef CONFIG_TRACE_IRQFLAGS ++ if (__ipipe_root_domain_p && ++ test_bit(IPIPE_STALL_FLAG, &p->status)) ++ trace_hardirqs_on(); ++#endif ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++ ++ __clear_bit(IPIPE_SYNC_FLAG, &p->status); ++} ++ ++/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ ++ ++int ipipe_register_domain(struct ipipe_domain *ipd, ++ struct ipipe_domain_attr *attr) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct list_head *pos = NULL; ++ struct ipipe_domain *_ipd; ++ unsigned long flags; ++ ++ if (!ipipe_root_domain_p) { ++ printk(KERN_WARNING ++ "I-pipe: Only the root domain may register a new domain.\n"); ++ return -EPERM; ++ } ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ if (attr->priority == IPIPE_HEAD_PRIORITY) { ++ if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) { ++ ipipe_critical_exit(flags); ++ return -EAGAIN; /* Cannot override current head. */ ++ } ++ ipd->slot = IPIPE_HEAD_SLOT; ++ } else ++ ipd->slot = ffz(__ipipe_domain_slot_map); ++ ++ if (ipd->slot < CONFIG_IPIPE_DOMAINS) { ++ set_bit(ipd->slot, &__ipipe_domain_slot_map); ++ list_for_each(pos, &__ipipe_pipeline) { ++ _ipd = list_entry(pos, struct ipipe_domain, p_link); ++ if (_ipd->domid == attr->domid) ++ break; ++ } ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ if (pos != &__ipipe_pipeline) { ++ if (ipd->slot < CONFIG_IPIPE_DOMAINS) ++ clear_bit(ipd->slot, &__ipipe_domain_slot_map); ++ return -EBUSY; ++ } ++ ++#ifndef CONFIG_SMP ++ /* ++ * Set up the perdomain pointers for direct access to the ++ * percpu domain data. This saves a costly multiply each time ++ * we need to refer to the contents of the percpu domain data ++ * array. ++ */ ++ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot]; ++#endif ++ ++ ipd->name = attr->name; ++ ipd->domid = attr->domid; ++ ipd->pdd = attr->pdd; ++ ipd->flags = 0; ++ ++ if (attr->priority == IPIPE_HEAD_PRIORITY) { ++ ipd->priority = INT_MAX; ++ __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); ++ } ++ else ++ ipd->priority = attr->priority; ++ ++ __ipipe_init_stage(ipd); ++ ++ INIT_LIST_HEAD(&ipd->p_link); ++ ++#ifdef CONFIG_PROC_FS ++ __ipipe_add_domain_proc(ipd); ++#endif /* CONFIG_PROC_FS */ ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ list_for_each(pos, &__ipipe_pipeline) { ++ _ipd = list_entry(pos, struct ipipe_domain, p_link); ++ if (ipd->priority > _ipd->priority) ++ break; ++ } ++ ++ list_add_tail(&ipd->p_link, pos); ++ ++ ipipe_critical_exit(flags); ++ ++ printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name); ++ ++ if (attr->entry == NULL) ++ return 0; ++ ++ /* ++ * Finally, allow the new domain to perform its initialization ++ * duties. ++ */ ++ local_irq_save_hw_smp(flags); ++ __ipipe_current_domain = ipd; ++ local_irq_restore_hw_smp(flags); ++ attr->entry(); ++ local_irq_save_hw(flags); ++ __ipipe_current_domain = ipipe_root_domain; ++ p = ipipe_root_cpudom_ptr(); ++ ++ if (__ipipe_ipending_p(p) && ++ !test_bit(IPIPE_STALL_FLAG, &p->status)) ++ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); ++ ++ local_irq_restore_hw(flags); ++ ++ return 0; ++} ++ ++/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ ++ ++int ipipe_unregister_domain(struct ipipe_domain *ipd) ++{ ++ unsigned long flags; ++ ++ if (!ipipe_root_domain_p) { ++ printk(KERN_WARNING ++ "I-pipe: Only the root domain may unregister a domain.\n"); ++ return -EPERM; ++ } ++ ++ if (ipd == ipipe_root_domain) { ++ printk(KERN_WARNING ++ "I-pipe: Cannot unregister the root domain.\n"); ++ return -EPERM; ++ } ++#ifdef CONFIG_SMP ++ { ++ struct ipipe_percpu_domain_data *p; ++ unsigned int irq; ++ int cpu; ++ ++ /* ++ * In the SMP case, wait for the logged events to drain on ++ * other processors before eventually removing the domain ++ * from the pipeline. 
++ */ ++ ++ ipipe_unstall_pipeline_from(ipd); ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); ++ clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); ++ set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ while (__ipipe_ipending_p(p)) ++ cpu_relax(); ++ } ++ } ++#endif /* CONFIG_SMP */ ++ ++ mutex_lock(&ipd->mutex); ++ ++#ifdef CONFIG_PROC_FS ++ __ipipe_remove_domain_proc(ipd); ++#endif /* CONFIG_PROC_FS */ ++ ++ /* ++ * Simply remove the domain from the pipeline and we are almost done. ++ */ ++ ++ flags = ipipe_critical_enter(NULL); ++ list_del_init(&ipd->p_link); ++ ipipe_critical_exit(flags); ++ ++ __ipipe_cleanup_domain(ipd); ++ ++ mutex_unlock(&ipd->mutex); ++ ++ printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name); ++ ++ return 0; ++} ++ ++/* ++ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of ++ * a running interrupt handler to the next domain down the pipeline. ++ * ipipe_schedule_irq() -- Does almost the same as above, but attempts ++ * to pend the interrupt for the current domain first. ++ * Must be called hw IRQs off. ++ */ ++void __ipipe_pend_irq(unsigned irq, struct list_head *head) ++{ ++ struct ipipe_domain *ipd; ++ struct list_head *ln; ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ BUG_ON(irq >= IPIPE_NR_IRQS || ++ (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))); ++#endif ++ for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) { ++ ipd = list_entry(ln, struct ipipe_domain, p_link); ++ if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { ++ __ipipe_set_irq_pending(ipd, irq); ++ return; ++ } ++ } ++} ++ ++/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ ++ ++int ipipe_free_virq(unsigned virq) ++{ ++ if (!ipipe_virtual_irq_p(virq)) ++ return -EINVAL; ++ ++ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); ++ ++ return 0; ++} ++ ++void ipipe_init_attr(struct ipipe_domain_attr *attr) ++{ ++ attr->name = "anon"; ++ attr->domid = 1; ++ attr->entry = NULL; ++ attr->priority = IPIPE_ROOT_PRIO; ++ attr->pdd = NULL; ++} ++ ++/* ++ * ipipe_catch_event() -- Interpose or remove an event handler for a ++ * given domain. 
++ */ ++ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, ++ unsigned event, ++ ipipe_event_handler_t handler) ++{ ++ ipipe_event_handler_t old_handler; ++ unsigned long flags; ++ int self = 0, cpu; ++ ++ if (event & IPIPE_EVENT_SELF) { ++ event &= ~IPIPE_EVENT_SELF; ++ self = 1; ++ } ++ ++ if (event >= IPIPE_NR_EVENTS) ++ return NULL; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ if (!(old_handler = xchg(&ipd->evhand[event],handler))) { ++ if (handler) { ++ if (self) ++ ipd->evself |= (1LL << event); ++ else ++ __ipipe_event_monitors[event]++; ++ } ++ } ++ else if (!handler) { ++ if (ipd->evself & (1LL << event)) ++ ipd->evself &= ~(1LL << event); ++ else ++ __ipipe_event_monitors[event]--; ++ } else if ((ipd->evself & (1LL << event)) && !self) { ++ __ipipe_event_monitors[event]++; ++ ipd->evself &= ~(1LL << event); ++ } else if (!(ipd->evself & (1LL << event)) && self) { ++ __ipipe_event_monitors[event]--; ++ ipd->evself |= (1LL << event); ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ if (!handler && ipipe_root_domain_p) { ++ /* ++ * If we cleared a handler on behalf of the root ++ * domain, we have to wait for any current invocation ++ * to drain, since our caller might subsequently unmap ++ * the target domain. To this aim, this code ++ * synchronizes with __ipipe_dispatch_event(), ++ * guaranteeing that either the dispatcher sees a null ++ * handler in which case it discards the invocation ++ * (which also prevents from entering a livelock), or ++ * finds a valid handler and calls it. Symmetrically, ++ * ipipe_catch_event() ensures that the called code ++ * won't be unmapped under our feet until the event ++ * synchronization flag is cleared for the given event ++ * on all CPUs. ++ */ ++ preempt_disable(); ++ cpu = smp_processor_id(); ++ /* ++ * Hack: this solves the potential migration issue ++ * raised in __ipipe_dispatch_event(). This is a ++ * work-around which makes the assumption that other ++ * CPUs will subsequently, either process at least one ++ * interrupt for the target domain, or call ++ * __ipipe_dispatch_event() without going through a ++ * migration while running the handler at least once; ++ * practically, this is safe on any normally running ++ * system. ++ */ ++ ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event); ++ preempt_enable(); ++ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event)) ++ schedule_timeout_interruptible(HZ / 50); ++ } ++ } ++ ++ return old_handler; ++} ++ ++cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) ++{ ++#ifdef CONFIG_SMP ++ if (irq >= IPIPE_NR_XIRQS) ++ /* Allow changing affinity of external IRQs only. 
*/ ++ return CPU_MASK_NONE; ++ ++ if (num_online_cpus() > 1) ++ return __ipipe_set_irq_affinity(irq,cpumask); ++#endif /* CONFIG_SMP */ ++ ++ return CPU_MASK_NONE; ++} ++ ++int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) ++ ++{ ++#ifdef CONFIG_SMP ++ return __ipipe_send_ipi(ipi,cpumask); ++#else /* !CONFIG_SMP */ ++ return -EINVAL; ++#endif /* CONFIG_SMP */ ++} ++ ++int ipipe_alloc_ptdkey (void) ++{ ++ unsigned long flags; ++ int key = -1; ++ ++ spin_lock_irqsave(&__ipipe_pipelock,flags); ++ ++ if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { ++ key = ffz(__ipipe_ptd_key_map); ++ set_bit(key,&__ipipe_ptd_key_map); ++ __ipipe_ptd_key_count++; ++ } ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock,flags); ++ ++ return key; ++} ++ ++int ipipe_free_ptdkey (int key) ++{ ++ unsigned long flags; ++ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&__ipipe_pipelock,flags); ++ ++ if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) ++ __ipipe_ptd_key_count--; ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock,flags); ++ ++ return 0; ++} ++ ++int ipipe_set_ptd (int key, void *value) ++ ++{ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return -EINVAL; ++ ++ current->ptd[key] = value; ++ ++ return 0; ++} ++ ++void *ipipe_get_ptd (int key) ++ ++{ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return NULL; ++ ++ return current->ptd[key]; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *ipipe_proc_root; ++ ++static int __ipipe_version_info_proc(char *page, ++ char **start, ++ off_t off, int count, int *eof, void *data) ++{ ++ int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); ++ ++ len -= off; ++ ++ if (len <= off + count) ++ *eof = 1; ++ ++ *start = page + off; ++ ++ if(len > count) ++ len = count; ++ ++ if(len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_common_info_show(struct seq_file *p, void *data) ++{ ++ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; ++ char handling, stickiness, lockbit, exclusive, virtuality; ++ ++ unsigned long ctlbits; ++ unsigned irq; ++ ++ seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n"); ++ seq_printf(p, " |+---- Sticky\n"); ++ seq_printf(p, " ||+--- Locked\n"); ++ seq_printf(p, " |||+-- Exclusive\n"); ++ seq_printf(p, " ||||+- Virtual\n"); ++ seq_printf(p, "[IRQ] |||||\n"); ++ ++ mutex_lock(&ipd->mutex); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ /* Remember to protect against ++ * ipipe_virtual_irq/ipipe_control_irq if more fields ++ * get involved. */ ++ ctlbits = ipd->irqs[irq].control; ++ ++ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) ++ /* ++ * There might be a hole between the last external ++ * IRQ and the first virtual one; skip it. ++ */ ++ continue; ++ ++ if (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) ++ /* Non-allocated virtual IRQ; skip it. */ ++ continue; ++ ++ /* ++ * Statuses are as follows: ++ * o "accepted" means handled _and_ passed down the pipeline. ++ * o "grabbed" means handled, but the interrupt might be ++ * terminated _or_ passed down the pipeline depending on ++ * what the domain handler asks for to the I-pipe. ++ * o "wired" is basically the same as "grabbed", except that ++ * the interrupt is unconditionally delivered to an invariant ++ * pipeline head domain. ++ * o "passed" means unhandled by the domain but passed ++ * down the pipeline. ++ * o "discarded" means unhandled and _not_ passed down the ++ * pipeline. 
The interrupt merely disappears from the ++ * current domain down to the end of the pipeline. ++ */ ++ if (ctlbits & IPIPE_HANDLE_MASK) { ++ if (ctlbits & IPIPE_PASS_MASK) ++ handling = 'A'; ++ else if (ctlbits & IPIPE_WIRED_MASK) ++ handling = 'W'; ++ else ++ handling = 'G'; ++ } else if (ctlbits & IPIPE_PASS_MASK) ++ /* Do not output if no major action is taken. */ ++ continue; ++ else ++ handling = 'D'; ++ ++ if (ctlbits & IPIPE_STICKY_MASK) ++ stickiness = 'S'; ++ else ++ stickiness = '.'; ++ ++ if (ctlbits & IPIPE_LOCK_MASK) ++ lockbit = 'L'; ++ else ++ lockbit = '.'; ++ ++ if (ctlbits & IPIPE_EXCLUSIVE_MASK) ++ exclusive = 'X'; ++ else ++ exclusive = '.'; ++ ++ if (ipipe_virtual_irq_p(irq)) ++ virtuality = 'V'; ++ else ++ virtuality = '.'; ++ ++ seq_printf(p, " %3u: %c%c%c%c%c\n", ++ irq, handling, stickiness, lockbit, exclusive, virtuality); ++ } ++ ++ seq_printf(p, "[Domain info]\n"); ++ ++ seq_printf(p, "id=0x%.8x\n", ipd->domid); ++ ++ if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) ++ seq_printf(p, "priority=topmost\n"); ++ else ++ seq_printf(p, "priority=%d\n", ipd->priority); ++ ++ mutex_unlock(&ipd->mutex); ++ ++ return 0; ++} ++ ++static int __ipipe_common_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data); ++} ++ ++static struct file_operations __ipipe_info_proc_ops = { ++ .owner = THIS_MODULE, ++ .open = __ipipe_common_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void __ipipe_add_domain_proc(struct ipipe_domain *ipd) ++{ ++ struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root); ++ if (e) { ++ e->proc_fops = &__ipipe_info_proc_ops; ++ e->data = (void*) ipd; ++ } ++} ++ ++void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) ++{ ++ remove_proc_entry(ipd->name,ipipe_proc_root); ++} ++ ++void __init ipipe_init_proc(void) ++{ ++ ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); ++ create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); ++ __ipipe_add_domain_proc(ipipe_root_domain); ++ ++ __ipipe_init_tracer(); ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 }; ++DEFINE_PER_CPU(int, ipipe_saved_context_check_state); ++ ++void ipipe_check_context(struct ipipe_domain *border_domain) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ int cpu; ++ ++ local_irq_save_hw_smp(flags); ++ ++ this_domain = __ipipe_current_domain; ++ p = ipipe_head_cpudom_ptr(); ++ if (likely(this_domain->priority <= border_domain->priority && ++ !test_bit(IPIPE_STALL_FLAG, &p->status))) { ++ local_irq_restore_hw_smp(flags); ++ return; ++ } ++ ++ cpu = ipipe_processor_id(); ++ if (!per_cpu(ipipe_percpu_context_check, cpu)) { ++ local_irq_restore_hw_smp(flags); ++ return; ++ } ++ ++ local_irq_restore_hw_smp(flags); ++ ++ ipipe_context_check_off(); ++ ipipe_trace_panic_freeze(); ++ ipipe_set_printk_sync(__ipipe_current_domain); ++ ++ if (this_domain->priority > border_domain->priority) ++ printk(KERN_ERR "I-pipe: Detected illicit call from domain " ++ "'%s'\n" ++ KERN_ERR " into a service reserved for domain " ++ "'%s' and below.\n", ++ this_domain->name, border_domain->name); ++ else ++ printk(KERN_ERR "I-pipe: Detected stalled topmost domain, " ++ "probably caused by a bug.\n" ++ " A critical section may have been " ++ "left unterminated.\n"); ++ dump_stack(); ++ 
ipipe_trace_panic_dump(); ++} ++ ++EXPORT_SYMBOL(ipipe_check_context); ++ ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++ ++int notrace __ipipe_check_percpu_access(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ int ret = 0; ++ ++ local_irq_save_hw_notrace(flags); ++ ++ this_domain = __raw_get_cpu_var(ipipe_percpu_domain); ++ ++ /* ++ * Only the root domain may implement preemptive CPU migration ++ * of tasks, so anything above in the pipeline should be fine. ++ */ ++ if (this_domain->priority > IPIPE_ROOT_PRIO) ++ goto out; ++ ++ if (raw_irqs_disabled_flags(flags)) ++ goto out; ++ ++ /* ++ * Last chance: hw interrupts were enabled on entry while ++ * running over the root domain, but the root stage might be ++ * currently stalled, in which case preemption would be ++ * disabled, and no migration could occur. ++ */ ++ if (this_domain == ipipe_root_domain) { ++ p = ipipe_root_cpudom_ptr(); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ goto out; ++ } ++ /* ++ * Our caller may end up accessing the wrong per-cpu variable ++ * instance due to CPU migration; tell it to complain about ++ * this. ++ */ ++ ret = 1; ++out: ++ local_irq_restore_hw_notrace(flags); ++ ++ return ret; ++} ++ ++#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ ++ ++EXPORT_SYMBOL(ipipe_virtualize_irq); ++EXPORT_SYMBOL(ipipe_control_irq); ++EXPORT_SYMBOL(ipipe_suspend_domain); ++EXPORT_SYMBOL(ipipe_alloc_virq); ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray); ++EXPORT_SYMBOL(ipipe_root); ++EXPORT_SYMBOL(ipipe_stall_pipeline_from); ++EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); ++EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); ++EXPORT_SYMBOL(ipipe_restore_pipeline_from); ++EXPORT_SYMBOL(ipipe_unstall_pipeline_head); ++EXPORT_SYMBOL(__ipipe_restore_pipeline_head); ++EXPORT_SYMBOL(__ipipe_unstall_root); ++EXPORT_SYMBOL(__ipipe_restore_root); ++EXPORT_SYMBOL(__ipipe_spin_lock_irq); ++EXPORT_SYMBOL(__ipipe_spin_unlock_irq); ++EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); ++EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); ++EXPORT_SYMBOL(__ipipe_pipeline); ++EXPORT_SYMBOL(__ipipe_lock_irq); ++EXPORT_SYMBOL(__ipipe_unlock_irq); ++EXPORT_SYMBOL(ipipe_register_domain); ++EXPORT_SYMBOL(ipipe_unregister_domain); ++EXPORT_SYMBOL(ipipe_free_virq); ++EXPORT_SYMBOL(ipipe_init_attr); ++EXPORT_SYMBOL(ipipe_catch_event); ++EXPORT_SYMBOL(ipipe_alloc_ptdkey); ++EXPORT_SYMBOL(ipipe_free_ptdkey); ++EXPORT_SYMBOL(ipipe_set_ptd); ++EXPORT_SYMBOL(ipipe_get_ptd); ++EXPORT_SYMBOL(ipipe_set_irq_affinity); ++EXPORT_SYMBOL(ipipe_send_ipi); ++EXPORT_SYMBOL(__ipipe_pend_irq); ++EXPORT_SYMBOL(__ipipe_set_irq_pending); ++EXPORT_SYMBOL(__ipipe_event_monitors); ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++EXPORT_SYMBOL(__ipipe_check_percpu_access); ++#endif ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++EXPORT_SYMBOL(ipipe_request_tickdev); ++EXPORT_SYMBOL(ipipe_release_tickdev); ++#endif ++ ++EXPORT_SYMBOL(ipipe_critical_enter); ++EXPORT_SYMBOL(ipipe_critical_exit); ++EXPORT_SYMBOL(ipipe_trigger_irq); ++EXPORT_SYMBOL(ipipe_get_sysinfo); +diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c +new file mode 100644 +index 0000000..d3c1866 +--- /dev/null ++++ b/kernel/ipipe/tracer.c +@@ -0,0 +1,1441 @@ ++/* -*- linux-c -*- ++ * kernel/ipipe/tracer.c ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2008 Jan Kiszka. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ ++#define IPIPE_DEFAULT_ACTIVE 0 ++#define IPIPE_DEFAULT_MAX 1 ++#define IPIPE_DEFAULT_FROZEN 2 ++ ++#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) ++#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) ++ ++#define IPIPE_DEFAULT_PRE_TRACE 10 ++#define IPIPE_DEFAULT_POST_TRACE 10 ++#define IPIPE_DEFAULT_BACK_TRACE 100 ++ ++#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ ++#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ ++ ++#define IPIPE_TFLG_NMI_LOCK 0x0001 ++#define IPIPE_TFLG_NMI_HIT 0x0002 ++#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 ++ ++#define IPIPE_TFLG_HWIRQ_OFF 0x0100 ++#define IPIPE_TFLG_FREEZING 0x0200 ++#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ ++#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 ++#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ ++#define IPIPE_TFLG_DOMSTATE_BITS 3 ++ ++#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ ++ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) ++#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ ++ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) ++ ++struct ipipe_trace_point { ++ short type; ++ short flags; ++ unsigned long eip; ++ unsigned long parent_eip; ++ unsigned long v; ++ unsigned long long timestamp; ++}; ++ ++struct ipipe_trace_path { ++ volatile int flags; ++ int dump_lock; /* separated from flags due to cross-cpu access */ ++ int trace_pos; /* next point to fill */ ++ int begin, end; /* finalised path begin and end */ ++ int post_trace; /* non-zero when in post-trace phase */ ++ unsigned long long length; /* max path length in cycles */ ++ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ ++ unsigned long nmi_saved_parent_eip; ++ unsigned long nmi_saved_v; ++ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; ++} ____cacheline_aligned_in_smp; ++ ++enum ipipe_trace_type ++{ ++ IPIPE_TRACE_FUNC = 0, ++ IPIPE_TRACE_BEGIN, ++ IPIPE_TRACE_END, ++ IPIPE_TRACE_FREEZE, ++ IPIPE_TRACE_SPECIAL, ++ IPIPE_TRACE_PID, ++ IPIPE_TRACE_EVENT, ++}; ++ ++#define IPIPE_TYPE_MASK 0x0007 ++#define IPIPE_TYPE_BITS 3 ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); ++#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ ++static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = ++ { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++int ipipe_trace_enable = 0; ++ ++static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; ++static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; ++static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; ++static IPIPE_DEFINE_SPINLOCK(global_path_lock); ++static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; ++static int post_trace = IPIPE_DEFAULT_POST_TRACE; ++static int back_trace = IPIPE_DEFAULT_BACK_TRACE; ++static int verbose_trace = 1; ++static unsigned long trace_overhead; ++ ++static unsigned long trigger_begin; ++static unsigned long trigger_end; ++ ++static DEFINE_MUTEX(out_mutex); ++static struct ipipe_trace_path *print_path; ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++static struct ipipe_trace_path *panic_path; ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++static int print_pre_trace; ++static int print_post_trace; ++ ++ ++static long __ipipe_signed_tsc2us(long long tsc); ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); ++ ++ ++static notrace void ++__ipipe_store_domain_states(struct ipipe_trace_point *point) ++{ ++ struct ipipe_domain *ipd; ++ struct list_head *pos; ++ int i = 0; ++ ++ list_for_each_prev(pos, &__ipipe_pipeline) { ++ ipd = list_entry(pos, struct ipipe_domain, p_link); ++ ++ if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status))) ++ point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); ++ ++ if (ipd == __ipipe_current_domain) ++ point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT; ++ ++ if (++i > IPIPE_TFLG_DOMSTATE_BITS) ++ break; ++ } ++} ++ ++static notrace int __ipipe_get_free_trace_path(int old, int cpu) ++{ ++ int new_active = old; ++ struct ipipe_trace_path *tp; ++ ++ do { ++ if (++new_active == IPIPE_TRACE_PATHS) ++ new_active = 0; ++ tp = &per_cpu(trace_path, cpu)[new_active]; ++ } while (new_active == per_cpu(max_path, cpu) || ++ new_active == per_cpu(frozen_path, cpu) || ++ tp->dump_lock); ++ ++ return new_active; ++} ++ ++static notrace void ++__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, ++ struct ipipe_trace_path *old_tp, int old_pos) ++{ ++ int i; ++ ++ new_tp->trace_pos = pre_trace+1; ++ ++ for (i = new_tp->trace_pos; i > 0; i--) ++ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], ++ &old_tp->point[WRAP_POINT_NO(old_pos-i)], ++ sizeof(struct ipipe_trace_point)); ++ ++ /* mark the end (i.e. the point before point[0]) invalid */ ++ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ unsigned long long length; ++ ++ /* do we have a new worst case? 
*/ ++ length = tp->point[tp->end].timestamp - ++ tp->point[tp->begin].timestamp; ++ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { ++ /* we need protection here against other cpus trying ++ to start a proc dump */ ++ spin_lock(&global_path_lock); ++ ++ /* active path holds new worst case */ ++ tp->length = length; ++ per_cpu(max_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ } ++ ++ return tp; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ int n; ++ ++ /* frozen paths have no core (begin=end) */ ++ tp->begin = tp->end; ++ ++ /* we need protection here against other cpus trying ++ * to set their frozen path or to start a proc dump */ ++ spin_lock(&global_path_lock); ++ ++ per_cpu(frozen_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ /* check if this is the first frozen path */ ++ for_each_possible_cpu(n) { ++ if (n != cpu && ++ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) ++ tp->end = -1; ++ } ++ ++ spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ ++ return tp; ++} ++ ++void notrace ++__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ struct ipipe_trace_path *tp, *old_tp; ++ int pos, next_pos, begin; ++ struct ipipe_trace_point *point; ++ unsigned long flags; ++ int cpu; ++ ++ local_irq_save_hw_notrace(flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here starts a race window with NMIs - catched below */ ++ ++ /* check for NMI recursion */ ++ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { ++ tp->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* first freeze request from NMI context? */ ++ if ((type == IPIPE_TRACE_FREEZE) && ++ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { ++ /* save arguments and mark deferred freezing */ ++ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; ++ tp->nmi_saved_eip = eip; ++ tp->nmi_saved_parent_eip = parent_eip; ++ tp->nmi_saved_v = v; ++ } ++ return; /* no need for restoring flags inside IRQ */ ++ } ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (unlikely(tp != ++ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ /* get the point buffer */ ++ pos = tp->trace_pos; ++ point = &tp->point[pos]; ++ ++ /* store all trace point data */ ++ point->type = type; ++ point->flags = raw_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; ++ point->eip = eip; ++ point->parent_eip = parent_eip; ++ point->v = v; ++ ipipe_read_tsc(point->timestamp); ++ ++ __ipipe_store_domain_states(point); ++ ++ /* forward to next point buffer */ ++ next_pos = WRAP_POINT_NO(pos+1); ++ tp->trace_pos = next_pos; ++ ++ /* only mark beginning if we haven't started yet */ ++ begin = tp->begin; ++ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) ++ tp->begin = pos; ++ ++ /* end of critical path, start post-trace if not already started */ ++ if (unlikely(type == IPIPE_TRACE_END) && ++ (begin >= 0) && !tp->post_trace) ++ tp->post_trace = post_trace + 1; ++ ++ /* freeze only if the slot is free and we are not already freezing */ ++ if ((unlikely(type == IPIPE_TRACE_FREEZE) || ++ (unlikely(eip >= trigger_begin && eip <= trigger_end) && ++ type == IPIPE_TRACE_FUNC)) && ++ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && ++ !(tp->flags & IPIPE_TFLG_FREEZING)) { ++ tp->post_trace = post_trace + 1; ++ tp->flags |= IPIPE_TFLG_FREEZING; ++ } ++ ++ /* enforce end of trace in case of overflow */ ++ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { ++ tp->end = pos; ++ goto enforce_end; ++ } ++ ++ /* stop tracing this path if we are in post-trace and ++ * a) that phase is over now or ++ * b) a new TRACE_BEGIN came in but we are not freezing this path */ ++ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || ++ ((type == IPIPE_TRACE_BEGIN) && ++ !(tp->flags & IPIPE_TFLG_FREEZING))))) { ++ /* store the path's end (i.e. excluding post-trace) */ ++ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); ++ ++ enforce_end: ++ if (tp->flags & IPIPE_TFLG_FREEZING) ++ tp = __ipipe_trace_freeze(cpu, tp, pos); ++ else ++ tp = __ipipe_trace_end(cpu, tp, pos); ++ ++ /* reset the active path, maybe already start a new one */ ++ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
++ WRAP_POINT_NO(tp->trace_pos - 1) : -1; ++ tp->end = -1; ++ tp->post_trace = 0; ++ tp->flags = 0; ++ ++ /* update active_path not earlier to avoid races with NMIs */ ++ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); ++ } ++ ++ /* we still have old_tp and point, ++ * let's reset NMI lock and check for catches */ ++ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { ++ /* well, this late tagging may not immediately be visible for ++ * other cpus already dumping this path - a minor issue */ ++ point->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* handle deferred freezing from NMI context */ ++ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, ++ old_tp->nmi_saved_parent_eip, ++ old_tp->nmi_saved_v); ++ } ++ ++ local_irq_restore_hw_notrace(flags); ++} ++ ++static unsigned long __ipipe_global_path_lock(void) ++{ ++ unsigned long flags; ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ spin_lock_irqsave(&global_path_lock, flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here is small race window with NMIs - catched below */ ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ return flags; ++} ++ ++static void __ipipe_global_path_unlock(unsigned long flags) ++{ ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ /* release spinlock first - it's not involved in the NMI issue */ ++ __ipipe_spin_unlock_irqbegin(&global_path_lock); ++ ++ cpu = ipipe_processor_id(); ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* handle deferred freezing from NMI context */ ++ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, ++ tp->nmi_saved_parent_eip, tp->nmi_saved_v); ++ ++ /* See __ipipe_spin_lock_irqsave() and friends. 
*/ ++ __ipipe_spin_unlock_irqcomplete(flags); ++} ++ ++void notrace ipipe_trace_begin(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_begin); ++ ++void notrace ipipe_trace_end(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_end); ++ ++void notrace ipipe_trace_freeze(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_freeze); ++ ++void notrace ipipe_trace_special(unsigned char id, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_special); ++ ++void notrace ipipe_trace_pid(pid_t pid, short prio) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, pid); ++} ++EXPORT_SYMBOL(ipipe_trace_pid); ++ ++void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, delay_tsc); ++} ++EXPORT_SYMBOL(ipipe_trace_event); ++ ++int ipipe_trace_max_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_possible_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(ipipe_trace_max_reset); ++ ++int ipipe_trace_frozen_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_online_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(ipipe_trace_frozen_reset); ++ ++static void ++__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, ++ int trylock) ++{ ++ struct task_struct *task = NULL; ++ char buf[8]; ++ int i; ++ int locked = 1; ++ ++ if (trylock) { ++ if (!read_trylock(&tasklist_lock)) ++ locked = 0; ++ } else ++ read_lock(&tasklist_lock); ++ ++ if (locked) ++ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); ++ ++ if (task) ++ strncpy(task_info, task->comm, 11); ++ else ++ strcpy(task_info, "--"); ++ ++ if (locked) ++ read_unlock(&tasklist_lock); ++ ++ for (i = strlen(task_info); i < 11; i++) ++ task_info[i] = ' '; ++ ++ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); ++ strcpy(task_info + (11 - strlen(buf)), buf); ++} ++ ++static void ++__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, ++ struct ipipe_trace_point *point) ++{ ++ long time; ++ int type; ++ ++ time = 
__ipipe_signed_tsc2us(point->timestamp - ++ path->point[path->begin].timestamp + point->v); ++ type = point->type >> IPIPE_TYPE_BITS; ++ ++ if (type == 0) ++ /* ++ * Event type #0 is predefined, stands for the next ++ * timer tick. ++ */ ++ sprintf(buf, "tick@%-6ld", time); ++ else ++ sprintf(buf, "%3d@%-7ld", type, time); ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void) ++{ ++ unsigned long flags; ++ int cpu; ++ ++ if (!ipipe_trace_enable) ++ return; ++ ++ ipipe_trace_enable = 0; ++ local_irq_save_hw_notrace(flags); ++ ++ cpu = ipipe_processor_id(); ++ ++ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ local_irq_restore_hw(flags); ++} ++EXPORT_SYMBOL(ipipe_trace_panic_freeze); ++ ++void ipipe_trace_panic_dump(void) ++{ ++ int cnt = back_trace; ++ int start, pos; ++ char buf[16]; ++ ++ if (!panic_path) ++ return; ++ ++ ipipe_context_check_off(); ++ ++ printk("I-pipe tracer log (%d points):\n", cnt); ++ ++ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); ++ ++ while (cnt-- > 0) { ++ struct ipipe_trace_point *point = &panic_path->point[pos]; ++ long time; ++ char info[16]; ++ int i; ++ ++ printk(" %c", ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ printk("%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '*' : ' ')); ++ ++ if (!point->eip) ++ printk("--\n"); ++ else { ++ __ipipe_trace_point_type(buf, point); ++ printk("%s", buf); ++ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ printk(" "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(info, ++ point, 1); ++ printk("%s", info); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(info, ++ panic_path, point); ++ printk("%s", info); ++ break; ++ ++ default: ++ printk("0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ panic_path->point[start].timestamp); ++ printk(" %5ld ", time); ++ ++ __ipipe_print_symname(NULL, point->eip); ++ printk(" ("); ++ __ipipe_print_symname(NULL, point->parent_eip); ++ printk(")\n"); ++ } ++ pos = WRAP_POINT_NO(pos - 1); ++ } ++ ++ panic_path = NULL; ++} ++EXPORT_SYMBOL(ipipe_trace_panic_dump); ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ ++ ++/* --- /proc output --- */ ++ ++static notrace int __ipipe_in_critical_trpath(long point_no) ++{ ++ return ((WRAP_POINT_NO(point_no-print_path->begin) < ++ WRAP_POINT_NO(print_path->end-print_path->begin)) || ++ ((print_path->end == print_path->begin) && ++ (WRAP_POINT_NO(point_no-print_path->end) > ++ print_post_trace))); ++} ++ ++static long __ipipe_signed_tsc2us(long long tsc) ++{ ++ unsigned long long abs_tsc; ++ long us; ++ ++ /* ipipe_tsc2us works on unsigned => handle sign separately */ ++ abs_tsc = (tsc >= 0) ? 
tsc : -tsc; ++ us = ipipe_tsc2us(abs_tsc); ++ if (tsc < 0) ++ return -us; ++ else ++ return us; ++} ++ ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) ++{ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ strcpy(buf, "func "); ++ break; ++ ++ case IPIPE_TRACE_BEGIN: ++ strcpy(buf, "begin "); ++ break; ++ ++ case IPIPE_TRACE_END: ++ strcpy(buf, "end "); ++ break; ++ ++ case IPIPE_TRACE_FREEZE: ++ strcpy(buf, "freeze "); ++ break; ++ ++ case IPIPE_TRACE_SPECIAL: ++ sprintf(buf, "(0x%02x) ", ++ point->type >> IPIPE_TYPE_BITS); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ sprintf(buf, "[%5d] ", (pid_t)point->v); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ sprintf(buf, "event "); ++ break; ++ } ++} ++ ++static void ++__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ char mark = ' '; ++ int point_no = point - print_path->point; ++ int i; ++ ++ if (print_path->end == point_no) ++ mark = '<'; ++ else if (print_path->begin == point_no) ++ mark = '>'; ++ else if (__ipipe_in_critical_trpath(point_no)) ++ mark = ':'; ++ seq_printf(m, "%c%c", mark, ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ if (!verbose_trace) ++ return; ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ seq_printf(m, "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); ++} ++ ++static void ++__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ unsigned long delay = 0; ++ int next; ++ char *mark = " "; ++ ++ next = WRAP_POINT_NO(point+1 - print_path->point); ++ ++ if (next != print_path->trace_pos) ++ delay = ipipe_tsc2ns(print_path->point[next].timestamp - ++ point->timestamp); ++ ++ if (__ipipe_in_critical_trpath(point - print_path->point)) { ++ if (delay > IPIPE_DELAY_WARN) ++ mark = "! "; ++ else if (delay > IPIPE_DELAY_NOTE) ++ mark = "+ "; ++ } ++ seq_puts(m, mark); ++ ++ if (verbose_trace) ++ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, ++ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); ++ else ++ seq_puts(m, " "); ++} ++ ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) ++{ ++ char namebuf[KSYM_NAME_LEN+1]; ++ unsigned long size, offset; ++ const char *sym_name; ++ char *modname; ++ ++ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ if (!m) { ++ /* panic dump */ ++ if (sym_name) { ++ printk("%s+0x%lx", sym_name, offset); ++ if (modname) ++ printk(" [%s]", modname); ++ } ++ } else ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ { ++ if (sym_name) { ++ if (verbose_trace) { ++ seq_printf(m, "%s+0x%lx", sym_name, offset); ++ if (modname) ++ seq_printf(m, " [%s]", modname); ++ } else ++ seq_puts(m, sym_name); ++ } else ++ seq_printf(m, "<%08lx>", eip); ++ } ++} ++ ++static void __ipipe_print_headline(struct seq_file *m) ++{ ++ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " ++ "us\n\n", trace_overhead/1000, trace_overhead%1000); ++ ++ if (verbose_trace) { ++ const char *name[4] = { [0 ... 
3] = "" }; ++ struct list_head *pos; ++ int i = 0; ++ ++ list_for_each_prev(pos, &__ipipe_pipeline) { ++ struct ipipe_domain *ipd = ++ list_entry(pos, struct ipipe_domain, p_link); ++ ++ name[i] = ipd->name; ++ if (++i > 3) ++ break; ++ } ++ ++ seq_printf(m, ++ " +----- Hard IRQs ('|': locked)\n" ++ " |+---- %s\n" ++ " ||+--- %s\n" ++ " |||+-- %s\n" ++ " ||||+- %s%s\n" ++ " ||||| +---------- " ++ "Delay flag ('+': > %d us, '!': > %d us)\n" ++ " ||||| | +- " ++ "NMI noise ('N')\n" ++ " ||||| | |\n" ++ " Type User Val. Time Delay Function " ++ "(Parent)\n", ++ name[3], name[2], name[1], name[0], ++ name[0] ? " ('*': domain stalled, '+': current, " ++ "'#': current+stalled)" : "", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++ } else ++ seq_printf(m, ++ " +--------------- Hard IRQs ('|': locked)\n" ++ " | +- Delay flag " ++ "('+': > %d us, '!': > %d us)\n" ++ " | |\n" ++ " Type Time Function (Parent)\n", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++} ++ ++static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ unsigned long length_usecs; ++ int points, cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the longest of all per-cpu paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ if ((print_path == NULL) || ++ (tp->length > print_path->length)) { ++ print_path = tp; ++ break; ++ } ++ } ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ /* does this path actually contain data? 
*/ ++ if (print_path->end == print_path->begin) ++ return NULL; ++ ++ /* number of points inside the critical path */ ++ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); ++ ++ /* pre- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, pre-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = pre_trace; ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - ++ print_post_trace; ++ ++ length_usecs = ipipe_tsc2us(print_path->length); ++ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" ++ "------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_ARCH_STRING); ++ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " ++ "%d (-%d/+%d), Length: %lu us\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ points, print_pre_trace, print_post_trace, length_usecs); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + n)]; ++} ++ ++static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ loff_t n = ++*pos; ++ ++ /* check if we are inside the trace range with the next entry */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + *pos)]; ++} ++ ++static void __ipipe_prtrace_stop(struct seq_file *m, void *p) ++{ ++ if (print_path) ++ print_path->dump_lock = 0; ++ mutex_unlock(&out_mutex); ++} ++ ++static int __ipipe_prtrace_show(struct seq_file *m, void *p) ++{ ++ long time; ++ struct ipipe_trace_point *point = p; ++ char buf[16]; ++ ++ if (!point->eip) { ++ seq_puts(m, "--\n"); ++ return 0; ++ } ++ ++ __ipipe_print_pathmark(m, point); ++ __ipipe_trace_point_type(buf, point); ++ seq_puts(m, buf); ++ if (verbose_trace) ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ seq_puts(m, " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(buf, point, 0); ++ seq_puts(m, buf); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(buf, print_path, point); ++ seq_puts(m, buf); ++ break; ++ ++ default: ++ seq_printf(m, "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ print_path->point[print_path->begin].timestamp); ++ seq_printf(m, "%5ld", time); ++ ++ __ipipe_print_delay(m, point); ++ __ipipe_print_symname(m, point->eip); ++ seq_puts(m, " ("); ++ __ipipe_print_symname(m, point->parent_eip); ++ seq_puts(m, ")\n"); ++ ++ return 0; ++} ++ ++static struct seq_operations __ipipe_max_ptrace_ops = { ++ .start = __ipipe_max_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_max_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_max_reset(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ mutex_lock(&out_mutex); ++ ipipe_trace_max_reset(); ++ 
mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++struct file_operations __ipipe_max_prtrace_fops = { ++ .open = __ipipe_max_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_max_reset, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ int cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the first of all per-cpu frozen paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ if (tp->end >= 0) { ++ print_path = tp; ++ break; ++ } ++ } ++ if (print_path) ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!print_path) ++ return NULL; ++ ++ /* back- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, back-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = back_trace-1; /* substract freeze point */ ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 2 - ++ print_post_trace; ++ ++ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" ++ "------------------------------------------------------" ++ "------\n", ++ UTS_RELEASE, IPIPE_ARCH_STRING); ++ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ print_pre_trace+1, print_post_trace); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= print_pre_trace + 1 + print_post_trace) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin- ++ print_pre_trace+n)]; ++} ++ ++static struct seq_operations __ipipe_frozen_ptrace_ops = { ++ .start = __ipipe_frozen_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_frozen_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, pbuffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ipipe_trace_frozen_reset(); ++ if (val > 0) ++ ipipe_trace_freeze(-1); ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++struct file_operations __ipipe_frozen_prtrace_fops = { ++ .open = __ipipe_frozen_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_frozen_ctrl, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static int __ipipe_rd_proc_val(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = sprintf(page, "%u\n", *(int *)data); ++ len -= off; ++ if (len <= off + count) ++ *eof = 1; ++ *start = page + off; ++ if (len > count) ++ len = count; ++ if (len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ *(int *)data = val; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ ++ if (!trigger_begin) ++ return 0; ++ ++ len = sprint_symbol(page, trigger_begin); ++ page[len++] = '\n'; ++ ++ len -= off; ++ if (len <= off + count) ++ *eof = 1; ++ *start = page + off; ++ if (len > count) ++ len = count; ++ if (len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_wr_trigger(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char buf[KSYM_SYMBOL_LEN]; ++ unsigned long begin, end; ++ ++ if (count > sizeof(buf) - 1) ++ count = sizeof(buf) - 1; ++ if (copy_from_user(buf, buffer, count)) ++ return -EFAULT; ++ buf[count] = 0; ++ if (buf[count-1] == '\n') ++ buf[count-1] = 0; ++ ++ begin = kallsyms_lookup_name(buf); ++ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) ++ return -ENOENT; ++ end += begin - 1; ++ ++ mutex_lock(&out_mutex); ++ /* invalidate the current range before setting a new one */ ++ trigger_end = 0; ++ wmb(); ++ ipipe_trace_frozen_reset(); ++ ++ /* set new range */ ++ trigger_begin = begin; ++ wmb(); ++ trigger_end = end; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++static void notrace ++ipipe_trace_function(unsigned long ip, unsigned long parent_ip) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); ++} ++ ++static struct ftrace_ops ipipe_trace_ops = { ++ .func = ipipe_trace_function ++}; ++ ++static int __ipipe_wr_enable(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ++ if (ipipe_trace_enable) { ++ if (!val) ++ unregister_ftrace_function(&ipipe_trace_ops); ++ } else if (val) ++ register_ftrace_function(&ipipe_trace_ops); ++ ++ ipipe_trace_enable = val; ++ ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++ ++extern struct proc_dir_entry *ipipe_proc_root; ++ ++static struct proc_dir_entry * __init ++__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, ++ const char *name, int *value_ptr) ++{ ++ struct proc_dir_entry *entry; ++ ++ entry = create_proc_entry(name, 0644, trace_dir); ++ if (entry) { ++ entry->data = value_ptr; ++ entry->read_proc = __ipipe_rd_proc_val; ++ entry->write_proc = __ipipe_wr_proc_val; ++ } ++ return entry; ++} ++ ++void __init __ipipe_init_tracer(void) ++{ ++ struct proc_dir_entry *trace_dir; ++ struct proc_dir_entry *entry; ++ unsigned long long start, end, min = ULLONG_MAX; ++ int i; ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ int cpu, path; ++ ++ for_each_possible_cpu(cpu) { ++ struct ipipe_trace_path *tp_buf; ++ ++ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * ++ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); ++ if (!tp_buf) { ++ printk(KERN_ERR "I-pipe: " ++ "insufficient memory for trace buffer.\n"); ++ return; ++ } ++ memset(tp_buf, 0, ++ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); ++ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { ++ tp_buf[path].begin = -1; ++ tp_buf[path].end = -1; ++ } ++ per_cpu(trace_path, cpu) = tp_buf; ++ } ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++ /* Calculate minimum overhead of __ipipe_trace() */ ++ local_irq_disable_hw(); ++ for (i = 0; i < 100; i++) { ++ ipipe_read_tsc(start); ++ __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, 0); ++ ipipe_read_tsc(end); ++ ++ end -= start; ++ if (end < min) ++ min = end; ++ } ++ local_irq_enable_hw(); ++ trace_overhead = ipipe_tsc2ns(min); ++ ++#ifdef CONFIG_IPIPE_TRACE_ENABLE ++ ipipe_trace_enable = 1; ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ register_ftrace_function(&ipipe_trace_ops); ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++#endif /* CONFIG_IPIPE_TRACE_ENABLE */ ++ ++ trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); ++ ++ entry = create_proc_entry("max", 0644, trace_dir); ++ if (entry) ++ entry->proc_fops = &__ipipe_max_prtrace_fops; ++ ++ entry = create_proc_entry("frozen", 0644, trace_dir); ++ if (entry) ++ entry->proc_fops = &__ipipe_frozen_prtrace_fops; ++ ++ entry = create_proc_entry("trigger", 0644, trace_dir); ++ if (entry) { ++ entry->read_proc = __ipipe_rd_trigger; ++ entry->write_proc = __ipipe_wr_trigger; ++ } ++ ++ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", ++ &pre_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", ++ &post_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", ++ &back_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "verbose", ++ &verbose_trace); ++ entry = __ipipe_create_trace_proc_val(trace_dir, "enable", ++ &ipipe_trace_enable); ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ if (entry) ++ entry->write_proc = __ipipe_wr_enable; ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++} +diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c +index e570d19..7cebb6f 100644 +--- a/kernel/irq/chip.c ++++ 
b/kernel/irq/chip.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include "internals.h" + +@@ -459,7 +460,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) + irqreturn_t action_ret; + + spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + mask_ack_irq(desc, irq); ++#endif + + if (unlikely(desc->status & IRQ_INPROGRESS)) + goto out_unlock; +@@ -539,8 +542,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; ++#ifdef CONFIG_IPIPE ++ desc->chip->unmask(irq); ++out: ++#else + out: + desc->chip->eoi(irq); ++#endif + + spin_unlock(&desc->lock); + } +@@ -582,8 +590,10 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) + kstat_incr_irqs_this_cpu(irq, desc); + + /* Start handling the irq */ ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + + /* Mark the IRQ currently in progress.*/ + desc->status |= IRQ_INPROGRESS; +@@ -637,8 +647,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) + + kstat_incr_irqs_this_cpu(irq, desc); + ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif /* CONFIG_IPIPE */ + + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +@@ -648,6 +660,134 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) + desc->chip->eoi(irq); + } + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc) ++{ ++ mask_ack_irq(desc, irq); ++} ++ ++void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->unmask) ++ desc->chip->unmask(irq); ++} ++ ++void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->chip->eoi(irq); ++} ++ ++void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc) ++{ ++ /* ++ * Non-requestable IRQs should not be masked in EOI handler. 
++ */ ++ if (!(desc->status & IRQ_NOREQUEST)) ++ desc->chip->unmask(irq); ++} ++ ++void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->chip->ack(irq); ++} ++ ++void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->ack) ++ desc->chip->ack(irq); ++} ++ ++void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->eoi) ++ desc->chip->eoi(irq); ++} ++ ++void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) ++{ ++ static int done; ++ ++ handle_bad_irq(irq, desc); ++ ++ if (!done) { ++ printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", ++ __FUNCTION__, irq); ++ done = 1; ++ } ++} ++ ++void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ if (unlikely(handle == NULL)) { ++ desc->ipipe_ack = &__ipipe_ack_bad_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } else { ++ if (is_chained) { ++ desc->ipipe_ack = handle; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ handle = __ipipe_noack_irq; ++ } else if (handle == &handle_simple_irq) { ++ desc->ipipe_ack = &__ipipe_ack_simple_irq; ++ desc->ipipe_end = &__ipipe_end_simple_irq; ++ } else if (handle == &handle_level_irq) { ++ desc->ipipe_ack = &__ipipe_ack_level_irq; ++ desc->ipipe_end = &__ipipe_end_level_irq; ++ } else if (handle == &handle_edge_irq) { ++ desc->ipipe_ack = &__ipipe_ack_edge_irq; ++ desc->ipipe_end = &__ipipe_end_edge_irq; ++ } else if (handle == &handle_fasteoi_irq) { ++ desc->ipipe_ack = &__ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = &__ipipe_end_fasteoi_irq; ++ } else if (handle == &handle_percpu_irq) { ++ desc->ipipe_ack = &__ipipe_ack_percpu_irq; ++ desc->ipipe_end = &__ipipe_end_percpu_irq; ++ } else if (desc->chip == &no_irq_chip) { ++ desc->ipipe_ack = &__ipipe_noack_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } else { ++ desc->ipipe_ack = &__ipipe_ack_bad_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } ++ } ++ ++ /* Suppress intermediate trampoline routine. */ ++ ipipe_root_domain->irqs[desc->irq].acknowledge = desc->ipipe_ack; ++ ++ return handle; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ return handle; ++} ++ ++#endif /* !CONFIG_IPIPE */ ++ + void + __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) +@@ -679,6 +819,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + chip_bus_lock(irq, desc); + spin_lock_irqsave(&desc->lock, flags); + ++ handle = __fixup_irq_handler(desc, handle, is_chained); ++ + /* Uninstall? 
*/ + if (handle == handle_bad_irq) { + if (desc->chip != &no_irq_chip) +diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c +index 17c71bb..406f375 100644 +--- a/kernel/irq/handle.c ++++ b/kernel/irq/handle.c +@@ -462,8 +462,10 @@ unsigned int __do_IRQ(unsigned int irq) + /* + * No locking required for CPU-local interrupts: + */ ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + if (likely(!(desc->status & IRQ_DISABLED))) { + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +@@ -474,8 +476,10 @@ unsigned int __do_IRQ(unsigned int irq) + } + + spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + /* + * REPLAY is when Linux resends an IRQ that was dropped earlier + * WAITING is used by probe to mark irqs that are being tested +diff --git a/kernel/lockdep.c b/kernel/lockdep.c +index 9af5672..fa84d6d 100644 +--- a/kernel/lockdep.c ++++ b/kernel/lockdep.c +@@ -2318,7 +2318,7 @@ void trace_hardirqs_on_caller(unsigned long ip) + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; +@@ -2361,7 +2361,7 @@ void trace_hardirqs_off_caller(unsigned long ip) + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->hardirqs_enabled) { +@@ -2393,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip) + if (unlikely(!debug_locks)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->softirqs_enabled) { +@@ -2427,7 +2427,7 @@ void trace_softirqs_off(unsigned long ip) + if (unlikely(!debug_locks)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->softirqs_enabled) { +diff --git a/kernel/panic.c b/kernel/panic.c +index 96b45d0..63f5b9e 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + int panic_on_oops; + static unsigned long tainted_mask; +@@ -304,6 +305,8 @@ void oops_enter(void) + { + tracing_off(); + /* can't trust the integrity of the kernel anymore: */ ++ ipipe_trace_panic_freeze(); ++ ipipe_disable_context_check(ipipe_processor_id()); + debug_locks_off(); + do_oops_enter_exit(); + } +diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c +index 04a9e90..49bc6cd 100644 +--- a/kernel/power/hibernate.c ++++ b/kernel/power/hibernate.c +@@ -238,6 +238,7 @@ static int create_image(int platform_mode) + goto Enable_cpus; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + + error = sysdev_suspend(PMSG_FREEZE); + if (error) { +@@ -267,6 +268,7 @@ static int create_image(int platform_mode) + */ + + Enable_irqs: ++ local_irq_enable_hw_cond(); + local_irq_enable(); + + Enable_cpus: +@@ -359,6 +361,7 @@ static int resume_target_kernel(bool platform_mode) + goto Enable_cpus; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + + error = sysdev_suspend(PMSG_QUIESCE); + if (error) +@@ -390,6 +393,7 @@ static int resume_target_kernel(bool platform_mode) + sysdev_resume(); + + Enable_irqs: ++ local_irq_enable_hw_cond(); + local_irq_enable(); + + Enable_cpus: +@@ -471,6 +475,7 @@ 
int hibernation_platform_enter(void) + goto Platform_finish; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + sysdev_suspend(PMSG_HIBERNATE); + hibernation_ops->enter(); + /* We should never get here */ +diff --git a/kernel/printk.c b/kernel/printk.c +index f38b07f..f3f0057 100644 +--- a/kernel/printk.c ++++ b/kernel/printk.c +@@ -564,6 +564,41 @@ static int have_callable_console(void) + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED; ++ ++static int __ipipe_printk_fill; ++ ++static char __ipipe_printk_buf[__LOG_BUF_LEN]; ++ ++void __ipipe_flush_printk (unsigned virq, void *cookie) ++{ ++ char *p = __ipipe_printk_buf; ++ int len, lmax, out = 0; ++ unsigned long flags; ++ ++ goto start; ++ ++ do { ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ start: ++ lmax = __ipipe_printk_fill; ++ while (out < lmax) { ++ len = strlen(p) + 1; ++ printk("%s",p); ++ p += len; ++ out += len; ++ } ++ spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ } ++ while (__ipipe_printk_fill != lmax); ++ ++ __ipipe_printk_fill = 0; ++ ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++} ++ + /** + * printk - print a kernel message + * @fmt: format string +@@ -588,6 +623,65 @@ static int have_callable_console(void) + + asmlinkage int printk(const char *fmt, ...) + { ++ int r, fbytes, oldcount; ++ unsigned long flags; ++ int sprintk = 1; ++ int cs = -1; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ local_irq_save_hw(flags); ++ ++ if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) || ++ oops_in_progress) ++ cs = ipipe_disable_context_check(ipipe_processor_id()); ++ else if (__ipipe_current_domain == ipipe_root_domain) { ++ struct ipipe_domain *dom; ++ ++ list_for_each_entry(dom, &__ipipe_pipeline, p_link) { ++ if (dom == ipipe_root_domain) ++ break; ++ if (test_bit(IPIPE_STALL_FLAG, ++ &ipipe_cpudom_var(dom, status))) ++ sprintk = 0; ++ } ++ } else ++ sprintk = 0; ++ ++ local_irq_restore_hw(flags); ++ ++ if (sprintk) { ++ r = vprintk(fmt, args); ++ if (cs != -1) ++ ipipe_restore_context_check(ipipe_processor_id(), cs); ++ goto out; ++ } ++ ++ spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ ++ oldcount = __ipipe_printk_fill; ++ fbytes = __LOG_BUF_LEN - oldcount; ++ ++ if (fbytes > 1) { ++ r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, ++ fbytes, fmt, args) + 1; /* account for the null byte */ ++ __ipipe_printk_fill += r; ++ } else ++ r = 0; ++ ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ ++ if (oldcount == 0) ++ ipipe_trigger_irq(__ipipe_printk_virq); ++out: ++ va_end(args); ++ ++ return r; ++} ++#else /* !CONFIG_IPIPE */ ++asmlinkage int printk(const char *fmt, ...) ++{ + va_list args; + int r; + +@@ -597,6 +691,7 @@ asmlinkage int printk(const char *fmt, ...) 
+ + return r; + } ++#endif /* CONFIG_IPIPE */ + + /* cpu currently holding logbuf_lock */ + static volatile unsigned int printk_cpu = UINT_MAX; +diff --git a/kernel/sched.c b/kernel/sched.c +index 34d924e..ca4b359 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2342,6 +2342,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) + { + int cpu, orig_cpu, this_cpu, success = 0; ++ unsigned int old_state; + unsigned long flags; + struct rq *rq, *orig_rq; + +@@ -2353,7 +2354,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + smp_wmb(); + rq = orig_rq = task_rq_lock(p, &flags); + update_rq_clock(rq); +- if (!(p->state & state)) ++ old_state = p->state; ++ if (!(old_state & state) || ++ (old_state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH))) + goto out; + + if (p->se.on_rq) +@@ -2838,13 +2841,15 @@ asmlinkage void schedule_tail(struct task_struct *prev) + #endif + if (current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); ++ ++ ipipe_init_notify(current); + } + + /* + * context_switch - switch to the new MM and the new + * thread's register state. + */ +-static inline void ++static inline int + context_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) + { +@@ -2886,12 +2891,23 @@ context_switch(struct rq *rq, struct task_struct *prev, + switch_to(prev, next, prev); + + barrier(); ++ ++#ifdef CONFIG_IPIPE_DELAYED_ATOMICSW ++ current->state &= ~TASK_ATOMICSWITCH; ++#else ++ prev->state &= ~TASK_ATOMICSWITCH; ++#endif ++ if (task_hijacked(prev)) ++ return 1; ++ + /* + * this_rq must be evaluated again because prev may have moved + * CPUs since it called schedule(), thus the 'rq' on its stack + * frame will be invalid. + */ + finish_task_switch(this_rq(), prev); ++ ++ return 0; + } + + /* +@@ -5298,6 +5314,7 @@ notrace unsigned long get_parent_ip(unsigned long addr) + + void __kprobes add_preempt_count(int val) + { ++ ipipe_check_context(ipipe_root_domain); + #ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? +@@ -5320,6 +5337,7 @@ EXPORT_SYMBOL(add_preempt_count); + + void __kprobes sub_preempt_count(int val) + { ++ ipipe_check_context(ipipe_root_domain); + #ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? +@@ -5368,6 +5386,7 @@ static noinline void __schedule_bug(struct task_struct *prev) + */ + static inline void schedule_debug(struct task_struct *prev) + { ++ ipipe_check_context(ipipe_root_domain); + /* + * Test if we are atomic. Since do_exit() needs to call into + * schedule() atomically, we ignore that path for now. +@@ -5446,7 +5465,7 @@ pick_next_task(struct rq *rq) + /* + * schedule() is the main scheduler function. + */ +-asmlinkage void __sched schedule(void) ++asmlinkage int __sched schedule(void) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -5460,6 +5479,9 @@ need_resched: + rcu_sched_qs(cpu); + prev = rq->curr; + switch_count = &prev->nivcsw; ++ if (unlikely(prev->state & TASK_ATOMICSWITCH)) ++ /* Pop one disable level -- one still remains. */ ++ preempt_enable(); + + release_kernel_lock(prev); + need_resched_nonpreemptible: +@@ -5497,15 +5519,18 @@ need_resched_nonpreemptible: + rq->curr = next; + ++*switch_count; + +- context_switch(rq, prev, next); /* unlocks the rq */ ++ if (context_switch(rq, prev, next)) /* unlocks the rq */ ++ return 1; /* task hijacked by higher domain */ + /* + * the context switch might have flipped the stack from under + * us, hence refresh the local variables. 
+ */ + cpu = smp_processor_id(); + rq = cpu_rq(cpu); +- } else ++ } else { ++ prev->state &= ~TASK_ATOMICSWITCH; + spin_unlock_irq(&rq->lock); ++ } + + post_schedule(rq); + +@@ -5515,6 +5540,8 @@ need_resched_nonpreemptible: + preempt_enable_no_resched(); + if (need_resched()) + goto need_resched; ++ ++ return 0; + } + EXPORT_SYMBOL(schedule); + +@@ -5598,7 +5625,8 @@ asmlinkage void __sched preempt_schedule(void) + + do { + add_preempt_count(PREEMPT_ACTIVE); +- schedule(); ++ if (schedule()) ++ return; + sub_preempt_count(PREEMPT_ACTIVE); + + /* +@@ -6369,6 +6397,7 @@ recheck: + oldprio = p->prio; + prev_class = p->sched_class; + __setscheduler(rq, p, policy, param->sched_priority); ++ ipipe_setsched_notify(p); + + if (running) + p->sched_class->set_curr_task(rq); +@@ -7020,6 +7049,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) + #else + task_thread_info(idle)->preempt_count = 0; + #endif ++ ipipe_check_context(ipipe_root_domain); + /* + * The idle tasks have their own, simple scheduling class: + */ +@@ -10963,3 +10993,64 @@ void synchronize_sched_expedited(void) + EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + + #endif /* #else #ifndef CONFIG_SMP */ ++ ++#ifdef CONFIG_IPIPE ++ ++int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio) ++{ ++ const struct sched_class *prev_class = p->sched_class; ++ int oldprio, on_rq, running; ++ unsigned long flags; ++ struct rq *rq; ++ ++ spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_rq_lock(p); ++ update_rq_clock(rq); ++ on_rq = p->se.on_rq; ++ running = task_current(rq, p); ++ if (on_rq) ++ deactivate_task(rq, p, 0); ++ if (running) ++ p->sched_class->put_prev_task(rq, p); ++ ++ p->sched_reset_on_fork = 0; ++ ++ oldprio = p->prio; ++ __setscheduler(rq, p, policy, prio); ++ ipipe_setsched_notify(p); ++ ++ if (running) ++ p->sched_class->set_curr_task(rq); ++ if (on_rq) { ++ activate_task(rq, p, 0); ++ ++ check_class_changed(rq, p, prev_class, oldprio, running); ++ } ++ __task_rq_unlock(rq); ++ spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ rt_mutex_adjust_pi(p); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_setscheduler_root); ++ ++int ipipe_reenter_root(struct task_struct *prev, int policy, int prio) ++{ ++ struct rq *rq = this_rq(); ++ ++ finish_task_switch(rq, prev); ++ ++ post_schedule(rq); ++ ++ (void)reacquire_kernel_lock(current); ++ preempt_enable_no_resched(); ++ ++ if (current->policy != policy || current->rt_priority != prio) ++ return ipipe_setscheduler_root(current, policy, prio); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_reenter_root); ++ ++#endif /* CONFIG_IPIPE */ +diff --git a/kernel/signal.c b/kernel/signal.c +index 4d0658d..a7eac5f 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -518,6 +518,7 @@ void signal_wake_up(struct task_struct *t, int resume) + unsigned int mask; + + set_tsk_thread_flag(t, TIF_SIGPENDING); ++ ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. 
*/ + + /* + * For SIGKILL, we want to wake it up in the stopped/traced/killable +diff --git a/kernel/spinlock.c b/kernel/spinlock.c +index 5ddab73..97cf064 100644 +--- a/kernel/spinlock.c ++++ b/kernel/spinlock.c +@@ -50,7 +50,9 @@ EXPORT_SYMBOL(_write_trylock); + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + #ifndef _read_lock + void __lockfunc _read_lock(rwlock_t *lock) +diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c +index 83c4417..782a209 100644 +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -69,7 +69,7 @@ static void tick_periodic(int cpu) + write_sequnlock(&xtime_lock); + } + +- update_process_times(user_mode(get_irq_regs())); ++ update_root_process_times(get_irq_regs()); + profile_tick(CPU_PROFILING); + } + +@@ -177,6 +177,10 @@ static void tick_setup_device(struct tick_device *td, + + td->evtdev = newdev; + ++ /* I-pipe: derive global tick IRQ from CPU 0 */ ++ if (cpu == 0) ++ ipipe_update_tick_evtdev(newdev); ++ + /* + * When the device is not per cpu, pin the interrupt to the + * current cpu: +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index 44320b1..45ec05a 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -549,7 +549,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) + ts->idle_jiffies++; + } + +- update_process_times(user_mode(regs)); ++ update_root_process_times(regs); + profile_tick(CPU_PROFILING); + + while (tick_nohz_reprogram(ts, now)) { +@@ -700,7 +700,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) + touch_softlockup_watchdog(); + ts->idle_jiffies++; + } +- update_process_times(user_mode(regs)); ++ update_root_process_times(regs); + profile_tick(CPU_PROFILING); + } + +diff --git a/kernel/timer.c b/kernel/timer.c +index 5db5a8d..1b45eb9 100644 +--- a/kernel/timer.c ++++ b/kernel/timer.c +@@ -1204,6 +1204,25 @@ void update_process_times(int user_tick) + run_posix_cpu_timers(p); + } + ++#ifdef CONFIG_IPIPE ++ ++void update_root_process_times(struct pt_regs *regs) ++{ ++ int cpu, user_tick = user_mode(regs); ++ ++ if (__ipipe_root_tick_p(regs)) { ++ update_process_times(user_tick); ++ return; ++ } ++ ++ run_local_timers(); ++ cpu = smp_processor_id(); ++ rcu_check_callbacks(cpu, user_tick); ++ run_posix_cpu_timers(current); ++} ++ ++#endif ++ + /* + * This function runs timers and the timer-tq in bottom half context. 
+ */ +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 0cccb6c..eaba13e 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1142,6 +1143,9 @@ static int __ftrace_modify_code(void *data) + + static void ftrace_run_update_code(int command) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++#endif /* CONFIG_IPIPE */ + int ret; + + ret = ftrace_arch_code_modify_prepare(); +@@ -1149,7 +1153,13 @@ static void ftrace_run_update_code(int command) + if (ret) + return; + ++#ifdef CONFIG_IPIPE ++ flags = ipipe_critical_enter(NULL); ++ __ftrace_modify_code(&command); ++ ipipe_critical_exit(flags); ++#else /* !CONFIG_IPIPE */ + stop_machine(__ftrace_modify_code, &command, NULL); ++#endif /* !CONFIG_IPIPE */ + + ret = ftrace_arch_code_modify_post_process(); + FTRACE_WARN_ON(ret); +@@ -2648,9 +2658,9 @@ static int ftrace_convert_nops(struct module *mod, + } + + /* disable interrupts to prevent kstop machine */ +- local_irq_save(flags); ++ local_irq_save_hw_notrace(flags); + ftrace_update_code(mod); +- local_irq_restore(flags); ++ local_irq_restore_hw_notrace(flags); + mutex_unlock(&ftrace_lock); + + return 0; +@@ -2729,9 +2739,9 @@ void __init ftrace_init(void) + /* Keep the ftrace pointer to the stub */ + addr = (unsigned long)ftrace_stub; + +- local_irq_save(flags); ++ local_irq_save_hw_notrace(flags); + ftrace_dyn_arch_init(&addr); +- local_irq_restore(flags); ++ local_irq_restore_hw_notrace(flags); + + /* ftrace_dyn_arch_init places the return code in addr */ + if (addr) +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index 234ceb1..faffad9 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -136,6 +136,8 @@ config DEBUG_SECTION_MISMATCH + - Enable verbose reporting from modpost to help solving + the section mismatches reported. 
+ ++source "kernel/ipipe/Kconfig.debug" ++ + config DEBUG_KERNEL + bool "Kernel debugging" + help +diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c +index 9681d54..2dba50c 100644 +--- a/lib/bust_spinlocks.c ++++ b/lib/bust_spinlocks.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + + void __attribute__((weak)) bust_spinlocks(int yes) +@@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes) + unblank_screen(); + #endif + console_unblank(); ++ ipipe_trace_panic_dump(); + if (--oops_in_progress == 0) + wake_up_klogd(); + } +diff --git a/lib/ioremap.c b/lib/ioremap.c +index 14c6078..a275469 100644 +--- a/lib/ioremap.c ++++ b/lib/ioremap.c +@@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr, + if (err) + break; + } while (pgd++, addr = next, addr != end); +- +- flush_cache_vmap(start, end); ++ __ipipe_pin_range_globally(start, end); ++ flush_cache_vmap(start, end); + + return err; + } +diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c +index 4689cb0..3d12764 100644 +--- a/lib/smp_processor_id.c ++++ b/lib/smp_processor_id.c +@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void) + unsigned long preempt_count = preempt_count(); + int this_cpu = raw_smp_processor_id(); + ++ if (!ipipe_root_domain_p) ++ goto out; ++ + if (likely(preempt_count)) + goto out; + +- if (irqs_disabled()) ++ if (irqs_disabled() || irqs_disabled_hw()) + goto out; + + /* +diff --git a/mm/memory.c b/mm/memory.c +index 4e59455..b8d365d 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -566,6 +567,32 @@ out: + return pfn_to_page(pfn); + } + ++static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) ++{ ++ /* ++ * If the source page was a PFN mapping, we don't have ++ * a "struct page" for it. We do a best-effort copy by ++ * just copying from the original user address. If that ++ * fails, we just zero-fill it. Live with it. ++ */ ++ if (unlikely(!src)) { ++ void *kaddr = kmap_atomic(dst, KM_USER0); ++ void __user *uaddr = (void __user *)(va & PAGE_MASK); ++ ++ /* ++ * This really shouldn't fail, because the page is there ++ * in the page tables. But it might just be unreadable, ++ * in which case we just give up and fill the result with ++ * zeroes. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) ++ memset(kaddr, 0, PAGE_SIZE); ++ kunmap_atomic(kaddr, KM_USER0); ++ flush_dcache_page(dst); ++ } else ++ copy_user_highpage(dst, src, va, vma); ++} ++ + /* + * copy one vm_area from one task to the other. 
Assumes the page tables + * already present in the new task to be cleared in the whole range +@@ -574,8 +601,8 @@ out: + + static inline void + copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, +- unsigned long addr, int *rss) ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss, struct page *uncow_page) + { + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; +@@ -614,6 +641,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + * in the parent and the child + */ + if (is_cow_mapping(vm_flags)) { ++#ifdef CONFIG_IPIPE ++ if (uncow_page) { ++ struct page *old_page = vm_normal_page(vma, addr, pte); ++ cow_user_page(uncow_page, old_page, addr, vma); ++ pte = mk_pte(uncow_page, vma->vm_page_prot); ++ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page_add_new_anon_rmap(uncow_page, vma, addr); ++ rss[!!PageAnon(uncow_page)]++; ++ goto out_set_pte; ++ } ++#endif /* CONFIG_IPIPE */ + ptep_set_wrprotect(src_mm, addr, src_pte); + pte = pte_wrprotect(pte); + } +@@ -645,13 +687,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *src_pte, *dst_pte; + spinlock_t *src_ptl, *dst_ptl; + int progress = 0; ++ struct page *uncow_page = NULL; + int rss[2]; +- ++#ifdef CONFIG_IPIPE ++ int do_cow_break = 0; ++again: ++ if (do_cow_break) { ++ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); ++ if (!uncow_page) ++ return -ENOMEM; ++ do_cow_break = 0; ++ } ++#else + again: ++#endif + rss[1] = rss[0] = 0; + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); +- if (!dst_pte) ++ if (!dst_pte) { ++ if (uncow_page) ++ page_cache_release(uncow_page); + return -ENOMEM; ++ } + src_pte = pte_offset_map_nested(src_pmd, addr); + src_ptl = pte_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); +@@ -674,7 +730,25 @@ again: + progress++; + continue; + } +- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); ++#ifdef CONFIG_IPIPE ++ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { ++ if (is_cow_mapping(vma->vm_flags) && ++ test_bit(MMF_VM_PINNED, &src_mm->flags) && ++ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap_nested(src_pte); ++ add_mm_rss(dst_mm, rss[0], rss[1]); ++ pte_unmap_unlock(dst_pte, dst_ptl); ++ cond_resched(); ++ do_cow_break = 1; ++ goto again; ++ } ++ } ++#endif ++ copy_one_pte(dst_mm, src_mm, dst_pte, ++ src_pte, vma, addr, rss, uncow_page); ++ uncow_page = NULL; + progress += 8; + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + +@@ -1941,32 +2015,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) + return pte; + } + +-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) +-{ +- /* +- * If the source page was a PFN mapping, we don't have +- * a "struct page" for it. We do a best-effort copy by +- * just copying from the original user address. If that +- * fails, we just zero-fill it. Live with it. +- */ +- if (unlikely(!src)) { +- void *kaddr = kmap_atomic(dst, KM_USER0); +- void __user *uaddr = (void __user *)(va & PAGE_MASK); +- +- /* +- * This really shouldn't fail, because the page is there +- * in the page tables. But it might just be unreadable, +- * in which case we just give up and fill the result with +- * zeroes. 
+- */ +- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) +- memset(kaddr, 0, PAGE_SIZE); +- kunmap_atomic(kaddr, KM_USER0); +- flush_dcache_page(dst); +- } else +- copy_user_highpage(dst, src, va, vma); +-} +- + /* + * This routine handles present pages, when users try to write + * to a shared page. It is done by copying the page to a new address +@@ -3377,3 +3425,111 @@ void might_fault(void) + } + EXPORT_SYMBOL(might_fault); + #endif ++ ++#ifdef CONFIG_IPIPE ++ ++static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ spinlock_t *ptl; ++ pte_t *pte; ++ ++ do { ++ pte = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ continue; ++ ++ if (!pte_present(*pte) || pte_write(*pte)) { ++ pte_unmap_unlock(pte, ptl); ++ continue; ++ } ++ ++ if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM) ++ return -ENOMEM; ++ } while (addr += PAGE_SIZE, addr != end); ++ return 0; ++} ++ ++static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ pmd_t *pmd; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ if (ipipe_pin_pte_range(mm, pmd, vma, addr, next)) ++ return -ENOMEM; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ pud_t *pud; ++ ++ pud = pud_offset(pgd, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ if (ipipe_pin_pmd_range(mm, pud, vma, addr, next)) ++ return -ENOMEM; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++int ipipe_disable_ondemand_mappings(struct task_struct *tsk) ++{ ++ unsigned long addr, next, end; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ int result = 0; ++ pgd_t *pgd; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return -EPERM; ++ ++ down_write(&mm->mmap_sem); ++ if (test_bit(MMF_VM_PINNED, &mm->flags)) ++ goto done_mm; ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ if (!is_cow_mapping(vma->vm_flags) ++ || !(vma->vm_flags & VM_WRITE)) ++ continue; ++ ++ addr = vma->vm_start; ++ end = vma->vm_end; ++ ++ pgd = pgd_offset(mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) { ++ result = -ENOMEM; ++ goto done_mm; ++ } ++ } while (pgd++, addr = next, addr != end); ++ } ++ set_bit(MMF_VM_PINNED, &mm->flags); ++ ++ done_mm: ++ up_write(&mm->mmap_sem); ++ mmput(mm); ++ return result; ++} ++ ++EXPORT_SYMBOL(ipipe_disable_ondemand_mappings); ++ ++#endif +diff --git a/mm/mmu_context.c b/mm/mmu_context.c +index ded9081..cb2ac0e 100644 +--- a/mm/mmu_context.c ++++ b/mm/mmu_context.c +@@ -23,15 +23,18 @@ void use_mm(struct mm_struct *mm) + { + struct mm_struct *active_mm; + struct task_struct *tsk = current; ++ unsigned long flags; + + task_lock(tsk); + active_mm = tsk->active_mm; ++ ipipe_mm_switch_protect(flags); + if (active_mm != mm) { + atomic_inc(&mm->mm_count); + tsk->active_mm = mm; + } + tsk->mm = mm; +- switch_mm(active_mm, mm, tsk); ++ __switch_mm(active_mm, mm, tsk); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + + if (active_mm != mm) +diff --git a/mm/vmalloc.c 
b/mm/vmalloc.c +index c228731..4da3110 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -172,6 +172,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, + return err; + } while (pgd++, addr = next, addr != end); + ++ __ipipe_pin_range_globally(start, end); ++ + return nr; + } + --- /dev/null +++ xenomai-2.5.3/ksrc/arch/powerpc/patches/adeos-ipipe-2.6.32-powerpc-DENX-2.8-00.patch @@ -0,0 +1,11488 @@ +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index dd76b63..c95407d 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -125,6 +125,7 @@ config PPC + select HAVE_LMB + select HAVE_DMA_ATTRS + select HAVE_DMA_API_DEBUG ++ select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select USE_GENERIC_SMP_HELPERS if SMP + select HAVE_OPROFILE + select HAVE_SYSCALL_WRAPPERS if PPC64 +@@ -145,6 +146,10 @@ config SYSVIPC_COMPAT + depends on COMPAT && SYSVIPC + default y + ++config SOFTDISABLE ++ bool ++ default (PPC64 && !IPIPE) ++ + # All PPC32s use generic nvram driver through ppc_md + config GENERIC_NVRAM + bool +@@ -249,6 +254,29 @@ source "arch/powerpc/platforms/Kconfig" + + menu "Kernel options" + ++source "kernel/ipipe/Kconfig" ++ ++config IPIPE_HAVE_PREEMPTIBLE_SWITCH ++ bool ++ depends on IPIPE ++ default y ++ ++if IPIPE ++config RUNLATCH ++ bool "Enable RUNLATCH support" ++ depends on PPC64 ++ default n if IPIPE ++ ---help--- ++ This option is costly latency-wise, so default is to keep ++ it off when the interrupt pipeline is enabled. ++endif ++if !IPIPE ++config RUNLATCH ++ bool ++ depends on PPC64 ++ default y ++endif ++ + config HIGHMEM + bool "High memory support" + depends on PPC32 +diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile +index 7bfc8ad..334b1f3 100644 +--- a/arch/powerpc/boot/Makefile ++++ b/arch/powerpc/boot/Makefile +@@ -29,6 +29,14 @@ ifdef CONFIG_DEBUG_INFO + BOOTCFLAGS += -g + endif + ++ifdef CONFIG_IPIPE_TRACE ++# do not trace the boot loader ++nullstring := ++space := $(nullstring) # end of the line ++pg_flag = $(nullstring) -pg # end of the line ++BOOTCFLAGS := $(subst ${pg_flag},${space},${BOOTCFLAGS}) ++endif ++ + ifeq ($(call cc-option-yn, -fstack-protector),y) + BOOTCFLAGS += -fno-stack-protector + endif +diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h +index a98653b..ff4d4b9 100644 +--- a/arch/powerpc/include/asm/exception-64s.h ++++ b/arch/powerpc/include/asm/exception-64s.h +@@ -47,6 +47,30 @@ + #define EX_R3 64 + #define EX_LR 72 + ++#ifdef CONFIG_SOFTDISABLE ++#define COPY_SOFTISTATE(mreg) \ ++ lbz mreg,PACASOFTIRQEN(r13); \ ++ std mreg,SOFTE(r1); ++#define TEST_SOFTISTATE(mreg, dlabel) \ ++ lbz mreg,PACASOFTIRQEN(r13); \ ++ cmpwi mreg,0; \ ++ beq- dlabel; ++#else ++#ifdef CONFIG_IPIPE ++/* Do NOT alter Rc(eq) in this code; our caller uses it. */ ++#define COPY_SOFTISTATE(mreg) \ ++ ld mreg,PACAROOTPCPU(r13); \ ++ ld mreg,0(mreg); \ ++ nor mreg,mreg,mreg; \ ++ clrldi mreg,mreg,63; \ ++ std mreg,SOFTE(r1); ++#define TEST_SOFTISTATE(mreg, dlabel) ++#else ++#define COPY_SOFTISTATE(mreg) ++#define TEST_SOFTISTATE(mreg, dlabel) ++#endif ++#endif ++ + /* + * We're short on space and time in the exception prolog, so we can't + * use the normal SET_REG_IMMEDIATE macro. 
Normally we just need the +@@ -128,9 +152,8 @@ + std r9,_LINK(r1); \ + mfctr r10; /* save CTR in stackframe */ \ + std r10,_CTR(r1); \ +- lbz r10,PACASOFTIRQEN(r13); \ ++ COPY_SOFTISTATE(r10); \ + mfspr r11,SPRN_XER; /* save XER in stackframe */ \ +- std r10,SOFTE(r1); \ + std r11,_XER(r1); \ + li r9,(n)+1; \ + std r9,_TRAP(r1); /* set trap number */ \ +@@ -174,10 +197,8 @@ label##_pSeries: \ + mfspr r13,SPRN_SPRG_PACA; /* get paca address into r13 */ \ + std r9,PACA_EXGEN+EX_R9(r13); /* save r9, r10 */ \ + std r10,PACA_EXGEN+EX_R10(r13); \ +- lbz r10,PACASOFTIRQEN(r13); \ + mfcr r9; \ +- cmpwi r10,0; \ +- beq masked_interrupt; \ ++ TEST_SOFTISTATE(r10, masked_interrupt); \ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + std r10,PACA_EXGEN+EX_R13(r13); \ + std r11,PACA_EXGEN+EX_R11(r13); \ +@@ -192,6 +213,28 @@ label##_pSeries: \ + rfid; \ + b . /* prevent speculative execution */ + ++#ifdef CONFIG_IPIPE ++/* IBM legacy I-Series are not supported. */ ++#define ENABLE_INTS \ ++ ld r12,_MSR(r1); \ ++ mfmsr r11; \ ++ rlwimi r11,r12,0,MSR_EE; \ ++ mtmsrd r11,1 ++#define DISABLE_INTS /* We lie, mostly... */ \ ++ ld r11,PACAROOTPCPU(r13); \ ++ ld r10,0(r11); \ ++ ori r10,r10,1; \ ++ std r10,0(r11); \ ++ mfmsr r10; \ ++ ori r10,r10,MSR_EE; \ ++ mtmsrd r10,1; ++#define DISABLE_INTS_REALLY \ ++ mfmsr r11; \ ++ rldicl r11,r11,48,1;/* clear MSR_EE */ \ ++ rotldi r11,r11,16; \ ++ mtmsrd r11,1; ++#else /* !CONFIG_IPIPE */ ++ + #ifdef CONFIG_PPC_ISERIES + #define DISABLE_INTS \ + li r11,0; \ +@@ -219,6 +262,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) + rlwimi r11,r12,0,MSR_EE; \ + mtmsrd r11,1 + ++#endif /* !CONFIG_IPIPE */ ++ + #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +@@ -226,6 +271,7 @@ label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + bl .save_nvgprs; \ ++ TRACE_DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except +@@ -242,6 +288,7 @@ label##_common: \ + FINISH_NAP; \ + DISABLE_INTS; \ + bl .save_nvgprs; \ ++ TRACE_DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except +@@ -256,10 +303,24 @@ label##_common: \ + BEGIN_FTR_SECTION \ + bl .ppc64_runlatch_on; \ + END_FTR_SECTION_IFSET(CPU_FTR_CTRL) \ ++ TRACE_DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite + ++#ifdef CONFIG_IPIPE ++#define IPIPE_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ ++ .align 7; \ ++ .globl label##_common; \ ++label##_common: \ ++ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ ++ DISABLE_INTS_REALLY; \ ++ TRACE_DISABLE_INTS_REALLY; \ ++ addi r3,r1,STACK_FRAME_OVERHEAD; \ ++ bl hdlr; \ ++ b .__ipipe_ret_from_except_lite ++#endif /* CONFIG_IPIPE */ ++ + /* + * When the idle code in power4_idle puts the CPU into NAP mode, + * it has to do so in a loop, and relies on the external interrupt +diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h +index dde1296..8865751 100644 +--- a/arch/powerpc/include/asm/ftrace.h ++++ b/arch/powerpc/include/asm/ftrace.h +@@ -9,9 +9,21 @@ + + /* Based off of objdump optput from glibc */ + +-#define MCOUNT_SAVE_FRAME \ +- stwu r1,-48(r1); \ +- stw r3, 12(r1); \ ++#define MCOUNT_SAVE_FRAME \ ++ stwu r1,-48(r1); \ ++ stw r3, 12(r1); \ ++ LOAD_REG_IMMEDIATE(r3, function_trace_stop) \ ++ lwz r3, 0(r3); \ ++ cmpwi r3, 0; \ ++ lwz r3, 12(r1); \ ++ beq 1f; \ ++ mflr r0; \ ++ mtctr r0; \ ++ lwz r0, 52(r1); \ ++ mtlr r0; \ ++ addi r1, r1, 48; \ ++ bctr; \ ++1: \ + stw r4, 16(r1); \ + stw r5, 20(r1); 
\ + stw r6, 24(r1); \ +diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h +index abbc2aa..a56f7bd 100644 +--- a/arch/powerpc/include/asm/hw_irq.h ++++ b/arch/powerpc/include/asm/hw_irq.h +@@ -13,6 +13,10 @@ + + extern void timer_interrupt(struct pt_regs *); + ++#include ++ ++#ifndef CONFIG_IPIPE ++ + #ifdef CONFIG_PPC64 + #include + +@@ -129,6 +133,8 @@ static inline int irqs_disabled_flags(unsigned long flags) + + #endif /* CONFIG_PPC64 */ + ++#endif /* !CONFIG_IPIPE */ ++ + /* + * interrupt-retrigger: should we handle this via lost interrupts and IPIs + * or should we not care like we do now ? --BenH. +diff --git a/arch/powerpc/include/asm/ipipe.h b/arch/powerpc/include/asm/ipipe.h +new file mode 100644 +index 0000000..5cbf735 +--- /dev/null ++++ b/arch/powerpc/include/asm/ipipe.h +@@ -0,0 +1,275 @@ ++/* ++ * include/asm-powerpc/ipipe.h ++ * ++ * I-pipe 32/64bit merge - Copyright (C) 2007 Philippe Gerum. ++ * I-pipe PA6T support - Copyright (C) 2007 Philippe Gerum. ++ * I-pipe 64-bit PowerPC port - Copyright (C) 2005 Heikki Lindholm. ++ * I-pipe PowerPC support - Copyright (C) 2002-2005 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __ASM_POWERPC_IPIPE_H ++#define __ASM_POWERPC_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_PPC64 ++#ifdef CONFIG_PPC_ISERIES ++#error "I-pipe: IBM I-series not supported, sorry" ++#endif ++#include ++#endif ++ ++#define IPIPE_ARCH_STRING "2.8-00" ++#define IPIPE_MAJOR_NUMBER 2 ++#define IPIPE_MINOR_NUMBER 8 ++#define IPIPE_PATCH_NUMBER 0 ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ local_irq_enable_hw(); \ ++ ipipe_schedule_notify(current ,next); \ ++ } while(0) ++ ++#define task_hijacked(p) \ ++ ({ \ ++ unsigned long __flags__; \ ++ int __x__; \ ++ local_irq_save_hw_smp(__flags__); \ ++ __x__ = __ipipe_root_domain_p; \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \ ++ local_irq_restore_hw_smp(__flags__); \ ++ !__x__; \ ++ }) ++ ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ ipipe_schedule_notify(current ,next); \ ++ local_irq_disable_hw(); \ ++ } while(0) ++ ++#define task_hijacked(p) \ ++ ({ \ ++ int __x__ = __ipipe_root_domain_p; \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \ ++ if (__x__) local_irq_enable_hw(); !__x__; \ ++ }) ++ ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++struct ipipe_domain; ++ ++struct ipipe_sysinfo { ++ ++ int ncpus; /* Number of CPUs on board */ ++ u64 cpufreq; /* CPU frequency (in Hz) */ ++ ++ /* Arch-dependent block */ ++ ++ struct { ++ unsigned tmirq; /* Decrementer virtual IRQ */ ++ u64 tmfreq; /* Timebase frequency */ ++ } archdep; ++}; ++ ++#ifdef CONFIG_DEBUGGER ++extern cpumask_t __ipipe_dbrk_pending; ++#endif ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++struct mm; ++DECLARE_PER_CPU(struct mm_struct *, ipipe_active_mm); ++#define ipipe_mm_switch_protect(flags) \ ++ do { \ ++ preempt_disable(); \ ++ per_cpu(ipipe_active_mm, smp_processor_id()) = NULL; \ ++ barrier(); \ ++ (void)(flags); \ ++ } while(0) ++#define ipipe_mm_switch_unprotect(flags) \ ++ do { \ ++ preempt_enable(); \ ++ (void)(flags); \ ++ } while(0) ++#else ++#define ipipe_mm_switch_protect(flags) local_irq_save_hw_cond(flags) ++#define ipipe_mm_switch_unprotect(flags) local_irq_restore_hw_cond(flags) ++#endif ++ ++#define ipipe_cpu_freq() ppc_tb_freq ++#ifdef CONFIG_PPC64 ++#define ipipe_read_tsc(t) (t = mftb()) ++#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL)) ++#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL)) ++#else ++#define ipipe_read_tsc(t) \ ++ ({ \ ++ unsigned long __tbu; \ ++ __asm__ __volatile__ ("1: mftbu %0\n" \ ++ "mftb %1\n" \ ++ "mftbu %2\n" \ ++ "cmpw %2,%0\n" \ ++ "bne- 1b\n" \ ++ :"=r" (((unsigned long *)&t)[0]), \ ++ "=r" (((unsigned long *)&t)[1]), \ ++ "=r" (__tbu)); \ ++ t; \ ++ }) ++#define ipipe_tsc2ns(t) ((((unsigned long)(t)) * 1000) / (ipipe_cpu_freq() / 1000000)) ++#define ipipe_tsc2us(t) \ ++ ({ \ ++ unsigned long long delta = (t); \ ++ do_div(delta, ipipe_cpu_freq()/1000000+1); \ ++ (unsigned long)delta; \ ++ }) ++#endif ++#define __ipipe_read_timebase() \ ++ ({ \ ++ unsigned long long t; \ ++ ipipe_read_tsc(t); \ ++ t; \ ++ }) ++ ++/* Private interface -- Internal use only */ ++ ++#define __ipipe_check_platform() do { } while(0) ++#define __ipipe_enable_irq(irq) enable_irq(irq) ++#define __ipipe_disable_irq(irq) disable_irq(irq) ++#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0) ++ ++void 
__ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_init_platform(void); ++ ++void __ipipe_enable_pipeline(void); ++ ++void __ipipe_end_irq(unsigned irq); ++ ++static inline int __ipipe_check_tickdev(const char *devname) ++{ ++ return 1; ++} ++ ++#ifdef CONFIG_SMP ++struct ipipe_ipi_struct { ++ volatile unsigned long value; ++} ____cacheline_aligned; ++ ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); ++ ++void __ipipe_register_ipi(unsigned int irq); ++#else ++#define __ipipe_hook_critical_ipi(ipd) do { } while(0) ++#endif /* CONFIG_SMP */ ++ ++DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs); ++ ++void __ipipe_handle_irq(int irq, struct pt_regs *regs); ++ ++static inline void ipipe_handle_chained_irq(unsigned int irq) ++{ ++ struct pt_regs regs; /* dummy */ ++ ++ ipipe_trace_irq_entry(irq); ++ __ipipe_handle_irq(irq, ®s); ++ ipipe_trace_irq_exit(irq); ++} ++ ++struct irq_desc; ++void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc); ++void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc); ++void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); ++void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); ++ ++void __ipipe_serial_debug(const char *fmt, ...); ++ ++#define __ipipe_tick_irq IPIPE_TIMER_VIRQ ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++#ifdef CONFIG_PPC64 ++ __asm__ __volatile__("cntlzd %0, %1":"=r"(ul):"r"(ul & (-ul))); ++ return 63 - ul; ++#else ++ __asm__ __volatile__("cntlzw %0, %1":"=r"(ul):"r"(ul & (-ul))); ++ return 31 - ul; ++#endif ++} ++ ++/* ++ * When running handlers, enable hw interrupts for all domains but the ++ * one heading the pipeline, so that IRQs can never be significantly ++ * deferred for the latter. ++ */ ++#define __ipipe_run_isr(ipd, irq) \ ++do { \ ++ if (!__ipipe_pipeline_head_p(ipd)) \ ++ local_irq_enable_hw(); \ ++ if (ipd == ipipe_root_domain) \ ++ if (likely(!ipipe_virtual_irq_p(irq))) \ ++ ipd->irqs[irq].handler(irq, NULL); \ ++ else { \ ++ irq_enter(); \ ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);\ ++ irq_exit(); \ ++ } \ ++ else { \ ++ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ ++ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ ++ } \ ++ local_irq_disable_hw(); \ ++} while(0) ++ ++#define __ipipe_syscall_watched_p(p, sc) \ ++ (((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls) ++ ++#define __ipipe_root_tick_p(regs) ((regs)->msr & MSR_EE) ++ ++void handle_one_irq(unsigned int irq); ++ ++void check_stack_overflow(void); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define task_hijacked(p) 0 ++ ++#define ipipe_handle_chained_irq(irq) generic_handle_irq(irq) ++ ++#define ipipe_mm_switch_protect(flags) do { (void)(flags); } while(0) ++#define ipipe_mm_switch_unprotect(flags) do { (void)(flags); } while(0) ++ ++#endif /* CONFIG_IPIPE */ ++ ++#define ipipe_update_tick_evtdev(evtdev) do { } while (0) ++ ++#endif /* !__ASM_POWERPC_IPIPE_H */ +diff --git a/arch/powerpc/include/asm/ipipe_base.h b/arch/powerpc/include/asm/ipipe_base.h +new file mode 100644 +index 0000000..8f2a661 +--- /dev/null ++++ b/arch/powerpc/include/asm/ipipe_base.h +@@ -0,0 +1,154 @@ ++/* -*- linux-c -*- ++ * include/asm-powerpc/ipipe_base.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __ASM_POWERPC_IPIPE_BASE_H ++#define __ASM_POWERPC_IPIPE_BASE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#define IPIPE_NR_XIRQS NR_IRQS ++#ifdef CONFIG_PPC64 ++#define IPIPE_IRQ_ISHIFT 6 /* 64-bit arch. */ ++#else ++#define IPIPE_IRQ_ISHIFT 5 /* 32-bit arch. */ ++#endif ++ ++/* ++ * The first virtual interrupt is reserved for the timer (see ++ * __ipipe_init_platform). ++ */ ++#define IPIPE_TIMER_VIRQ IPIPE_VIRQ_BASE ++ ++#ifdef CONFIG_SMP ++/* ++ * These are virtual IPI numbers. The OpenPIC supports only 4 IPIs and ++ * all are already used by Linux. The virtualization layer is ++ * implemented by piggybacking the debugger break IPI 0x3, ++ * which is demultiplexed in __ipipe_ipi_demux(). ++ */ ++/* these are bit numbers in practice */ ++#define IPIPE_MSG_CRITICAL_IPI 0 ++#define IPIPE_MSG_SERVICE_IPI0 (IPIPE_MSG_CRITICAL_IPI + 1) ++#define IPIPE_MSG_SERVICE_IPI1 (IPIPE_MSG_CRITICAL_IPI + 2) ++#define IPIPE_MSG_SERVICE_IPI2 (IPIPE_MSG_CRITICAL_IPI + 3) ++#define IPIPE_MSG_SERVICE_IPI3 (IPIPE_MSG_CRITICAL_IPI + 4) ++#define IPIPE_MSG_SERVICE_IPI4 (IPIPE_MSG_CRITICAL_IPI + 5) ++ ++#define IPIPE_MSG_IPI_MASK ((1UL << IPIPE_MSG_CRITICAL_IPI) | \ ++ (1UL << IPIPE_MSG_SERVICE_IPI0) | \ ++ (1UL << IPIPE_MSG_SERVICE_IPI1) | \ ++ (1UL << IPIPE_MSG_SERVICE_IPI2) | \ ++ (1UL << IPIPE_MSG_SERVICE_IPI3) | \ ++ (1UL << IPIPE_MSG_SERVICE_IPI4)) ++ ++#define IPIPE_CRITICAL_IPI (IPIPE_VIRQ_BASE + 1) ++#define IPIPE_SERVICE_IPI0 (IPIPE_CRITICAL_IPI + 1) ++#define IPIPE_SERVICE_IPI1 (IPIPE_CRITICAL_IPI + 2) ++#define IPIPE_SERVICE_IPI2 (IPIPE_CRITICAL_IPI + 3) ++#define IPIPE_SERVICE_IPI3 (IPIPE_CRITICAL_IPI + 4) ++#define IPIPE_SERVICE_IPI4 (IPIPE_CRITICAL_IPI + 5) ++ ++#define IPIPE_MSG_IPI_OFFSET (IPIPE_CRITICAL_IPI) ++ ++#define ipipe_processor_id() raw_smp_processor_id() ++#else /* !CONFIG_SMP */ ++#define ipipe_processor_id() 0 ++#endif /* CONFIG_SMP */ ++ ++/* traps */ ++#define IPIPE_TRAP_ACCESS 0 /* Data or instruction access exception */ ++#define IPIPE_TRAP_ALIGNMENT 1 /* Alignment exception */ ++#define IPIPE_TRAP_ALTUNAVAIL 2 /* Altivec unavailable */ ++#define IPIPE_TRAP_PCE 3 /* Program check exception */ ++#define IPIPE_TRAP_MCE 4 /* Machine check exception */ ++#define IPIPE_TRAP_UNKNOWN 5 /* Unknown exception */ ++#define IPIPE_TRAP_IABR 6 /* Instruction breakpoint */ ++#define IPIPE_TRAP_RM 7 /* Run mode exception */ ++#define IPIPE_TRAP_SSTEP 8 /* Single-step exception */ ++#define IPIPE_TRAP_NREC 9 /* Non-recoverable exception */ ++#define IPIPE_TRAP_SOFTEMU 10 /* Software emulation */ ++#define IPIPE_TRAP_DEBUG 11 /* Debug exception */ ++#define IPIPE_TRAP_SPE 12 /* SPE exception */ ++#define IPIPE_TRAP_ALTASSIST 13 /* Altivec assist exception */ ++#define IPIPE_TRAP_CACHE 14 /* Cache-locking 
exception (FSL) */ ++#define IPIPE_TRAP_KFPUNAVAIL 15 /* FP unavailable exception */ ++#define IPIPE_NR_FAULTS 16 ++/* Pseudo-vectors used for kernel events */ ++#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS ++#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) ++#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) ++#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) ++#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) ++#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) ++#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) ++#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) ++#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP ++#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) ++ ++#ifndef __ASSEMBLY__ ++ ++#ifdef CONFIG_SMP ++ ++void __ipipe_stall_root(void); ++ ++unsigned long __ipipe_test_and_stall_root(void); ++ ++unsigned long __ipipe_test_root(void); ++ ++#else /* !CONFIG_SMP */ ++ ++#include ++ ++#if __GNUC__ >= 4 ++/* Alias to ipipe_root_cpudom_var(status) */ ++extern unsigned long __ipipe_root_status; ++#else ++extern unsigned long *const __ipipe_root_status_addr; ++#define __ipipe_root_status (*__ipipe_root_status_addr) ++#endif ++ ++static __inline__ void __ipipe_stall_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ set_bit(0, p); ++} ++ ++static __inline__ unsigned long __ipipe_test_and_stall_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ return test_and_set_bit(0, p); ++} ++ ++static __inline__ unsigned long __ipipe_test_root(void) ++{ ++ volatile unsigned long *p = &__ipipe_root_status; ++ return test_bit(0, p); ++} ++ ++#endif /* !CONFIG_SMP */ ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#define __IPIPE_FEATURE_PREEMPTIBLE_SWITCH 1 ++ ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__ASM_POWERPC_IPIPE_BASE_H */ +diff --git a/arch/powerpc/include/asm/ipipe_hwirq.h b/arch/powerpc/include/asm/ipipe_hwirq.h +new file mode 100644 +index 0000000..c3162c6 +--- /dev/null ++++ b/arch/powerpc/include/asm/ipipe_hwirq.h +@@ -0,0 +1,219 @@ ++/* -*- linux-c -*- ++ * include/asm-powerpc/ipipe_hwirq.h ++ * ++ * Copyright (C) 2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _ASM_POWERPC_IPIPE_HWIRQ_H ++ ++#ifdef CONFIG_PPC32 ++ ++#if defined(CONFIG_BOOKE) ++#define local_irq_restore_hw_notrace(x) __asm__ __volatile__("wrtee %0" : : "r" (x) : "memory") ++#else ++#define local_irq_restore_hw_notrace(x) mtmsr(x) ++#endif ++ ++static inline void local_irq_disable_hw_notrace(void) ++{ ++#ifdef CONFIG_BOOKE ++ __asm__ __volatile__("wrteei 0": : :"memory"); ++#else ++ unsigned long msr = mfmsr(); ++ mtmsr(msr & ~MSR_EE); ++#endif ++} ++ ++static inline void local_irq_enable_hw_notrace(void) ++{ ++#ifdef CONFIG_BOOKE ++ __asm__ __volatile__("wrteei 1": : :"memory"); ++#else ++ unsigned long msr = mfmsr(); ++ mtmsr(msr | MSR_EE); ++#endif ++} ++ ++static inline void local_irq_save_ptr_hw(unsigned long *x) ++{ ++ unsigned long msr = mfmsr(); ++ *x = msr; ++#ifdef CONFIG_BOOKE ++ __asm__ __volatile__("wrteei 0": : :"memory"); ++#else ++ mtmsr(msr & ~MSR_EE); ++#endif ++} ++ ++#else /* CONFIG_PPC64 */ ++ ++#include ++ ++#ifdef CONFIG_PPC_BOOK3E ++static inline void local_irq_disable_hw_notrace(void) ++{ ++ __asm__ __volatile__("wrteei 0": : :"memory"); ++} ++ ++static inline void local_irq_enable_hw_notrace(void) ++{ ++ __asm__ __volatile__("wrteei 1": : :"memory"); ++} ++#else /* !CONFIG_PPC_BOOK3E */ ++static inline void local_irq_disable_hw_notrace(void) ++{ ++ __mtmsrd(mfmsr() & ~MSR_EE, 1); ++} ++ ++static inline void local_irq_enable_hw_notrace(void) ++{ ++ __mtmsrd(mfmsr() | MSR_EE, 1); ++} ++#endif /* !CONFIG_PPC_BOOK3E */ ++ ++static inline void local_irq_save_ptr_hw(unsigned long *x) ++{ ++ unsigned long msr = mfmsr(); ++ local_irq_disable_hw_notrace(); ++ __asm__ __volatile__("": : :"memory"); ++ *x = msr; ++} ++ ++#define local_irq_restore_hw_notrace(x) __mtmsrd(x, 1) ++ ++#endif /* CONFIG_PPC64 */ ++ ++#define local_irq_save_hw_notrace(x) local_irq_save_ptr_hw(&(x)) ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++#include ++ ++#ifdef CONFIG_SOFTDISABLE ++#error "CONFIG_SOFTDISABLE and CONFIG_IPIPE are mutually exclusive" ++#endif ++ ++#define irqs_disabled_hw() ((mfmsr() & MSR_EE) == 0) ++#define local_save_flags_hw(x) ((x) = mfmsr()) ++#define raw_irqs_disabled_flags(x) (((x) & MSR_EE) == 0) ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ++static inline void local_irq_disable_hw(void) ++{ ++ if (!irqs_disabled_hw()) { ++ local_irq_disable_hw_notrace(); ++ ipipe_trace_begin(0x80000000); ++ } ++} ++ ++static inline void local_irq_enable_hw(void) ++{ ++ if (irqs_disabled_hw()) { ++ ipipe_trace_end(0x80000000); ++ local_irq_enable_hw_notrace(); ++ } ++} ++ ++#define local_irq_save_hw(x) \ ++ do { \ ++ local_irq_save_ptr_hw(&(x)); \ ++ if (!irqs_disabled_flags(x)) \ ++ ipipe_trace_begin(0x80000001); \ ++ } while(0) ++ ++static inline void local_irq_restore_hw(unsigned long x) ++{ ++ if (!raw_irqs_disabled_flags(x)) ++ ipipe_trace_end(0x80000001); ++ ++ local_irq_restore_hw_notrace(x); ++} ++ ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++#define local_irq_disable_hw local_irq_disable_hw_notrace ++#define local_irq_enable_hw local_irq_enable_hw_notrace ++#define local_irq_save_hw local_irq_save_hw_notrace ++#define local_irq_restore_hw local_irq_restore_hw_notrace ++ ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++#define raw_local_irq_disable() \ ++ do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ __ipipe_stall_root(); \ ++ barrier(); \ ++ } while (0) ++ ++#define raw_local_irq_enable() \ ++ do { \ ++ barrier(); \ ++ ipipe_check_context(ipipe_root_domain); \ ++ __ipipe_unstall_root(); \ ++ } while (0) ++ ++static inline void 
raw_local_irq_restore(unsigned long x) ++{ ++ if (!raw_irqs_disabled_flags(x)) ++ raw_local_irq_enable(); ++} ++ ++#define raw_local_irq_save(x) \ ++ do { \ ++ (x) = (!__ipipe_test_and_stall_root()) << MSR_EE_LG; \ ++ barrier(); \ ++ } while (0) ++ ++#define raw_local_save_flags(x) \ ++ do { \ ++ (x) = (!__ipipe_test_root()) << MSR_EE_LG; \ ++ } while (0) ++ ++#define raw_irqs_disabled() __ipipe_test_root() ++#define raw_irqs_disabled_flags(x) (((x) & MSR_EE) == 0) ++#define hard_irq_disable() local_irq_disable_hw() ++ ++static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real) ++{ ++ /* ++ * Merge virtual and real interrupt mask bits into a single ++ * long word. We know MSR_EE will not conflict with 1L<<31. ++ */ ++ return (real & ~(1L << 31)) | ((long)(virt != 0) << 31); ++} ++ ++static inline int raw_demangle_irq_bits(unsigned long *x) ++{ ++ int virt = (*x & (1L << 31)) != 0; ++ *x &= ~(1L << 31); ++ return virt; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define local_irq_save_hw(x) raw_local_irq_save(x) ++#define local_irq_restore_hw(x) raw_local_irq_restore(x) ++#define local_irq_enable_hw() raw_local_irq_enable() ++#define local_irq_disable_hw() raw_local_irq_disable() ++#define irqs_disabled_hw() raw_irqs_disabled() ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#endif /* !_ASM_POWERPC_IPIPE_HWIRQ_H */ +diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h +index 5f68ecf..5059e59 100644 +--- a/arch/powerpc/include/asm/irqflags.h ++++ b/arch/powerpc/include/asm/irqflags.h +@@ -10,7 +10,17 @@ + */ + #include + +-#else ++#elif CONFIG_IPIPE ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define TRACE_DISABLE_INTS bl .__ipipe_trace_irqson ++#define TRACE_ENABLE_INTS bl .__ipipe_trace_irqson ++#define TRACE_DISABLE_INTS_REALLY bl .__ipipe_trace_irqsoff ++#else ++#define TRACE_DISABLE_INTS ++#define TRACE_ENABLE_INTS ++#define TRACE_DISABLE_INTS_REALLY ++#endif ++#else /* !CONFIG_IPIPE */ + #ifdef CONFIG_TRACE_IRQFLAGS + /* + * Most of the CPU's IRQ-state tracing is done from assembly code; we +diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h +index b34e94d..a3820af 100644 +--- a/arch/powerpc/include/asm/mmu_context.h ++++ b/arch/powerpc/include/asm/mmu_context.h +@@ -32,11 +32,17 @@ extern void mmu_context_init(void); + * switch_mm is the entry point called from the architecture independent + * code in kernel/sched.c + */ +-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, +- struct task_struct *tsk) ++static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) + { ++ int cpu = smp_processor_id(); ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && \ ++ !defined(CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH) ++ WARN_ON_ONCE(!irqs_disabled_hw()); ++#endif + /* Mark this context has been used on the new CPU */ +- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); ++ cpumask_set_cpu(cpu, mm_cpumask(next)); + + /* 32-bit keeps track of the current PGDIR in the thread struct */ + #ifdef CONFIG_PPC32 +@@ -62,6 +68,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + /* The actual HW switching method differs between the various + * sub architectures. + */ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++#ifdef CONFIG_PPC_STD_MMU_64 ++ do { ++ per_cpu(ipipe_active_mm, cpu) = NULL; /* mm state is undefined. 
*/ ++ barrier(); ++ if (cpu_has_feature(CPU_FTR_SLB)) ++ switch_slb(tsk, next); ++ else ++ switch_stab(tsk, next); ++ barrier(); ++ per_cpu(ipipe_active_mm, cpu) = next; ++ } while (test_and_clear_thread_flag(TIF_MMSWITCH_INT)); ++#else ++ do { ++ per_cpu(ipipe_active_mm, cpu) = NULL; /* mm state is undefined. */ ++ barrier(); ++ switch_mmu_context(prev, next); ++ barrier(); ++ per_cpu(ipipe_active_mm, cpu) = next; ++ } while (test_and_clear_thread_flag(TIF_MMSWITCH_INT)); ++#endif ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + #ifdef CONFIG_PPC_STD_MMU_64 + if (cpu_has_feature(CPU_FTR_SLB)) + switch_slb(tsk, next); +@@ -71,7 +99,21 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + /* Out of line for now */ + switch_mmu_context(prev, next); + #endif ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++} + ++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) ++{ ++#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ unsigned long flags; ++ local_irq_save_hw(flags); ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ __switch_mm(prev, next, tsk); ++#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ local_irq_restore_hw(flags); ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ return; + } + + #define deactivate_mm(tsk,mm) do { } while (0) +@@ -85,7 +127,7 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) + unsigned long flags; + + local_irq_save(flags); +- switch_mm(prev, next, current); ++ __switch_mm(prev, next, current); + local_irq_restore(flags); + } + +diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h +index b06fa85..4a360cb 100644 +--- a/arch/powerpc/include/asm/mpic.h ++++ b/arch/powerpc/include/asm/mpic.h +@@ -289,7 +289,7 @@ struct mpic + #ifdef CONFIG_MPIC_U3_HT_IRQS + /* The fixup table */ + struct mpic_irq_fixup *fixups; +- spinlock_t fixup_lock; ++ ipipe_spinlock_t fixup_lock; + #endif + + /* Register access method */ +diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h +index 7d8514c..31a1cf4 100644 +--- a/arch/powerpc/include/asm/paca.h ++++ b/arch/powerpc/include/asm/paca.h +@@ -119,8 +119,12 @@ struct paca_struct { + u64 saved_r1; /* r1 save for RTAS calls */ + u64 saved_msr; /* MSR saved here by enter_rtas */ + u16 trap_save; /* Used when bad stack is encountered */ ++#ifdef CONFIG_SOFTDISABLE + u8 soft_enabled; /* irq soft-enable flag */ + u8 hard_enabled; /* set if irqs are enabled in MSR */ ++#elif CONFIG_IPIPE ++ u64 root_percpu; /* Address of per_cpu data for the root domain */ ++#endif + u8 io_sync; /* writel() needs spin_unlock sync */ + u8 perf_event_pending; /* PM interrupt while soft-disabled */ + +diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h +index 8c34149..2fb6eba 100644 +--- a/arch/powerpc/include/asm/ptrace.h ++++ b/arch/powerpc/include/asm/ptrace.h +@@ -36,7 +36,7 @@ struct pt_regs { + unsigned long xer; + unsigned long ccr; + #ifdef __powerpc64__ +- unsigned long softe; /* Soft enabled/disabled */ ++ unsigned long softe; /* Soft enabled/disabled (CONFIG_SOFTDISABLE || CONFIG_IPIPE) */ + #else + unsigned long mq; /* 601 only (not used at present) */ + /* Used on APUS to hold IPL value. 
*/ +diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h +index cf51966..e7d287c 100644 +--- a/arch/powerpc/include/asm/qe_ic.h ++++ b/arch/powerpc/include/asm/qe_ic.h +@@ -74,6 +74,13 @@ static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic) + { return 0; } + #endif /* CONFIG_QUICC_ENGINE */ + ++#ifdef CONFIG_IPIPE ++void __ipipe_qe_ic_cascade_irq(struct qe_ic *qe_ic, unsigned int virq); ++#define qe_ic_cascade_irq(qe_ic, irq) __ipipe_qe_ic_cascade_irq(qe_ic, irq) ++#else ++#define qe_ic_cascade_irq(qe_ic, irq) generic_handle_irq(irq) ++#endif ++ + void qe_ic_set_highest_priority(unsigned int virq, int high); + int qe_ic_set_priority(unsigned int virq, unsigned int priority); + int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high); +@@ -85,7 +92,7 @@ static inline void qe_ic_cascade_low_ipic(unsigned int irq, + unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); + + if (cascade_irq != NO_IRQ) +- generic_handle_irq(cascade_irq); ++ qe_ic_cascade_irq(qe_ic, cascade_irq); + } + + static inline void qe_ic_cascade_high_ipic(unsigned int irq, +@@ -95,7 +102,7 @@ static inline void qe_ic_cascade_high_ipic(unsigned int irq, + unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); + + if (cascade_irq != NO_IRQ) +- generic_handle_irq(cascade_irq); ++ qe_ic_cascade_irq(qe_ic, cascade_irq); + } + + static inline void qe_ic_cascade_low_mpic(unsigned int irq, +@@ -105,7 +112,7 @@ static inline void qe_ic_cascade_low_mpic(unsigned int irq, + unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); + + if (cascade_irq != NO_IRQ) +- generic_handle_irq(cascade_irq); ++ qe_ic_cascade_irq(qe_ic, cascade_irq); + + desc->chip->eoi(irq); + } +@@ -117,7 +124,7 @@ static inline void qe_ic_cascade_high_mpic(unsigned int irq, + unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); + + if (cascade_irq != NO_IRQ) +- generic_handle_irq(cascade_irq); ++ qe_ic_cascade_irq(qe_ic, cascade_irq); + + desc->chip->eoi(irq); + } +@@ -133,7 +140,7 @@ static inline void qe_ic_cascade_muxed_mpic(unsigned int irq, + cascade_irq = qe_ic_get_low_irq(qe_ic); + + if (cascade_irq != NO_IRQ) +- generic_handle_irq(cascade_irq); ++ qe_ic_cascade_irq(qe_ic, cascade_irq); + + desc->chip->eoi(irq); + } +diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h +index 6315edc..1cce38d 100644 +--- a/arch/powerpc/include/asm/reg.h ++++ b/arch/powerpc/include/asm/reg.h +@@ -928,7 +928,7 @@ + + #define proc_trap() asm volatile("trap") + +-#ifdef CONFIG_PPC64 ++#ifdef CONFIG_RUNLATCH + + extern void ppc64_runlatch_on(void); + extern void ppc64_runlatch_off(void); +diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h +index d9ea8d3..9c57dee 100644 +--- a/arch/powerpc/include/asm/smp.h ++++ b/arch/powerpc/include/asm/smp.h +@@ -54,8 +54,14 @@ void generic_mach_cpu_die(void); + /* 32-bit */ + extern int smp_hw_index[]; + ++#ifdef CONFIG_IPIPE ++extern int smp_logical_index[]; ++#define raw_smp_processor_id() (smp_logical_index[mfspr(SPRN_PIR)]) ++#define hard_smp_processor_id() (smp_hw_index[raw_smp_processor_id()]) ++#else + #define raw_smp_processor_id() (current_thread_info()->cpu) + #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) ++#endif + + static inline int get_hard_smp_processor_id(int cpu) + { +@@ -65,6 +71,10 @@ static inline int get_hard_smp_processor_id(int cpu) + static inline void set_hard_smp_processor_id(int cpu, int phys) + { + smp_hw_index[cpu] = phys; ++#ifdef CONFIG_IPIPE ++ BUG_ON(phys >= NR_CPUS); ++ 
smp_logical_index[phys] = cpu; ++#endif + } + #endif + +@@ -80,6 +90,7 @@ extern int cpu_to_core_id(int cpu); + #define PPC_MSG_RESCHEDULE 1 + #define PPC_MSG_CALL_FUNC_SINGLE 2 + #define PPC_MSG_DEBUGGER_BREAK 3 ++#define PPC_MSG_IPIPE_DEMUX PPC_MSG_DEBUGGER_BREAK + + /* + * irq controllers that have dedicated ipis per message and don't +diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h +index c8b3292..8615765 100644 +--- a/arch/powerpc/include/asm/thread_info.h ++++ b/arch/powerpc/include/asm/thread_info.h +@@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void) + #define TIF_FREEZE 14 /* Freezing for suspend */ + #define TIF_RUNLATCH 15 /* Is the runlatch enabled? */ + #define TIF_ABI_PENDING 16 /* 32/64 bit switch needed */ ++#define TIF_MMSWITCH_INT 20 /* MMU context switch interrupted */ + + /* as above, but as bit values */ + #define _TIF_SYSCALL_TRACE (1<thread_info to ¤t->thread, which is coarser ++ * than the vanilla implementation, but likely sensitive enough ++ * to catch overflows soon enough though. ++ */ ++ addi r12,r9,THREAD ++ cmplw 0,r1,r9 ++ cmplw 1,r1,r12 ++ crand 1,1,4 ++ bgt- stack_ovf /* if r9 < r1 < r9+THREAD */ ++#else /* CONFIG_IPIPE */ + cmplw r1,r9 /* if r1 <= ksp_limit */ + ble- stack_ovf /* then the kernel stack overflowed */ ++#endif /* CONFIG_IPIPE */ + 5: + #if defined(CONFIG_6xx) || defined(CONFIG_E500) + rlwinm r9,r1,0,0,31-THREAD_SHIFT +@@ -286,6 +303,21 @@ _GLOBAL(DoSyscall) + lwz r11,_CCR(r1) /* Clear SO bit in CR */ + rlwinm r11,r11,0,4,2 + stw r11,_CCR(r1) ++#ifdef CONFIG_IPIPE ++ addi r3,r1,GPR0 ++ bl __ipipe_syscall_root ++ cmpwi r3,0 ++ lwz r3,GPR3(r1) ++ lwz r0,GPR0(r1) ++ lwz r4,GPR4(r1) ++ lwz r5,GPR5(r1) ++ lwz r6,GPR6(r1) ++ lwz r7,GPR7(r1) ++ lwz r8,GPR8(r1) ++ lwz r9,GPR9(r1) ++ bgt .ipipe_end_syscall ++ blt ret_from_syscall ++#endif /* CONFIG_IPIPE */ + #ifdef SHOW_SYSCALLS + bl do_show_syscall + #endif /* SHOW_SYSCALLS */ +@@ -402,11 +434,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) + b 1b + #endif /* CONFIG_44x */ + ++#ifdef CONFIG_IPIPE ++.ipipe_end_syscall: ++ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ ++ SYNC ++ MTMSRD(r10) ++ b syscall_exit_cont ++#endif /* CONFIG_IPIPE */ ++ + 66: li r3,-ENOSYS + b ret_from_syscall + + .globl ret_from_fork + ret_from_fork: ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ stwu r1,-4(r1) ++ stw r3,0(r1) ++ lis r3,(0x80000000)@h ++ ori r3,r3,(0x80000000)@l ++ bl ipipe_trace_end ++ lwz r3,0(r1) ++ addi r1,r1,4 ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ LOAD_MSR_KERNEL(r10,MSR_KERNEL) ++ ori r10,r10,MSR_EE ++ SYNC ++ MTMSRD(r10) ++#endif /* CONFIG_IPIPE */ + REST_NVGPRS(r1) + bl schedule_tail + li r3,0 +@@ -788,6 +843,12 @@ ret_from_except: + SYNC /* Some chip revs have problems here... */ + MTMSRD(r10) /* disable interrupts */ + ++#ifdef CONFIG_IPIPE ++ bl __ipipe_check_root ++ cmpwi r3, 0 ++ mfmsr r10 /* this is used later, might be messed */ ++ beq- restore ++#endif /* CONFIG_IPIPE */ + lwz r3,_MSR(r1) /* Returning to user mode? */ + andi. r0,r3,MSR_PR + beq resume_kernel +@@ -811,6 +872,12 @@ restore_user: + #ifdef CONFIG_PREEMPT + b restore + ++#ifdef CONFIG_IPIPE ++#define PREEMPT_SCHEDULE_IRQ __ipipe_preempt_schedule_irq ++#else ++#define PREEMPT_SCHEDULE_IRQ preempt_schedule_irq ++#endif ++ + /* N.B. the only way to get here is from the beq following ret_from_except. 
*/ + resume_kernel: + /* check current_thread_info->preempt_count */ +@@ -830,7 +897,7 @@ resume_kernel: + */ + bl trace_hardirqs_off + #endif +-1: bl preempt_schedule_irq ++1: bl PREEMPT_SCHEDULE_IRQ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r3,TI_FLAGS(r9) + andi. r0,r3,_TIF_NEED_RESCHED +@@ -1227,6 +1294,13 @@ ee_restarts: + .space 4 + .previous + ++#ifdef CONFIG_IPIPE ++_GLOBAL(__ipipe_ret_from_except) ++ cmpwi r3, 0 ++ bne+ ret_from_except ++ b restore ++#endif /* CONFIG_IPIPE */ ++ + /* + * PROM code for specific machines follows. Put it + * here so it's easy to add arch-specific sections later. +diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S +index 2306d40..daf6ec1 100644 +--- a/arch/powerpc/kernel/entry_64.S ++++ b/arch/powerpc/kernel/entry_64.S +@@ -33,6 +33,11 @@ + #include + #include + ++#ifdef CONFIG_IPIPE ++#define PREEMPT_SCHEDULE_IRQ .__ipipe_preempt_schedule_irq ++#else ++#define PREEMPT_SCHEDULE_IRQ .preempt_schedule_irq ++#endif + /* + * System calls. + */ +@@ -105,6 +110,7 @@ system_call_common: + addi r9,r1,STACK_FRAME_OVERHEAD + ld r12,_MSR(r1) + #endif /* CONFIG_TRACE_IRQFLAGS */ ++#ifdef CONFIG_SOFTDISABLE + li r10,1 + stb r10,PACASOFTIRQEN(r13) + stb r10,PACAHARDIRQEN(r13) +@@ -120,6 +126,24 @@ BEGIN_FW_FTR_SECTION + 2: + END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) + #endif /* CONFIG_PPC_ISERIES */ ++#endif /* CONFIG_SOFTDISABLE */ ++ ++#ifdef CONFIG_IPIPE ++ addi r3,r1,GPR0 ++ bl .__ipipe_syscall_root ++ cmpwi r3,0 ++ ld r0,GPR0(r1) ++ ld r3,GPR3(r1) ++ ld r4,GPR4(r1) ++ ld r5,GPR5(r1) ++ ld r6,GPR6(r1) ++ ld r7,GPR7(r1) ++ ld r8,GPR8(r1) ++ ld r9,GPR9(r1) ++ bgt ipipe_end_syscall ++ blt syscall_exit ++ addi r9,r1,STACK_FRAME_OVERHEAD ++#endif /* CONFIG_IPIPE */ + + /* Hard enable interrupts */ + #ifdef CONFIG_PPC_BOOK3E +@@ -257,10 +281,27 @@ syscall_dotrace: + ld r10,TI_FLAGS(r10) + b syscall_dotrace_cont + ++#ifdef CONFIG_IPIPE ++ /* ++ * We get there upon return from __ipipe_syscall_root, ++ * with hw interrupts off. ++ */ ++ipipe_end_syscall: ++ ld r8,_MSR(r1) ++#ifdef CONFIG_PPC_BOOK3S ++ /* No MSR:RI on BookE */ ++ andi. r10,r8,MSR_RI ++ beq- unrecov_restore ++#endif ++ ld r5,_CCR(r1) ++ mfmsr r10 ++ b syscall_error_cont ++#endif /* CONFIG_IPIPE */ ++ + syscall_enosys: + li r3,-ENOSYS + b syscall_exit +- ++ + syscall_exit_work: + /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. + If TIF_NOERROR is set, just save r3 as it is. */ +@@ -295,6 +336,18 @@ syscall_exit_work: + andi. 
r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq .ret_from_except_lite + ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ bl .save_nvgprs ++ bl .__ipipe_trace_irqson ++#ifdef CONFIG_PPC_BOOK3E ++ wrteei 1 ++#else ++ /* Re-enable interrupts */ ++ mfmsr r10 ++ ori r10,r10,MSR_EE ++ mtmsrd r10,1 ++#endif /* CONFIG_PPC_BOOK3E */ ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ + /* Re-enable interrupts */ + #ifdef CONFIG_PPC_BOOK3E + wrteei 1 +@@ -305,6 +358,7 @@ syscall_exit_work: + #endif /* CONFIG_PPC_BOOK3E */ + + bl .save_nvgprs ++#endif /* !CONFIG_IPIPE_TRACE_IRQSOFF */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_syscall_trace_leave + b .ret_from_except +@@ -355,6 +409,22 @@ _GLOBAL(ppc64_swapcontext) + b syscall_exit + + _GLOBAL(ret_from_fork) ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ stdu r1,-8(r1) ++ std r3,0(r1) ++ bl .__ipipe_trace_irqson ++ ld r3,0(r1) ++ addi r1,r1,8 ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++#ifdef CONFIG_PPC_BOOK3E ++ wrteei 1 ++#else /* !CONFIG_PPC_BOOK3E */ ++ mfmsr r10 ++ ori r10,r10,MSR_EE ++ mtmsrd r10,1 ++#endif /* !CONFIG_PPC_BOOK3E */ ++#endif /* CONFIG_IPIPE */ + bl .schedule_tail + REST_NVGPRS(r1) + li r3,0 +@@ -507,6 +577,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + addi r1,r1,SWITCH_FRAME_SIZE + blr + ++ ++#ifdef CONFIG_IPIPE ++_GLOBAL(__ipipe_ret_from_except_lite) ++ cmpwi r3,0 ++ /* FIXME: branching to __ipipe_check_root is useless here */ ++ bne+ .ret_from_except_lite ++ b restore ++#endif /* CONFIG_IPIPE */ ++ + .align 7 + _GLOBAL(ret_from_except) + ld r11,_TRAP(r1) +@@ -529,6 +608,16 @@ _GLOBAL(ret_from_except_lite) + mtmsrd r9,1 /* Update machine state */ + #endif /* CONFIG_PPC_BOOK3E */ + ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ bl .__ipipe_trace_irqsoff ++#endif ++ bl .__ipipe_check_root ++ cmpwi r3,0 ++ mfmsr r10 /* this is used later, might be messed */ ++ beq- restore ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_PREEMPT + clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + li r0,_TIF_NEED_RESCHED /* bits to check */ +@@ -552,6 +641,7 @@ _GLOBAL(ret_from_except_lite) + #endif + + restore: ++#ifdef CONFIG_SOFTDISABLE + BEGIN_FW_FTR_SECTION + ld r5,SOFTE(r1) + FW_FTR_SECTION_ELSE +@@ -568,12 +658,38 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) + bl .perf_event_do_pending + 27: + #endif /* CONFIG_PERF_EVENTS */ +- + /* extract EE bit and use it to restore paca->hard_enabled */ + ld r3,_MSR(r1) + rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ + stb r4,PACAHARDIRQEN(r13) + ++#else /* !CONFIG_SOFTDISABLE */ ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_PERF_EVENTS ++ /* check paca->perf_event_pending if we're enabling ints */ ++ ld r5,_MSR(r1) ++ rldicl r5,r5,49,63 /* r0 = (r3 >> 15) & 1 */ ++ lbz r3,PACAPERFPEND(r13) ++ and. r3,r3,r5 ++ beq 27f ++ bl .perf_event_do_pending ++27: ++#endif /* CONFIG_PERF_EVENTS */ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ld r3,_MSR(r1) ++ rldicl r3,r3,49,63 /* r0 = (r3 >> 15) & 1 */ ++ bl .__ipipe_trace_irqsx ++#endif ++ ld r3,SOFTE(r1) /* currently hard-disabled, so this is safe */ ++ nor r3,r3,r3 ++ ld r4,PACAROOTPCPU(r13) ++ ld r5,0(r4) ++ insrdi r5,r3,1,63 ++ std r5,0(r4) ++ ld r3,_MSR(r1) ++#endif /* CONFIG_IPIPE */ ++#endif /* !CONFIG_SOFTDISABLE */ ++ + #ifdef CONFIG_PPC_BOOK3E + b .exception_return_book3e + #else +@@ -663,6 +779,7 @@ do_work: + crandc eq,cr1*4+eq,eq + bne restore + ++#ifdef CONFIG_SOFTDISABLE + /* Here we are preempting the current task. + * + * Ensure interrupts are soft-disabled. 
We also properly mark +@@ -673,9 +790,14 @@ do_work: + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + TRACE_DISABLE_INTS ++#endif + +- /* Call the scheduler with soft IRQs off */ +-1: bl .preempt_schedule_irq ++ /* ++ * Call the scheduler with soft IRQs off. When the interrupt ++ * pipeline is enabled, we enter the IRQ preemption code with hw ++ * interrupts disabled as well. ++ */ ++1: bl PREEMPT_SCHEDULE_IRQ + + /* Hard-disable interrupts again (and update PACA) */ + #ifdef CONFIG_PPC_BOOK3E +@@ -686,8 +808,14 @@ do_work: + rotldi r10,r10,16 + mtmsrd r10,1 + #endif /* CONFIG_PPC_BOOK3E */ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ /* FIXME: watch out for register wreckage */ ++ bl .__ipipe_trace_irqsoff ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++#ifdef CONFIG_SOFTDISABLE + li r0,0 + stb r0,PACAHARDIRQEN(r13) ++#endif + + /* Re-test flags and eventually loop */ + clrrdi r9,r1,THREAD_SHIFT +@@ -709,6 +837,9 @@ user_work: + + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ bl .__ipipe_trace_irqson ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + bl .schedule + b .ret_from_except_lite + +@@ -762,7 +893,7 @@ _GLOBAL(enter_rtas) + li r0,0 + mtcr r0 + +-#ifdef CONFIG_BUG ++#ifdef CONFIG_BUG && CONFIG_SOFTDISABLE + /* There is no way it is acceptable to get here with interrupts enabled, + * check it with the asm equivalent of WARN_ON + */ +@@ -935,6 +1066,10 @@ _GLOBAL(_mcount) + blr + + _GLOBAL(ftrace_caller) ++ LOAD_REG_IMMEDIATE(r3, function_trace_stop) ++ lwz r3, 0(r3) ++ cmpwi r3, 0 ++ bne ftrace_stub + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) +@@ -962,6 +1097,10 @@ _GLOBAL(mcount) + blr + + _GLOBAL(_mcount) ++ LOAD_REG_IMMEDIATE(r3, function_trace_stop) ++ lwz r3, 0(r3) ++ cmpwi r3, 0 ++ bne ftrace_stub + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) +diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S +index 1808876..0a34196 100644 +--- a/arch/powerpc/kernel/exceptions-64s.S ++++ b/arch/powerpc/kernel/exceptions-64s.S +@@ -215,6 +215,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) + STD_EXCEPTION_PSERIES(., altivec_unavailable) + STD_EXCEPTION_PSERIES(., vsx_unavailable) + ++#ifdef CONFIG_SOFTDISABLE + /* + * An interrupt came in while soft-disabled; clear EE in SRR1, + * clear paca->hard_enabled and return. +@@ -231,6 +232,7 @@ masked_interrupt: + mfspr r13,SPRN_SPRG_SCRATCH0 + rfid + b . 
++#endif /* CONFIG_SOFTDISABLE */ + + .align 7 + do_stab_bolted_pSeries: +@@ -315,11 +317,16 @@ machine_check_common: + FINISH_NAP + DISABLE_INTS + bl .save_nvgprs ++ TRACE_DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + ++#ifdef CONFIG_IPIPE ++ IPIPE_EXCEPTION_COMMON_LITE(0x900, decrementer, .__ipipe_grab_timer) ++#else /* !CONFIG_IPIPE */ + STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) ++#endif /* CONFIG_IPIPE */ + STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) +@@ -549,6 +556,7 @@ unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs ++ TRACE_DISABLE_INTS + 1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b +@@ -558,15 +566,23 @@ unrecov_slb: + .globl hardware_interrupt_entry + hardware_interrupt_common: + EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) +- FINISH_NAP + hardware_interrupt_entry: ++#ifdef CONFIG_IPIPE ++ DISABLE_INTS_REALLY ++ TRACE_DISABLE_INTS_REALLY ++ addi r3,r1,STACK_FRAME_OVERHEAD ++ bl .__ipipe_grab_irq ++ b .__ipipe_ret_from_except_lite ++#else /* !CONFIG_IPIPE */ + DISABLE_INTS ++ FINISH_NAP + BEGIN_FTR_SECTION + bl .ppc64_runlatch_on + END_FTR_SECTION_IFSET(CPU_FTR_CTRL) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite ++#endif /* CONFIG_IPIPE */ + + #ifdef CONFIG_PPC_970_NAP + power4_fixup_nap: +@@ -590,8 +606,9 @@ alignment_common: + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs +- addi r3,r1,STACK_FRAME_OVERHEAD ++ TRACE_ENABLE_INTS + ENABLE_INTS ++ addi r3,r1,STACK_FRAME_OVERHEAD + bl .alignment_exception + b .ret_from_except + +@@ -600,8 +617,9 @@ alignment_common: + program_check_common: + EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + bl .save_nvgprs +- addi r3,r1,STACK_FRAME_OVERHEAD ++ TRACE_ENABLE_INTS + ENABLE_INTS ++ addi r3,r1,STACK_FRAME_OVERHEAD + bl .program_check_exception + b .ret_from_except + +@@ -611,8 +629,9 @@ fp_unavailable_common: + EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + bne 1f /* if from user, just load it up */ + bl .save_nvgprs +- addi r3,r1,STACK_FRAME_OVERHEAD ++ TRACE_ENABLE_INTS + ENABLE_INTS ++ addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_fp_unavailable_exception + BUG_OPCODE + 1: bl .load_up_fpu +@@ -631,8 +650,9 @@ BEGIN_FTR_SECTION + END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + #endif + bl .save_nvgprs +- addi r3,r1,STACK_FRAME_OVERHEAD ++ TRACE_ENABLE_INTS + ENABLE_INTS ++ addi r3,r1,STACK_FRAME_OVERHEAD + bl .altivec_unavailable_exception + b .ret_from_except + +@@ -665,12 +685,23 @@ __end_handlers: + * ret_from_except or ret_from_except_lite instead of this. 
+ */ + fast_exc_return_irq: /* restores irq state too */ ++#ifdef CONFIG_SOFTDISABLE + ld r3,SOFTE(r1) + TRACE_AND_RESTORE_IRQ(r3); + ld r12,_MSR(r1) + rldicl r4,r12,49,63 /* get MSR_EE to LSB */ + stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */ + b 1f ++#else ++#ifdef CONFIG_IPIPE ++ ld r3,SOFTE(r1) /* currently hard-disabled, so this is safe */ ++ nor r3,r3,r3 ++ ld r4,PACAROOTPCPU(r13) ++ ld r5,0(r4) ++ insrdi r5,r3,1,63 ++ std r5,0(r4) ++#endif /* CONFIG_IPIPE */ ++#endif /* CONFIG_SOFTDISABLE */ + + .globl fast_exception_return + fast_exception_return: +@@ -711,6 +742,7 @@ fast_exception_return: + + unrecov_fer: + bl .save_nvgprs ++ TRACE_DISABLE_INTS + 1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b +@@ -807,13 +839,23 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) + * to what it was before the trap. Note that .raw_local_irq_restore + * handles any interrupts pending at this point. + */ ++#ifdef CONFIG_SOFTDISABLE + ld r3,SOFTE(r1) + TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) + bl .raw_local_irq_restore ++#else ++#ifdef CONFIG_IPIPE ++ ld r3,SOFTE(r1) ++ nor r3,r3,r3 ++ andi. r3,r3,1 ++ bl __ipipe_restore_if_root ++#endif ++#endif + b 11f + + /* Here we have a page fault that hash_page can't handle. */ + handle_page_fault: ++ TRACE_ENABLE_INTS + ENABLE_INTS + 11: ld r4,_DAR(r1) + ld r5,_DSISR(r1) +diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S +index fc8f5b1..1498010 100644 +--- a/arch/powerpc/kernel/fpu.S ++++ b/arch/powerpc/kernel/fpu.S +@@ -122,7 +122,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) + * Enables the FPU for use in the kernel on return. + */ + _GLOBAL(giveup_fpu) ++#ifdef CONFIG_IPIPE ++ mfmsr r6 ++#ifdef CONFIG_PPC64 ++ rldicl r5,r6,48,1 /* clear MSR_EE */ ++ rotldi r5,r5,16 ++#else ++ rlwinm r5,r6,0,17,15 /* clear MSR_EE */ ++#endif ++#else + mfmsr r5 ++#endif + ori r5,r5,MSR_FP + #ifdef CONFIG_VSX + BEGIN_FTR_SECTION +@@ -135,7 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) + SYNC_601 + isync + PPC_LCMPI 0,r3,0 +- beqlr- /* if no previous owner, done */ ++ beq- 2f /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + PPC_LL r5,PT_REGS(r3) + PPC_LCMPI 0,r5,0 +@@ -158,6 +168,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) + LOAD_REG_ADDRBASE(r4,last_task_used_math) + PPC_STL r5,ADDROFF(last_task_used_math)(r4) + #endif /* CONFIG_SMP */ ++2: ++#ifdef CONFIG_IPIPE /* restore interrupt state */ ++ andi. r6,r6,MSR_EE ++ beqlr ++ mfmsr r5 ++ ori r5,r5,MSR_EE ++ SYNC_601 ++ ISYNC_601 ++ MTMSRD(r5) ++ SYNC_601 ++ isync ++#endif + blr + + /* +diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S +index 829c3fe..6bbea24 100644 +--- a/arch/powerpc/kernel/head_32.S ++++ b/arch/powerpc/kernel/head_32.S +@@ -322,6 +322,12 @@ i##n: \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + ++#ifdef CONFIG_IPIPE ++#define EXC_XFER_IPIPE(n, hdlr) \ ++ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ ++ __ipipe_ret_from_except) ++#endif /* CONFIG_IPIPE */ ++ + #define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) +@@ -406,7 +412,11 @@ InstructionAccess: + EXC_XFER_EE_LITE(0x400, handle_page_fault) + + /* External interrupt */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x500, HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + /* Alignment exception */ + . 
= 0x600 +@@ -440,7 +450,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) + EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + + /* Decrementer */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x900, Decrementer, __ipipe_grab_timer, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) +@@ -1016,6 +1030,12 @@ _ENTRY(switch_mmu_context) + lwz r3,MMCONTEXTID(r4) + cmpwi cr0,r3,0 + blt- 4f ++#ifdef CONFIG_IPIPE ++ mfmsr r7 ++ rlwinm r0,r7,0,17,15 /* clear MSR_EE in r0 */ ++ mtmsr r0 ++ sync ++#endif + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ + addis r3,r3,0x6000 /* Set Ks, Ku bits */ +@@ -1039,6 +1059,9 @@ _ENTRY(switch_mmu_context) + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b ++#ifdef CONFIG_IPIPE ++ mtmsr r7 ++#endif + sync + isync + blr +diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S +index a90625f..178dcf7 100644 +--- a/arch/powerpc/kernel/head_40x.S ++++ b/arch/powerpc/kernel/head_40x.S +@@ -234,6 +234,12 @@ label: + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + ++#ifdef CONFIG_IPIPE ++#define EXC_XFER_IPIPE(n, hdlr) \ ++ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ++ __ipipe_ret_from_except) ++#endif /* CONFIG_IPIPE */ ++ + #define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) +@@ -402,7 +408,11 @@ label: + EXC_XFER_EE_LITE(0x400, handle_page_fault) + + /* 0x0500 - External Interrupt Exception */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x0500, HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + /* 0x0600 - Alignment Exception */ + START_EXCEPTION(0x0600, Alignment) +@@ -440,7 +450,11 @@ label: + lis r0,TSR_PIS@h + mtspr SPRN_TSR,r0 /* Clear the PIT exception */ + addi r3,r1,STACK_FRAME_OVERHEAD ++#ifdef CONFIG_IPIPE ++ EXC_XFER_IPIPE(0x1000, __ipipe_grab_timer) ++#else /* !CONFIG_IPIPE */ + EXC_XFER_LITE(0x1000, timer_interrupt) ++#endif /* CONFIG_IPIPE */ + + #if 0 + /* NOTE: +diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S +index 711368b..440838f 100644 +--- a/arch/powerpc/kernel/head_44x.S ++++ b/arch/powerpc/kernel/head_44x.S +@@ -310,8 +310,11 @@ interrupt_base: + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + +- /* External Input Interrupt */ +- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x0500, ExternalInput, __ipipe_grab_irq, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ ++ EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION +diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S +index d3604d7..82bb71e 100644 +--- a/arch/powerpc/kernel/head_64.S ++++ b/arch/powerpc/kernel/head_64.S +@@ -631,14 +631,20 @@ __secondary_start: + #ifdef CONFIG_PPC_ISERIES + BEGIN_FW_FTR_SECTION + ori r4,r4,MSR_EE ++#ifdef CONFIG_SOFTDISABLE + li r8,1 + stb r8,PACAHARDIRQEN(r13) ++#endif + END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) + #endif + 
BEGIN_FW_FTR_SECTION ++#ifdef CONFIG_SOFTDISABLE + stb r7,PACAHARDIRQEN(r13) ++#endif + END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) ++#ifdef CONFIG_SOFTDISABLE + stb r7,PACASOFTIRQEN(r13) ++#endif + + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 +@@ -768,8 +774,10 @@ _INIT_GLOBAL(start_here_common) + + /* Load up the kernel context */ + 5: ++#ifdef CONFIG_SOFTDISABLE + li r5,0 + stb r5,PACASOFTIRQEN(r13) /* Soft Disabled */ ++#endif + #ifdef CONFIG_PPC_ISERIES + BEGIN_FW_FTR_SECTION + mfmsr r5 +@@ -778,7 +786,9 @@ BEGIN_FW_FTR_SECTION + li r5,1 + END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) + #endif ++#ifdef CONFIG_SOFTDISABLE + stb r5,PACAHARDIRQEN(r13) /* Hard Disabled on others */ ++#endif + + bl .start_kernel + +diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S +index 6ded19d..d19d0de 100644 +--- a/arch/powerpc/kernel/head_8xx.S ++++ b/arch/powerpc/kernel/head_8xx.S +@@ -185,6 +185,12 @@ i##n: \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + ++#ifdef CONFIG_IPIPE ++#define EXC_XFER_IPIPE(n, hdlr) \ ++ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ ++ __ipipe_ret_from_except) ++#endif /* CONFIG_IPIPE */ ++ + #define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) +@@ -236,7 +242,11 @@ InstructionAccess: + EXC_XFER_EE_LITE(0x400, handle_page_fault) + + /* External interrupt */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x500, HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + /* Alignment exception */ + . = 0x600 +@@ -257,7 +267,11 @@ Alignment: + EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) + + /* Decrementer */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x900, Decrementer, __ipipe_grab_timer, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) +diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h +index 50504ae..01b3d31 100644 +--- a/arch/powerpc/kernel/head_booke.h ++++ b/arch/powerpc/kernel/head_booke.h +@@ -208,6 +208,12 @@ label: + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + ++#ifdef CONFIG_IPIPE ++#define EXC_XFER_IPIPE(n, hdlr) \ ++ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ++ __ipipe_ret_from_except) ++#endif /* CONFIG_IPIPE */ ++ + #define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) +@@ -372,6 +378,15 @@ label: + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_STD(0x0700, program_check_exception) + ++#ifdef CONFIG_IPIPE ++#define DECREMENTER_EXCEPTION \ ++ START_EXCEPTION(Decrementer) \ ++ NORMAL_EXCEPTION_PROLOG; \ ++ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ ++ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ ++ addi r3,r1,STACK_FRAME_OVERHEAD; \ ++ EXC_XFER_IPIPE(0x0900, __ipipe_grab_timer) ++#else /* !CONFIG_IPIPE */ + #define DECREMENTER_EXCEPTION \ + START_EXCEPTION(Decrementer) \ + NORMAL_EXCEPTION_PROLOG; \ +@@ -379,6 +394,7 @@ label: + mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_LITE(0x0900, timer_interrupt) ++#endif /* CONFIG_IPIPE */ + + 
#define FP_UNAVAILABLE_EXCEPTION \ + START_EXCEPTION(FloatingPointUnavailable) \ +diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S +index 975788c..b013bd3 100644 +--- a/arch/powerpc/kernel/head_fsl_booke.S ++++ b/arch/powerpc/kernel/head_fsl_booke.S +@@ -488,7 +488,11 @@ interrupt_base: + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ ++#ifdef CONFIG_IPIPE ++ EXCEPTION(0x0500, ExternalInput, __ipipe_grab_irq, EXC_XFER_IPIPE) ++#else /* !CONFIG_IPIPE */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) ++#endif /* CONFIG_IPIPE */ + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION +diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c +index 88d9c1d..e8f824a 100644 +--- a/arch/powerpc/kernel/idle.c ++++ b/arch/powerpc/kernel/idle.c +@@ -59,6 +59,7 @@ void cpu_idle(void) + tick_nohz_stop_sched_tick(1); + while (!need_resched() && !cpu_should_die()) { + ppc64_runlatch_off(); ++ ipipe_suspend_domain(); + + if (ppc_md.power_save) { + clear_thread_flag(TIF_POLLING_NRFLAG); +@@ -67,7 +68,7 @@ void cpu_idle(void) + * is ordered w.r.t. need_resched() test. + */ + smp_mb(); +- local_irq_disable(); ++ local_irq_disable_hw(); + + /* Don't trace irqs off for idle */ + stop_critical_timings(); +@@ -78,7 +79,7 @@ void cpu_idle(void) + + start_critical_timings(); + +- local_irq_enable(); ++ local_irq_enable_hw(); + set_thread_flag(TIF_POLLING_NRFLAG); + + } else { +diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S +index 5328709..8c3a2b7 100644 +--- a/arch/powerpc/kernel/idle_power4.S ++++ b/arch/powerpc/kernel/idle_power4.S +@@ -34,9 +34,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) + rldicl r0,r7,48,1 + rotldi r0,r0,16 + mtmsrd r0,1 /* hard-disable interrupts */ ++#ifdef CONFIG_SOFTDISABLE + li r0,1 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) ++#endif CONFIG_SOFTDISABLE + BEGIN_FTR_SECTION + DSSALL + sync +@@ -59,10 +61,12 @@ _GLOBAL(power4_cpu_offline_powersave) + rldicl r0,r7,48,1 + rotldi r0,r0,16 + mtmsrd r0,1 /* hard-disable interrupts */ ++#ifdef CONFIG_SOFTDISABLE + li r0,1 + li r6,0 + stb r0,PACAHARDIRQEN(r13) /* we'll hard-enable shortly */ + stb r6,PACASOFTIRQEN(r13) /* soft-disable irqs */ ++#endif + BEGIN_FTR_SECTION + DSSALL + sync +diff --git a/arch/powerpc/kernel/ipipe.c b/arch/powerpc/kernel/ipipe.c +new file mode 100644 +index 0000000..df92fce +--- /dev/null ++++ b/arch/powerpc/kernel/ipipe.c +@@ -0,0 +1,845 @@ ++/* -*- linux-c -*- ++ * linux/arch/powerpc/kernel/ipipe.c ++ * ++ * Copyright (C) 2005 Heikki Lindholm (PPC64 port). ++ * Copyright (C) 2004 Wolfgang Grandegger (Adeos/ppc port over 2.4). ++ * Copyright (C) 2002-2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Architecture-dependent I-PIPE core support for PowerPC 32/64bit. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_PPC_BOOK3E_64 ++#error "BOOK3E/64bit architecture not supported, yet" ++#endif ++ ++static void __ipipe_do_IRQ(unsigned irq, void *cookie); ++ ++static void __ipipe_do_timer(unsigned irq, void *cookie); ++ ++DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs); ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++DEFINE_PER_CPU(struct mm_struct *, ipipe_active_mm); ++EXPORT_PER_CPU_SYMBOL(ipipe_active_mm); ++#endif ++ ++#define DECREMENTER_MAX 0x7fffffff ++ ++#ifdef CONFIG_SMP ++ ++static cpumask_t __ipipe_cpu_sync_map; ++ ++static cpumask_t __ipipe_cpu_lock_map; ++ ++static ipipe_spinlock_t __ipipe_cpu_barrier = IPIPE_SPIN_LOCK_UNLOCKED; ++ ++static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); ++ ++static void (*__ipipe_cpu_sync) (void); ++ ++static DEFINE_PER_CPU(struct ipipe_ipi_struct, ipipe_ipi_message); ++ ++unsigned int __ipipe_ipi_irq = NR_IRQS + 1; /* dummy value */ ++ ++#ifdef CONFIG_DEBUGGER ++cpumask_t __ipipe_dbrk_pending; /* pending debugger break IPIs */ ++#endif ++ ++/* Always called with hw interrupts off. */ ++ ++void __ipipe_do_critical_sync(unsigned irq, void *cookie) ++{ ++ cpu_set(ipipe_processor_id(), __ipipe_cpu_sync_map); ++ ++ /* ++ * Now we are in sync with the lock requestor running on another ++ * CPU. Enter a spinning wait until he releases the global ++ * lock. ++ */ ++ spin_lock(&__ipipe_cpu_barrier); ++ ++ /* Got it. Now get out. */ ++ ++ if (__ipipe_cpu_sync) ++ /* Call the sync routine if any. */ ++ __ipipe_cpu_sync(); ++ ++ spin_unlock(&__ipipe_cpu_barrier); ++ ++ cpu_clear(ipipe_processor_id(), __ipipe_cpu_sync_map); ++} ++ ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) ++{ ++ ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = NULL; ++ ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync; ++ ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL; ++ /* Immediately handle in the current domain but *never* pass */ ++ ipd->irqs[IPIPE_CRITICAL_IPI].control = ++ IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; ++} ++ ++void __ipipe_register_ipi(unsigned int irq) ++{ ++ __ipipe_ipi_irq = irq; ++ mb(); ++#ifndef CONFIG_DEBUGGER ++ irq_desc[irq].chip->startup(irq); ++#endif ++} ++ ++static void __ipipe_ipi_demux(int irq, struct pt_regs *regs) ++{ ++ int ipi, cpu = ipipe_processor_id(); ++ struct irq_desc *desc = irq_desc + irq; ++ ++ desc->ipipe_ack(irq, desc); ++ ++ kstat_incr_irqs_this_cpu(irq, desc); ++ ++ while (per_cpu(ipipe_ipi_message, cpu).value & IPIPE_MSG_IPI_MASK) { ++ for (ipi = IPIPE_MSG_CRITICAL_IPI; ipi <= IPIPE_MSG_SERVICE_IPI4; ++ipi) { ++ if (test_and_clear_bit(ipi, &per_cpu(ipipe_ipi_message, cpu).value)) { ++ mb(); ++ __ipipe_handle_irq(ipi + IPIPE_MSG_IPI_OFFSET, NULL); ++ } ++ } ++ } ++ ++#ifdef CONFIG_DEBUGGER ++ /* ++ * The debugger IPI handler should be NMI-safe, so let's call ++ * it immediately in case the IPI is pending. 
++ */ ++ if (cpu_isset(cpu, __ipipe_dbrk_pending)) { ++ cpu_clear(cpu, __ipipe_dbrk_pending); ++ debugger_ipi(regs); ++ } ++#endif /* CONFIG_DEBUGGER */ ++ ++ __ipipe_end_irq(irq); ++} ++ ++cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask) ++{ ++ cpumask_t oldmask; ++ ++ if (irq_to_desc(irq)->chip->set_affinity == NULL) ++ return CPU_MASK_NONE; ++ ++ if (cpus_empty(cpumask)) ++ return CPU_MASK_NONE; /* Return mask value -- no change. */ ++ ++ cpus_and(cpumask, cpumask, cpu_online_map); ++ if (cpus_empty(cpumask)) ++ return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */ ++ ++ cpumask_copy(&oldmask, irq_to_desc(irq)->affinity); ++ irq_to_desc(irq)->chip->set_affinity(irq, &cpumask); ++ ++ return oldmask; ++} ++ ++int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ cpumask_t testmask; ++ int cpu; ++ ++ local_irq_save_hw(flags); ++ ++ ipi -= IPIPE_MSG_IPI_OFFSET; ++ for_each_online_cpu(cpu) { ++ if (cpu_isset(cpu, cpumask)) ++ set_bit(ipi, &per_cpu(ipipe_ipi_message, cpu).value); ++ } ++ mb(); ++ ++ if (unlikely(cpus_empty(cpumask))) ++ goto out; ++ ++ cpus_setall(testmask); ++ cpu_clear(ipipe_processor_id(), testmask); ++ if (likely(cpus_equal(cpumask, testmask))) ++ smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_IPIPE_DEMUX); ++ else { ++ /* Long path. */ ++ for_each_cpu_mask_nr(cpu, cpumask) ++ smp_ops->message_pass(cpu, PPC_MSG_IPIPE_DEMUX); ++ } ++out: ++ local_irq_restore_hw(flags); ++ ++ return 0; ++} ++ ++void __ipipe_stall_root(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++ local_irq_restore_hw(flags); ++} ++ ++unsigned long __ipipe_test_and_stall_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ local_irq_save_hw(flags); ++ x = test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++unsigned long __ipipe_test_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ local_irq_save_hw(flags); ++ x = test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * ipipe_critical_enter() -- Grab the superlock excluding all CPUs ++ * but the current one from a critical section. This lock is used when ++ * we must enforce a global critical section for a single CPU in a ++ * possibly SMP system whichever context the CPUs are running. ++ */ ++unsigned long ipipe_critical_enter(void (*syncfn) (void)) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ ++#ifdef CONFIG_SMP ++ if (likely(num_online_cpus() > 1)) { ++ /* We might be running a SMP-kernel on a UP box... */ ++ int cpu = ipipe_processor_id(); ++ cpumask_t lock_map; ++ cpumask_t others; ++ ++ if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) { ++ while (cpu_test_and_set(BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) { ++ int n = 0; ++ do { ++ cpu_relax(); ++ } while (++n < cpu); ++ } ++ ++ spin_lock(&__ipipe_cpu_barrier); ++ ++ __ipipe_cpu_sync = syncfn; ++ ++ /* Send the sync IPI to all processors but the current one. 
*/ ++ cpus_setall(others); ++ cpu_clear(ipipe_processor_id(), others); ++ __ipipe_send_ipi(IPIPE_CRITICAL_IPI, others); ++ ++ cpus_andnot(lock_map, cpu_online_map, ++ __ipipe_cpu_lock_map); ++ ++ while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) ++ cpu_relax(); ++ } ++ ++ atomic_inc(&__ipipe_critical_count); ++ } ++#endif /* CONFIG_SMP */ ++ ++ return flags; ++} ++ ++/* ipipe_critical_exit() -- Release the superlock. */ ++ ++void ipipe_critical_exit(unsigned long flags) ++{ ++#ifdef CONFIG_SMP ++ if (likely(num_online_cpus() > 1)) { ++ /* We might be running a SMP-kernel on a UP box... */ ++ if (atomic_dec_and_test(&__ipipe_critical_count)) { ++ spin_unlock(&__ipipe_cpu_barrier); ++ ++ while (!cpus_empty(__ipipe_cpu_sync_map)) ++ cpu_relax(); ++ ++ cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map); ++ cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map); ++ } ++ } ++#endif /* CONFIG_SMP */ ++ ++ local_irq_restore_hw(flags); ++} ++ ++void __ipipe_init_platform(void) ++{ ++ unsigned int virq; ++ ++ /* ++ * Allocate a virtual IRQ for the decrementer trap early to ++ * get it mapped to IPIPE_VIRQ_BASE ++ */ ++ ++ virq = ipipe_alloc_virq(); ++ ++ if (virq != IPIPE_TIMER_VIRQ) ++ panic("I-pipe: cannot reserve timer virq #%d (got #%d)", ++ IPIPE_TIMER_VIRQ, virq); ++ ++#ifdef CONFIG_SMP ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_CRITICAL_IPI) ++ panic("I-pipe: cannot reserve critical IPI virq #%d (got #%d)", ++ IPIPE_CRITICAL_IPI, virq); ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_SERVICE_IPI0) ++ panic("I-pipe: cannot reserve service IPI 0 virq #%d (got #%d)", ++ IPIPE_SERVICE_IPI0, virq); ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_SERVICE_IPI1) ++ panic("I-pipe: cannot reserve service IPI 1 virq #%d (got #%d)", ++ IPIPE_SERVICE_IPI1, virq); ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_SERVICE_IPI2) ++ panic("I-pipe: cannot reserve service IPI 2 virq #%d (got #%d)", ++ IPIPE_SERVICE_IPI2, virq); ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_SERVICE_IPI3) ++ panic("I-pipe: cannot reserve service IPI 3 virq #%d (got #%d)", ++ IPIPE_SERVICE_IPI3, virq); ++ virq = ipipe_alloc_virq(); ++ if (virq != IPIPE_SERVICE_IPI4) ++ panic("I-pipe: cannot reserve service IPI 4 virq #%d (got #%d)", ++ IPIPE_SERVICE_IPI4, virq); ++#endif ++} ++ ++void __ipipe_end_irq(unsigned irq) ++{ ++ struct irq_desc *desc = get_irq_desc(irq); ++ desc->ipipe_end(irq, desc); ++} ++ ++void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) ++{ ++ get_irq_desc(irq)->status &= ~IRQ_DISABLED; ++} ++ ++static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->ipipe_ack(irq, desc); ++} ++ ++/* ++ * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw ++ * interrupts are off, and secondary CPUs are still lost in space. ++ */ ++void __ipipe_enable_pipeline(void) ++{ ++ unsigned long flags; ++ unsigned irq; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ /* First, virtualize all interrupts from the root domain. */ ++ ++ for (irq = 0; irq < NR_IRQS; irq++) ++ ipipe_virtualize_irq(ipipe_root_domain, ++ irq, ++ &__ipipe_do_IRQ, NULL, ++ &__ipipe_ack_irq, ++ IPIPE_HANDLE_MASK | IPIPE_PASS_MASK); ++ /* ++ * We use a virtual IRQ to handle the timer irq (decrementer trap) ++ * which has been allocated early in __ipipe_init_platform(). 
++ */ ++ ++ ipipe_virtualize_irq(ipipe_root_domain, ++ IPIPE_TIMER_VIRQ, ++ &__ipipe_do_timer, NULL, ++ NULL, IPIPE_HANDLE_MASK | IPIPE_PASS_MASK); ++ ++ ipipe_critical_exit(flags); ++} ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *info) ++{ ++ info->ncpus = num_online_cpus(); ++ info->cpufreq = ipipe_cpu_freq(); ++ info->archdep.tmirq = IPIPE_TIMER_VIRQ; ++ info->archdep.tmfreq = info->cpufreq; ++ ++ return 0; ++} ++ ++/* ++ * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline ++ * just like if it has been actually received from a hw source. Also ++ * works for virtual interrupts. ++ */ ++int ipipe_trigger_irq(unsigned irq) ++{ ++ unsigned long flags; ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ if (irq >= IPIPE_NR_IRQS || ++ (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) ++ return -EINVAL; ++#endif ++ local_irq_save_hw(flags); ++ __ipipe_handle_irq(irq, NULL); ++ local_irq_restore_hw(flags); ++ ++ return 1; ++} ++ ++/* ++ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic ++ * interrupt protection log is maintained here for each domain. Hw ++ * interrupts are off on entry. ++ */ ++void __ipipe_handle_irq(int irq, struct pt_regs *regs) ++{ ++ struct ipipe_domain *this_domain, *next_domain; ++ struct list_head *head, *pos; ++ int m_ack; ++ ++ /* Software-triggered IRQs do not need any ack. */ ++ m_ack = (regs == NULL); ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ if (unlikely(irq >= IPIPE_NR_IRQS)) { ++ printk(KERN_ERR "I-pipe: spurious interrupt %d\n", irq); ++ return; ++ } ++#endif ++ this_domain = __ipipe_current_domain; ++ ++ if (unlikely(test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))) ++ head = &this_domain->p_link; ++ else { ++ head = __ipipe_pipeline.next; ++ next_domain = list_entry(head, struct ipipe_domain, p_link); ++ if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { ++ if (!m_ack && next_domain->irqs[irq].acknowledge) ++ next_domain->irqs[irq].acknowledge(irq, irq_desc + irq); ++ __ipipe_dispatch_wired(next_domain, irq); ++ return; ++ } ++ } ++ ++ /* Ack the interrupt. */ ++ ++ pos = head; ++ ++ while (pos != &__ipipe_pipeline) { ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ prefetch(next_domain); ++ /* ++ * For each domain handling the incoming IRQ, mark it as ++ * pending in its log. ++ */ ++ if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { ++ /* ++ * Domains that handle this IRQ are polled for ++ * acknowledging it by decreasing priority order. The ++ * interrupt must be made pending _first_ in the ++ * domain's status flags before the PIC is unlocked. ++ */ ++ __ipipe_set_irq_pending(next_domain, irq); ++ ++ if (!m_ack && next_domain->irqs[irq].acknowledge) { ++ next_domain->irqs[irq].acknowledge(irq, irq_desc + irq); ++ m_ack = 1; ++ } ++ } ++ ++ /* ++ * If the domain does not want the IRQ to be passed down the ++ * interrupt pipe, exit the loop now. ++ */ ++ if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) ++ break; ++ ++ pos = next_domain->p_link.next; ++ } ++ ++ /* ++ * If the interrupt preempted the head domain, then do not ++ * even try to walk the pipeline, unless an interrupt is ++ * pending for it. 
++ */ ++ if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) && ++ ipipe_head_cpudom_var(irqpend_himask) == 0) ++ return; ++ ++ /* ++ * Now walk the pipeline, yielding control to the highest ++ * priority domain that has pending interrupt(s) or ++ * immediately to the current domain if the interrupt has been ++ * marked as 'sticky'. This search does not go beyond the ++ * current domain in the pipeline. ++ */ ++ ++ __ipipe_walk_pipeline(head); ++} ++ ++asmlinkage int __ipipe_grab_irq(struct pt_regs *regs) ++{ ++ extern int ppc_spurious_interrupts; ++ int irq; ++ ++ irq = ppc_md.get_irq(); ++ if (unlikely(irq == NO_IRQ)) { ++ ppc_spurious_interrupts++; ++ goto root_checks; ++ } ++ ++ if (likely(irq != NO_IRQ_IGNORE)) { ++ ipipe_trace_irq_entry(irq); ++#ifdef CONFIG_SMP ++ /* Check for cascaded I-pipe IPIs */ ++ if (irq == __ipipe_ipi_irq) { ++ __ipipe_ipi_demux(irq, regs); ++ ipipe_trace_irq_exit(irq); ++ goto root_checks; ++ } ++#endif /* CONFIG_SMP */ ++ __ipipe_handle_irq(irq, regs); ++ ipipe_trace_irq_exit(irq); ++ } ++ ++root_checks: ++ ++ if (__ipipe_root_domain_p) { ++#ifdef CONFIG_PPC_970_NAP ++ struct thread_info *ti = current_thread_info(); ++ /* Emulate the napping check when 100% sure we do run ++ * over the root context. */ ++ if (test_and_clear_bit(TLF_NAPPING, &ti->local_flags)) ++ regs->nip = regs->link; ++#endif ++#ifdef CONFIG_PPC64 ++ ppc64_runlatch_on(); ++#endif ++ if (!test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static void __ipipe_do_IRQ(unsigned irq, void *cookie) ++{ ++ struct pt_regs *old_regs; ++ ++ /* Provide a valid register frame, even if not the exact one. */ ++ old_regs = set_irq_regs(&__raw_get_cpu_var(__ipipe_tick_regs)); ++ irq_enter(); ++ check_stack_overflow(); ++ handle_one_irq(irq); ++ irq_exit(); ++ set_irq_regs(old_regs); ++} ++ ++static void __ipipe_do_timer(unsigned irq, void *cookie) ++{ ++ check_stack_overflow(); ++ timer_interrupt(&__raw_get_cpu_var(__ipipe_tick_regs)); ++} ++ ++asmlinkage int __ipipe_grab_timer(struct pt_regs *regs) ++{ ++ struct ipipe_domain *ipd, *head; ++ ++ ipd = __ipipe_current_domain; ++ head = __ipipe_pipeline_head(); ++ ++ set_dec(DECREMENTER_MAX); ++ ++ ipipe_trace_irq_entry(IPIPE_TIMER_VIRQ); ++ ++ __raw_get_cpu_var(__ipipe_tick_regs).msr = regs->msr; /* for timer_interrupt() */ ++ __raw_get_cpu_var(__ipipe_tick_regs).nip = regs->nip; ++ ++ if (ipd != &ipipe_root) ++ __raw_get_cpu_var(__ipipe_tick_regs).msr &= ~MSR_EE; ++ else if (unlikely(list_empty(&__ipipe_pipeline))) ++ head = ipd; ++ ++ if (test_bit(IPIPE_WIRED_FLAG, &head->irqs[IPIPE_TIMER_VIRQ].control)) ++ /* ++ * Finding a wired IRQ means that we do have a ++ * registered head domain as well. The decrementer ++ * interrupt requires no acknowledge, so we may branch ++ * to the wired IRQ dispatcher directly. Additionally, ++ * we may bypass checks for locked interrupts or ++ * stalled stage (the decrementer cannot be locked and ++ * the head domain is obviously not stalled since we ++ * got there). ++ */ ++ __ipipe_dispatch_wired_nocheck(head, IPIPE_TIMER_VIRQ); ++ else ++ __ipipe_handle_irq(IPIPE_TIMER_VIRQ, NULL); ++ ++ ipipe_trace_irq_exit(IPIPE_TIMER_VIRQ); ++ ++ if (ipd == &ipipe_root) { ++#ifdef CONFIG_PPC_970_NAP ++ struct thread_info *ti = current_thread_info(); ++ /* Emulate the napping check when 100% sure we do run ++ * over the root context. 
*/ ++ if (test_and_clear_bit(TLF_NAPPING, &ti->local_flags)) ++ regs->nip = regs->link; ++#endif ++#ifdef CONFIG_PPC64 ++ ppc64_runlatch_on(); ++#endif ++ if (!test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++asmlinkage notrace int __ipipe_check_root(void) /* hw IRQs off */ ++{ ++ return __ipipe_root_domain_p; ++} ++ ++#ifdef CONFIG_PPC64 ++ ++#include ++#include ++ ++asmlinkage notrace void __ipipe_restore_if_root(unsigned long x) /* hw IRQs on */ ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ ++ if (likely(!__ipipe_root_domain_p)) ++ goto done; ++ ++ p = ipipe_root_cpudom_ptr(); ++ ++ if (x) ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ else ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if ((int)mfspr(SPRN_DEC) < 0) ++ mtspr(SPRN_DEC, 1); ++ ++ /* ++ * Force the delivery of pending soft-disabled interrupts on ++ * PS3. Any HV call will have this side effect. ++ */ ++ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { ++ u64 tmp; ++ lv1_get_version_info(&tmp); ++ } ++done: ++ local_irq_restore_hw(flags); ++} ++ ++#endif /* CONFIG_PPC64 */ ++ ++#ifdef CONFIG_PREEMPT ++ ++asmlinkage void __sched preempt_schedule_irq(void); ++ ++void __sched __ipipe_preempt_schedule_irq(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ /* ++ * We have no IRQ state fixup on entry to exceptions, so we ++ * have to stall the root stage before rescheduling. ++ */ ++#ifdef CONFIG_IPIPE_DEBUG ++ BUG_ON(!irqs_disabled_hw()); ++#endif ++ local_irq_save(flags); ++ local_irq_enable_hw(); ++ preempt_schedule_irq(); /* Ok, may reschedule now. */ ++ local_irq_disable_hw(); ++ /* ++ * Flush any pending interrupt that may have been logged after ++ * preempt_schedule_irq() stalled the root stage before ++ * returning to us, and now. ++ */ ++ p = ipipe_root_cpudom_ptr(); ++ if (unlikely(p->irqpend_himask != 0)) { ++ add_preempt_count(PREEMPT_ACTIVE); ++ clear_bit(IPIPE_STALL_FLAG, &p->status); ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ sub_preempt_count(PREEMPT_ACTIVE); ++ } ++ ++ __local_irq_restore_nosync(flags); ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ++asmlinkage notrace void __ipipe_trace_irqsoff(void) ++{ ++ ipipe_trace_irqsoff(); ++} ++ ++asmlinkage notrace void __ipipe_trace_irqson(void) ++{ ++ ipipe_trace_irqson(); ++} ++ ++asmlinkage notrace void __ipipe_trace_irqsx(unsigned long msr_ee) ++{ ++ if (msr_ee) ++ ipipe_trace_irqson(); ++ else ++ ipipe_trace_irqsoff(); ++} ++ ++#endif ++ ++asmlinkage int __ipipe_syscall_root(struct pt_regs *regs) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int ret; ++ ++#ifdef CONFIG_PPC64 ++ WARN_ON_ONCE(!irqs_disabled_hw()); ++ /* ++ * Unlike ppc32, hw interrupts are off on entry here. We did ++ * not copy the stall state on entry yet, so do it now. ++ */ ++ p = ipipe_root_cpudom_ptr(); ++ regs->softe = !test_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ /* We ran DISABLE_INTS before being sent to the syscall ++ * dispatcher, so we need to unstall the root stage, unless ++ * the root domain is not current. 
*/ ++ if (__ipipe_root_domain_p) ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++#else ++ WARN_ON_ONCE(irqs_disabled_hw()); ++#endif ++ /* ++ * This routine either returns: ++ * 0 -- if the syscall is to be passed to Linux; ++ * >0 -- if the syscall should not be passed to Linux, and no ++ * tail work should be performed; ++ * <0 -- if the syscall should not be passed to Linux but the ++ * tail work has to be performed (for handling signals etc). ++ */ ++ ++ if (!__ipipe_syscall_watched_p(current, regs->gpr[0]) || ++ !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL)) ++ return 0; ++ ++#ifdef CONFIG_PPC64 ++ local_irq_enable_hw(); ++#endif ++ ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs); ++ ++ local_irq_disable_hw(); ++ ++ if (!__ipipe_root_domain_p) { ++#ifdef CONFIG_PPC32 ++ local_irq_enable_hw(); ++#endif ++ return 1; ++ } ++ ++ p = ipipe_root_cpudom_ptr(); ++ if ((p->irqpend_himask & IPIPE_IRQMASK_VIRT) != 0) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); ++ ++#ifdef CONFIG_PPC32 ++ local_irq_enable_hw(); ++#endif ++ ++ return -ret; ++} ++ ++void __ipipe_pin_range_globally(unsigned long start, unsigned long end) ++{ ++ /* We don't support this. */ ++} ++ ++#ifdef CONFIG_SMP ++EXPORT_SYMBOL(__ipipe_stall_root); ++EXPORT_SYMBOL(__ipipe_test_root); ++EXPORT_SYMBOL(__ipipe_test_and_stall_root); ++#else ++EXPORT_SYMBOL_GPL(last_task_used_math); ++#endif ++ ++EXPORT_SYMBOL_GPL(__switch_to); ++EXPORT_SYMBOL_GPL(show_stack); ++EXPORT_SYMBOL_GPL(_switch); ++EXPORT_SYMBOL_GPL(tasklist_lock); ++#ifdef CONFIG_PPC64 ++EXPORT_PER_CPU_SYMBOL(ppc64_tlb_batch); ++EXPORT_SYMBOL_GPL(switch_slb); ++EXPORT_SYMBOL_GPL(switch_stab); ++EXPORT_SYMBOL_GPL(__flush_tlb_pending); ++EXPORT_SYMBOL_GPL(mmu_linear_psize); ++EXPORT_SYMBOL_GPL(mmu_psize_defs); ++#else /* !CONFIG_PPC64 */ ++void atomic_set_mask(unsigned long mask, unsigned long *ptr); ++void atomic_clear_mask(unsigned long mask, unsigned long *ptr); ++#ifdef FEW_CONTEXTS ++EXPORT_SYMBOL_GPL(nr_free_contexts); ++EXPORT_SYMBOL_GPL(context_mm); ++EXPORT_SYMBOL_GPL(steal_context); ++#endif /* !FEW_CONTEXTS */ ++EXPORT_SYMBOL_GPL(atomic_set_mask); ++EXPORT_SYMBOL_GPL(atomic_clear_mask); ++#endif /* !CONFIG_PPC64 */ +diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c +index e5d1211..9ab7ff4 100644 +--- a/arch/powerpc/kernel/irq.c ++++ b/arch/powerpc/kernel/irq.c +@@ -72,7 +72,7 @@ + #endif + + int __irq_offset_value; +-static int ppc_spurious_interrupts; ++int ppc_spurious_interrupts; + + #ifdef CONFIG_PPC32 + EXPORT_SYMBOL(__irq_offset_value); +@@ -89,6 +89,8 @@ EXPORT_SYMBOL(irq_desc); + + int distribute_irqs = 1; + ++#ifdef CONFIG_SOFTDISABLE ++ + static inline notrace unsigned long get_hard_enabled(void) + { + unsigned long enabled; +@@ -173,6 +175,9 @@ notrace void raw_local_irq_restore(unsigned long en) + __hard_irq_enable(); + } + EXPORT_SYMBOL(raw_local_irq_restore); ++ ++#endif /* !CONFIG_SOFTDISABLE */ ++ + #endif /* CONFIG_PPC64 */ + + int show_interrupts(struct seq_file *p, void *v) +@@ -257,7 +262,7 @@ void fixup_irqs(cpumask_t map) + #endif + + #ifdef CONFIG_IRQSTACKS +-static inline void handle_one_irq(unsigned int irq) ++static inline void __handle_one_irq(unsigned int irq) + { + struct thread_info *curtp, *irqtp; + unsigned long saved_sp_limit; +@@ -298,13 +303,13 @@ static inline void handle_one_irq(unsigned int irq) + set_bits(irqtp->flags, &curtp->flags); + } + #else +-static inline void handle_one_irq(unsigned int irq) ++static inline void __handle_one_irq(unsigned int irq) + { + generic_handle_irq(irq); + } + #endif 
+ +-static inline void check_stack_overflow(void) ++static inline void __check_stack_overflow(void) + { + #ifdef CONFIG_DEBUG_STACKOVERFLOW + long sp; +@@ -320,6 +325,16 @@ static inline void check_stack_overflow(void) + #endif + } + ++void handle_one_irq(unsigned int irq) ++{ ++ __handle_one_irq(irq); ++} ++ ++void check_stack_overflow(void) ++{ ++ __check_stack_overflow(); ++} ++ + void do_IRQ(struct pt_regs *regs) + { + struct pt_regs *old_regs = set_irq_regs(regs); +diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c +index 69956f7..e1c9db2 100644 +--- a/arch/powerpc/kernel/ppc_ksyms.c ++++ b/arch/powerpc/kernel/ppc_ksyms.c +@@ -123,6 +123,9 @@ EXPORT_SYMBOL(flush_dcache_range); + #ifdef CONFIG_SMP + #ifdef CONFIG_PPC32 + EXPORT_SYMBOL(smp_hw_index); ++#ifdef CONFIG_IPIPE ++EXPORT_SYMBOL(smp_logical_index); ++#endif + #endif + #endif + +diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c +index 0f945dc..9f6b06d 100644 +--- a/arch/powerpc/kernel/process.c ++++ b/arch/powerpc/kernel/process.c +@@ -98,8 +98,12 @@ void flush_fp_to_thread(struct task_struct *tsk) + + void enable_kernel_fp(void) + { ++ unsigned long flags; ++ + WARN_ON(preemptible()); + ++ local_irq_save_hw_cond(flags); ++ + #ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); +@@ -108,6 +112,7 @@ void enable_kernel_fp(void) + #else + giveup_fpu(last_task_used_math); + #endif /* CONFIG_SMP */ ++ local_irq_restore_hw_cond(flags); + } + EXPORT_SYMBOL(enable_kernel_fp); + +@@ -402,7 +407,7 @@ struct task_struct *__switch_to(struct task_struct *prev, + } + #endif + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + + account_system_vtime(current); + account_process_vtime(current); +@@ -421,7 +426,7 @@ struct task_struct *__switch_to(struct task_struct *prev, + vsid = get_vsid(current->mm->context.id, 0, ssize); + + /* current is still really us, just a different us :-) */ +- if (current->mm) { ++ if (__ipipe_root_domain_p && current->mm) { + #ifdef CONFIG_PPC_64K_PAGES + __hash_page_64K(0, _PAGE_USER|_PAGE_RW, vsid, ¤t->zero_pte.pte, 0x300, 1, ssize); + #else +@@ -430,7 +435,7 @@ struct task_struct *__switch_to(struct task_struct *prev, + } + #endif + +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return last; + } +@@ -1102,7 +1107,7 @@ void dump_stack(void) + } + EXPORT_SYMBOL(dump_stack); + +-#ifdef CONFIG_PPC64 ++#ifdef CONFIG_RUNLATCH + void ppc64_runlatch_on(void) + { + unsigned long ctrl; +diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh +index 1ac136b..78d5d7c 100644 +--- a/arch/powerpc/kernel/prom_init_check.sh ++++ b/arch/powerpc/kernel/prom_init_check.sh +@@ -20,7 +20,7 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush + _end enter_prom memcpy memset reloc_offset __secondary_hold + __secondary_hold_acknowledge __secondary_hold_spinloop __start + strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 +-reloc_got2 kernstart_addr memstart_addr linux_banner" ++reloc_got2 kernstart_addr memstart_addr linux_banner _mcount" + + NM="$1" + OBJ="$2" +diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c +index 53bcf3d..de918cb 100644 +--- a/arch/powerpc/kernel/setup_32.c ++++ b/arch/powerpc/kernel/setup_32.c +@@ -52,6 +52,9 @@ EXPORT_SYMBOL_GPL(boot_cpuid); + int boot_cpuid_phys; + + int smp_hw_index[NR_CPUS]; ++#ifdef CONFIG_IPIPE ++int smp_logical_index[NR_CPUS]; ++#endif + + unsigned long 
ISA_DMA_THRESHOLD; + unsigned int DMA_MODE_READ; +diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c +index 04f638d..f4fe166 100644 +--- a/arch/powerpc/kernel/setup_64.c ++++ b/arch/powerpc/kernel/setup_64.c +@@ -224,8 +224,10 @@ void __init early_setup(unsigned long dt_ptr) + #ifdef CONFIG_SMP + void early_setup_secondary(void) + { ++#ifdef CONFIG_SOFTDISABLE + /* Mark interrupts enabled in PACA */ + get_paca()->soft_enabled = 0; ++#endif + + /* Initialize the hash table or TLB handling */ + early_init_mmu_secondary(); +@@ -331,6 +333,12 @@ static void __init initialize_cache_info(void) + */ + void __init setup_system(void) + { ++#ifdef CONFIG_IPIPE ++ /* Early temporary init, before per-cpu areas are moved to ++ * their final location. */ ++ get_paca()->root_percpu = (u64)&ipipe_percpudom(&ipipe_root, status, 0); ++#endif ++ + DBG(" -> setup_system()\n"); + + /* Apply the CPUs-specific and firmware specific fixups to kernel +@@ -640,6 +648,10 @@ void __init setup_per_cpu_areas(void) + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; ++#ifdef CONFIG_IPIPE ++ /* Reset pointer to the relocated per-cpu root domain data. */ ++ get_paca()->root_percpu = (u64)&ipipe_percpudom(&ipipe_root, status, 0); ++#endif + } + #endif + +diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c +index 9b86a74..2f94924 100644 +--- a/arch/powerpc/kernel/smp.c ++++ b/arch/powerpc/kernel/smp.c +@@ -154,7 +154,7 @@ const char *smp_ipi_name[] = { + [PPC_MSG_CALL_FUNCTION] = "ipi call function", + [PPC_MSG_RESCHEDULE] = "ipi reschedule", + [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single", +- [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", ++ [PPC_MSG_DEBUGGER_BREAK] = "ipi I-pipe/debugger", + }; + + /* optional function to request ipi, for controllers with >= 4 ipis */ +@@ -165,11 +165,17 @@ int smp_request_message_ipi(int virq, int msg) + if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + return -EINVAL; + } ++#ifdef CONFIG_IPIPE ++ if (msg == PPC_MSG_DEBUGGER_BREAK) ++ /* Piggyback the debugger IPI for the I-pipe. 
*/ ++ __ipipe_register_ipi(virq); ++#endif + #if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) + if (msg == PPC_MSG_DEBUGGER_BREAK) { + return 1; + } + #endif ++ + err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU, + smp_ipi_name[msg], 0); + WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", +@@ -200,8 +206,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) + #ifdef CONFIG_DEBUGGER + void smp_send_debugger_break(int cpu) + { +- if (likely(smp_ops)) ++ if (likely(smp_ops)) { ++#ifdef CONFIG_IPIPE ++ cpu_set(cpu, __ipipe_dbrk_pending); ++#endif + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); ++ } + } + #endif + +@@ -210,6 +220,10 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) + { + crash_ipi_function_ptr = crash_ipi_callback; + if (crash_ipi_callback && smp_ops) { ++#ifdef CONFIG_IPIPE ++ cpus_setall(__ipipe_dbrk_pending); ++ cpu_clear(ipipe_processor_id(), __ipipe_dbrk_pending); ++#endif + mb(); + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + } +@@ -488,6 +502,9 @@ int __devinit start_secondary(void *unused) + struct device_node *l2_cache; + int i, base; + ++#if defined(CONFIG_IPIPE) && defined(CONFIG_PPC64) ++ get_paca()->root_percpu = (u64)&ipipe_percpudom(&ipipe_root, status, cpu); ++#endif + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + +diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c +index 5566e2d..6dab166 100644 +--- a/arch/powerpc/kernel/time.c ++++ b/arch/powerpc/kernel/time.c +@@ -126,6 +126,8 @@ struct decrementer_clock { + + static DEFINE_PER_CPU(struct decrementer_clock, decrementers); + ++DEFINE_PER_CPU(int, disarm_decr); ++ + #ifdef CONFIG_PPC_ISERIES + static unsigned long __initdata iSeries_recal_titan; + static signed long __initdata iSeries_recal_tb; +@@ -569,11 +571,13 @@ void timer_interrupt(struct pt_regs * regs) + struct pt_regs *old_regs; + struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); + struct clock_event_device *evt = &decrementer->event; ++ int cpu = smp_processor_id(); + u64 now; + + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continuue to take decrementer exceptions */ +- set_dec(DECREMENTER_MAX); ++ if (!per_cpu(disarm_decr, cpu)) ++ set_dec(DECREMENTER_MAX); + + #ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS + extern spinlock_t native_tlbie_lock; +@@ -592,16 +596,25 @@ void timer_interrupt(struct pt_regs * regs) + do_IRQ(regs); + #endif + +- now = get_tb_or_rtc(); +- if (now < decrementer->next_tb) { +- /* not time for this event yet */ +- now = decrementer->next_tb - now; +- if (now <= DECREMENTER_MAX) +- set_dec((int)now); +- return; ++ if (!per_cpu(disarm_decr, cpu)) { ++ now = get_tb_or_rtc(); ++ if (now < decrementer->next_tb) { ++ /* not time for this event yet */ ++ now = decrementer->next_tb - now; ++ if (now <= DECREMENTER_MAX) ++ set_dec((int)now); ++ return; ++ } + } + old_regs = set_irq_regs(regs); ++#ifndef CONFIG_IPIPE ++ /* ++ * The timer interrupt is a virtual one when the I-pipe is ++ * active, therefore we already called irq_enter() for it (see ++ * __ipipe_run_isr). 
++ */ + irq_enter(); ++#endif + + calculate_steal_time(); + +@@ -626,7 +639,9 @@ void timer_interrupt(struct pt_regs * regs) + } + #endif + ++#ifndef CONFIG_IPIPE + irq_exit(); ++#endif + set_irq_regs(old_regs); + } + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 6f0ae1a..c08ee50 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -491,6 +491,9 @@ void machine_check_exception(struct pt_regs *regs) + { + int recover = 0; + ++ if (ipipe_trap_notify(IPIPE_TRAP_MCE, regs)) ++ return; ++ + /* See if any machine dependent calls. In theory, we would want + * to call the CPU first, and call the ppc_md. one if the CPU + * one returns a positive number. However there is existing code +@@ -549,11 +552,17 @@ void unknown_exception(struct pt_regs *regs) + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + ++ if (ipipe_trap_notify(IPIPE_TRAP_UNKNOWN, regs)) ++ return; ++ + _exception(SIGTRAP, regs, 0, 0); + } + + void instruction_breakpoint_exception(struct pt_regs *regs) + { ++ if (ipipe_trap_notify(IPIPE_TRAP_IABR, regs)) ++ return; ++ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; +@@ -564,6 +573,8 @@ void instruction_breakpoint_exception(struct pt_regs *regs) + + void RunModeException(struct pt_regs *regs) + { ++ if (ipipe_trap_notify(IPIPE_TRAP_RM, regs)) ++ return; + _exception(SIGTRAP, regs, 0, 0); + } + +@@ -571,6 +582,9 @@ void __kprobes single_step_exception(struct pt_regs *regs) + { + regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + ++ if (ipipe_trap_notify(IPIPE_TRAP_SSTEP, regs)) ++ return; ++ + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; +@@ -590,6 +604,8 @@ static void emulate_single_step(struct pt_regs *regs) + { + if (single_stepping(regs)) { + clear_single_step(regs); ++ if (ipipe_trap_notify(IPIPE_TRAP_SSTEP, regs)) ++ return; + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } + } +@@ -816,6 +832,9 @@ void __kprobes program_check_exception(struct pt_regs *regs) + /* We can now get here via a FP Unavailable exception if the core + * has no FPU, in that case the reason flags will be 0 */ + ++ if (ipipe_trap_notify(IPIPE_TRAP_PCE, regs)) ++ return; ++ + if (reason & REASON_FP) { + /* IEEE FP exception */ + parse_fpe(regs); +@@ -888,6 +907,9 @@ void alignment_exception(struct pt_regs *regs) + { + int sig, code, fixed = 0; + ++ if (ipipe_trap_notify(IPIPE_TRAP_ALIGNMENT, regs)) ++ return; ++ + /* we don't implement logging of alignment exceptions */ + if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + fixed = fix_alignment(regs); +@@ -925,6 +947,8 @@ void nonrecoverable_exception(struct pt_regs *regs) + { + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); ++ if (ipipe_trap_notify(IPIPE_TRAP_NREC, regs)) ++ return; + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); + } +@@ -940,11 +964,16 @@ void kernel_fp_unavailable_exception(struct pt_regs *regs) + { + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); ++ if (ipipe_trap_notify(IPIPE_TRAP_KFPUNAVAIL, regs)) ++ return; + die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); + } + + void altivec_unavailable_exception(struct pt_regs *regs) + { ++ if (ipipe_trap_notify(IPIPE_TRAP_ALTUNAVAIL, regs)) ++ return; ++ + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't 
support altivec. */ +@@ -985,6 +1014,9 @@ void SoftwareEmulation(struct pt_regs *regs) + int errcode; + #endif + ++ if (ipipe_trap_notify(IPIPE_TRAP_SOFTEMU, regs)) ++ return; ++ + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { +@@ -1041,6 +1073,9 @@ void SoftwareEmulation(struct pt_regs *regs) + + void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) + { ++ if (ipipe_trap_notify(IPIPE_TRAP_DEBUG, regs)) ++ return; ++ + /* Hack alert: On BookE, Branch Taken stops on the branch itself, while + * on server, it stops on the target of the branch. In order to simulate + * the server behaviour, we thus restart right away with a single step +@@ -1121,6 +1156,9 @@ void altivec_assist_exception(struct pt_regs *regs) + { + int err; + ++ if (ipipe_trap_notify(IPIPE_TRAP_ALTASSIST, regs)) ++ return; ++ + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); +@@ -1192,8 +1230,11 @@ void CacheLockingException(struct pt_regs *regs, unsigned long address, + * as priv ops, in the future we could try to do + * something smarter + */ +- if (error_code & (ESR_DLK|ESR_ILK)) ++ if (error_code & (ESR_DLK|ESR_ILK)) { ++ if (ipipe_trap_notify(IPIPE_TRAP_CACHE, regs)) ++ return; + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); ++ } + return; + } + #endif /* CONFIG_FSL_BOOKE */ +@@ -1207,6 +1248,9 @@ void SPEFloatingPointException(struct pt_regs *regs) + int code = 0; + int err; + ++ if (ipipe_trap_notify(IPIPE_TRAP_SPE, regs)) ++ return; ++ + preempt_disable(); + if (regs->msr & MSR_SPE) + giveup_spe(current); +@@ -1292,6 +1336,8 @@ void unrecoverable_exception(struct pt_regs *regs) + { + printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", + regs->trap, regs->nip); ++ if (ipipe_trap_notify(IPIPE_TRAP_NREC, regs)) ++ return; + die("Unrecoverable exception", regs, SIGABRT); + } + +diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c +index 7c975d4..0c163f2 100644 +--- a/arch/powerpc/lib/code-patching.c ++++ b/arch/powerpc/lib/code-patching.c +@@ -15,17 +15,20 @@ + #include + + ++notrace + void patch_instruction(unsigned int *addr, unsigned int instr) + { + *addr = instr; + asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr)); + } + ++notrace + void patch_branch(unsigned int *addr, unsigned long target, int flags) + { + patch_instruction(addr, create_branch(addr, target, flags)); + } + ++notrace + unsigned int create_branch(const unsigned int *addr, + unsigned long target, int flags) + { +@@ -46,6 +49,7 @@ unsigned int create_branch(const unsigned int *addr, + return instruction; + } + ++notrace + unsigned int create_cond_branch(const unsigned int *addr, + unsigned long target, int flags) + { +@@ -66,21 +70,25 @@ unsigned int create_cond_branch(const unsigned int *addr, + return instruction; + } + ++notrace + static unsigned int branch_opcode(unsigned int instr) + { + return (instr >> 26) & 0x3F; + } + ++notrace + static int instr_is_branch_iform(unsigned int instr) + { + return branch_opcode(instr) == 18; + } + ++notrace + static int instr_is_branch_bform(unsigned int instr) + { + return branch_opcode(instr) == 16; + } + ++notrace + int instr_is_relative_branch(unsigned int instr) + { + if (instr & BRANCH_ABSOLUTE) +@@ -89,6 +97,7 @@ int instr_is_relative_branch(unsigned int instr) + return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); + } + ++notrace + static unsigned long branch_iform_target(const unsigned int *instr) + { + signed long imm; +@@ -105,6 +114,7 @@ 
static unsigned long branch_iform_target(const unsigned int *instr) + return (unsigned long)imm; + } + ++notrace + static unsigned long branch_bform_target(const unsigned int *instr) + { + signed long imm; +@@ -121,6 +131,7 @@ static unsigned long branch_bform_target(const unsigned int *instr) + return (unsigned long)imm; + } + ++notrace + unsigned long branch_target(const unsigned int *instr) + { + if (instr_is_branch_iform(*instr)) +@@ -131,6 +142,7 @@ unsigned long branch_target(const unsigned int *instr) + return 0; + } + ++notrace + int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) + { + if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr)) +@@ -139,6 +151,7 @@ int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr) + return 0; + } + ++notrace + unsigned int translate_branch(const unsigned int *dest, const unsigned int *src) + { + unsigned long target; +diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c +index 7e8865b..c9e779a 100644 +--- a/arch/powerpc/lib/feature-fixups.c ++++ b/arch/powerpc/lib/feature-fixups.c +@@ -28,6 +28,7 @@ struct fixup_entry { + long alt_end_off; + }; + ++notrace + static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) + { + /* +@@ -38,6 +39,7 @@ static unsigned int *calc_addr(struct fixup_entry *fcur, long offset) + return (unsigned int *)((unsigned long)fcur + offset); + } + ++notrace + static int patch_alt_instruction(unsigned int *src, unsigned int *dest, + unsigned int *alt_start, unsigned int *alt_end) + { +@@ -61,6 +63,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, + return 0; + } + ++notrace + static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) + { + unsigned int *start, *end, *alt_start, *alt_end, *src, *dest; +@@ -90,6 +93,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) + return 0; + } + ++notrace + void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) + { + struct fixup_entry *fcur, *fend; +@@ -110,6 +114,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) + } + } + ++notrace + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) + { + unsigned int *start, *end, *dest; +diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c +index e7dae82..0df7ca5 100644 +--- a/arch/powerpc/mm/fault.c ++++ b/arch/powerpc/mm/fault.c +@@ -119,13 +119,18 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) + { + struct vm_area_struct * vma; +- struct mm_struct *mm = current->mm; ++ struct mm_struct *mm; + siginfo_t info; + int code = SEGV_MAPERR; + int is_write = 0, ret; + int trap = TRAP(regs); + int is_exec = trap == 0x400; + ++ if (ipipe_trap_notify(IPIPE_TRAP_ACCESS,regs)) ++ return 0; ++ ++ mm = current->mm; ++ + #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + /* + * Fortunately the bit assignments in SRR1 for an instruction +diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S +index b13d589..8c18895 100644 +--- a/arch/powerpc/mm/hash_low_32.S ++++ b/arch/powerpc/mm/hash_low_32.S +@@ -496,7 +496,11 @@ htab_hash_searches: + * + * We assume that there is a hash table in use (Hash != 0). 
+ */ ++#ifdef CONFIG_IPIPE ++_GLOBAL(__flush_hash_pages) ++#else + _GLOBAL(flush_hash_pages) ++#endif + tophys(r7,0) + + /* +@@ -531,18 +535,9 @@ _GLOBAL(flush_hash_pages) + addi r6,r6,-1 + b 1b + +- /* Convert context and va to VSID */ +-2: mulli r3,r3,897*16 /* multiply context by context skew */ +- rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ +- mulli r0,r0,0x111 /* multiply by ESID skew */ +- add r3,r3,r0 /* note code below trims to 24 bits */ +- +- /* Construct the high word of the PPC-style PTE (r11) */ +- rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ +- rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ +- SET_V(r11) /* set V (valid) bit */ +- ++2: + #ifdef CONFIG_SMP ++ li r11,0 + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) +@@ -557,10 +552,36 @@ _GLOBAL(flush_hash_pages) + 11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b ++ mtmsr r10 ++ SYNC_601 ++ isync ++ li r11,1 ++ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ ++ rlwinm r0,r0,0,28,26 /* clear MSR_DR */ ++ mtmsr r0 ++ SYNC_601 ++ isync + b 11b + 12: isync ++ cmpwi r11,0 ++ beq 13f ++ li r0,0 ++ stw r0,0(r9) /* clear mmu_hash_lock */ ++ b 1b ++13: + #endif + ++ /* Convert context and va to VSID */ ++ mulli r3,r3,897*16 /* multiply context by context skew */ ++ rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ ++ mulli r0,r0,0x111 /* multiply by ESID skew */ ++ add r3,r3,r0 /* note code below trims to 24 bits */ ++ ++ /* Construct the high word of the PPC-style PTE (r11) */ ++ rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ ++ rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ ++ SET_V(r11) /* set V (valid) bit */ ++ + /* + * Check the _PAGE_HASHPTE bit in the linux PTE. If it is + * already clear, we're done (for this pte). If not, +@@ -631,7 +652,7 @@ _GLOBAL(flush_hash_patch_B) + + 19: mtmsr r10 + SYNC_601 +- isync ++ sync + blr + + /* +diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c +index 9d75db4..f7f2e60 100644 +--- a/arch/powerpc/mm/hash_native_64.c ++++ b/arch/powerpc/mm/hash_native_64.c +@@ -38,9 +38,9 @@ + #define HPTE_LOCK_BIT 3 + + #ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS +-DEFINE_SPINLOCK(native_tlbie_lock); ++IPIPE_DEFINE_SPINLOCK(native_tlbie_lock); + #else +-static DEFINE_SPINLOCK(native_tlbie_lock); ++static IPIPE_DEFINE_SPINLOCK(native_tlbie_lock); + #endif + + static inline void __tlbie(unsigned long va, int psize, int ssize) +@@ -146,7 +146,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long vflags, int psize, int ssize) + { + struct hash_pte *hptep = htab_address + hpte_group; +- unsigned long hpte_v, hpte_r; ++ unsigned long hpte_v, hpte_r, flags; + int i; + + if (!(vflags & HPTE_V_BOLTED)) { +@@ -155,6 +155,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, + hpte_group, va, pa, rflags, vflags, psize); + } + ++ local_irq_save_hw(flags); ++ + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (! 
(hptep->v & HPTE_V_VALID)) { + /* retry with lock held */ +@@ -167,8 +169,28 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, + hptep++; + } + +- if (i == HPTES_PER_GROUP) ++ if (i == HPTES_PER_GROUP) { ++ local_irq_restore_hw(flags); + return -1; ++ } ++ ++#ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS ++ /* Workaround for bug 4910: No non-guarded access over IOB */ ++ if (pa >= 0x80000000 && pa < 0x100000000) ++ rflags |= _PAGE_GUARDED; ++#endif ++ ++#ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS ++ /* Workaround for bug 4910: No non-guarded access over IOB */ ++ if (pa >= 0x80000000 && pa < 0x100000000) ++ rflags |= _PAGE_GUARDED; ++#endif ++ ++#ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS ++ /* Workaround for bug 4910: No non-guarded access over IOB */ ++ if (pa >= 0x80000000 && pa < 0x100000000) ++ rflags |= _PAGE_GUARDED; ++#endif + + #ifdef CONFIG_PPC_PASEMI_A2_WORKAROUNDS + /* Workaround for bug 4910: No non-guarded access over IOB */ +@@ -193,6 +215,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, + */ + hptep->v = hpte_v; + ++ local_irq_restore_hw(flags); ++ + __asm__ __volatile__ ("ptesync" : : : "memory"); + + return i | (!!(vflags & HPTE_V_SECONDARY) << 3); +@@ -203,13 +227,15 @@ static long native_hpte_remove(unsigned long hpte_group) + struct hash_pte *hptep; + int i; + int slot_offset; +- unsigned long hpte_v; ++ unsigned long hpte_v, flags; + + DBG_LOW(" remove(group=%lx)\n", hpte_group); + + /* pick a random entry to start at */ + slot_offset = mftb() & 0x7; + ++ local_irq_save_hw(flags); ++ + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group + slot_offset; + hpte_v = hptep->v; +@@ -228,12 +254,16 @@ static long native_hpte_remove(unsigned long hpte_group) + slot_offset &= 0x7; + } + +- if (i == HPTES_PER_GROUP) ++ if (i == HPTES_PER_GROUP) { ++ local_irq_restore_hw(flags); + return -1; ++ } + + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + ++ local_irq_restore_hw(flags); ++ + return i; + } + +@@ -242,7 +272,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + int local) + { + struct hash_pte *hptep = htab_address + slot; +- unsigned long hpte_v, want_v; ++ unsigned long hpte_v, want_v, flags; + int ret = 0; + + want_v = hpte_encode_v(va, psize, ssize); +@@ -250,6 +280,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)", + va, want_v & HPTE_V_AVPN, slot, newpp); + ++ local_irq_save_hw(flags); ++ + native_lock_hpte(hptep); + + hpte_v = hptep->v; +@@ -266,6 +298,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + } + native_unlock_hpte(hptep); + ++ local_irq_restore_hw(flags); ++ + /* Ensure it is out of the tlb too. 
*/ + tlbie(va, psize, ssize, local); + +@@ -336,10 +370,10 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, + unsigned long want_v; + unsigned long flags; + +- local_irq_save(flags); +- + DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot); + ++ local_irq_save(flags); ++ + want_v = hpte_encode_v(va, psize, ssize); + native_lock_hpte(hptep); + hpte_v = hptep->v; +diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c +index 1ade7eb..264ee69 100644 +--- a/arch/powerpc/mm/hash_utils_64.c ++++ b/arch/powerpc/mm/hash_utils_64.c +@@ -111,7 +111,7 @@ int mmu_ci_restrictions; + #ifdef CONFIG_DEBUG_PAGEALLOC + static u8 *linear_map_hash_slots; + static unsigned long linear_map_hash_count; +-static DEFINE_SPINLOCK(linear_map_hash_lock); ++static IPIPE_DEFINE_SPINLOCK(linear_map_hash_lock); + #endif /* CONFIG_DEBUG_PAGEALLOC */ + + /* There are definitions of page sizes arrays to be used when none +@@ -894,6 +894,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) + const struct cpumask *tmp; + int rc, user_region = 0, local = 0; + int psize, ssize; ++ unsigned long flags; + + DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", + ea, access, trap); +@@ -1012,6 +1013,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) + #endif + } + } ++ ++ local_irq_save_hw(flags); ++ + if (user_region) { + if (psize != get_paca_psize(ea)) { + get_paca()->context = mm->context; +@@ -1023,6 +1027,10 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_vmalloc_update(); + } ++ ++ local_irq_restore_hw(flags); ++#else ++ (void)flags; + #endif /* CONFIG_PPC_64K_PAGES */ + + #ifdef CONFIG_PPC_HAS_HASH_64K +@@ -1155,6 +1163,10 @@ void flush_hash_range(unsigned long number, int local) + */ + void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) + { ++ if (ipipe_trap_notify(IPIPE_TRAP_ACCESS, regs)) ++ /* Not all access faults go through do_page_fault(). */ ++ return; ++ + if (user_mode(regs)) { + #ifdef CONFIG_PPC_SUBPAGE_PROT + if (rc == -2) +diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c +index be4f34c..0ce6420 100644 +--- a/arch/powerpc/mm/mmu_context_nohash.c ++++ b/arch/powerpc/mm/mmu_context_nohash.c +@@ -56,7 +56,7 @@ static unsigned int next_context, nr_free_contexts; + static unsigned long *context_map; + static unsigned long *stale_map[NR_CPUS]; + static struct mm_struct **context_mm; +-static DEFINE_SPINLOCK(context_lock); ++static IPIPE_DEFINE_SPINLOCK(context_lock); + + #define CTX_MAP_SIZE \ + (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) +@@ -138,7 +138,7 @@ static unsigned int steal_context_smp(unsigned int id) + static unsigned int steal_context_up(unsigned int id) + { + struct mm_struct *mm; +- int cpu = smp_processor_id(); ++ int cpu = ipipe_processor_id(); + + /* Pick up the victim mm */ + mm = context_mm[id]; +@@ -190,9 +190,10 @@ static void context_check_map(void) { } + + void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) + { +- unsigned int i, id, cpu = smp_processor_id(); +- unsigned long *map; ++ unsigned int i, id, cpu = ipipe_processor_id(); ++ unsigned long *map, flags; + ++ local_irq_save_hw_cond(flags); + /* No lockless fast path .. 
yet */ + spin_lock(&context_lock); + +@@ -279,6 +280,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) + pr_hardcont(" -> %d\n", id); + set_context(id, next->pgd); + spin_unlock(&context_lock); ++ local_irq_restore_hw_cond(flags); + } + + /* +diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c +index 93bc4e9..d4bd9c7 100644 +--- a/arch/powerpc/mm/slb.c ++++ b/arch/powerpc/mm/slb.c +@@ -143,17 +143,24 @@ static void __slb_flush_and_rebolt(void) + + void slb_flush_and_rebolt(void) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; + +- WARN_ON(!irqs_disabled()); +- ++ local_save_flags_hw(flags); ++#else ++ WARN_ON(!irqs_disabled()); ++#endif + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ +- hard_irq_disable(); ++ hard_irq_disable(); + + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; ++#ifdef CONFIG_IPIPE ++ local_irq_restore_hw(flags); ++#endif + } + + void slb_vmalloc_update(void) +@@ -202,6 +209,13 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long exec_base; ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++ ++ local_save_flags_hw(flags); ++#else ++ WARN_ON(!irqs_disabled()); ++#endif + + /* + * We need interrupts hard-disabled here, not just soft-disabled, +@@ -235,6 +249,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) + get_paca()->slb_cache_ptr = 0; + get_paca()->context = mm->context; + ++#ifdef CONFIG_IPIPE ++ local_irq_restore_hw(flags); ++#endif + /* + * preload some userspace segments into the SLB. + * Almost all 32 and 64bit PowerPC executables are linked at +diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c +index 687fdda..c692b82 100644 +--- a/arch/powerpc/mm/stab.c ++++ b/arch/powerpc/mm/stab.c +@@ -168,6 +168,13 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++ ++ local_save_flags_hw(flags); ++#else ++ WARN_ON(!irqs_disabled()); ++#endif + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); +@@ -211,6 +218,9 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) + + __get_cpu_var(stab_cache_ptr) = 0; + ++#ifdef CONFIG_IPIPE ++ local_irq_restore_hw(flags); ++#endif + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; +diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c +index 8aaa8b7..a4cd5d1 100644 +--- a/arch/powerpc/mm/tlb_hash32.c ++++ b/arch/powerpc/mm/tlb_hash32.c +@@ -100,6 +100,37 @@ void tlb_flush(struct mmu_gather *tlb) + #define FINISH_FLUSH do { } while (0) + #endif + ++#ifdef CONFIG_IPIPE ++ ++int __flush_hash_pages(unsigned context, unsigned long va, ++ unsigned long pmdval, int count); ++ ++int flush_hash_pages(unsigned context, unsigned long va, ++ unsigned long pmdval, int count) ++{ ++ int bulk, ret = 0; ++ /* ++ * Submitting flush requests on insanely large PTE counts ++ * (e.g. HIGHMEM) may cause severe latency penalty on high ++ * priority domains since this must be done with hw interrupts ++ * off (typically, peaks over 400 us have been observed on ++ * 864xD). 
We split flush requests in bulks of 64 PTEs to ++ * prevent that; the modified assembly helper which performs ++ * the actual flush (__flush_hash_pages()) will spin on the ++ * mmu_lock with interrupts enabled to further reduce latency. ++ */ ++ while (count > 0) { ++ bulk = count > 64 ? 64 : count; ++ ret |= __flush_hash_pages(context, va, pmdval, bulk); ++ va += (bulk << PAGE_SHIFT); ++ count -= bulk; ++ } ++ ++ return ret; ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) + { +diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +index 7ee979f..7d1e831 100644 +--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c ++++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +@@ -24,7 +24,7 @@ + + #include "pq2.h" + +-static DEFINE_SPINLOCK(pci_pic_lock); ++static IPIPE_DEFINE_SPINLOCK(pci_pic_lock); + + struct pq2ads_pci_pic { + struct device_node *node; +@@ -38,18 +38,42 @@ struct pq2ads_pci_pic { + + #define NUM_IRQS 32 + ++static inline void __pq2ads_pci_mask_irq(struct pq2ads_pci_pic *priv, ++ unsigned int irq) ++{ ++ setbits32(&priv->regs->mask, 1 << irq); ++ mb(); ++} ++ ++static inline void __pq2ads_pci_unmask_irq(struct pq2ads_pci_pic *priv, ++ unsigned int irq) ++{ ++ clrbits32(&priv->regs->mask, 1 << irq); ++} ++ + static void pq2ads_pci_mask_irq(unsigned int virq) + { + struct pq2ads_pci_pic *priv = get_irq_chip_data(virq); + int irq = NUM_IRQS - virq_to_hw(virq) - 1; ++ unsigned long flags; + + if (irq != -1) { +- unsigned long flags; + spin_lock_irqsave(&pci_pic_lock, flags); ++ __pq2ads_pci_mask_irq(priv, irq); ++ ipipe_irq_lock(virq); ++ spin_unlock_irqrestore(&pci_pic_lock, flags); ++ } ++} + +- setbits32(&priv->regs->mask, 1 << irq); +- mb(); ++static void pq2ads_pci_mask_ack_irq(unsigned int virq) ++{ ++ struct pq2ads_pci_pic *priv = get_irq_chip_data(virq); ++ int irq = NUM_IRQS - virq_to_hw(virq) - 1; + ++ if (irq != -1) { ++ unsigned long flags; ++ spin_lock_irqsave(&pci_pic_lock, flags); ++ __pq2ads_pci_mask_irq(priv, irq); + spin_unlock_irqrestore(&pci_pic_lock, flags); + } + } +@@ -58,12 +82,12 @@ static void pq2ads_pci_unmask_irq(unsigned int virq) + { + struct pq2ads_pci_pic *priv = get_irq_chip_data(virq); + int irq = NUM_IRQS - virq_to_hw(virq) - 1; ++ unsigned long flags; + + if (irq != -1) { +- unsigned long flags; +- + spin_lock_irqsave(&pci_pic_lock, flags); +- clrbits32(&priv->regs->mask, 1 << irq); ++ __pq2ads_pci_unmask_irq(priv, irq); ++ ipipe_irq_unlock(virq); + spin_unlock_irqrestore(&pci_pic_lock, flags); + } + } +@@ -73,7 +97,7 @@ static struct irq_chip pq2ads_pci_ic = { + .name = "PQ2 ADS PCI", + .end = pq2ads_pci_unmask_irq, + .mask = pq2ads_pci_mask_irq, +- .mask_ack = pq2ads_pci_mask_irq, ++ .mask_ack = pq2ads_pci_mask_ack_irq, + .ack = pq2ads_pci_mask_irq, + .unmask = pq2ads_pci_unmask_irq, + .enable = pq2ads_pci_unmask_irq, +@@ -98,7 +122,7 @@ static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc) + for (bit = 0; pend != 0; ++bit, pend <<= 1) { + if (pend & 0x80000000) { + int virq = irq_linear_revmap(priv->host, bit); +- generic_handle_irq(virq); ++ ipipe_handle_chained_irq(virq); + } + } + } +diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c +index 5b0ab99..6e057d7 100644 +--- a/arch/powerpc/platforms/85xx/tqm85xx.c ++++ b/arch/powerpc/platforms/85xx/tqm85xx.c +@@ -46,10 +46,10 @@ static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) + { + int 
cascade_irq; + +- while ((cascade_irq = cpm2_get_irq()) >= 0) +- generic_handle_irq(cascade_irq); +- + desc->chip->eoi(irq); ++ ++ while ((cascade_irq = cpm2_get_irq()) >= 0) ++ ipipe_handle_chained_irq(cascade_irq); + } + #endif /* CONFIG_CPM2 */ + +diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c +index 8547e86..59c45d5 100644 +--- a/arch/powerpc/platforms/cell/spu_base.c ++++ b/arch/powerpc/platforms/cell/spu_base.c +@@ -57,7 +57,7 @@ EXPORT_SYMBOL_GPL(force_sig_info); + /* + * Protects cbe_spu_info and spu->number. + */ +-static DEFINE_SPINLOCK(spu_lock); ++static IPIPE_DEFINE_SPINLOCK(spu_lock); + + /* + * List of all spus in the system. +diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c +index 94f4447..b8b5eac 100644 +--- a/arch/powerpc/platforms/iseries/irq.c ++++ b/arch/powerpc/platforms/iseries/irq.c +@@ -80,7 +80,7 @@ struct pci_event { + } data; + }; + +-static DEFINE_SPINLOCK(pending_irqs_lock); ++static IPIPE_DEFINE_SPINLOCK(pending_irqs_lock); + static int num_pending_irqs; + static int pending_irqs[NR_IRQS]; + +diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c +index d212006..a929d40 100644 +--- a/arch/powerpc/platforms/powermac/pic.c ++++ b/arch/powerpc/platforms/powermac/pic.c +@@ -57,7 +57,7 @@ static int max_irqs; + static int max_real_irqs; + static u32 level_mask[4]; + +-static DEFINE_SPINLOCK(pmac_pic_lock); ++static IPIPE_DEFINE_SPINLOCK(pmac_pic_lock); + + #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) + static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; +diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c +index 1e8a1e3..9e46980 100644 +--- a/arch/powerpc/platforms/ps3/htab.c ++++ b/arch/powerpc/platforms/ps3/htab.c +@@ -41,7 +41,7 @@ enum ps3_lpar_vas_id { + }; + + +-static DEFINE_SPINLOCK(ps3_htab_lock); ++static IPIPE_DEFINE_SPINLOCK(ps3_htab_lock); + + static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long pa, unsigned long rflags, unsigned long vflags, +diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c +index 8ec5ccf..13a5755 100644 +--- a/arch/powerpc/platforms/ps3/interrupt.c ++++ b/arch/powerpc/platforms/ps3/interrupt.c +@@ -74,7 +74,7 @@ struct ps3_bmp { + u64 unused_2[3]; + }; + u64 ipi_debug_brk_mask; +- spinlock_t lock; ++ ipipe_spinlock_t lock; + }; + + /** +diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c +index 903eb9e..dfb2cda 100644 +--- a/arch/powerpc/platforms/pseries/lpar.c ++++ b/arch/powerpc/platforms/pseries/lpar.c +@@ -334,7 +334,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); + } + +-static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); ++static IPIPE_DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + + static long pSeries_lpar_hpte_remove(unsigned long hpte_group) + { +diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c +index 78f1f7c..91b47fd 100644 +--- a/arch/powerpc/sysdev/cpm2_pic.c ++++ b/arch/powerpc/sysdev/cpm2_pic.c +@@ -82,44 +82,61 @@ static void cpm2_mask_irq(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = virq_to_hw(virq); ++ unsigned long flags; + + bit = irq_to_siubit[irq_nr]; + word = irq_to_siureg[irq_nr]; + ++ local_irq_save_hw_cond(flags); ++ ipipe_irq_lock(virq); + ppc_cached_irq_mask[word] &= ~(1 << bit); + out_be32(&cpm2_intctl->ic_simrh + 
word, ppc_cached_irq_mask[word]); ++ local_irq_restore_hw_cond(flags); + } + + static void cpm2_unmask_irq(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = virq_to_hw(virq); ++ unsigned long flags; + + bit = irq_to_siubit[irq_nr]; + word = irq_to_siureg[irq_nr]; + ++ local_irq_save_hw_cond(flags); + ppc_cached_irq_mask[word] |= 1 << bit; + out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]); ++ ipipe_irq_unlock(virq); ++ local_irq_restore_hw_cond(flags); + } + +-static void cpm2_ack(unsigned int virq) ++static void cpm2_mask_ack(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = virq_to_hw(virq); ++ unsigned long flags; + + bit = irq_to_siubit[irq_nr]; + word = irq_to_siureg[irq_nr]; + ++ local_irq_save_hw_cond(flags); ++ ppc_cached_irq_mask[word] &= ~(1 << bit); ++ out_be32(&cpm2_intctl->ic_simrh + word, ppc_cached_irq_mask[word]); + out_be32(&cpm2_intctl->ic_sipnrh + word, 1 << bit); ++ local_irq_restore_hw_cond(flags); + } + + static void cpm2_end_irq(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = virq_to_hw(virq); ++ unsigned long flags; + +- if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) +- && irq_desc[irq_nr].action) { ++ local_irq_save_hw_cond(flags); ++ ++ if (!__ipipe_root_domain_p || ++ (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) ++ && irq_desc[irq_nr].action)) { + + bit = irq_to_siubit[irq_nr]; + word = irq_to_siureg[irq_nr]; +@@ -133,6 +150,8 @@ static void cpm2_end_irq(unsigned int virq) + */ + mb(); + } ++ ++ local_irq_restore_hw_cond(flags); + } + + static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type) +@@ -185,7 +204,7 @@ static struct irq_chip cpm2_pic = { + .typename = " CPM2 SIU ", + .mask = cpm2_mask_irq, + .unmask = cpm2_unmask_irq, +- .ack = cpm2_ack, ++ .mask_ack = cpm2_mask_ack, + .eoi = cpm2_end_irq, + .set_type = cpm2_set_irq_type, + }; +diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c +index a96584a..c037c88 100644 +--- a/arch/powerpc/sysdev/i8259.c ++++ b/arch/powerpc/sysdev/i8259.c +@@ -23,7 +23,7 @@ static unsigned char cached_8259[2] = { 0xff, 0xff }; + #define cached_A1 (cached_8259[0]) + #define cached_21 (cached_8259[1]) + +-static DEFINE_SPINLOCK(i8259_lock); ++static IPIPE_DEFINE_SPINLOCK(i8259_lock); + + static struct irq_host *i8259_host; + +diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c +index cb7689c..e02e2fa 100644 +--- a/arch/powerpc/sysdev/ipic.c ++++ b/arch/powerpc/sysdev/ipic.c +@@ -32,7 +32,7 @@ + + static struct ipic * primary_ipic; + static struct irq_chip ipic_level_irq_chip, ipic_edge_irq_chip; +-static DEFINE_SPINLOCK(ipic_lock); ++static IPIPE_DEFINE_SPINLOCK(ipic_lock); + + static struct ipic_info ipic_info[] = { + [1] = { +diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c +index 5d2d552..5903a16 100644 +--- a/arch/powerpc/sysdev/mpc8xx_pic.c ++++ b/arch/powerpc/sysdev/mpc8xx_pic.c +@@ -29,24 +29,30 @@ static void mpc8xx_unmask_irq(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = (unsigned int)irq_map[virq].hwirq; ++ unsigned long flags; + + bit = irq_nr & 0x1f; + word = irq_nr >> 5; + ++ local_irq_save_hw_cond(flags); + ppc_cached_irq_mask[word] |= (1 << (31-bit)); + out_be32(&siu_reg->sc_simask, ppc_cached_irq_mask[word]); ++ local_irq_restore_hw_cond(flags); + } + + static void mpc8xx_mask_irq(unsigned int virq) + { + int bit, word; + unsigned int irq_nr = (unsigned int)irq_map[virq].hwirq; ++ unsigned long flags; + + bit = irq_nr & 0x1f; 
+ word = irq_nr >> 5; + ++ local_irq_save_hw_cond(flags); + ppc_cached_irq_mask[word] &= ~(1 << (31-bit)); + out_be32(&siu_reg->sc_simask, ppc_cached_irq_mask[word]); ++ local_irq_save_hw_cond(flags); + } + + static void mpc8xx_ack(unsigned int virq) +diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c +index 30c44e6..661e732 100644 +--- a/arch/powerpc/sysdev/mpic.c ++++ b/arch/powerpc/sysdev/mpic.c +@@ -46,7 +46,7 @@ + + static struct mpic *mpics; + static struct mpic *mpic_primary; +-static DEFINE_SPINLOCK(mpic_lock); ++static IPIPE_DEFINE_SPINLOCK(mpic_lock); + + #ifdef CONFIG_PPC32 /* XXX for now */ + #ifdef CONFIG_IRQ_ALL_CPUS +@@ -670,33 +670,44 @@ static inline void mpic_eoi(struct mpic *mpic) + */ + + +-void mpic_unmask_irq(unsigned int irq) ++void __mpic_unmask_irq(unsigned int irq) + { + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); + +- DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); +- + mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), + mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & + ~MPIC_VECPRI_MASK); + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { +- printk(KERN_ERR "mpic_enable_irq timeout\n"); ++ printk(KERN_ERR "mpic_unmask_irq timeout\n"); + break; + } + } while(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK); + } + +-void mpic_mask_irq(unsigned int irq) ++void mpic_unmask_irq(unsigned int irq) + { +- unsigned int loops = 100000; ++#ifdef DEBUG + struct mpic *mpic = mpic_from_irq(irq); +- unsigned int src = mpic_irq_to_hw(irq); ++#endif ++ unsigned long flags; + +- DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); ++ DBG("%p: %s: unmask_irq: %d (src %d)\n", mpic, mpic->name, irq, src); ++ ++ spin_lock_irqsave(&mpic_lock, flags); ++ __mpic_unmask_irq(irq); ++ ipipe_irq_unlock(irq); ++ spin_unlock_irqrestore(&mpic_lock, flags); ++} ++ ++static inline void __mpic_mask_irq(unsigned int irq) ++{ ++ struct mpic *mpic = mpic_from_irq(irq); ++ unsigned int src = mpic_irq_to_hw(irq); ++ unsigned int loops = 100000; + + mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), + mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) | +@@ -705,15 +716,31 @@ void mpic_mask_irq(unsigned int irq) + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { +- printk(KERN_ERR "mpic_enable_irq timeout\n"); ++ printk(KERN_ERR "mpic_mask_irq timeout, irq %u\n", irq); + break; + } + } while(!(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK)); + } + ++void mpic_mask_irq(unsigned int irq) ++{ ++#ifdef DEBUG ++ struct mpic *mpic = mpic_from_irq(irq); ++#endif ++ unsigned long flags; ++ ++ DBG("%s: mask_irq: irq %u (src %d)\n", mpic->name, irq, mpic_irq_to_hw(irq)); ++ ++ spin_lock_irqsave(&mpic_lock, flags); ++ __mpic_mask_irq(irq); ++ ipipe_irq_lock(irq); ++ spin_unlock_irqrestore(&mpic_lock, flags); ++} ++ + void mpic_end_irq(unsigned int irq) + { + struct mpic *mpic = mpic_from_irq(irq); ++ unsigned long flags; + + #ifdef DEBUG_IRQ + DBG("%s: end_irq: %d\n", mpic->name, irq); +@@ -723,6 +750,14 @@ void mpic_end_irq(unsigned int irq) + * latched another edge interrupt coming in anyway + */ + ++#ifdef CONFIG_IPIPE ++ spin_lock_irqsave(&mpic_lock, flags); ++ if (!(irq_desc[irq].status & IRQ_NOREQUEST)) ++ __mpic_mask_irq(irq); ++ spin_unlock_irqrestore(&mpic_lock, flags); ++#else ++ (void)flags; ++#endif + mpic_eoi(mpic); + } + +@@ -732,8 +767,11 @@ static void 
mpic_unmask_ht_irq(unsigned int irq) + { + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); ++ unsigned long flags; + +- mpic_unmask_irq(irq); ++ spin_lock_irqsave(&mpic_lock, flags); ++ __mpic_unmask_irq(irq); ++ spin_unlock_irqrestore(&mpic_lock, flags); + + if (irq_desc[irq].status & IRQ_LEVEL) + mpic_ht_end_irq(mpic, src); +@@ -763,9 +801,18 @@ static void mpic_end_ht_irq(unsigned int irq) + { + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = mpic_irq_to_hw(irq); ++ unsigned long flags; + + #ifdef DEBUG_IRQ +- DBG("%s: end_irq: %d\n", mpic->name, irq); ++ DBG("%s: end_ht_irq: %d\n", mpic->name, irq); ++#endif ++ ++#ifdef CONFIG_IPIPE ++ spin_lock_irqsave(&mpic_lock, flags); ++ __mpic_mask_irq(irq); ++ spin_unlock_irqrestore(&mpic_lock, flags); ++#else ++ (void)flags; + #endif + /* We always EOI on end_irq() even for edge interrupts since that + * should only lower the priority, the MPIC should have properly +@@ -784,9 +831,12 @@ static void mpic_unmask_ipi(unsigned int irq) + { + struct mpic *mpic = mpic_from_ipi(irq); + unsigned int src = mpic_irq_to_hw(irq) - mpic->ipi_vecs[0]; ++ unsigned long flags; + +- DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); ++ DBG("%s: unmask_ipi: %d (ipi %d)\n", mpic->name, irq, src); ++ spin_lock_irqsave(&mpic_lock, flags); + mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); ++ spin_unlock_irqrestore(&mpic_lock, flags); + } + + static void mpic_mask_ipi(unsigned int irq) +@@ -858,6 +908,7 @@ int mpic_set_irq_type(unsigned int virq, unsigned int flow_type) + unsigned int src = mpic_irq_to_hw(virq); + struct irq_desc *desc = get_irq_desc(virq); + unsigned int vecpri, vold, vnew; ++ unsigned long flags; + + DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n", + mpic, virq, src, flow_type); +@@ -882,6 +933,8 @@ int mpic_set_irq_type(unsigned int virq, unsigned int flow_type) + else + vecpri = mpic_type_to_vecpri(mpic, flow_type); + ++ local_irq_save_hw_cond(flags); ++ + vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)); + vnew = vold & ~(MPIC_INFO(VECPRI_POLARITY_MASK) | + MPIC_INFO(VECPRI_SENSE_MASK)); +@@ -889,6 +942,8 @@ int mpic_set_irq_type(unsigned int virq, unsigned int flow_type) + if (vold != vnew) + mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vnew); + ++ local_irq_restore_hw_cond(flags); ++ + return 0; + } + +@@ -1576,6 +1631,7 @@ unsigned int mpic_get_mcirq(void) + } + + #ifdef CONFIG_SMP ++ + void mpic_request_ipis(void) + { + struct mpic *mpic = mpic_primary; +diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c +index 3faa42e..380d17e 100644 +--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c ++++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c +@@ -33,7 +33,7 @@ + + #include "qe_ic.h" + +-static DEFINE_SPINLOCK(qe_ic_lock); ++static IPIPE_DEFINE_SPINLOCK(qe_ic_lock); + + static struct qe_ic_info qe_ic_info[] = { + [1] = { +@@ -236,6 +236,20 @@ static void qe_ic_mask_irq(unsigned int virq) + spin_unlock_irqrestore(&qe_ic_lock, flags); + } + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_qe_ic_cascade_irq(struct qe_ic *qe_ic, unsigned int virq) ++{ ++ ++ struct pt_regs regs; /* Contents not used. 
*/ ++ ++ ipipe_trace_irq_entry(virq); ++ __ipipe_handle_irq(virq, ®s); ++ ipipe_trace_irq_exit(virq); ++} ++ ++#endif ++ + static struct irq_chip qe_ic_irq_chip = { + .typename = " QEIC ", + .unmask = qe_ic_unmask_irq, +diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c +index cf244a4..2dc1fa8 100644 +--- a/arch/powerpc/sysdev/tsi108_pci.c ++++ b/arch/powerpc/sysdev/tsi108_pci.c +@@ -250,7 +250,9 @@ static void tsi108_pci_int_mask(u_int irq) + { + u_int irp_cfg; + int int_line = (irq - IRQ_PCI_INTAD_BASE); ++ unsigned long flags; + ++ local_irq_save_hw_cond(flags); + irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); + mb(); + irp_cfg |= (1 << int_line); /* INTx_DIR = output */ +@@ -258,19 +260,23 @@ static void tsi108_pci_int_mask(u_int irq) + tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg); + mb(); + irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); ++ local_irq_restore_hw_cond(flags); + } + + static void tsi108_pci_int_unmask(u_int irq) + { + u_int irp_cfg; + int int_line = (irq - IRQ_PCI_INTAD_BASE); ++ unsigned long flags; + ++ local_irq_save_hw_cond(flags); + irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); + mb(); + irp_cfg &= ~(1 << int_line); + irp_cfg |= (3 << (8 + (int_line * 2))); + tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg); + mb(); ++ local_irq_restore_hw_cond(flags); + } + + static void init_pci_source(void) +@@ -361,6 +367,9 @@ static void tsi108_pci_irq_ack(u_int irq) + + static void tsi108_pci_irq_end(u_int irq) + { ++ unsigned long flags; ++ ++ local_irq_save_hw_cond(flags); + tsi108_pci_int_unmask(irq); + + /* Enable interrupts from PCI block */ +@@ -368,6 +377,7 @@ static void tsi108_pci_irq_end(u_int irq) + tsi108_read_reg(TSI108_PCI_OFFSET + + TSI108_PCI_IRP_ENABLE) | + TSI108_PCI_IRP_ENABLE_P_INT); ++ local_irq_restore_hw_cond(flags); + mb(); + } + +diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c +index 466ce9a..8871faa 100644 +--- a/arch/powerpc/sysdev/uic.c ++++ b/arch/powerpc/sysdev/uic.c +@@ -49,7 +49,7 @@ struct uic { + int index; + int dcrbase; + +- spinlock_t lock; ++ ipipe_spinlock_t lock; + + /* The remapper for this UIC */ + struct irq_host *irqhost; +@@ -71,6 +71,7 @@ static void uic_unmask_irq(unsigned int virq) + er = mfdcr(uic->dcrbase + UIC_ER); + er |= sr; + mtdcr(uic->dcrbase + UIC_ER, er); ++ ipipe_irq_unlock(virq); + spin_unlock_irqrestore(&uic->lock, flags); + } + +@@ -82,6 +83,7 @@ static void uic_mask_irq(unsigned int virq) + u32 er; + + spin_lock_irqsave(&uic->lock, flags); ++ ipipe_irq_lock(virq); + er = mfdcr(uic->dcrbase + UIC_ER); + er &= ~(1 << (31 - src)); + mtdcr(uic->dcrbase + UIC_ER, er); +@@ -239,7 +241,16 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) + src = 32 - ffs(msr); + + subvirq = irq_linear_revmap(uic->irqhost, src); ++#ifdef CONFIG_IPIPE ++ { ++ struct pt_regs regs; /* Contents not used. */ ++ ipipe_trace_irq_entry(subvirq); ++ __ipipe_handle_irq(subvirq, ®s); ++ ipipe_trace_irq_exit(subvirq); ++ } ++#else + generic_handle_irq(subvirq); ++#endif + + uic_irq_ret: + spin_lock(&desc->lock); +diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c +index 737a1c4..15e81de 100644 +--- a/drivers/pci/htirq.c ++++ b/drivers/pci/htirq.c +@@ -21,7 +21,7 @@ + * With multiple simultaneous hypertransport irq devices it might pay + * to make this more fine grained. But start with simple, stupid, and correct. 
+ */ +-static DEFINE_SPINLOCK(ht_irq_lock); ++static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); + + struct ht_irq_cfg { + struct pci_dev *dev; +diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c +index 13ddf3b..385b4d9 100644 +--- a/drivers/serial/8250.c ++++ b/drivers/serial/8250.c +@@ -3168,6 +3168,51 @@ static int serial8250_resume(struct platform_device *dev) + return 0; + } + ++#if defined(CONFIG_IPIPE_DEBUG) && defined(CONFIG_SERIAL_8250_CONSOLE) ++ ++#include ++ ++void __weak __ipipe_serial_debug(const char *fmt, ...) ++{ ++ struct uart_8250_port *up = &serial8250_ports[0]; ++ unsigned int ier, count; ++ unsigned long flags; ++ char buf[128]; ++ va_list ap; ++ ++ va_start(ap, fmt); ++ vsprintf(buf, fmt, ap); ++ va_end(ap); ++ count = strlen(buf); ++ ++ touch_nmi_watchdog(); ++ ++ local_irq_save_hw(flags); ++ ++ /* ++ * First save the IER then disable the interrupts ++ */ ++ ier = serial_in(up, UART_IER); ++ ++ if (up->capabilities & UART_CAP_UUE) ++ serial_out(up, UART_IER, UART_IER_UUE); ++ else ++ serial_out(up, UART_IER, 0); ++ ++ uart_console_write(&up->port, buf, count, serial8250_console_putchar); ++ ++ /* ++ * Finally, wait for transmitter to become empty ++ * and restore the IER ++ */ ++ wait_for_xmitr(up, BOTH_EMPTY); ++ serial_out(up, UART_IER, ier); ++ ++ local_irq_restore_hw(flags); ++} ++ ++#endif ++ + static struct platform_driver serial8250_isa_driver = { + .probe = serial8250_probe, + .remove = __devexit_p(serial8250_remove), +diff --git a/fs/exec.c b/fs/exec.c +index ac45e7d..4c22c5b 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -699,6 +699,7 @@ static int exec_mmap(struct mm_struct *mm) + { + struct task_struct *tsk; + struct mm_struct * old_mm, *active_mm; ++ unsigned long flags; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; +@@ -721,8 +722,10 @@ static int exec_mmap(struct mm_struct *mm) + task_lock(tsk); + active_mm = tsk->active_mm; + tsk->mm = mm; ++ ipipe_mm_switch_protect(flags); + tsk->active_mm = mm; + activate_mm(active_mm, mm); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + arch_pick_mmap_layout(mm); + if (old_mm) { +diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h +index c99c64d..5d01b93 100644 +--- a/include/asm-generic/atomic.h ++++ b/include/asm-generic/atomic.h +@@ -60,11 +60,11 @@ static inline int atomic_add_return(int i, atomic_t *v) + unsigned long flags; + int temp; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + temp = v->counter; + temp += i; + v->counter = temp; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return temp; + } +@@ -82,11 +82,11 @@ static inline int atomic_sub_return(int i, atomic_t *v) + unsigned long flags; + int temp; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + temp = v->counter; + temp -= i; + v->counter = temp; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + + return temp; + } +@@ -139,9 +139,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) + unsigned long flags; + + mask = ~mask; +- local_irq_save(flags); ++ local_irq_save_hw(flags); + *addr &= mask; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + } + + #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h +index c894646..8d42ffe 100644 +--- a/include/asm-generic/bitops/atomic.h ++++ b/include/asm-generic/bitops/atomic.h +@@ -21,20 +21,20 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] 
__lock_aligned; + * this is the substitute */ + #define _atomic_spin_lock_irqsave(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ +- local_irq_save(f); \ ++ local_irq_save_hw(f); \ + __raw_spin_lock(s); \ + } while(0) + + #define _atomic_spin_unlock_irqrestore(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ + __raw_spin_unlock(s); \ +- local_irq_restore(f); \ ++ local_irq_restore_hw(f); \ + } while(0) + + + #else +-# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0) +-# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) ++# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0) ++# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0) + #endif + + /* +diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h +index b2ba2fc..ed01ab9 100644 +--- a/include/asm-generic/cmpxchg-local.h ++++ b/include/asm-generic/cmpxchg-local.h +@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) +@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, + default: + wrong_size_cmpxchg(ptr); + } +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + return prev; + } + +@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, + u64 prev; + unsigned long flags; + +- local_irq_save(flags); ++ local_irq_save_hw(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; +- local_irq_restore(flags); ++ local_irq_restore_hw(flags); + return prev; + } + +diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h +index 90079c3..65e872e 100644 +--- a/include/asm-generic/percpu.h ++++ b/include/asm-generic/percpu.h +@@ -56,6 +56,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; + #define __raw_get_cpu_var(var) \ + (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + ++#ifdef CONFIG_IPIPE ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++extern int __ipipe_check_percpu_access(void); ++#define __ipipe_local_cpu_offset \ ++ ({ \ ++ WARN_ON_ONCE(__ipipe_check_percpu_access()); \ ++ __my_cpu_offset; \ ++ }) ++#else ++#define __ipipe_local_cpu_offset __my_cpu_offset ++#endif ++#define __ipipe_get_cpu_var(var) \ ++ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __ipipe_local_cpu_offset)) ++#endif /* CONFIG_IPIPE */ + + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA + extern void setup_per_cpu_areas(void); +@@ -66,6 +80,7 @@ extern void setup_per_cpu_areas(void); + #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) + #define __get_cpu_var(var) per_cpu_var(var) + #define __raw_get_cpu_var(var) per_cpu_var(var) ++#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var) + + #endif /* SMP */ + +diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h +index 6d527ee..c997ef1 100644 +--- a/include/linux/hardirq.h ++++ b/include/linux/hardirq.h +@@ -183,24 +183,28 @@ extern void irq_enter(void); + */ + extern void irq_exit(void); + +-#define nmi_enter() \ +- do { \ +- ftrace_nmi_enter(); \ +- BUG_ON(in_nmi()); \ +- add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ +- lockdep_off(); \ +- rcu_nmi_enter(); \ +- trace_hardirq_enter(); \ ++#define nmi_enter() \ ++ do { \ ++ if (likely(!ipipe_test_foreign_stack())) { \ ++ ftrace_nmi_enter(); \ ++ 
BUG_ON(in_nmi()); \ ++ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ lockdep_off(); \ ++ rcu_nmi_enter(); \ ++ trace_hardirq_enter(); \ ++ } \ + } while (0) + +-#define nmi_exit() \ +- do { \ +- trace_hardirq_exit(); \ +- rcu_nmi_exit(); \ +- lockdep_on(); \ +- BUG_ON(!in_nmi()); \ +- sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ +- ftrace_nmi_exit(); \ ++#define nmi_exit() \ ++ do { \ ++ if (likely(!ipipe_test_foreign_stack())) { \ ++ trace_hardirq_exit(); \ ++ rcu_nmi_exit(); \ ++ lockdep_on(); \ ++ BUG_ON(!in_nmi()); \ ++ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ ftrace_nmi_exit(); \ ++ } \ + } while (0) + + #endif /* LINUX_HARDIRQ_H */ +diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h +new file mode 100644 +index 0000000..7aee3a5 +--- /dev/null ++++ b/include/linux/ipipe.h +@@ -0,0 +1,691 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe.h ++ * ++ * Copyright (C) 2002-2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_H ++#define __LINUX_IPIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++#include ++#include ++ ++static inline int ipipe_disable_context_check(int cpu) ++{ ++ return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0); ++} ++ ++static inline void ipipe_restore_context_check(int cpu, int old_state) ++{ ++ per_cpu(ipipe_percpu_context_check, cpu) = old_state; ++} ++ ++static inline void ipipe_context_check_off(void) ++{ ++ int cpu; ++ for_each_online_cpu(cpu) ++ per_cpu(ipipe_percpu_context_check, cpu) = 0; ++} ++ ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++static inline int ipipe_disable_context_check(int cpu) ++{ ++ return 0; ++} ++ ++static inline void ipipe_restore_context_check(int cpu, int old_state) { } ++ ++static inline void ipipe_context_check_off(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#ifdef CONFIG_IPIPE ++ ++/* ++ * Sanity check: IPIPE_VIRQ_BASE depends on CONFIG_NR_CPUS, and if the ++ * latter gets too large, we fail to map the virtual interrupts. ++ */ ++#if IPIPE_VIRQ_BASE / BITS_PER_LONG > BITS_PER_LONG ++#error "CONFIG_NR_CPUS is too large, please lower it." 
++#endif ++ ++#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING ++#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ ++ (IPIPE_MINOR_NUMBER << 8) | \ ++ (IPIPE_PATCH_NUMBER)) ++ ++#ifndef BROKEN_BUILTIN_RETURN_ADDRESS ++#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) ++#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) ++#endif /* !BUILTIN_RETURN_ADDRESS */ ++ ++#define IPIPE_ROOT_PRIO 100 ++#define IPIPE_ROOT_ID 0 ++#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ ++ ++#define IPIPE_RESET_TIMER 0x1 ++#define IPIPE_GRAB_TIMER 0x2 ++ ++/* Global domain flags */ ++#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ ++#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ ++ ++/* Interrupt control bits */ ++#define IPIPE_HANDLE_FLAG 0 ++#define IPIPE_PASS_FLAG 1 ++#define IPIPE_ENABLE_FLAG 2 ++#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG ++#define IPIPE_STICKY_FLAG 3 ++#define IPIPE_SYSTEM_FLAG 4 ++#define IPIPE_LOCK_FLAG 5 ++#define IPIPE_WIRED_FLAG 6 ++#define IPIPE_EXCLUSIVE_FLAG 7 ++ ++#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) ++#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) ++#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) ++#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK ++#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) ++#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) ++#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) ++#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) ++#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) ++ ++#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) ++#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) ++ ++#define IPIPE_EVENT_SELF 0x80000000 ++ ++#define IPIPE_NR_CPUS NR_CPUS ++ ++/* This accessor assumes hw IRQs are off on SMP; allows assignment. */ ++#define __ipipe_current_domain __ipipe_get_cpu_var(ipipe_percpu_domain) ++/* This read-only accessor makes sure that hw IRQs are off on SMP. */ ++#define ipipe_current_domain \ ++ ({ \ ++ struct ipipe_domain *__ipd__; \ ++ unsigned long __flags__; \ ++ local_irq_save_hw_smp(__flags__); \ ++ __ipd__ = __ipipe_current_domain; \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ipd__; \ ++ }) ++ ++#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ ++ (irq) < IPIPE_NR_IRQS) ++ ++#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) ++ ++struct irq_desc; ++ ++typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc); ++ ++typedef int (*ipipe_event_handler_t)(unsigned event, ++ struct ipipe_domain *from, ++ void *data); ++struct ipipe_domain { ++ ++ int slot; /* Slot number in percpu domain data array. */ ++ struct list_head p_link; /* Link in pipeline */ ++ ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ ++ unsigned long long evself; /* Self-monitored event bits. */ ++ ++ struct irqdesc { ++ unsigned long control; ++ ipipe_irq_ackfn_t acknowledge; ++ ipipe_irq_handler_t handler; ++ void *cookie; ++ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; ++ ++ int priority; ++ void *pdd; ++ unsigned long flags; ++ unsigned domid; ++ const char *name; ++ struct mutex mutex; ++}; ++ ++#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ ++ ++struct ipipe_domain_attr { ++ ++ unsigned domid; /* Domain identifier -- Magic value set by caller */ ++ const char *name; /* Domain name -- Warning: won't be dup'ed! 
*/ ++ int priority; /* Priority in interrupt pipeline */ ++ void (*entry) (void); /* Domain entry point */ ++ void *pdd; /* Per-domain (opaque) data pointer */ ++}; ++ ++#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie ++#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler ++#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq] ++ ++extern unsigned __ipipe_printk_virq; ++ ++extern unsigned long __ipipe_virtual_irq_map; ++ ++extern struct list_head __ipipe_pipeline; ++ ++extern int __ipipe_event_monitors[]; ++ ++/* Private interface */ ++ ++void ipipe_init(void); ++ ++#ifdef CONFIG_PROC_FS ++void ipipe_init_proc(void); ++ ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_init_tracer(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++#define __ipipe_init_tracer() do { } while(0) ++#endif /* CONFIG_IPIPE_TRACE */ ++ ++#else /* !CONFIG_PROC_FS */ ++#define ipipe_init_proc() do { } while(0) ++#endif /* CONFIG_PROC_FS */ ++ ++void __ipipe_init_stage(struct ipipe_domain *ipd); ++ ++void __ipipe_cleanup_domain(struct ipipe_domain *ipd); ++ ++void __ipipe_add_domain_proc(struct ipipe_domain *ipd); ++ ++void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); ++ ++void __ipipe_flush_printk(unsigned irq, void *cookie); ++ ++void __ipipe_walk_pipeline(struct list_head *pos); ++ ++void __ipipe_pend_irq(unsigned irq, struct list_head *head); ++ ++int __ipipe_dispatch_event(unsigned event, void *data); ++ ++void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq); ++ ++void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); ++ ++void __ipipe_sync_stage(unsigned long syncmask); ++ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq); ++ ++void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq); ++ ++void __ipipe_pin_range_globally(unsigned long start, unsigned long end); ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_irq_lock(unsigned irq) ++{ ++ __ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_irq_unlock(unsigned irq) ++{ ++ __ipipe_unlock_irq(__ipipe_current_domain, irq); ++} ++ ++#ifndef __ipipe_sync_pipeline ++#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask) ++#endif ++ ++#ifndef __ipipe_run_irqtail ++#define __ipipe_run_irqtail() do { } while(0) ++#endif ++ ++#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) ++ ++/* ++ * Keep the following as a macro, so that client code could check for ++ * the support of the invariant pipeline head optimization. 
++ */ ++#define __ipipe_pipeline_head() \ ++ list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link) ++ ++#define local_irq_enable_hw_cond() local_irq_enable_hw() ++#define local_irq_disable_hw_cond() local_irq_disable_hw() ++#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) ++#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) ++ ++#ifdef CONFIG_SMP ++cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask); ++int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask); ++#define local_irq_save_hw_smp(flags) local_irq_save_hw(flags) ++#define local_irq_restore_hw_smp(flags) local_irq_restore_hw(flags) ++#else /* !CONFIG_SMP */ ++#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_smp(flags) do { } while(0) ++#endif /* CONFIG_SMP */ ++ ++#define local_irq_save_full(vflags, rflags) \ ++ do { \ ++ local_irq_save(vflags); \ ++ local_irq_save_hw(rflags); \ ++ } while(0) ++ ++#define local_irq_restore_full(vflags, rflags) \ ++ do { \ ++ local_irq_restore_hw(rflags); \ ++ local_irq_restore(vflags); \ ++ } while(0) ++ ++static inline void __local_irq_restore_nosync(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr(); ++ ++ if (raw_irqs_disabled_flags(x)) { ++ set_bit(IPIPE_STALL_FLAG, &p->status); ++ trace_hardirqs_off(); ++ } else { ++ trace_hardirqs_on(); ++ clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++} ++ ++static inline void local_irq_restore_nosync(unsigned long x) ++{ ++ unsigned long flags; ++ local_irq_save_hw_smp(flags); ++ __local_irq_restore_nosync(x); ++ local_irq_restore_hw_smp(flags); ++} ++ ++#define __ipipe_root_domain_p (__ipipe_current_domain == ipipe_root_domain) ++#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) ++ ++static inline int __ipipe_event_monitored_p(int ev) ++{ ++ if (__ipipe_event_monitors[ev] > 0) ++ return 1; ++ ++ return (ipipe_current_domain->evself & (1LL << ev)) != 0; ++} ++ ++#define ipipe_sigwake_notify(p) \ ++do { \ ++ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p); \ ++} while(0) ++ ++#define ipipe_exit_notify(p) \ ++do { \ ++ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_EXIT, p); \ ++} while(0) ++ ++#define ipipe_setsched_notify(p) \ ++do { \ ++ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \ ++} while(0) ++ ++#define ipipe_schedule_notify(prev, next) \ ++do { \ ++ if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \ ++ __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ ++ __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \ ++} while(0) ++ ++#define ipipe_trap_notify(ex, regs) \ ++({ \ ++ unsigned long __flags__; \ ++ int __ret__ = 0; \ ++ local_irq_save_hw_smp(__flags__); \ ++ if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \ ++ ((current)->flags & PF_EVNOTIFY)) && \ ++ __ipipe_event_monitored_p(ex)) { \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ret__ = __ipipe_dispatch_event(ex, regs); \ ++ } else \ ++ local_irq_restore_hw_smp(__flags__); \ ++ __ret__; \ ++}) ++ ++static inline void ipipe_init_notify(struct task_struct *p) ++{ ++ if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) ++ __ipipe_dispatch_event(IPIPE_EVENT_INIT, p); ++} ++ ++struct mm_struct; ++ ++static inline void ipipe_cleanup_notify(struct mm_struct *mm) ++{ ++ if 
(__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) ++ __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); ++} ++ ++/* Public interface */ ++ ++int ipipe_register_domain(struct ipipe_domain *ipd, ++ struct ipipe_domain_attr *attr); ++ ++int ipipe_unregister_domain(struct ipipe_domain *ipd); ++ ++void ipipe_suspend_domain(void); ++ ++int ipipe_virtualize_irq(struct ipipe_domain *ipd, ++ unsigned irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t acknowledge, ++ unsigned modemask); ++ ++int ipipe_control_irq(unsigned irq, ++ unsigned clrmask, ++ unsigned setmask); ++ ++unsigned ipipe_alloc_virq(void); ++ ++int ipipe_free_virq(unsigned virq); ++ ++int ipipe_trigger_irq(unsigned irq); ++ ++static inline void __ipipe_propagate_irq(unsigned irq) ++{ ++ struct list_head *next = __ipipe_current_domain->p_link.next; ++ if (next == &ipipe_root.p_link) { ++ /* Fast path: root must handle all interrupts. */ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++ return; ++ } ++ __ipipe_pend_irq(irq, next); ++} ++ ++static inline void __ipipe_schedule_irq(unsigned irq) ++{ ++ __ipipe_pend_irq(irq, &__ipipe_current_domain->p_link); ++} ++ ++static inline void __ipipe_schedule_irq_head(unsigned irq) ++{ ++ __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq); ++} ++ ++static inline void __ipipe_schedule_irq_root(unsigned irq) ++{ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++} ++ ++static inline void ipipe_propagate_irq(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_propagate_irq(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq_head(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq_head(irq); ++ local_irq_restore_hw(flags); ++} ++ ++static inline void ipipe_schedule_irq_root(unsigned irq) ++{ ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ __ipipe_schedule_irq_root(irq); ++ local_irq_restore_hw(flags); ++} ++ ++void ipipe_stall_pipeline_from(struct ipipe_domain *ipd); ++ ++unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); ++ ++unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); ++ ++static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ ipipe_test_and_unstall_pipeline_from(ipd); ++} ++ ++void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, ++ unsigned long x); ++ ++static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) ++{ ++ return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline void ipipe_stall_pipeline_head(void) ++{ ++ local_irq_disable_hw(); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); ++} ++ ++static inline unsigned long ipipe_test_and_stall_pipeline_head(void) ++{ ++ local_irq_disable_hw(); ++ return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); ++} ++ ++void ipipe_unstall_pipeline_head(void); ++ ++void __ipipe_restore_pipeline_head(unsigned long x); ++ ++static inline void ipipe_restore_pipeline_head(unsigned long x) ++{ ++ /* On some archs, __test_and_set_bit() might return different ++ * truth value than test_bit(), so we test the exclusive OR of ++ * both statuses, assuming that the lowest bit is always set in ++ * the truth value (if this is wrong, the failed optimization will ++ * be 
caught in __ipipe_restore_pipeline_head() if ++ * CONFIG_DEBUG_KERNEL is set). */ ++ if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1) ++ __ipipe_restore_pipeline_head(x); ++} ++ ++#define ipipe_unstall_pipeline() \ ++ ipipe_unstall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_and_unstall_pipeline() \ ++ ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_pipeline() \ ++ ipipe_test_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_test_and_stall_pipeline() \ ++ ipipe_test_and_stall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_stall_pipeline() \ ++ ipipe_stall_pipeline_from(ipipe_current_domain) ++ ++#define ipipe_restore_pipeline(x) \ ++ ipipe_restore_pipeline_from(ipipe_current_domain, (x)) ++ ++void ipipe_init_attr(struct ipipe_domain_attr *attr); ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); ++ ++unsigned long ipipe_critical_enter(void (*syncfn) (void)); ++ ++void ipipe_critical_exit(unsigned long flags); ++ ++static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) ++{ ++ set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); ++} ++ ++static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) ++{ ++ clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); ++} ++ ++static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) ++{ ++ /* Must be called hw interrupts off. */ ++ __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) ++{ ++ /* Must be called hw interrupts off. */ ++ __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); ++} ++ ++static inline int ipipe_test_foreign_stack(void) ++{ ++ /* Must be called hw interrupts off. */ ++ return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)); ++} ++ ++#ifndef ipipe_safe_current ++#define ipipe_safe_current() \ ++({ \ ++ struct task_struct *p; \ ++ unsigned long flags; \ ++ local_irq_save_hw_smp(flags); \ ++ p = ipipe_test_foreign_stack() ? 
&init_task : current; \ ++ local_irq_restore_hw_smp(flags); \ ++ p; \ ++}) ++#endif ++ ++ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, ++ unsigned event, ++ ipipe_event_handler_t handler); ++ ++cpumask_t ipipe_set_irq_affinity(unsigned irq, ++ cpumask_t cpumask); ++ ++int ipipe_send_ipi(unsigned ipi, ++ cpumask_t cpumask); ++ ++int ipipe_setscheduler_root(struct task_struct *p, ++ int policy, ++ int prio); ++ ++int ipipe_reenter_root(struct task_struct *prev, ++ int policy, ++ int prio); ++ ++int ipipe_alloc_ptdkey(void); ++ ++int ipipe_free_ptdkey(int key); ++ ++int ipipe_set_ptd(int key, ++ void *value); ++ ++void *ipipe_get_ptd(int key); ++ ++int ipipe_disable_ondemand_mappings(struct task_struct *tsk); ++ ++static inline void ipipe_nmi_enter(void) ++{ ++ int cpu = ipipe_processor_id(); ++ ++ per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ per_cpu(ipipe_saved_context_check_state, cpu) = ++ ipipe_disable_context_check(cpu); ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++} ++ ++static inline void ipipe_nmi_exit(void) ++{ ++ int cpu = ipipe_processor_id(); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ipipe_restore_context_check ++ (cpu, per_cpu(ipipe_saved_context_check_state, cpu)); ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++ if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu))) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_init() do { } while(0) ++#define ipipe_suspend_domain() do { } while(0) ++#define ipipe_sigwake_notify(p) do { } while(0) ++#define ipipe_setsched_notify(p) do { } while(0) ++#define ipipe_init_notify(p) do { } while(0) ++#define ipipe_exit_notify(p) do { } while(0) ++#define ipipe_cleanup_notify(mm) do { } while(0) ++#define ipipe_trap_notify(t,r) 0 ++#define ipipe_init_proc() do { } while(0) ++ ++static inline void __ipipe_pin_range_globally(unsigned long start, ++ unsigned long end) ++{ ++} ++ ++static inline int ipipe_test_foreign_stack(void) ++{ ++ return 0; ++} ++ ++#define local_irq_enable_hw_cond() do { } while(0) ++#define local_irq_disable_hw_cond() do { } while(0) ++#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_cond(flags) do { } while(0) ++#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) ++#define local_irq_restore_hw_smp(flags) do { } while(0) ++ ++#define ipipe_irq_lock(irq) do { } while(0) ++#define ipipe_irq_unlock(irq) do { } while(0) ++ ++#define __ipipe_root_domain_p 1 ++#define ipipe_root_domain_p 1 ++#define ipipe_safe_current current ++#define ipipe_processor_id() smp_processor_id() ++ ++#define ipipe_nmi_enter() do { } while (0) ++#define ipipe_nmi_exit() do { } while (0) ++ ++#define local_irq_disable_head() local_irq_disable() ++ ++#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0) ++#define local_irq_restore_full(vflags, rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0) ++#define local_irq_restore_nosync(vflags) local_irq_restore(vflags) ++ ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_H */ +diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h +new file mode 100644 +index 0000000..ab2c970 +--- /dev/null ++++ b/include/linux/ipipe_base.h +@@ -0,0 +1,103 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_base.h ++ * ++ * Copyright (C) 2002-2007 Philippe 
Gerum. ++ * 2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_BASE_H ++#define __LINUX_IPIPE_BASE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++ ++/* Number of virtual IRQs */ ++#define IPIPE_NR_VIRQS BITS_PER_LONG ++/* First virtual IRQ # */ ++#define IPIPE_VIRQ_BASE (((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG) ++/* Total number of IRQ slots */ ++#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS) ++/* Number of indirect words needed to map the whole IRQ space. */ ++#define IPIPE_IRQ_IWORDS ((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++#define IPIPE_IRQ_IMASK (BITS_PER_LONG - 1) ++#define IPIPE_IRQMASK_ANY (~0L) ++#define IPIPE_IRQMASK_VIRT (IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG)) ++ ++/* Per-cpu pipeline status */ ++#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ ++#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */ ++#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */ ++ ++#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) ++#define IPIPE_SYNC_MASK (1L << IPIPE_SYNC_FLAG) ++#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG) ++ ++typedef void (*ipipe_irq_handler_t)(unsigned irq, ++ void *cookie); ++ ++extern struct ipipe_domain ipipe_root; ++ ++#define ipipe_root_domain (&ipipe_root) ++ ++void __ipipe_unstall_root(void); ++ ++void __ipipe_restore_root(unsigned long x); ++ ++#define ipipe_preempt_disable(flags) \ ++ do { \ ++ local_irq_save_hw(flags); \ ++ if (__ipipe_root_domain_p) \ ++ preempt_disable(); \ ++ } while (0) ++ ++#define ipipe_preempt_enable(flags) \ ++ do { \ ++ if (__ipipe_root_domain_p) { \ ++ preempt_enable_no_resched(); \ ++ local_irq_restore_hw(flags); \ ++ preempt_check_resched(); \ ++ } else \ ++ local_irq_restore_hw(flags); \ ++ } while (0) ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++void ipipe_check_context(struct ipipe_domain *border_ipd); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { } ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++/* Generic features */ ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++#define __IPIPE_FEATURE_REQUEST_TICKDEV 1 ++#endif ++#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1 ++#define __IPIPE_FEATURE_FASTPEND_IRQ 1 ++#define __IPIPE_FEATURE_TRACE_EVENT 1 ++ ++#else /* !CONFIG_IPIPE */ ++#define ipipe_preempt_disable(flags) do { \ ++ preempt_disable(); \ ++ (void)(flags); \ ++ } while (0) ++#define ipipe_preempt_enable(flags) preempt_enable() ++#define ipipe_check_context(ipd) do { } while(0) ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_BASE_H */ +diff --git a/include/linux/ipipe_compat.h b/include/linux/ipipe_compat.h +new file mode 100644 +index 
0000000..50a245c +--- /dev/null ++++ b/include/linux/ipipe_compat.h +@@ -0,0 +1,54 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_compat.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_COMPAT_H ++#define __LINUX_IPIPE_COMPAT_H ++ ++#ifdef CONFIG_IPIPE_COMPAT ++/* ++ * OBSOLETE: defined only for backward compatibility. Will be removed ++ * in future releases, please update client code accordingly. ++ */ ++ ++#ifdef CONFIG_SMP ++#define ipipe_declare_cpuid int cpuid ++#define ipipe_load_cpuid() do { \ ++ cpuid = ipipe_processor_id(); \ ++ } while(0) ++#define ipipe_lock_cpu(flags) do { \ ++ local_irq_save_hw(flags); \ ++ cpuid = ipipe_processor_id(); \ ++ } while(0) ++#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) ++#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) ++#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) ++#else /* !CONFIG_SMP */ ++#define ipipe_declare_cpuid const int cpuid = 0 ++#define ipipe_load_cpuid() do { } while(0) ++#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) ++#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) ++#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) ++#define ipipe_put_cpu(flags) do { } while(0) ++#endif /* CONFIG_SMP */ ++ ++#endif /* CONFIG_IPIPE_COMPAT */ ++ ++#endif /* !__LINUX_IPIPE_COMPAT_H */ +diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h +new file mode 100644 +index 0000000..b751d54 +--- /dev/null ++++ b/include/linux/ipipe_lock.h +@@ -0,0 +1,144 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_lock.h ++ * ++ * Copyright (C) 2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_LOCK_H ++#define __LINUX_IPIPE_LOCK_H ++ ++typedef struct { ++ raw_spinlock_t bare_lock; ++} __ipipe_spinlock_t; ++ ++#define ipipe_lock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) ++ ++#define common_lock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t *) ++ ++#define bare_lock(lock) (&((__ipipe_spinlock_t *)(lock))->bare_lock) ++#define std_lock(lock) ((spinlock_t *)(lock)) ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++ ++extern int __bad_spinlock_type(void); ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ (flags) = _spin_lock_irqsave(std_lock(lock)); \ ++ else __bad_spinlock_type(); \ ++ } while (0) ++ ++#else /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin_lock_irqsave(std_lock(lock), flags); \ ++ } while (0) ++ ++#endif /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(bare_lock(lock), flags); \ ++ else if (common_lock_p(lock)) \ ++ _spin_unlock_irqrestore(std_lock(lock), flags); \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __raw_spin##op(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin##op(std_lock(lock)); \ ++ } while (0) ++ ++#define PICK_SPINOP_IRQ(op, lock) \ ++ do { \ ++ if (ipipe_lock_p(lock)) \ ++ __ipipe_spin##op##_irq(bare_lock(lock)); \ ++ else if (common_lock_p(lock)) \ ++ _spin##op##_irq(std_lock(lock)); \ ++ } while (0) ++ ++#define __raw_spin_lock_init(lock) \ ++ do { \ ++ IPIPE_DEFINE_SPINLOCK(__lock__); \ ++ *((ipipe_spinlock_t *)lock) = __lock__; \ ++ } while (0) ++ ++#ifdef CONFIG_IPIPE ++ ++#define ipipe_spinlock_t __ipipe_spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED ++#define IPIPE_DECLARE_SPINLOCK(x) extern ipipe_spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED \ ++ (__ipipe_spinlock_t) { .bare_lock = __RAW_SPIN_LOCK_UNLOCKED } ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ spin_lock_irqsave(lock, flags) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock_irqrestore(lock, flags) ++ ++void __ipipe_spin_lock_irq(raw_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irq(raw_spinlock_t *lock); ++ ++unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, ++ unsigned long x); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_spinlock_t spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ spin_lock(lock); \ ++ } while(0) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock(lock) ++ ++#define __ipipe_spin_lock_irq(lock) do { } while (0) ++#define __ipipe_spin_unlock_irq(lock) do { } while (0) ++#define __ipipe_spin_lock_irqsave(lock) 0 ++#define __ipipe_spin_unlock_irqrestore(lock, x) 
do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_irqbegin(lock) do { } while (0) ++#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_LOCK_H */ +diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h +new file mode 100644 +index 0000000..4d83119 +--- /dev/null ++++ b/include/linux/ipipe_percpu.h +@@ -0,0 +1,86 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_percpu.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_PERCPU_H ++#define __LINUX_IPIPE_PERCPU_H ++ ++#include ++#include ++ ++struct ipipe_domain; ++ ++struct ipipe_percpu_domain_data { ++ unsigned long status; /* <= Must be first in struct. */ ++ unsigned long irqpend_himask; ++ unsigned long irqpend_lomask[IPIPE_IRQ_IWORDS]; ++ unsigned long irqheld_mask[IPIPE_IRQ_IWORDS]; ++ unsigned long irqall[IPIPE_NR_IRQS]; ++ u64 evsync; ++}; ++ ++/* ++ * CAREFUL: all accessors based on __raw_get_cpu_var() you may find in ++ * this file should be used only while hw interrupts are off, to ++ * prevent from CPU migration regardless of the running domain. 
++ */ ++#ifdef CONFIG_SMP ++#define ipipe_percpudom_ptr(ipd, cpu) \ ++ (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot]) ++#define ipipe_cpudom_ptr(ipd) \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot]) ++#else ++DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]); ++#define ipipe_percpudom_ptr(ipd, cpu) \ ++ (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]) ++#define ipipe_cpudom_ptr(ipd) \ ++ (__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]) ++#endif ++#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var) ++#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var) ++ ++#define IPIPE_ROOT_SLOT 0 ++#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1) ++ ++DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]); ++ ++DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); ++ ++DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++DECLARE_PER_CPU(int, ipipe_percpu_context_check); ++DECLARE_PER_CPU(int, ipipe_saved_context_check_state); ++#endif ++ ++#define ipipe_root_cpudom_ptr(var) \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT]) ++ ++#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var ++ ++#define ipipe_this_cpudom_var(var) \ ++ ipipe_cpudom_var(__ipipe_current_domain, var) ++ ++#define ipipe_head_cpudom_ptr() \ ++ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT]) ++ ++#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var ++ ++#endif /* !__LINUX_IPIPE_PERCPU_H */ +diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h +new file mode 100644 +index 0000000..4a1cb1b +--- /dev/null ++++ b/include/linux/ipipe_tickdev.h +@@ -0,0 +1,58 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_tickdev.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_TICKDEV_H ++#define __LINUX_IPIPE_TICKDEV_H ++ ++#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS) ++ ++#include ++ ++struct tick_device; ++ ++struct ipipe_tick_device { ++ ++ void (*emul_set_mode)(enum clock_event_mode, ++ struct clock_event_device *cdev); ++ int (*emul_set_tick)(unsigned long delta, ++ struct clock_event_device *cdev); ++ void (*real_set_mode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev); ++ int (*real_set_tick)(unsigned long delta, ++ struct clock_event_device *cdev); ++ struct tick_device *slave; ++ unsigned long real_max_delta_ns; ++ unsigned long real_mult; ++ int real_shift; ++}; ++ ++int ipipe_request_tickdev(const char *devname, ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ int cpu, unsigned long *tmfreq); ++ ++void ipipe_release_tickdev(int cpu); ++ ++#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */ ++ ++#endif /* !__LINUX_IPIPE_TICKDEV_H */ +diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h +new file mode 100644 +index 0000000..627b354 +--- /dev/null ++++ b/include/linux/ipipe_trace.h +@@ -0,0 +1,72 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_trace.h ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _LINUX_IPIPE_TRACE_H ++#define _LINUX_IPIPE_TRACE_H ++ ++#ifdef CONFIG_IPIPE_TRACE ++ ++#include ++ ++void ipipe_trace_begin(unsigned long v); ++void ipipe_trace_end(unsigned long v); ++void ipipe_trace_freeze(unsigned long v); ++void ipipe_trace_special(unsigned char special_id, unsigned long v); ++void ipipe_trace_pid(pid_t pid, short prio); ++void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); ++int ipipe_trace_max_reset(void); ++int ipipe_trace_frozen_reset(void); ++ ++#else /* !CONFIG_IPIPE_TRACE */ ++ ++#define ipipe_trace_begin(v) do { (void)(v); } while(0) ++#define ipipe_trace_end(v) do { (void)(v); } while(0) ++#define ipipe_trace_freeze(v) do { (void)(v); } while(0) ++#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) ++#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) ++#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) ++#define ipipe_trace_max_reset() do { } while(0) ++#define ipipe_trace_froze_reset() do { } while(0) ++ ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void); ++void ipipe_trace_panic_dump(void); ++#else ++static inline void ipipe_trace_panic_freeze(void) { } ++static inline void ipipe_trace_panic_dump(void) { } ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) ++#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) ++#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) ++#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) ++#else ++#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irqsoff() do { } while(0) ++#define ipipe_trace_irqson() do { } while(0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_TRACE_H */ +diff --git a/include/linux/irq.h b/include/linux/irq.h +index ae9653d..23e446a 100644 +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -124,6 +124,9 @@ struct irq_chip { + void (*end)(unsigned int irq); + int (*set_affinity)(unsigned int irq, + const struct cpumask *dest); ++#ifdef CONFIG_IPIPE ++ void (*move)(unsigned int irq); ++#endif /* CONFIG_IPIPE */ + int (*retrigger)(unsigned int irq); + int (*set_type)(unsigned int irq, unsigned int flow_type); + int (*set_wake)(unsigned int irq, unsigned int on); +@@ -173,6 +176,12 @@ struct irq_2_iommu; + * @name: flow handler name for /proc/interrupts output + */ + struct irq_desc { ++#ifdef CONFIG_IPIPE ++ void (*ipipe_ack)(unsigned int irq, ++ struct irq_desc *desc); ++ void (*ipipe_end)(unsigned int irq, ++ struct irq_desc *desc); ++#endif /* CONFIG_IPIPE */ + unsigned int irq; + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; +@@ -346,6 +355,10 @@ extern void + set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle, const char *name); + ++extern irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained); ++ + extern void + __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name); +@@ -357,6 +370,7 @@ static inline void __set_irq_handler_unlocked(int irq, + struct irq_desc *desc; + + desc = irq_to_desc(irq); ++ handler = __fixup_irq_handler(desc, handler, 0); + desc->handle_irq = handler; + } + +diff --git a/include/linux/kernel.h b/include/linux/kernel.h +index f4e3184..3b80b7b 100644 +--- 
a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -119,9 +120,12 @@ struct user; + + #ifdef CONFIG_PREEMPT_VOLUNTARY + extern int _cond_resched(void); +-# define might_resched() _cond_resched() ++# define might_resched() do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ _cond_resched(); \ ++ } while (0) + #else +-# define might_resched() do { } while (0) ++# define might_resched() ipipe_check_context(ipipe_root_domain) + #endif + + #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 72b1a10..80553be 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -9,13 +9,20 @@ + #include + #include + #include ++#include + + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) + extern void add_preempt_count(int val); + extern void sub_preempt_count(int val); + #else +-# define add_preempt_count(val) do { preempt_count() += (val); } while (0) +-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0) ++# define add_preempt_count(val) do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ preempt_count() += (val); \ ++ } while (0) ++# define sub_preempt_count(val) do { \ ++ ipipe_check_context(ipipe_root_domain); \ ++ preempt_count() -= (val); \ ++ } while (0) + #endif + + #define inc_preempt_count() add_preempt_count(1) +diff --git a/include/linux/sched.h b/include/linux/sched.h +index c4496d9..9794fa7 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -61,6 +61,7 @@ struct sched_param { + #include + #include + #include ++#include + + #include + #include +@@ -195,6 +196,13 @@ extern unsigned long long time_sync_thresh; + #define TASK_DEAD 64 + #define TASK_WAKEKILL 128 + #define TASK_WAKING 256 ++#ifdef CONFIG_IPIPE ++#define TASK_ATOMICSWITCH 512 ++#define TASK_NOWAKEUP 1024 ++#else /* !CONFIG_IPIPE */ ++#define TASK_ATOMICSWITCH 0 ++#define TASK_NOWAKEUP 0 ++#endif /* CONFIG_IPIPE */ + + /* Convenience macros for the sake of set_task_state */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +@@ -302,6 +310,15 @@ extern void trap_init(void); + extern void update_process_times(int user); + extern void scheduler_tick(void); + ++#ifdef CONFIG_IPIPE ++void update_root_process_times(struct pt_regs *regs); ++#else /* !CONFIG_IPIPE */ ++static inline void update_root_process_times(struct pt_regs *regs) ++{ ++ update_process_times(user_mode(regs)); ++} ++#endif /* CONFIG_IPIPE */ ++ + extern void sched_show_task(struct task_struct *p); + + #ifdef CONFIG_DETECT_SOFTLOCKUP +@@ -349,8 +366,8 @@ extern signed long schedule_timeout(signed long timeout); + extern signed long schedule_timeout_interruptible(signed long timeout); + extern signed long schedule_timeout_killable(signed long timeout); + extern signed long schedule_timeout_uninterruptible(signed long timeout); +-asmlinkage void __schedule(void); +-asmlinkage void schedule(void); ++asmlinkage int __schedule(void); ++asmlinkage int schedule(void); + extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); + + struct nsproxy; +@@ -475,6 +492,9 @@ extern int get_dumpable(struct mm_struct *mm); + #endif + /* leave room for more dump flags */ + #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ ++#ifdef CONFIG_IPIPE ++#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ ++#endif + + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) + +@@ -1497,6 +1517,9 @@ 
struct task_struct { + #endif + atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; ++#ifdef CONFIG_IPIPE ++ void *ptd[IPIPE_ROOT_NPTDKEYS]; ++#endif + + /* + * cache last used pipe for splice +@@ -1740,6 +1763,11 @@ extern cputime_t task_gtime(struct task_struct *p); + #define PF_EXITING 0x00000004 /* getting shut down */ + #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ + #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ ++#ifdef CONFIG_IPIPE ++#define PF_EVNOTIFY 0x00000020 /* Notify other domains about internal events */ ++#else ++#define PF_EVNOTIFY 0 ++#endif /* CONFIG_IPIPE */ + #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ + #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ + #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h +index f0ca7a7..3096642 100644 +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -90,10 +90,12 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); + # include + #endif + ++#include ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key); +-# define spin_lock_init(lock) \ ++# define _spin_lock_init(lock) \ + do { \ + static struct lock_class_key __key; \ + \ +@@ -101,10 +103,12 @@ do { \ + } while (0) + + #else +-# define spin_lock_init(lock) \ ++# define _spin_lock_init(lock) \ + do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + #endif + ++# define spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key); +@@ -186,7 +190,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) + #define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) + +-#define spin_lock(lock) _spin_lock(lock) ++#define spin_lock(lock) PICK_SPINOP(_lock, lock) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) +@@ -208,7 +212,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- flags = _spin_lock_irqsave(lock); \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags); \ + } while (0) + #define read_lock_irqsave(lock, flags) \ + do { \ +@@ -240,7 +244,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + #define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- _spin_lock_irqsave(lock, flags); \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags); \ + } while (0) + #define read_lock_irqsave(lock, flags) \ + do { \ +@@ -257,23 +261,23 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } + + #endif + +-#define spin_lock_irq(lock) _spin_lock_irq(lock) ++#define spin_lock_irq(lock) PICK_SPINOP_IRQ(_lock, lock) + #define spin_lock_bh(lock) _spin_lock_bh(lock) + #define read_lock_irq(lock) _read_lock_irq(lock) + #define read_lock_bh(lock) _read_lock_bh(lock) + #define write_lock_irq(lock) _write_lock_irq(lock) + #define write_lock_bh(lock) _write_lock_bh(lock) +-#define spin_unlock(lock) _spin_unlock(lock) ++#define spin_unlock(lock) PICK_SPINOP(_unlock, lock) + #define read_unlock(lock) _read_unlock(lock) + #define write_unlock(lock) _write_unlock(lock) +-#define spin_unlock_irq(lock) _spin_unlock_irq(lock) ++#define spin_unlock_irq(lock) 
PICK_SPINOP_IRQ(_unlock, lock) + #define read_unlock_irq(lock) _read_unlock_irq(lock) + #define write_unlock_irq(lock) _write_unlock_irq(lock) + + #define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ +- _spin_unlock_irqrestore(lock, flags); \ ++ PICK_SPINUNLOCK_IRQRESTORE(lock, flags); \ + } while (0) + #define spin_unlock_bh(lock) _spin_unlock_bh(lock) + +diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h +index 7a7e18f..190bc0a 100644 +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -229,7 +229,9 @@ static inline int __write_trylock(rwlock_t *lock) + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline void __read_lock(rwlock_t *lock) + { +@@ -250,7 +252,7 @@ static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +-#ifdef CONFIG_LOCKDEP ++#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + #else + _raw_spin_lock_flags(lock, &flags); +diff --git a/init/Kconfig b/init/Kconfig +index bbf7773..8df8b6d 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -78,6 +78,7 @@ config INIT_ENV_ARG_LIMIT + + config LOCALVERSION + string "Local version - append to kernel release" ++ default "-ipipe" + help + Append an extra string to the end of your kernel version. + This will show up when you type uname, for example. +diff --git a/init/main.c b/init/main.c +index 1fdfd0e..a3c862f 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -536,7 +536,7 @@ asmlinkage void __init start_kernel(void) + + cgroup_init_early(); + +- local_irq_disable(); ++ local_irq_disable_hw(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + +@@ -602,6 +602,11 @@ asmlinkage void __init start_kernel(void) + softirq_init(); + timekeeping_init(); + time_init(); ++ /* ++ * We need to wait for the interrupt and time subsystems to be ++ * initialized before enabling the pipeline. 
++ */ ++ ipipe_init(); + profile_init(); + if (!irqs_disabled()) + printk(KERN_CRIT "start_kernel(): bug: interrupts were " +@@ -783,6 +788,7 @@ static void __init do_basic_setup(void) + init_tmpfs(); + driver_init(); + init_irq_proc(); ++ ipipe_init_proc(); + do_ctors(); + do_initcalls(); + } +diff --git a/kernel/Makefile b/kernel/Makefile +index d7c13d2..b6a84ee 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -83,6 +83,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o + obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o + obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o + obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_IPIPE) += ipipe/ + obj-$(CONFIG_SYSCTL) += utsname_sysctl.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o +diff --git a/kernel/exit.c b/kernel/exit.c +index f7864ac..f5c3129 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -963,6 +963,7 @@ NORET_TYPE void do_exit(long code) + acct_process(); + trace_sched_process_exit(tsk); + ++ ipipe_exit_notify(tsk); + exit_sem(tsk); + exit_files(tsk); + exit_fs(tsk); +diff --git a/kernel/fork.c b/kernel/fork.c +index cbd6989..3d44cd6 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -540,6 +540,7 @@ void mmput(struct mm_struct *mm) + exit_aio(mm); + ksm_exit(mm); + exit_mmap(mm); ++ ipipe_cleanup_notify(mm); + set_mm_exe_file(mm, NULL); + if (!list_empty(&mm->mmlist)) { + spin_lock(&mmlist_lock); +@@ -947,7 +948,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) + { + unsigned long new_flags = p->flags; + +- new_flags &= ~PF_SUPERPRIV; ++ new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY); + new_flags |= PF_FORKNOEXEC; + new_flags |= PF_STARTING; + p->flags = new_flags; +@@ -1332,6 +1333,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, + write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); + cgroup_post_fork(p); ++#ifdef CONFIG_IPIPE ++ memset(p->ptd, 0, sizeof(p->ptd)); ++#endif /* CONFIG_IPIPE */ + perf_event_fork(p); + return p; + +@@ -1740,11 +1744,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) + } + + if (new_mm) { ++ unsigned long flags; + mm = current->mm; + active_mm = current->active_mm; + current->mm = new_mm; ++ ipipe_mm_switch_protect(flags); + current->active_mm = new_mm; + activate_mm(active_mm, new_mm); ++ ipipe_mm_switch_unprotect(flags); + new_mm = mm; + } + +diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig +new file mode 100644 +index 0000000..de5e6a3 +--- /dev/null ++++ b/kernel/ipipe/Kconfig +@@ -0,0 +1,35 @@ ++config IPIPE ++ bool "Interrupt pipeline" ++ default y ++ ---help--- ++ Activate this option if you want the interrupt pipeline to be ++ compiled in. ++ ++config IPIPE_DOMAINS ++ int "Max domains" ++ depends on IPIPE ++ default 4 ++ ---help--- ++ The maximum number of I-pipe domains to run concurrently. ++ ++config IPIPE_COMPAT ++ bool "Maintain code compatibility with older releases" ++ depends on IPIPE ++ default y ++ ---help--- ++ Activate this option if you want the compatibility code to be ++ defined, so that older I-pipe clients may use obsolete ++ constructs. WARNING: obsolete code will be eventually ++ deprecated in future I-pipe releases, and removed from the ++ compatibility support as time passes. Please fix I-pipe ++ clients to get rid of such uses as soon as possible. 
++ ++config IPIPE_DELAYED_ATOMICSW ++ bool ++ depends on IPIPE ++ default n ++ ++config IPIPE_UNMASKED_CONTEXT_SWITCH ++ bool ++ depends on IPIPE ++ default n +diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug +new file mode 100644 +index 0000000..629c894 +--- /dev/null ++++ b/kernel/ipipe/Kconfig.debug +@@ -0,0 +1,97 @@ ++config IPIPE_DEBUG ++ bool "I-pipe debugging" ++ depends on IPIPE ++ ++config IPIPE_DEBUG_CONTEXT ++ bool "Check for illicit cross-domain calls" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ Enable this feature to arm checkpoints in the kernel that ++ verify the correct invocation context. On entry of critical ++ Linux services a warning is issued if the caller is not ++ running over the root domain. ++ ++config IPIPE_DEBUG_INTERNAL ++ bool "Enable internal debug checks" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ When this feature is enabled, I-pipe will perform internal ++ consistency checks of its subsystems, e.g. on per-cpu variable ++ access. ++ ++config IPIPE_TRACE ++ bool "Latency tracing" ++ depends on IPIPE_DEBUG ++ select FRAME_POINTER ++ select KALLSYMS ++ select PROC_FS ++ ---help--- ++ Activate this option if you want to use per-function tracing of ++ the kernel. The tracer will collect data via instrumentation ++ features like the one below or with the help of explicite calls ++ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the ++ in-kernel tracing API. The collected data and runtime control ++ is available via /proc/ipipe/trace/*. ++ ++if IPIPE_TRACE ++ ++config IPIPE_TRACE_ENABLE ++ bool "Enable tracing on boot" ++ default y ++ ---help--- ++ Disable this option if you want to arm the tracer after booting ++ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce ++ boot time on slow embedded devices due to the tracer overhead. ++ ++config IPIPE_TRACE_MCOUNT ++ bool "Instrument function entries" ++ default y ++ select FUNCTION_TRACER ++ select TRACING ++ select CONTEXT_SWITCH_TRACER ++ select FTRACE_MCOUNT_RECORD ++ select DYNAMIC_FTRACE ++ ---help--- ++ When enabled, records every kernel function entry in the tracer ++ log. While this slows down the system noticeably, it provides ++ the highest level of information about the flow of events. ++ However, it can be switch off in order to record only explicit ++ I-pipe trace points. ++ ++config IPIPE_TRACE_IRQSOFF ++ bool "Trace IRQs-off times" ++ default y ++ ---help--- ++ Activate this option if I-pipe shall trace the longest path ++ with hard-IRQs switched off. ++ ++config IPIPE_TRACE_SHIFT ++ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" ++ range 10 18 ++ default 14 ++ ---help--- ++ The number of trace points to hold tracing data for each ++ trace path, as a power of 2. ++ ++config IPIPE_TRACE_VMALLOC ++ bool "Use vmalloc'ed trace buffer" ++ default y if EMBEDDED ++ ---help--- ++ Instead of reserving static kernel data, the required buffer ++ is allocated via vmalloc during boot-up when this option is ++ enabled. This can help to start systems that are low on memory, ++ but it slightly degrades overall performance. Try this option ++ when a traced kernel hangs unexpectedly at boot time. ++ ++config IPIPE_TRACE_PANIC ++ bool "Enable panic back traces" ++ default y ++ ---help--- ++ Provides services to freeze and dump a back trace on panic ++ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions ++ as well as ordinary kernel oopses. You can control the number ++ of printed back trace points via /proc/ipipe/trace. 
++ ++endif +diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile +new file mode 100644 +index 0000000..6257dfa +--- /dev/null ++++ b/kernel/ipipe/Makefile +@@ -0,0 +1,3 @@ ++ ++obj-$(CONFIG_IPIPE) += core.o ++obj-$(CONFIG_IPIPE_TRACE) += tracer.o +diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c +new file mode 100644 +index 0000000..50d8d23 +--- /dev/null ++++ b/kernel/ipipe/core.c +@@ -0,0 +1,1802 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/core.c ++ * ++ * Copyright (C) 2002-2005 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Architecture-independent I-PIPE core support. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_PROC_FS ++#include ++#include ++#endif /* CONFIG_PROC_FS */ ++#include ++#include ++#include ++ ++static int __ipipe_ptd_key_count; ++ ++static unsigned long __ipipe_ptd_key_map; ++ ++static unsigned long __ipipe_domain_slot_map; ++ ++struct ipipe_domain ipipe_root; ++ ++#ifndef CONFIG_SMP ++/* ++ * Create an alias to the unique root status, so that arch-dep code ++ * may get simple and easy access to this percpu variable. We also ++ * create an array of pointers to the percpu domain data; this tends ++ * to produce a better code when reaching non-root domains. We make ++ * sure that the early boot code would be able to dereference the ++ * pointer to the root domain data safely by statically initializing ++ * its value (local_irq*() routines depend on this). ++ */ ++#if __GNUC__ >= 4 ++extern unsigned long __ipipe_root_status ++__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray))))); ++EXPORT_SYMBOL(__ipipe_root_status); ++#else /* __GNUC__ < 4 */ ++/* ++ * Work around a GCC 3.x issue making alias symbols unusable as ++ * constant initializers. ++ */ ++unsigned long *const __ipipe_root_status_addr = ++ &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status; ++EXPORT_SYMBOL(__ipipe_root_status_addr); ++#endif /* __GNUC__ < 4 */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) = ++{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) }; ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr); ++#endif /* !CONFIG_SMP */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) = ++{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. 
*/ ++ ++DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root }; ++ ++DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */ ++ ++static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock); ++ ++LIST_HEAD(__ipipe_pipeline); ++ ++unsigned long __ipipe_virtual_irq_map; ++ ++#ifdef CONFIG_PRINTK ++unsigned __ipipe_printk_virq; ++#endif /* CONFIG_PRINTK */ ++ ++int __ipipe_event_monitors[IPIPE_NR_EVENTS]; ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++ ++DECLARE_PER_CPU(struct tick_device, tick_cpu_device); ++ ++static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device); ++ ++int ipipe_request_tickdev(const char *devname, ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long delta, ++ struct clock_event_device *cdev), ++ int cpu, unsigned long *tmfreq) ++{ ++ struct ipipe_tick_device *itd; ++ struct tick_device *slave; ++ struct clock_event_device *evtdev; ++ unsigned long long freq; ++ unsigned long flags; ++ int status; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ itd = &per_cpu(ipipe_tick_cpu_device, cpu); ++ ++ if (itd->slave != NULL) { ++ status = -EBUSY; ++ goto out; ++ } ++ ++ slave = &per_cpu(tick_cpu_device, cpu); ++ ++ if (strcmp(slave->evtdev->name, devname)) { ++ /* ++ * No conflict so far with the current tick device, ++ * check whether the requested device is sane and has ++ * been blessed by the kernel. ++ */ ++ status = __ipipe_check_tickdev(devname) ? ++ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN; ++ goto out; ++ } ++ ++ /* ++ * Our caller asks for using the same clock event device for ++ * ticking than we do, let's create a tick emulation device to ++ * interpose on the set_next_event() method, so that we may ++ * both manage the device in oneshot mode. Only the tick ++ * emulation code will actually program the clockchip hardware ++ * for the next shot, though. ++ * ++ * CAUTION: we still have to grab the tick device even when it ++ * current runs in periodic mode, since the kernel may switch ++ * to oneshot dynamically (highres/no_hz tick mode). 
++ */ ++ ++ evtdev = slave->evtdev; ++ status = evtdev->mode; ++ ++ if (status == CLOCK_EVT_MODE_SHUTDOWN) ++ goto out; ++ ++ itd->slave = slave; ++ itd->emul_set_mode = emumode; ++ itd->emul_set_tick = emutick; ++ itd->real_set_mode = evtdev->set_mode; ++ itd->real_set_tick = evtdev->set_next_event; ++ itd->real_max_delta_ns = evtdev->max_delta_ns; ++ itd->real_mult = evtdev->mult; ++ itd->real_shift = evtdev->shift; ++ freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; ++ *tmfreq = (unsigned long)freq; ++ evtdev->set_mode = emumode; ++ evtdev->set_next_event = emutick; ++ evtdev->max_delta_ns = ULONG_MAX; ++ evtdev->mult = 1; ++ evtdev->shift = 0; ++out: ++ ipipe_critical_exit(flags); ++ ++ return status; ++} ++ ++void ipipe_release_tickdev(int cpu) ++{ ++ struct ipipe_tick_device *itd; ++ struct tick_device *slave; ++ struct clock_event_device *evtdev; ++ unsigned long flags; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ itd = &per_cpu(ipipe_tick_cpu_device, cpu); ++ ++ if (itd->slave != NULL) { ++ slave = &per_cpu(tick_cpu_device, cpu); ++ evtdev = slave->evtdev; ++ evtdev->set_mode = itd->real_set_mode; ++ evtdev->set_next_event = itd->real_set_tick; ++ evtdev->max_delta_ns = itd->real_max_delta_ns; ++ evtdev->mult = itd->real_mult; ++ evtdev->shift = itd->real_shift; ++ itd->slave = NULL; ++ } ++ ++ ipipe_critical_exit(flags); ++} ++ ++#endif /* CONFIG_GENERIC_CLOCKEVENTS */ ++ ++/* ++ * ipipe_init() -- Initialization routine of the IPIPE layer. Called ++ * by the host kernel early during the boot procedure. ++ */ ++void __init ipipe_init(void) ++{ ++ struct ipipe_domain *ipd = &ipipe_root; ++ ++ __ipipe_check_platform(); /* Do platform dependent checks first. */ ++ ++ /* ++ * A lightweight registration code for the root domain. We are ++ * running on the boot CPU, hw interrupts are off, and ++ * secondary CPUs are still lost in space. ++ */ ++ ++ /* Reserve percpu data slot #0 for the root domain. */ ++ ipd->slot = 0; ++ set_bit(0, &__ipipe_domain_slot_map); ++ ++ ipd->name = "Linux"; ++ ipd->domid = IPIPE_ROOT_ID; ++ ipd->priority = IPIPE_ROOT_PRIO; ++ ++ __ipipe_init_stage(ipd); ++ ++ INIT_LIST_HEAD(&ipd->p_link); ++ list_add_tail(&ipd->p_link, &__ipipe_pipeline); ++ ++ __ipipe_init_platform(); ++ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. 
*/ ++ ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; ++ ipd->irqs[__ipipe_printk_virq].cookie = NULL; ++ ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; ++ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; ++#endif /* CONFIG_PRINTK */ ++ ++ __ipipe_enable_pipeline(); ++ ++ printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", ++ IPIPE_VERSION_STRING); ++} ++ ++void __ipipe_init_stage(struct ipipe_domain *ipd) ++{ ++ int cpu, n; ++ ++ for_each_online_cpu(cpu) { ++ ++ ipipe_percpudom(ipd, irqpend_himask, cpu) = 0; ++ ++ for (n = 0; n < IPIPE_IRQ_IWORDS; n++) { ++ ipipe_percpudom(ipd, irqpend_lomask, cpu)[n] = 0; ++ ipipe_percpudom(ipd, irqheld_mask, cpu)[n] = 0; ++ } ++ ++ for (n = 0; n < IPIPE_NR_IRQS; n++) ++ ipipe_percpudom(ipd, irqall, cpu)[n] = 0; ++ ++ ipipe_percpudom(ipd, evsync, cpu) = 0; ++ } ++ ++ for (n = 0; n < IPIPE_NR_IRQS; n++) { ++ ipd->irqs[n].acknowledge = NULL; ++ ipd->irqs[n].handler = NULL; ++ ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ ++ } ++ ++ for (n = 0; n < IPIPE_NR_EVENTS; n++) ++ ipd->evhand[n] = NULL; ++ ++ ipd->evself = 0LL; ++ mutex_init(&ipd->mutex); ++ ++ __ipipe_hook_critical_ipi(ipd); ++} ++ ++void __ipipe_cleanup_domain(struct ipipe_domain *ipd) ++{ ++ ipipe_unstall_pipeline_from(ipd); ++ ++#ifdef CONFIG_SMP ++ { ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpudom(ipd, irqpend_himask, cpu) != 0) ++ cpu_relax(); ++ } ++ } ++#else ++ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL; ++#endif ++ ++ clear_bit(ipd->slot, &__ipipe_domain_slot_map); ++} ++ ++void __ipipe_unstall_root(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ local_irq_disable_hw(); ++ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++ /* This helps catching bad usage from assembly call sites. */ ++ BUG_ON(!__ipipe_root_domain_p); ++#endif ++ ++ p = ipipe_root_cpudom_ptr(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(p->irqpend_himask != 0)) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ ++ local_irq_enable_hw(); ++} ++ ++void __ipipe_restore_root(unsigned long x) ++{ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++ BUG_ON(!ipipe_root_domain_p); ++#endif ++ ++ if (x) ++ __ipipe_stall_root(); ++ else ++ __ipipe_unstall_root(); ++} ++ ++void ipipe_stall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags; ++ /* ++ * We have to prevent against race on updating the status ++ * variable _and_ CPU migration at the same time, so disable ++ * hw IRQs here. 
++ */ ++ local_irq_save_hw(flags); ++ ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (!__ipipe_pipeline_head_p(ipd)) ++ local_irq_restore_hw(flags); ++} ++ ++unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags, x; ++ ++ /* See ipipe_stall_pipeline_from() */ ++ local_irq_save_hw(flags); ++ ++ x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (!__ipipe_pipeline_head_p(ipd)) ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) ++{ ++ unsigned long flags, x; ++ struct list_head *pos; ++ ++ local_irq_save_hw(flags); ++ ++ x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); ++ ++ if (ipd == __ipipe_current_domain) ++ pos = &ipd->p_link; ++ else ++ pos = __ipipe_pipeline.next; ++ ++ __ipipe_walk_pipeline(pos); ++ ++ if (likely(__ipipe_pipeline_head_p(ipd))) ++ local_irq_enable_hw(); ++ else ++ local_irq_restore_hw(flags); ++ ++ return x; ++} ++ ++void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, ++ unsigned long x) ++{ ++ if (x) ++ ipipe_stall_pipeline_from(ipd); ++ else ++ ipipe_unstall_pipeline_from(ipd); ++} ++ ++void ipipe_unstall_pipeline_head(void) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); ++ ++ local_irq_disable_hw(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(p->irqpend_himask != 0)) { ++ struct ipipe_domain *head_domain = __ipipe_pipeline_head(); ++ if (likely(head_domain == __ipipe_current_domain)) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ else ++ __ipipe_walk_pipeline(&head_domain->p_link); ++ } ++ ++ local_irq_enable_hw(); ++} ++ ++void __ipipe_restore_pipeline_head(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); ++ ++ local_irq_disable_hw(); ++ ++ if (x) { ++#ifdef CONFIG_DEBUG_KERNEL ++ static int warned; ++ if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { ++ /* ++ * Already stalled albeit ipipe_restore_pipeline_head() ++ * should have detected it? Send a warning once. 
++ */ ++ warned = 1; ++ printk(KERN_WARNING ++ "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); ++ dump_stack(); ++ } ++#else /* !CONFIG_DEBUG_KERNEL */ ++ set_bit(IPIPE_STALL_FLAG, &p->status); ++#endif /* CONFIG_DEBUG_KERNEL */ ++ } ++ else { ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (unlikely(p->irqpend_himask != 0)) { ++ struct ipipe_domain *head_domain = __ipipe_pipeline_head(); ++ if (likely(head_domain == __ipipe_current_domain)) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ else ++ __ipipe_walk_pipeline(&head_domain->p_link); ++ } ++ local_irq_enable_hw(); ++ } ++} ++ ++void __ipipe_spin_lock_irq(raw_spinlock_t *lock) ++{ ++ local_irq_disable_hw(); ++ __raw_spin_lock(lock); ++ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++} ++ ++void __ipipe_spin_unlock_irq(raw_spinlock_t *lock) ++{ ++ __raw_spin_unlock(lock); ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_enable_hw(); ++} ++ ++unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock) ++{ ++ unsigned long flags; ++ int s; ++ ++ local_irq_save_hw(flags); ++ __raw_spin_lock(lock); ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ ++ return raw_mangle_irq_bits(s, flags); ++} ++ ++void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x) ++{ ++ __raw_spin_unlock(lock); ++ if (!raw_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_restore_hw(x); ++} ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) ++{ ++ __raw_spin_unlock(&lock->bare_lock); ++} ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x) ++{ ++ if (!raw_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); ++ local_irq_restore_hw(x); ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq) ++{ ++ int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK; ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); ++ ++ prefetchw(p); ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(rank, &p->irqpend_lomask[level]); ++ __set_bit(level, &p->irqpend_himask); ++ } else ++ __set_bit(rank, &p->irqheld_mask[level]); ++ ++ p->irqall[irq]++; ++} ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int level, rank; ++ ++ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) ++ return; ++ ++ level = irq >> IPIPE_IRQ_ISHIFT; ++ rank = irq & IPIPE_IRQ_IMASK; ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ ++ if (__test_and_clear_bit(rank, &p->irqpend_lomask[level])) ++ __set_bit(rank, &p->irqheld_mask[level]); ++ if (p->irqpend_lomask[level] == 0) ++ __clear_bit(level, &p->irqpend_himask); ++} ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int cpu, level, rank; ++ ++ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) ++ return; ++ ++ level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK; ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpudom_ptr(ipd, cpu); ++ if (test_and_clear_bit(rank, &p->irqheld_mask[level])) { ++ /* We need atomic ops here: */ ++ set_bit(rank, &p->irqpend_lomask[level]); ++ set_bit(level, &p->irqpend_himask); ++ } ++ } ++} ++ ++/* ++ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must ++ * be called with local hw interrupts disabled. ++ */ ++void __ipipe_walk_pipeline(struct list_head *pos) ++{ ++ struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain; ++ struct ipipe_percpu_domain_data *p, *np; ++ ++ p = ipipe_cpudom_ptr(this_domain); ++ ++ while (pos != &__ipipe_pipeline) { ++ ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ np = ipipe_cpudom_ptr(next_domain); ++ ++ if (test_bit(IPIPE_STALL_FLAG, &np->status)) ++ break; /* Stalled stage -- do not go further. */ ++ ++ if (np->irqpend_himask) { ++ if (next_domain == this_domain) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ else { ++ ++ p->evsync = 0; ++ __ipipe_current_domain = next_domain; ++ ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ ++ ++ if (__ipipe_current_domain == next_domain) ++ __ipipe_current_domain = this_domain; ++ /* ++ * Otherwise, something changed the current domain under our ++ * feet recycling the register set; do not override the new ++ * domain. ++ */ ++ ++ if (p->irqpend_himask && ++ !test_bit(IPIPE_STALL_FLAG, &p->status)) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ } ++ break; ++ } else if (next_domain == this_domain) ++ break; ++ ++ pos = next_domain->p_link.next; ++ } ++} ++ ++/* ++ * ipipe_suspend_domain() -- Suspend the current domain, switching to ++ * the next one which has pending work down the pipeline. ++ */ ++void ipipe_suspend_domain(void) ++{ ++ struct ipipe_domain *this_domain, *next_domain; ++ struct ipipe_percpu_domain_data *p; ++ struct list_head *ln; ++ unsigned long flags; ++ ++ local_irq_save_hw(flags); ++ ++ this_domain = next_domain = __ipipe_current_domain; ++ p = ipipe_cpudom_ptr(this_domain); ++ p->status &= ~(IPIPE_STALL_MASK|IPIPE_SYNC_MASK); ++ ++ if (p->irqpend_himask != 0) ++ goto sync_stage; ++ ++ for (;;) { ++ ln = next_domain->p_link.next; ++ ++ if (ln == &__ipipe_pipeline) ++ break; ++ ++ next_domain = list_entry(ln, struct ipipe_domain, p_link); ++ p = ipipe_cpudom_ptr(next_domain); ++ ++ if (p->status & IPIPE_STALL_MASK) ++ break; ++ ++ if (p->irqpend_himask == 0) ++ continue; ++ ++ __ipipe_current_domain = next_domain; ++sync_stage: ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ ++ if (__ipipe_current_domain != next_domain) ++ /* ++ * Something has changed the current domain under our ++ * feet, recycling the register set; take note. ++ */ ++ this_domain = __ipipe_current_domain; ++ } ++ ++ __ipipe_current_domain = this_domain; ++ ++ local_irq_restore_hw(flags); ++} ++ ++ ++/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. ++ * Virtual interrupts are handled in exactly the same way than their ++ * hw-generated counterparts wrt pipelining. 
++ */ ++unsigned ipipe_alloc_virq(void) ++{ ++ unsigned long flags, irq = 0; ++ int ipos; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ if (__ipipe_virtual_irq_map != ~0) { ++ ipos = ffz(__ipipe_virtual_irq_map); ++ set_bit(ipos, &__ipipe_virtual_irq_map); ++ irq = ipos + IPIPE_VIRQ_BASE; ++ } ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return irq; ++} ++ ++/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw ++ acknowledge routine) to an interrupt for a given domain. */ ++ ++int ipipe_virtualize_irq(struct ipipe_domain *ipd, ++ unsigned irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t acknowledge, ++ unsigned modemask) ++{ ++ ipipe_irq_handler_t old_handler; ++ unsigned long flags; ++ int err; ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) ++ return -EPERM; ++ ++ if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) ++ /* Silently unwire interrupts for non-heading domains. */ ++ modemask &= ~IPIPE_WIRED_MASK; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ old_handler = ipd->irqs[irq].handler; ++ ++ if (handler != NULL) { ++ if (handler == IPIPE_SAME_HANDLER) { ++ handler = old_handler; ++ cookie = ipd->irqs[irq].cookie; ++ ++ if (handler == NULL) { ++ err = -EINVAL; ++ goto unlock_and_exit; ++ } ++ } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && ++ old_handler != NULL) { ++ err = -EBUSY; ++ goto unlock_and_exit; ++ } ++ ++ /* Wired interrupts can only be delivered to domains ++ * always heading the pipeline, and using dynamic ++ * propagation. */ ++ ++ if ((modemask & IPIPE_WIRED_MASK) != 0) { ++ if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { ++ err = -EINVAL; ++ goto unlock_and_exit; ++ } ++ modemask |= (IPIPE_HANDLE_MASK); ++ } ++ ++ if ((modemask & IPIPE_STICKY_MASK) != 0) ++ modemask |= IPIPE_HANDLE_MASK; ++ } else ++ modemask &= ++ ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | ++ IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); ++ ++ if (acknowledge == NULL && !ipipe_virtual_irq_p(irq)) ++ /* ++ * Acknowledge handler unspecified for a hw interrupt: ++ * use the Linux-defined handler instead. ++ */ ++ acknowledge = ipipe_root_domain->irqs[irq].acknowledge; ++ ++ ipd->irqs[irq].handler = handler; ++ ipd->irqs[irq].cookie = cookie; ++ ipd->irqs[irq].acknowledge = acknowledge; ++ ipd->irqs[irq].control = modemask; ++ ++ if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) { ++ if (handler != NULL) { ++ __ipipe_enable_irqdesc(ipd, irq); ++ ++ if ((modemask & IPIPE_ENABLE_MASK) != 0) { ++ if (ipd != __ipipe_current_domain) { ++ /* ++ * IRQ enable/disable state is domain-sensitive, so we ++ * may not change it for another domain. What is ++ * allowed however is forcing some domain to handle an ++ * interrupt source, by passing the proper 'ipd' ++ * descriptor which thus may be different from ++ * __ipipe_current_domain. ++ */ ++ err = -EPERM; ++ goto unlock_and_exit; ++ } ++ __ipipe_enable_irq(irq); ++ } ++ } else if (old_handler != NULL) ++ __ipipe_disable_irqdesc(ipd, irq); ++ } ++ ++ err = 0; ++ ++ unlock_and_exit: ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return err; ++} ++ ++/* ipipe_control_irq() -- Change modes of a pipelined interrupt for ++ * the current domain. 
*/ ++ ++int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) ++{ ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&__ipipe_pipelock, flags); ++ ++ ipd = __ipipe_current_domain; ++ ++ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) { ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ return -EPERM; ++ } ++ ++ if (ipd->irqs[irq].handler == NULL) ++ setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); ++ ++ if ((setmask & IPIPE_STICKY_MASK) != 0) ++ setmask |= IPIPE_HANDLE_MASK; ++ ++ if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */ ++ clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); ++ ++ ipd->irqs[irq].control &= ~clrmask; ++ ipd->irqs[irq].control |= setmask; ++ ++ if ((setmask & IPIPE_ENABLE_MASK) != 0) ++ __ipipe_enable_irq(irq); ++ else if ((clrmask & IPIPE_ENABLE_MASK) != 0) ++ __ipipe_disable_irq(irq); ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock, flags); ++ ++ return 0; ++} ++ ++/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ ++ ++int __ipipe_dispatch_event (unsigned event, void *data) ++{ ++ struct ipipe_domain *start_domain, *this_domain, *next_domain; ++ ipipe_event_handler_t evhand; ++ struct list_head *pos, *npos; ++ unsigned long flags; ++ int propagate = 1; ++ ++ local_irq_save_hw(flags); ++ ++ start_domain = this_domain = __ipipe_current_domain; ++ ++ list_for_each_safe(pos, npos, &__ipipe_pipeline) { ++ /* ++ * Note: Domain migration may occur while running ++ * event or interrupt handlers, in which case the ++ * current register set is going to be recycled for a ++ * different domain than the initiating one. We do ++ * care for that, always tracking the current domain ++ * descriptor upon return from those handlers. ++ */ ++ next_domain = list_entry(pos, struct ipipe_domain, p_link); ++ ++ /* ++ * Keep a cached copy of the handler's address since ++ * ipipe_catch_event() may clear it under our feet. ++ */ ++ evhand = next_domain->evhand[event]; ++ ++ if (evhand != NULL) { ++ __ipipe_current_domain = next_domain; ++ ipipe_cpudom_var(next_domain, evsync) |= (1LL << event); ++ local_irq_restore_hw(flags); ++ propagate = !evhand(event, start_domain, data); ++ local_irq_save_hw(flags); ++ /* ++ * We may have a migration issue here, if the ++ * current task is migrated to another CPU on ++ * behalf of the invoked handler, usually when ++ * a syscall event is processed. However, ++ * ipipe_catch_event() will make sure that a ++ * CPU that clears a handler for any given ++ * event will not attempt to wait for itself ++ * to clear the evsync bit for that event, ++ * which practically plugs the hole, without ++ * resorting to a much more complex strategy. ++ */ ++ ipipe_cpudom_var(next_domain, evsync) &= ~(1LL << event); ++ if (__ipipe_current_domain != next_domain) ++ this_domain = __ipipe_current_domain; ++ } ++ ++ if (next_domain != ipipe_root_domain && /* NEVER sync the root stage here. 
*/ ++ ipipe_cpudom_var(next_domain, irqpend_himask) != 0 && ++ !test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status))) { ++ __ipipe_current_domain = next_domain; ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ if (__ipipe_current_domain != next_domain) ++ this_domain = __ipipe_current_domain; ++ } ++ ++ __ipipe_current_domain = this_domain; ++ ++ if (next_domain == this_domain || !propagate) ++ break; ++ } ++ ++ local_irq_restore_hw(flags); ++ ++ return !propagate; ++} ++ ++/* ++ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired ++ * interrupts are immediately and unconditionally delivered to the ++ * domain heading the pipeline upon receipt, and such domain must have ++ * been registered as an invariant head for the system (priority == ++ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is ++ * to get an extra-fast dispatching path for those IRQs, by relying on ++ * a straightforward logic based on assumptions that must always be ++ * true for invariant head domains. The following assumptions are ++ * made when dealing with such interrupts: ++ * ++ * 1- Wired interrupts are purely dynamic, i.e. the decision to ++ * propagate them down the pipeline must be done from the head domain ++ * ISR. ++ * 2- Wired interrupts cannot be shared or sticky. ++ * 3- The root domain cannot be an invariant pipeline head, in ++ * consequence of what the root domain cannot handle wired ++ * interrupts. ++ * 4- Wired interrupts must have a valid acknowledge handler for the ++ * head domain (if needed, see __ipipe_handle_irq). ++ * ++ * Called with hw interrupts off. ++ */ ++ ++void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); ++ ++ prefetchw(p); ++ ++ if (unlikely(test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control))) { ++ /* ++ * If we can't process this IRQ right now, we must ++ * mark it as held, so that it will get played during ++ * normal log sync when the corresponding interrupt ++ * source is eventually unlocked. ++ */ ++ p->irqall[irq]++; ++ __set_bit(irq & IPIPE_IRQ_IMASK, &p->irqheld_mask[irq >> IPIPE_IRQ_ISHIFT]); ++ return; ++ } ++ ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { ++ __ipipe_set_irq_pending(head, irq); ++ return; ++ } ++ ++ __ipipe_dispatch_wired_nocheck(head, irq); ++} ++ ++void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); ++ struct ipipe_domain *old; ++ ++ prefetchw(p); ++ ++ old = __ipipe_current_domain; ++ __ipipe_current_domain = head; /* Switch to the head domain. */ ++ ++ p->irqall[irq]++; ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */ ++ __ipipe_run_irqtail(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (__ipipe_current_domain == head) { ++ __ipipe_current_domain = old; ++ if (old == head) { ++ if (p->irqpend_himask) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ return; ++ } ++ } ++ ++ __ipipe_walk_pipeline(&head->p_link); ++} ++ ++/* ++ * __ipipe_sync_stage() -- Flush the pending IRQs for the current ++ * domain (and processor). This routine flushes the interrupt log ++ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for ++ * more on the deferred interrupt scheme). Every interrupt that ++ * occurred while the pipeline was stalled gets played. 
WARNING: ++ * callers on SMP boxen should always check for CPU migration on ++ * return of this routine. One can control the kind of interrupts ++ * which are going to be sync'ed using the syncmask ++ * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT ++ * plays virtual interrupts only. ++ * ++ * This routine must be called with hw interrupts off. ++ */ ++void __ipipe_sync_stage(unsigned long syncmask) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long mask, submask; ++ struct ipipe_domain *ipd; ++ int level, rank, cpu; ++ unsigned irq; ++ ++ ipd = __ipipe_current_domain; ++ p = ipipe_cpudom_ptr(ipd); ++ ++ if (__test_and_set_bit(IPIPE_SYNC_FLAG, &p->status)) { ++ /* ++ * Some questionable code in the root domain may enter ++ * busy waits for IRQs over interrupt context, so we ++ * unfortunately have to allow piling up IRQs for ++ * them. Non-root domains are not allowed to do this. ++ */ ++ if (ipd != ipipe_root_domain) ++ return; ++ } ++ ++ cpu = ipipe_processor_id(); ++ ++ /* ++ * The policy here is to keep the dispatching code interrupt-free ++ * by stalling the current stage. If the upper domain handler ++ * (which we call) wants to re-enable interrupts while in a safe ++ * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's ++ * sigaction()), it will have to unstall (then stall again before ++ * returning to us!) the stage when it sees fit. ++ */ ++ while ((mask = (p->irqpend_himask & syncmask)) != 0) { ++ level = __ipipe_ffnz(mask); ++ ++ while ((submask = p->irqpend_lomask[level]) != 0) { ++ rank = __ipipe_ffnz(submask); ++ irq = (level << IPIPE_IRQ_ISHIFT) + rank; ++ ++ __clear_bit(rank, &p->irqpend_lomask[level]); ++ ++ if (p->irqpend_lomask[level] == 0) ++ __clear_bit(level, &p->irqpend_himask); ++ /* ++ * Make sure the compiler will not postpone ++ * the pending bitmask updates before calling ++ * the interrupt handling routine. Otherwise, ++ * those late updates could overwrite any ++ * change to irqpend_hi/lomask due to a nested ++ * interrupt, leaving the latter unprocessed ++ * (seen on mpc836x). ++ */ ++ barrier(); ++ ++ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ continue; ++ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ smp_wmb(); ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_off(); ++ ++ __ipipe_run_isr(ipd, irq); ++ barrier(); ++ p = ipipe_cpudom_ptr(__ipipe_current_domain); ++#ifdef CONFIG_SMP ++ { ++ int newcpu = ipipe_processor_id(); ++ ++ if (newcpu != cpu) { /* Handle CPU migration. */ ++ /* ++ * We expect any domain to clear the SYNC bit each ++ * time it switches in a new task, so that preemptions ++ * and/or CPU migrations (in the SMP case) over the ++ * ISR do not lock out the log syncer for some ++ * indefinite amount of time. In the Linux case, ++ * schedule() handles this (see kernel/sched.c). For ++ * this reason, we don't bother clearing it here for ++ * the source CPU in the migration handling case, ++ * since it must have scheduled another task in by ++ * now. ++ */ ++ __set_bit(IPIPE_SYNC_FLAG, &p->status); ++ cpu = newcpu; ++ } ++ } ++#endif /* CONFIG_SMP */ ++#ifdef CONFIG_TRACE_IRQFLAGS ++ if (__ipipe_root_domain_p && ++ test_bit(IPIPE_STALL_FLAG, &p->status)) ++ trace_hardirqs_on(); ++#endif ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++ } ++ ++ __clear_bit(IPIPE_SYNC_FLAG, &p->status); ++} ++ ++/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ ++ ++int ipipe_register_domain(struct ipipe_domain *ipd, ++ struct ipipe_domain_attr *attr) ++{ ++ struct ipipe_domain *_ipd; ++ struct list_head *pos = NULL; ++ unsigned long flags; ++ ++ if (!ipipe_root_domain_p) { ++ printk(KERN_WARNING ++ "I-pipe: Only the root domain may register a new domain.\n"); ++ return -EPERM; ++ } ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ if (attr->priority == IPIPE_HEAD_PRIORITY) { ++ if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) { ++ ipipe_critical_exit(flags); ++ return -EAGAIN; /* Cannot override current head. */ ++ } ++ ipd->slot = IPIPE_HEAD_SLOT; ++ } else ++ ipd->slot = ffz(__ipipe_domain_slot_map); ++ ++ if (ipd->slot < CONFIG_IPIPE_DOMAINS) { ++ set_bit(ipd->slot, &__ipipe_domain_slot_map); ++ list_for_each(pos, &__ipipe_pipeline) { ++ _ipd = list_entry(pos, struct ipipe_domain, p_link); ++ if (_ipd->domid == attr->domid) ++ break; ++ } ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ if (pos != &__ipipe_pipeline) { ++ if (ipd->slot < CONFIG_IPIPE_DOMAINS) ++ clear_bit(ipd->slot, &__ipipe_domain_slot_map); ++ return -EBUSY; ++ } ++ ++#ifndef CONFIG_SMP ++ /* ++ * Set up the perdomain pointers for direct access to the ++ * percpu domain data. This saves a costly multiply each time ++ * we need to refer to the contents of the percpu domain data ++ * array. ++ */ ++ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot]; ++#endif ++ ++ ipd->name = attr->name; ++ ipd->domid = attr->domid; ++ ipd->pdd = attr->pdd; ++ ipd->flags = 0; ++ ++ if (attr->priority == IPIPE_HEAD_PRIORITY) { ++ ipd->priority = INT_MAX; ++ __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); ++ } ++ else ++ ipd->priority = attr->priority; ++ ++ __ipipe_init_stage(ipd); ++ ++ INIT_LIST_HEAD(&ipd->p_link); ++ ++#ifdef CONFIG_PROC_FS ++ __ipipe_add_domain_proc(ipd); ++#endif /* CONFIG_PROC_FS */ ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ list_for_each(pos, &__ipipe_pipeline) { ++ _ipd = list_entry(pos, struct ipipe_domain, p_link); ++ if (ipd->priority > _ipd->priority) ++ break; ++ } ++ ++ list_add_tail(&ipd->p_link, pos); ++ ++ ipipe_critical_exit(flags); ++ ++ printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name); ++ ++ /* ++ * Finally, allow the new domain to perform its initialization ++ * chores. ++ */ ++ ++ if (attr->entry != NULL) { ++ local_irq_save_hw_smp(flags); ++ __ipipe_current_domain = ipd; ++ local_irq_restore_hw_smp(flags); ++ attr->entry(); ++ local_irq_save_hw(flags); ++ __ipipe_current_domain = ipipe_root_domain; ++ ++ if (ipipe_root_cpudom_var(irqpend_himask) != 0 && ++ !test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) ++ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); ++ ++ local_irq_restore_hw(flags); ++ } ++ ++ return 0; ++} ++ ++/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ ++ ++int ipipe_unregister_domain(struct ipipe_domain *ipd) ++{ ++ unsigned long flags; ++ ++ if (!ipipe_root_domain_p) { ++ printk(KERN_WARNING ++ "I-pipe: Only the root domain may unregister a domain.\n"); ++ return -EPERM; ++ } ++ ++ if (ipd == ipipe_root_domain) { ++ printk(KERN_WARNING ++ "I-pipe: Cannot unregister the root domain.\n"); ++ return -EPERM; ++ } ++#ifdef CONFIG_SMP ++ { ++ unsigned irq; ++ int cpu; ++ ++ /* ++ * In the SMP case, wait for the logged events to drain on ++ * other processors before eventually removing the domain ++ * from the pipeline. 
++ */ ++ ++ ipipe_unstall_pipeline_from(ipd); ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); ++ clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); ++ set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpudom(ipd, irqpend_himask, cpu) > 0) ++ cpu_relax(); ++ } ++ } ++#endif /* CONFIG_SMP */ ++ ++ mutex_lock(&ipd->mutex); ++ ++#ifdef CONFIG_PROC_FS ++ __ipipe_remove_domain_proc(ipd); ++#endif /* CONFIG_PROC_FS */ ++ ++ /* ++ * Simply remove the domain from the pipeline and we are almost done. ++ */ ++ ++ flags = ipipe_critical_enter(NULL); ++ list_del_init(&ipd->p_link); ++ ipipe_critical_exit(flags); ++ ++ __ipipe_cleanup_domain(ipd); ++ ++ mutex_unlock(&ipd->mutex); ++ ++ printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name); ++ ++ return 0; ++} ++ ++/* ++ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of ++ * a running interrupt handler to the next domain down the pipeline. ++ * ipipe_schedule_irq() -- Does almost the same as above, but attempts ++ * to pend the interrupt for the current domain first. ++ * Must be called hw IRQs off. ++ */ ++void __ipipe_pend_irq(unsigned irq, struct list_head *head) ++{ ++ struct ipipe_domain *ipd; ++ struct list_head *ln; ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ BUG_ON(irq >= IPIPE_NR_IRQS || ++ (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))); ++#endif ++ for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) { ++ ipd = list_entry(ln, struct ipipe_domain, p_link); ++ if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { ++ __ipipe_set_irq_pending(ipd, irq); ++ return; ++ } ++ } ++} ++ ++/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ ++ ++int ipipe_free_virq(unsigned virq) ++{ ++ if (!ipipe_virtual_irq_p(virq)) ++ return -EINVAL; ++ ++ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); ++ ++ return 0; ++} ++ ++void ipipe_init_attr(struct ipipe_domain_attr *attr) ++{ ++ attr->name = "anon"; ++ attr->domid = 1; ++ attr->entry = NULL; ++ attr->priority = IPIPE_ROOT_PRIO; ++ attr->pdd = NULL; ++} ++ ++/* ++ * ipipe_catch_event() -- Interpose or remove an event handler for a ++ * given domain. 
++ */ ++ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, ++ unsigned event, ++ ipipe_event_handler_t handler) ++{ ++ ipipe_event_handler_t old_handler; ++ unsigned long flags; ++ int self = 0, cpu; ++ ++ if (event & IPIPE_EVENT_SELF) { ++ event &= ~IPIPE_EVENT_SELF; ++ self = 1; ++ } ++ ++ if (event >= IPIPE_NR_EVENTS) ++ return NULL; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ if (!(old_handler = xchg(&ipd->evhand[event],handler))) { ++ if (handler) { ++ if (self) ++ ipd->evself |= (1LL << event); ++ else ++ __ipipe_event_monitors[event]++; ++ } ++ } ++ else if (!handler) { ++ if (ipd->evself & (1LL << event)) ++ ipd->evself &= ~(1LL << event); ++ else ++ __ipipe_event_monitors[event]--; ++ } else if ((ipd->evself & (1LL << event)) && !self) { ++ __ipipe_event_monitors[event]++; ++ ipd->evself &= ~(1LL << event); ++ } else if (!(ipd->evself & (1LL << event)) && self) { ++ __ipipe_event_monitors[event]--; ++ ipd->evself |= (1LL << event); ++ } ++ ++ ipipe_critical_exit(flags); ++ ++ if (!handler && ipipe_root_domain_p) { ++ /* ++ * If we cleared a handler on behalf of the root ++ * domain, we have to wait for any current invocation ++ * to drain, since our caller might subsequently unmap ++ * the target domain. To this aim, this code ++ * synchronizes with __ipipe_dispatch_event(), ++ * guaranteeing that either the dispatcher sees a null ++ * handler in which case it discards the invocation ++ * (which also prevents from entering a livelock), or ++ * finds a valid handler and calls it. Symmetrically, ++ * ipipe_catch_event() ensures that the called code ++ * won't be unmapped under our feet until the event ++ * synchronization flag is cleared for the given event ++ * on all CPUs. ++ */ ++ preempt_disable(); ++ cpu = smp_processor_id(); ++ /* ++ * Hack: this solves the potential migration issue ++ * raised in __ipipe_dispatch_event(). This is a ++ * work-around which makes the assumption that other ++ * CPUs will subsequently, either process at least one ++ * interrupt for the target domain, or call ++ * __ipipe_dispatch_event() without going through a ++ * migration while running the handler at least once; ++ * practically, this is safe on any normally running ++ * system. ++ */ ++ ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event); ++ preempt_enable(); ++ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event)) ++ schedule_timeout_interruptible(HZ / 50); ++ } ++ } ++ ++ return old_handler; ++} ++ ++cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) ++{ ++#ifdef CONFIG_SMP ++ if (irq >= IPIPE_NR_XIRQS) ++ /* Allow changing affinity of external IRQs only. 
*/ ++ return CPU_MASK_NONE; ++ ++ if (num_online_cpus() > 1) ++ return __ipipe_set_irq_affinity(irq,cpumask); ++#endif /* CONFIG_SMP */ ++ ++ return CPU_MASK_NONE; ++} ++ ++int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) ++ ++{ ++#ifdef CONFIG_SMP ++ return __ipipe_send_ipi(ipi,cpumask); ++#else /* !CONFIG_SMP */ ++ return -EINVAL; ++#endif /* CONFIG_SMP */ ++} ++ ++int ipipe_alloc_ptdkey (void) ++{ ++ unsigned long flags; ++ int key = -1; ++ ++ spin_lock_irqsave(&__ipipe_pipelock,flags); ++ ++ if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { ++ key = ffz(__ipipe_ptd_key_map); ++ set_bit(key,&__ipipe_ptd_key_map); ++ __ipipe_ptd_key_count++; ++ } ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock,flags); ++ ++ return key; ++} ++ ++int ipipe_free_ptdkey (int key) ++{ ++ unsigned long flags; ++ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&__ipipe_pipelock,flags); ++ ++ if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) ++ __ipipe_ptd_key_count--; ++ ++ spin_unlock_irqrestore(&__ipipe_pipelock,flags); ++ ++ return 0; ++} ++ ++int ipipe_set_ptd (int key, void *value) ++ ++{ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return -EINVAL; ++ ++ current->ptd[key] = value; ++ ++ return 0; ++} ++ ++void *ipipe_get_ptd (int key) ++ ++{ ++ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) ++ return NULL; ++ ++ return current->ptd[key]; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *ipipe_proc_root; ++ ++static int __ipipe_version_info_proc(char *page, ++ char **start, ++ off_t off, int count, int *eof, void *data) ++{ ++ int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); ++ ++ len -= off; ++ ++ if (len <= off + count) ++ *eof = 1; ++ ++ *start = page + off; ++ ++ if(len > count) ++ len = count; ++ ++ if(len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_common_info_show(struct seq_file *p, void *data) ++{ ++ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; ++ char handling, stickiness, lockbit, exclusive, virtuality; ++ ++ unsigned long ctlbits; ++ unsigned irq; ++ ++ seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n"); ++ seq_printf(p, " |+---- Sticky\n"); ++ seq_printf(p, " ||+--- Locked\n"); ++ seq_printf(p, " |||+-- Exclusive\n"); ++ seq_printf(p, " ||||+- Virtual\n"); ++ seq_printf(p, "[IRQ] |||||\n"); ++ ++ mutex_lock(&ipd->mutex); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ /* Remember to protect against ++ * ipipe_virtual_irq/ipipe_control_irq if more fields ++ * get involved. */ ++ ctlbits = ipd->irqs[irq].control; ++ ++ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) ++ /* ++ * There might be a hole between the last external ++ * IRQ and the first virtual one; skip it. ++ */ ++ continue; ++ ++ if (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) ++ /* Non-allocated virtual IRQ; skip it. */ ++ continue; ++ ++ /* ++ * Statuses are as follows: ++ * o "accepted" means handled _and_ passed down the pipeline. ++ * o "grabbed" means handled, but the interrupt might be ++ * terminated _or_ passed down the pipeline depending on ++ * what the domain handler asks for to the I-pipe. ++ * o "wired" is basically the same as "grabbed", except that ++ * the interrupt is unconditionally delivered to an invariant ++ * pipeline head domain. ++ * o "passed" means unhandled by the domain but passed ++ * down the pipeline. ++ * o "discarded" means unhandled and _not_ passed down the ++ * pipeline. 
The interrupt merely disappears from the ++ * current domain down to the end of the pipeline. ++ */ ++ if (ctlbits & IPIPE_HANDLE_MASK) { ++ if (ctlbits & IPIPE_PASS_MASK) ++ handling = 'A'; ++ else if (ctlbits & IPIPE_WIRED_MASK) ++ handling = 'W'; ++ else ++ handling = 'G'; ++ } else if (ctlbits & IPIPE_PASS_MASK) ++ /* Do not output if no major action is taken. */ ++ continue; ++ else ++ handling = 'D'; ++ ++ if (ctlbits & IPIPE_STICKY_MASK) ++ stickiness = 'S'; ++ else ++ stickiness = '.'; ++ ++ if (ctlbits & IPIPE_LOCK_MASK) ++ lockbit = 'L'; ++ else ++ lockbit = '.'; ++ ++ if (ctlbits & IPIPE_EXCLUSIVE_MASK) ++ exclusive = 'X'; ++ else ++ exclusive = '.'; ++ ++ if (ipipe_virtual_irq_p(irq)) ++ virtuality = 'V'; ++ else ++ virtuality = '.'; ++ ++ seq_printf(p, " %3u: %c%c%c%c%c\n", ++ irq, handling, stickiness, lockbit, exclusive, virtuality); ++ } ++ ++ seq_printf(p, "[Domain info]\n"); ++ ++ seq_printf(p, "id=0x%.8x\n", ipd->domid); ++ ++ if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) ++ seq_printf(p, "priority=topmost\n"); ++ else ++ seq_printf(p, "priority=%d\n", ipd->priority); ++ ++ mutex_unlock(&ipd->mutex); ++ ++ return 0; ++} ++ ++static int __ipipe_common_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data); ++} ++ ++static struct file_operations __ipipe_info_proc_ops = { ++ .owner = THIS_MODULE, ++ .open = __ipipe_common_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void __ipipe_add_domain_proc(struct ipipe_domain *ipd) ++{ ++ struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root); ++ if (e) { ++ e->proc_fops = &__ipipe_info_proc_ops; ++ e->data = (void*) ipd; ++ } ++} ++ ++void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) ++{ ++ remove_proc_entry(ipd->name,ipipe_proc_root); ++} ++ ++void __init ipipe_init_proc(void) ++{ ++ ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); ++ create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); ++ __ipipe_add_domain_proc(ipipe_root_domain); ++ ++ __ipipe_init_tracer(); ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 }; ++DEFINE_PER_CPU(int, ipipe_saved_context_check_state); ++ ++void ipipe_check_context(struct ipipe_domain *border_domain) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ int cpu; ++ ++ local_irq_save_hw_smp(flags); ++ ++ this_domain = __ipipe_current_domain; ++ p = ipipe_head_cpudom_ptr(); ++ if (likely(this_domain->priority <= border_domain->priority && ++ !test_bit(IPIPE_STALL_FLAG, &p->status))) { ++ local_irq_restore_hw_smp(flags); ++ return; ++ } ++ ++ cpu = ipipe_processor_id(); ++ if (!per_cpu(ipipe_percpu_context_check, cpu)) { ++ local_irq_restore_hw_smp(flags); ++ return; ++ } ++ ++ local_irq_restore_hw_smp(flags); ++ ++ ipipe_context_check_off(); ++ ipipe_trace_panic_freeze(); ++ ipipe_set_printk_sync(__ipipe_current_domain); ++ ++ if (this_domain->priority > border_domain->priority) ++ printk(KERN_ERR "I-pipe: Detected illicit call from domain " ++ "'%s'\n" ++ KERN_ERR " into a service reserved for domain " ++ "'%s' and below.\n", ++ this_domain->name, border_domain->name); ++ else ++ printk(KERN_ERR "I-pipe: Detected stalled topmost domain, " ++ "probably caused by a bug.\n" ++ " A critical section may have been " ++ "left unterminated.\n"); ++ dump_stack(); ++ 
ipipe_trace_panic_dump(); ++} ++ ++EXPORT_SYMBOL(ipipe_check_context); ++ ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++ ++int notrace __ipipe_check_percpu_access(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ int ret = 0; ++ ++ local_irq_save_hw_notrace(flags); ++ ++ this_domain = __raw_get_cpu_var(ipipe_percpu_domain); ++ ++ /* ++ * Only the root domain may implement preemptive CPU migration ++ * of tasks, so anything above in the pipeline should be fine. ++ */ ++ if (this_domain->priority > IPIPE_ROOT_PRIO) ++ goto out; ++ ++ if (raw_irqs_disabled_flags(flags)) ++ goto out; ++ ++ /* ++ * Last chance: hw interrupts were enabled on entry while ++ * running over the root domain, but the root stage might be ++ * currently stalled, in which case preemption would be ++ * disabled, and no migration could occur. ++ */ ++ if (this_domain == ipipe_root_domain) { ++ p = ipipe_root_cpudom_ptr(); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ goto out; ++ } ++ /* ++ * Our caller may end up accessing the wrong per-cpu variable ++ * instance due to CPU migration; tell it to complain about ++ * this. ++ */ ++ ret = 1; ++out: ++ local_irq_restore_hw_notrace(flags); ++ ++ return ret; ++} ++ ++#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ ++ ++EXPORT_SYMBOL(ipipe_virtualize_irq); ++EXPORT_SYMBOL(ipipe_control_irq); ++EXPORT_SYMBOL(ipipe_suspend_domain); ++EXPORT_SYMBOL(ipipe_alloc_virq); ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray); ++EXPORT_SYMBOL(ipipe_root); ++EXPORT_SYMBOL(ipipe_stall_pipeline_from); ++EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); ++EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); ++EXPORT_SYMBOL(ipipe_restore_pipeline_from); ++EXPORT_SYMBOL(ipipe_unstall_pipeline_head); ++EXPORT_SYMBOL(__ipipe_restore_pipeline_head); ++EXPORT_SYMBOL(__ipipe_unstall_root); ++EXPORT_SYMBOL(__ipipe_restore_root); ++EXPORT_SYMBOL(__ipipe_spin_lock_irq); ++EXPORT_SYMBOL(__ipipe_spin_unlock_irq); ++EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); ++EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); ++EXPORT_SYMBOL(__ipipe_pipeline); ++EXPORT_SYMBOL(__ipipe_lock_irq); ++EXPORT_SYMBOL(__ipipe_unlock_irq); ++EXPORT_SYMBOL(ipipe_register_domain); ++EXPORT_SYMBOL(ipipe_unregister_domain); ++EXPORT_SYMBOL(ipipe_free_virq); ++EXPORT_SYMBOL(ipipe_init_attr); ++EXPORT_SYMBOL(ipipe_catch_event); ++EXPORT_SYMBOL(ipipe_alloc_ptdkey); ++EXPORT_SYMBOL(ipipe_free_ptdkey); ++EXPORT_SYMBOL(ipipe_set_ptd); ++EXPORT_SYMBOL(ipipe_get_ptd); ++EXPORT_SYMBOL(ipipe_set_irq_affinity); ++EXPORT_SYMBOL(ipipe_send_ipi); ++EXPORT_SYMBOL(__ipipe_pend_irq); ++EXPORT_SYMBOL(__ipipe_set_irq_pending); ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++EXPORT_SYMBOL(__ipipe_check_percpu_access); ++#endif ++#ifdef CONFIG_GENERIC_CLOCKEVENTS ++EXPORT_SYMBOL(ipipe_request_tickdev); ++EXPORT_SYMBOL(ipipe_release_tickdev); ++#endif ++ ++EXPORT_SYMBOL(ipipe_critical_enter); ++EXPORT_SYMBOL(ipipe_critical_exit); ++EXPORT_SYMBOL(ipipe_trigger_irq); ++EXPORT_SYMBOL(ipipe_get_sysinfo); +diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c +new file mode 100644 +index 0000000..d3c1866 +--- /dev/null ++++ b/kernel/ipipe/tracer.c +@@ -0,0 +1,1441 @@ ++/* -*- linux-c -*- ++ * kernel/ipipe/tracer.c ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2008 Jan Kiszka. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ ++#define IPIPE_DEFAULT_ACTIVE 0 ++#define IPIPE_DEFAULT_MAX 1 ++#define IPIPE_DEFAULT_FROZEN 2 ++ ++#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) ++#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) ++ ++#define IPIPE_DEFAULT_PRE_TRACE 10 ++#define IPIPE_DEFAULT_POST_TRACE 10 ++#define IPIPE_DEFAULT_BACK_TRACE 100 ++ ++#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ ++#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ ++ ++#define IPIPE_TFLG_NMI_LOCK 0x0001 ++#define IPIPE_TFLG_NMI_HIT 0x0002 ++#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 ++ ++#define IPIPE_TFLG_HWIRQ_OFF 0x0100 ++#define IPIPE_TFLG_FREEZING 0x0200 ++#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ ++#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 ++#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ ++#define IPIPE_TFLG_DOMSTATE_BITS 3 ++ ++#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ ++ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) ++#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ ++ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) ++ ++struct ipipe_trace_point { ++ short type; ++ short flags; ++ unsigned long eip; ++ unsigned long parent_eip; ++ unsigned long v; ++ unsigned long long timestamp; ++}; ++ ++struct ipipe_trace_path { ++ volatile int flags; ++ int dump_lock; /* separated from flags due to cross-cpu access */ ++ int trace_pos; /* next point to fill */ ++ int begin, end; /* finalised path begin and end */ ++ int post_trace; /* non-zero when in post-trace phase */ ++ unsigned long long length; /* max path length in cycles */ ++ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ ++ unsigned long nmi_saved_parent_eip; ++ unsigned long nmi_saved_v; ++ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; ++} ____cacheline_aligned_in_smp; ++ ++enum ipipe_trace_type ++{ ++ IPIPE_TRACE_FUNC = 0, ++ IPIPE_TRACE_BEGIN, ++ IPIPE_TRACE_END, ++ IPIPE_TRACE_FREEZE, ++ IPIPE_TRACE_SPECIAL, ++ IPIPE_TRACE_PID, ++ IPIPE_TRACE_EVENT, ++}; ++ ++#define IPIPE_TYPE_MASK 0x0007 ++#define IPIPE_TYPE_BITS 3 ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); ++#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ ++static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = ++ { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++int ipipe_trace_enable = 0; ++ ++static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; ++static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; ++static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; ++static IPIPE_DEFINE_SPINLOCK(global_path_lock); ++static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; ++static int post_trace = IPIPE_DEFAULT_POST_TRACE; ++static int back_trace = IPIPE_DEFAULT_BACK_TRACE; ++static int verbose_trace = 1; ++static unsigned long trace_overhead; ++ ++static unsigned long trigger_begin; ++static unsigned long trigger_end; ++ ++static DEFINE_MUTEX(out_mutex); ++static struct ipipe_trace_path *print_path; ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++static struct ipipe_trace_path *panic_path; ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++static int print_pre_trace; ++static int print_post_trace; ++ ++ ++static long __ipipe_signed_tsc2us(long long tsc); ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); ++ ++ ++static notrace void ++__ipipe_store_domain_states(struct ipipe_trace_point *point) ++{ ++ struct ipipe_domain *ipd; ++ struct list_head *pos; ++ int i = 0; ++ ++ list_for_each_prev(pos, &__ipipe_pipeline) { ++ ipd = list_entry(pos, struct ipipe_domain, p_link); ++ ++ if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status))) ++ point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); ++ ++ if (ipd == __ipipe_current_domain) ++ point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT; ++ ++ if (++i > IPIPE_TFLG_DOMSTATE_BITS) ++ break; ++ } ++} ++ ++static notrace int __ipipe_get_free_trace_path(int old, int cpu) ++{ ++ int new_active = old; ++ struct ipipe_trace_path *tp; ++ ++ do { ++ if (++new_active == IPIPE_TRACE_PATHS) ++ new_active = 0; ++ tp = &per_cpu(trace_path, cpu)[new_active]; ++ } while (new_active == per_cpu(max_path, cpu) || ++ new_active == per_cpu(frozen_path, cpu) || ++ tp->dump_lock); ++ ++ return new_active; ++} ++ ++static notrace void ++__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, ++ struct ipipe_trace_path *old_tp, int old_pos) ++{ ++ int i; ++ ++ new_tp->trace_pos = pre_trace+1; ++ ++ for (i = new_tp->trace_pos; i > 0; i--) ++ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], ++ &old_tp->point[WRAP_POINT_NO(old_pos-i)], ++ sizeof(struct ipipe_trace_point)); ++ ++ /* mark the end (i.e. the point before point[0]) invalid */ ++ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ unsigned long long length; ++ ++ /* do we have a new worst case? 
*/ ++ length = tp->point[tp->end].timestamp - ++ tp->point[tp->begin].timestamp; ++ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { ++ /* we need protection here against other cpus trying ++ to start a proc dump */ ++ spin_lock(&global_path_lock); ++ ++ /* active path holds new worst case */ ++ tp->length = length; ++ per_cpu(max_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ } ++ ++ return tp; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ int n; ++ ++ /* frozen paths have no core (begin=end) */ ++ tp->begin = tp->end; ++ ++ /* we need protection here against other cpus trying ++ * to set their frozen path or to start a proc dump */ ++ spin_lock(&global_path_lock); ++ ++ per_cpu(frozen_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ /* check if this is the first frozen path */ ++ for_each_possible_cpu(n) { ++ if (n != cpu && ++ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) ++ tp->end = -1; ++ } ++ ++ spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ ++ return tp; ++} ++ ++void notrace ++__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ struct ipipe_trace_path *tp, *old_tp; ++ int pos, next_pos, begin; ++ struct ipipe_trace_point *point; ++ unsigned long flags; ++ int cpu; ++ ++ local_irq_save_hw_notrace(flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here starts a race window with NMIs - catched below */ ++ ++ /* check for NMI recursion */ ++ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { ++ tp->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* first freeze request from NMI context? */ ++ if ((type == IPIPE_TRACE_FREEZE) && ++ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { ++ /* save arguments and mark deferred freezing */ ++ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; ++ tp->nmi_saved_eip = eip; ++ tp->nmi_saved_parent_eip = parent_eip; ++ tp->nmi_saved_v = v; ++ } ++ return; /* no need for restoring flags inside IRQ */ ++ } ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (unlikely(tp != ++ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ /* get the point buffer */ ++ pos = tp->trace_pos; ++ point = &tp->point[pos]; ++ ++ /* store all trace point data */ ++ point->type = type; ++ point->flags = raw_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; ++ point->eip = eip; ++ point->parent_eip = parent_eip; ++ point->v = v; ++ ipipe_read_tsc(point->timestamp); ++ ++ __ipipe_store_domain_states(point); ++ ++ /* forward to next point buffer */ ++ next_pos = WRAP_POINT_NO(pos+1); ++ tp->trace_pos = next_pos; ++ ++ /* only mark beginning if we haven't started yet */ ++ begin = tp->begin; ++ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) ++ tp->begin = pos; ++ ++ /* end of critical path, start post-trace if not already started */ ++ if (unlikely(type == IPIPE_TRACE_END) && ++ (begin >= 0) && !tp->post_trace) ++ tp->post_trace = post_trace + 1; ++ ++ /* freeze only if the slot is free and we are not already freezing */ ++ if ((unlikely(type == IPIPE_TRACE_FREEZE) || ++ (unlikely(eip >= trigger_begin && eip <= trigger_end) && ++ type == IPIPE_TRACE_FUNC)) && ++ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && ++ !(tp->flags & IPIPE_TFLG_FREEZING)) { ++ tp->post_trace = post_trace + 1; ++ tp->flags |= IPIPE_TFLG_FREEZING; ++ } ++ ++ /* enforce end of trace in case of overflow */ ++ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { ++ tp->end = pos; ++ goto enforce_end; ++ } ++ ++ /* stop tracing this path if we are in post-trace and ++ * a) that phase is over now or ++ * b) a new TRACE_BEGIN came in but we are not freezing this path */ ++ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || ++ ((type == IPIPE_TRACE_BEGIN) && ++ !(tp->flags & IPIPE_TFLG_FREEZING))))) { ++ /* store the path's end (i.e. excluding post-trace) */ ++ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); ++ ++ enforce_end: ++ if (tp->flags & IPIPE_TFLG_FREEZING) ++ tp = __ipipe_trace_freeze(cpu, tp, pos); ++ else ++ tp = __ipipe_trace_end(cpu, tp, pos); ++ ++ /* reset the active path, maybe already start a new one */ ++ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
++ WRAP_POINT_NO(tp->trace_pos - 1) : -1; ++ tp->end = -1; ++ tp->post_trace = 0; ++ tp->flags = 0; ++ ++ /* update active_path not earlier to avoid races with NMIs */ ++ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); ++ } ++ ++ /* we still have old_tp and point, ++ * let's reset NMI lock and check for catches */ ++ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { ++ /* well, this late tagging may not immediately be visible for ++ * other cpus already dumping this path - a minor issue */ ++ point->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* handle deferred freezing from NMI context */ ++ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, ++ old_tp->nmi_saved_parent_eip, ++ old_tp->nmi_saved_v); ++ } ++ ++ local_irq_restore_hw_notrace(flags); ++} ++ ++static unsigned long __ipipe_global_path_lock(void) ++{ ++ unsigned long flags; ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ spin_lock_irqsave(&global_path_lock, flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here is small race window with NMIs - catched below */ ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ return flags; ++} ++ ++static void __ipipe_global_path_unlock(unsigned long flags) ++{ ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ /* release spinlock first - it's not involved in the NMI issue */ ++ __ipipe_spin_unlock_irqbegin(&global_path_lock); ++ ++ cpu = ipipe_processor_id(); ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* handle deferred freezing from NMI context */ ++ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, ++ tp->nmi_saved_parent_eip, tp->nmi_saved_v); ++ ++ /* See __ipipe_spin_lock_irqsave() and friends. 
*/ ++ __ipipe_spin_unlock_irqcomplete(flags); ++} ++ ++void notrace ipipe_trace_begin(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_begin); ++ ++void notrace ipipe_trace_end(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_end); ++ ++void notrace ipipe_trace_freeze(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_freeze); ++ ++void notrace ipipe_trace_special(unsigned char id, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, v); ++} ++EXPORT_SYMBOL(ipipe_trace_special); ++ ++void notrace ipipe_trace_pid(pid_t pid, short prio) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, pid); ++} ++EXPORT_SYMBOL(ipipe_trace_pid); ++ ++void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), ++ __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, delay_tsc); ++} ++EXPORT_SYMBOL(ipipe_trace_event); ++ ++int ipipe_trace_max_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_possible_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(ipipe_trace_max_reset); ++ ++int ipipe_trace_frozen_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_online_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(ipipe_trace_frozen_reset); ++ ++static void ++__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, ++ int trylock) ++{ ++ struct task_struct *task = NULL; ++ char buf[8]; ++ int i; ++ int locked = 1; ++ ++ if (trylock) { ++ if (!read_trylock(&tasklist_lock)) ++ locked = 0; ++ } else ++ read_lock(&tasklist_lock); ++ ++ if (locked) ++ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); ++ ++ if (task) ++ strncpy(task_info, task->comm, 11); ++ else ++ strcpy(task_info, "--"); ++ ++ if (locked) ++ read_unlock(&tasklist_lock); ++ ++ for (i = strlen(task_info); i < 11; i++) ++ task_info[i] = ' '; ++ ++ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); ++ strcpy(task_info + (11 - strlen(buf)), buf); ++} ++ ++static void ++__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, ++ struct ipipe_trace_point *point) ++{ ++ long time; ++ int type; ++ ++ time = 
__ipipe_signed_tsc2us(point->timestamp - ++ path->point[path->begin].timestamp + point->v); ++ type = point->type >> IPIPE_TYPE_BITS; ++ ++ if (type == 0) ++ /* ++ * Event type #0 is predefined, stands for the next ++ * timer tick. ++ */ ++ sprintf(buf, "tick@%-6ld", time); ++ else ++ sprintf(buf, "%3d@%-7ld", type, time); ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void) ++{ ++ unsigned long flags; ++ int cpu; ++ ++ if (!ipipe_trace_enable) ++ return; ++ ++ ipipe_trace_enable = 0; ++ local_irq_save_hw_notrace(flags); ++ ++ cpu = ipipe_processor_id(); ++ ++ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ local_irq_restore_hw(flags); ++} ++EXPORT_SYMBOL(ipipe_trace_panic_freeze); ++ ++void ipipe_trace_panic_dump(void) ++{ ++ int cnt = back_trace; ++ int start, pos; ++ char buf[16]; ++ ++ if (!panic_path) ++ return; ++ ++ ipipe_context_check_off(); ++ ++ printk("I-pipe tracer log (%d points):\n", cnt); ++ ++ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); ++ ++ while (cnt-- > 0) { ++ struct ipipe_trace_point *point = &panic_path->point[pos]; ++ long time; ++ char info[16]; ++ int i; ++ ++ printk(" %c", ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ printk("%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '*' : ' ')); ++ ++ if (!point->eip) ++ printk("--\n"); ++ else { ++ __ipipe_trace_point_type(buf, point); ++ printk("%s", buf); ++ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ printk(" "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(info, ++ point, 1); ++ printk("%s", info); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(info, ++ panic_path, point); ++ printk("%s", info); ++ break; ++ ++ default: ++ printk("0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ panic_path->point[start].timestamp); ++ printk(" %5ld ", time); ++ ++ __ipipe_print_symname(NULL, point->eip); ++ printk(" ("); ++ __ipipe_print_symname(NULL, point->parent_eip); ++ printk(")\n"); ++ } ++ pos = WRAP_POINT_NO(pos - 1); ++ } ++ ++ panic_path = NULL; ++} ++EXPORT_SYMBOL(ipipe_trace_panic_dump); ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ ++ ++/* --- /proc output --- */ ++ ++static notrace int __ipipe_in_critical_trpath(long point_no) ++{ ++ return ((WRAP_POINT_NO(point_no-print_path->begin) < ++ WRAP_POINT_NO(print_path->end-print_path->begin)) || ++ ((print_path->end == print_path->begin) && ++ (WRAP_POINT_NO(point_no-print_path->end) > ++ print_post_trace))); ++} ++ ++static long __ipipe_signed_tsc2us(long long tsc) ++{ ++ unsigned long long abs_tsc; ++ long us; ++ ++ /* ipipe_tsc2us works on unsigned => handle sign separately */ ++ abs_tsc = (tsc >= 0) ? 
tsc : -tsc; ++ us = ipipe_tsc2us(abs_tsc); ++ if (tsc < 0) ++ return -us; ++ else ++ return us; ++} ++ ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) ++{ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ strcpy(buf, "func "); ++ break; ++ ++ case IPIPE_TRACE_BEGIN: ++ strcpy(buf, "begin "); ++ break; ++ ++ case IPIPE_TRACE_END: ++ strcpy(buf, "end "); ++ break; ++ ++ case IPIPE_TRACE_FREEZE: ++ strcpy(buf, "freeze "); ++ break; ++ ++ case IPIPE_TRACE_SPECIAL: ++ sprintf(buf, "(0x%02x) ", ++ point->type >> IPIPE_TYPE_BITS); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ sprintf(buf, "[%5d] ", (pid_t)point->v); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ sprintf(buf, "event "); ++ break; ++ } ++} ++ ++static void ++__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ char mark = ' '; ++ int point_no = point - print_path->point; ++ int i; ++ ++ if (print_path->end == point_no) ++ mark = '<'; ++ else if (print_path->begin == point_no) ++ mark = '>'; ++ else if (__ipipe_in_critical_trpath(point_no)) ++ mark = ':'; ++ seq_printf(m, "%c%c", mark, ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ if (!verbose_trace) ++ return; ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ seq_printf(m, "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); ++} ++ ++static void ++__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ unsigned long delay = 0; ++ int next; ++ char *mark = " "; ++ ++ next = WRAP_POINT_NO(point+1 - print_path->point); ++ ++ if (next != print_path->trace_pos) ++ delay = ipipe_tsc2ns(print_path->point[next].timestamp - ++ point->timestamp); ++ ++ if (__ipipe_in_critical_trpath(point - print_path->point)) { ++ if (delay > IPIPE_DELAY_WARN) ++ mark = "! "; ++ else if (delay > IPIPE_DELAY_NOTE) ++ mark = "+ "; ++ } ++ seq_puts(m, mark); ++ ++ if (verbose_trace) ++ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, ++ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); ++ else ++ seq_puts(m, " "); ++} ++ ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) ++{ ++ char namebuf[KSYM_NAME_LEN+1]; ++ unsigned long size, offset; ++ const char *sym_name; ++ char *modname; ++ ++ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ if (!m) { ++ /* panic dump */ ++ if (sym_name) { ++ printk("%s+0x%lx", sym_name, offset); ++ if (modname) ++ printk(" [%s]", modname); ++ } ++ } else ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ { ++ if (sym_name) { ++ if (verbose_trace) { ++ seq_printf(m, "%s+0x%lx", sym_name, offset); ++ if (modname) ++ seq_printf(m, " [%s]", modname); ++ } else ++ seq_puts(m, sym_name); ++ } else ++ seq_printf(m, "<%08lx>", eip); ++ } ++} ++ ++static void __ipipe_print_headline(struct seq_file *m) ++{ ++ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " ++ "us\n\n", trace_overhead/1000, trace_overhead%1000); ++ ++ if (verbose_trace) { ++ const char *name[4] = { [0 ... 
3] = "" }; ++ struct list_head *pos; ++ int i = 0; ++ ++ list_for_each_prev(pos, &__ipipe_pipeline) { ++ struct ipipe_domain *ipd = ++ list_entry(pos, struct ipipe_domain, p_link); ++ ++ name[i] = ipd->name; ++ if (++i > 3) ++ break; ++ } ++ ++ seq_printf(m, ++ " +----- Hard IRQs ('|': locked)\n" ++ " |+---- %s\n" ++ " ||+--- %s\n" ++ " |||+-- %s\n" ++ " ||||+- %s%s\n" ++ " ||||| +---------- " ++ "Delay flag ('+': > %d us, '!': > %d us)\n" ++ " ||||| | +- " ++ "NMI noise ('N')\n" ++ " ||||| | |\n" ++ " Type User Val. Time Delay Function " ++ "(Parent)\n", ++ name[3], name[2], name[1], name[0], ++ name[0] ? " ('*': domain stalled, '+': current, " ++ "'#': current+stalled)" : "", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++ } else ++ seq_printf(m, ++ " +--------------- Hard IRQs ('|': locked)\n" ++ " | +- Delay flag " ++ "('+': > %d us, '!': > %d us)\n" ++ " | |\n" ++ " Type Time Function (Parent)\n", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++} ++ ++static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ unsigned long length_usecs; ++ int points, cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the longest of all per-cpu paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ if ((print_path == NULL) || ++ (tp->length > print_path->length)) { ++ print_path = tp; ++ break; ++ } ++ } ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ /* does this path actually contain data? 
*/ ++ if (print_path->end == print_path->begin) ++ return NULL; ++ ++ /* number of points inside the critical path */ ++ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); ++ ++ /* pre- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, pre-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = pre_trace; ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - ++ print_post_trace; ++ ++ length_usecs = ipipe_tsc2us(print_path->length); ++ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" ++ "------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_ARCH_STRING); ++ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " ++ "%d (-%d/+%d), Length: %lu us\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ points, print_pre_trace, print_post_trace, length_usecs); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + n)]; ++} ++ ++static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ loff_t n = ++*pos; ++ ++ /* check if we are inside the trace range with the next entry */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + *pos)]; ++} ++ ++static void __ipipe_prtrace_stop(struct seq_file *m, void *p) ++{ ++ if (print_path) ++ print_path->dump_lock = 0; ++ mutex_unlock(&out_mutex); ++} ++ ++static int __ipipe_prtrace_show(struct seq_file *m, void *p) ++{ ++ long time; ++ struct ipipe_trace_point *point = p; ++ char buf[16]; ++ ++ if (!point->eip) { ++ seq_puts(m, "--\n"); ++ return 0; ++ } ++ ++ __ipipe_print_pathmark(m, point); ++ __ipipe_trace_point_type(buf, point); ++ seq_puts(m, buf); ++ if (verbose_trace) ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ seq_puts(m, " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(buf, point, 0); ++ seq_puts(m, buf); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(buf, print_path, point); ++ seq_puts(m, buf); ++ break; ++ ++ default: ++ seq_printf(m, "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ print_path->point[print_path->begin].timestamp); ++ seq_printf(m, "%5ld", time); ++ ++ __ipipe_print_delay(m, point); ++ __ipipe_print_symname(m, point->eip); ++ seq_puts(m, " ("); ++ __ipipe_print_symname(m, point->parent_eip); ++ seq_puts(m, ")\n"); ++ ++ return 0; ++} ++ ++static struct seq_operations __ipipe_max_ptrace_ops = { ++ .start = __ipipe_max_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_max_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_max_reset(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ mutex_lock(&out_mutex); ++ ipipe_trace_max_reset(); ++ 
mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++struct file_operations __ipipe_max_prtrace_fops = { ++ .open = __ipipe_max_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_max_reset, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ int cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the first of all per-cpu frozen paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ if (tp->end >= 0) { ++ print_path = tp; ++ break; ++ } ++ } ++ if (print_path) ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!print_path) ++ return NULL; ++ ++ /* back- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, back-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = back_trace-1; /* substract freeze point */ ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 2 - ++ print_post_trace; ++ ++ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" ++ "------------------------------------------------------" ++ "------\n", ++ UTS_RELEASE, IPIPE_ARCH_STRING); ++ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ print_pre_trace+1, print_post_trace); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= print_pre_trace + 1 + print_post_trace) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin- ++ print_pre_trace+n)]; ++} ++ ++static struct seq_operations __ipipe_frozen_ptrace_ops = { ++ .start = __ipipe_frozen_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_frozen_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, pbuffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ipipe_trace_frozen_reset(); ++ if (val > 0) ++ ipipe_trace_freeze(-1); ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++struct file_operations __ipipe_frozen_prtrace_fops = { ++ .open = __ipipe_frozen_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_frozen_ctrl, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static int __ipipe_rd_proc_val(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ int len; ++ ++ len = sprintf(page, "%u\n", *(int *)data); ++ len -= off; ++ if (len <= off + count) ++ *eof = 1; ++ *start = page + off; ++ if (len > count) ++ len = count; ++ if (len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ *(int *)data = val; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ ++ if (!trigger_begin) ++ return 0; ++ ++ len = sprint_symbol(page, trigger_begin); ++ page[len++] = '\n'; ++ ++ len -= off; ++ if (len <= off + count) ++ *eof = 1; ++ *start = page + off; ++ if (len > count) ++ len = count; ++ if (len < 0) ++ len = 0; ++ ++ return len; ++} ++ ++static int __ipipe_wr_trigger(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char buf[KSYM_SYMBOL_LEN]; ++ unsigned long begin, end; ++ ++ if (count > sizeof(buf) - 1) ++ count = sizeof(buf) - 1; ++ if (copy_from_user(buf, buffer, count)) ++ return -EFAULT; ++ buf[count] = 0; ++ if (buf[count-1] == '\n') ++ buf[count-1] = 0; ++ ++ begin = kallsyms_lookup_name(buf); ++ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) ++ return -ENOENT; ++ end += begin - 1; ++ ++ mutex_lock(&out_mutex); ++ /* invalidate the current range before setting a new one */ ++ trigger_end = 0; ++ wmb(); ++ ipipe_trace_frozen_reset(); ++ ++ /* set new range */ ++ trigger_begin = begin; ++ wmb(); ++ trigger_end = end; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++static void notrace ++ipipe_trace_function(unsigned long ip, unsigned long parent_ip) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); ++} ++ ++static struct ftrace_ops ipipe_trace_ops = { ++ .func = ipipe_trace_function ++}; ++ ++static int __ipipe_wr_enable(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ++ if (ipipe_trace_enable) { ++ if (!val) ++ unregister_ftrace_function(&ipipe_trace_ops); ++ } else if (val) ++ register_ftrace_function(&ipipe_trace_ops); ++ ++ ipipe_trace_enable = val; ++ ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++ ++extern struct proc_dir_entry *ipipe_proc_root; ++ ++static struct proc_dir_entry * __init ++__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, ++ const char *name, int *value_ptr) ++{ ++ struct proc_dir_entry *entry; ++ ++ entry = create_proc_entry(name, 0644, trace_dir); ++ if (entry) { ++ entry->data = value_ptr; ++ entry->read_proc = __ipipe_rd_proc_val; ++ entry->write_proc = __ipipe_wr_proc_val; ++ } ++ return entry; ++} ++ ++void __init __ipipe_init_tracer(void) ++{ ++ struct proc_dir_entry *trace_dir; ++ struct proc_dir_entry *entry; ++ unsigned long long start, end, min = ULLONG_MAX; ++ int i; ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ int cpu, path; ++ ++ for_each_possible_cpu(cpu) { ++ struct ipipe_trace_path *tp_buf; ++ ++ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * ++ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); ++ if (!tp_buf) { ++ printk(KERN_ERR "I-pipe: " ++ "insufficient memory for trace buffer.\n"); ++ return; ++ } ++ memset(tp_buf, 0, ++ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); ++ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { ++ tp_buf[path].begin = -1; ++ tp_buf[path].end = -1; ++ } ++ per_cpu(trace_path, cpu) = tp_buf; ++ } ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++ /* Calculate minimum overhead of __ipipe_trace() */ ++ local_irq_disable_hw(); ++ for (i = 0; i < 100; i++) { ++ ipipe_read_tsc(start); ++ __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, ++ __BUILTIN_RETURN_ADDRESS1, 0); ++ ipipe_read_tsc(end); ++ ++ end -= start; ++ if (end < min) ++ min = end; ++ } ++ local_irq_enable_hw(); ++ trace_overhead = ipipe_tsc2ns(min); ++ ++#ifdef CONFIG_IPIPE_TRACE_ENABLE ++ ipipe_trace_enable = 1; ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ register_ftrace_function(&ipipe_trace_ops); ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++#endif /* CONFIG_IPIPE_TRACE_ENABLE */ ++ ++ trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); ++ ++ entry = create_proc_entry("max", 0644, trace_dir); ++ if (entry) ++ entry->proc_fops = &__ipipe_max_prtrace_fops; ++ ++ entry = create_proc_entry("frozen", 0644, trace_dir); ++ if (entry) ++ entry->proc_fops = &__ipipe_frozen_prtrace_fops; ++ ++ entry = create_proc_entry("trigger", 0644, trace_dir); ++ if (entry) { ++ entry->read_proc = __ipipe_rd_trigger; ++ entry->write_proc = __ipipe_wr_trigger; ++ } ++ ++ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", ++ &pre_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", ++ &post_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", ++ &back_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "verbose", ++ &verbose_trace); ++ entry = __ipipe_create_trace_proc_val(trace_dir, "enable", ++ &ipipe_trace_enable); ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ if (entry) ++ entry->write_proc = __ipipe_wr_enable; ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++} +diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c +index c166019..5e045ab 100644 +--- a/kernel/irq/chip.c ++++ 
b/kernel/irq/chip.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include "internals.h" + +@@ -425,7 +426,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) + irqreturn_t action_ret; + + spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + mask_ack_irq(desc, irq); ++#endif + + if (unlikely(desc->status & IRQ_INPROGRESS)) + goto out_unlock; +@@ -505,8 +508,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; ++#ifdef CONFIG_IPIPE ++ desc->chip->unmask(irq); ++out: ++#else + out: + desc->chip->eoi(irq); ++#endif + + spin_unlock(&desc->lock); + } +@@ -548,8 +556,10 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) + kstat_incr_irqs_this_cpu(irq, desc); + + /* Start handling the irq */ ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + + /* Mark the IRQ currently in progress.*/ + desc->status |= IRQ_INPROGRESS; +@@ -603,8 +613,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) + + kstat_incr_irqs_this_cpu(irq, desc); + ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif /* CONFIG_IPIPE */ + + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +@@ -614,6 +626,134 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) + desc->chip->eoi(irq); + } + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc) ++{ ++ mask_ack_irq(desc, irq); ++} ++ ++void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->unmask) ++ desc->chip->unmask(irq); ++} ++ ++void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->chip->eoi(irq); ++} ++ ++void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc) ++{ ++ /* ++ * Non-requestable IRQs should not be masked in EOI handler. 
++ */ ++ if (!(desc->status & IRQ_NOREQUEST)) ++ desc->chip->unmask(irq); ++} ++ ++void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) ++{ ++ desc->chip->ack(irq); ++} ++ ++void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->ack) ++ desc->chip->ack(irq); ++} ++ ++void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc) ++{ ++ if (desc->chip->eoi) ++ desc->chip->eoi(irq); ++} ++ ++void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) ++{ ++ static int done; ++ ++ handle_bad_irq(irq, desc); ++ ++ if (!done) { ++ printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", ++ __FUNCTION__, irq); ++ done = 1; ++ } ++} ++ ++void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) ++{ ++} ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ if (unlikely(handle == NULL)) { ++ desc->ipipe_ack = &__ipipe_ack_bad_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } else { ++ if (is_chained) { ++ desc->ipipe_ack = handle; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ handle = __ipipe_noack_irq; ++ } else if (handle == &handle_simple_irq) { ++ desc->ipipe_ack = &__ipipe_ack_simple_irq; ++ desc->ipipe_end = &__ipipe_end_simple_irq; ++ } else if (handle == &handle_level_irq) { ++ desc->ipipe_ack = &__ipipe_ack_level_irq; ++ desc->ipipe_end = &__ipipe_end_level_irq; ++ } else if (handle == &handle_edge_irq) { ++ desc->ipipe_ack = &__ipipe_ack_edge_irq; ++ desc->ipipe_end = &__ipipe_end_edge_irq; ++ } else if (handle == &handle_fasteoi_irq) { ++ desc->ipipe_ack = &__ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = &__ipipe_end_fasteoi_irq; ++ } else if (handle == &handle_percpu_irq) { ++ desc->ipipe_ack = &__ipipe_ack_percpu_irq; ++ desc->ipipe_end = &__ipipe_end_percpu_irq; ++ } else if (desc->chip == &no_irq_chip) { ++ desc->ipipe_ack = &__ipipe_noack_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } else { ++ desc->ipipe_ack = &__ipipe_ack_bad_irq; ++ desc->ipipe_end = &__ipipe_noend_irq; ++ } ++ } ++ ++ /* Suppress intermediate trampoline routine. */ ++ ipipe_root_domain->irqs[desc->irq].acknowledge = desc->ipipe_ack; ++ ++ return handle; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ return handle; ++} ++ ++#endif /* !CONFIG_IPIPE */ ++ + void + __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) +@@ -645,6 +785,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + chip_bus_lock(irq, desc); + spin_lock_irqsave(&desc->lock, flags); + ++ handle = __fixup_irq_handler(desc, handle, is_chained); ++ + /* Uninstall? 
*/ + if (handle == handle_bad_irq) { + if (desc->chip != &no_irq_chip) +diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c +index 17c71bb..406f375 100644 +--- a/kernel/irq/handle.c ++++ b/kernel/irq/handle.c +@@ -462,8 +462,10 @@ unsigned int __do_IRQ(unsigned int irq) + /* + * No locking required for CPU-local interrupts: + */ ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + if (likely(!(desc->status & IRQ_DISABLED))) { + action_ret = handle_IRQ_event(irq, desc->action); + if (!noirqdebug) +@@ -474,8 +476,10 @@ unsigned int __do_IRQ(unsigned int irq) + } + + spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + if (desc->chip->ack) + desc->chip->ack(irq); ++#endif + /* + * REPLAY is when Linux resends an IRQ that was dropped earlier + * WAITING is used by probe to mark irqs that are being tested +diff --git a/kernel/lockdep.c b/kernel/lockdep.c +index 9af5672..fa84d6d 100644 +--- a/kernel/lockdep.c ++++ b/kernel/lockdep.c +@@ -2318,7 +2318,7 @@ void trace_hardirqs_on_caller(unsigned long ip) + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; +@@ -2361,7 +2361,7 @@ void trace_hardirqs_off_caller(unsigned long ip) + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->hardirqs_enabled) { +@@ -2393,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip) + if (unlikely(!debug_locks)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->softirqs_enabled) { +@@ -2427,7 +2427,7 @@ void trace_softirqs_off(unsigned long ip) + if (unlikely(!debug_locks)) + return; + +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) + return; + + if (curr->softirqs_enabled) { +diff --git a/kernel/panic.c b/kernel/panic.c +index b83e8af..9a08cc3 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + int panic_on_oops; + static unsigned long tainted_mask; +@@ -304,6 +305,8 @@ void oops_enter(void) + { + tracing_off(); + /* can't trust the integrity of the kernel anymore: */ ++ ipipe_trace_panic_freeze(); ++ ipipe_disable_context_check(ipipe_processor_id()); + debug_locks_off(); + do_oops_enter_exit(); + } +diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c +index 04a9e90..49bc6cd 100644 +--- a/kernel/power/hibernate.c ++++ b/kernel/power/hibernate.c +@@ -238,6 +238,7 @@ static int create_image(int platform_mode) + goto Enable_cpus; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + + error = sysdev_suspend(PMSG_FREEZE); + if (error) { +@@ -267,6 +268,7 @@ static int create_image(int platform_mode) + */ + + Enable_irqs: ++ local_irq_enable_hw_cond(); + local_irq_enable(); + + Enable_cpus: +@@ -359,6 +361,7 @@ static int resume_target_kernel(bool platform_mode) + goto Enable_cpus; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + + error = sysdev_suspend(PMSG_QUIESCE); + if (error) +@@ -390,6 +393,7 @@ static int resume_target_kernel(bool platform_mode) + sysdev_resume(); + + Enable_irqs: ++ local_irq_enable_hw_cond(); + local_irq_enable(); + + Enable_cpus: +@@ -471,6 +475,7 @@ 
int hibernation_platform_enter(void) + goto Platform_finish; + + local_irq_disable(); ++ local_irq_disable_hw_cond(); + sysdev_suspend(PMSG_HIBERNATE); + hibernation_ops->enter(); + /* We should never get here */ +diff --git a/kernel/printk.c b/kernel/printk.c +index 84605a4..36af386 100644 +--- a/kernel/printk.c ++++ b/kernel/printk.c +@@ -669,6 +669,41 @@ static int have_callable_console(void) + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED; ++ ++static int __ipipe_printk_fill; ++ ++static char __ipipe_printk_buf[__LOG_BUF_LEN]; ++ ++void __ipipe_flush_printk (unsigned virq, void *cookie) ++{ ++ char *p = __ipipe_printk_buf; ++ int len, lmax, out = 0; ++ unsigned long flags; ++ ++ goto start; ++ ++ do { ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ start: ++ lmax = __ipipe_printk_fill; ++ while (out < lmax) { ++ len = strlen(p) + 1; ++ printk("%s",p); ++ p += len; ++ out += len; ++ } ++ spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ } ++ while (__ipipe_printk_fill != lmax); ++ ++ __ipipe_printk_fill = 0; ++ ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++} ++ + /** + * printk - print a kernel message + * @fmt: format string +@@ -693,6 +728,65 @@ static int have_callable_console(void) + + asmlinkage int printk(const char *fmt, ...) + { ++ int r, fbytes, oldcount; ++ unsigned long flags; ++ int sprintk = 1; ++ int cs = -1; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ local_irq_save_hw(flags); ++ ++ if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) || ++ oops_in_progress) ++ cs = ipipe_disable_context_check(ipipe_processor_id()); ++ else if (__ipipe_current_domain == ipipe_root_domain) { ++ struct ipipe_domain *dom; ++ ++ list_for_each_entry(dom, &__ipipe_pipeline, p_link) { ++ if (dom == ipipe_root_domain) ++ break; ++ if (test_bit(IPIPE_STALL_FLAG, ++ &ipipe_cpudom_var(dom, status))) ++ sprintk = 0; ++ } ++ } else ++ sprintk = 0; ++ ++ local_irq_restore_hw(flags); ++ ++ if (sprintk) { ++ r = vprintk(fmt, args); ++ if (cs != -1) ++ ipipe_restore_context_check(ipipe_processor_id(), cs); ++ goto out; ++ } ++ ++ spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ ++ oldcount = __ipipe_printk_fill; ++ fbytes = __LOG_BUF_LEN - oldcount; ++ ++ if (fbytes > 1) { ++ r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, ++ fbytes, fmt, args) + 1; /* account for the null byte */ ++ __ipipe_printk_fill += r; ++ } else ++ r = 0; ++ ++ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ ++ if (oldcount == 0) ++ ipipe_trigger_irq(__ipipe_printk_virq); ++out: ++ va_end(args); ++ ++ return r; ++} ++#else /* !CONFIG_IPIPE */ ++asmlinkage int printk(const char *fmt, ...) ++{ + va_list args; + int r; + +@@ -702,6 +796,7 @@ asmlinkage int printk(const char *fmt, ...) 
+ + return r; + } ++#endif /* CONFIG_IPIPE */ + + /* cpu currently holding logbuf_lock */ + static volatile unsigned int printk_cpu = UINT_MAX; +diff --git a/kernel/sched.c b/kernel/sched.c +index cc397a1..0b65871 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2345,6 +2345,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) + { + int cpu, orig_cpu, this_cpu, success = 0; ++ unsigned int old_state; + unsigned long flags; + struct rq *rq, *orig_rq; + +@@ -2356,7 +2357,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + smp_wmb(); + rq = orig_rq = task_rq_lock(p, &flags); + update_rq_clock(rq); +- if (!(p->state & state)) ++ old_state = p->state; ++ if (!(old_state & state) || ++ (old_state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH))) + goto out; + + if (p->se.on_rq) +@@ -2841,13 +2844,15 @@ asmlinkage void schedule_tail(struct task_struct *prev) + #endif + if (current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); ++ ++ ipipe_init_notify(current); + } + + /* + * context_switch - switch to the new MM and the new + * thread's register state. + */ +-static inline void ++static inline int + context_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) + { +@@ -2898,12 +2903,23 @@ context_switch(struct rq *rq, struct task_struct *prev, + switch_to(prev, next, prev); + + barrier(); ++ ++#ifdef CONFIG_IPIPE_DELAYED_ATOMICSW ++ current->state &= ~TASK_ATOMICSWITCH; ++#else ++ prev->state &= ~TASK_ATOMICSWITCH; ++#endif ++ if (task_hijacked(prev)) ++ return 1; ++ + /* + * this_rq must be evaluated again because prev may have moved + * CPUs since it called schedule(), thus the 'rq' on its stack + * frame will be invalid. + */ + finish_task_switch(this_rq(), prev); ++ ++ return 0; + } + + /* +@@ -5293,6 +5309,7 @@ notrace unsigned long get_parent_ip(unsigned long addr) + + void __kprobes add_preempt_count(int val) + { ++ ipipe_check_context(ipipe_root_domain); + #ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? +@@ -5315,6 +5332,7 @@ EXPORT_SYMBOL(add_preempt_count); + + void __kprobes sub_preempt_count(int val) + { ++ ipipe_check_context(ipipe_root_domain); + #ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? +@@ -5363,6 +5381,7 @@ static noinline void __schedule_bug(struct task_struct *prev) + */ + static inline void schedule_debug(struct task_struct *prev) + { ++ ipipe_check_context(ipipe_root_domain); + /* + * Test if we are atomic. Since do_exit() needs to call into + * schedule() atomically, we ignore that path for now. +@@ -5441,7 +5460,7 @@ pick_next_task(struct rq *rq) + /* + * schedule() is the main scheduler function. + */ +-asmlinkage void __sched schedule(void) ++asmlinkage int __sched schedule(void) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -5455,6 +5474,9 @@ need_resched: + rcu_sched_qs(cpu); + prev = rq->curr; + switch_count = &prev->nivcsw; ++ if (unlikely(prev->state & TASK_ATOMICSWITCH)) ++ /* Pop one disable level -- one still remains. */ ++ preempt_enable(); + + release_kernel_lock(prev); + need_resched_nonpreemptible: +@@ -5492,15 +5514,18 @@ need_resched_nonpreemptible: + rq->curr = next; + ++*switch_count; + +- context_switch(rq, prev, next); /* unlocks the rq */ ++ if (context_switch(rq, prev, next)) /* unlocks the rq */ ++ return 1; /* task hijacked by higher domain */ + /* + * the context switch might have flipped the stack from under + * us, hence refresh the local variables. 
+ */ + cpu = smp_processor_id(); + rq = cpu_rq(cpu); +- } else ++ } else { ++ prev->state &= ~TASK_ATOMICSWITCH; + spin_unlock_irq(&rq->lock); ++ } + + post_schedule(rq); + +@@ -5510,6 +5535,8 @@ need_resched_nonpreemptible: + preempt_enable_no_resched(); + if (need_resched()) + goto need_resched; ++ ++ return 0; + } + EXPORT_SYMBOL(schedule); + +@@ -5593,7 +5620,8 @@ asmlinkage void __sched preempt_schedule(void) + + do { + add_preempt_count(PREEMPT_ACTIVE); +- schedule(); ++ if (schedule()) ++ return; + sub_preempt_count(PREEMPT_ACTIVE); + + /* +@@ -6362,6 +6390,7 @@ recheck: + + oldprio = p->prio; + __setscheduler(rq, p, policy, param->sched_priority); ++ ipipe_setsched_notify(p); + + if (running) + p->sched_class->set_curr_task(rq); +@@ -7010,6 +7039,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) + #else + task_thread_info(idle)->preempt_count = 0; + #endif ++ ipipe_check_context(ipipe_root_domain); + /* + * The idle tasks have their own, simple scheduling class: + */ +@@ -10947,3 +10977,64 @@ void synchronize_sched_expedited(void) + EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + + #endif /* #else #ifndef CONFIG_SMP */ ++ ++#ifdef CONFIG_IPIPE ++ ++int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio) ++{ ++ const struct sched_class *prev_class = p->sched_class; ++ int oldprio, on_rq, running; ++ unsigned long flags; ++ struct rq *rq; ++ ++ spin_lock_irqsave(&p->pi_lock, flags); ++ rq = __task_rq_lock(p); ++ update_rq_clock(rq); ++ on_rq = p->se.on_rq; ++ running = task_current(rq, p); ++ if (on_rq) ++ deactivate_task(rq, p, 0); ++ if (running) ++ p->sched_class->put_prev_task(rq, p); ++ ++ p->sched_reset_on_fork = 0; ++ ++ oldprio = p->prio; ++ __setscheduler(rq, p, policy, prio); ++ ipipe_setsched_notify(p); ++ ++ if (running) ++ p->sched_class->set_curr_task(rq); ++ if (on_rq) { ++ activate_task(rq, p, 0); ++ ++ check_class_changed(rq, p, prev_class, oldprio, running); ++ } ++ __task_rq_unlock(rq); ++ spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ rt_mutex_adjust_pi(p); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_setscheduler_root); ++ ++int ipipe_reenter_root(struct task_struct *prev, int policy, int prio) ++{ ++ struct rq *rq = this_rq(); ++ ++ finish_task_switch(rq, prev); ++ ++ post_schedule(rq); ++ ++ (void)reacquire_kernel_lock(current); ++ preempt_enable_no_resched(); ++ ++ if (current->policy != policy || current->rt_priority != prio) ++ return ipipe_setscheduler_root(current, policy, prio); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_reenter_root); ++ ++#endif /* CONFIG_IPIPE */ +diff --git a/kernel/signal.c b/kernel/signal.c +index 6705320..45e997e 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -518,6 +518,7 @@ void signal_wake_up(struct task_struct *t, int resume) + unsigned int mask; + + set_tsk_thread_flag(t, TIF_SIGPENDING); ++ ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. 
*/ + + /* + * For SIGKILL, we want to wake it up in the stopped/traced/killable +diff --git a/kernel/spinlock.c b/kernel/spinlock.c +index 5ddab73..97cf064 100644 +--- a/kernel/spinlock.c ++++ b/kernel/spinlock.c +@@ -50,7 +50,9 @@ EXPORT_SYMBOL(_write_trylock); + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + #ifndef _read_lock + void __lockfunc _read_lock(rwlock_t *lock) +diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c +index 83c4417..782a209 100644 +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -69,7 +69,7 @@ static void tick_periodic(int cpu) + write_sequnlock(&xtime_lock); + } + +- update_process_times(user_mode(get_irq_regs())); ++ update_root_process_times(get_irq_regs()); + profile_tick(CPU_PROFILING); + } + +@@ -177,6 +177,10 @@ static void tick_setup_device(struct tick_device *td, + + td->evtdev = newdev; + ++ /* I-pipe: derive global tick IRQ from CPU 0 */ ++ if (cpu == 0) ++ ipipe_update_tick_evtdev(newdev); ++ + /* + * When the device is not per cpu, pin the interrupt to the + * current cpu: +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index 89aed59..aef7075 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -525,7 +525,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) + ts->idle_jiffies++; + } + +- update_process_times(user_mode(regs)); ++ update_root_process_times(regs); + profile_tick(CPU_PROFILING); + + while (tick_nohz_reprogram(ts, now)) { +@@ -676,7 +676,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) + touch_softlockup_watchdog(); + ts->idle_jiffies++; + } +- update_process_times(user_mode(regs)); ++ update_root_process_times(regs); + profile_tick(CPU_PROFILING); + } + +diff --git a/kernel/timer.c b/kernel/timer.c +index 5db5a8d..1b45eb9 100644 +--- a/kernel/timer.c ++++ b/kernel/timer.c +@@ -1204,6 +1204,25 @@ void update_process_times(int user_tick) + run_posix_cpu_timers(p); + } + ++#ifdef CONFIG_IPIPE ++ ++void update_root_process_times(struct pt_regs *regs) ++{ ++ int cpu, user_tick = user_mode(regs); ++ ++ if (__ipipe_root_tick_p(regs)) { ++ update_process_times(user_tick); ++ return; ++ } ++ ++ run_local_timers(); ++ cpu = smp_processor_id(); ++ rcu_check_callbacks(cpu, user_tick); ++ run_posix_cpu_timers(current); ++} ++ ++#endif ++ + /* + * This function runs timers and the timer-tq in bottom half context. 
+ */ +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 6dc4e5e..7252373 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + +@@ -1142,6 +1143,9 @@ static int __ftrace_modify_code(void *data) + + static void ftrace_run_update_code(int command) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++#endif /* CONFIG_IPIPE */ + int ret; + + ret = ftrace_arch_code_modify_prepare(); +@@ -1149,7 +1153,13 @@ static void ftrace_run_update_code(int command) + if (ret) + return; + ++#ifdef CONFIG_IPIPE ++ flags = ipipe_critical_enter(NULL); ++ __ftrace_modify_code(&command); ++ ipipe_critical_exit(flags); ++#else /* !CONFIG_IPIPE */ + stop_machine(__ftrace_modify_code, &command, NULL); ++#endif /* !CONFIG_IPIPE */ + + ret = ftrace_arch_code_modify_post_process(); + FTRACE_WARN_ON(ret); +@@ -2648,9 +2658,9 @@ static int ftrace_convert_nops(struct module *mod, + } + + /* disable interrupts to prevent kstop machine */ +- local_irq_save(flags); ++ local_irq_save_hw_notrace(flags); + ftrace_update_code(mod); +- local_irq_restore(flags); ++ local_irq_restore_hw_notrace(flags); + mutex_unlock(&ftrace_lock); + + return 0; +@@ -2729,9 +2739,9 @@ void __init ftrace_init(void) + /* Keep the ftrace pointer to the stub */ + addr = (unsigned long)ftrace_stub; + +- local_irq_save(flags); ++ local_irq_save_hw_notrace(flags); + ftrace_dyn_arch_init(&addr); +- local_irq_restore(flags); ++ local_irq_restore_hw_notrace(flags); + + /* ftrace_dyn_arch_init places the return code in addr */ + if (addr) +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index 234ceb1..faffad9 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -136,6 +136,8 @@ config DEBUG_SECTION_MISMATCH + - Enable verbose reporting from modpost to help solving + the section mismatches reported. 
+ ++source "kernel/ipipe/Kconfig.debug" ++ + config DEBUG_KERNEL + bool "Kernel debugging" + help +diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c +index 9681d54..2dba50c 100644 +--- a/lib/bust_spinlocks.c ++++ b/lib/bust_spinlocks.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + + void __attribute__((weak)) bust_spinlocks(int yes) +@@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes) + unblank_screen(); + #endif + console_unblank(); ++ ipipe_trace_panic_dump(); + if (--oops_in_progress == 0) + wake_up_klogd(); + } +diff --git a/lib/ioremap.c b/lib/ioremap.c +index 14c6078..a275469 100644 +--- a/lib/ioremap.c ++++ b/lib/ioremap.c +@@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr, + if (err) + break; + } while (pgd++, addr = next, addr != end); +- +- flush_cache_vmap(start, end); ++ __ipipe_pin_range_globally(start, end); ++ flush_cache_vmap(start, end); + + return err; + } +diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c +index 4689cb0..3d12764 100644 +--- a/lib/smp_processor_id.c ++++ b/lib/smp_processor_id.c +@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void) + unsigned long preempt_count = preempt_count(); + int this_cpu = raw_smp_processor_id(); + ++ if (!ipipe_root_domain_p) ++ goto out; ++ + if (likely(preempt_count)) + goto out; + +- if (irqs_disabled()) ++ if (irqs_disabled() || irqs_disabled_hw()) + goto out; + + /* +diff --git a/mm/memory.c b/mm/memory.c +index 4e59455..b8d365d 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -566,6 +567,32 @@ out: + return pfn_to_page(pfn); + } + ++static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) ++{ ++ /* ++ * If the source page was a PFN mapping, we don't have ++ * a "struct page" for it. We do a best-effort copy by ++ * just copying from the original user address. If that ++ * fails, we just zero-fill it. Live with it. ++ */ ++ if (unlikely(!src)) { ++ void *kaddr = kmap_atomic(dst, KM_USER0); ++ void __user *uaddr = (void __user *)(va & PAGE_MASK); ++ ++ /* ++ * This really shouldn't fail, because the page is there ++ * in the page tables. But it might just be unreadable, ++ * in which case we just give up and fill the result with ++ * zeroes. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) ++ memset(kaddr, 0, PAGE_SIZE); ++ kunmap_atomic(kaddr, KM_USER0); ++ flush_dcache_page(dst); ++ } else ++ copy_user_highpage(dst, src, va, vma); ++} ++ + /* + * copy one vm_area from one task to the other. 
Assumes the page tables + * already present in the new task to be cleared in the whole range +@@ -574,8 +601,8 @@ out: + + static inline void + copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, +- unsigned long addr, int *rss) ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss, struct page *uncow_page) + { + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; +@@ -614,6 +641,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + * in the parent and the child + */ + if (is_cow_mapping(vm_flags)) { ++#ifdef CONFIG_IPIPE ++ if (uncow_page) { ++ struct page *old_page = vm_normal_page(vma, addr, pte); ++ cow_user_page(uncow_page, old_page, addr, vma); ++ pte = mk_pte(uncow_page, vma->vm_page_prot); ++ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page_add_new_anon_rmap(uncow_page, vma, addr); ++ rss[!!PageAnon(uncow_page)]++; ++ goto out_set_pte; ++ } ++#endif /* CONFIG_IPIPE */ + ptep_set_wrprotect(src_mm, addr, src_pte); + pte = pte_wrprotect(pte); + } +@@ -645,13 +687,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *src_pte, *dst_pte; + spinlock_t *src_ptl, *dst_ptl; + int progress = 0; ++ struct page *uncow_page = NULL; + int rss[2]; +- ++#ifdef CONFIG_IPIPE ++ int do_cow_break = 0; ++again: ++ if (do_cow_break) { ++ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); ++ if (!uncow_page) ++ return -ENOMEM; ++ do_cow_break = 0; ++ } ++#else + again: ++#endif + rss[1] = rss[0] = 0; + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); +- if (!dst_pte) ++ if (!dst_pte) { ++ if (uncow_page) ++ page_cache_release(uncow_page); + return -ENOMEM; ++ } + src_pte = pte_offset_map_nested(src_pmd, addr); + src_ptl = pte_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); +@@ -674,7 +730,25 @@ again: + progress++; + continue; + } +- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); ++#ifdef CONFIG_IPIPE ++ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { ++ if (is_cow_mapping(vma->vm_flags) && ++ test_bit(MMF_VM_PINNED, &src_mm->flags) && ++ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap_nested(src_pte); ++ add_mm_rss(dst_mm, rss[0], rss[1]); ++ pte_unmap_unlock(dst_pte, dst_ptl); ++ cond_resched(); ++ do_cow_break = 1; ++ goto again; ++ } ++ } ++#endif ++ copy_one_pte(dst_mm, src_mm, dst_pte, ++ src_pte, vma, addr, rss, uncow_page); ++ uncow_page = NULL; + progress += 8; + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + +@@ -1941,32 +2015,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) + return pte; + } + +-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) +-{ +- /* +- * If the source page was a PFN mapping, we don't have +- * a "struct page" for it. We do a best-effort copy by +- * just copying from the original user address. If that +- * fails, we just zero-fill it. Live with it. +- */ +- if (unlikely(!src)) { +- void *kaddr = kmap_atomic(dst, KM_USER0); +- void __user *uaddr = (void __user *)(va & PAGE_MASK); +- +- /* +- * This really shouldn't fail, because the page is there +- * in the page tables. But it might just be unreadable, +- * in which case we just give up and fill the result with +- * zeroes. 
+- */ +- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) +- memset(kaddr, 0, PAGE_SIZE); +- kunmap_atomic(kaddr, KM_USER0); +- flush_dcache_page(dst); +- } else +- copy_user_highpage(dst, src, va, vma); +-} +- + /* + * This routine handles present pages, when users try to write + * to a shared page. It is done by copying the page to a new address +@@ -3377,3 +3425,111 @@ void might_fault(void) + } + EXPORT_SYMBOL(might_fault); + #endif ++ ++#ifdef CONFIG_IPIPE ++ ++static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ spinlock_t *ptl; ++ pte_t *pte; ++ ++ do { ++ pte = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ continue; ++ ++ if (!pte_present(*pte) || pte_write(*pte)) { ++ pte_unmap_unlock(pte, ptl); ++ continue; ++ } ++ ++ if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM) ++ return -ENOMEM; ++ } while (addr += PAGE_SIZE, addr != end); ++ return 0; ++} ++ ++static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ pmd_t *pmd; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ if (ipipe_pin_pte_range(mm, pmd, vma, addr, next)) ++ return -ENOMEM; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ unsigned long next; ++ pud_t *pud; ++ ++ pud = pud_offset(pgd, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ if (ipipe_pin_pmd_range(mm, pud, vma, addr, next)) ++ return -ENOMEM; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++int ipipe_disable_ondemand_mappings(struct task_struct *tsk) ++{ ++ unsigned long addr, next, end; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ int result = 0; ++ pgd_t *pgd; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return -EPERM; ++ ++ down_write(&mm->mmap_sem); ++ if (test_bit(MMF_VM_PINNED, &mm->flags)) ++ goto done_mm; ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ if (!is_cow_mapping(vma->vm_flags) ++ || !(vma->vm_flags & VM_WRITE)) ++ continue; ++ ++ addr = vma->vm_start; ++ end = vma->vm_end; ++ ++ pgd = pgd_offset(mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) { ++ result = -ENOMEM; ++ goto done_mm; ++ } ++ } while (pgd++, addr = next, addr != end); ++ } ++ set_bit(MMF_VM_PINNED, &mm->flags); ++ ++ done_mm: ++ up_write(&mm->mmap_sem); ++ mmput(mm); ++ return result; ++} ++ ++EXPORT_SYMBOL(ipipe_disable_ondemand_mappings); ++ ++#endif +diff --git a/mm/mmu_context.c b/mm/mmu_context.c +index ded9081..cb2ac0e 100644 +--- a/mm/mmu_context.c ++++ b/mm/mmu_context.c +@@ -23,15 +23,18 @@ void use_mm(struct mm_struct *mm) + { + struct mm_struct *active_mm; + struct task_struct *tsk = current; ++ unsigned long flags; + + task_lock(tsk); + active_mm = tsk->active_mm; ++ ipipe_mm_switch_protect(flags); + if (active_mm != mm) { + atomic_inc(&mm->mm_count); + tsk->active_mm = mm; + } + tsk->mm = mm; +- switch_mm(active_mm, mm, tsk); ++ __switch_mm(active_mm, mm, tsk); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + + if (active_mm != mm) +diff --git a/mm/vmalloc.c 
b/mm/vmalloc.c +index 7758726..66c3f68 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -172,6 +172,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, + return err; + } while (pgd++, addr = next, addr != end); + ++ __ipipe_pin_range_globally(start, end); ++ + return nr; + } + debian/libxenomai1.lintian0000664000000000000000000000110512142755576012773 0ustar # The package libxenomai1 first didn't contain a library called *xenomai*. # Therefore, I called it libxenomai1. Now, upstream introduced libxenomai0 in # the package. I'm leaving the package name libxenomai1 for now since # downgrading the number in the package name is probably a bad idea, and # synchronizing the package name with the SO version number isn't easily # possible anyway since the package contains several libraries. libxenomai1: package-name-doesnt-match-sonames libanalogy1 libnative3 libpsos0 libpthread-rt1 librtdm1 libuitron0 libvrtx0 libvxworks1 libxenomai0 debian/xenomai-runtime.install0000664000000000000000000000005711655576471013724 0ustar usr/bin usr/sbin usr/share/man usr/lib/xenomai debian/control0000664000000000000000000001016212154633307010577 0ustar Source: xenomai Section: devel Priority: extra Maintainer: Ubuntu Developers XSBC-Original-Maintainer: Roland Stigge Build-Depends: debhelper (>= 9), findutils (>= 4.2.28), autotools-dev, autoconf, automake, libtool Standards-Version: 3.9.4 Homepage: http://www.xenomai.org/ Package: xenomai-runtime Section: devel Architecture: amd64 arm armeb armel i386 powerpc powerpcspe Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: linux-patch-xenomai, xenomai-doc Replaces: xenomai Conflicts: xenomai Description: Xenomai runtime utilities Xenomai is a real-time development framework cooperating with the Linux kernel in order to provide a pervasive, interface-agnostic, hard real-time support to user-space applications, seamlessly integrated into the GNU/Linux environment. Xenomai provides its own API and emulation layers ("skins") to make migration from other RTOS easier. Examples are: pSOS+, VxWorks, VRTX, uiTRON, RTAI, POSIX. . This package contains the runtime programs and the testsuite for the Xenomai realtime system. Package: linux-patch-xenomai Section: kernel Architecture: all Depends: ${misc:Depends} Suggests: xenomai, linux-source-2.6, kernel-package Description: Linux kernel patches for Xenomai Xenomai is a real-time development framework cooperating with the Linux kernel in order to provide a pervasive, interface-agnostic, hard real-time support to user-space applications, seamlessly integrated into the GNU/Linux environment. Xenomai provides its own API and emulation layers ("skins") to make migration from other RTOS easier. Examples are: pSOS+, VxWorks, VRTX, uiTRON, RTAI, POSIX. . This package contains patches for 2.6 series kernels - These are intended for use with kernel-package and a virgin Linux source tree. Note: These patches include the base adeos-ipipe patch along with all the additional material normally added by the prepare-kernel.sh script. . This package contains the following patches: . 
Package: libxenomai1 Section: libs Architecture: amd64 arm armeb armel i386 powerpc powerpcspe Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: linux-patch-xenomai, xenomai-doc Replaces: xenomai Conflicts: xenomai Breaks: udev (<< 136-1) Description: Shared libraries for Xenomai Xenomai is a real-time development framework cooperating with the Linux kernel in order to provide a pervasive, interface-agnostic, hard real-time support to user-space applications, seamlessly integrated into the GNU/Linux environment. Xenomai provides its own API and emulation layers ("skins") to make migration from other RTOS easier. Examples are: pSOS+, VxWorks, VRTX, uiTRON, RTAI, POSIX. . This package contains the shared libraries. Package: libxenomai-dev Section: libdevel Architecture: amd64 arm armeb armel i386 powerpc powerpcspe Depends: libxenomai1 (= ${binary:Version}), ${misc:Depends} Suggests: linux-patch-xenomai, xenomai-doc Replaces: xenomai Conflicts: xenomai Description: Headers and static libs for Xenomai Xenomai is a real-time development framework cooperating with the Linux kernel in order to provide a pervasive, interface-agnostic, hard real-time support to user-space applications, seamlessly integrated into the GNU/Linux environment. Xenomai provides its own API and emulation layers ("skins") to make migration from other RTOS easier. Examples are: pSOS+, VxWorks, VRTX, uiTRON, RTAI, POSIX. . This package contains development files (header files), the static libraries and scripts used to compile realtime applications. Package: xenomai-doc Section: doc Architecture: all Depends: ${misc:Depends} Suggests: xenomai Conflicts: xenomai-docs Replaces: xenomai-docs Description: Xenomai documentation Xenomai is a real-time development framework cooperating with the Linux kernel in order to provide a pervasive, interface-agnostic, hard real-time support to user-space applications, seamlessly integrated into the GNU/Linux environment. Xenomai provides its own API and emulation layers ("skins") to make migration from other RTOS easier. Examples are: pSOS+, VxWorks, VRTX, uiTRON, RTAI, POSIX. . Documentation for Xenomai realtime API. debian/linux-patch-xenomai.dirs0000664000000000000000000000004511560573003013744 0ustar usr/src/kernel-patches/diffs/xenomai debian/copyright0000664000000000000000000000257011373562011011126 0ustar This package was debianized by Paul Corner On: Sat Mar 3 12:00 GMT 2007 The primary author of the upstream package is Philippe Gerum. It was downloaded from http://www.xenomai.org/ Copyright (C) 2001,2002,2003,2004,2005,2006,2007,2008 Philippe Gerum . Copyright (C) 2005 Dmitry Adamushko Copyright (C) 2001,2003,2004,2005,2006,2008 Gilles Chanteperdrix Copyright (C) 2005,2006,2007 Jan Kiszka Copyright (C) 2006 Wolfgang Grandegger License: Xenomai is licensed under GPL version 2, the user space libraries are LGPL version 2.1. On Debian systems, the complete texts of the GNU General Public License v2 and the GNU Lesser General Public License v2 can be found in the file `/usr/share/common-licenses/GPL-2' and `/usr/share/common-licenses/LGPL-2.1' respectively. The documentation under doc/docbook/xenomai/ is licensed as: Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. 
On Debian systems, the complete texts of the GNU Free Documentation License version 1.2 can be found in the file /usr/share/common-licenses/GFDL-1.2. debian/libxenomai-dev.install0000664000000000000000000000111311373562011013456 0ustar usr/bin/xeno-config usr/include usr/lib/*.la usr/lib/*.a usr/lib/*.so usr/lib/posix.wrappers usr/share/man/man1/xeno-config.1 ../../examples usr/share/libxenomai-dev ../../ksrc/skins/vxworks/demos usr/share/libxenomai-dev/examples/kernel/skins/vxworks ../../ksrc/skins/native/demos usr/share/libxenomai-dev/examples/kernel/skins/native ../../ksrc/skins/native/snippets usr/share/libxenomai-dev/examples/kernel/skins/native ../../ksrc/skins/posix/demos usr/share/libxenomai-dev/examples/kernel/skins/posix ../../ksrc/skins/psos+/demos usr/share/libxenomai-dev/examples/kernel/skins/psos+ debian/changelog0000664000000000000000000003575512154641403011061 0ustar xenomai (2.6.2.1-2ubuntu2) saucy; urgency=low * Do not strip the testsuite. Should work-around a build failure. -- Dmitry Shachnev Sat, 08 Jun 2013 18:41:52 +0400 xenomai (2.6.2.1-2ubuntu1) saucy; urgency=low * Merge from Debian unstable. (LP: #1186588) Remaining changes: - debian/rules: Set OMAP3 as default target machine as that is the closest to what we support from what upstream configure script has to offer. - debian/control: Add Breaks on udev to get correct version. - debian/libxenomai1.preinst: Remove symlink on upgrade, remove old udev. rule unless modified in which case move to new name. - debian/libxenomai1.postinst: Do not create symlink. - debian/libxenomai1.postrm: No symlink to remove. * fix_thumb_build.patch no more nedded as it reguarded a previous /include/asm-arm/atomic.h version. -- Alessandro Losavio Thu, 06 Jun 2013 20:16:03 +0100 xenomai (2.6.2.1-2) unstable; urgency=low * debian/control: Standards-Version: 3.9.4 * debian/compat: 9 * Fix build on powerpcspe by patching upstream * Malfunction of FD_ISSET with RTDM is fixed upstream, thanks to Alexandre Duch and Gilles Chanteperdrix (Closes: #695657) -- Roland Stigge Thu, 09 May 2013 18:31:31 +0200 xenomai (2.6.2.1-1) experimental; urgency=low * New upstream release -- Roland Stigge Sat, 26 Jan 2013 15:07:20 +0100 xenomai (2.6.2-1) experimental; urgency=low * New upstream release * Added powerpcspe to list of supported architectures -- Roland Stigge Thu, 27 Dec 2012 13:08:39 +0100 xenomai (2.6.1-git20121216-1) experimental; urgency=low * New upstream pre-release -- Roland Stigge Sun, 16 Dec 2012 11:44:21 +0100 xenomai (2.6.1-2) experimental; urgency=low * Removed obsolete Debian patches * Include newer ipipe patches into binary package -- Roland Stigge Mon, 13 Aug 2012 14:43:44 +0200 xenomai (2.6.1-1) experimental; urgency=low * New upstream release -- Roland Stigge Thu, 02 Aug 2012 17:40:54 +0200 xenomai (2.6.0-2) unstable; urgency=low * Add patch from upstream, fixing a bug in clock_nanosleep (Closes: #677633) Comment from patch author: Note however that with xenomai, a signal received during nanosleep will trigger a switch to secondary mode. So, this usage is only possible for a real-time thread in exceptional conditions (to handle an error for instance), not when latency matters. 
* debian/control: Standards-Version: 3.9.3 -- Roland Stigge Sat, 16 Jun 2012 17:10:54 +0200 xenomai (2.6.0-1) unstable; urgency=low * New upstream release * Needed to autoreconf, adding necessary build-dependencies -- Roland Stigge Sun, 06 Nov 2011 15:32:52 +0100 xenomai (2.5.6-3) unstable; urgency=low * Added -fno-ommit-frame-pointer to build, necessary from gcc 4.6 on (Closes: #637425) * Removed *.la files from libxenomai-dev -- Roland Stigge Fri, 12 Aug 2011 20:39:11 +0200 xenomai (2.5.6-2) unstable; urgency=low * Removed usage of dh-kpatches in the build process (and from Build-Depends) since this package will probably be removed from Debian (and make-kpkg doesn't support automatic patching anymore anyway) * Standards-Version: 3.9.2 * debhelper compatibility level 8 -- Roland Stigge Wed, 04 May 2011 11:09:19 +0200 xenomai (2.5.6-1) unstable; urgency=low * New upstream release -- Roland Stigge Thu, 07 Apr 2011 17:52:24 +0200 xenomai (2.5.5.2-1ubuntu2) natty; urgency=low * debian/patches/fix_thumb_build.patch: Add IF-THEN instruction to fix Thumb-2 build on armel. * debian/rules: Set OMAP3 as default target machine as that is the closest to what we support from what upstream configure script has to offer. -- Jani Monoses Mon, 28 Mar 2011 22:55:28 +0300 xenomai (2.5.5.2-1ubuntu1) natty; urgency=low * Merge from debian unstable. Remaining changes: - debian/rules: Create file for debhelper to pick up, use debhelper to install it. - debian/libxenomai1.preinst: Remove symlink on upgrade, remove old udev. rule unless modified in which case move to new name. - debian/libxenomai1.postinst: Do not create symlink. - debian/libxenomai1.postrm: No symlink to remove. - debian/control: Add Breaks on udev to get correct version. -- Artur Rona Wed, 22 Dec 2010 14:25:48 +0100 xenomai (2.5.5.2-1) unstable; urgency=low * New upstream release (Closes: #593585, #593586) * Using scripts/prepare-patch.sh now instead of debian/prepare-patch.sh -- Roland Stigge Sun, 12 Dec 2010 19:45:39 +0100 xenomai (2.5.4-3ubuntu1) natty; urgency=low * Merge from debian unstable. Remaining changes: - debian/rules: Create file for debhelper to pick up, use debhelper to install it. - debian/libxenomai1.preinst: Remove symlink on upgrade, remove old udev. rule unless modified in which case move to new name. - debian/libxenomai1.postinst: Do not create symlink. - debian/libxenomai1.postrm: No symlink to remove. - debian/control: add Breaks on udev to get correct version. * New upstream release fixes issue: - Support higher kernel than 2.6.29. 
(LP: #544284) -- Artur Rona Sat, 16 Oct 2010 00:25:10 +0200 xenomai (2.5.4-3) unstable; urgency=low * Fix x86 boot problems (Closes: #594914) -- Roland Stigge Sat, 04 Sep 2010 14:26:25 +0200 xenomai (2.5.4-2) unstable; urgency=low * Fixed runlevels in debian/libxenomai1.xenomai.init (Closes: #592360) -- Roland Stigge Thu, 19 Aug 2010 23:25:42 +0200 xenomai (2.5.4-1) unstable; urgency=low * New upstream release * Standards-Version: 3.9.1 -- Roland Stigge Wed, 04 Aug 2010 22:29:28 +0200 xenomai (2.5.3-2) unstable; urgency=low * Switch to dpkg-source 3.0 (quilt) format -- Roland Stigge Sat, 15 May 2010 20:13:42 +0200 xenomai (2.5.3-1) unstable; urgency=low * New upstream release -- Roland Stigge Sat, 15 May 2010 20:13:10 +0200 xenomai (2.5.2-2) unstable; urgency=low * Added patch from Stefan Kisdaroczi : - Create group xenomai on install - Added a init-script which sets /sys/.../xenomai_gid if /sys/.../xenomai_gid exists - Added a modprobe-script that adds the xenomai_gid parameter if the user did call modprobe without xenomai_gid= -- Roland Stigge Sun, 02 May 2010 17:06:14 +0200 xenomai (2.5.2-1) unstable; urgency=low * New upstream release -- Roland Stigge Mon, 29 Mar 2010 20:51:07 +0200 xenomai (2.5.1-4) unstable; urgency=low * Added patches by Stefan Kisdaroczi : - debian/copyright: Typo and email address (Closes: #571099) - debian/control: ia64 support removed (Closes: #571104) - debian/rules: Added dpkg-cross support -- Roland Stigge Wed, 24 Feb 2010 22:20:10 +0100 xenomai (2.5.1-3) unstable; urgency=low * xenomai-runtime: Replaced "xenomai-" prefixed executables with wrapper script /usr/bin/xeno to call original executables from /usr/lib/xenomai/, e.g. "xeno latency" * debian/prepare-patch.sh: Synchronized from upstream (Closes: #569721) * Removed 2.6.24 adeos-ipipe patches * Enabled patching/use of Debian's kernel (2.6.32) -- Roland Stigge Sun, 14 Feb 2010 13:35:15 +0100 xenomai (2.5.1-2) unstable; urgency=low * xenomai-runtime: Renamed /usr/bin/* to /usr/bin/xenomai-* (Closes: #569357) -- Roland Stigge Fri, 12 Feb 2010 21:18:00 +0100 xenomai (2.5.1-1) unstable; urgency=low * New upstream release (Closes: #553334, #566566, #536731) * debian/control: Standards-Version: 3.8.4 -- Roland Stigge Sat, 06 Feb 2010 11:05:50 +0100 xenomai (2.4.8-2ubuntu1) karmic; urgency=low * Merge from debian unstable (LP: #391918), remaining changes: - Add lpia to supported architectures. - debian/rules: Create file for debhelper to pick up, use debhelper to install it. - debian/libxenomai1.dirs: Do not create directory. - debian/libxenomai1.preinst: Remove symlink on upgrade, remove old udev. rule unless modified in which case move to new name. - debian/libxenomai1.postinst: Do not create symlink. - debian/libxenomai1.postrm: No symlink to remove. - Bump build-depend on debhelper to install udev rules into /lib/udev/rules.d, add Breaks on udev to get correct version. 
-- Andres Rodriguez Wed, 24 Jun 2009 22:17:01 -0500 xenomai (2.4.8-2) unstable; urgency=low * debian/libxenomai1.{postinst,postrm}: "set -e" * debian/control: linux-patch-xenomai: Section: kernel -- Roland Stigge Wed, 24 Jun 2009 17:47:41 +0200 xenomai (2.4.8-1) unstable; urgency=low * New upstream release - includes limits.h include as in patch from Peter Green (Closes: #527654) * debian/control: - Standards-Version: 3.8.2 -- Roland Stigge Mon, 22 Jun 2009 22:50:23 +0200 xenomai (2.4.4-3ubuntu3) jaunty; urgency=low * debian/rules: Create file for debhelper to pick up, use debhelper to install it * debian/libxenomai1.dirs: Do not create directory * debian/libxenomai1.preinst: Remove symlink on upgrade, remove old udev rule unless modified in which case move to new name. * debian/libxenomai1.postinst: Do not create symlink * debian/libxenomai1.postrm: No symlink to remove * Bump build-depend on debhelper to install udev rules into /lib/udev/rules.d, add Breaks on udev to get correct version. -- Scott James Remnant Tue, 13 Jan 2009 16:51:22 +0000 xenomai (2.4.4-3ubuntu2) intrepid; urgency=low * Add lpia to supported architectures. -- Luca Falavigna Tue, 26 Aug 2008 15:35:26 +0000 xenomai (2.4.4-3ubuntu1) intrepid; urgency=low * Include limits.h in these files, fix FTBFS with glibc 2.8: - src/testsuite/irqbench/irqloop.c - src/testsuite/switchtest/switchtest.c: -- Luca Falavigna Tue, 26 Aug 2008 15:18:20 +0000 xenomai (2.4.4-3) unstable; urgency=low * debian/rules: Fixed concurrent ./configure + make runs, thanks to Albin Tonnerre (Closes: #486461) -- Roland Stigge Mon, 16 Jun 2008 11:44:01 +0200 xenomai (2.4.4-2) unstable; urgency=low * Backported kernel 2.6.24 patches for x86, powerpc and arm (for lenny) from xenomai 2.4.3 * debian/control: Standards-Version: 3.8.0 -- Roland Stigge Thu, 12 Jun 2008 14:27:12 +0200 xenomai (2.4.4-1) unstable; urgency=low * New upstream release * debian/rules: configure --enable-arm-mach=generic for ARM * debian/control: Standards-Version: 3.8.0 -- Roland Stigge Mon, 09 Jun 2008 14:28:01 +0200 xenomai (2.4.4~svn20080526-1) experimental; urgency=low * New upstream snapshot -- Roland Stigge Mon, 26 May 2008 11:58:30 +0200 xenomai (2.4.3-7) unstable; urgency=low * Included patch from Riku Voipio to fix ARM compile issues (Closes: #477720) -- Roland Stigge Thu, 24 Apr 2008 23:18:01 +0200 xenomai (2.4.3-6) unstable; urgency=low * Added lintian override for recursive include directories (necessary according to upstream) * Added missing man pages (can be removed when integrated upstream) * debian/rules: Fixed klatency/run script to be proper shell script -- Roland Stigge Tue, 22 Apr 2008 12:23:22 +0200 xenomai (2.4.3-5) unstable; urgency=low * Install xeno-config into libxenomai-dev where the executable is -- Roland Stigge Sat, 19 Apr 2008 22:15:07 +0200 xenomai (2.4.3-4) unstable; urgency=low * Move /usr/lib/xenomai to original upstream /usr/share/xenomai -- Roland Stigge Fri, 18 Apr 2008 19:51:49 +0200 xenomai (2.4.3-3) unstable; urgency=low * debian/rules: Separated prepare-patch.sh calls for individual architectures (Closes: #473098) -- Roland Stigge Thu, 10 Apr 2008 10:01:33 +0200 xenomai (2.4.3-2) unstable; urgency=low * Enable x86 patches for both i386 and amd64 (Closes: #473098) -- Roland Stigge Tue, 01 Apr 2008 10:04:51 +0200 xenomai (2.4.3-1) unstable; urgency=low * New upstream release * Added debian/watch -- Roland Stigge Tue, 25 Mar 2008 14:13:50 +0100 xenomai (2.4.2-3) unstable; urgency=low * Fixed bashisms by using "#!/bin/bash" (Closes: #471871) -- 
Roland Stigge Sat, 22 Mar 2008 17:01:11 +0100 xenomai (2.4.2-2) unstable; urgency=low * debian/copyright: Refined Copyrights * debian/control: Added ia64 to remaining relevant Architecture lines * debian/rules: Moved DEBIAN/rules adjustment for architecture independent target to arch-indep target -- Roland Stigge Thu, 28 Feb 2008 20:55:23 +0100 xenomai (2.4.2-1) unstable; urgency=low * Initial revision for the Debian Archive * debian/control: - Reorganized binary package structure - Standards-Version: 3.7.3 -- Roland Stigge Wed, 20 Feb 2008 18:06:49 +0100 xenomai (2.4.0-0+rc6) unstable; urgency=low * Update prepare-patch.sh to use combined x86/i386 Xenomai tree. * Split patch generation out of build-stamp so that it only gets called once along with the configure. * In pursuit of silencing lintian: * Rename top level ChangeLog to changelog when installed. * Add ldconfig to post install/remove scripts. * Compress changelogs. -- Paul Corner Wed, 14 Nov 2007 21:48:27 +0000 xenomai (2.3.50-05+r2299) unstable; urgency=low * Add top level ChangeLog and CREDITS to each package. * rebuilt from SVN tag 2299 -- Paul Corner Thu, 15 Mar 2007 11:03:14 +0000 xenomai (2.3.50-04) unstable; urgency=low * Remove demo & snippets from the patches - Installed in xenomai-dev * linux-patches-xenomai lists patches, architectures, & kernel version in the package description meta-data. -- Paul Corner Mon, 12 Mar 2007 17:30:00 +0000 xenomai (2.3.50-03) unstable; urgency=low * Revision 2281 * Fix bug in postinst script - Need to overwrite the symlink -- Paul Corner Sun, 11 Mar 2007 20:30:00 +0000 xenomai (2.3.50-02) unstable; urgency=low * Apply patch to fix includedir bug - rules updated.. * debian/ contents added to EXTRA_DIST. -- Paul Corner Wed, 7 Mar 2007 23:29:00 +0000 xenomai (2.3.50-01) unstable; urgency=low * Initial build. * General notes: prepare-patch.sh generates a per-kernel/arch for use with kernel-package - A single patch is needed for apply/unapply steps. The scripts/prepare-kernel.sh appends a few lines to some Makefiles which would cause problems on the unapply stage. -- Paul Corner Sat, 3 Mar 2007 12:00:00 +0000 debian/libxenomai1.postinst0000664000000000000000000000061012154633307013206 0ustar #!/bin/sh -e case "$1" in configure) # Add the xenomai group unless it's already there if ! getent group xenomai >/dev/null; then addgroup --quiet --system xenomai || true fi ;; abort-upgrade|abort-remove|abort-deconfigure) ;; *) echo "postinst called with unknown argument \`$1'" >&2 exit 1 ;; esac #DEBHELPER# debian/rules0000775000000000000000000001151612154640451010256 0ustar #!/usr/bin/make -f # Author: Paul Corner # Created on: Sat Mar 3 12:00 GMT 2007 # License: GPL Ver. 2 #export DH_VERBOSE=1 DEB_HOST_GNU_CPU ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU) DEB_HOST_ARCH ?= $(shell dpkg-architecture -qDEB_HOST_ARCH) DEB_HOST_GNU_TYPE=$(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) DEB_BUILD_GNU_TYPE=$(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS) CFLAGS:=$(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) CXXFLAGS:=$(shell dpkg-buildflags --get CXXFLAGS) $(CPPFLAGS) LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS) ifeq ($(DEB_HOST_ARCH), i386) # Note: Would like to use --includedir=/usr/include/xenomai, but # there appears to be a `make install` problem. 
CONFIG_OPTS = \ --enable-x86-tsc endif ifeq ($(DEB_HOST_ARCH), amd64) CONFIG_OPTS = \ --enable-x86-tsc \ --enable-x86-sep endif ifeq ($(DEB_HOST_ARCH), powerpc) CONFIG_OPTS = endif ifeq ($(DEB_HOST_ARCH), armeb) CONFIG_OPTS = --enable-arm-mach=generic --enable-arm-eabi endif ifeq ($(DEB_HOST_ARCH), armel) CONFIG_OPTS = --enable-arm-mach=omap3 --enable-arm-eabi endif ifeq ($(DEB_HOST_ARCH), arm) CONFIG_OPTS = --enable-arm-mach=generic endif CONFIG_OPTS += --prefix=/usr \ --includedir=/usr/include/xenomai \ --mandir=/usr/share/man \ --with-testdir=/usr/lib/xenomai ifneq ($(DEB_HOST_GNU_TYPE),$(DEB_BUILD_GNU_TYPE)) CONFIG_OPTS += --build $(DEB_BUILD_GNU_TYPE) --host $(DEB_HOST_GNU_TYPE) else CONFIG_OPTS += --build $(DEB_BUILD_GNU_TYPE) endif # necessary from GCC 4.6 on CFLAGS += -fno-omit-frame-pointer build: build-arch build-indep build-arch: build-arch-stamp build-arch-stamp: config.status xenomai-patch-stamp dh_testdir $(MAKE) touch build-arch-stamp build-indep: build-indep-stamp build-indep-stamp: config.status dh_testdir touch build-indep-stamp xenomai-patch-stamp: dh_testdir for i in arm i386 powerpc x86_64 x86 ; do \ bash $(CURDIR)/scripts/prepare-patch.sh $$i ; \ done touch xenomai-patch-stamp config.status: configure-stamp configure-stamp: dh_testdir autoreconf -fi ./configure $(CONFIG_OPTS) --with-testdir=/usr/lib/xenomai/testsuite \ CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" CPPFLAGS="$(CPPFLAGS)" touch configure-stamp clean: dh_testdir dh_testroot rm -f configure-stamp xenomai-patch-stamp build-indep-stamp build-arch-stamp -if test -f Makefile ; then \ $(MAKE) distclean ; \ fi dh_clean rm -fR $(CURDIR)/tmp rm -f $(CURDIR)/*.patch rm -f include/asm-x86/ipipe/Makefile.in -rm -f configure install: build dh_testdir dh_testroot dh_prep dh_installdirs $(MAKE) install DESTDIR=$(CURDIR)/debian/tmp/ dh_install --sourcedir=$(CURDIR)/debian/tmp # Patch collection: cp *ipipe-*.patch $(CURDIR)/debian/linux-patch-xenomai/usr/src/kernel-patches/diffs/xenomai # xeno-config should be only in libxenomai-dev rm -f $(CURDIR)/debian/xenomai-runtime/usr/bin/xeno-config rm -f $(CURDIR)/debian/xenomai-runtime/usr/share/man/man1/xeno-config.1 : > $(CURDIR)/debian/libxenomai1.udev for f in $(CURDIR)/ksrc/nucleus/udev/*.rules ; do \ cat $$f >> $(CURDIR)/debian/libxenomai1.udev ; \ done install -m 644 debian/libxenomai1.modprobe $(CURDIR)/debian/libxenomai1/etc/modprobe.d/xenomai.conf # remove empty directory rm -rf $(CURDIR)/debian/xenomai-doc/usr/share/doc/xenomai-doc/ps cp debian/libxenomai1.lintian $(CURDIR)/debian/libxenomai1/usr/share/lintian/overrides/libxenomai1 cp debian/libxenomai-dev.lintian $(CURDIR)/debian/libxenomai-dev/usr/share/lintian/overrides/libxenomai-dev # remove *.la files rm $(CURDIR)/debian/libxenomai-dev/usr/lib/*.la # Build architecture-independent files here. binary-indep: build install dh_testdir -i dh_testroot -i dh_installdocs -i dh_link -i dh_installchangelogs -i dh_strip -i dh_compress -i -X.pdf $(CURDIR)/debian/linux-patch-xenomai/usr/src/kernel-patches/diffs/xenomai/* dh_fixperms -i dh_makeshlibs -i dh_installdeb -i dh_shlibdeps -i dh_gencontrol -i dh_md5sums -i dh_builddeb -i # Build architecture-dependent files here. binary-arch: build install dh_testdir -s dh_testroot -s dh_installinit -s --name=xenomai dh_installman -s dh_installdocs -s -A dh_installudev dh_link -s dh_installchangelogs -s dh_strip -s -Xtestsuite dh_compress -s dh_fixperms -s dh_makeshlibs -s dh_installdeb -s dh_shlibdeps -s dh_gencontrol -s # Echo config options to control. echo " ." 
>> $(CURDIR)/debian/libxenomai1/DEBIAN/control echo " Compiled with the following options." >> \ $(CURDIR)/debian/libxenomai1/DEBIAN/control echo "$(CONFIG_OPTS)" | awk '{ for ( i=1 ; i<=NF ; i++ ) print " "$$i }' >> \ $(CURDIR)/debian/libxenomai1/DEBIAN/control # End of hackery. dh_md5sums -s dh_builddeb -s # We have nothing to do by default. binary: binary-indep binary-arch .PHONY: build clean binary-indep binary-arch binary install debian/xenomai-doc.install0000664000000000000000000000006211373562011012760 0ustar usr/share/doc/xenomai/* usr/share/doc/xenomai-doc debian/source/0000775000000000000000000000000011373562065010500 5ustar debian/source/format0000664000000000000000000000001411373562065011706 0ustar 3.0 (quilt) debian/linux-patch-xenomai.README.Debian0000664000000000000000000000126411560573003015125 0ustar Xenomai kernel patches in Debian ================================ With this package, you can patch and build kernels suitable for usage with Xenomai. This can be done with, e.g.: # cd /usr/src/linux-source-2.6.32 # gunzip -c /usr/src/kernel-patches/diffs/xenomai/adeos-ipipe-2.6.37-x86-2.9-00.patch.gz | patch -p1 # make config # make-kpkg --rootcmd fakeroot --initrd kernel_image (Replace the above patch file with the current appropriate filename.) This is intended for vanilla and Debian kernel sources. NOTE: In the kernel config, PARAVIRT should be turned off for Xenomai kernels to compile successfully. -- Roland Stigge , Sun, 14 Feb 2010 15:14:40 +0100 debian/libxenomai1.preinst0000664000000000000000000000103312154633307013007 0ustar #!/bin/sh set -e if [ "$1" = install ] || [ "$1" = upgrade ]; then if [ -L "/etc/udev/rules.d/xenomai.rules"]; then rm -f "/etc/udev/rules.d/xenomai.rules" fi if [ -e "/etc/udev/xenomai.rules" ]; then if [ "`md5sum \"/etc/udev/xenomai.rules\" | sed -e \"s/ .*//\"`" = \ "`dpkg-query -W -f='${Conffiles}' libxenomai1 | sed -n -e \"\\\\' /etc/udev/xenomai.rules's/.* //p\"`" ] then rm -f "/etc/udev/xenomai.rules" else mv "/etc/udev/xenomai.rules" "/etc/udev/rules.d/40-libxenomai1.rules" fi fi fi #DEBHELPER# debian/libxenomai1.xenomai.init0000664000000000000000000000130011433320501013706 0ustar #!/bin/sh -e ### BEGIN INIT INFO # Provides: xenomai # Required-Start: mountkernfs # Required-Stop: # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Set xeno_nucleus group ### END INIT INFO GROUP=xenomai INITNAME=/etc/init.d/xenomai FILENAME=/sys/module/xeno_nucleus/parameters/xenomai_gid GID=$(getent group $GROUP | cut -d: -f3) test -e $FILENAME || exit 0 test -n "$GID" || exit 0 case "$1" in start) echo "$GID" > $FILENAME ;; stop) echo "-1" > $FILENAME ;; restart|force-reload) $0 start ;; *) echo "Usage: $INITNAME {start|stop|restart|force-reload}" exit 1 ;; esac exit 0 debian/watch0000664000000000000000000000011011373562011010210 0ustar version=3 http://download.gna.org/xenomai/stable/xenomai-(.*)\.tar\.bz2 debian/libxenomai1.modprobe0000664000000000000000000000037611373562011013136 0ustar install xeno_nucleus /sbin/modprobe --ignore-install xeno_nucleus $CMDLINE_OPTS \ $(/usr/bin/test $(/bin/echo -n '$CMDLINE_OPTS' | /bin/grep xenomai_gid) \ || /usr/bin/getent group xenomai | /usr/bin/cut -d: -f3 | /bin/sed -e 's/^/xenomai_gid\=/') debian/compat0000664000000000000000000000000212142747607010400 0ustar 9 debian/clean0000664000000000000000000000453012067043613010201 0ustar Makefile.in aclocal.m4 config/Makefile.in config/config.guess config/config.sub config/depcomp config/install-sh config/libtool.m4 config/ltmain.sh config/ltoptions.m4 
config/ltversion.m4 config/lt~obsolete.m4 config/missing doc/Makefile.in doc/docbook/Makefile.in doc/docbook/custom-stylesheets/Makefile.in doc/docbook/custom-stylesheets/xsl/Makefile.in doc/docbook/custom-stylesheets/xsl/common/Makefile.in doc/docbook/custom-stylesheets/xsl/fo/Makefile.in doc/docbook/custom-stylesheets/xsl/html/Makefile.in doc/docbook/xenomai/Makefile.in doc/doxygen/Makefile.in doc/man/Makefile.in doc/txt/Makefile.in doc/asciidoc/Makefile.in include/Makefile.in include/analogy/Makefile.in include/asm-arm/Makefile.in include/asm-arm/bits/Makefile.in include/asm-blackfin/Makefile.in include/asm-blackfin/bits/Makefile.in include/asm-generic/Makefile.in include/asm-generic/bits/Makefile.in include/asm-nios2/Makefile.in include/asm-nios2/bits/Makefile.in include/asm-powerpc/Makefile.in include/asm-powerpc/bits/Makefile.in include/asm-sh/Makefile.in include/asm-sh/bits/Makefile.in include/asm-sim/Makefile.in include/asm-sim/bits/Makefile.in include/asm-x86/Makefile.in include/asm-x86/bits/Makefile.in include/native/Makefile.in include/nucleus/Makefile.in include/posix/Makefile.in include/posix/sys/Makefile.in include/psos+/Makefile.in include/rtdm/Makefile.in include/uitron/Makefile.in include/vrtx/Makefile.in include/vxworks/Makefile.in scripts/Makefile.in src/Makefile.in src/drvlib/Makefile.in src/drvlib/analogy/Makefile.in src/include/Makefile.in src/skins/Makefile.in src/skins/common/Makefile.in src/skins/native/Makefile.in src/skins/posix/Makefile.in src/skins/psos+/Makefile.in src/skins/rtdm/Makefile.in src/skins/uitron/Makefile.in src/skins/vrtx/Makefile.in src/skins/vxworks/Makefile.in src/testsuite/Makefile.in src/testsuite/clocktest/Makefile.in src/testsuite/cyclic/Makefile.in src/testsuite/irqbench/Makefile.in src/testsuite/klatency/Makefile.in src/testsuite/latency/Makefile.in src/testsuite/regression/Makefile.in src/testsuite/regression/native+posix/Makefile.in src/testsuite/regression/native/Makefile.in src/testsuite/regression/posix/Makefile.in src/testsuite/switchtest/Makefile.in src/testsuite/unit/Makefile.in src/testsuite/xeno-test/Makefile.in src/utils/Makefile.in src/utils/analogy/Makefile.in src/utils/can/Makefile.in src/utils/ps/Makefile.in config.log debian/libxenomai1.install0000664000000000000000000000001711373562011012765 0ustar usr/lib/*.so.* debian/libxenomai-dev.lintian0000664000000000000000000000077611373562011013464 0ustar # Upstream author insists that those recursive directories exist for # compatibility reasons of compiles applications libxenomai-dev: symlink-is-self-recursive usr/include/xenomai/asm-generic/xenomai . libxenomai-dev: symlink-is-self-recursive usr/include/xenomai/asm-sim/asm . libxenomai-dev: symlink-is-self-recursive usr/include/xenomai/asm-sim/xenomai . libxenomai-dev: symlink-is-self-recursive usr/include/xenomai/asm-x86/xenomai . libxenomai-dev: symlink-is-self-recursive usr/include/xenomai/xenomai . debian/libxenomai-dev.dirs0000664000000000000000000000003411373562011012752 0ustar usr/share/lintian/overrides debian/libxenomai1.postrm0000664000000000000000000000010412154633307012645 0ustar #!/bin/sh -e case "$1" in purge | remove) ;; esac #DEBHELPER#