diff -Nur linux-4.9.28.orig/arch/arm/include/asm/irq.h linux-4.9.28/arch/arm/include/asm/irq.h --- linux-4.9.28.orig/arch/arm/include/asm/irq.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/include/asm/irq.h 2017-05-19 03:37:25.122174217 +0200 @@ -22,6 +22,8 @@ #endif #ifndef __ASSEMBLY__ +#include + struct irqaction; struct pt_regs; extern void migrate_irqs(void); diff -Nur linux-4.9.28.orig/arch/arm/include/asm/switch_to.h linux-4.9.28/arch/arm/include/asm/switch_to.h --- linux-4.9.28.orig/arch/arm/include/asm/switch_to.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/include/asm/switch_to.h 2017-05-19 03:37:25.122174217 +0200 @@ -3,6 +3,13 @@ #include +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * For v7 SMP cores running a preemptible kernel we may be pre-empted * during a TLB maintenance operation, so execute an inner-shareable dsb @@ -25,6 +32,7 @@ #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ + switch_kmaps(prev, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff -Nur linux-4.9.28.orig/arch/arm/include/asm/thread_info.h linux-4.9.28/arch/arm/include/asm/thread_info.h --- linux-4.9.28.orig/arch/arm/include/asm/thread_info.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/include/asm/thread_info.h 2017-05-19 03:37:25.122174217 +0200 @@ -49,6 +49,7 @@ struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ @@ -142,7 +143,8 @@ #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ +#define TIF_NEED_RESCHED_LAZY 7 #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 @@ -152,6 +154,7 @@ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) @@ -167,7 +170,8 @@ * Change these and you break ASM code in entry-common.S */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED_LAZY) #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff -Nur linux-4.9.28.orig/arch/arm/Kconfig linux-4.9.28/arch/arm/Kconfig --- linux-4.9.28.orig/arch/arm/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/Kconfig 2017-05-19 03:37:25.122174217 +0200 @@ -36,7 +36,7 @@ select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) @@ -75,6 +75,7 @@ select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS diff -Nur linux-4.9.28.orig/arch/arm/kernel/asm-offsets.c linux-4.9.28/arch/arm/kernel/asm-offsets.c --- linux-4.9.28.orig/arch/arm/kernel/asm-offsets.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/asm-offsets.c 2017-05-19 03:37:25.122174217 +0200 @@ -65,6 +65,7 @@ BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff -Nur linux-4.9.28.orig/arch/arm/kernel/entry-armv.S linux-4.9.28/arch/arm/kernel/entry-armv.S --- linux-4.9.28.orig/arch/arm/kernel/entry-armv.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/entry-armv.S 2017-05-19 03:37:25.122174217 +0200 @@ -220,11 +220,18 @@ #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + bne 1f @ return from exeption + ldr r0, [tsk, #TI_FLAGS] @ get flags + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set + blne svc_preempt @ preempt! + + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r8, #0 @ if preempt lazy count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED_LAZY blne svc_preempt +1: #endif svc_exit r5, irq = 1 @ return from exception @@ -239,8 +246,14 @@ 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + bne 1b + tst r0, #_TIF_NEED_RESCHED_LAZY reteq r8 @ go again - b 1b + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r0, #0 @ if preempt lazy count != 0 + beq 1b + ret r8 @ go again + #endif __und_fault: diff -Nur linux-4.9.28.orig/arch/arm/kernel/entry-common.S linux-4.9.28/arch/arm/kernel/entry-common.S --- linux-4.9.28.orig/arch/arm/kernel/entry-common.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/entry-common.S 2017-05-19 03:37:25.122174217 +0200 @@ -36,7 +36,9 @@ UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) + bne fast_work_pending + tst r1, #_TIF_SECCOMP bne fast_work_pending /* perform architecture specific actions before user return */ @@ -62,8 +64,11 @@ str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 disable_irq_notrace @ disable interrupts ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) + bne do_slower_path + tst r1, #_TIF_SECCOMP beq no_work_pending +do_slower_path: UNWIND(.fnend ) ENDPROC(ret_fast_syscall) diff -Nur linux-4.9.28.orig/arch/arm/kernel/patch.c linux-4.9.28/arch/arm/kernel/patch.c --- linux-4.9.28.orig/arch/arm/kernel/patch.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/patch.c 2017-05-19 03:37:25.122174217 +0200 @@ -15,7 +15,7 @@ unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -32,7 +32,7 @@ return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -47,7 +47,7 @@ clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } diff -Nur linux-4.9.28.orig/arch/arm/kernel/process.c linux-4.9.28/arch/arm/kernel/process.c --- linux-4.9.28.orig/arch/arm/kernel/process.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/process.c 2017-05-19 03:37:25.122174217 +0200 @@ -322,6 +322,30 @@ } #ifdef CONFIG_MMU +/* + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not + * initialized by pgtable_page_ctor() then a coredump of the vector page will + * fail. + */ +static int __init vectors_user_mapping_init_page(void) +{ + struct page *page; + unsigned long addr = 0xffff0000; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + page = pmd_page(*(pmd)); + + pgtable_page_ctor(page); + + return 0; +} +late_initcall(vectors_user_mapping_init_page); + #ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the diff -Nur linux-4.9.28.orig/arch/arm/kernel/signal.c linux-4.9.28/arch/arm/kernel/signal.c --- linux-4.9.28.orig/arch/arm/kernel/signal.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/signal.c 2017-05-19 03:37:25.122174217 +0200 @@ -572,7 +572,8 @@ */ trace_hardirqs_off(); do { - if (likely(thread_flags & _TIF_NEED_RESCHED)) { + if (likely(thread_flags & (_TIF_NEED_RESCHED | + _TIF_NEED_RESCHED_LAZY))) { schedule(); } else { if (unlikely(!user_mode(regs))) diff -Nur linux-4.9.28.orig/arch/arm/kernel/smp.c linux-4.9.28/arch/arm/kernel/smp.c --- linux-4.9.28.orig/arch/arm/kernel/smp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/smp.c 2017-05-19 03:37:25.122174217 +0200 @@ -234,8 +234,6 @@ flush_cache_louis(); local_flush_tlb_all(); - clear_tasks_mm_cpumask(cpu); - return 0; } @@ -251,6 +249,9 @@ pr_err("CPU%u: cpu didn't die\n", cpu); return; } + + clear_tasks_mm_cpumask(cpu); + pr_notice("CPU%u: shutdown\n", cpu); /* diff -Nur linux-4.9.28.orig/arch/arm/kernel/unwind.c linux-4.9.28/arch/arm/kernel/unwind.c --- linux-4.9.28.orig/arch/arm/kernel/unwind.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kernel/unwind.c 2017-05-19 03:37:25.122174217 +0200 @@ -93,7 +93,7 @@ static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_SPINLOCK(unwind_lock); +static DEFINE_RAW_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -201,7 +201,7 @@ /* module unwind tables */ struct unwind_table *table; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -213,7 +213,7 @@ break; } } - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -529,9 +529,9 @@ tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -543,9 +543,9 @@ if (!tab) return; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff -Nur linux-4.9.28.orig/arch/arm/kvm/arm.c linux-4.9.28/arch/arm/kvm/arm.c --- linux-4.9.28.orig/arch/arm/kvm/arm.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/kvm/arm.c 2017-05-19 03:37:25.122174217 +0200 @@ -619,7 +619,7 @@ * involves poking the GIC, which must be done in a * non-preemptible context. */ - preempt_disable(); + migrate_disable(); kvm_pmu_flush_hwstate(vcpu); kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); @@ -640,7 +640,7 @@ kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); + migrate_enable(); continue; } @@ -696,7 +696,7 @@ kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); + migrate_enable(); ret = handle_exit(vcpu, run, ret); } diff -Nur linux-4.9.28.orig/arch/arm/mach-exynos/platsmp.c linux-4.9.28/arch/arm/mach-exynos/platsmp.c --- linux-4.9.28.orig/arch/arm/mach-exynos/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-exynos/platsmp.c 2017-05-19 03:37:25.122174217 +0200 @@ -229,7 +229,7 @@ return (void __iomem *)(S5P_VA_SCU); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void exynos_secondary_init(unsigned int cpu) { @@ -242,8 +242,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) @@ -307,7 +307,7 @@ * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -334,7 +334,7 @@ if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -380,7 +380,7 @@ * calibrations, then wait for it to finish */ fail: - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? ret : 0; } diff -Nur linux-4.9.28.orig/arch/arm/mach-hisi/platmcpm.c linux-4.9.28/arch/arm/mach-hisi/platmcpm.c --- linux-4.9.28.orig/arch/arm/mach-hisi/platmcpm.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-hisi/platmcpm.c 2017-05-19 03:37:25.122174217 +0200 @@ -61,7 +61,7 @@ static void __iomem *sysctrl, *fabric; static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static u32 fabric_phys_addr; /* * [0]: bootwrapper physical address @@ -113,7 +113,7 @@ if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) return -EINVAL; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); if (hip04_cpu_table[cluster][cpu]) goto out; @@ -147,7 +147,7 @@ out: hip04_cpu_table[cluster][cpu]++; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } @@ -162,11 +162,11 @@ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { /* A power_up request went ahead of us. */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return; } else if (hip04_cpu_table[cluster][cpu] > 1) { pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); @@ -174,7 +174,7 @@ } last_man = hip04_cluster_is_down(cluster); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); if (last_man) { /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( @@ -203,7 +203,7 @@ cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { if (hip04_cpu_table[cluster][cpu]) goto err; @@ -211,10 +211,10 @@ data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) break; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); /* Wait for clean L2 when the whole cluster is down. */ msleep(POLL_MSEC); - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); } if (tries >= count) goto err; @@ -231,10 +231,10 @@ goto err; if (hip04_cluster_is_down(cluster)) hip04_set_snoop_filter(cluster, 0); - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 1; err: - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } #endif diff -Nur linux-4.9.28.orig/arch/arm/mach-omap2/omap-smp.c linux-4.9.28/arch/arm/mach-omap2/omap-smp.c --- linux-4.9.28.orig/arch/arm/mach-omap2/omap-smp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-omap2/omap-smp.c 2017-05-19 03:37:25.122174217 +0200 @@ -64,7 +64,7 @@ .startup_addr = omap5_secondary_startup, }; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -131,8 +131,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -146,7 +146,7 @@ * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -223,7 +223,7 @@ * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return 0; } diff -Nur linux-4.9.28.orig/arch/arm/mach-prima2/platsmp.c linux-4.9.28/arch/arm/mach-prima2/platsmp.c --- linux-4.9.28.orig/arch/arm/mach-prima2/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-prima2/platsmp.c 2017-05-19 03:37:25.122174217 +0200 @@ -22,7 +22,7 @@ static void __iomem *clk_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sirfsoc_secondary_init(unsigned int cpu) { @@ -36,8 +36,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static const struct of_device_id clk_ids[] = { @@ -75,7 +75,7 @@ /* make sure write buffer is drained */ mb(); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -107,7 +107,7 @@ * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff -Nur linux-4.9.28.orig/arch/arm/mach-qcom/platsmp.c linux-4.9.28/arch/arm/mach-qcom/platsmp.c --- linux-4.9.28.orig/arch/arm/mach-qcom/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-qcom/platsmp.c 2017-05-19 03:37:25.122174217 +0200 @@ -46,7 +46,7 @@ extern void secondary_startup_arm(void); -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); #ifdef CONFIG_HOTPLUG_CPU static void qcom_cpu_die(unsigned int cpu) @@ -60,8 +60,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int scss_release_secondary(unsigned int cpu) @@ -284,7 +284,7 @@ * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Send the secondary CPU a soft interrupt, thereby causing @@ -297,7 +297,7 @@ * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return ret; } diff -Nur linux-4.9.28.orig/arch/arm/mach-spear/platsmp.c linux-4.9.28/arch/arm/mach-spear/platsmp.c --- linux-4.9.28.orig/arch/arm/mach-spear/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-spear/platsmp.c 2017-05-19 03:37:25.122174217 +0200 @@ -32,7 +32,7 @@ sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -47,8 +47,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -59,7 +59,7 @@ * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -84,7 +84,7 @@ * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff -Nur linux-4.9.28.orig/arch/arm/mach-sti/platsmp.c linux-4.9.28/arch/arm/mach-sti/platsmp.c --- linux-4.9.28.orig/arch/arm/mach-sti/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mach-sti/platsmp.c 2017-05-19 03:37:25.122174217 +0200 @@ -35,7 +35,7 @@ sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sti_secondary_init(unsigned int cpu) { @@ -48,8 +48,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -60,7 +60,7 @@ * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -91,7 +91,7 @@ * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff -Nur linux-4.9.28.orig/arch/arm/mm/fault.c linux-4.9.28/arch/arm/mm/fault.c --- linux-4.9.28.orig/arch/arm/mm/fault.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mm/fault.c 2017-05-19 03:37:25.122174217 +0200 @@ -430,6 +430,9 @@ if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (interrupts_enabled(regs)) + local_irq_enable(); + if (user_mode(regs)) goto bad_area; @@ -497,6 +500,9 @@ static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (interrupts_enabled(regs)) + local_irq_enable(); + do_bad_area(addr, fsr, regs); return 0; } diff -Nur linux-4.9.28.orig/arch/arm/mm/highmem.c linux-4.9.28/arch/arm/mm/highmem.c --- linux-4.9.28.orig/arch/arm/mm/highmem.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/mm/highmem.c 2017-05-19 03:37:25.126174470 +0200 @@ -34,6 +34,11 @@ return *ptep; } +static unsigned int fixmap_idx(int type) +{ + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); +} + void *kmap(struct page *page) { might_sleep(); @@ -54,12 +59,13 @@ void *kmap_atomic(struct page *page) { + pte_t pte = mk_pte(page, kmap_prot); unsigned int idx; unsigned long vaddr; void *kmap; int type; - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -79,7 +85,7 @@ type = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -93,7 +99,10 @@ * in place, so the contained TLB flush ensures the TLB is updated * with the new mapping. */ - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } @@ -106,44 +115,75 @@ if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(idx)); - set_fixmap_pte(idx, __pte(0)); #else (void) idx; /* to kill a warning */ #endif + set_fixmap_pte(idx, __pte(0)); kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); - preempt_enable(); + preempt_enable_nort(); } EXPORT_SYMBOL(__kunmap_atomic); void *kmap_atomic_pfn(unsigned long pfn) { + pte_t pte = pfn_pte(pfn, kmap_prot); unsigned long vaddr; int idx, type; struct page *page = pfn_to_page(pfn); - preempt_disable(); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); type = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } +#if defined CONFIG_PREEMPT_RT_FULL +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = fixmap_idx(i); + + set_fixmap_pte(idx, __pte(0)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = fixmap_idx(i); + + if (!pte_none(next_p->kmap_pte[i])) + set_fixmap_pte(idx, next_p->kmap_pte[i]); + } +} +#endif diff -Nur linux-4.9.28.orig/arch/arm/plat-versatile/platsmp.c linux-4.9.28/arch/arm/plat-versatile/platsmp.c --- linux-4.9.28.orig/arch/arm/plat-versatile/platsmp.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm/plat-versatile/platsmp.c 2017-05-19 03:37:25.126174470 +0200 @@ -32,7 +32,7 @@ sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void versatile_secondary_init(unsigned int cpu) { @@ -45,8 +45,8 @@ /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -57,7 +57,7 @@ * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -87,7 +87,7 @@ * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff -Nur linux-4.9.28.orig/arch/arm64/include/asm/thread_info.h linux-4.9.28/arch/arm64/include/asm/thread_info.h --- linux-4.9.28.orig/arch/arm64/include/asm/thread_info.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm64/include/asm/thread_info.h 2017-05-19 03:37:25.126174470 +0200 @@ -49,6 +49,7 @@ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ }; @@ -112,6 +113,7 @@ #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_NEED_RESCHED_LAZY 4 #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -127,6 +129,7 @@ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) @@ -135,7 +138,9 @@ #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_NEED_RESCHED_LAZY) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff -Nur linux-4.9.28.orig/arch/arm64/Kconfig linux-4.9.28/arch/arm64/Kconfig --- linux-4.9.28.orig/arch/arm64/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm64/Kconfig 2017-05-19 03:37:25.126174470 +0200 @@ -91,6 +91,7 @@ select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS @@ -704,7 +705,7 @@ config XEN bool "Xen guest support on ARM64" - depends on ARM64 && OF + depends on ARM64 && OF && !PREEMPT_RT_FULL select SWIOTLB_XEN select PARAVIRT help diff -Nur linux-4.9.28.orig/arch/arm64/kernel/asm-offsets.c linux-4.9.28/arch/arm64/kernel/asm-offsets.c --- linux-4.9.28.orig/arch/arm64/kernel/asm-offsets.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm64/kernel/asm-offsets.c 2017-05-19 03:37:25.126174470 +0200 @@ -38,6 +38,7 @@ BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff -Nur linux-4.9.28.orig/arch/arm64/kernel/entry.S linux-4.9.28/arch/arm64/kernel/entry.S --- linux-4.9.28.orig/arch/arm64/kernel/entry.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm64/kernel/entry.S 2017-05-19 03:37:25.126174470 +0200 @@ -428,11 +428,16 @@ #ifdef CONFIG_PREEMPT ldr w24, [tsk, #TI_PREEMPT] // get preempt count - cbnz w24, 1f // preempt count != 0 + cbnz w24, 2f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? - bl el1_preempt + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? + + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count + cbnz w24, 2f // preempt lazy count != 0 + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? 1: + bl el1_preempt +2: #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on @@ -446,6 +451,7 @@ 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? ret x24 #endif diff -Nur linux-4.9.28.orig/arch/arm64/kernel/signal.c linux-4.9.28/arch/arm64/kernel/signal.c --- linux-4.9.28.orig/arch/arm64/kernel/signal.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/arm64/kernel/signal.c 2017-05-19 03:37:25.126174470 +0200 @@ -409,7 +409,7 @@ */ trace_hardirqs_off(); do { - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { schedule(); } else { local_irq_enable(); diff -Nur linux-4.9.28.orig/arch/Kconfig linux-4.9.28/arch/Kconfig --- linux-4.9.28.orig/arch/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/Kconfig 2017-05-19 03:37:25.122174217 +0200 @@ -9,6 +9,7 @@ tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE + depends on !PREEMPT_RT_FULL select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help @@ -52,6 +53,7 @@ config JUMP_LABEL bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) help This option enables a transparent branch optimization that makes certain almost-always-true or almost-always-false branch diff -Nur linux-4.9.28.orig/arch/mips/Kconfig linux-4.9.28/arch/mips/Kconfig --- linux-4.9.28.orig/arch/mips/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/mips/Kconfig 2017-05-19 03:37:25.126174470 +0200 @@ -2515,7 +2515,7 @@ # config HIGHMEM bool "High Memory Support" - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL config CPU_SUPPORTS_HIGHMEM bool diff -Nur linux-4.9.28.orig/arch/powerpc/include/asm/thread_info.h linux-4.9.28/arch/powerpc/include/asm/thread_info.h --- linux-4.9.28.orig/arch/powerpc/include/asm/thread_info.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/powerpc/include/asm/thread_info.h 2017-05-19 03:37:25.126174470 +0200 @@ -43,6 +43,8 @@ int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; @@ -88,8 +90,7 @@ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ @@ -107,6 +108,8 @@ #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! */ #endif +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<flags) set_bits(irqtp->flags, &curtp->flags); } +#endif irq_hw_number_t virq_to_hw(unsigned int virq) { diff -Nur linux-4.9.28.orig/arch/powerpc/kernel/misc_32.S linux-4.9.28/arch/powerpc/kernel/misc_32.S --- linux-4.9.28.orig/arch/powerpc/kernel/misc_32.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/powerpc/kernel/misc_32.S 2017-05-19 03:37:25.126174470 +0200 @@ -41,6 +41,7 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -57,6 +58,7 @@ stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr +#endif /* * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); diff -Nur linux-4.9.28.orig/arch/powerpc/kernel/misc_64.S linux-4.9.28/arch/powerpc/kernel/misc_64.S --- linux-4.9.28.orig/arch/powerpc/kernel/misc_64.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/powerpc/kernel/misc_64.S 2017-05-19 03:37:25.126174470 +0200 @@ -31,6 +31,7 @@ .text +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -41,6 +42,7 @@ ld r0,16(r1) mtlr r0 blr +#endif _GLOBAL(call_do_irq) mflr r0 diff -Nur linux-4.9.28.orig/arch/powerpc/kvm/Kconfig linux-4.9.28/arch/powerpc/kvm/Kconfig --- linux-4.9.28.orig/arch/powerpc/kvm/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/powerpc/kvm/Kconfig 2017-05-19 03:37:25.126174470 +0200 @@ -175,6 +175,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 + depends on !PREEMPT_RT_FULL select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff -Nur linux-4.9.28.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.9.28/arch/powerpc/platforms/ps3/device-init.c --- linux-4.9.28.orig/arch/powerpc/platforms/ps3/device-init.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/powerpc/platforms/ps3/device-init.c 2017-05-19 03:37:25.126174470 +0200 @@ -752,7 +752,7 @@ } pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); - res = wait_event_interruptible(dev->done.wait, + res = swait_event_interruptible(dev->done.wait, dev->done.done || kthread_should_stop()); if (kthread_should_stop()) res = -EINTR; diff -Nur linux-4.9.28.orig/arch/sh/kernel/irq.c linux-4.9.28/arch/sh/kernel/irq.c --- linux-4.9.28.orig/arch/sh/kernel/irq.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/sh/kernel/irq.c 2017-05-19 03:37:25.126174470 +0200 @@ -147,6 +147,7 @@ hardirq_ctx[cpu] = NULL; } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curctx; @@ -174,6 +175,7 @@ "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" ); } +#endif #else static inline void handle_one_irq(unsigned int irq) { diff -Nur linux-4.9.28.orig/arch/sparc/Kconfig linux-4.9.28/arch/sparc/Kconfig --- linux-4.9.28.orig/arch/sparc/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/sparc/Kconfig 2017-05-19 03:37:25.126174470 +0200 @@ -194,12 +194,10 @@ source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK - bool - default y if SPARC32 + def_bool PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM - bool - default y if SPARC64 + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_HWEIGHT bool diff -Nur linux-4.9.28.orig/arch/sparc/kernel/irq_64.c linux-4.9.28/arch/sparc/kernel/irq_64.c --- linux-4.9.28.orig/arch/sparc/kernel/irq_64.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/sparc/kernel/irq_64.c 2017-05-19 03:37:25.126174470 +0200 @@ -854,6 +854,7 @@ set_irq_regs(old_regs); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { void *orig_sp, *sp = softirq_stack[smp_processor_id()]; @@ -868,6 +869,7 @@ __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); } +#endif #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) diff -Nur linux-4.9.28.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.9.28/arch/x86/crypto/aesni-intel_glue.c --- linux-4.9.28.orig/arch/x86/crypto/aesni-intel_glue.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/crypto/aesni-intel_glue.c 2017-05-19 03:37:25.130174719 +0200 @@ -372,14 +372,14 @@ err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -396,14 +396,14 @@ err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -420,14 +420,14 @@ err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -444,14 +444,14 @@ err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -503,18 +503,20 @@ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + kernel_fpu_begin(); aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { + kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); + kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } - kernel_fpu_end(); return err; } diff -Nur linux-4.9.28.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.9.28/arch/x86/crypto/cast5_avx_glue.c --- linux-4.9.28.orig/arch/x86/crypto/cast5_avx_glue.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/crypto/cast5_avx_glue.c 2017-05-19 03:37:25.130174719 +0200 @@ -59,7 +59,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, bool enc) { - bool fpu_enabled = false; + bool fpu_enabled; struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; @@ -75,7 +75,7 @@ u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -103,10 +103,9 @@ } while (nbytes >= bsize); done: + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -227,7 +226,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -236,12 +235,11 @@ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __cbc_decrypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -311,7 +309,7 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -320,13 +318,12 @@ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __ctr_crypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - cast5_fpu_end(fpu_enabled); - if (walk.nbytes) { ctr_crypt_final(desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); diff -Nur linux-4.9.28.orig/arch/x86/crypto/glue_helper.c linux-4.9.28/arch/x86/crypto/glue_helper.c --- linux-4.9.28.orig/arch/x86/crypto/glue_helper.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/crypto/glue_helper.c 2017-05-19 03:37:25.130174719 +0200 @@ -39,7 +39,7 @@ void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; + bool fpu_enabled; int err; err = blkcipher_walk_virt(desc, walk); @@ -49,7 +49,7 @@ u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -71,10 +71,10 @@ } done: + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } @@ -194,7 +194,7 @@ struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -203,12 +203,12 @@ while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); @@ -277,7 +277,7 @@ struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -286,13 +286,12 @@ while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); - if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); @@ -347,7 +346,7 @@ void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -360,21 +359,21 @@ /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, + desc, false, nbytes < bsize ? bsize : nbytes); - /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); + glue_fpu_end(fpu_enabled); while (nbytes) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, false, nbytes); nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); nbytes = walk.nbytes; } - - glue_fpu_end(fpu_enabled); - return err; } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff -Nur linux-4.9.28.orig/arch/x86/entry/common.c linux-4.9.28/arch/x86/entry/common.c --- linux-4.9.28.orig/arch/x86/entry/common.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/entry/common.c 2017-05-19 03:37:25.130174719 +0200 @@ -129,7 +129,7 @@ #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { @@ -145,9 +145,16 @@ /* We have work to do. */ local_irq_enable(); - if (cached_flags & _TIF_NEED_RESCHED) + if (cached_flags & _TIF_NEED_RESCHED_MASK) schedule(); +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND + if (unlikely(current->forced_info.si_signo)) { + struct task_struct *t = current; + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); + t->forced_info.si_signo = 0; + } +#endif if (cached_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff -Nur linux-4.9.28.orig/arch/x86/entry/entry_32.S linux-4.9.28/arch/x86/entry/entry_32.S --- linux-4.9.28.orig/arch/x86/entry/entry_32.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/entry/entry_32.S 2017-05-19 03:37:25.130174719 +0200 @@ -308,8 +308,25 @@ ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) need_resched: + # preempt count == 0 + NEED_RS set? cmpl $0, PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz restore_all +#else + jz test_int_off + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jne restore_all + + movl PER_CPU_VAR(current_task), %ebp + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? + jnz restore_all + + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) + jz restore_all +test_int_off: +#endif testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff -Nur linux-4.9.28.orig/arch/x86/entry/entry_64.S linux-4.9.28/arch/x86/entry/entry_64.S --- linux-4.9.28.orig/arch/x86/entry/entry_64.S 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/entry/entry_64.S 2017-05-19 03:37:25.130174719 +0200 @@ -546,7 +546,23 @@ bt $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz 1f +#else + jz do_preempt_schedule_irq + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jnz 1f + + movq PER_CPU_VAR(current_task), %rcx + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) + jnz 1f + + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) + jnc 1f +do_preempt_schedule_irq: +#endif call preempt_schedule_irq jmp 0b 1: @@ -894,6 +910,7 @@ jmp 2b .previous +#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) pushq %rbp @@ -906,6 +923,7 @@ decl PER_CPU_VAR(irq_count) ret END(do_softirq_own_stack) +#endif #ifdef CONFIG_XEN idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 diff -Nur linux-4.9.28.orig/arch/x86/include/asm/preempt.h linux-4.9.28/arch/x86/include/asm/preempt.h --- linux-4.9.28.orig/arch/x86/include/asm/preempt.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/include/asm/preempt.h 2017-05-19 03:37:25.130174719 +0200 @@ -79,17 +79,46 @@ * a decrement which hits zero means we have no preempt_count and should * reschedule. */ -static __always_inline bool __preempt_count_dec_and_test(void) +static __always_inline bool ____preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } +static __always_inline bool __preempt_count_dec_and_test(void) +{ + if (____preempt_count_dec_and_test()) + return true; +#ifdef CONFIG_PREEMPT_LAZY + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else + return false; +#endif +} + /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(int preempt_offset) { +#ifdef CONFIG_PREEMPT_LAZY + u32 tmp; + + tmp = raw_cpu_read_4(__preempt_count); + if (tmp == preempt_offset) + return true; + + /* preempt count == 0 ? */ + tmp &= ~PREEMPT_NEED_RESCHED; + if (tmp) + return false; + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); +#endif } #ifdef CONFIG_PREEMPT diff -Nur linux-4.9.28.orig/arch/x86/include/asm/signal.h linux-4.9.28/arch/x86/include/asm/signal.h --- linux-4.9.28.orig/arch/x86/include/asm/signal.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/include/asm/signal.h 2017-05-19 03:37:25.130174719 +0200 @@ -27,6 +27,19 @@ #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u +/* + * Because some traps use the IST stack, we must keep preemption + * disabled while calling do_trap(), but do_trap() may call + * force_sig_info() which will grab the signal spin_locks for the + * task, which in PREEMPT_RT_FULL are mutexes. By defining + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the + * trap. + */ +#if defined(CONFIG_PREEMPT_RT_FULL) +#define ARCH_RT_DELAYS_SIGNAL_SEND +#endif + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff -Nur linux-4.9.28.orig/arch/x86/include/asm/stackprotector.h linux-4.9.28/arch/x86/include/asm/stackprotector.h --- linux-4.9.28.orig/arch/x86/include/asm/stackprotector.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/include/asm/stackprotector.h 2017-05-19 03:37:25.130174719 +0200 @@ -59,7 +59,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; + u64 uninitialized_var(canary); u64 tsc; #ifdef CONFIG_X86_64 @@ -70,8 +70,15 @@ * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. + * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary + * uninitialized and use the TSC based randomness on top of it. */ +#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); +#endif tsc = rdtsc(); canary += tsc + (tsc << 32UL); diff -Nur linux-4.9.28.orig/arch/x86/include/asm/thread_info.h linux-4.9.28/arch/x86/include/asm/thread_info.h --- linux-4.9.28.orig/arch/x86/include/asm/thread_info.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/include/asm/thread_info.h 2017-05-19 03:37:25.130174719 +0200 @@ -54,11 +54,14 @@ struct thread_info { unsigned long flags; /* low level flags */ + int preempt_lazy_count; /* 0 => lazy preemptable + <0 => BUG */ }; #define INIT_THREAD_INFO(tsk) \ { \ .flags = 0, \ + .preempt_lazy_count = 0, \ } #define init_stack (init_thread_union.stack) @@ -67,6 +70,10 @@ #include +#define GET_THREAD_INFO(reg) \ + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ + _ASM_SUB $(THREAD_SIZE),reg ; + #endif /* @@ -85,6 +92,7 @@ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -108,6 +116,7 @@ #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -143,6 +152,8 @@ #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #define STACK_WARN (THREAD_SIZE/8) /* diff -Nur linux-4.9.28.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.9.28/arch/x86/include/asm/uv/uv_bau.h --- linux-4.9.28.orig/arch/x86/include/asm/uv/uv_bau.h 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/include/asm/uv/uv_bau.h 2017-05-19 03:37:25.130174719 +0200 @@ -624,9 +624,9 @@ cycles_t send_message; cycles_t period_end; cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; + raw_spinlock_t uvhub_lock; + raw_spinlock_t queue_lock; + raw_spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -815,15 +815,15 @@ * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) { - spin_lock(lock); + raw_spin_lock(lock); if (atomic_read(v) >= u) { - spin_unlock(lock); + raw_spin_unlock(lock); return 0; } atomic_inc(v); - spin_unlock(lock); + raw_spin_unlock(lock); return 1; } diff -Nur linux-4.9.28.orig/arch/x86/Kconfig linux-4.9.28/arch/x86/Kconfig --- linux-4.9.28.orig/arch/x86/Kconfig 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/Kconfig 2017-05-19 03:37:25.130174719 +0200 @@ -17,6 +17,7 @@ ### Arch settings config X86 def_bool y + select HAVE_PREEMPT_LAZY select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ANON_INODES @@ -232,8 +233,11 @@ def_bool y depends on ISA_DMA_API +config RWSEM_GENERIC_SPINLOCK + def_bool PREEMPT_RT_FULL + config RWSEM_XCHGADD_ALGORITHM - def_bool y + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_CALIBRATE_DELAY def_bool y @@ -897,7 +901,7 @@ config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. diff -Nur linux-4.9.28.orig/arch/x86/kernel/acpi/boot.c linux-4.9.28/arch/x86/kernel/acpi/boot.c --- linux-4.9.28.orig/arch/x86/kernel/acpi/boot.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/kernel/acpi/boot.c 2017-05-19 03:37:25.130174719 +0200 @@ -87,7 +87,9 @@ * ->ioapic_mutex * ->ioapic_lock */ +#ifdef CONFIG_X86_IO_APIC static DEFINE_MUTEX(acpi_ioapic_lock); +#endif /* -------------------------------------------------------------------------- Boot-time Configuration diff -Nur linux-4.9.28.orig/arch/x86/kernel/apic/io_apic.c linux-4.9.28/arch/x86/kernel/apic/io_apic.c --- linux-4.9.28.orig/arch/x86/kernel/apic/io_apic.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/kernel/apic/io_apic.c 2017-05-19 03:37:25.130174719 +0200 @@ -1712,7 +1712,8 @@ static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { mask_ioapic_irq(data); return true; } diff -Nur linux-4.9.28.orig/arch/x86/kernel/asm-offsets.c linux-4.9.28/arch/x86/kernel/asm-offsets.c --- linux-4.9.28.orig/arch/x86/kernel/asm-offsets.c 2017-05-14 14:00:37.000000000 +0200 +++ linux-4.9.28/arch/x86/kernel/asm-offsets.c 2017-05-19 03:37:25.130174719 +0200 @@ -36,6 +36,7 @@ BLANK(); OFFSET(TASK_TI_flags, task_struct, thread_info.flags); + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); @@ -91,4 +92,5 @@ BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); } diff -Nur linux-4.9.28.orig/arch/x86/kernel/cpu/mcheck/mce.c li