diff --git a/Makefile b/Makefile index 03373bb..27a566c 100644 --- a/Makefile +++ b/Makefile @@ -351,7 +351,9 @@ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ - -Werror-implicit-function-declaration + -Werror-implicit-function-declaration \ + -fdump-rtl-expand-raw-details + KBUILD_AFLAGS := -D__ASSEMBLY__ # Read KERNELRELEASE from include/config/kernel.release (if it exists) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a6efe0a..bff14df 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -120,7 +120,7 @@ config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD config ARCH_HAS_CPU_IDLE_WAIT - def_bool y + def_bool y if !MVIAC7 config GENERIC_CALIBRATE_DELAY def_bool y @@ -1011,7 +1011,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M386 && !M486 + depends on !M386 && !M486 && !MVIAC7 select X86_PAE ---help--- Select this if you have a 32-bit processor and more than 4 diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 80177ec..4bda12e 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -31,7 +31,7 @@ cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) -cflags-$(CONFIG_MVIAC7) += -march=i686 +cflags-$(CONFIG_MVIAC7) += -march=i686 $(call tune,pentium3) cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) # AMD Elan support @@ -46,6 +46,6 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. -ifneq ($(CONFIG_X86_P6_NOP),y) -cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) -endif +# ifneq ($(CONFIG_X86_P6_NOP),y) +# cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) +# endif diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index f6aa18e..5afa8d2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -35,7 +35,11 @@ "661:\n\tlock; " #else /* ! CONFIG_SMP */ +# ifdef CONFIG_MVIAC7 /* C7-M is in-order retirement */ +#define LOCK_PREFIX "\n\tlock; " +# else #define LOCK_PREFIX "" +# endif #endif /* This must be included *after* the definition of LOCK_PREFIX */ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 2228020..3790657 100644 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h @@ -134,6 +134,12 @@ enum cpu_processor_bit { CPU_AMD_0F_BIT, CPU_AMD_10_BIT, CPU_AMD_11_BIT, +/* Centaur */ + CPU_CENTAUR_C3_BIT, + CPU_CENTAUR_C5_BIT, + CPU_CENTAUR_C7A_BIT, + CPU_CENTAUR_C7D_BIT, + CPU_CENTAUR_NANO_BIT, }; #define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) @@ -183,8 +189,21 @@ enum cpu_processor_bit { /* Select all supported AMD CPUs */ #define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) + +#define CPU_CENT_C3 (1 << CPU_CENTAUR_C3_BIT) +#define CPU_CENT_C5 (1 << CPU_CENTAUR_C5_BIT) +#define CPU_CENT_C7A (1 << CPU_CENTAUR_C7A_BIT) +#define CPU_CENT_C7D (1 << CPU_CENTAUR_C7D_BIT) +#define CPU_CENT_NANO (1 << CPU_CENTAUR_NANO_BIT) + +#define CPU_CENT_C7 (CPU_CENT_C7A | CPU_CENT_C7D) +#define CPU_CENT_C7_PLUS (CPU_CENT_C7 | CPU_CENT_NANO) + +#define CPU_CENT_ALL (CPU_CENT_C3 | CPU_CENT_C5 | CPU_CENT_C7 \ + | CPU_CENT_NANO) + /* Select all supported CPUs */ -#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) +#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL | CPU_CENT_ALL) #define MAX_CPU_FILES 512 diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce4..56210a1 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -19,7 +19,8 @@ "\t.previous\n" \ _ASM_EXTABLE(1b, 3b) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)) + : "i" (-EFAULT), "0" (oparg), "1" (0) \ + : "memory") #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ asm volatile("1:\tmovl %2, %0\n" \ @@ -35,7 +36,8 @@ _ASM_EXTABLE(2b, 4b) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)) + : "r" (oparg), "i" (-EFAULT), "1" (0) \ + : "memory", "cc") static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) { @@ -68,13 +70,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op2(LOCK_PREFIX "orl %4, %3", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + __futex_atomic_op2(LOCK_PREFIX "andl %4, %3", ret, oldval, uaddr, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op2(LOCK_PREFIX "xorl %4, %3", ret, oldval, uaddr, oparg); break; default: ret = -ENOSYS; diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2bdab21..eb4bf92 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -39,14 +39,20 @@ static inline void native_irq_enable(void) asm volatile("sti": : :"memory"); } +/* + * We are about to stop all instruction processing - + * any attempt to "save a cycle" is brain-dead programming. + * So in this case, ease to a stop rather than just halt. + */ static inline void native_safe_halt(void) { - asm volatile("sti; hlt": : :"memory"); + asm volatile("sti; wbinvd; rep; nop; hlt": : :"memory"); } +/* The only reason to stop with interrupts disabled is sucide. */ static inline void native_halt(void) { - asm volatile("hlt": : :"memory"); + asm volatile("sti; wbinvd; rep; nop; hlt": : :"memory"); } #endif diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b7e5db8..6226945 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -27,6 +27,8 @@ # define REG_PTR_MODE "q" #endif +#if 0 /* Don't trash the other settings */ + #if defined(CONFIG_X86_32) && \ (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) /* @@ -38,6 +40,10 @@ # define UNLOCK_LOCK_PREFIX #endif +#endif + +# define UNLOCK_LOCK_PREFIX LOCK_PREFIX + /* * Ticket locks are conceptually two parts, one indicating the current head of * the queue, and the other indicating the current tail. The lock is acquired diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index bd37ed4..20ca9c4 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -45,12 +45,16 @@ extern int no_timer_check; */ DECLARE_PER_CPU(unsigned long, cyc2ns); +DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; + int cpu = smp_processor_id(); + unsigned long long ns = per_cpu(cyc2ns_offset, cpu); + ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; + return ns; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 46e29ab..7add52b 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -2,6 +2,7 @@ * CPU x86 architecture debug code * * Copyright(C) 2009 Jaswinder Singh Rajput + * Copyright(C) 2009 VIA Technology, Inc. * * For licencing details see kernel-base/COPYING */ @@ -225,6 +226,93 @@ static struct cpu_debug_range cpu_amd_range[] = { { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, }; +/* Centaur Registers Range */ +static struct cpu_debug_range cpu_centaur_range[] = { + { 0x00000000, 0x00000001, CPU_MC, CPU_CENT_NANO }, + { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CENT_NANO }, + { 0x00000010, 0x00000010, CPU_TIME, CPU_CENT_C7_PLUS }, + { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_CENT_NANO }, + { 0x0000001B, 0x0000001B, CPU_APIC, CPU_CENT_C7_PLUS, }, + { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_CENT_C7_PLUS, }, + { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CENT_NANO, }, + /* IUA32_BIOS_UDPT_TRIG, trigger bios update */ + { 0x00000079, 0x00000079, CPU_BIOS, CPU_CENT_NANO, }, + /* IA32_BIOS_SIGN_ID, determine ucode update level */ + { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_CENT_NANO, }, + /* IA32_SMM_MONITOR_CTL */ + { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_CENT_NANO, }, + { 0x000000C1, 0x000000C2, CPU_PMC, CPU_CENT_C7_PLUS, }, + { 0x000000C3, 0x000000C3, CPU_PMC, CPU_CENT_NANO, }, + /* MSR_FSB_FREQ */ + { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CENT_NANO, }, + /* IA32_MPERF, IA32_APERF */ + { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CENT_C7_PLUS, }, + /* IA32_MTRCAP */ + { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_CENT_C7_PLUS, }, + /* MSR_BBL_CR_CTL3 */ + { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_CENT_C7_PLUS, }, + /* IA32_SYSENTER_{CS,ESP,EIP} */ + { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_CENT_C7_PLUS, }, + { 0x00000186, 0x00000187, CPU_PMC, CPU_CENT_C7_PLUS, }, + /* performance status, performance control */ + { 0x00000198, 0x00000199, CPU_PERF, CPU_CENT_C7_PLUS, }, + /* clock modulation */ + { 0x0000019A, 0x0000019A, CPU_FREQ, CPU_CENT_C7_PLUS }, + /* thermal interrupt / status / control */ + { 0x0000019B, 0x0000019D, CPU_THERM, CPU_CENT_C7_PLUS }, + /* MISC_Enable, enable TM1/TM2/TM3 */ + { 0x000001A0, 0x000001A0, CPU_MISC, CPU_CENT_C7_PLUS, }, + /* IA32_LATFORM_BRV */ + { 0x000001A1, 0x000001A1, CPU_PLATFORM, CPU_CENT_NANO, }, + /* IA32_DEBUGCTLA */ + { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CENT_NANO, }, + { 0x00000200, 0x0000020F, CPU_MTRR, CPU_CENT_C7_PLUS, }, + { 0x00000250, 0x00000250, CPU_MTRR, CPU_CENT_C7_PLUS, }, + { 0x00000258, 0x00000259, CPU_MTRR, CPU_CENT_C7_PLUS, }, + { 0x00000268, 0x0000026F, CPU_MTRR, CPU_CENT_C7_PLUS, }, + /* IA32_CR_PAT */ + { 0x00000277, 0x00000277, CPU_PAT, CPU_CENT_NANO, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_CENT_C7_PLUS, }, + { 0x00000309, 0x0000030B, CPU_PMC, CPU_CENT_NANO, }, + /* IA32_PERF_CAPABILITIES */ + { 0x00000345, 0x00000345, CPU_PMC, CPU_CENT_NANO, }, + /* IA32_PERF_{FIXED_CTR_CTRL,GLOBAL_{STATUS,CTRL,OVF_CTRL}} */ + { 0x0000038D, 0x00000390, CPU_PMC, CPU_CENT_NANO, }, + /* IA32_PEBS_ENABLE */ + { 0x000003F1, 0x000003F1, CPU_PMC, CPU_CENT_NANO, }, + { 0x00000400, 0x00000412, CPU_MC, CPU_CENT_NANO, }, + { 0x00000480, 0x0000048B, CPU_VMX, CPU_CENT_NANO, }, + /* FCR5: ucode update status */ + { 0x00001025, 0x00001025, CPU_BIOS, CPU_CENT_NANO, }, + { 0x00001107, 0x00001109, CPU_FEATURES, CPU_CENT_C7, }, + /* Performance Status Register */ + { 0x0000110A, 0x0000110A, CPU_FEATURES, CPU_CENT_C7D, }, + { 0x0000110B, 0x0000110B, CPU_MISC, CPU_CENT_C7, }, + /* HCR2/DSCR2: Overstress VID, Parallax Temp, Inflection ratio */ + { 0x00001140, 0x00001142, CPU_FREQ, CPU_CENT_C7D, }, + /* user defined temperature threshuld */ + { 0x00001167, 0x00001167, CPU_THERM, CPU_CENT_C7, }, + /* die temperature in degrees C */ + { 0x00001169, 0x00001169, CPU_THERM, CPU_CENT_C7, }, + /* CENT_HARDWARECTRL3 */ + { 0x00001203, 0x00001203, CPU_FEATURES, CPU_CENT_NANO, }, + /* change reported steppling/model/family */ + { 0x00001204, 0x00001204, CPU_MISC, CPU_CENT_C7_PLUS, }, + /* characters of alternate vendor string */ + { 0x00001206, 0x00001207, CPU_MISC, CPU_CENT_C7_PLUS, }, + /* CENT_INFLECTION_RATIO */ + { 0x00001406, 0x00001406, CPU_THERM, CPU_CENT_NANO, }, + /* thermal monitor temperature in celsius */ + { 0x00001423, 0x00001423, CPU_THERM, CPU_CENT_NANO, }, + /* CENT_TMREFCNT{0,1} */ + { 0x00001424, 0x00001425, CPU_TIME, CPU_CENT_NANO, }, + /* IA32_EFER */ + { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_CENT_NANO, }, + /* IA32_{STAR,LSTAR,CSTAR,FMASK} */ + { 0xC0000081, 0xC0000084, CPU_CALL, CPU_CENT_NANO, }, + /* IA32_{FS_BASE,GS_BASE,KERNEL_GS_BASE} */ + { 0xC0000100, 0xC0000102, CPU_BASE, CPU_CENT_NANO, }, +}; /* Intel */ static int get_intel_modelflag(unsigned model) @@ -310,6 +398,77 @@ static int get_amd_modelflag(unsigned model) return flag; } +/* VIA / Centaur */ +static int get_centaur_modelflag(unsigned model) +{ + int flag; + + switch ((model >> 8) & 0xff) { + case 5: + switch (model & 0xff) { + case 4: + /* IDT WinChip C6 */ + case 8: + /* IDT WinChip 2 */ + case 9: + /* IDT WinChip 3 */ + default: + flag = CPU_NONE; + break; + } + break; + case 6: + switch (model & 0xff) { + case 0x00: + /* Cyrix 6x86 MX */ + flag = CPU_NONE; + break; + case 0x05: + /* VIA Cyrix III */ + flag = CPU_NONE; + break; + case 0x06: + /* VIA Cyrix III Samuel */ + flag = CPU_NONE; + break; + case 0x07: + /* 0..7 VIA C3 Samuel II / C3 */ + /* 8..F VIA C3 Ezra / Eden ESP */ + flag = CPU_CENT_C3; + break; + case 0x08: + /* 0..7 VIA C3 Ezra-T / Eden ESP */ + flag = CPU_CENT_C5; + break; + case 0x09: + /* VIA Antaur */ + flag = CPU_NONE; + break; + case 0x0a: + /* C7-M Type A, Esther core 90nm SOI */ + flag = CPU_CENT_C7A; + break; + case 0x0d: + /* C7-M Type D, Esther core 90nm */ + flag = CPU_CENT_C7D; + break; + case 0x0f: + /* Nano 65nm */ + flag = CPU_CENT_NANO; + break; + default: + flag = CPU_NONE; + break; + } + break; + default: + flag = CPU_NONE; + break; + } + + return flag; +} + static int get_cpu_modelflag(unsigned cpu) { int flag; @@ -323,6 +482,9 @@ static int get_cpu_modelflag(unsigned cpu) case X86_VENDOR_AMD: flag = get_amd_modelflag(flag & 0xffff); break; + case X86_VENDOR_CENTAUR: + flag = get_centaur_modelflag(flag); + break; default: flag = CPU_NONE; break; @@ -342,6 +504,9 @@ static int get_cpu_range_count(unsigned cpu) case X86_VENDOR_AMD: index = ARRAY_SIZE(cpu_amd_range); break; + case X86_VENDOR_CENTAUR: + index = ARRAY_SIZE(cpu_centaur_range); + break; default: index = 0; break; @@ -375,6 +540,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag) (cpu_amd_range[i].flag & flag)) return 1; break; + case X86_VENDOR_CENTAUR: + if ((cpu_centaur_range[i].model & modelflag) && + (cpu_centaur_range[i].flag & flag)) + return 1; + break; } } @@ -404,6 +574,12 @@ static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, *max = cpu_amd_range[index].max; } break; + case X86_VENDOR_CENTAUR: + if ((cpu_centaur_range[index].model & modelflag) && + (cpu_centaur_range[index].flag & flag)) { + *min = cpu_centaur_range[index].min; + *max = cpu_centaur_range[index].max; + } } return *max; diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 752e8c6..1ad5aab 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -89,12 +89,7 @@ static unsigned int acpi_pstate_strict; static int check_est_cpu(unsigned int cpuid) { struct cpuinfo_x86 *cpu = &cpu_data(cpuid); - - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; + return cpu_has(cpu, X86_FEATURE_EST); } static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 3068388..36b02ff 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -299,6 +299,7 @@ ENTRY(startup_32_smp) orl %edx,%eax movl %eax,%cr4 +#ifdef CONFIG_X86_PAE /* No PAE on MVIAC7 */ btl $5, %eax # check if PAE is enabled jnc 6f @@ -320,6 +321,7 @@ ENTRY(startup_32_smp) btsl $11, %eax /* Make changes effective */ wrmsr +#endif 6: diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d57de05..73ce94e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -589,22 +589,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); */ DEFINE_PER_CPU(unsigned long, cyc2ns); +DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - unsigned long long tsc_now, ns_now; + unsigned long long tsc_now, ns_now, *offset; unsigned long flags, *scale; local_irq_save(flags); sched_clock_idle_sleep_event(); scale = &per_cpu(cyc2ns, cpu); + offset = &per_cpu(cyc2ns_offset, cpu); rdtscll(tsc_now); ns_now = __cycles_2_ns(tsc_now); - if (cpu_khz) + if (cpu_khz) { *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); + } sched_clock_idle_wakeup_event(0); local_irq_restore(flags); @@ -631,12 +635,11 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj, dummy; + unsigned long *lpj = NULL; if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; - lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP lpj = &cpu_data(freq->cpu).loops_per_jiffy; diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 62ad500..9a38df6 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -82,13 +82,13 @@ SECTIONS *(.data.idt) } - . = ALIGN(32); + . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } /* rarely changed data like cpu maps */ - . = ALIGN(32); + . = ALIGN(L1_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) _edata = .; /* End of data section */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b727..f99f12e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -981,11 +981,11 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; mm = tsk->mm; - prefetchw(&mm->mmap_sem); - /* Get the faulting address: */ address = read_cr2(); + prefetchw(&mm->mmap_sem); + if (unlikely(kmmio_fault(regs, address))) return; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2202b62..fb80148 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -426,11 +426,14 @@ int __init pcibios_init(void) * and P4. It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ + if (c->x86_clflush_size > 0) { + pci_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set for %d byte cache\n", + c->x86_clflush_size); + } else { + pci_cache_line_size = 32 >>2; + printk(KERN_DEBUG "PCI: pci_cache_line_size defaulted to 32 byte cache\n"); + } pcibios_resource_survey(); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 10a2d91..daea6a9 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -244,22 +244,12 @@ int acpi_processor_resume(struct acpi_device * device) #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) static void tsc_check_state(int state) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - case X86_VENDOR_INTEL: - /* - * AMD Fam10h TSC will tick in all - * C/P/S0/S1 states when this bit is set. - */ - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - return; + if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + return; - /*FALL THROUGH*/ - default: - /* TSC could halt in idle, so notify users */ - if (state > ACPI_STATE_C1) - mark_tsc_unstable("TSC halts in idle"); - } + /* TSC could halt in idle, so notify users */ + if (state > ACPI_STATE_C1) + mark_tsc_unstable("TSC halts in idle"); } #else static void tsc_check_state(int state) { return; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index d73f5f4..e863833 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -787,6 +787,14 @@ config SENSORS_THMC50 This driver can also be built as a module. If so, the module will be called thmc50. +config SENSORS_VIA_CPUTEMP + tristate "VIA CPU temperature sensor" + depends on X86 + help + If you say yes here you get support for the temperature + sensor inside your CPU. Supported all are all known variants + of the VIA C7 and Nano. + config SENSORS_VIA686A tristate "VIA686A" depends on PCI diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 0ae2698..3edf7fa 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o obj-$(CONFIG_SENSORS_THMC50) += thmc50.o +obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o obj-$(CONFIG_SENSORS_VIA686A) += via686a.o obj-$(CONFIG_SENSORS_VT1211) += vt1211.o obj-$(CONFIG_SENSORS_VT8231) += vt8231.o diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index a0127e9..706ec0f 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1682,7 +1682,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) static void serial_unlink_irq_chain(struct uart_8250_port *up) { - struct irq_info *i; + struct irq_info *i = NULL; struct hlist_node *n; struct hlist_head *h; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index be86ae3..1c7eaa4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -160,8 +160,10 @@ static inline char *portspeed(int portstatus) } /* Note that hdev or one of its children must be locked! */ -static inline struct usb_hub *hdev_to_hub(struct usb_device *hdev) +static struct usb_hub *hdev_to_hub(struct usb_device *hdev) { + if (!hdev || !hdev->actconfig) + return NULL; return usb_get_intfdata(hdev->actconfig->interface[0]); } @@ -382,8 +384,10 @@ static void kick_khubd(struct usb_hub *hub) void usb_kick_khubd(struct usb_device *hdev) { - /* FIXME: What if hdev isn't bound to the hub driver? */ - kick_khubd(hdev_to_hub(hdev)); + struct usb_hub *hub = hdev_to_hub(hdev); + + if (hub) + kick_khubd(hub); } diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5..34956c8 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,8 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +40,10 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/kernel/futex.c b/kernel/futex.c index d546b2d..794c862 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -19,6 +19,10 @@ * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet * + * Requeue-PI support by Darren Hart + * Copyright (C) IBM Corporation, 2009 + * Thanks to Thomas Gleixner for conceptual design and careful reviews. + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -96,8 +100,8 @@ struct futex_pi_state { */ struct futex_q { struct plist_node list; - /* There can only be a single waiter */ - wait_queue_head_t waiter; + /* Waiter reference */ + struct task_struct *task; /* Which hash list lock to use: */ spinlock_t *lock_ptr; @@ -107,7 +111,9 @@ struct futex_q { /* Optional priority inheritance state: */ struct futex_pi_state *pi_state; - struct task_struct *task; + + /* rt_waiter storage for requeue_pi: */ + struct rt_mutex_waiter *rt_waiter; /* Bitset for the optional bitmasked wakeup */ u32 bitset; @@ -278,6 +284,44 @@ void put_futex_key(int fshared, union futex_key *key) drop_futex_key_refs(key); } +/* + * fault_in_user_writeable - fault in user address and verify RW access + * @uaddr: pointer to faulting user space address + * + * Slow path to fixup the fault we just took in the atomic write + * access to @uaddr. + * + * We have no generic implementation of a non destructive write to the + * user address. We know that we faulted in the atomic pagefault + * disabled section so we can as well avoid the #PF overhead by + * calling get_user_pages() right away. + */ +static int fault_in_user_writeable(u32 __user *uaddr) +{ + int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, + 1, 1, 0, NULL, NULL); + return ret < 0 ? ret : 0; +} + +/** + * futex_top_waiter() - Return the highest priority waiter on a futex + * @hb: the hash bucket the futex_q's reside in + * @key: the futex key (to distinguish it from other futex futex_q's) + * + * Must be called with the hb lock held. + */ +static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, + union futex_key *key) +{ + struct futex_q *this; + + plist_for_each_entry(this, &hb->chain, list) { + if (match_futex(&this->key, key)) + return this; + } + return NULL; +} + static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) { u32 curval; @@ -539,28 +583,160 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return 0; } +/** + * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex + * @uaddr: the pi futex user address + * @hb: the pi futex hash bucket + * @key: the futex key associated with uaddr and hb + * @ps: the pi_state pointer where we store the result of the + * lookup + * @task: the task to perform the atomic lock work for. This will + * be "current" except in the case of requeue pi. + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Returns: + * 0 - ready to wait + * 1 - acquired the lock + * <0 - error + * + * The hb->lock and futex_key refs shall be held by the caller. + */ +static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, + union futex_key *key, + struct futex_pi_state **ps, + struct task_struct *task, int set_waiters) +{ + int lock_taken, ret, ownerdied = 0; + u32 uval, newval, curval; + +retry: + ret = lock_taken = 0; + + /* + * To avoid races, we attempt to take the lock here again + * (by doing a 0 -> TID atomic cmpxchg), while holding all + * the locks. It will most likely not succeed. + */ + newval = task_pid_vnr(task); + if (set_waiters) + newval |= FUTEX_WAITERS; + + curval = cmpxchg_futex_value_locked(uaddr, 0, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + + /* + * Detect deadlocks. + */ + if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) + return -EDEADLK; + + /* + * Surprise - we got the lock. Just return to userspace: + */ + if (unlikely(!curval)) + return 1; + + uval = curval; + + /* + * Set the FUTEX_WAITERS flag, so the owner will know it has someone + * to wake at the next unlock. + */ + newval = curval | FUTEX_WAITERS; + + /* + * There are two cases, where a futex might have no owner (the + * owner TID is 0): OWNER_DIED. We take over the futex in this + * case. We also do an unconditional take over, when the owner + * of the futex died. + * + * This is safe as we are protected by the hash bucket lock ! + */ + if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { + /* Keep the OWNER_DIED bit */ + newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); + ownerdied = 0; + lock_taken = 1; + } + + curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + if (unlikely(curval != uval)) + goto retry; + + /* + * We took the lock due to owner died take over. + */ + if (unlikely(lock_taken)) + return 1; + + /* + * We dont have the lock. Look up the PI state (or create it if + * we are the first waiter): + */ + ret = lookup_pi_state(uval, hb, key, ps); + + if (unlikely(ret)) { + switch (ret) { + case -ESRCH: + /* + * No owner found for this futex. Check if the + * OWNER_DIED bit is set to figure out whether + * this is a robust futex or not. + */ + if (get_futex_value_locked(&curval, uaddr)) + return -EFAULT; + + /* + * We simply start over in case of a robust + * futex. The code above will take the futex + * and return happy. + */ + if (curval & FUTEX_OWNER_DIED) { + ownerdied = 1; + goto retry; + } + default: + break; + } + } + + return ret; +} + /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. */ static void wake_futex(struct futex_q *q) { - plist_del(&q->list, &q->list.plist); + struct task_struct *p = q->task; + /* - * The lock in wake_up_all() is a crucial memory barrier after the - * plist_del() and also before assigning to q->lock_ptr. + * We set q->lock_ptr = NULL _before_ we wake up the task. If + * a non futex wake up happens on another CPU then the task + * might exit and p would dereference a non existing task + * struct. Prevent this by holding a reference on p across the + * wake up. */ - wake_up(&q->waiter); + get_task_struct(p); + + plist_del(&q->list, &q->list.plist); /* - * The waiting task can free the futex_q as soon as this is written, - * without taking any locks. This must come last. - * - * A memory barrier is required here to prevent the following store to - * lock_ptr from getting ahead of the wakeup. Clearing the lock at the - * end of wake_up() does not prevent this store from moving. + * The waiting task can free the futex_q as soon as + * q->lock_ptr = NULL is written, without taking any locks. A + * memory barrier is required here to prevent the following + * store to lock_ptr from getting ahead of the plist_del. */ smp_wmb(); q->lock_ptr = NULL; + + wake_up_state(p, TASK_NORMAL); + put_task_struct(p); } static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) @@ -689,7 +865,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key)) { - if (this->pi_state) { + if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } @@ -739,7 +915,6 @@ retry: retry_private: op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - u32 dummy; double_unlock_hb(hb1, hb2); @@ -757,7 +932,7 @@ retry_private: goto out_put_keys; } - ret = get_user(dummy, uaddr2); + ret = fault_in_user_writeable(uaddr2); if (ret) goto out_put_keys; @@ -802,24 +977,185 @@ out: return ret; } -/* - * Requeue all waiters hashed on one physical page to another - * physical page. +/** + * requeue_futex() - Requeue a futex_q from one hb to another + * @q: the futex_q to requeue + * @hb1: the source hash_bucket + * @hb2: the target hash_bucket + * @key2: the new key for the requeued futex_q + */ +static inline +void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key2) +{ + + /* + * If key1 and key2 hash to the same bucket, no need to + * requeue. + */ + if (likely(&hb1->chain != &hb2->chain)) { + plist_del(&q->list, &hb1->chain); + plist_add(&q->list, &hb2->chain); + q->lock_ptr = &hb2->lock; +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.lock = &hb2->lock; +#endif + } + get_futex_key_refs(key2); + q->key = *key2; +} + +/** + * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue + * q: the futex_q + * key: the key of the requeue target futex + * + * During futex_requeue, with requeue_pi=1, it is possible to acquire the + * target futex if it is uncontended or via a lock steal. Set the futex_q key + * to the requeue target futex so the waiter can detect the wakeup on the right + * futex, but remove it from the hb and NULL the rt_waiter so it can detect + * atomic lock acquisition. Must be called with the q->lock_ptr held. + */ +static inline +void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) +{ + drop_futex_key_refs(&q->key); + get_futex_key_refs(key); + q->key = *key; + + WARN_ON(plist_node_empty(&q->list)); + plist_del(&q->list, &q->list.plist); + + WARN_ON(!q->rt_waiter); + q->rt_waiter = NULL; + + wake_up_state(q->task, TASK_NORMAL); +} + +/** + * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter + * @pifutex: the user address of the to futex + * @hb1: the from futex hash bucket, must be locked by the caller + * @hb2: the to futex hash bucket, must be locked by the caller + * @key1: the from futex key + * @key2: the to futex key + * @ps: address to store the pi_state pointer + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Try and get the lock on behalf of the top waiter if we can do it atomically. + * Wake the top waiter if we succeed. If the caller specified set_waiters, + * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. + * hb1 and hb2 must be held by the caller. + * + * Returns: + * 0 - failed to acquire the lock atomicly + * 1 - acquired the lock + * <0 - error + */ +static int futex_proxy_trylock_atomic(u32 __user *pifutex, + struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, + union futex_key *key1, union futex_key *key2, + struct futex_pi_state **ps, int set_waiters) +{ + struct futex_q *top_waiter = NULL; + u32 curval; + int ret; + + if (get_futex_value_locked(&curval, pifutex)) + return -EFAULT; + + /* + * Find the top_waiter and determine if there are additional waiters. + * If the caller intends to requeue more than 1 waiter to pifutex, + * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, + * as we have means to handle the possible fault. If not, don't set + * the bit unecessarily as it will force the subsequent unlock to enter + * the kernel. + */ + top_waiter = futex_top_waiter(hb1, key1); + + /* There are no waiters, nothing for us to do. */ + if (!top_waiter) + return 0; + + /* + * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in + * the contended case or if set_waiters is 1. The pi_state is returned + * in ps in contended cases. + */ + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, + set_waiters); + if (ret == 1) + requeue_pi_wake_futex(top_waiter, key2); + + return ret; +} + +/** + * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 + * uaddr1: source futex user address + * uaddr2: target futex user address + * nr_wake: number of waiters to wake (must be 1 for requeue_pi) + * nr_requeue: number of waiters to requeue (0-INT_MAX) + * requeue_pi: if we are attempting to requeue from a non-pi futex to a + * pi futex (pi to pi requeue is not supported) + * + * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire + * uaddr2 atomically on behalf of the top waiter. + * + * Returns: + * >=0 - on success, the number of tasks requeued or woken + * <0 - on error */ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) + int nr_wake, int nr_requeue, u32 *cmpval, + int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + int drop_count = 0, task_count = 0, ret; + struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - int ret, drop_count = 0; + u32 curval2; + + if (requeue_pi) { + /* + * requeue_pi requires a pi_state, try to allocate it now + * without any locks in case it fails. + */ + if (refill_pi_state_cache()) + return -ENOMEM; + /* + * requeue_pi must wake as many tasks as it can, up to nr_wake + * + nr_requeue, since it acquires the rt_mutex prior to + * returning to userspace, so as to not leave the rt_mutex with + * waiters and no owner. However, second and third wake-ups + * cannot be predicted as they involve race conditions with the + * first wake and a fault while looking up the pi_state. Both + * pthread_cond_signal() and pthread_cond_broadcast() should + * use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; + } retry: + if (pi_state != NULL) { + /* + * We will have to lookup the pi_state again, so free this one + * to keep the accounting correct. + */ + free_pi_state(pi_state); + pi_state = NULL; + } + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ); + ret = get_futex_key(uaddr2, fshared, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -854,32 +1190,99 @@ retry_private: } } + if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + /* + * Attempt to acquire uaddr2 and wake the top waiter. If we + * intend to requeue waiters, force setting the FUTEX_WAITERS + * bit. We force this here where we are able to easily handle + * faults rather in the requeue loop below. + */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, + &key2, &pi_state, nr_requeue); + + /* + * At this point the top_waiter has either taken uaddr2 or is + * waiting on it. If the former, then the pi_state will not + * exist yet, look it up one more time to ensure we have a + * reference to it. + */ + if (ret == 1) { + WARN_ON(pi_state); + task_count++; + ret = get_futex_value_locked(&curval2, uaddr2); + if (!ret) + ret = lookup_pi_state(curval2, hb2, &key2, + &pi_state); + } + + switch (ret) { + case 0: + break; + case -EFAULT: + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + ret = fault_in_user_writeable(uaddr2); + if (!ret) + goto retry; + goto out; + case -EAGAIN: + /* The owner was exiting, try again. */ + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + cond_resched(); + goto retry; + default: + goto out_unlock; + } + } + head1 = &hb1->chain; plist_for_each_entry_safe(this, next, head1, list) { - if (!match_futex (&this->key, &key1)) + if (task_count - nr_wake >= nr_requeue) + break; + + if (!match_futex(&this->key, &key1)) continue; - if (++ret <= nr_wake) { + + WARN_ON(!requeue_pi && this->rt_waiter); + WARN_ON(requeue_pi && !this->rt_waiter); + + /* + * Wake nr_wake waiters. For requeue_pi, if we acquired the + * lock, we already woke the top_waiter. If not, it will be + * woken by futex_unlock_pi(). + */ + if (++task_count <= nr_wake && !requeue_pi) { wake_futex(this); - } else { - /* - * If key1 and key2 hash to the same bucket, no need to - * requeue. - */ - if (likely(head1 != &hb2->chain)) { - plist_del(&this->list, &hb1->chain); - plist_add(&this->list, &hb2->chain); - this->lock_ptr = &hb2->lock; -#ifdef CONFIG_DEBUG_PI_LIST - this->list.plist.lock = &hb2->lock; -#endif - } - this->key = key2; - get_futex_key_refs(&key2); - drop_count++; + continue; + } - if (ret - nr_wake >= nr_requeue) - break; + /* + * Requeue nr_requeue waiters and possibly one more in the case + * of requeue_pi if we couldn't acquire the lock atomically. + */ + if (requeue_pi) { + /* Prepare the waiter to take the rt_mutex. */ + atomic_inc(&pi_state->refcount); + this->pi_state = pi_state; + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task, 1); + if (ret == 1) { + /* We got the lock. */ + requeue_pi_wake_futex(this, &key2); + continue; + } else if (ret) { + /* -EDEADLK */ + this->pi_state = NULL; + free_pi_state(pi_state); + goto out_unlock; + } } + requeue_futex(this, hb1, hb2, &key2); + drop_count++; } out_unlock: @@ -899,7 +1302,9 @@ out_put_keys: out_put_key1: put_futex_key(fshared, &key1); out: - return ret; + if (pi_state != NULL) + free_pi_state(pi_state); + return ret ? ret : task_count; } /* The key must be already stored in q->key. */ @@ -907,8 +1312,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) { struct futex_hash_bucket *hb; - init_waitqueue_head(&q->waiter); - get_futex_key_refs(&q->key); hb = hash_futex(&q->key); q->lock_ptr = &hb->lock; @@ -1097,7 +1500,7 @@ retry: handle_fault: spin_unlock(q->lock_ptr); - ret = get_user(uval, uaddr); + ret = fault_in_user_writeable(uaddr); spin_lock(q->lock_ptr); @@ -1119,35 +1522,149 @@ handle_fault: */ #define FLAGS_SHARED 0x01 #define FLAGS_CLOCKRT 0x02 +#define FLAGS_HAS_TIMEOUT 0x04 static long futex_wait_restart(struct restart_block *restart); -static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +/** + * fixup_owner() - Post lock pi_state and corner case management + * @uaddr: user address of the futex + * @fshared: whether the futex is shared (1) or not (0) + * @q: futex_q (contains pi_state and access to the rt_mutex) + * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) + * + * After attempting to lock an rt_mutex, this function is called to cleanup + * the pi_state owner as well as handle race conditions that may allow us to + * acquire the lock. Must be called with the hb lock held. + * + * Returns: + * 1 - success, lock taken + * 0 - success, lock not taken + * <0 - on error (-EFAULT) + */ +static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, + int locked) { - struct task_struct *curr = current; - struct restart_block *restart; - DECLARE_WAITQUEUE(wait, curr); - struct futex_hash_bucket *hb; - struct futex_q q; - u32 uval; - int ret; - struct hrtimer_sleeper t; - int rem = 0; + struct task_struct *owner; + int ret = 0; - if (!bitset) - return -EINVAL; + if (locked) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case: + */ + if (q->pi_state->owner != current) + ret = fixup_pi_state_owner(uaddr, q, current, fshared); + goto out; + } - q.pi_state = NULL; - q.bitset = bitset; -retry: - q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ); - if (unlikely(ret != 0)) + /* + * Catch the rare case, where the lock was released when we were on the + * way back before we locked the hash bucket. + */ + if (q->pi_state->owner == current) { + /* + * Try to get the rt_mutex now. This might fail as some other + * task acquired the rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ + if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { + locked = 1; + goto out; + } + + /* + * pi_state is incorrect, some other task did a lock steal and + * we returned due to timeout or signal without taking the + * rt_mutex. Too late. We can access the rt_mutex_owner without + * locking, as the other task is now blocked on the hash bucket + * lock. Fix the state up. + */ + owner = rt_mutex_owner(&q->pi_state->pi_mutex); + ret = fixup_pi_state_owner(uaddr, q, owner, fshared); goto out; + } -retry_private: - hb = queue_lock(&q); + /* + * Paranoia check. If we did not take the lock, then we should not be + * the owner, nor the pending owner, of the rt_mutex. + */ + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) + printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " + "pi-state %p\n", ret, + q->pi_state->pi_mutex.owner, + q->pi_state->owner); + +out: + return ret ? ret : locked; +} + +/** + * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal + * @hb: the futex hash bucket, must be locked by the caller + * @q: the futex_q to queue up on + * @timeout: the prepared hrtimer_sleeper, or null for no timeout + */ +static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + struct hrtimer_sleeper *timeout) +{ + queue_me(q, hb); + + /* + * There might have been scheduling since the queue_me(), as we + * cannot hold a spinlock across the get_user() in case it + * faults, and we cannot just set TASK_INTERRUPTIBLE state when + * queueing ourselves into the futex hash. This code thus has to + * rely on the futex_wake() code removing us from hash when it + * wakes us up. + */ + set_current_state(TASK_INTERRUPTIBLE); + + /* Arm the timer */ + if (timeout) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + /* + * !plist_node_empty() is safe here without any lock. + * q.lock_ptr != 0 is not safe, because of ordering against wakeup. + */ + if (likely(!plist_node_empty(&q->list))) { + /* + * If the timer has already expired, current will already be + * flagged for rescheduling. Only call schedule if there + * is no timeout, or if it has yet to expire. + */ + if (!timeout || timeout->task) + schedule(); + } + __set_current_state(TASK_RUNNING); +} + +/** + * futex_wait_setup() - Prepare to wait on a futex + * @uaddr: the futex userspace address + * @val: the expected value + * @fshared: whether the futex is shared (1) or not (0) + * @q: the associated futex_q + * @hb: storage for hash_bucket pointer to be returned to caller + * + * Setup the futex_q and locate the hash_bucket. Get the futex value and + * compare it with the expected value. Handle atomic faults internally. + * Return with the hb lock held and a q.key reference on success, and unlocked + * with no q.key reference on failure. + * + * Returns: + * 0 - uaddr contains val and hb has been locked + * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked + */ +static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, + struct futex_q *q, struct futex_hash_bucket **hb) +{ + u32 uval; + int ret; /* * Access the page AFTER the hash-bucket is locked. @@ -1165,95 +1682,83 @@ retry_private: * A consequence is that futex_wait() can return zero and absorb * a wakeup when *uaddr != val on entry to the syscall. This is * rare, but normal. - * - * For shared futexes, we hold the mmap semaphore, so the mapping - * cannot have changed since we looked it up in get_futex_key. */ +retry: + q->key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); + if (unlikely(ret != 0)) + return ret; + +retry_private: + *hb = queue_lock(q); + ret = get_futex_value_locked(&uval, uaddr); - if (unlikely(ret)) { - queue_unlock(&q, hb); + if (ret) { + queue_unlock(q, *hb); ret = get_user(uval, uaddr); if (ret) - goto out_put_key; + goto out; if (!fshared) goto retry_private; - put_futex_key(fshared, &q.key); + put_futex_key(fshared, &q->key); goto retry; } - ret = -EWOULDBLOCK; - if (unlikely(uval != val)) { - queue_unlock(&q, hb); - goto out_put_key; - } - /* Only actually queue if *uaddr contained val. */ - queue_me(&q, hb); + if (uval != val) { + queue_unlock(q, *hb); + ret = -EWOULDBLOCK; + } - /* - * There might have been scheduling since the queue_me(), as we - * cannot hold a spinlock across the get_user() in case it - * faults, and we cannot just set TASK_INTERRUPTIBLE state when - * queueing ourselves into the futex hash. This code thus has to - * rely on the futex_wake() code removing us from hash when it - * wakes us up. - */ +out: + if (ret) + put_futex_key(fshared, &q->key); + return ret; +} - /* add_wait_queue is the barrier after __set_current_state. */ - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&q.waiter, &wait); - /* - * !plist_node_empty() is safe here without any lock. - * q.lock_ptr != 0 is not safe, because of ordering against wakeup. - */ - if (likely(!plist_node_empty(&q.list))) { - if (!abs_time) - schedule(); - else { - hrtimer_init_on_stack(&t.timer, - clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(&t, current); - hrtimer_set_expires_range_ns(&t.timer, *abs_time, - current->timer_slack_ns); - - hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&t.timer)) - t.task = NULL; +static int futex_wait(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct restart_block *restart; + struct futex_hash_bucket *hb; + struct futex_q q; + int ret; - /* - * the timer could have already expired, in which - * case current would be flagged for rescheduling. - * Don't bother calling schedule. - */ - if (likely(t.task)) - schedule(); + if (!bitset) + return -EINVAL; - hrtimer_cancel(&t.timer); + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = NULL; - /* Flag if a timeout occured */ - rem = (t.task == NULL); + if (abs_time) { + to = &timeout; - destroy_hrtimer_on_stack(&t.timer); - } + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); } - __set_current_state(TASK_RUNNING); - /* - * NOTE: we don't remove ourselves from the waitqueue because - * we are the only user of it. - */ + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out; + + /* queue_me and wait for wakeup, timeout, or a signal. */ + futex_wait_queue_me(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ ret = 0; if (!unqueue_me(&q)) goto out_put_key; ret = -ETIMEDOUT; - if (rem) + if (to && !to->task) goto out_put_key; /* @@ -1270,7 +1775,7 @@ retry_private: restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = 0; + restart->futex.flags = FLAGS_HAS_TIMEOUT; if (fshared) restart->futex.flags |= FLAGS_SHARED; @@ -1282,6 +1787,10 @@ retry_private: out_put_key: put_futex_key(fshared, &q.key); out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } return ret; } @@ -1290,13 +1799,16 @@ static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; int fshared = 0; - ktime_t t; + ktime_t t, *tp = NULL; - t.tv64 = restart->futex.time; + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { + t.tv64 = restart->futex.time; + tp = &t; + } restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) fshared = 1; - return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, + return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, restart->futex.bitset, restart->futex.flags & FLAGS_CLOCKRT); } @@ -1312,11 +1824,9 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; - struct task_struct *curr = current; struct futex_hash_bucket *hb; - u32 uval, newval, curval; struct futex_q q; - int ret, lock_taken, ownerdied = 0; + int res, ret; if (refill_pi_state_cache()) return -ENOMEM; @@ -1330,6 +1840,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } q.pi_state = NULL; + q.rt_waiter = NULL; retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); @@ -1339,81 +1850,15 @@ retry: retry_private: hb = queue_lock(&q); -retry_locked: - ret = lock_taken = 0; - - /* - * To avoid races, we attempt to take the lock here again - * (by doing a 0 -> TID atomic cmpxchg), while holding all - * the locks. It will most likely not succeed. - */ - newval = task_pid_vnr(current); - - curval = cmpxchg_futex_value_locked(uaddr, 0, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - - /* - * Detect deadlocks. In case of REQUEUE_PI this is a valid - * situation and we return success to user space. - */ - if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { - ret = -EDEADLK; - goto out_unlock_put_key; - } - - /* - * Surprise - we got the lock. Just return to userspace: - */ - if (unlikely(!curval)) - goto out_unlock_put_key; - - uval = curval; - - /* - * Set the WAITERS flag, so the owner will know it has someone - * to wake at next unlock - */ - newval = curval | FUTEX_WAITERS; - - /* - * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED. We take over the futex in this - * case. We also do an unconditional take over, when the owner - * of the futex died. - * - * This is safe as we are protected by the hash bucket lock ! - */ - if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED bit */ - newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current); - ownerdied = 0; - lock_taken = 1; - } - - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - if (unlikely(curval != uval)) - goto retry_locked; - - /* - * We took the lock due to owner died take over. - */ - if (unlikely(lock_taken)) - goto out_unlock_put_key; - - /* - * We dont have the lock. Look up the PI state (or create it if - * we are the first waiter): - */ - ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); - + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { switch (ret) { - + case 1: + /* We got the lock. */ + ret = 0; + goto out_unlock_put_key; + case -EFAULT: + goto uaddr_faulted; case -EAGAIN: /* * Task is exiting and we just wait for the @@ -1423,25 +1868,6 @@ retry_locked: put_futex_key(fshared, &q.key); cond_resched(); goto retry; - - case -ESRCH: - /* - * No owner found for this futex. Check if the - * OWNER_DIED bit is set to figure out whether - * this is a robust futex or not. - */ - if (get_futex_value_locked(&curval, uaddr)) - goto uaddr_faulted; - - /* - * We simply start over in case of a robust - * futex. The code above will take the futex - * and return happy. - */ - if (curval & FUTEX_OWNER_DIED) { - ownerdied = 1; - goto retry_locked; - } default: goto out_unlock_put_key; } @@ -1465,71 +1891,21 @@ retry_locked: } spin_lock(q.lock_ptr); - - if (!ret) { - /* - * Got the lock. We might not be the anticipated owner - * if we did a lock-steal - fix up the PI-state in - * that case: - */ - if (q.pi_state->owner != curr) - ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); - } else { - /* - * Catch the rare case, where the lock was released - * when we were on the way back before we locked the - * hash bucket. - */ - if (q.pi_state->owner == curr) { - /* - * Try to get the rt_mutex now. This might - * fail as some other task acquired the - * rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ - if (rt_mutex_trylock(&q.pi_state->pi_mutex)) - ret = 0; - else { - /* - * pi_state is incorrect, some other - * task did a lock steal and we - * returned due to timeout or signal - * without taking the rt_mutex. Too - * late. We can access the - * rt_mutex_owner without locking, as - * the other task is now blocked on - * the hash bucket lock. Fix the state - * up. - */ - struct task_struct *owner; - int res; - - owner = rt_mutex_owner(&q.pi_state->pi_mutex); - res = fixup_pi_state_owner(uaddr, &q, owner, - fshared); - - /* propagate -EFAULT, if the fixup failed */ - if (res) - ret = res; - } - } else { - /* - * Paranoia check. If we did not take the lock - * in the trylock above, then we should not be - * the owner of the rtmutex, neither the real - * nor the pending one: - */ - if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr) - printk(KERN_ERR "futex_lock_pi: ret = %d " - "pi-mutex: %p pi-state %p\n", ret, - q.pi_state->pi_mutex.owner, - q.pi_state->owner); - } - } + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it acquired + * the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock it and return the fault to userspace. + * If fixup_owner() faulted and was unable to handle the fault, unlock + * it and return the fault to userspace. */ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) rt_mutex_unlock(&q.pi_state->pi_mutex); @@ -1537,9 +1913,7 @@ retry_locked: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - if (to) - destroy_hrtimer_on_stack(&to->timer); - return ret != -EINTR ? ret : -ERESTARTNOINTR; + goto out; out_unlock_put_key: queue_unlock(&q, hb); @@ -1549,19 +1923,12 @@ out_put_key: out: if (to) destroy_hrtimer_on_stack(&to->timer); - return ret; + return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: - /* - * We have to r/w *(int __user *)uaddr, and we have to modify it - * atomically. Therefore, if we continue to fault after get_user() - * below, we need to handle the fault ourselves, while still holding - * the mmap_sem. This can occur if the uaddr is under contention as - * we have to drop the mmap_sem in order to call get_user(). - */ queue_unlock(&q, hb); - ret = get_user(uval, uaddr); + ret = fault_in_user_writeable(uaddr); if (ret) goto out_put_key; @@ -1572,7 +1939,6 @@ uaddr_faulted: goto retry; } - /* * Userspace attempted a TID -> 0 atomic transition, and failed. * This is the in-kernel slowpath: we look up the PI state (if any), @@ -1657,23 +2023,239 @@ out: return ret; pi_faulted: - /* - * We have to r/w *(int __user *)uaddr, and we have to modify it - * atomically. Therefore, if we continue to fault after get_user() - * below, we need to handle the fault ourselves, while still holding - * the mmap_sem. This can occur if the uaddr is under contention as - * we have to drop the mmap_sem in order to call get_user(). - */ spin_unlock(&hb->lock); put_futex_key(fshared, &key); - ret = get_user(uval, uaddr); + ret = fault_in_user_writeable(uaddr); if (!ret) goto retry; return ret; } +/** + * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex + * @hb: the hash_bucket futex_q was original enqueued on + * @q: the futex_q woken while waiting to be requeued + * @key2: the futex_key of the requeue target futex + * @timeout: the timeout associated with the wait (NULL if none) + * + * Detect if the task was woken on the initial futex as opposed to the requeue + * target futex. If so, determine if it was a timeout or a signal that caused + * the wakeup and return the appropriate error code to the caller. Must be + * called with the hb lock held. + * + * Returns + * 0 - no early wakeup detected + * <0 - -ETIMEDOUT or -ERESTARTNOINTR + */ +static inline +int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + struct futex_q *q, union futex_key *key2, + struct hrtimer_sleeper *timeout) +{ + int ret = 0; + + /* + * With the hb lock held, we avoid races while we process the wakeup. + * We only need to hold hb (and not hb2) to ensure atomicity as the + * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. + * It can't be requeued from uaddr2 to something else since we don't + * support a PI aware source futex for requeue. + */ + if (!match_futex(&q->key, key2)) { + WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); + /* + * We were woken prior to requeue by a timeout or a signal. + * Unqueue the futex_q and determine which it was. + */ + plist_del(&q->list, &q->list.plist); + drop_futex_key_refs(&q->key); + + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + else + ret = -ERESTARTNOINTR; + } + return ret; +} + +/** + * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 + * @uaddr: the futex we initialyl wait on (non-pi) + * @fshared: whether the futexes are shared (1) or not (0). They must be + * the same type, no requeueing from private to shared, etc. + * @val: the expected value of uaddr + * @abs_time: absolute timeout + * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. + * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) + * @uaddr2: the pi futex we will take prior to returning to user-space + * + * The caller will wait on uaddr and will be requeued by futex_requeue() to + * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and + * complete the acquisition of the rt_mutex prior to returning to userspace. + * This ensures the rt_mutex maintains an owner when it has waiters; without + * one, the pi logic wouldn't know which task to boost/deboost, if there was a + * need to. + * + * We call schedule in futex_wait_queue_me() when we enqueue and return there + * via the following: + * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() + * 2) wakeup on uaddr2 after a requeue and subsequent unlock + * 3) signal (before or after requeue) + * 4) timeout (before or after requeue) + * + * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. + * + * If 2, we may then block on trying to take the rt_mutex and return via: + * 5) successful lock + * 6) signal + * 7) timeout + * 8) other lock acquisition failure + * + * If 6, we setup a restart_block with futex_lock_pi() as the function. + * + * If 4 or 7, we cleanup and return with -ETIMEDOUT. + * + * Returns: + * 0 - On success + * <0 - On error + */ +static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, + int clockrt, u32 __user *uaddr2) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct rt_mutex_waiter rt_waiter; + struct rt_mutex *pi_mutex = NULL; + struct futex_hash_bucket *hb; + union futex_key key2; + struct futex_q q; + int res, ret; + + if (!bitset) + return -EINVAL; + + if (abs_time) { + to = &timeout; + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + + /* + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ + debug_rt_mutex_init_waiter(&rt_waiter); + rt_waiter.task = NULL; + + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = &rt_waiter; + + key2 = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); + if (unlikely(ret != 0)) + goto out; + + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out_key2; + + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ + futex_wait_queue_me(hb, &q, to); + + spin_lock(&hb->lock); + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); + if (ret) + goto out_put_keys; + + /* + * In order for us to be here, we know our q.key == key2, and since + * we took the hb->lock above, we also know that futex_requeue() has + * completed and we no longer have to concern ourselves with a wakeup + * race with the atomic proxy lock acquition by the requeue code. + */ + + /* Check if the requeue code acquired the second futex for us. */ + if (!q.rt_waiter) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case. + */ + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current, + fshared); + spin_unlock(q.lock_ptr); + } + } else { + /* + * We have been woken up by futex_unlock_pi(), a timeout, or a + * signal. futex_unlock_pi() will not destroy the lock_ptr nor + * the pi_state. + */ + WARN_ON(!&q.pi_state); + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + debug_rt_mutex_free_waiter(&rt_waiter); + + spin_lock(q.lock_ptr); + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr2, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it + * acquired the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; + + /* Unqueue and drop the lock. */ + unqueue_me_pi(&q); + } + + /* + * If fixup_pi_state_owner() faulted and was unable to handle the + * fault, unlock the rt_mutex and return the fault to userspace. + */ + if (ret == -EFAULT) { + if (rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + } else if (ret == -EINTR) { + /* + * We've already been requeued, but we have no way to + * restart by calling futex_lock_pi() directly. We + * could restart the syscall, but that will look at + * the user space value and return right away. So we + * drop back with EWOULDBLOCK to tell user space that + * "val" has been changed. That's the same what the + * restart of the syscall would do in + * futex_wait_setup(). + */ + ret = -EWOULDBLOCK; + } + +out_put_keys: + put_futex_key(fshared, &q.key); +out_key2: + put_futex_key(fshared, &key2); + +out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + return ret; +} + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -1896,7 +2478,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, fshared = 1; clockrt = op & FUTEX_CLOCK_REALTIME; - if (clockrt && cmd != FUTEX_WAIT_BITSET) + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; switch (cmd) { @@ -1911,10 +2493,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake(uaddr, fshared, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); break; case FUTEX_WAKE_OP: ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); @@ -1931,6 +2514,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (futex_cmpxchg_enabled) ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; + case FUTEX_WAIT_REQUEUE_PI: + val3 = FUTEX_BITSET_MATCH_ANY; + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); + break; + case FUTEX_CMP_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); + break; default: ret = -ENOSYS; } @@ -1948,7 +2540,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET)) { + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) @@ -1960,11 +2553,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } /* - * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. + * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_WAKE_OP) + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb9..fcd107a 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * assigned pending owner [which might not have taken the * lock yet]: */ -static inline int try_to_steal_lock(struct rt_mutex *lock) +static inline int try_to_steal_lock(struct rt_mutex *lock, + struct task_struct *task) { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; @@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) if (!rt_mutex_owner_pending(lock)) return 0; - if (pendowner == current) + if (pendowner == task) return 1; spin_lock_irqsave(&pendowner->pi_lock, flags); - if (current->prio >= pendowner->prio) { + if (task->prio >= pendowner->prio) { spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 0; } @@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) * We are going to steal the lock and a waiter was * enqueued on the pending owners pi_waiters queue. So * we have to enqueue this waiter into - * current->pi_waiters list. This covers the case, - * where current is boosted because it holds another + * task->pi_waiters list. This covers the case, + * where task is boosted because it holds another * lock and gets unboosted because the booster is * interrupted, so we would delay a waiter with higher - * priority as current->normal_prio. + * priority as task->normal_prio. * * Note: in the rare case of a SCHED_OTHER task changing * its priority and thus stealing the lock, next->task - * might be current: + * might be task: */ - if (likely(next->task != current)) { - spin_lock_irqsave(¤t->pi_lock, flags); - plist_add(&next->pi_list_entry, ¤t->pi_waiters); - __rt_mutex_adjust_prio(current); - spin_unlock_irqrestore(¤t->pi_lock, flags); + if (likely(next->task != task)) { + spin_lock_irqsave(&task->pi_lock, flags); + plist_add(&next->pi_list_entry, &task->pi_waiters); + __rt_mutex_adjust_prio(task); + spin_unlock_irqrestore(&task->pi_lock, flags); } return 1; } @@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ mark_rt_mutex_waiters(lock); - if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) + if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) return 0; /* We got the lock. */ @@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) { struct task_struct *owner = rt_mutex_owner(lock); @@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, unsigned long flags; int chain_walk = 0, res; - spin_lock_irqsave(¤t->pi_lock, flags); - __rt_mutex_adjust_prio(current); - waiter->task = current; + spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); + waiter->task = task; waiter->lock = lock; - plist_node_init(&waiter->list_entry, current->prio); - plist_node_init(&waiter->pi_list_entry, current->prio); + plist_node_init(&waiter->list_entry, task->prio); + plist_node_init(&waiter->pi_list_entry, task->prio); /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) top_waiter = rt_mutex_top_waiter(lock); plist_add(&waiter->list_entry, &lock->wait_list); - current->pi_blocked_on = waiter; + task->pi_blocked_on = waiter; - spin_unlock_irqrestore(¤t->pi_lock, flags); + spin_unlock_irqrestore(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { spin_lock_irqsave(&owner->pi_lock, flags); @@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - current); + task); spin_lock(&lock->wait_lock); @@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task) rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } -/* - * Slow path lock function: +/** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE + * or TASK_UNINTERRUPTIBLE) + * @timeout: the pre-initialized and started timer, or NULL for none + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: passed to task_blocks_on_rt_mutex + * + * lock->wait_lock must be held by the caller. */ static int __sched -rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock) +__rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + struct rt_mutex_waiter *waiter, + int detect_deadlock) { - struct rt_mutex_waiter waiter; int ret = 0; - debug_rt_mutex_init_waiter(&waiter); - waiter.task = NULL; - - spin_lock(&lock->wait_lock); - - /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock)) { - spin_unlock(&lock->wait_lock); - return 0; - } - - set_current_state(state); - - /* Setup the timer, when timeout != NULL */ - if (unlikely(timeout)) { - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&timeout->timer)) - timeout->task = NULL; - } - for (;;) { /* Try to acquire the lock: */ if (try_to_take_rt_mutex(lock)) @@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, } /* - * waiter.task is NULL the first time we come here and + * waiter->task is NULL the first time we come here and * when we have been woken up by the previous owner * but the lock got stolen by a higher prio task. */ - if (!waiter.task) { - ret = task_blocks_on_rt_mutex(lock, &waiter, + if (!waiter->task) { + ret = task_blocks_on_rt_mutex(lock, waiter, current, detect_deadlock); /* * If we got woken up by the owner then start loop * all over without going into schedule to try * to get the lock now: */ - if (unlikely(!waiter.task)) { + if (unlikely(!waiter->task)) { /* * Reset the return value. We might * have returned with -EDEADLK and the @@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, spin_unlock(&lock->wait_lock); - debug_rt_mutex_print_deadlock(&waiter); + debug_rt_mutex_print_deadlock(waiter); - if (waiter.task) + if (waiter->task) schedule_rt_mutex(lock); spin_lock(&lock->wait_lock); set_current_state(state); } + return ret; +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock) +{ + struct rt_mutex_waiter waiter; + int ret = 0; + + debug_rt_mutex_init_waiter(&waiter); + waiter.task = NULL; + + spin_lock(&lock->wait_lock); + + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock)) { + spin_unlock(&lock->wait_lock); + return 0; + } + + set_current_state(state); + + /* Setup the timer, when timeout != NULL */ + if (unlikely(timeout)) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, + detect_deadlock); + set_current_state(TASK_RUNNING); if (unlikely(waiter.task)) @@ -864,9 +891,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); /** - * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible - * the timeout structure is provided - * by the caller + * rt_mutex_timed_lock - lock a rt_mutex interruptible + * the timeout structure is provided + * by the caller * * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) @@ -875,7 +902,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); * Returns: * 0 on success * -EINTR when interrupted by a signal - * -ETIMEOUT when the timeout expired + * -ETIMEDOUT when the timeout expired * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ int @@ -913,7 +940,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_unlock); -/*** +/** * rt_mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed * @@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, } /** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + mark_rt_mutex_waiters(lock); + + if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { + /* We got the lock for task. */ + debug_rt_mutex_lock(lock); + + rt_mutex_set_owner(lock, task, 0); + + rt_mutex_deadlock_account_lock(lock, task); + return 1; + } + + ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + + + if (ret && !waiter->task) { + /* + * Reset the return value. We might have + * returned with -EDEADLK and the owner + * released the lock while we were walking the + * pi chain. Let the waiter sort it out. + */ + ret = 0; + } + spin_unlock(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + return ret; +} + +/** * rt_mutex_next_owner - return the next owner of the lock * * @lock: the rt lock query @@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) return rt_mutex_top_waiter(lock)->task; } + +/** + * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * @lock: the rt_mutex we were woken on + * @to: the timeout, null if none. hrtimer should already have + * been started. + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Complete the lock acquisition started our behalf by another thread. + * + * Returns: + * 0 - success + * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * + * Special API call for PI-futex requeue support + */ +int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + set_current_state(TASK_INTERRUPTIBLE); + + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, + detect_deadlock); + + set_current_state(TASK_RUNNING); + + if (unlikely(waiter->task)) + remove_waiter(lock, waiter); + + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + + spin_unlock(&lock->wait_lock); + + /* + * Readjust priority, when we did not get the lock. We might have been + * the pending owner and boosted. Since we did not take the lock, the + * PI boost has to go. + */ + if (unlikely(ret)) + rt_mutex_adjust_prio(current); + + return ret; +} diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index e124bf5..97a2f81 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); +extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, + int detect_deadlock); +extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" diff --git a/kernel/up.c b/kernel/up.c index 1ff27a2..961144b 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -10,11 +10,13 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int wait) { + unsigned long flags; + WARN_ON(cpu != 0); - local_irq_disable(); + local_irq_save(flags); (func)(info); - local_irq_enable(); + local_irq_restore(flags); return 0; } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index d9233ec..2681dfa 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -216,7 +216,7 @@ minstrel_get_next_sample(struct minstrel_sta_info *mi) unsigned int sample_ndx; sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); mi->sample_idx++; - if (mi->sample_idx > (mi->n_rates - 2)) { + if ((int) mi->sample_idx > (mi->n_rates - 2)) { mi->sample_idx = 0; mi->sample_column++; if (mi->sample_column >= SAMPLE_COLUMNS)