From f8dfd5ed149ae340451f25847b434297c20d4645 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Apr 2008 19:19:54 +0200 Subject: x86: KGDB build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 24362ecf5f9a..f47f0eb886b8 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,11 +46,7 @@ #include #include -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif +#include /* * Put the error code here just in case the user cares: -- cgit v1.2.3 From 4a3575fd436aa98957184afd745e4ada8f1542d8 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Mon, 25 Feb 2008 15:18:37 +0800 Subject: x86: EFI_PAGE_SHIFT fix Make x86 EFI code work when EFI_PAGE_SHIFT != PAGE_SHIFT. The memrange_efi_to_native() provided in this patch can be used on other EFI platforms such as IA64 too. This patch has been tested on Intel x86_64 platform with EFI 64/32 firmware. 
Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 18 +++++++++++++----- arch/x86/kernel/efi_64.c | 12 ++++++------ include/linux/efi.h | 7 +++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 759e02bec070..77d424cf68b3 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; + u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - set_memory_x(md->virt_addr, md->num_pages); + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); } } @@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + if (PFN_UP(end) <= max_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); @@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) - set_memory_uc(md->virt_addr, md->num_pages); + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d143a1e76b30..d0060fdcccac 100644 --- 
a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void) void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped; + static unsigned pages_mapped __initdata; unsigned i, pages; + unsigned long offset; - /* phys_addr and size must be page aligned */ - if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) - return NULL; + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; - pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; @@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) } return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)); + (pages_mapped - pages)) + offset; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 14813b595802..a5f359a7ad0e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -394,4 +395,10 @@ struct efi_generic_dev_path { u16 length; } __attribute ((packed)); +static inline void memrange_efi_to_native(u64 *addr, u64 *npages) +{ + *npages = PFN_UP(*addr + (*npages<<EFI_PAGE_SHIFT)) - PFN_DOWN(*addr); + *addr &= PAGE_MASK; +} + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From [commit hash lost in extraction] Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 18 Mar 2008 12:51:22 -0700 Subject: x86_64: do not reserve ramdisk two times ramdisk is reserved via reserve_early in x86_64_start_kernel, later early_res_to_bootmem() will convert to reservation in bootmem. so don't need to reserve that again. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/setup_64.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d6d54faa84df..993c76773256 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +#ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data) unsigned long ramdisk_end = ramdisk_image + ramdisk_size; reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } +#endif reserve_ebda_region(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 674ef3510cdf..0aa291bff4e0 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -420,11 +420,14 @@ void __init setup_arch(char **cmdline_p) unsigned long end_of_mem = end_pfn << PAGE_SHIFT; if (ramdisk_end <= end_of_mem) { - reserve_bootmem_generic(ramdisk_image, ramdisk_size); + /* + * don't need to reserve again, already reserved early + * in x86_64_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; } else { - /* Assumes everything on node 0 */ free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", -- cgit v1.2.3 From 8ce116e5993cf64729a4d2b3dc2c0f072852654b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 08:52:16 +0100 Subject: x86: clean up cpu capabilities accesses, p4-clockmod.c Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner 
--- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 14791ec55cfd..199e4e05e5dc 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) + if (!test_cpu_cap(c, X86_FEATURE_ACPI) || + !test_cpu_cap(c, X86_FEATURE_ACC)) return -ENODEV; ret = cpufreq_register_driver(&p4clockmod_driver); -- cgit v1.2.3 From a7d5ac87b220829bb077cdc8e01c4fd4714ae41e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 4 Mar 2008 22:05:27 -0800 Subject: x86: pageattr.c fix shadowed variable warning irqs_disabled() uses flags internally, use _flags to avoid shadowing code calling into this macro. 
Introduced between 2.6.25-rc3 and -rc4 Fixes the sparse warning: arch/x86/mm/pageattr.c:383:21: warning: symbol 'flags' shadows an earlier one arch/x86/mm/pageattr.c:369:16: originally declared here Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/irqflags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 412e025bc5c7..e600c4e9b8c5 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -84,10 +84,10 @@ #define irqs_disabled() \ ({ \ - unsigned long flags; \ + unsigned long _flags; \ \ - raw_local_save_flags(flags); \ - raw_irqs_disabled_flags(flags); \ + raw_local_save_flags(_flags); \ + raw_irqs_disabled_flags(_flags); \ }) #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) -- cgit v1.2.3 From cf9b111c170733dde39139e8989b676ec8b81573 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sat, 8 Mar 2008 18:15:06 +0800 Subject: x86: remove pointless comments Remove old comments that include the old arch/i386 directory. Signed-off-by: WANG Cong Acked-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/a20.c | 2 -- arch/x86/boot/apm.c | 2 -- arch/x86/boot/bitops.h | 2 -- arch/x86/boot/boot.h | 2 -- arch/x86/boot/cmdline.c | 2 -- arch/x86/boot/copy.S | 2 -- arch/x86/boot/cpucheck.c | 2 -- arch/x86/boot/edd.c | 2 -- arch/x86/boot/install.sh | 2 -- arch/x86/boot/main.c | 2 -- arch/x86/boot/mca.c | 2 -- arch/x86/boot/memory.c | 2 -- arch/x86/boot/pm.c | 2 -- arch/x86/boot/pmjump.S | 2 -- arch/x86/boot/printf.c | 2 -- arch/x86/boot/string.c | 2 -- arch/x86/boot/tty.c | 2 -- arch/x86/boot/version.c | 2 -- arch/x86/boot/video-bios.c | 2 -- arch/x86/boot/video-vesa.c | 2 -- arch/x86/boot/video-vga.c | 2 -- arch/x86/boot/video.c | 2 -- arch/x86/boot/video.h | 2 -- arch/x86/boot/voyager.c | 2 -- arch/x86/kernel/acpi/cstate.c | 2 -- arch/x86/kernel/acpi/processor.c | 2 -- arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 - arch/x86/kernel/entry_32.S | 1 - arch/x86/kernel/head_32.S | 1 - arch/x86/mach-visws/visws_apic.c | 2 -- arch/x86/mach-voyager/voyager_basic.c | 2 -- arch/x86/mach-voyager/voyager_cat.c | 2 -- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/mach-voyager/voyager_thread.c | 2 -- arch/x86/mm/init_32.c | 1 - arch/x86/mm/pgtable_32.c | 4 ---- arch/x86/video/fbdev.c | 1 - 37 files changed, 71 deletions(-) diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 31348d054fca..90943f83e84d 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/a20.c - * * Enable A20 gate (return -1 on failure) */ diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index c117c7fb859c..7aa6033001f9 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -12,8 +12,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/apm.c - * * Get APM BIOS information */ diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h 
index 8dcc8dc7db88..878e4b9940d9 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/bitops.h - * * Very simple bitops for the boot code. */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 09578070bfba..a34b9982c7cb 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/boot.h - * * Header file for the real-mode kernel code */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 680408a0f463..a1d35634bce0 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cmdline.c - * * Simple command-line parser for early boot. */ diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef127e56a3cf..ef50c84e8b4b 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/copy.S - * * Memory copy routines */ diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 2462c88689ed..7804389ee005 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cpucheck.c - * * Check for obligatory CPU features and abort if the features are not * present. This code should be compilable as 16-, 32- or 64-bit * code, so be very careful with types and inline assembly. 
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 8721dc46a0b6..d84a48ece785 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/edd.c - * * Get EDD BIOS disk information */ diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 88d77761d01b..8d60ee15dfd9 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -1,7 +1,5 @@ #!/bin/sh # -# arch/i386/boot/install.sh -# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 7828da5cfd07..77569a4a3be1 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/main.c - * * Main module for the real-mode kernel code */ diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 68222f2d4b67..911eaae5d696 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/mca.c - * * Get the MCA system description table */ diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index e77d89f9e8aa..acad32eb4290 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/memory.c - * * Memory detection code */ diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index a93cb8bded4d..328956fdb59e 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pm.c - * * Prepare the machine for transition to protected mode. 
*/ diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index f5402d51f7c3..ab049d40a884 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pmjump.S - * * The actual transition into protected mode */ diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 7e7e890699be..c1d00c0274c4 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/printf.c - * * Oh, it's a waste of space, but oh-so-yummy for debugging. This * version of printf() does not include 64-bit support. "Live with * it." diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 481a22097781..f94b7a0c2abf 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/string.c - * * Very basic string functions */ diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index f3f14bd26371..0be77b39328a 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/tty.c - * * Very simple screen I/O * XXX: Probably should add very simple serial I/O? 
*/ diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index c61462f7d9a7..2723d9b5ce43 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/version.c - * * Kernel version string */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 39e247e96172..49f26aaaebc8 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-bios.c - * * Standard video BIOS modes * * We have two options for this; silent and scanned. diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 5d5a3f6e8b5c..401ad998ad08 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vesa.c - * * VESA text modes */ diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 330d6589a2ad..40ecb8d7688c 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vga.c - * * Common all-VGA modes */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index c1c47ba069ef..83598b23093a 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.c - * * Select video mode */ diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index d69347f79e8e..ee63f5d14461 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.h - * * Header file for the real-mode video probing 
code */ diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c index 6499e3239b41..433909d61e5c 100644 --- a/arch/x86/boot/voyager.c +++ b/arch/x86/boot/voyager.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/voyager.c - * * Get the Voyager config information */ diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d59..9366fb68d8d8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/cstate.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for SMP C-states on Intel CPUs diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 324eb0cab19c..de2d2e4ebad9 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/processor.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for platforms with Intel CPUs diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 9b7e01daa1ca..1f4cc48c14c6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9ba49a26dff8..f0f8934fc303 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 826988a6e964..90f038af3adc 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. * * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index 710faf71a650..cef9cb1d15ac 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/mach-visws/visws_apic.c - * * Copyright (C) 1999 Bent Hagemark, Ingo Molnar * * SGI Visual Workstation interrupt controller diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c index 6a949e4edde8..46d6f8067690 100644 --- a/arch/x86/mach-voyager/voyager_basic.c +++ b/arch/x86/mach-voyager/voyager_basic.c @@ -2,8 +2,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager.c - * * This file contains all the voyager specific routines for getting * initialisation of the architecture to function. For additional * features see: diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index 17a7904f75b1..ecab9fff0fd1 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_cat.c - * * This file contains all the logic for manipulating the CAT bus * in a level 5 machine. 
* diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index be7235bf105d..96f60c7cd124 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_smp.c - * * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c index c69c931818ed..15464a20fb38 100644 --- a/arch/x86/mach-voyager/voyager_thread.c +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_thread.c - * * This module provides the machine status monitor thread for the * voyager architecture. This allows us to monitor the machine * environment (temp, voltage, fan function) and the front panel and diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1500dc8d63e4..9ec62da85fd7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds * diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 3165ec0672bd..6fb9e7c6893f 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -1,7 +1,3 @@ -/* - * linux/arch/i386/mm/pgtable.c - */ - #include #include #include diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 48fb38d7d2c0..4db42bff8c60 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -1,5 +1,4 @@ /* - * arch/i386/video/fbdev.c - i386 Framebuffer * * Copyright (C) 2007 Antonino Daplas * -- cgit v1.2.3 From 120d5bf128906c790df810e159d2e1239d08fef1 Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Wed, 9 Apr 2008 22:53:50 +0200 Subject: x86: remove vm86.h inclusion from process_32.c I've made a small investigation about vm86.h inclusion 
rules and it looks like everything is more or less ok. Files that rely on asm/vm86.h symbols are: - kprobes.c - process_32.c - signal_32.c - traps_32.c - vm86_32.c File process_32.c includes vm86.h explicitly. We can remove that include and it won't break anything. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3903a8f2eb97..91e147b486dd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -45,7 +45,6 @@ #include #include #include -#include #ifdef CONFIG_MATH_EMULATION #include #endif -- cgit v1.2.3 From 1a7a34af78923f8807d054a15133a8fcf47e385e Mon Sep 17 00:00:00 2001 From: Jacek Luczak Date: Thu, 10 Apr 2008 13:40:57 +0200 Subject: x86: e820_64, fix section mismatch warning Fix section mismatch warnings which occur on my x86_64 box while compiling linux-next-20080410: Warning messages: WARNING: arch/x86/kernel/built-in.o(.text+0x7bc2): Section mismatch in reference from the function bad_addr() to the variable .init.data:early_res The function bad_addr() references the variable __initdata early_res. This is often because bad_addr lacks a __initdata annotation or the annotation of early_res is wrong. WARNING: arch/x86/kernel/built-in.o(.text+0x7c3b): Section mismatch in reference from the function bad_addr_size() to the variable .init.data:early_res The function bad_addr_size() references the variable __initdata early_res. This is often because bad_addr_size lacks a __initdata annotation or the annotation of early_res is wrong. 
Signed-off-by: Jacek Luczak Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/e820_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 7f6c0c85c8f6..cbd42e51cb08 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; @@ -116,7 +116,7 @@ again: } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) { int i; -- cgit v1.2.3 From 4c8337ac425b220594fec45ad6d3ac76d3ce2b90 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 10 Apr 2008 15:09:50 -0700 Subject: x86: fix arch/x86/mm/ioremap.c warning Fix printk formats in x86/mm/ioremap.c: next-20080410/arch/x86/mm/ioremap.c:137: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'resource_size_t' next-20080410/arch/x86/mm/ioremap.c:188: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'resource_size_t' next-20080410/arch/x86/mm/ioremap.c:188: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'long unsigned int' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c590fd200e29..3a4baf95e24d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, if (!phys_addr_valid(phys_addr)) { printk(KERN_WARNING "ioremap: invalid physical address %llx\n", - phys_addr); + (unsigned long 
long)phys_addr); WARN_ON_ONCE(1); return NULL; } @@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, new_prot_val == _PAGE_CACHE_WB)) { pr_debug( "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", - phys_addr, phys_addr + size, + (unsigned long long)phys_addr, + (unsigned long long)(phys_addr + size), prot_val, new_prot_val); free_memtype(phys_addr, phys_addr + size); return NULL; -- cgit v1.2.3 From 7c53976404e2f906c60b69cc5793add87ee49c6a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 8 Apr 2008 12:54:30 +0200 Subject: x86: cleanup boot-heap usage The kernel decompressor wrapper uses memory located beyond the end of the image. This might lead to hard to debug problems, but even if it can be proven to be safe, it is at the very least unclean. I don't see any advantages either, unless you count it not being zeroed out as an advantage. This patch moves the boot-heap area to the bss segment. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_32.S | 15 +++++++++------ arch/x86/boot/compressed/head_64.S | 22 +++++++++++++--------- arch/x86/boot/compressed/misc.c | 8 +------- include/asm-x86/boot.h | 8 ++++++++ 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 036e635f18a3..ba7736cf2ec7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -130,7 +130,7 @@ relocated: /* * Setup the stack for the decompressor */ - leal stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. 
@@ -142,8 +142,8 @@ relocated: pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data - leal _end(%ebx), %eax - pushl %eax # end of the image as third argument + leal boot_heap(%ebx), %eax + pushl %eax # heap area as third argument pushl %esi # real mode pointer as second arg call decompress_kernel addl $20, %esp @@ -181,7 +181,10 @@ relocated: jmp *%ebp .bss +/* Stack and heap for uncompression */ .balign 4 -stack: - .fill 4096, 1, 0 -stack_end: +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e8657b98c902..7a212a62db36 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ startup_32: subl $1b, %ebp /* setup a stack and make sure cpu supports long mode. */ - movl $user_stack_end, %eax + movl $boot_stack_end, %eax addl %ebp, %eax movl %eax, %esp @@ -274,7 +275,7 @@ relocated: stosb /* Setup the stack */ - leaq user_stack_end(%rip), %rsp + leaq boot_stack_end(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -285,7 +286,7 @@ relocated: */ pushq %rsi # Save the real mode argument movq %rsi, %rdi # real mode address - leaq _heap(%rip), %rsi # _heap + leaq boot_heap(%rip), %rsi # malloc area for uncompression leaq input_data(%rip), %rdx # input_data movl input_len(%rip), %eax movq %rax, %rcx # input_len @@ -310,9 +311,12 @@ gdt: .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ gdt_end: - .bss -/* Stack for uncompression */ - .balign 4 -user_stack: - .fill 4096,4,0 -user_stack_end: + +.bss +/* Stack and heap for uncompression */ +.balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 
dad4e699f5a3..90456cee47c3 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -217,12 +217,6 @@ static void putstr(const char *); static memptr free_mem_ptr; static memptr free_mem_end_ptr; -#ifdef CONFIG_X86_64 -#define HEAP_SIZE 0x7000 -#else -#define HEAP_SIZE 0x4000 -#endif - static char *vidmem; static int vidport; static int lines, cols; @@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ - free_mem_end_ptr = heap + HEAP_SIZE; + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; inbuf = input_data; /* Input buffer */ insize = input_len; inptr = 0; diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h index ed8affbf96cb..2faed7ecb092 100644 --- a/include/asm-x86/boot.h +++ b/include/asm-x86/boot.h @@ -17,4 +17,12 @@ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +#ifdef CONFIG_X86_64 +#define BOOT_HEAP_SIZE 0x7000 +#define BOOT_STACK_SIZE 0x4000 +#else +#define BOOT_HEAP_SIZE 0x4000 +#define BOOT_STACK_SIZE 0x1000 +#endif + #endif /* _ASM_BOOT_H */ -- cgit v1.2.3 From 4a9f54cfd21f313b9858f951783512d3f14e58a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 10 Apr 2008 15:06:38 -0700 Subject: x86: cleanup: change _end to end_before_pgt cleanup: change the _end in compressed vmlinux_64.lds. also change _heap to _ebss that is not needed. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/boot/compressed/vmlinux_64.lds | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 7a212a62db36..d8819efac81d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -244,9 +244,9 @@ ENTRY(startup_64) /* Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _end(%rip), %r8 - leaq _end(%rbx), %r9 - movq $_end /* - $startup_32 */, %rcx + leaq _end_before_pgt(%rip), %r8 + leaq _end_before_pgt(%rbx), %r9 + movq $_end_before_pgt /* - $startup_32 */, %rcx 1: subq $8, %r8 subq $8, %r9 movq 0(%r8), %rax @@ -268,7 +268,7 @@ relocated: */ xorq %rax, %rax leaq _edata(%rbx), %rdi - leaq _end(%rbx), %rcx + leaq _end_before_pgt(%rbx), %rcx subq %rdi, %rcx cld rep diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds index 7e5c7209f6cc..bef1ac891bce 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux_64.lds @@ -39,10 +39,10 @@ SECTIONS *(.bss.*) *(COMMON) . = ALIGN(8); - _end = . ; + _end_before_pgt = . ; . = ALIGN(4096); pgtable = . ; . = . 
+ 4096 * 6; - _heap = .; + _ebss = .; } } -- cgit v1.2.3 From f5a1b191b37ac2609e2babeec1b21f411da93e4d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 12 Apr 2008 10:28:25 +0200 Subject: x86: fix exec mappings comments - noexec32 is on by default for years already - add noexec32 to kernel-parameters and fix noexec typo in there Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 10 +++++++++- arch/x86/kernel/setup64.c | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4b0f1ae31a4c..f4839606988b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1280,8 +1280,16 @@ and is between 256 and 4096 characters. It is defined in the file noexec [IA-64] noexec [X86-32,X86-64] + On X86-32 available only on PAE configured kernels. noexec=on: enable non-executable mappings (default) - noexec=off: disable nn-executable mappings + noexec=off: disable non-executable mappings + + noexec32 [X86-64] + This affects only 32-bit executables. + noexec32=on: enable non-executable mappings (default) + read doesn't imply executable mappings + noexec32=off: disable non-executable mappings + read implies executable mappings nofxsr [BUGS=X86-32] Disables x86 floating point extended register save and restore. The kernel will only save diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 9042fb0e36f5..aee0e8200777 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -74,8 +74,8 @@ int force_personality32 = 0; Control non executable heap for 32bit processes. 
To control the stack too use noexec=off -on PROT_READ does not imply PROT_EXEC for 32bit processes -off PROT_READ implies PROT_EXEC (default) +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC */ static int __init nonx32_setup(char *str) { -- cgit v1.2.3 From 4bd01600b214275a80a69b44393d7e81d43c2faa Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 19 Feb 2008 11:02:30 +0100 Subject: x86: clean up =0 initializations in arch/x86/kernel/tsc_32.c Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3d7e6e9fa6c2..06af8cf8251f 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); * if the CPU frequency is scaled, TSC-based delays will need a different * loops_per_jiffy value to function properly. */ -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long cpu_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -283,7 +283,7 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz = 0; +static unsigned long current_tsc_khz; static cycle_t read_tsc(void) { -- cgit v1.2.3 From 5deb45e39b946901ae028ccd3a1d0b35fa387475 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 19 Apr 2008 19:19:55 +0200 Subject: ftrace: add notrace annotations for NMI routines This annotates NMI functions with notrace. Some tracers may be able to live with this, but some cannot. The safest is to turn it off, it's not particularly interesting anyway. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi_32.c | 3 ++- arch/x86/kernel/nmi_64.c | 6 ++++-- arch/x86/kernel/traps_32.c | 12 ++++++------ arch/x86/kernel/traps_64.c | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 8421d0ac6f22..11b14bbaa61e 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 11f9130ac513..5a29ded994fa 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -313,7 +313,8 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { int sum; int touched = 0; @@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) static unsigned ignore_nmis; -asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 65791ca2824a..dc4273010f2a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -681,7 +681,7 @@ gp_in_kernel: } } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG @@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) clear_mem_error(reason); } -static __kprobes void +static notrace __kprobes void 
io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs *regs, long error_code) +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 79aa6fc0815c..6d883b13ef4f 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err) oops_end(flags, regs, SIGSEGV); } -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; @@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, die("general protection fault", regs, error_code); } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x.\n", @@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); @@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; -- cgit v1.2.3 From 8fb402bccf203ecca8f9e0202b8fd3c937dece6f Mon Sep 17 00:00:00 2001 From: Erik Bosman Date: Fri, 11 Apr 2008 18:54:17 +0200 Subject: generic, x86: add prctl commands PR_GET_TSC and PR_SET_TSC This patch adds prctl commands that make it possible to deny the execution of timestamp counters in userspace. If this is not implemented on a specific architecture, prctl will return -EINVAL. 
Signed-off-by: Erik Bosman Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/prctl.h | 6 ++++++ kernel/sys.c | 13 ++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 3800639775ae..5c80b1939636 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -67,4 +67,10 @@ #define PR_CAPBSET_READ 23 #define PR_CAPBSET_DROP 24 +/* Get/set the process' ability to use the timestamp counter instruction */ +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index a626116af5db..6a0cc71ee88d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -67,6 +67,12 @@ #ifndef SET_ENDIAN # define SET_ENDIAN(a,b) (-EINVAL) #endif +#ifndef GET_TSC_CTL +# define GET_TSC_CTL(a) (-EINVAL) +#endif +#ifndef SET_TSC_CTL +# define SET_TSC_CTL(a) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -1737,7 +1743,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, #else return -EINVAL; #endif - + case PR_GET_TSC: + error = GET_TSC_CTL(arg2); + break; + case PR_SET_TSC: + error = SET_TSC_CTL(arg2); + break; default: error = -EINVAL; break; -- cgit v1.2.3 From 529e25f646e08901a6dad5768f681efffd77225e Mon Sep 17 00:00:00 2001 From: Erik Bosman Date: Mon, 14 Apr 2008 00:24:18 +0200 Subject: x86: implement prctl PR_GET_TSC and PR_SET_TSC This patch implements the PR_GET_TSC and PR_SET_TSC prctl() commands on the x86 platform (both 32 and 64 bit.) These commands control the ability to read the timestamp counter from userspace (the RDTSC instruction.) While the RDTSC instruction is a useful profiling tool, it is also the source of some non-determinism in ring-3.
For deterministic replay applications it is useful to be able to trap and emulate (and record the outcome of) this instruction. This patch uses code earlier used to disable the timestamp counter for the SECCOMP framework. A side-effect of this patch is that the SECCOMP environment will now also disable the timestamp counter on x86_64 due to the addition of the TIF_NOTSC define on this platform. The code which enables/disables the RDTSC instruction during context switches is in the __switch_to_xtra function, which already handles other unusual conditions, so normal performance should not have to suffer from this change. Signed-off-by: Erik Bosman Acked-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 43 ++++++++++++++++++++++--- arch/x86/kernel/process_64.c | 68 ++++++++++++++++++++++++++++++++++++++++ include/asm-x86/processor.h | 7 +++++ include/asm-x86/thread_info_64.h | 4 ++- include/asm-x86/tsc.h | 1 + 5 files changed, 118 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 91e147b486dd..a3790a3f8a83 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -523,11 +524,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { write_cr4(read_cr4() | X86_CR4_TSD); } + void disable_TSC(void) { preempt_disable(); @@ -539,11 +540,47 @@ void disable_TSC(void) hard_disable_TSC(); preempt_enable(); } + static void hard_enable_TSC(void) { write_cr4(read_cr4() & ~X86_CR4_TSD); } -#endif /* CONFIG_SECCOMP */ + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. 
+ */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, @@ -577,7 +614,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_debugreg(next->debugreg7, 7); } -#ifdef CONFIG_SECCOMP if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ @@ -586,7 +622,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else hard_enable_TSC(); } -#endif #ifdef X86_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e75ccc8a2b87..4c13b1406c70 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -535,6 +536,64 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. 
+ */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + /* * This special macro can be used to load a debugging register */ @@ -572,6 +631,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, loaddebug(next, 7); } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 6e26c7c717a2..eaf4548a23d2 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -918,4 +918,11 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, #define KSTK_EIP(task) (task_pt_regs(task)->ip) +/* Get/set a process' ability to use the timestamp counter instruction */ +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) + +extern int get_tsc_mode(unsigned long adr); +extern int set_tsc_mode(unsigned int val); + #endif diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index 1e5c6f6152cd..b17f5f6c2c59 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -126,6 +126,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps 
*/ +#define TIF_NOTSC 28 /* TSC is not accessible in userland */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -147,6 +148,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +#define _TIF_NOTSC (1 << TIF_NOTSC) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -160,7 +162,7 @@ static inline struct thread_info *stack_thread_info(void) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS) + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index d2d8eb5b55f5..0434bd8349a7 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -18,6 +18,7 @@ extern unsigned int cpu_khz; extern unsigned int tsc_khz; extern void disable_TSC(void); +extern void enable_TSC(void); static inline cycles_t get_cycles(void) { -- cgit v1.2.3 From f1326973262382150c26bf4dfccd0fce310c4a9c Mon Sep 17 00:00:00 2001 From: Erik Bosman Date: Fri, 11 Apr 2008 18:57:22 +0200 Subject: generic, x86: add tests for prctl PR_GET_TSC and PR_SET_TSC This patch adds three tests that test whether the PR_GET_TSC and PR_SET_TSC commands have the desirable effect. The tests check whether the control register is updated correctly at context switches and try to discover bugs while enabling/disabling the timestamp counter. 
Signed-off-by: Erik Bosman Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- .../prctl/disable-tsc-ctxt-sw-stress-test.c | 96 ++++++++++++++++++++++ .../prctl/disable-tsc-on-off-stress-test.c | 95 +++++++++++++++++++++ Documentation/prctl/disable-tsc-test.c | 94 +++++++++++++++++++++ 3 files changed, 285 insertions(+) create mode 100644 Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c create mode 100644 Documentation/prctl/disable-tsc-on-off-stress-test.c create mode 100644 Documentation/prctl/disable-tsc-test.c diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c new file mode 100644 index 000000000000..f8e8e95e81fd --- /dev/null +++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c @@ -0,0 +1,96 @@ +/* + * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) + * + * Tests if the control register is updated correctly + * at context switches + * + * Warning: this test will cause a very high load for a few seconds + * + */ + +#include +#include +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +void sigsegv_expect(int sig) +{ + /* */ +} + +void segvtask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_expect); + alarm(10); + rdtsc(); + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); +} + + +void sigsegv_fail(int sig) +{ + fprintf(stderr, "FATAL ERROR, rdtsc() failed 
while enabled\n"); + exit(0); +} + +void rdtsctask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_fail); + alarm(10); + for(;;) rdtsc(); +} + + +int main(int argc, char **argv) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; i +#include +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +/* snippet from wikipedia :-) */ + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +int should_segv = 0; + +void sigsegv_cb(int sig) +{ + if (!should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 0; + + rdtsc(); +} + +void task(void) +{ + signal(SIGSEGV, sigsegv_cb); + alarm(10); + for(;;) + { + rdtsc(); + if (should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 1; + } +} + + +int main(int argc, char **argv) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; i +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 
2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +const char *tsc_names[] = +{ + [0] = "[not set]", + [PR_TSC_ENABLE] = "PR_TSC_ENABLE", + [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV", +}; + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +void sigsegv_cb(int sig) +{ + int tsc_val = 0; + + printf("[ SIG_SEGV ]\n"); + printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == "); +} + +int main(int argc, char **argv) +{ + int tsc_val = 0; + + signal(SIGSEGV, sigsegv_cb); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1) + perror("prctl"); + + printf("rdtsc() == "); + fflush(stdout); + printf("%llu\n", (unsigned long long)rdtsc()); + fflush(stdout); + + exit(EXIT_SUCCESS); +} + -- cgit v1.2.3 From d8bb6f4c1670c8324e4135c61ef07486f7f17379 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Apr 2008 19:45:18 +0200 Subject: x86: tsc prevent time going backwards We already catch most of the TSC problems by sanity checks, but there is a subtle bug which has been in the code forever. 
This can cause time jumps in the range of hours. This was reported in: http://lkml.org/lkml/2007/8/23/96 and http://lkml.org/lkml/2008/3/31/23 I was able to reproduce the problem with a gettimeofday loop test on a dual core and a quad core machine which both have synchronized TSCs. The TSCs seem not to be perfectly in sync though, but the kernel is not able to detect the slight delta in the sync check. Still there exists an extremely small window where this delta can be observed with a real big time jump. So far I was only able to reproduce this with the vsyscall gettimeofday implementation, but in theory this might be observable with the syscall based version as well. CPU 0 updates the clock source variables under xtime/vsyscall lock and CPU1, where the TSC is slightly behind CPU0, is reading the time right after the seqlock was unlocked. The clocksource reference data was updated with the TSC from CPU0 and the value which is read from TSC on CPU1 is less than the reference data. This results in a huge delta value due to the unsigned subtraction of the TSC value and the reference value. This algorithm can not be changed due to the support of wrapping clock sources like pm timer. The huge delta is converted to nanoseconds and added to xtime, which is then observable by the caller. The next gettimeofday call on CPU1 will show the correct time ag