X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Facpi%2Fprocessor_idle.c;h=ee5759bef9454df682d7520032e8c33f5bfbc0b8;hb=38ff4ffc039ba5a5878f2dcbb03d87c3a1f02f1b;hp=cc049338e418b5b968888faafd22a85492752bab;hpb=6eb0a0fd059598ee0d49c6283ce25cccd743e9fc;p=safe%2Fjmp%2Flinux-2.6

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index cc04933..ee5759b 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2001, 2002 Andy Grover
  * Copyright (C) 2001, 2002 Paul Diefenbaugh
- * Copyright (C) 2004       Dominik Brodowski
+ * Copyright (C) 2004, 2005 Dominik Brodowski
  * Copyright (C) 2004  Anil S Keshavamurthy
  *			- Added processor hotplug support
  * Copyright (C) 2005  Venkatesh Pallipadi
@@ -38,6 +38,18 @@
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>	/* need_resched() */
+#include <linux/latency.h>
+#include <linux/clockchips.h>
+
+/*
+ * Include the apic definitions for x86 to have the APIC timer related defines
+ * available also for UP (on SMP it gets magically included via linux/smp.h).
+ * asm/acpi.h is not an option, as it would require more include magic. Also
+ * creating an empty asm-ia64/apic.h would just trade pest vs. cholera.
+ */
+#ifdef CONFIG_X86
+#include <asm/apic.h>
+#endif
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -47,17 +59,16 @@
 
 #define ACPI_PROCESSOR_COMPONENT        0x01000000
 #define ACPI_PROCESSOR_CLASS            "processor"
-#define ACPI_PROCESSOR_DRIVER_NAME      "ACPI Processor Driver"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
-ACPI_MODULE_NAME("acpi_processor")
+ACPI_MODULE_NAME("processor_idle");
 #define ACPI_PROCESSOR_FILE_POWER	"power"
 #define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
 #define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 
-static void (*pm_idle_save) (void);
+static void (*pm_idle_save) (void) __read_mostly;
 module_param(max_cstate, uint, 0644);
 
-static unsigned int nocst = 0;
+static unsigned int nocst __read_mostly;
 module_param(nocst, uint, 0000);
 
 /*
@@ -67,7 +78,7 @@ module_param(nocst, uint, 0000);
  * 100 HZ: 0x0000000F: 4 jiffies = 40ms
  * reduce history for more aggressive entry into C3
  */
-static unsigned int bm_history =
+static unsigned int bm_history __read_mostly =
     (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
 module_param(bm_history, uint, 0644);
 
 /* --------------------------------------------------------------------------
@@ -94,23 +105,63 @@ static int set_max_cstate(struct dmi_system_id *id)
 	return 0;
 }
 
-static struct dmi_system_id __initdata processor_power_dmi_table[] = {
-	{set_max_cstate, "IBM ThinkPad R40e", {
-	 DMI_MATCH(DMI_BIOS_VENDOR,
-		   "IBM"),
-	 DMI_MATCH(DMI_BIOS_VERSION,
-		   "1SET60WW")},
-	 (void *)1},
-	{set_max_cstate, "Medion 41700", {
-	 DMI_MATCH(DMI_BIOS_VENDOR,
-		   "Phoenix Technologies LTD"),
-	 DMI_MATCH(DMI_BIOS_VERSION,
-		   "R01-A1J")}, (void *)1},
-	{set_max_cstate, "Clevo 5600D", {
-	 DMI_MATCH(DMI_BIOS_VENDOR,
-		   "Phoenix Technologies LTD"),
-	 DMI_MATCH(DMI_BIOS_VERSION,
-		   "SHE845M0.86C.0013.D.0302131307")},
+/* Actually this shouldn't be __cpuinitdata, would be better to fix the
+   callers to only run once -AK */
+static struct dmi_system_id __cpuinitdata processor_power_dmi_table[] = {
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET70WW")}, (void *)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW")}, (void *)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET43WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET45WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET47WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET50WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET52WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET55WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET56WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET59WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET61WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET62WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET64WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET65WW") }, (void*)1},
+	{ set_max_cstate, "IBM ThinkPad R40e", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"IBM"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"1SET68WW") }, (void*)1},
+	{ set_max_cstate, "Medion 41700", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J")}, (void *)1},
+	{ set_max_cstate, "Clevo 5600D", {
+	  DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
+	  DMI_MATCH(DMI_BIOS_VERSION,"SHE845M0.86C.0013.D.0302131307")},
 	 (void *)2},
 	{},
 };
 
@@ -119,7 +170,7 @@ static inline u32 ticks_elapsed(u32 t1, u32 t2)
 {
 	if (t2 >= t1)
 		return (t2 - t1);
-	else if (!acpi_fadt.tmr_val_ext)
+	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
 		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
 	else
 		return ((0xFFFFFFFF - t1) + t2);
@@ -146,8 +197,7 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 		case ACPI_STATE_C3:
 			/* Disable bus master reload */
 			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
-				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
-						  ACPI_MTX_DO_NOT_LOCK);
+				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
 			break;
 		}
 	}
@@ -157,8 +207,7 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 	case ACPI_STATE_C3:
 		/* Enable bus master reload */
 		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
-			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1,
-					  ACPI_MTX_DO_NOT_LOCK);
+			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
 		break;
 	}
 
@@ -169,15 +218,112 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 
 static void acpi_safe_halt(void)
 {
-	clear_thread_flag(TIF_POLLING_NRFLAG);
-	smp_mb__after_clear_bit();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we
+	 * test NEED_RESCHED:
+	 */
+	smp_mb();
 	if (!need_resched())
 		safe_halt();
-	set_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status |= TS_POLLING;
 }
 
 static atomic_t c3_cpu_count;
 
+/* Common C-state entry for C2, C3, .. */
+static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
+{
+	if (cstate->space_id == ACPI_CSTATE_FFH) {
+		/* Call into architectural FFH based C-state */
+		acpi_processor_ffh_cstate_enter(cstate);
+	} else {
+		int unused;
+		/* IO port based C-state */
+		inb(cstate->address);
+		/* Dummy wait op - must do something useless after P_LVL2 read
+		   because chipsets cannot guarantee that STPCLK# signal
+		   gets asserted in time to freeze execution properly. */
+		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+}
+
+#ifdef ARCH_APICTIMER_STOPS_ON_C3
+
+/*
+ * Some BIOS implementations switch to C3 in the published C2 state.
+ * This seems to be a common problem on AMD boxen, but other vendors
+ * are affected too. We pick the most conservative approach: we assume
+ * that the local APIC stops in both C2 and C3.
+ */
+static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+				   struct acpi_processor_cx *cx)
+{
+	struct acpi_processor_power *pwr = &pr->power;
+	u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
+
+	/*
+	 * Check, if one of the previous states already marked the lapic
+	 * unstable
+	 */
+	if (pwr->timer_broadcast_on_state < state)
+		return;
+
+	if (cx->type >= type)
+		pr->power.timer_broadcast_on_state = state;
+}
+
+static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
+{
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+	unsigned long reason;
+
+	reason = pr->power.timer_broadcast_on_state < INT_MAX ?
+		CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
+
+	clockevents_notify(reason, &pr->id);
+#else
+	cpumask_t mask = cpumask_of_cpu(pr->id);
+
+	if (pr->power.timer_broadcast_on_state < INT_MAX)
+		on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);
+	else
+		on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
+#endif
+}
+
+/* Power(C) State timer broadcast control */
+static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+				       struct acpi_processor_cx *cx,
+				       int broadcast)
+{
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+	int state = cx - pr->power.states;
+
+	if (state >= pr->power.timer_broadcast_on_state) {
+		unsigned long reason;
+
+		reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
+			CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
+		clockevents_notify(reason, &pr->id);
+	}
+#endif
+}
+
+#else
+
+static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+				   struct acpi_processor_cx *cstate) { }
+static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
+static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+				       struct acpi_processor_cx *cx,
+				       int broadcast)
+{
+}
+
+#endif
+
 static void acpi_processor_idle(void)
 {
 	struct acpi_processor *pr = NULL;
@@ -224,23 +370,15 @@ static void acpi_processor_idle(void)
 		u32 bm_status = 0;
 		unsigned long diff = jiffies - pr->power.bm_check_timestamp;
 
-		if (diff > 32)
-			diff = 32;
+		if (diff > 31)
+			diff = 31;
 
-		while (diff) {
-			/* if we didn't get called, assume there was busmaster activity */
-			diff--;
-			if (diff)
-				pr->power.bm_activity |= 0x1;
-			pr->power.bm_activity <<= 1;
-		}
+		pr->power.bm_activity <<= diff;
 
-		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS,
-				  &bm_status, ACPI_MTX_DO_NOT_LOCK);
+		acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
 		if (bm_status) {
-			pr->power.bm_activity++;
-			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS,
-					  1, ACPI_MTX_DO_NOT_LOCK);
+			pr->power.bm_activity |= 0x1;
+			acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
 		}
 		/*
 		 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
@@ -250,16 +388,16 @@ static void acpi_processor_idle(void)
 		else if (errata.piix4.bmisx) {
 			if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
 			    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
-				pr->power.bm_activity++;
+				pr->power.bm_activity |= 0x1;
 		}
 
 		pr->power.bm_check_timestamp = jiffies;
 
 		/*
-		 * Apply bus mastering demotion policy.  Automatically demote
+		 * If bus mastering is or was active this jiffy, demote
 		 * to avoid a faulty transition.  Note that the processor
 		 * won't enter a low-power state during this call (to this
-		 * funciton) but should upon the next.
+		 * function) but should upon the next.
 		 *
 		 * TBD: A better policy might be to fallback to the demotion
 		 *      state (use it for this quantum only) istead of
 		 *      demoting -- and rely on duration as our sole demotion
 		 *      qualification.  This may, however, introduce DMA
 		 *      issues (e.g. floppy DMA transfer overrun/underrun).
 		 */
-		if (pr->power.bm_activity & cx->demotion.threshold.bm) {
+		if ((pr->power.bm_activity & 0x1) &&
+		    cx->demotion.threshold.bm) {
 			local_irq_enable();
 			next_state = cx->demotion.state;
 			goto end;
@@ -281,22 +420,24 @@ static void acpi_processor_idle(void)
 	 * detection phase, to work cleanly with logical CPU hotplug.
 	 */
 	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
-	    !pr->flags.has_cst && !acpi_fadt.plvl2_up)
+	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 		cx = &pr->power.states[ACPI_STATE_C1];
 #endif
 
-	cx->usage++;
-
 	/*
 	 * Sleep:
 	 * ------
 	 * Invoke the current Cx state to put the processor to sleep.
 	 */
 	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
-		clear_thread_flag(TIF_POLLING_NRFLAG);
-		smp_mb__after_clear_bit();
+		current_thread_info()->status &= ~TS_POLLING;
+		/*
+		 * TS_POLLING-cleared state must be visible before we
+		 * test NEED_RESCHED:
+		 */
+		smp_mb();
 		if (need_resched()) {
-			set_thread_flag(TIF_POLLING_NRFLAG);
+			current_thread_info()->status |= TS_POLLING;
 			local_irq_enable();
 			return;
 		}
@@ -325,19 +466,24 @@ static void acpi_processor_idle(void)
 
 	case ACPI_STATE_C2:
 		/* Get start time (ticks) */
-		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 		/* Invoke C2 */
-		inb(cx->address);
-		/* Dummy op - must do something useless after P_LVL2 read */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		acpi_state_timer_broadcast(pr, cx, 1);
+		acpi_cstate_enter(cx);
 		/* Get end time (ticks) */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+#ifdef CONFIG_GENERIC_TIME
+		/* TSC halts in C2, so notify users */
+		mark_tsc_unstable("possible TSC halt in C2");
+#endif
 		/* Re-enable interrupts */
 		local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+		acpi_state_timer_broadcast(pr, cx, 0);
 		break;
 
 	case ACPI_STATE_C3:
@@ -349,8 +495,7 @@ static void acpi_processor_idle(void)
 				 * All CPUs are trying to go to C3
 				 * Disable bus master arbitration
 				 */
-				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
-						  ACPI_MTX_DO_NOT_LOCK);
+				acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
 			}
 		} else {
 			/* SMP with no shared cache... Invalidate cache  */
@@ -358,39 +503,45 @@ static void acpi_processor_idle(void)
 		}
 
 		/* Get start time (ticks) */
-		t1 = inl(acpi_fadt.xpm_tmr_blk.address);
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 		/* Invoke C3 */
-		inb(cx->address);
-		/* Dummy op - must do something useless after P_LVL3 read */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		acpi_state_timer_broadcast(pr, cx, 1);
+		acpi_cstate_enter(cx);
 		/* Get end time (ticks) */
-		t2 = inl(acpi_fadt.xpm_tmr_blk.address);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 		if (pr->flags.bm_check) {
 			/* Enable bus master arbitration */
 			atomic_dec(&c3_cpu_count);
-			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
-					  ACPI_MTX_DO_NOT_LOCK);
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
 		}
 
+#ifdef CONFIG_GENERIC_TIME
+		/* TSC halts in C3, so notify users */
+		mark_tsc_unstable("TSC halts in C3");
+#endif
 		/* Re-enable interrupts */
 		local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
+		acpi_state_timer_broadcast(pr, cx, 0);
 		break;
 
 	default:
 		local_irq_enable();
 		return;
 	}
 
+	cx->usage++;
+	if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0))
+		cx->time += sleep_ticks;
+
 	next_state = pr->power.state;
 
 #ifdef CONFIG_HOTPLUG_CPU
 	/* Don't do promotion/demotion */
 	if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) &&
-	    !pr->flags.has_cst && !acpi_fadt.plvl2_up) {
+	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) {
 		next_state = cx;
 		goto end;
 	}
 #endif
 
@@ -406,7 +557,8 @@ static void acpi_processor_idle(void)
 	 */
 	if (cx->promotion.state &&
 	    ((cx->promotion.state - pr->power.states) <= max_cstate)) {
-		if (sleep_ticks > cx->promotion.threshold.ticks) {
+		if (sleep_ticks > cx->promotion.threshold.ticks &&
+		    cx->promotion.state->latency <= system_latency_constraint()) {
 			cx->promotion.count++;
 			cx->demotion.count = 0;
 			if (cx->promotion.count >=
@@ -447,8 +599,10 @@ static void acpi_processor_idle(void)
       end:
 	/*
 	 * Demote if current state exceeds max_cstate
+	 * or if the latency of the current state is unacceptable
 	 */
-	if ((pr->power.state - pr->power.states) > max_cstate) {
+	if ((pr->power.state - pr->power.states) > max_cstate ||
+	    pr->power.state->latency > system_latency_constraint()) {
 		if (cx->demotion.state)
 			next_state = cx->demotion.state;
 	}
@@ -471,10 +625,9 @@ static int acpi_processor_set_power_policy(struct acpi_processor *pr)
 	struct acpi_processor_cx *higher = NULL;
 	struct acpi_processor_cx *cx;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_set_power_policy");
 
 	if (!pr)
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 
 	/*
 	 * This function sets the default Cx state policy (OS idle handler).
@@ -498,7 +651,7 @@ static int acpi_processor_set_power_policy(struct acpi_processor *pr)
 	}
 
 	if (!state_is_set)
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 
 	/* demotion */
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
@@ -537,38 +690,30 @@ static int acpi_processor_set_power_policy(struct acpi_processor *pr)
 		higher = cx;
 	}
 
-	return_VALUE(0);
+	return 0;
 }
 
 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 {
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_fadt");
 
 	if (!pr)
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 
 	if (!pr->pblk)
-		return_VALUE(-ENODEV);
-
-	memset(pr->power.states, 0, sizeof(pr->power.states));
+		return -ENODEV;
 
 	/* if info is obtained from pblk/fadt, type equals state */
-	pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
 	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
 	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
 
-	/* the C0 state only exists as a filler in our array,
-	 * and all processors need to support C1 */
-	pr->power.states[ACPI_STATE_C0].valid = 1;
-	pr->power.states[ACPI_STATE_C1].valid = 1;
-
 #ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Check for P_LVL2_UP flag before entering C2 and above on
 	 * an SMP system.
 	 */
-	if ((num_online_cpus() > 1) && !acpi_fadt.plvl2_up)
-		return_VALUE(-ENODEV);
+	if ((num_online_cpus() > 1) &&
+	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
+		return -ENODEV;
 #endif
 
 	/* determine C2 and C3 address from pblk */
@@ -576,66 +721,56 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 	pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5;
 
 	/* determine latencies from FADT */
-	pr->power.states[ACPI_STATE_C2].latency = acpi_fadt.plvl2_lat;
-	pr->power.states[ACPI_STATE_C3].latency = acpi_fadt.plvl3_lat;
+	pr->power.states[ACPI_STATE_C2].latency = acpi_gbl_FADT.C2latency;
+	pr->power.states[ACPI_STATE_C3].latency = acpi_gbl_FADT.C3latency;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 			  "lvl2[0x%08x] lvl3[0x%08x]\n",
 			  pr->power.states[ACPI_STATE_C2].address,
 			  pr->power.states[ACPI_STATE_C3].address));
 
-	return_VALUE(0);
+	return 0;
 }
 
-static int acpi_processor_get_power_info_default_c1(struct acpi_processor *pr)
+static int acpi_processor_get_power_info_default(struct acpi_processor *pr)
 {
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_default_c1");
-
-	memset(pr->power.states, 0, sizeof(pr->power.states));
-
-	/* if info is obtained from pblk/fadt, type equals state */
-	pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
-	pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2;
-	pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3;
-
-	/* the C0 state only exists as a filler in our array,
-	 * and all processors need to support C1 */
+	if (!pr->power.states[ACPI_STATE_C1].valid) {
+		/* set the first C-State to C1 */
+		/* all processors need to support C1 */
+		pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+		pr->power.states[ACPI_STATE_C1].valid = 1;
+	}
+	/* the C0 state only exists as a filler in our array */
 	pr->power.states[ACPI_STATE_C0].valid = 1;
-	pr->power.states[ACPI_STATE_C1].valid = 1;
-
-	return_VALUE(0);
+	return 0;
 }
 
 static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 {
 	acpi_status status = 0;
 	acpi_integer count;
+	int current_count;
 	int i;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *cst;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_cst");
 
 	if (nocst)
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 
-	pr->power.count = 0;
-	for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++)
-		memset(&(pr->power.states[i]), 0,
-		       sizeof(struct acpi_processor_cx));
+	current_count = 0;
 
 	status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n"));
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
-	cst = (union acpi_object *)buffer.pointer;
+	cst = buffer.pointer;
 
 	/* There must be at least 2 elements */
 	if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) {
-		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-				  "not enough elements in _CST\n"));
+		printk(KERN_ERR PREFIX "not enough elements in _CST\n");
 		status = -EFAULT;
 		goto end;
 	}
@@ -644,22 +779,11 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 
 	/* Validate number of power states. */
 	if (count < 1 || count != cst->package.count - 1) {
-		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-				  "count given by _CST is not valid\n"));
+		printk(KERN_ERR PREFIX "count given by _CST is not valid\n");
 		status = -EFAULT;
 		goto end;
 	}
 
-	/* We support up to ACPI_PROCESSOR_MAX_POWER. */
-	if (count > ACPI_PROCESSOR_MAX_POWER) {
-		printk(KERN_WARNING
-		       "Limiting number of power states to max (%d)\n",
-		       ACPI_PROCESSOR_MAX_POWER);
-		printk(KERN_WARNING
-		       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
-		count = ACPI_PROCESSOR_MAX_POWER;
-	}
-
 	/* Tell driver that at least _CST is supported. */
 	pr->flags.has_cst = 1;
 
@@ -671,14 +795,14 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 
 		memset(&cx, 0, sizeof(cx));
 
-		element = (union acpi_object *)&(cst->package.elements[i]);
+		element = &(cst->package.elements[i]);
 		if (element->type != ACPI_TYPE_PACKAGE)
 			continue;
 
 		if (element->package.count != 4)
 			continue;
 
-		obj = (union acpi_object *)&(element->package.elements[0]);
+		obj = &(element->package.elements[0]);
 
 		if (obj->type != ACPI_TYPE_BUFFER)
 			continue;
@@ -689,58 +813,87 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 		    (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE))
 			continue;
 
-		cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ?
-		    0 : reg->address;
-
 		/* There should be an easy way to extract an integer... */
-		obj = (union acpi_object *)&(element->package.elements[1]);
+		obj = &(element->package.elements[1]);
 		if (obj->type != ACPI_TYPE_INTEGER)
 			continue;
 
 		cx.type = obj->integer.value;
+		/*
+		 * Some buggy BIOSes won't list C1 in _CST -
+		 * Let acpi_processor_get_power_info_default() handle them later
+		 */
+		if (i == 1 && cx.type != ACPI_STATE_C1)
+			current_count++;
 
+		cx.address = reg->address;
+		cx.index = current_count + 1;
+
+		cx.space_id = ACPI_CSTATE_SYSTEMIO;
+		if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
+			if (acpi_processor_ffh_cstate_probe
+					(pr->id, &cx, reg) == 0) {
+				cx.space_id = ACPI_CSTATE_FFH;
+			} else if (cx.type != ACPI_STATE_C1) {
+				/*
+				 * C1 is a special case where FIXED_HARDWARE
+				 * can be handled in non-MWAIT way as well.
+				 * In that case, save this _CST entry info.
+				 * That is, we retain space_id of SYSTEM_IO for
+				 * halt based C1.
+				 * Otherwise, ignore this info and continue.
+				 */
+				continue;
+			}
+		}
 
-		if ((cx.type != ACPI_STATE_C1) &&
-		    (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO))
-			continue;
-
-		if ((cx.type < ACPI_STATE_C1) || (cx.type > ACPI_STATE_C3))
-			continue;
-
-		obj = (union acpi_object *)&(element->package.elements[2]);
+		obj = &(element->package.elements[2]);
 		if (obj->type != ACPI_TYPE_INTEGER)
 			continue;
 
 		cx.latency = obj->integer.value;
 
-		obj = (union acpi_object *)&(element->package.elements[3]);
+		obj = &(element->package.elements[3]);
 		if (obj->type != ACPI_TYPE_INTEGER)
 			continue;
 
 		cx.power = obj->integer.value;
 
-		(pr->power.count)++;
-		memcpy(&(pr->power.states[pr->power.count]), &cx, sizeof(cx));
+		current_count++;
+		memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
+
+		/*
+		 * We support total ACPI_PROCESSOR_MAX_POWER - 1
+		 * (From 1 through ACPI_PROCESSOR_MAX_POWER - 1)
+		 */
+		if (current_count >= (ACPI_PROCESSOR_MAX_POWER - 1)) {
+			printk(KERN_WARNING
+			       "Limiting number of power states to max (%d)\n",
+			       ACPI_PROCESSOR_MAX_POWER);
+			printk(KERN_WARNING
+			       "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n");
+			break;
+		}
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n",
-			  pr->power.count));
+			  current_count));
 
 	/* Validate number of power states discovered */
-	if (pr->power.count < 2)
+	if (current_count < 2)
 		status = -EFAULT;
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
-	return_VALUE(status);
+	return status;
 }
 
 static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 {
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c2");
 
 	if (!cx->address)
-		return_VOID;
+		return;
 
 	/*
 	 * C2 latency must be less than or equal to 100
@@ -749,7 +902,7 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 	else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "latency too large [%d]\n", cx->latency));
-		return_VOID;
+		return;
 	}
 
 	/*
@@ -759,7 +912,7 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 	cx->valid = 1;
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
 
-	return_VOID;
+	return;
 }
 
 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
@@ -767,10 +920,9 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 {
 	static int bm_check_flag;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c3");
 
 	if (!cx->address)
-		return_VOID;
+		return;
 
 	/*
 	 * C3 latency must be less than or equal to 1000
@@ -779,7 +931,7 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "latency too large [%d]\n", cx->latency));
-		return_VOID;
+		return;
 	}
 
 	/*
@@ -792,7 +944,7 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	else if (errata.piix4.fdma) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "C3 not supported on PIIX4 with Type-F DMA\n"));
-		return_VOID;
+		return;
 	}
 
 	/* All the logic here assumes flags.bm_check is same across all CPUs */
@@ -809,21 +961,20 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 		if (!pr->flags.bm_control) {
 			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 					  "C3 support requires bus mastering control\n"));
-			return_VOID;
+			return;
 		}
 	} else {
 		/*
 		 * WBINVD should be set in fadt, for C3 state to be
 		 * supported on when bm_check is not required.
 		 */
-		if (acpi_fadt.wb_invd != 1) {
+		if (!(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD)) {
 			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 					  "Cache invalidation should work properly"
 					  " for C3 to be enabled on SMP systems\n"));
-			return_VOID;
+			return;
 		}
-		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD,
-				  0, ACPI_MTX_DO_NOT_LOCK);
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
 	}
 
 	/*
@@ -835,7 +986,7 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	cx->valid = 1;
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
 
-	return_VOID;
+	return;
 }
 
 static int acpi_processor_power_verify(struct acpi_processor *pr)
@@ -843,14 +994,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 	unsigned int i;
 	unsigned int working = 0;
 
-#ifdef ARCH_APICTIMER_STOPS_ON_C3
-	struct cpuinfo_x86 *c = cpu_data + pr->id;
-	cpumask_t mask = cpumask_of_cpu(pr->id);
-
-	if (c->x86_vendor == X86_VENDOR_INTEL) {
-		on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
-	}
-#endif
+	pr->power.timer_broadcast_on_state = INT_MAX;
 
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) {
 		struct acpi_processor_cx *cx = &pr->power.states[i];
@@ -862,16 +1006,14 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 
 		case ACPI_STATE_C2:
 			acpi_processor_power_verify_c2(cx);
+			if (cx->valid)
+				acpi_timer_check_state(i, pr, cx);
 			break;
 
 		case ACPI_STATE_C3:
 			acpi_processor_power_verify_c3(pr, cx);
-#ifdef ARCH_APICTIMER_STOPS_ON_C3
-			if (c->x86_vendor == X86_VENDOR_INTEL) {
-				on_each_cpu(switch_APIC_timer_to_ipi,
-					    &mask, 1, 1);
-			}
-#endif
+			if (cx->valid)
+				acpi_timer_check_state(i, pr, cx);
 			break;
 		}
 
@@ -879,6 +1021,8 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 			working++;
 	}
 
+	acpi_propagate_timer_broadcast(pr);
+
 	return (working);
 }
 
@@ -887,17 +1031,23 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	unsigned int i;
 	int result;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_get_power_info");
 
 	/* NOTE: the idle thread may not be running while calling
 	 * this function */
 
+	/* Zero initialize all the C-states info. */
+	memset(pr->power.states, 0, sizeof(pr->power.states));
+
 	result = acpi_processor_get_power_info_cst(pr);
 	if (result == -ENODEV)
 		result = acpi_processor_get_power_info_fadt(pr);
 
-	if ((result) || (acpi_processor_power_verify(pr) < 2))
-		result = acpi_processor_get_power_info_default_c1(pr);
+	if (result)
+		return result;
+
+	acpi_processor_get_power_info_default(pr);
+
+	pr->power.count = acpi_processor_power_verify(pr);
 
 	/*
 	 * Set Default Policy
@@ -909,7 +1059,7 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	 */
 	result = acpi_processor_set_power_policy(pr);
 	if (result)
-		return_VALUE(result);
+		return result;
 
 	/*
 	 * if one state of type C2 or C3 is available, mark this
@@ -923,24 +1073,23 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 		}
 	}
 
-	return_VALUE(0);
+	return 0;
 }
 
 int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 {
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_cst_has_changed");
 
 	if (!pr)
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 
 	if (nocst) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	if (!pr->flags.power_setup_done)
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 
 	/* Fall back to the default idle loop */
 	pm_idle = pm_idle_save;
@@ -951,26 +1100,27 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
 		pm_idle = acpi_processor_idle;
 
-	return_VALUE(result);
+	return result;
 }
 
 /* proc interface */
 
 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 {
-	struct acpi_processor *pr = (struct acpi_processor *)seq->private;
+	struct acpi_processor *pr = seq->private;
 	unsigned int i;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_power_seq_show");
 
 	if (!pr)
 		goto end;
 
 	seq_printf(seq, "active state:            C%zd\n"
 		   "max_cstate:              C%d\n"
-		   "bus master activity:     %08x\n",
+		   "bus master activity:     %08x\n"
+		   "maximum allowed latency: %d usec\n",
 		   pr->power.state ? pr->power.state - pr->power.states : 0,
-		   max_cstate, (unsigned)pr->power.bm_activity);
+		   max_cstate, (unsigned)pr->power.bm_activity,
+		   system_latency_constraint());
 
 	seq_puts(seq, "states:\n");
 
@@ -1013,13 +1163,14 @@ static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 		else
 			seq_puts(seq, "demotion[--] ");
 
-		seq_printf(seq, "latency[%03d] usage[%08d]\n",
+		seq_printf(seq, "latency[%03d] usage[%08d] duration[%020llu]\n",
 			   pr->power.states[i].latency,
-			   pr->power.states[i].usage);
+			   pr->power.states[i].usage,
+			   (unsigned long long)pr->power.states[i].time);
 	}
 
       end:
-	return_VALUE(0);
+	return 0;
 }
 
 static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
@@ -1028,22 +1179,45 @@ static int acpi_processor_power_open_fs(struct inode *inode, struct file *file)
 			   PDE(inode)->data);
 }
 
-static struct file_operations acpi_processor_power_fops = {
+static const struct file_operations acpi_processor_power_fops = {
 	.open = acpi_processor_power_open_fs,
 	.read = seq_read,
 	.llseek = seq_lseek,
 	.release = single_release,
 };
 
-int acpi_processor_power_init(struct acpi_processor *pr,
+#ifdef CONFIG_SMP
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement.  This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int acpi_processor_latency_notify(struct notifier_block *b,
+		unsigned long l, void *v)
+{
+	smp_call_function(smp_callback, NULL, 0, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block acpi_processor_latency_notifier = {
+	.notifier_call = acpi_processor_latency_notify,
+};
+#endif
+
+int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
 	acpi_status status = 0;
-	static int first_run = 0;
+	static int first_run;
 	struct proc_dir_entry *entry = NULL;
 	unsigned int i;
 
-	ACPI_FUNCTION_TRACE("acpi_processor_power_init");
 
 	if (!first_run) {
 		dmi_check_system(processor_power_dmi_table);
@@ -1052,22 +1226,23 @@ int acpi_processor_power_init(struct acpi_processor *pr,
 			       "ACPI: processor limited to max C-state %d\n",
 			       max_cstate);
 		first_run++;
+#ifdef CONFIG_SMP
+		register_latency_notifier(&acpi_processor_latency_notifier);
+#endif
 	}
 
 	if (!pr)
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 
-	if (acpi_fadt.cst_cnt && !nocst) {
+	if (acpi_gbl_FADT.cst_control && !nocst) {
 		status =
-		    acpi_os_write_port(acpi_fadt.smi_cmd, acpi_fadt.cst_cnt, 8);
+		    acpi_os_write_port(acpi_gbl_FADT.smi_command, acpi_gbl_FADT.cst_control, 8);
 		if (ACPI_FAILURE(status)) {
-			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-					  "Notifying BIOS of _CST ability failed\n"));
+			ACPI_EXCEPTION((AE_INFO, status,
+					"Notifying BIOS of _CST ability failed"));
 		}
 	}
 
-	acpi_processor_power_init_pdc(&(pr->power), pr->id);
-	acpi_processor_set_pdc(pr, pr->power.pdc);
 	acpi_processor_get_power_info(pr);
 
 	/*
@@ -1093,9 +1268,7 @@ int acpi_processor_power_init(struct acpi_processor *pr,
 		entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER,
 					  S_IRUGO, acpi_device_dir(device));
 		if (!entry)
-			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-					  "Unable to create '%s' fs entry\n",
-					  ACPI_PROCESSOR_FILE_POWER));
+			return -EIO;
 		else {
 			entry->proc_fops = &acpi_processor_power_fops;
 			entry->data = acpi_driver_data(device);
@@ -1104,13 +1277,12 @@ int acpi_processor_power_init(struct acpi_processor *pr,
 
 	pr->flags.power_setup_done = 1;
 
-	return_VALUE(0);
+	return 0;
 }
 
 int acpi_processor_power_exit(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
-	ACPI_FUNCTION_TRACE("acpi_processor_power_exit");
 
 	pr->flags.power_setup_done = 0;
 
@@ -1128,7 +1300,10 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
 		 * copies of pm_idle before proceeding.
 		 */
 		cpu_idle_wait();
+#ifdef CONFIG_SMP
+		unregister_latency_notifier(&acpi_processor_latency_notifier);
+#endif
 	}
 
-	return_VALUE(0);
+	return 0;
}
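
The sketch below is not part of the patch; it is a standalone user-space illustration of the PM-timer delta arithmetic that ticks_elapsed() above implements. The function name pm_ticks_elapsed, the timer_is_32bit flag, and the main() harness are invented for the example. The ACPI PM timer is a free-running 3.579545 MHz counter that is either 24 or 32 bits wide; the hunk at @@ -119,7 +170,7 @@ only changes how the driver queries the counter width from the FADT (acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER instead of acpi_fadt.tmr_val_ext), while the rollover handling itself is unchanged:

#include <stdint.h>
#include <stdio.h>

/* Mirrors ticks_elapsed(): t1 and t2 are two raw reads of the free-running
 * ACPI PM timer.  A reading of t2 < t1 means the counter rolled over between
 * the reads, so the counter width decides which correction applies. */
static uint32_t pm_ticks_elapsed(uint32_t t1, uint32_t t2, int timer_is_32bit)
{
	if (t2 >= t1)
		return t2 - t1;
	else if (!timer_is_32bit)
		return ((0x00FFFFFF - t1) + t2) & 0x00FFFFFF;	/* 24-bit rollover */
	else
		return (0xFFFFFFFF - t1) + t2;			/* 32-bit rollover */
}

int main(void)
{
	/* 24-bit counter wrapping from near its top back to a small value */
	printf("%u\n", pm_ticks_elapsed(0x00FFFFF0, 0x10, 0));	/* prints 31 */
	return 0;
}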