OSDN Git Service

change x41t device tree into thinkpad for all thinkpads, merge latest changes from...
[android-x86/device-ibm-thinkpad.git] / phc-intel / phc-intel.c
1 /*
2  * acpi-cpufreq.c - ACPI Processor P-States Driver
3  *
4  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6  *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7  *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
8  *
9  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10  *
11  *  This program is free software; you can redistribute it and/or modify
12  *  it under the terms of the GNU General Public License as published by
13  *  the Free Software Foundation; either version 2 of the License, or (at
14  *  your option) any later version.
15  *
16  *  This program is distributed in the hope that it will be useful, but
17  *  WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with this program; if not, write to the Free Software Foundation, Inc.,
23  *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
24  *
25  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
26  */
27
28 /* This file has been patched with Linux PHC: www.linux-phc.org
29 * Patch version: linux-phc-0.3.2
30 */
31
32 #include <linux/kernel.h>
33 #include <linux/module.h>
34 #include <linux/init.h>
35 #include <linux/smp.h>
36 #include <linux/sched.h>
37 #include <linux/compiler.h>
38 #include <linux/dmi.h>
39 #include <linux/slab.h>
40 #include <linux/cpufreq.h>
41
42 #include <linux/acpi.h>
43 #include <linux/io.h>
44 #include <linux/delay.h>
45 #include <linux/uaccess.h>
46
47 #include <acpi/processor.h>
48
49 #include <asm/msr.h>
50 #include <asm/processor.h>
51 #include <asm/cpufeature.h>
52 #include "../drivers/cpufreq/mperf.h"
53
54 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
55 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
56 MODULE_LICENSE("GPL");
57
/* How P-state transitions are performed on this CPU. */
enum {
        UNDEFINED_CAPABLE = 0,          /* no supported control interface */
        SYSTEM_INTEL_MSR_CAPABLE,       /* via IA32 PERF_CTL/PERF_STATUS MSRs */
        SYSTEM_IO_CAPABLE,              /* via ACPI system-I/O ports */
};
63
64 #define INTEL_MSR_RANGE         (0xffff)
65 #define INTEL_MSR_VID_MASK      (0x00ff)
66 #define INTEL_MSR_FID_MASK      (0xff00)
67 #define INTEL_MSR_FID_SHIFT     (0x8)
68 #define PHC_VERSION_STRING      "0.3.2:2"
69
/* Per-CPU driver state; referenced through the acfreq_data per-cpu pointer. */
struct acpi_cpufreq_data {
        struct acpi_processor_performance *acpi_data;   /* ACPI _PSS performance data */
        struct cpufreq_frequency_table *freq_table;     /* table built from _PSS states */
        unsigned int resume;            /* force a hardware write on next ->target() */
        unsigned int cpu_feature;       /* one of the SYSTEM_*_CAPABLE values */
        acpi_integer *original_controls; /* PHC: lazily-made backup of ACPI controls */
};
77
/* Per-CPU pointer to this driver's state for each managed CPU. */
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static struct cpufreq_driver acpi_cpufreq_driver;

/* When non-zero, ->target() verifies each transition by polling the
 * hardware (see check_freqs()).  NOTE(review): presumably set via a
 * module parameter declared outside this chunk — confirm. */
static unsigned int acpi_pstate_strict;
86
87 static int check_est_cpu(unsigned int cpuid)
88 {
89         struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
90
91         return cpu_has(cpu, X86_FEATURE_EST);
92 }
93
94 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
95 {
96         struct acpi_processor_performance *perf;
97         int i;
98
99         perf = data->acpi_data;
100
101         for (i = 0; i < perf->state_count; i++) {
102                 if (value == perf->states[i].status)
103                         return data->freq_table[i].frequency;
104         }
105         return 0;
106 }
107
108 static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
109 {
110         int i;
111         u32 fid;
112         struct acpi_processor_performance *perf;
113
114         fid = msr & INTEL_MSR_FID_MASK;
115         perf = data->acpi_data;
116
117         for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
118                 if (fid == (perf->states[data->freq_table[i].index].status & INTEL_MSR_FID_MASK))
119                         return data->freq_table[i].frequency;
120         }
121         return data->freq_table[0].frequency;
122 }
123
124 static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
125 {
126         switch (data->cpu_feature) {
127         case SYSTEM_INTEL_MSR_CAPABLE:
128                 return extract_msr(val, data);
129         case SYSTEM_IO_CAPABLE:
130                 return extract_io(val, data);
131         default:
132                 return 0;
133         }
134 }
135
/* MSR register number for do_drv_read()/do_drv_write(). */
struct msr_addr {
        u32 reg;
};

/* ACPI system-I/O port number and access width in bits. */
struct io_addr {
        u16 port;
        u8 bit_width;
};

/*
 * A read/write command shipped to target CPUs via the smp_call_*
 * helpers; "type" (a SYSTEM_*_CAPABLE value) selects which member of
 * "addr" is valid.
 */
struct drv_cmd {
        unsigned int type;
        const struct cpumask *mask;     /* CPUs the command applies to */
        union {
                struct msr_addr msr;
                struct io_addr io;
        } addr;
        u32 val;                        /* value read, or value to write */
};
154
155 /* Called via smp_call_function_single(), on the target CPU */
156 static void do_drv_read(void *_cmd)
157 {
158         struct drv_cmd *cmd = _cmd;
159         u32 h;
160
161         switch (cmd->type) {
162         case SYSTEM_INTEL_MSR_CAPABLE:
163                 rdmsr(cmd->addr.msr.reg, cmd->val, h);
164                 break;
165         case SYSTEM_IO_CAPABLE:
166                 acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
167                                 &cmd->val,
168                                 (u32)cmd->addr.io.bit_width);
169                 break;
170         default:
171                 break;
172         }
173 }
174
175 /* Called via smp_call_function_many(), on the target CPUs */
176 static void do_drv_write(void *_cmd)
177 {
178         struct drv_cmd *cmd = _cmd;
179         u32 lo, hi;
180
181         switch (cmd->type) {
182         case SYSTEM_INTEL_MSR_CAPABLE:
183                 rdmsr(cmd->addr.msr.reg, lo, hi);
184                 lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
185                 wrmsr(cmd->addr.msr.reg, lo, hi);
186                 break;
187         case SYSTEM_IO_CAPABLE:
188                 acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
189                                 cmd->val,
190                                 (u32)cmd->addr.io.bit_width);
191                 break;
192         default:
193                 break;
194         }
195 }
196
197 static void drv_read(struct drv_cmd *cmd)
198 {
199         int err;
200         cmd->val = 0;
201
202         err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
203         WARN_ON_ONCE(err);      /* smp_call_function_any() was buggy? */
204 }
205
/*
 * Execute a write command on every CPU in cmd->mask.  The current CPU
 * is handled directly because smp_call_function_many() skips the
 * calling CPU; get_cpu()/put_cpu() pin us so "this_cpu" stays valid.
 */
static void drv_write(struct drv_cmd *cmd)
{
        int this_cpu;

        this_cpu = get_cpu();
        if (cpumask_test_cpu(this_cpu, cmd->mask))
                do_drv_write(cmd);
        smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
        put_cpu();
}
216
/*
 * Read the raw P-state status for the CPUs in @mask, using the control
 * interface (MSR or I/O port) recorded for the first CPU in the mask.
 * Returns 0 for an empty mask or an unknown interface type.
 */
static u32 get_cur_val(const struct cpumask *mask)
{
        struct acpi_processor_performance *perf;
        struct drv_cmd cmd;

        if (unlikely(cpumask_empty(mask)))
                return 0;

        switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
        case SYSTEM_INTEL_MSR_CAPABLE:
                cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
                cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
                break;
        case SYSTEM_IO_CAPABLE:
                cmd.type = SYSTEM_IO_CAPABLE;
                perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
                cmd.addr.io.port = perf->control_register.address;
                cmd.addr.io.bit_width = perf->control_register.bit_width;
                break;
        default:
                return 0;
        }

        cmd.mask = mask;
        drv_read(&cmd);

        pr_debug("get_cur_val = %u\n", cmd.val);

        return cmd.val;
}
247
/*
 * cpufreq ->get callback (installed only for MSR-capable CPUs): read
 * the current frequency of @cpu from hardware.  When it differs from
 * the cached table entry, flag a forced rewrite on the next ->target().
 * Returns 0 if this CPU was never fully initialized.
 */
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
        struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
        unsigned int freq;
        unsigned int cached_freq;

        pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);

        if (unlikely(data == NULL ||
                     data->acpi_data == NULL || data->freq_table == NULL)) {
                return 0;
        }

        cached_freq = data->freq_table[data->acpi_data->state].frequency;
        freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
        if (freq != cached_freq) {
                /*
                 * The dreaded BIOS frequency change behind our back.
                 * Force set the frequency on next target call.
                 */
                data->resume = 1;
        }

        pr_debug("cur freq = %u\n", freq);

        return freq;
}
275
/*
 * Poll the hardware (up to 100 attempts, 10us apart) until the current
 * frequency equals @freq; returns 1 on match, 0 on timeout.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
                                struct acpi_cpufreq_data *data)
{
        unsigned int attempt;

        for (attempt = 0; attempt < 100; attempt++) {
                if (extract_freq(get_cur_val(mask), data) == freq)
                        return 1;
                udelay(10);
        }
        return 0;
}
290
/*
 * cpufreq ->target callback: move the CPUs covered by @policy to the
 * table frequency matching @target_freq under @relation, issuing
 * PRECHANGE/POSTCHANGE notifications around the hardware write.
 * Returns 0 on success, -ENODEV if uninitialized or the lookup fails,
 * -EAGAIN if strict verification times out.
 */
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                               unsigned int target_freq, unsigned int relation)
{
        struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
        struct acpi_processor_performance *perf;
        struct cpufreq_freqs freqs;
        struct drv_cmd cmd;
        unsigned int next_state = 0; /* Index into freq_table */
        unsigned int next_perf_state = 0; /* Index into perf table */
        unsigned int i;
        int result = 0;

        pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);

        if (unlikely(data == NULL ||
             data->acpi_data == NULL || data->freq_table == NULL)) {
                return -ENODEV;
        }

        perf = data->acpi_data;
        result = cpufreq_frequency_table_target(policy,
                                                data->freq_table,
                                                target_freq,
                                                relation, &next_state);
        if (unlikely(result)) {
                result = -ENODEV;
                goto out;
        }

        next_perf_state = data->freq_table[next_state].index;
        if (perf->state == next_perf_state) {
                /* Already at the target state: skip the write, except
                 * right after resume when the registers must be redone. */
                if (unlikely(data->resume)) {
                        pr_debug("Called after resume, resetting to P%d\n",
                                next_perf_state);
                        data->resume = 0;
                } else {
                        pr_debug("Already at target state (P%d)\n",
                                next_perf_state);
                        goto out;
                }
        }

        /* Build the write command for this CPU's control interface. */
        switch (data->cpu_feature) {
        case SYSTEM_INTEL_MSR_CAPABLE:
                cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
                cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
                cmd.val = (u32) perf->states[next_perf_state].control;
                break;
        case SYSTEM_IO_CAPABLE:
                cmd.type = SYSTEM_IO_CAPABLE;
                cmd.addr.io.port = perf->control_register.address;
                cmd.addr.io.bit_width = perf->control_register.bit_width;
                cmd.val = (u32) perf->states[next_perf_state].control;
                break;
        default:
                result = -ENODEV;
                goto out;
        }

        /* cpufreq holds the hotplug lock, so we are safe from here on */
        if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
                cmd.mask = policy->cpus;
        else
                cmd.mask = cpumask_of(policy->cpu);

        freqs.old = perf->states[perf->state].core_frequency * 1000;
        freqs.new = data->freq_table[next_state].frequency;
        for_each_cpu(i, policy->cpus) {
                freqs.cpu = i;
                cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
        }

        drv_write(&cmd);

        /* In strict mode, confirm the hardware really switched. */
        if (acpi_pstate_strict) {
                if (!check_freqs(cmd.mask, freqs.new, data)) {
                        pr_debug("acpi_cpufreq_target failed (%d)\n",
                                policy->cpu);
                        result = -EAGAIN;
                        goto out;
                }
        }

        for_each_cpu(i, policy->cpus) {
                freqs.cpu = i;
                cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
        }
        perf->state = next_perf_state;

out:
        return result;
}
383
384 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
385 {
386         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
387
388         pr_debug("acpi_cpufreq_verify\n");
389
390         return cpufreq_frequency_table_verify(policy, data->freq_table);
391 }
392
393 static unsigned long
394 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
395 {
396         struct acpi_processor_performance *perf = data->acpi_data;
397
398         if (cpu_khz) {
399                 /* search the closest match to cpu_khz */
400                 unsigned int i;
401                 unsigned long freq;
402                 unsigned long freqn = perf->states[0].core_frequency * 1000;
403
404                 for (i = 0; i < (perf->state_count-1); i++) {
405                         freq = freqn;
406                         freqn = perf->states[i+1].core_frequency * 1000;
407                         if ((2 * cpu_khz) > (freqn + freq)) {
408                                 perf->state = i;
409                                 return freq;
410                         }
411                 }
412                 perf->state = perf->state_count-1;
413                 return freqn;
414         } else {
415                 /* assume CPU is at P0... */
416                 perf->state = 0;
417                 return perf->states[0].core_frequency * 1000;
418         }
419 }
420
/* Release everything acpi_cpufreq_early_init() allocated. */
static void free_acpi_perf_data(void)
{
        unsigned int i;

        /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
        for_each_possible_cpu(i)
                free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
                                 ->shared_cpu_map);
        free_percpu(acpi_perf_data);
}
431
432 /*
433  * acpi_cpufreq_early_init - initialize ACPI P-States library
434  *
435  * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
436  * in order to determine correct frequency and voltage pairings. We can
437  * do _PDC and _PSD and find out the processor dependency for the
438  * actual init that will happen later...
439  */
static int __init acpi_cpufreq_early_init(void)
{
        unsigned int i;
        pr_debug("acpi_cpufreq_early_init\n");

        acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
        if (!acpi_perf_data) {
                pr_debug("Memory allocation error for acpi_perf_data.\n");
                return -ENOMEM;
        }
        /* Each CPU also needs a cpumask for its shared_cpu_map. */
        for_each_possible_cpu(i) {
                if (!zalloc_cpumask_var_node(
                        &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
                        GFP_KERNEL, cpu_to_node(i))) {

                        /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
                        free_acpi_perf_data();
                        return -ENOMEM;
                }
        }

        /* Do initialization in ACPI core */
        /* NOTE(review): return value ignored here — presumably failures
         * surface later in acpi_processor_register_performance(); confirm. */
        acpi_processor_preregister_performance(acpi_perf_data);
        return 0;
}
465
466 #ifdef CONFIG_SMP
467 /*
468  * Some BIOSes do SW_ANY coordination internally, either set it up in hw
469  * or do it in BIOS firmware and won't inform about it to OS. If not
470  * detected, this has a side effect of making CPU run at a different speed
471  * than OS intended it to run at. Detect it and handle it cleanly.
472  */
/* Set by the DMI callback below when this machine has the SW_ANY bug. */
static int bios_with_sw_any_bug;

/* DMI match callback: remember that the BIOS coordinates P-states itself. */
static int sw_any_bug_found(const struct dmi_system_id *d)
{
        bios_with_sw_any_bug = 1;
        return 0;
}
480
/* Machines known to do SW_ANY coordination in BIOS/hardware. */
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
        {
                .callback = sw_any_bug_found,
                .ident = "Supermicro Server X6DLP",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
                        DMI_MATCH(DMI_BIOS_VERSION, "080010"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
                },
        },
        { }     /* terminator */
};
493
494 static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
495 {
496         /* Intel Xeon Processor 7100 Series Specification Update
497          * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
498          * AL30: A Machine Check Exception (MCE) Occurring during an
499          * Enhanced Intel SpeedStep Technology Ratio Change May Cause
500          * Both Processor Cores to Lock Up. */
501         if (c->x86_vendor == X86_VENDOR_INTEL) {
502                 if ((c->x86 == 15) &&
503                     (c->x86_model == 6) &&
504                     (c->x86_mask == 8)) {
505                         printk(KERN_INFO "acpi-cpufreq: Intel(R) "
506                             "Xeon(R) 7100 Errata AL30, processors may "
507                             "lock up on frequency changes: disabling "
508                             "acpi-cpufreq.\n");
509                         return -ENODEV;
510                     }
511                 }
512         return 0;
513 }
514 #endif
515
/*
 * cpufreq ->init callback: register this CPU with the ACPI P-state
 * library, build the frequency table from the _PSS data, and fill in
 * @policy (latency, initial frequency, related CPUs).  Returns 0 on
 * success or a negative errno; on failure all partial state is undone
 * via the err_* labels in reverse order of acquisition.
 */
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
        unsigned int i;
        unsigned int valid_states = 0;
        unsigned int cpu = policy->cpu;
        struct acpi_cpufreq_data *data;
        unsigned int result = 0;
        struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
        struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
        static int blacklisted;         /* blacklist verdict, computed once */
#endif

        pr_debug("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
        if (blacklisted)
                return blacklisted;
        blacklisted = acpi_cpufreq_blacklist(c);
        if (blacklisted)
                return blacklisted;
#endif

        data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
        per_cpu(acfreq_data, cpu) = data;

        if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
                acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

        result = acpi_processor_register_performance(data->acpi_data, cpu);
        if (result)
                goto err_free;

        perf = data->acpi_data;
        policy->shared_type = perf->shared_type;

        /*
         * Will let policy->cpus know about dependency only when software
         * coordination is required.
         */
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
            policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
                cpumask_copy(policy->cpus, perf->shared_cpu_map);
        }
        cpumask_copy(policy->related_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
        /* Work around BIOSes that do SW_ANY coordination themselves. */
        dmi_check_system(sw_any_bug_dmi_table);
        if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
                policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
                cpumask_copy(policy->cpus, cpu_core_mask(cpu));
        }
#endif

        /* capability check */
        if (perf->state_count <= 1) {
                pr_debug("No P-States\n");
                result = -ENODEV;
                goto err_unreg;
        }

        if (perf->control_register.space_id != perf->status_register.space_id) {
                result = -ENODEV;
                goto err_unreg;
        }

        switch (perf->control_register.space_id) {
        case ACPI_ADR_SPACE_SYSTEM_IO:
                pr_debug("SYSTEM IO addr space\n");
                data->cpu_feature = SYSTEM_IO_CAPABLE;
                break;
        case ACPI_ADR_SPACE_FIXED_HARDWARE:
                pr_debug("HARDWARE addr space\n");
                if (!check_est_cpu(cpu)) {
                        result = -ENODEV;
                        goto err_unreg;
                }
                data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
                break;
        default:
                pr_debug("Unknown addr space %d\n",
                        (u32) (perf->control_register.space_id));
                result = -ENODEV;
                goto err_unreg;
        }

        /* +1 slot for the CPUFREQ_TABLE_END sentinel. */
        data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
                    (perf->state_count+1), GFP_KERNEL);
        if (!data->freq_table) {
                result = -ENOMEM;
                goto err_unreg;
        }

        /* detect transition latency */
        policy->cpuinfo.transition_latency = 0;
        for (i = 0; i < perf->state_count; i++) {
                if ((perf->states[i].transition_latency * 1000) >
                    policy->cpuinfo.transition_latency)
                        policy->cpuinfo.transition_latency =
                            perf->states[i].transition_latency * 1000;
        }

        /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
        if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
            policy->cpuinfo.transition_latency > 20 * 1000) {
                policy->cpuinfo.transition_latency = 20 * 1000;
                printk_once(KERN_INFO
                            "P-state transition latency capped at 20 uS\n");
        }

        /* table init: keep only states whose frequency strictly
         * decreases relative to the previous accepted entry */
        for (i = 0; i < perf->state_count; i++) {
                if (i > 0 && perf->states[i].core_frequency >=
                    data->freq_table[valid_states-1].frequency / 1000)
                        continue;

                data->freq_table[valid_states].index = i;
                data->freq_table[valid_states].frequency =
                    perf->states[i].core_frequency * 1000;
                valid_states++;
        }
        data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
        perf->state = 0;

        result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
        if (result)
                goto err_freqfree;

        if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
                printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

        switch (perf->control_register.space_id) {
        case ACPI_ADR_SPACE_SYSTEM_IO:
                /* Current speed is unknown and not detectable by IO port */
                policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
                break;
        case ACPI_ADR_SPACE_FIXED_HARDWARE:
                acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
                policy->cur = get_cur_freq_on_cpu(cpu);
                break;
        default:
                break;
        }

        /* notify BIOS that we exist */
        acpi_processor_notify_smm(THIS_MODULE);

        /* Check for APERF/MPERF support in hardware */
        if (cpu_has(c, X86_FEATURE_APERFMPERF))
                acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;

        pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
        for (i = 0; i < perf->state_count; i++)
                pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
                        (i == perf->state ? '*' : ' '), i,
                        (u32) perf->states[i].core_frequency,
                        (u32) perf->states[i].power,
                        (u32) perf->states[i].transition_latency);

        cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);

        /*
         * the first call to ->target() should result in us actually
         * writing something to the appropriate registers.
         */
        data->resume = 1;

        return result;

err_freqfree:
        kfree(data->freq_table);
err_unreg:
        acpi_processor_unregister_performance(perf, cpu);
err_free:
        kfree(data);
        per_cpu(acfreq_data, cpu) = NULL;

        return result;
}
699
700 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
701 {
702         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
703
704         pr_debug("acpi_cpufreq_cpu_exit\n");
705
706         if (data) {
707                 cpufreq_frequency_table_put_attr(policy->cpu);
708                 per_cpu(acfreq_data, policy->cpu) = NULL;
709                 acpi_processor_unregister_performance(data->acpi_data,
710                                                       policy->cpu);
711                 if (data->original_controls)
712                         kfree(data->original_controls);
713                 kfree(data->freq_table);
714                 kfree(data);
715         }
716
717         return 0;
718 }
719
/*
 * cpufreq ->resume callback: force the next ->target() call to actually
 * program the hardware, since the BIOS may have changed the P-state
 * across suspend/resume.
 */
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
        struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

        pr_debug("acpi_cpufreq_resume\n");

        /* NOTE(review): "data" is dereferenced without a NULL check,
         * unlike ->get()/->target(); presumably resume is only invoked
         * for successfully-initialized CPUs — confirm. */
        data->resume = 1;

        return 0;
}
730
731
732 /* sysfs interface to change operating points voltages */
733
734 static unsigned int extract_fid_from_control(unsigned int control)
735 {
736         return ((control & INTEL_MSR_FID_MASK) >> INTEL_MSR_FID_SHIFT);
737 }
738
739 static unsigned int extract_vid_from_control(unsigned int control)
740 {
741         return (control & INTEL_MSR_VID_MASK);
742 }
743
744
745 static bool check_cpu_control_capability(struct acpi_cpufreq_data *data) {
746  /* check if the cpu we are running on is capable of setting new control data
747   * 
748   */
749         if (unlikely(data == NULL || 
750                      data->acpi_data == NULL || 
751                      data->freq_table == NULL ||
752                      data->cpu_feature != SYSTEM_INTEL_MSR_CAPABLE)) {
753                 return false;
754         } else {
755                 return true;
756         };
757 }
758
759
760 static ssize_t check_origial_table (struct acpi_cpufreq_data *data)
761 {
762
763         struct acpi_processor_performance *acpi_data;
764         struct cpufreq_frequency_table *freq_table;
765         unsigned int state_index;
766
767         acpi_data = data->acpi_data;
768         freq_table = data->freq_table;
769
770         if (data->original_controls == NULL) {
771                 // Backup original control values
772                 data->original_controls = kcalloc(acpi_data->state_count,
773                                                   sizeof(acpi_integer), GFP_KERNEL);
774                 if (data->original_controls == NULL) {
775                         printk("failed to allocate memory for original control values\n");
776                         return -ENOMEM;
777                 }
778                 for (state_index = 0; state_index < acpi_data->state_count; state_index++) {
779                         data->original_controls[state_index] = acpi_data->states[state_index].control;
780                 }
781         }
782         return 0;
783 }
784
785 static ssize_t show_freq_attr_vids(struct cpufreq_policy *policy, char *buf)
786  /* display phc's voltage id's
787   * 
788   */
789 {
790         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
791         struct acpi_processor_performance *acpi_data;
792         struct cpufreq_frequency_table *freq_table;
793         unsigned int i;
794         unsigned int vid;
795         ssize_t count = 0;
796
797         if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls
798
799         acpi_data = data->acpi_data;
800         freq_table = data->freq_table;
801
802         for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
803                 vid = extract_vid_from_control(acpi_data->states[freq_table[i].index].control);
804                 count += sprintf(&buf[count], "%u ", vid);
805         }
806         count += sprintf(&buf[count], "\n");
807
808         return count;
809 }
810
811 static ssize_t show_freq_attr_default_vids(struct cpufreq_policy *policy, char *buf)
812  /* display acpi's default voltage id's
813   * 
814   */
815 {
816         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
817         struct cpufreq_frequency_table *freq_table;
818         unsigned int i;
819         unsigned int vid;
820         ssize_t count = 0;
821         ssize_t retval;
822
823         if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls
824
825         retval = check_origial_table(data);
826         if (0 != retval)
827                 return retval; 
828
829         freq_table = data->freq_table;
830
831         for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
832                 vid = extract_vid_from_control(data->original_controls[freq_table[i].index]);
833                 count += sprintf(&buf[count], "%u ", vid);
834         }
835         count += sprintf(&buf[count], "\n");
836
837         return count;
838 }
839
840 static ssize_t show_freq_attr_fids(struct cpufreq_policy *policy, char *buf)
841  /* display phc's frequeny id's
842   * 
843   */
844 {
845         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
846         struct acpi_processor_performance *acpi_data;
847         struct cpufreq_frequency_table *freq_table;
848         unsigned int i;
849         unsigned int fid;
850         ssize_t count = 0;
851
852         if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls
853
854         acpi_data = data->acpi_data;
855         freq_table = data->freq_table;
856
857         for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
858                 fid = extract_fid_from_control(acpi_data->states[freq_table[i].index].control);
859                 count += sprintf(&buf[count], "%u ", fid);
860         }
861         count += sprintf(&buf[count], "\n");
862
863         return count;
864 }
865
866 static ssize_t show_freq_attr_controls(struct cpufreq_policy *policy, char *buf)
867  /* display phc's controls for the cpu (frequency id's and related voltage id's)
868   * 
869   */
870 {
871         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
872         struct acpi_processor_performance *acpi_data;
873         struct cpufreq_frequency_table *freq_table;
874         unsigned int i;
875         unsigned int fid;
876         unsigned int vid;
877         ssize_t count = 0;
878
879         if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls
880
881         acpi_data = data->acpi_data;
882         freq_table = data->freq_table;
883
884         for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
885                 fid = extract_fid_from_control(acpi_data->states[freq_table[i].index].control);
886                 vid = extract_vid_from_control(acpi_data->states[freq_table[i].index].control);
887                 count += sprintf(&buf[count], "%u:%u ", fid, vid);
888         }
889         count += sprintf(&buf[count], "\n");
890
891         return count;
892 }
893
894 static ssize_t show_freq_attr_default_controls(struct cpufreq_policy *policy, char *buf)
895  /* display acpi's default controls for the cpu (frequency id's and related voltage id's)
896   * 
897   */
898 {
899         struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
900         struct cpufreq_frequency_table *freq_table;
901         unsigned int i;
902         unsigned int fid;
903         unsigned int vid;
904         ssize_t count = 0;
905         ssize_t retval;
906
907         if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls
908
909         retval = check_origial_table(data);
910         if (0 != retval)
911                 return retval; 
912
913         freq_table = data->freq_table;
914
915         for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
916                 fid = extract_fid_from_control(data->original_controls[freq_table[i].index]);
917                 vid = extract_vid_from_control(data->original_controls[freq_table[i].index]);
918                 count += sprintf(&buf[count], "%u:%u ", fid, vid);
919         }
920         count += sprintf(&buf[count], "\n");
921
922         return count;
923 }
924
925
static ssize_t store_freq_attr_vids(struct cpufreq_policy *policy, const char *buf, size_t count)
 /* sysfs "phc_vids" store: parse one voltage ID (VID) per frequency-table
  * entry from the user buffer and write it into the corresponding ACPI
  * P-state control word. A sanity check rejects any VID above the ACPI
  * default for that state, so voltage can only be lowered, never raised.
  * Returns the number of bytes consumed, -ENODEV if controls cannot be
  * changed, or -EINVAL on a parse error.
  */
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_index;
	unsigned int state_index;
	unsigned int new_vid;
	unsigned int original_vid;
	unsigned int new_control;
	unsigned int original_control;
	const char *curr_buf = buf;	/* parse cursor into the user buffer */
	char *next_buf;
	ssize_t retval;

	if (!check_cpu_control_capability(data)) return -ENODEV; //check if CPU is capable of changing controls

	/* make sure the snapshot of the original control table exists */
	retval = check_origial_table(data);
	if (0 != retval)
		return retval; 

	acpi_data = data->acpi_data;
	freq_table = data->freq_table;

	/* Take one decimal value from the sysfs buffer for each frequency-table
	 * entry and convert it to an unsigned integer.
	 */
	for (freq_index = 0; freq_table[freq_index].frequency != CPUFREQ_TABLE_END; freq_index++) {
		new_vid = simple_strtoul(curr_buf, &next_buf, 10);
		if (next_buf == curr_buf) {
			/* nothing parsed: accept a trailing newline as end of input */
			if ((curr_buf - buf == count - 1) && (*curr_buf == '\n')) {   //end of line?
				curr_buf++;
				break;
			}
			/* not end-of-line but nothing parseable -> malformed input */
			printk("failed to parse vid value at %i (%s)\n", freq_index, curr_buf);
			return -EINVAL;
		}

		state_index = freq_table[freq_index].index;
		original_control = data->original_controls[state_index];
		original_vid = original_control & INTEL_MSR_VID_MASK;
		
		/* Only accept VIDs at or below the ACPI default; a higher
		 * voltage than the factory value is never written.
		 */
		if (new_vid <= original_vid) {
			new_control = (original_control & ~INTEL_MSR_VID_MASK) | new_vid;
			printk("setting control at %i to %x (default is %x)\n",
				freq_index, new_control, original_control);
			acpi_data->states[state_index].control = new_control;

		} else {
			printk("skipping vid at %i, %u is greater than default %u\n",
			       freq_index, new_vid, original_vid);
		}

		curr_buf = next_buf;
		/* Skip value separators (space or comma). More than one
		 * separator character may sit between two values, hence
		 * the loop.
		 */
		while ((curr_buf - buf < count) && ((*curr_buf == ' ') || (*curr_buf == ','))) {
			curr_buf++;
		}
	}

	/* re-apply the current frequency so the new voltage takes effect */
	data->resume = 1;
	acpi_cpufreq_target(policy, get_cur_freq_on_cpu(policy->cpu), CPUFREQ_RELATION_L);

	return curr_buf - buf;
}
1001
static ssize_t store_freq_attr_controls(struct cpufreq_policy *policy, const char *buf, size_t count)
 /* sysfs "phc_controls" store: parse a list of "fid:vid" operating points
  * (separated by spaces or commas) and, for every ACPI P-state whose FID
  * matches, replace the VID field of its control word. As with phc_vids,
  * a VID above the ACPI default for that state is rejected, so voltage can
  * only be lowered. Returns count on success, -ENODEV if controls cannot
  * be changed, or -EINVAL on any parse failure or unknown FID.
  *
  * NOTE(review): also invoked from param_set_phc_controls() with count == 0;
  * in that path the return value is ignored.
  */
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	const char   *curr_buf;		/* parse cursor; NULL terminates the loop */
	unsigned int  op_count;		/* 1-based operating-point counter for messages */
	unsigned int  state_index;
	int           isok;		/* stays 1 while parsing succeeds */
	char         *next_buf;
	ssize_t       retval;
	unsigned int  new_vid;
	unsigned int  original_vid;
	unsigned int  new_fid;
	unsigned int  old_fid;
	unsigned int  original_control;
	unsigned int  old_control;
	unsigned int  new_control;
	int           found;		/* did any P-state match the given FID? */

	if (!check_cpu_control_capability(data)) return -ENODEV;

	/* make sure the snapshot of the original control table exists */
	retval = check_origial_table(data);
	if (0 != retval)
		return retval;

	acpi_data = data->acpi_data;
	freq_table = data->freq_table;

	op_count = 0;
	curr_buf = buf;
	next_buf = NULL;
	isok     = 1;
	
	while ( (isok) && (curr_buf != NULL) )
	{
		op_count++;
		// Parse the FID (decimal)
		new_fid = simple_strtoul(curr_buf, &next_buf, 10);
		if ((next_buf != curr_buf) && (next_buf != NULL))
		{
			// Expect the ':' separator between FID and VID
			curr_buf = next_buf;
			next_buf = NULL;
			if (*curr_buf==':')
			{
				curr_buf++;
				// Parse the VID (decimal)
				new_vid = simple_strtoul(curr_buf, &next_buf, 10);
				if ((next_buf != curr_buf) && (next_buf != NULL))
				{
					/* apply the VID to every P-state with a matching FID */
					found = 0;
					for (state_index = 0; state_index < acpi_data->state_count; state_index++) {
						old_control = acpi_data->states[state_index].control;
						old_fid = extract_fid_from_control(old_control);
						if (new_fid == old_fid)
						{
							found = 1;
							original_control = data->original_controls[state_index];
							original_vid = extract_vid_from_control(original_control);
							/* never raise voltage above the ACPI default */
							if (new_vid <= original_vid)
							{
								new_control = (original_control & ~INTEL_MSR_VID_MASK) | new_vid;
								printk("setting control at %i to %x (default is %x)\n",
									state_index, new_control, original_control);
								acpi_data->states[state_index].control = new_control;

							} else {
								printk("skipping vid at %i, %u is greater than default %u\n",
								       state_index, new_vid, original_vid);
							}
						}
					}

					if (found == 0)
					{
						printk("operating point # %u not found (FID = %u)\n", op_count, new_fid);
						isok = 0;
					}

					// Skip the separator before the next operating point, if any;
					// anything else (e.g. '\n' or '\0') ends the loop.
					curr_buf = next_buf;
					next_buf = NULL;
					if ((*curr_buf == ',') || (*curr_buf == ' '))
						curr_buf++;
					else
						curr_buf = NULL;
				}
				else
				{
					printk("failed to parse VID of operating point # %u (%s)\n", op_count, curr_buf);
					isok = 0;
				}
			}
			else
			{
				printk("failed to parse operating point # %u (%s)\n", op_count, curr_buf);
				isok = 0;
			}
		}
		else
		{
			printk("failed to parse FID of operating point # %u (%s)\n", op_count, curr_buf);
			isok = 0;
		}
	}

	if (isok)
	{
		retval = count;
		/* re-apply the current frequency so the new voltage takes effect */
		data->resume = 1;
		acpi_cpufreq_target(policy, get_cur_freq_on_cpu(policy->cpu), CPUFREQ_RELATION_L);
	}
	else
	{
		retval = -EINVAL;
	}

	return retval;
}
1127
1128 static ssize_t show_freq_attr_phc_version(struct cpufreq_policy *policy, char *buf)
1129  /* print out the phc version string set at the beginning of that file
1130   */
1131 {
1132         ssize_t count = 0;
1133         count += sprintf(&buf[count], "%s\n", PHC_VERSION_STRING);
1134         return count;
1135 }
1136
1137
1138
static struct freq_attr cpufreq_freq_attr_phc_version =
{
	/* read-only: PHC patch version string */
       .attr = { .name = "phc_version", .mode = 0444/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_phc_version,
       .store = NULL,
};

static struct freq_attr cpufreq_freq_attr_vids =
{
	/* read/write: PHC's current voltage IDs for the cpu */
       .attr = { .name = "phc_vids", .mode = 0644/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_vids,
       .store = store_freq_attr_vids,
};

static struct freq_attr cpufreq_freq_attr_default_vids =
{
	/* read-only: ACPI's default voltage IDs for the cpu
	 * (original comment said "frequency id's" — it shows VIDs) */
       .attr = { .name = "phc_default_vids", .mode = 0444/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_default_vids,
       .store = NULL,
};

static struct freq_attr cpufreq_freq_attr_fids =
{
	/* read-only: the frequency IDs for the cpu */
       .attr = { .name = "phc_fids", .mode = 0444/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_fids,
       .store = NULL,
};

static struct freq_attr cpufreq_freq_attr_controls =
{
	/* read/write: PHC's current voltage/frequency controls for the cpu */
       .attr = { .name = "phc_controls", .mode = 0644/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_controls,
       .store = store_freq_attr_controls,
};

static struct freq_attr cpufreq_freq_attr_default_controls =
{
	/* read-only: ACPI's default voltage/frequency controls for the cpu */
       .attr = { .name = "phc_default_controls", .mode = 0444/*, .owner = THIS_MODULE */},
       .show = show_freq_attr_default_controls,
       .store = NULL,
};
1186
/* Per-policy sysfs attributes: the stock scaling_available_freqs plus the
 * PHC-specific phc_* entries defined above. NULL-terminated, as required
 * by the cpufreq core.
 */
static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&cpufreq_freq_attr_phc_version,
	&cpufreq_freq_attr_vids,
	&cpufreq_freq_attr_default_vids,
	&cpufreq_freq_attr_fids,
	&cpufreq_freq_attr_controls,
	&cpufreq_freq_attr_default_controls,
	NULL,	/* sentinel */
};
1197
/* cpufreq driver descriptor; registered in acpi_cpufreq_init() and exposes
 * the PHC sysfs attributes through .attr.
 */
static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= acpi_cpufreq_verify,
	.target		= acpi_cpufreq_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.owner		= THIS_MODULE,
	.attr		= acpi_cpufreq_attr,
};
1209
1210 static int __init acpi_cpufreq_init(void)
1211 {
1212         int ret;
1213
1214         if (acpi_disabled)
1215                 return 0;
1216
1217         pr_debug("acpi_cpufreq_init\n");
1218
1219         ret = acpi_cpufreq_early_init();
1220         if (ret)
1221                 return ret;
1222
1223         ret = cpufreq_register_driver(&acpi_cpufreq_driver);
1224         if (ret)
1225                 free_acpi_perf_data();
1226
1227         return ret;
1228 }
1229
1230 static void __exit acpi_cpufreq_exit(void)
1231 {
1232         pr_debug("acpi_cpufreq_exit\n");
1233
1234         cpufreq_unregister_driver(&acpi_cpufreq_driver);
1235
1236         free_acpi_perf_data();
1237 }
1238
/* Runtime-tunable flag (0644): non-zero enables strict ACPI checks during
 * frequency transitions — presumably verifying the transition took effect;
 * the variable is defined earlier in this file (TODO confirm its exact use).
 */
module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");
1243
1244 static int param_set_phc_controls(const char *controls, struct kernel_param *kp)
1245 {
1246         int cpu;
1247
1248         for_each_possible_cpu(cpu) {
1249                 store_freq_attr_controls(cpufreq_cpu_get(cpu), controls, 0);
1250         }
1251         return 0;
1252 }
1253
/* Writable "phc_controls" module parameter: setting it pushes the control
 * string to all CPUs via param_set_phc_controls(); there is no getter. */
module_param_call(phc_controls, param_set_phc_controls, NULL, NULL, 0644);
__MODULE_PARM_TYPE(phc_controls, "string");
MODULE_PARM_DESC(phc_controls, "Set initial phc_controls");

/* late_initcall: register only after earlier init levels have run
 * (presumably so the ACPI processor driver is available first). */
late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

MODULE_ALIAS("acpi");