分类: LINUX
2013-01-15 17:41:24
最近在看CPU如何动态调节频率的相关部分。系统会根据当前负载,以及温度情况来调节cpu的频率,以满足系统需求和省电。
cpu内部有温度传感器,当温度升高或降低到一定程度时,都会触发中断,以此来调节频率(温度升高后,进行降频;温度降低后,如果系统仍需要更高频率,则把频率调高)。另外,系统会根据当前负载,进行调频。这个优先级比温度稍微低些,因为温度方面是通过中断来控制的。
首先,需要时刻更新当前需要的频率。
// cpufreq driver descriptor for OMAP: each member points at the OMAP
// implementation of the corresponding struct cpufreq_driver callback.
532 static struct cpufreq_driver omap_driver = {
533 .flags = CPUFREQ_STICKY,
534 .verify = omap_verify_speed,
535 .target = omap_target, // frequency-change entry point (omap_target)
536 .get = omap_getspeed,
537 .init = omap_cpu_init,
538 .exit = omap_cpu_exit,
539 .name = "omap2plus",
540 .attr = omap_cpufreq_attr,
541 };
这里,最重要的函数omap_target中会进行频率调节(根据负载,调整当前频率为需要的频率),调用的地方待会再看。先看看它的定义:
// omap_target - .target callback of omap_driver.
//
// Looks target_freq up in freq_table (honouring `relation`), records the
// matching table frequency in current_target_freq and, unless cpufreq is
// suspended, scales to it via omap_cpufreq_scale().
//
// Returns -EINVAL when freq_table is NULL, the error from
// cpufreq_frequency_table_target() when no table entry matches, otherwise
// the result of omap_cpufreq_scale() (or 0 if scaling was skipped because
// omap_cpufreq_suspended is set).
267 static int omap_target(struct cpufreq_policy *policy,
268 unsigned int target_freq,
269 unsigned int relation)
270 {
271 unsigned int i;
272 int ret = 0;
273
274 if (!freq_table) {
275 dev_err(mpu_dev, "%s: cpu%d: no freq table!\n", __func__,
276 policy->cpu);
277 return -EINVAL;
278 }
279
// Resolve the request to an index `i` into freq_table.
280 ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
281 relation, &i);
282 if (ret) {
// NOTE(review): target_freq is unsigned int but is printed with %d.
283 dev_dbg(mpu_dev, "%s: cpu%d: no freq match for %d(ret=%d)\n",
284 __func__, policy->cpu, target_freq, ret);
285 return ret;
286 }
287
// Everything from here to the unlock runs under omap_cpufreq_lock.
288 mutex_lock(&omap_cpufreq_lock);
289
// Remember the requested frequency even when suspended; the thermal
// step-up path reads current_target_freq later.
290 current_target_freq = freq_table[i].frequency;
291
292 if (!omap_cpufreq_suspended) {
293 #ifdef CONFIG_OMAP4_DPLL_CASCADING
// On OMAP44xx, hold the DPLL cascading blocker before scaling whenever
// the request is above policy->min.
294 if (cpu_is_omap44xx() && target_freq > policy->min)
295 omap4_dpll_cascading_blocker_hold(mpu_dev);
296 #endif
297 ret = omap_cpufreq_scale(current_target_freq, policy->cur);
298 #ifdef CONFIG_OMAP4_DPLL_CASCADING
// ...and release it after scaling when the request equals policy->min.
299 if (cpu_is_omap44xx() && target_freq == policy->min)
300 omap4_dpll_cascading_blocker_release(mpu_dev);
301 #endif
302 }
303
304 mutex_unlock(&omap_cpufreq_lock);
305
306 return ret;
307 }
根据传入的target_freq值,cpufreq_frequency_table_target()会去查表(这个频率表是预先做好的)
得到表中的索引“i”,然后把freq_table[i].frequency赋给current_target_freq。这是个全局static变量,表示当前系统需求的频率;一般情况下,只要target_freq参数正确,current_target_freq就会和它相同。接着调用omap_cpufreq_scale()进行实际的频率调节工作。注意整个调节过程都加了互斥锁omap_cpufreq_lock,这是唯一的一把锁,任何调节频率的地方都会使用到该锁。
现在看看omap_target()是怎么被调用的吧。omap_driver被注册到系统之后,调节频率时会通过cpufreq_driver->target回调走到omap_target():
// __cpufreq_driver_target - forward a frequency request to the registered
// driver's .target callback.
//
// Returns -EINVAL when policy->cpu is not online or the driver has no
// .target callback; otherwise returns whatever the callback returns.
1417 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1418 unsigned int target_freq,
1419 unsigned int relation)
1420 {
1421 int retval = -EINVAL;
1422
1423 pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1424 target_freq, relation);
// Only call into the driver for an online CPU and a non-NULL callback.
1425 if (cpu_online(policy->cpu) && cpufreq_driver->target)
1426 retval = cpufreq_driver->target(policy, target_freq, relation);
1427
1428 return retval;
1429 }
1430 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
然后,__cpufreq_driver_target被调用的地方就很多了,具体要和当前的cpu调节方式有关,比如interactive, ondemand, performance, conservative, userspace, powersave等
这里分析interactive模式,这种策略根据需求进行调节,和ondemand有些相似。
在
static void cpufreq_interactive_freq_down(struct work_struct *work)
和
static int cpufreq_interactive_up_task(void *data)
中进行频率调节,从函数名字也可以看出来,一个是降频,一个是升频。另外,降频操作是在一个work里面做的,升频是个task,一个内核线程做的。
具体实现就不赘述了,总之都是在一个循环里面,不断的获取当前需要的频率(利用per_cpu()宏获取),然后进行降频/升频操作。
回到上面的current_target_freq变量,该变量还用于温度对频率的影响。这是一套sensor, governor, cooling device的机制,sensor采集和上报温度中断,governor进行管理和调度,并采用cooling_device进行降温(降频)。
看看是怎么做的吧
// omap_thermal_step_freq_down - thermal throttle: lower the frequency cap.
//
// Does nothing (beyond a one-time warning) until omap_cpufreq_ready is set.
// Otherwise, under omap_cpufreq_lock, sets max_thermal to the value returned
// by omap_thermal_lower_speed() and, if cpufreq is not suspended and the
// current speed exceeds that cap, scales down to it.
316 void omap_thermal_step_freq_down(void)
317 {
318 unsigned int cur;
319
320 if (!omap_cpufreq_ready) {
321 pr_warn_once("%s: Thermal throttle prior to CPUFREQ ready\n",
322 __func__);
323 return;
324 }
325
326 mutex_lock(&omap_cpufreq_lock);
327
328 max_thermal = omap_thermal_lower_speed();
329
330 pr_warn("%s: temperature too high, starting cpu throttling at max %u\n",
331 __func__, max_thermal);
332
333 if (!omap_cpufreq_suspended) {
334 cur = omap_getspeed(0);
// Only scale when currently above the new cap.
// NOTE(review): the return value of omap_cpufreq_scale() is ignored here.
335 if (cur > max_thermal)
336 omap_cpufreq_scale(max_thermal, cur);
337 }
338
339 mutex_unlock(&omap_cpufreq_lock);
340 }
341
//降频的时候一级一级往下降
// omap_thermal_step_freq_up - thermal unthrottle: lift the frequency cap.
//
// Returns immediately if omap_cpufreq_ready is not set. Otherwise, under
// omap_cpufreq_lock: if max_thermal already equals max_freq there is nothing
// to undo; else max_thermal is reset to max_freq and, unless cpufreq is
// suspended, the CPU is scaled to current_target_freq.
342 void omap_thermal_step_freq_up(void)
343 {
344 unsigned int cur;
345
346 if (!omap_cpufreq_ready)
347 return;
348
349 mutex_lock(&omap_cpufreq_lock);
350
351 if (max_thermal == max_freq) {
352 pr_warn("%s: not throttling\n", __func__);
353 goto out;
354 }
355
356 max_thermal = max_freq;
357 cur = omap_getspeed(0);
// If the recorded request is above the present speed, overwrite it with the
// value returned by omap_thermal_higher_speed().
// NOTE(review): this replaces the stored request in current_target_freq;
// confirm that is intended.
358 if (current_target_freq > cur)
359 {
360 current_target_freq = omap_thermal_higher_speed();
361 }
362
// NOTE(review): %i is a signed conversion; the declaration of
// current_target_freq is not visible here - confirm its type.
363 pr_warn("%s: temperature reduced, stepping up to %i\n",
364 __func__, current_target_freq);
365
366 if (!omap_cpufreq_suspended) {
367 cur = omap_getspeed(0);
// NOTE(review): the return value of omap_cpufreq_scale() is ignored here.
368 omap_cpufreq_scale(current_target_freq, cur);
369 }
370 out:
371 mutex_unlock(&omap_cpufreq_lock);
372 }
//升频的时候,现在也改为只升一级。升频的时候要用到current_target_freq
操作流程就是这样
// cpufreq_apply_cooling - .cool_device callback of cpufreq_cooling_ops.
//
// Compares the requested cooling_level with current_cooling_level: a lower
// level steps the frequency up, a higher level steps it down, an equal level
// does nothing. The requested level is then stored. `dev` is not used in
// this body. Always returns 0.
381 static int cpufreq_apply_cooling(struct thermal_dev *dev,
382 int cooling_level)
383 {
// NOTE(review): both transitions are logged with pr_err(); confirm that
// error level is intended for these messages.
384 if (cooling_level < current_cooling_level) {
385 pr_err("%s: Unthrottle cool level %i curr cool %i\n",
386 __func__, cooling_level, current_cooling_level);
387 omap_thermal_step_freq_up();
388 } else if (cooling_level > current_cooling_level) {
389 pr_err("%s: Throttle cool level %i curr cool %i\n",
390 __func__, cooling_level, current_cooling_level);
391 omap_thermal_step_freq_down();
392 }
393
394 current_cooling_level = cooling_level;
395
396 return 0;
397 }
// Thermal device operations for the cpufreq cooling device: the only
// callback set here is cool_device, implemented by cpufreq_apply_cooling.
426 static struct thermal_dev_ops cpufreq_cooling_ops = {
427 .cool_device = cpufreq_apply_cooling,
428 };
该cool_device注册进内核。当温度中断发生的时候会进行调用。既然分析了,就再分析下具体过程吧。
drivers/staging/thermal_framework/governor/omap_die_governor.c中
// omap_enter_zone - apply the settings of a thermal zone.
//
// Returns -ENODEV if cooling_list is empty. Otherwise optionally updates
// omap_gov->cooling_level and pushes it to every device on cooling_list,
// then programs the sensor thresholds and report rate from `zone`.
// `cpu_temp` is not used in this body. Returns 0 on the normal path.
253 static int omap_enter_zone(struct omap_thermal_zone *zone,
254 bool set_cooling_level,
255 struct list_head *cooling_list, int cpu_temp)
256 {
257 int temp_upper;
258 int temp_lower;
259
260 if (list_empty(cooling_list)) {
261 pr_err("%s: No Cooling devices registered\n",
262 __func__);
263 return -ENODEV;
264 }
265
266 if (set_cooling_level) {
// A non-zero cooling_increment is added to the level; zero resets it to 0.
267 if (zone->cooling_increment)
268 omap_gov->cooling_level += zone->cooling_increment;
269 else
270 omap_gov->cooling_level = 0;
271 thermal_device_call_all(cooling_list, cool_device,
272 omap_gov->cooling_level); // this is where the governor is tied to the cooling devices' cool_device callback
273 }
// Record the zone's hotspot bounds, convert them with
// hotspot_temp_to_sensor_temp() and hand them to the sensor as thresholds.
274 omap_gov->hotspot_temp_lower = zone->temp_lower;
275 omap_gov->hotspot_temp_upper = zone->temp_upper;
276 temp_lower = hotspot_temp_to_sensor_temp(omap_gov->hotspot_temp_lower);
277 temp_upper = hotspot_temp_to_sensor_temp(omap_gov->hotspot_temp_upper);
278 thermal_device_call(omap_gov->temp_sensor, set_temp_thresh, temp_lower,
279 temp_upper);
280 omap_update_report_rate(omap_gov->temp_sensor, zone->update_rate);
// average_period is only updated when thermal_lookup_temp("pcb") is >= 0.
281 if (thermal_lookup_temp("pcb") >= 0)
282 omap_gov->average_period = zone->average_rate;
283
284 return 0;
285 }
omap_cpu_thermal_manager()函数中会调用omap_enter_zone()。omap_cpu_thermal_manager在omap_process_cpu_temp()中调用
// Thermal device operations for the OMAP governor: the only callback set
// here is process_temp, implemented by omap_process_cpu_temp.
498 static struct thermal_dev_ops omap_gov_ops = {
499 .process_temp = omap_process_cpu_temp,
500 };
// thermal_sensor_set_temp - pass a sensor's current temperature to the
// governor of the sensor's thermal domain.
//
// Returns -ENODEV when tdev has no domain or the domain has no cooling
// agents; otherwise returns the result of the governor's process_temp call.
183 int thermal_sensor_set_temp(struct thermal_dev *tdev)
184 {
185 struct thermal_domain *thermal_domain;
186 int ret = -ENODEV;
187
188 thermal_domain = tdev->domain;
189 if (!thermal_domain) {
190 pr_err("%s: device not part of a domain\n", __func__);
191 goto out;
192 }
193
194 if (list_empty(&thermal_domain->cooling_agents)) {
195 pr_err("%s: no cooling agents for domain %s\n",
196 __func__, thermal_domain->domain_name);
197 goto out;
198 }
199
200 ret = thermal_device_call(thermal_domain->governor, process_temp,
201 &thermal_domain->cooling_agents,
202 tdev, tdev->current_temp); // process_temp is invoked here
// A negative result is only logged at debug level, then returned to the caller.
203 if (ret < 0)
204 pr_debug("%s: governor does not have callback\n", __func__);
205 out:
206 return ret;
207 }
208 EXPORT_SYMBOL_GPL(thermal_sensor_set_temp);
209
static irqreturn_t omap_talert_irq_handler(int irq, void *data)中会调用thermal_sensor_set_temp。