Chinaunix首页 | 论坛 | 博客
  • 博客访问: 146424
  • 博文数量: 54
  • 博客积分: 2682
  • 博客等级: 少校
  • 技术积分: 580
  • 用 户 组: 普通用户
  • 注册时间: 2009-10-24 20:56
文章分类
文章存档

2012年(2)

2011年(10)

2010年(28)

2009年(14)

我的朋友

分类: LINUX

2011-12-30 22:26:38

Recently I have modified the linux 2.6.9 process scheduling a little bit, so every process, which is under Fair Share Scheduling Policy, will get a time slice equal to other users. That is to say, each user in the FSSP will get equal time slice, and each process in one user will get equal time slice, too.

Maybe the diff file is the most clear one to explain this.

  1. diff -Naur linux-2.6.9/include/linux/fssp.h linux-2.6.9lab3/include/linux/fssp.h
  2. --- linux-2.6.9/include/linux/fssp.h    1969-12-31 19:00:00.000000000 -0500
  3. +++ linux-2.6.9lab3/include/linux/fssp.h    2011-12-29 21:19:19.000000000 -0500
  4. @@ -0,0 +1,22 @@
  5. +/*
  6. + * this file is fssp's head file, see kernel/fssp.c
  7. + *
  8. + * author: dachuan
  9. + * hdc1112@gmail.com
  10. + * 12/29/2011
  11. + */
  12. +
  13. +#ifndef _LINUX_FSSP_H
  14. +#define _LINUX_FSSP_H
  15. +
  16. +#ifdef __KERNEL__
  17. +
  18. +#define PREFIX "FSSP: "
  19. +
  20. +extern void fssp_contribute(task_t *p);
  21. +extern unsigned int fssp_share(task_t *p);
  22. +extern void fssp_quit(task_t *p, unsigned int new_policy);
  23. +
  24. +#endif
  25. +
  26. +#endif
  27. diff -Naur linux-2.6.9/include/linux/sched.h linux-2.6.9lab3/include/linux/sched.h
  28. --- linux-2.6.9/include/linux/sched.h    2004-10-18 17:53:13.000000000 -0400
  29. +++ linux-2.6.9lab3/include/linux/sched.h    2011-12-28 22:53:31.000000000 -0500
  30. @@ -126,6 +126,9 @@
  31.  #define SCHED_NORMAL        0
  32.  #define SCHED_FIFO        1
  33.  #define SCHED_RR        2
  34. +// fair-share scheduling policy, all users share the same amount of time
  35. +// and all the processes of one user share this time equally
  36. +#define SCHED_FSSP        3
  37.  
  38.  struct sched_param {
  39.      int sched_priority;
  40. diff -Naur linux-2.6.9/kernel/exit.c linux-2.6.9lab3/kernel/exit.c
  41. --- linux-2.6.9/kernel/exit.c    2004-10-18 17:55:06.000000000 -0400
  42. +++ linux-2.6.9lab3/kernel/exit.c    2011-12-29 21:45:02.937556456 -0500
  43. @@ -30,6 +30,10 @@
  44.  #include <asm/pgtable.h>
  45.  #include <asm/mmu_context.h>
  46.  
  47. +// fssp
  48. +#include <linux/fssp.h>
  49. +#include <linux/sched.h>
  50. +
  51.  extern void sem_exit (void);
  52.  extern struct task_struct *child_reaper;
  53.  
  54. @@ -829,6 +833,12 @@
  55.      mpol_free(tsk->mempolicy);
  56.      tsk->mempolicy = NULL;
  57.  #endif
  58. +    // a process is dying, call fssp_quit() to get his body out of fssp
  59. +    if (current->policy == SCHED_FSSP)
  60. +    {
  61. +        fssp_quit(current, SCHED_NORMAL);
  62. +        printk(KERN_WARNING "fssp: " "process %d quit from fssp\n", current->pid);
  63. +    }
  64.      schedule();
  65.      BUG();
  66.      /* Avoid "noreturn function does return". */
  67. diff -Naur linux-2.6.9/kernel/fssp.c linux-2.6.9lab3/kernel/fssp.c
  68. --- linux-2.6.9/kernel/fssp.c    1969-12-31 19:00:00.000000000 -0500
  69. +++ linux-2.6.9lab3/kernel/fssp.c    2011-12-29 21:59:36.723720768 -0500
  70. @@ -0,0 +1,253 @@
  71. +/*
  72. + * this file is about fair-share scheduling policy,
  73. + * and fssp is short for this appalling name.
  74. + *
  75. + * author: dachuan
  76. + * hdc1112@gmail.com
  77. + * 12/28/2011
  78. + */
  79. +
  80. +// some thinking:
  81. +// 1nd, it seems that this code needs locking, ignore temporarily;
  82. +// 2rd, it seems we need CONFIG to turn on/off FSSP, ignore temporarily;
  83. +// 3th, it seems we need EINVAD or related things, ignore temporariy;
  84. +// 4th, it seems there is a potential performance bottleneck, when
  85. +// the managed processes' number becomes larger, we need to traverse
  86. +// the whole linked-list to find the process. this may be solved
  87. +// by hash_table, but native linux doens't provide hash_table, so
  88. +// ignore temporarily.
  89. +
  90. +#include <linux/list.h>
  91. +#include <linux/types.h>
  92. +#include <linux/sched.h>
  93. +#include <linux/slab.h>
  94. +#include <linux/kernel.h>
  95. +#include <linux/fssp.h>
  96. +#include <asm-generic/bug.h>
  97. +
  98. +// the sum of all managed processes' time slices.
  99. +static unsigned int total;
  100. +
  101. +// main struct for fair-share scheduling policy.
  102. +struct fssp
  103. +{
  104. +    int users;    // the number of users under management
  105. +    struct list_head user_list;    // list head of the users, see struct fssp_user
  106. +};
  107. +
  108. +// struct for a user, it is linked to struct fssp.users.
  109. +struct fssp_user
  110. +{
  111. +    uid_t uid;    // uid of the user
  112. +    int processes;    // the number of processes under management of this user
  113. +    struct list_head user_list;    // linked to struct fssp.user_list, see struct fssp
  114. +    struct list_head proc_list;    // list head of the processes, see struct fssp_process
  115. +};
  116. +
  117. +// struct for a process, it is linked to struct fssp_user.processes.
  118. +struct fssp_process
  119. +{
  120. +    unsigned int latest_contri;    // latest time slice this process contributed, this is only used in fssp_quit()
  121. +    task_t *p;    // task_struct pointer to the process
  122. +    struct list_head proc_list;    // just linked to struct fssp_process, or struct fssp_user.processes_list
  123. +};
  124. +
  125. +// init the fssp struct.
  126. +static struct fssp _fssp =
  127. +{
  128. +    0, LIST_HEAD_INIT(_fssp.user_list)
  129. +};
  130. +
  131. +#define FSSP_USER_EXIST    0
  132. +#define FSSP_USER_CREAT 1
  133. +#define FSSP_PROCESS_EXIST    0
  134. +#define FSSP_PROCESS_CREAT    1
  135. +
  136. +// find the fssp_user struct, return NULL if there isn't.
  137. +static struct fssp_user *_fssp_find_user(uid_t uid)
  138. +{
  139. +    struct fssp_user *pos;
  140. +    
  141. +    // walk the whole list to find this process's user struct, see struct fssp_user.
  142. +    list_for_each_entry(pos, &_fssp.user_list, user_list)
  143. +    {
  144. +        if (pos->uid == uid)
  145. +            return pos;
  146. +    }
  147. +    return NULL;
  148. +}
  149. +
  150. +// find the fssp_user struct, create a new one if there isn't.
  151. +static struct fssp_user *fssp_find_user(uid_t uid, int *exist)
  152. +{
  153. +    struct fssp_user *pos = _fssp_find_user(uid);
  154. +
  155. +    // if we didn't find it, create a new entry for it.
  156. +    if (unlikely(!pos))
  157. +    {
  158. +        if (!(pos = kmalloc(sizeof(*pos), GFP_ATOMIC)))
  159. +        {
  160. +            printk(KERN_ERR PREFIX "kmalloc struct fssp_user failed\n");
  161. +            return NULL;
  162. +        }    
  163. +        //*pos = { uid, 0, LIST_HEAD_INIT(pos->proc_list) };
  164. +        pos->uid = uid;
  165. +        pos->processes = 0;
  166. +        INIT_LIST_HEAD(&pos->proc_list);
  167. +        list_add(&pos->user_list, &_fssp.user_list);
  168. +        
  169. +        _fssp.users++;
  170. +
  171. +        if (exist)
  172. +            *exist = FSSP_USER_CREAT;
  173. +    }
  174. +    else
  175. +    {
  176. +        if (exist)
  177. +            *exist = FSSP_USER_EXIST;
  178. +    }
  179. +
  180. +    return pos;
  181. +}
  182. +
  183. +// find the fssp_process struct, return NULL if there isn't.
  184. +static struct fssp_process *_fssp_find_process(struct fssp_user *user, task_t *p)
  185. +{
  186. +    struct fssp_process *pos;
  187. +    
  188. +    // walk the whole list to find this process's fssp_process, see struct fssp_process.
  189. +    list_for_each_entry(pos, &user->proc_list, proc_list)
  190. +    {
  191. +        if (pos->p == p)
  192. +            return pos;
  193. +    }
  194. +
  195. +    return NULL;
  196. +}
  197. +
  198. +// find the fssp_process struct, create a new one if there isn't.
  199. +// notice, we didn't put p->time_slice into pos->latest_time_slice here,
  200. +// it is supposed to be fssp_contribute()'s job
  201. +static struct fssp_process *fssp_find_process(struct fssp_user *user, task_t *p, int *exist)
  202. +{
  203. +    struct fssp_process *pos = _fssp_find_process(user, p);
  204. +    
  205. +    if (unlikely(!pos))
  206. +    {
  207. +        if (!(pos = kmalloc(sizeof(*pos), GFP_ATOMIC)))
  208. +        {
  209. +            printk(KERN_ERR PREFIX "kmalloc struct fssp_process failed\n");
  210. +            return NULL;
  211. +        }
  212. +        pos->latest_contri = 0;
  213. +        pos->p = p;
  214. +        list_add(&pos->proc_list, &user->proc_list);
  215. +
  216. +        user->processes++;
  217. +
  218. +        if (exist)
  219. +            *exist = FSSP_PROCESS_CREAT;
  220. +    }
  221. +    else
  222. +    {
  223. +        if (exist)
  224. +            *exist = FSSP_PROCESS_EXIST;
  225. +    }
  226. +
  227. +    return pos;
  228. +}
  229. +
  230. +static void _fssp_contribute(task_t *p, unsigned int time_contri)
  231. +{
  232. +    struct fssp_user *pos;
  233. +    struct fssp_process *pos2;
  234. +
  235. +    // just return if this process is not under fssp's management.
  236. +    if (unlikely(p->policy != SCHED_FSSP))
  237. +        return;
  238. +
  239. +    if (!(pos = fssp_find_user(p->uid, NULL)))
  240. +        return;
  241. +    // now, pos points to the right fssp_user.
  242. +
  243. +    if (!(pos2 = fssp_find_process(pos, p, NULL)))
  244. +    {
  245. +        if (pos->processes == 0)
  246. +        {
  247. +            list_del(&pos->user_list);
  248. +            kfree(pos);
  249. +            _fssp.users--;
  250. +        }
  251. +        return;
  252. +    }
  253. +    // now, pos2 points to the right fssp_process.
  254. +
  255. +    total = total - pos2->latest_contri + time_contri;
  256. +    pos2->latest_contri = time_contri;
  257. +}
  258. +
  259. +// a process's time slice should be calcuated by native kernel before calling this function.
  260. +// notice, if a process's policy has been changed to SCHED_FSSP, he doesn't necessarily show
  261. +// up in the struct fssp, because if he doesn't share or contribute time slice, he is not
  262. +// entitled to get a place here.
  263. +void fssp_contribute(task_t *p)
  264. +{
  265. +    _fssp_contribute(p, p->time_slice);
  266. +}
  267. +
  268. +static unsigned long t = 0;
  269. +// usually called right after fssp_contribute(), get his share.
  270. +// return: time_slice he ought to get.
  271. +unsigned int fssp_share(task_t *p)
  272. +{
  273. +    struct fssp_user *pos;
  274. +    // int user_exist;
  275. +    struct fssp_process *pos2;
  276. +    int process_exist;
  277. +
  278. +    if (unlikely(p->policy != SCHED_FSSP))
  279. +        return 0;    // so a wrong request get 0 timeslice
  280. +
  281. +
  282. +    pos = fssp_find_user(p->uid, NULL);
  283. +    if (!pos)
  284. +        return 0;    // so if kmalloc error, he gets a 0 timeslice
  285. +
  286. +    pos2 = fssp_find_process(pos, p, &process_exist);
  287. +    if (!pos2)
  288. +        return 0;    // so if kmalloc error, he gets a 0 timeslice
  289. +    if (process_exist == FSSP_PROCESS_CREAT)
  290. +        printk(KERN_WARNING PREFIX "a process, who never contributed before" \
  291. +                     " wants to share timeslice\n");
  292. +
  293. +    return total / _fssp.users / pos->processes;
  294. +}
  295. +
  296. +void fssp_quit(task_t *p, unsigned int new_policy)
  297. +{
  298. +    struct fssp_user *pos;
  299. +    struct fssp_process *pos2;
  300. +
  301. +    if (unlikely(p->policy != SCHED_FSSP))
  302. +        return;
  303. +
  304. +    BUG_ON(!(new_policy == SCHED_NORMAL || \
  305. + new_policy == SCHED_FIFO || \
  306. + new_policy == SCHED_RR));
  307. +    p->policy = new_policy;
  308. +
  309. +    if (!(pos = _fssp_find_user(p->uid)))
  310. +        return;
  311. +    if (!(pos2 = _fssp_find_process(pos, p)))
  312. +        return;
  313. +
  314. +    total -= pos2->latest_contri;
  315. +    list_del(&pos2->proc_list);
  316. +    kfree(pos2);
  317. +    if (!--pos->processes)
  318. +    {
  319. +        list_del(&pos->user_list);
  320. +        kfree(pos);
  321. +        _fssp.users--;
  322. +    }
  323. +}
  324. diff -Naur linux-2.6.9/kernel/Makefile linux-2.6.9lab3/kernel/Makefile
  325. --- linux-2.6.9/kernel/Makefile    2004-10-18 17:53:43.000000000 -0400
  326. +++ linux-2.6.9lab3/kernel/Makefile    2011-12-29 13:45:27.000000000 -0500
  327. @@ -7,7 +7,7 @@
  328.       sysctl.o capability.o ptrace.o timer.o user.o \
  329.       signal.o sys.o kmod.o workqueue.o pid.o \
  330.       rcupdate.o intermodule.o extable.o params.o posix-timers.o \
  331. -     kthread.o
  332. +     kthread.o fssp.o
  333.  
  334.  obj-$(CONFIG_FUTEX) += futex.o
  335.  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
  336. diff -Naur linux-2.6.9/kernel/sched.c linux-2.6.9lab3/kernel/sched.c
  337. --- linux-2.6.9/kernel/sched.c    2004-10-18 17:54:55.000000000 -0400
  338. +++ linux-2.6.9lab3/kernel/sched.c    2011-12-29 21:20:10.000000000 -0500
  339. @@ -47,6 +47,9 @@
  340.  
  341.  #include <asm/unistd.h>
  342.  
  343. +// fssp.h
  344. +#include <linux/fssp.h>
  345. +
  346.  #ifdef CONFIG_NUMA
  347.  #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
  348.  #else
  349. @@ -1310,6 +1313,12 @@
  350.       */
  351.      local_irq_disable();
  352.      p->time_slice = (current->time_slice + 1) >> 1;
  353. +    // fssp
  354. +    if (p->policy == SCHED_FSSP)
  355. +    {
  356. +        fssp_contribute(p);
  357. +        p->time_slice = fssp_share(p);
  358. +    }
  359.      /*
  360.       * The remainder of the first timeslice might be recovered by
  361.       * the parent if the child exits early enough.
  362. @@ -2452,6 +2461,12 @@
  363.          set_tsk_need_resched(p);
  364.          p->prio = effective_prio(p);
  365.          p->time_slice = task_timeslice(p);
  366. +        // fssp
  367. +        if (p->policy == SCHED_FSSP)
  368. +        {
  369. +            fssp_contribute(p);
  370. +            p->time_slice = fssp_share(p);
  371. +        }
  372.          p->first_time_slice = 0;
  373.  
  374.          if (!rq->expired_timestamp)
  375. @@ -3169,9 +3184,11 @@
  376.  static void __setscheduler(struct task_struct *p, int policy, int prio)
  377.  {
  378.      BUG_ON(p->array);
  379. +    if (p->policy == SCHED_FSSP && policy != SCHED_FSSP)
  380. +        fssp_quit(p, policy);
  381.      p->policy = policy;
  382.      p->rt_priority = prio;
  383. -    if (policy != SCHED_NORMAL)
  384. +    if (policy != SCHED_NORMAL && policy != SCHED_FSSP)
  385.          p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
  386.      else
  387.          p->prio = p->static_prio;
  388. @@ -3219,7 +3236,7 @@
  389.      else {
  390.          retval = -EINVAL;
  391.          if (policy != SCHED_FIFO && policy != SCHED_RR &&
  392. -                policy != SCHED_NORMAL)
  393. +                policy != SCHED_NORMAL && policy != SCHED_FSSP)
  394.              goto out_unlock;
  395.      }
  396.      profile_hit(SCHED_PROFILING, __builtin_return_address(0));
  397. @@ -3231,7 +3248,7 @@
  398.      retval = -EINVAL;
  399.      if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
  400.          goto out_unlock;
  401. -    if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
  402. +    if ((policy == SCHED_NORMAL || policy == SCHED_FSSP) != (lp.sched_priority == 0))
  403.          goto out_unlock;
  404.  
  405.      retval = -EPERM;
  406. diff -Naur linux-2.6.9/Makefile linux-2.6.9lab3/Makefile
  407. --- linux-2.6.9/Makefile    2004-10-18 17:54:38.000000000 -0400
  408. +++ linux-2.6.9lab3/Makefile    2011-12-28 21:34:11.000000000 -0500
  409. @@ -1,7 +1,7 @@
  410.  VERSION = 2
  411.  PATCHLEVEL = 6
  412.  SUBLEVEL = 9
  413. -EXTRAVERSION =
  414. +EXTRAVERSION = lab3
  415.  NAME=Zonked Quokka
  416.  
  417.  # *DOCUMENTATION*
And I have done some test using Top command in linux. Top command may not be the precise way to judge fssp's performance, but it is close enough.


Other things are explained here:

Comments about the screenshots:
the file's name explains the testing scenario, e.g. three.users.first-one-process.
second-one-processes.third-two-processes
means: there are currently three users, and the 1st user has one process, the 2nd user has one process, and the 3rd user has two processes.

I changed the processes' nice value on purpose, to show even under different static_priority, their time_slices are still proportionately to each other, according to fair-share scheduling policy.

Design:
only one sentence to describe the design: the process first contribute his time_slice, which is calculated by original kernel, to the fssp management component, and then get his time slice from fssp management component, of course, after his calculation and manipulation.

Benefits of this design:
we won't interfere other processes in the system;
compared to fixed amount time slice for each user, we won't starve one process if one user has too many processes.

there are two major points in designing, technically speaking:

1st, when a process is forked and his parent is under SCHED_FSSP, then he inherited it naturally and contributed his time slice, which is one half of his parent, to the fssp management component, and then got his time slice from fssp management component, too;
2nd, when a timer interrupt finally reached scheduler_tick(), and scheduler_tick() found out the current process had no time slice left, he would re-calculated his time_slice (based on avg_sleep_time, interactivity), and then he contributed his time slice to fssp management component, and got his time slice share after that.

How to apply my change to 2.6.9 kernel:
If diff file can simply be applied to kernel, like a patch, then this is the easiest way.
if not, simply copy exit.c fssp.c sched.c to kernel/, and copy sched.h, fssp.h to include/linux/

Other comments:
The source files are commented at my best effort, I think that they are easy to understand.
And all the potential improvements and potential bottleneck have been written in fssp.c

Please be warned, that you need a test program to test the whole thing, so if you need this test program, you can contact me. hdc1112@gmail.com
阅读(899) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~