Recently I have modified the linux 2.6.9 process scheduling a little bit, so every process, which is under Fair Share Scheduling Policy, will get a time slice equal to other users. That is to say, each user in the FSSP will get equal time slice, and each process in one user will get equal time slice, too.
Maybe the diff file is the most clear one to explain this.
- diff -Naur linux-2.6.9/include/linux/fssp.h linux-2.6.9lab3/include/linux/fssp.h
-
--- linux-2.6.9/include/linux/fssp.h 1969-12-31 19:00:00.000000000 -0500
-
+++ linux-2.6.9lab3/include/linux/fssp.h 2011-12-29 21:19:19.000000000 -0500
-
@@ -0,0 +1,22 @@
-
+/*
-
+ * this file is fssp's head file, see kernel/fssp.c
-
+ *
-
+ * author: dachuan
-
+ * hdc1112@gmail.com
-
+ * 12/29/2011
-
+ */
-
+
-
+#ifndef _LINUX_FSSP_H
-
+#define _LINUX_FSSP_H
-
+
-
+#ifdef __KERNEL__
-
+
-
+#define PREFIX "FSSP: "
-
+
-
+extern void fssp_contribute(task_t *p);
-
+extern unsigned int fssp_share(task_t *p);
-
+extern void fssp_quit(task_t *p, unsigned int new_policy);
-
+
-
+#endif
-
+
-
+#endif
-
diff -Naur linux-2.6.9/include/linux/sched.h linux-2.6.9lab3/include/linux/sched.h
-
--- linux-2.6.9/include/linux/sched.h 2004-10-18 17:53:13.000000000 -0400
-
+++ linux-2.6.9lab3/include/linux/sched.h 2011-12-28 22:53:31.000000000 -0500
-
@@ -126,6 +126,9 @@
-
#define SCHED_NORMAL 0
-
#define SCHED_FIFO 1
-
#define SCHED_RR 2
-
+// fair-share scheduling policy, all users share the same amount of time
-
+// and all the processes of one user share this time equally
-
+#define SCHED_FSSP 3
-
-
struct sched_param {
-
int sched_priority;
-
diff -Naur linux-2.6.9/kernel/exit.c linux-2.6.9lab3/kernel/exit.c
-
--- linux-2.6.9/kernel/exit.c 2004-10-18 17:55:06.000000000 -0400
-
+++ linux-2.6.9lab3/kernel/exit.c 2011-12-29 21:45:02.937556456 -0500
-
@@ -30,6 +30,10 @@
-
#include <asm/pgtable.h>
-
#include <asm/mmu_context.h>
-
-
+// fssp
-
+#include <linux/fssp.h>
-
+#include <linux/sched.h>
-
+
-
extern void sem_exit (void);
-
extern struct task_struct *child_reaper;
-
-
@@ -829,6 +833,12 @@
-
mpol_free(tsk->mempolicy);
-
tsk->mempolicy = NULL;
-
#endif
-
+ // a process is dying, call fssp_quit() to get his body out of fssp
-
+ if (current->policy == SCHED_FSSP)
-
+ {
-
+ fssp_quit(current, SCHED_NORMAL);
-
+ printk(KERN_WARNING "fssp: " "process %d quit from fssp\n", current->pid);
-
+ }
-
schedule();
-
BUG();
-
/* Avoid "noreturn function does return". */
-
diff -Naur linux-2.6.9/kernel/fssp.c linux-2.6.9lab3/kernel/fssp.c
-
--- linux-2.6.9/kernel/fssp.c 1969-12-31 19:00:00.000000000 -0500
-
+++ linux-2.6.9lab3/kernel/fssp.c 2011-12-29 21:59:36.723720768 -0500
-
@@ -0,0 +1,253 @@
-
+/*
-
+ * this file is about fair-share scheduling policy,
-
+ * and fssp is short for this appalling name.
-
+ *
-
+ * author: dachuan
-
+ * hdc1112@gmail.com
-
+ * 12/28/2011
-
+ */
-
+
-
+// some thinking:
-
+// 1nd, it seems that this code needs locking, ignore temporarily;
-
+// 2rd, it seems we need CONFIG to turn on/off FSSP, ignore temporarily;
-
+// 3th, it seems we need EINVAD or related things, ignore temporariy;
-
+// 4th, it seems there is a potential performance bottleneck, when
-
+// the managed processes' number becomes larger, we need to traverse
-
+// the whole linked-list to find the process. this may be solved
-
+// by hash_table, but native linux doens't provide hash_table, so
-
+// ignore temporarily.
-
+
-
+#include <linux/list.h>
-
+#include <linux/types.h>
-
+#include <linux/sched.h>
-
+#include <linux/slab.h>
-
+#include <linux/kernel.h>
-
+#include <linux/fssp.h>
-
+#include <asm-generic/bug.h>
-
+
-
+// the sum of all managed processes' time slices.
-
+static unsigned int total;
-
+
-
+// main struct for fair-share scheduling policy.
-
+struct fssp
-
+{
-
+ int users; // the number of users under management
-
+ struct list_head user_list; // list head of the users, see struct fssp_user
-
+};
-
+
-
+// struct for a user, it is linked to struct fssp.users.
-
+struct fssp_user
-
+{
-
+ uid_t uid; // uid of the user
-
+ int processes; // the number of processes under management of this user
-
+ struct list_head user_list; // linked to struct fssp.user_list, see struct fssp
-
+ struct list_head proc_list; // list head of the processes, see struct fssp_process
-
+};
-
+
-
+// struct for a process, it is linked to struct fssp_user.processes.
-
+struct fssp_process
-
+{
-
+ unsigned int latest_contri; // latest time slice this process contributed, this is only used in fssp_quit()
-
+ task_t *p; // task_struct pointer to the process
-
+ struct list_head proc_list; // just linked to struct fssp_process, or struct fssp_user.processes_list
-
+};
-
+
-
+// init the fssp struct.
-
+static struct fssp _fssp =
-
+{
-
+ 0, LIST_HEAD_INIT(_fssp.user_list)
-
+};
-
+
-
+#define FSSP_USER_EXIST 0
-
+#define FSSP_USER_CREAT 1
-
+#define FSSP_PROCESS_EXIST 0
-
+#define FSSP_PROCESS_CREAT 1
-
+
-
+// find the fssp_user struct, return NULL if there isn't.
-
+static struct fssp_user *_fssp_find_user(uid_t uid)
-
+{
-
+ struct fssp_user *pos;
-
+
-
+ // walk the whole list to find this process's user struct, see struct fssp_user.
-
+ list_for_each_entry(pos, &_fssp.user_list, user_list)
-
+ {
-
+ if (pos->uid == uid)
-
+ return pos;
-
+ }
-
+ return NULL;
-
+}
-
+
-
+// find the fssp_user struct, create a new one if there isn't.
-
+static struct fssp_user *fssp_find_user(uid_t uid, int *exist)
-
+{
-
+ struct fssp_user *pos = _fssp_find_user(uid);
-
+
-
+ // if we didn't find it, create a new entry for it.
-
+ if (unlikely(!pos))
-
+ {
-
+ if (!(pos = kmalloc(sizeof(*pos), GFP_ATOMIC)))
-
+ {
-
+ printk(KERN_ERR PREFIX "kmalloc struct fssp_user failed\n");
-
+ return NULL;
-
+ }
-
+ //*pos = { uid, 0, LIST_HEAD_INIT(pos->proc_list) };
-
+ pos->uid = uid;
-
+ pos->processes = 0;
-
+ INIT_LIST_HEAD(&pos->proc_list);
-
+ list_add(&pos->user_list, &_fssp.user_list);
-
+
-
+ _fssp.users++;
-
+
-
+ if (exist)
-
+ *exist = FSSP_USER_CREAT;
-
+ }
-
+ else
-
+ {
-
+ if (exist)
-
+ *exist = FSSP_USER_EXIST;
-
+ }
-
+
-
+ return pos;
-
+}
-
+
-
+// find the fssp_process struct, return NULL if there isn't.
-
+static struct fssp_process *_fssp_find_process(struct fssp_user *user, task_t *p)
-
+{
-
+ struct fssp_process *pos;
-
+
-
+ // walk the whole list to find this process's fssp_process, see struct fssp_process.
-
+ list_for_each_entry(pos, &user->proc_list, proc_list)
-
+ {
-
+ if (pos->p == p)
-
+ return pos;
-
+ }
-
+
-
+ return NULL;
-
+}
-
+
-
+// find the fssp_process struct, create a new one if there isn't.
-
+// notice, we didn't put p->time_slice into pos->latest_time_slice here,
-
+// it is supposed to be fssp_contribute()'s job
-
+static struct fssp_process *fssp_find_process(struct fssp_user *user, task_t *p, int *exist)
-
+{
-
+ struct fssp_process *pos = _fssp_find_process(user, p);
-
+
-
+ if (unlikely(!pos))
-
+ {
-
+ if (!(pos = kmalloc(sizeof(*pos), GFP_ATOMIC)))
-
+ {
-
+ printk(KERN_ERR PREFIX "kmalloc struct fssp_process failed\n");
-
+ return NULL;
-
+ }
-
+ pos->latest_contri = 0;
-
+ pos->p = p;
-
+ list_add(&pos->proc_list, &user->proc_list);
-
+
-
+ user->processes++;
-
+
-
+ if (exist)
-
+ *exist = FSSP_PROCESS_CREAT;
-
+ }
-
+ else
-
+ {
-
+ if (exist)
-
+ *exist = FSSP_PROCESS_EXIST;
-
+ }
-
+
-
+ return pos;
-
+}
-
+
-
+static void _fssp_contribute(task_t *p, unsigned int time_contri)
-
+{
-
+ struct fssp_user *pos;
-
+ struct fssp_process *pos2;
-
+
-
+ // just return if this process is not under fssp's management.
-
+ if (unlikely(p->policy != SCHED_FSSP))
-
+ return;
-
+
-
+ if (!(pos = fssp_find_user(p->uid, NULL)))
-
+ return;
-
+ // now, pos points to the right fssp_user.
-
+
-
+ if (!(pos2 = fssp_find_process(pos, p, NULL)))
-
+ {
-
+ if (pos->processes == 0)
-
+ {
-
+ list_del(&pos->user_list);
-
+ kfree(pos);
-
+ _fssp.users--;
-
+ }
-
+ return;
-
+ }
-
+ // now, pos2 points to the right fssp_process.
-
+
-
+ total = total - pos2->latest_contri + time_contri;
-
+ pos2->latest_contri = time_contri;
-
+}
-
+
-
+// a process's time slice should be calcuated by native kernel before calling this function.
-
+// notice, if a process's policy has been changed to SCHED_FSSP, he doesn't necessarily show
-
+// up in the struct fssp, because if he doesn't share or contribute time slice, he is not
-
+// entitled to get a place here.
-
+void fssp_contribute(task_t *p)
-
+{
-
+ _fssp_contribute(p, p->time_slice);
-
+}
-
+
-
+static unsigned long t = 0;
-
+// usually called right after fssp_contribute(), get his share.
-
+// return: time_slice he ought to get.
-
+unsigned int fssp_share(task_t *p)
-
+{
-
+ struct fssp_user *pos;
-
+ // int user_exist;
-
+ struct fssp_process *pos2;
-
+ int process_exist;
-
+
-
+ if (unlikely(p->policy != SCHED_FSSP))
-
+ return 0; // so a wrong request get 0 timeslice
-
+
-
+
-
+ pos = fssp_find_user(p->uid, NULL);
-
+ if (!pos)
-
+ return 0; // so if kmalloc error, he gets a 0 timeslice
-
+
-
+ pos2 = fssp_find_process(pos, p, &process_exist);
-
+ if (!pos2)
-
+ return 0; // so if kmalloc error, he gets a 0 timeslice
-
+ if (process_exist == FSSP_PROCESS_CREAT)
-
+ printk(KERN_WARNING PREFIX "a process, who never contributed before" \
-
+ " wants to share timeslice\n");
-
+
-
+ return total / _fssp.users / pos->processes;
-
+}
-
+
-
+void fssp_quit(task_t *p, unsigned int new_policy)
-
+{
-
+ struct fssp_user *pos;
-
+ struct fssp_process *pos2;
-
+
-
+ if (unlikely(p->policy != SCHED_FSSP))
-
+ return;
-
+
-
+ BUG_ON(!(new_policy == SCHED_NORMAL || \
-
+ new_policy == SCHED_FIFO || \
-
+ new_policy == SCHED_RR));
-
+ p->policy = new_policy;
-
+
-
+ if (!(pos = _fssp_find_user(p->uid)))
-
+ return;
-
+ if (!(pos2 = _fssp_find_process(pos, p)))
-
+ return;
-
+
-
+ total -= pos2->latest_contri;
-
+ list_del(&pos2->proc_list);
-
+ kfree(pos2);
-
+ if (!--pos->processes)
-
+ {
-
+ list_del(&pos->user_list);
-
+ kfree(pos);
-
+ _fssp.users--;
-
+ }
-
+}
-
diff -Naur linux-2.6.9/kernel/Makefile linux-2.6.9lab3/kernel/Makefile
-
--- linux-2.6.9/kernel/Makefile 2004-10-18 17:53:43.000000000 -0400
-
+++ linux-2.6.9lab3/kernel/Makefile 2011-12-29 13:45:27.000000000 -0500
-
@@ -7,7 +7,7 @@
-
sysctl.o capability.o ptrace.o timer.o user.o \
-
signal.o sys.o kmod.o workqueue.o pid.o \
-
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-
- kthread.o
-
+ kthread.o fssp.o
-
-
obj-$(CONFIG_FUTEX) += futex.o
-
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-
diff -Naur linux-2.6.9/kernel/sched.c linux-2.6.9lab3/kernel/sched.c
-
--- linux-2.6.9/kernel/sched.c 2004-10-18 17:54:55.000000000 -0400
-
+++ linux-2.6.9lab3/kernel/sched.c 2011-12-29 21:20:10.000000000 -0500
-
@@ -47,6 +47,9 @@
-
-
#include <asm/unistd.h>
-
-
+// fssp.h
-
+#include <linux/fssp.h>
-
+
-
#ifdef CONFIG_NUMA
-
#define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
-
#else
-
@@ -1310,6 +1313,12 @@
-
*/
-
local_irq_disable();
-
p->time_slice = (current->time_slice + 1) >> 1;
-
+ // fssp
-
+ if (p->policy == SCHED_FSSP)
-
+ {
-
+ fssp_contribute(p);
-
+ p->time_slice = fssp_share(p);
-
+ }
-
/*
-
* The remainder of the first timeslice might be recovered by
-
* the parent if the child exits early enough.
-
@@ -2452,6 +2461,12 @@
-
set_tsk_need_resched(p);
-
p->prio = effective_prio(p);
-
p->time_slice = task_timeslice(p);
-
+ // fssp
-
+ if (p->policy == SCHED_FSSP)
-
+ {
-
+ fssp_contribute(p);
-
+ p->time_slice = fssp_share(p);
-
+ }
-
p->first_time_slice = 0;
-
-
if (!rq->expired_timestamp)
-
@@ -3169,9 +3184,11 @@
-
static void __setscheduler(struct task_struct *p, int policy, int prio)
-
{
-
BUG_ON(p->array);
-
+ if (p->policy == SCHED_FSSP && policy != SCHED_FSSP)
-
+ fssp_quit(p, policy);
-
p->policy = policy;
-
p->rt_priority = prio;
-
- if (policy != SCHED_NORMAL)
-
+ if (policy != SCHED_NORMAL && policy != SCHED_FSSP)
-
p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
-
else
-
p->prio = p->static_prio;
-
@@ -3219,7 +3236,7 @@
-
else {
-
retval = -EINVAL;
-
if (policy != SCHED_FIFO && policy != SCHED_RR &&
-
- policy != SCHED_NORMAL)
-
+ policy != SCHED_NORMAL && policy != SCHED_FSSP)
-
goto out_unlock;
-
}
-
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
-
@@ -3231,7 +3248,7 @@
-
retval = -EINVAL;
-
if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
-
goto out_unlock;
-
- if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
-
+ if ((policy == SCHED_NORMAL || policy == SCHED_FSSP) != (lp.sched_priority == 0))
-
goto out_unlock;
-
-
retval = -EPERM;
-
diff -Naur linux-2.6.9/Makefile linux-2.6.9lab3/Makefile
-
--- linux-2.6.9/Makefile 2004-10-18 17:54:38.000000000 -0400
-
+++ linux-2.6.9lab3/Makefile 2011-12-28 21:34:11.000000000 -0500
-
@@ -1,7 +1,7 @@
-
VERSION = 2
-
PATCHLEVEL = 6
-
SUBLEVEL = 9
-
-EXTRAVERSION =
-
+EXTRAVERSION = lab3
-
NAME=Zonked Quokka
-
-
# *DOCUMENTATION*
And I have done some test using Top command in linux. Top command may not be the precise way to judge fssp's performance, but it is close enough.
Other things are explained here:
Comments about the screenshots:
the file's name explains the testing scenario, e.g. three.users.first-one-process.
second-one-processes.third-two-processes
means:
there are currently three users, and the 1st user has one process, the
2nd user has one process, and the 3rd user has two processes.
I changed the processes' nice value on purpose, to show even under
different static_priority, their time_slices are still proportionately
to each other, according to fair-share scheduling policy.
Design:
only one sentence to describe the design: the process first contribute
his time_slice, which is calculated by original kernel, to the fssp
management component, and then get his time slice from fssp management
component, of course, after his calculation and manipulation.
Benefits of this design:
we won't interfere other processes in the system;
compared to fixed amount time slice for each user, we won't starve one process if one user has too many processes.
there are two major points in designing, technically speaking:
1st,
when a process is forked and his parent is under SCHED_FSSP, then he
inherited it naturally and contributed his time slice, which is one half
of his parent, to the fssp management component, and then got his time
slice from fssp management component, too;
2nd, when a timer interrupt finally reached scheduler_tick(), and
scheduler_tick() found out the current process had no time slice left,
he would re-calculated his time_slice (based on avg_sleep_time,
interactivity), and then he contributed his time slice to fssp
management component, and got his time slice share after that.
How to apply my change to 2.6.9 kernel:
If diff file can simply be applied to kernel, like a patch, then this is the easiest way.
if not, simply copy exit.c fssp.c sched.c to kernel/, and copy sched.h, fssp.h to include/linux/
Other comments:
The source files are commented at my best effort, I think that they are easy to understand.
And all the potential improvements and potential bottleneck have been written in fssp.c
Please be warned, that you need a test program to test the whole thing, so if you need this test program, you can contact me. hdc1112@gmail.com
阅读(899) | 评论(0) | 转发(0) |