一,并发控制,concurrency
同时操作相同的资源(hardware,memory,data)会引起并发.SMP,process preempt,interrupt,Tasklet,bottom half都会引起concurrency.
在临界区(critical section)下解决竞争条件(race condition)的用互斥方法,包括:
forbid interrupt,atomic operation,spin lock,semaphore
中断屏蔽:
可以避免中断和内核抢占进程资源(linux task schedule depend on interrupt),但不能解决SMP的并发,也不能长时间forbid(影响异步IO和进程调度).
local_irq_disable()
...
critical section
...
local_irq_enable()
local_irq_save() //关中断并保存屏蔽字
local_irq_restore()
local_bh_disable()
local_bh_enable()
原子操作:
void atomic_set( atomic_t *v, int i )
atomic_t v = ATOMIC_INIT( 0 )
atomic_read( atomic_t *v )
void atomic_add( int i, atomic_t *v )
void atomic_sub( int i, atomic_t *v )
void atomic_inc( atomic_t *v )
void atomic_dec( atomic_t *v )
int atomic_inc_and_test( atomic_t *v )
int atomic_dec_and_test( atomic_t *v )
int atomic_sub_and_test( int i, atomic_t *v )
int atomic_add_return( int i, atomic_t *v )
int atomic_sub_return( int i, atomic_t *v )
int atomic_inc_return( atomic_t *v )
int atomic_dec_return( atomic_t *v )
void set_bit( nr, void *addr ) //nr,addr的第nr位
void clear_bit( nr, void *addr )
void change_bit( nr, void *addr )
int test_bit( nr, void *addr )
int test_and_set_bit( nr, void *addr )
int test_and_clear_bit( nr, void *addr )
int test_and_change_bit( nr, void *addr )
自旋锁:spin lock
原子操作test-and-set某个内存变量,并忙等待(就是原地打转),所以不能等待大的critical section,
另外要防止递归,copy_from_user,kmalloc等block引起的死锁.
spinlock_t lock;
spin_lock_init( &lock );
spin_lock( &lock );
spin_trylock( &lock );
尽管自旋锁不受SMP和进程调度影响,但会受irq和bh影响,联合使用.
spin_lock_irq(); spin_unlock_irq(); spin_lock_irqsave(); spin_unlock_irqrestore();
spin_lock_bh(); spin_unlock_bh();
读写自旋锁:rwlock
读锁和写锁分开.
rwlock_t my_rwlock = RW_LOCK_UNLOCKED;
rwlock_init( &my_rwlock );
read_lock( &lock );
...
read_unlock( &lock );
write_lock_irqsave( &lock, flags );
...
write_unlock_irqrestore( &lock, flags );
顺序锁:seqlock
写锁互斥,读锁可重复,当读时执行了写操作,重新读取.
void write_seqlock( seqlock_t *sl );
int write_tryseqlock( seqlock_t *sl );
void write_sequnlock( seqlock_t *sl );
unsigned read_seqbegin( const seqlock_t *sl );
int read_seqretry( const seqlock_t *sl, unsigned iv );
do{
seqnum = read_seqbegin_irqsave( &seqlock_a, flags );
...
}while( read_seqretry_irqrestore( &seqlock_a, seqnum, flags) );
RCU:read-copy update机制
在2.5.43引入,读不再受限制,写时先备份副本,修改副本,在所有对共享数据的操作完成时,用回调机制改变指针指到副本.
rcu_read_lock() 相当于preempt_disable()
rcu_read_unlock() 相当于preempt_enable()
rcu_read_lock_bh() 相当于local_bh_disable()
rcu_read_unlock_bh() 相当于local_bh_enable()
synchronize_rcu(); rcu写单元调用,Block直到读完成.
synchronize_kernel(); 用来等待所有cpu处于可抢占状态.synchronize_sched()
void fastcall call_rcu( struct rcu_head *head, void (*func)( struct rcu_head *rcu ) ); 挂接回调函数,注册到rcu_data
void fastcall call_rcu_bh( struct rcu_head *head, void (*func)( struct rcu_head *rcu ) ); 挂接回调函数,注册到rcu_bh_data
static inline void list_add_rcu( struct list_head *new, struct list_head *head ); rcu保护的链表操作
static inline void list_add_tail_rcu( struct list_head *new, struct list_head *head );
static inline void list_del_rcu( struct list_head *entry );
static inline void list_replace_rcu( struct list_head *old, struct list_head *new );
list_for_each_rcu( pos, head ); 宏,链表操作
list_for_each_safe_rcu( pos, n, head );
list_for_each_entry_rcu( pos, head, member );
static inline void hlist_del_rcu( struct hlist_node *n );
static inline void hlist_add_head_rcu( struct hlist_node *n, struct hlist_head *h ); 哈希链表
hlist_for_each_rcu( pos, head );
hlist_for_each_entry_rcu( tpos, pos, head, member );
信号量:semaphore用于同步
只有获得信号量的进程才能执行临界区代码,当获取不到信号量时,进程进入睡眠.
struct semaphore sem;
void sema_init( struct semaphore *sem, int val );
void init_MUTEX( struct semaphore *sem ); DECLARE_MUTEX( name ); //宏
void init_MUTEX_LOCKED( struct semaphore *sem ); DECLARE_MUTEX_LOCKED( name );
void down( struct semaphore *sem ); 会睡眠,不能在中断上下文使用.
int down_interruptible( struct semaphore *sem ); 睡眠可被signal打断.
int down_trylock( struct semaphore *sem ); 试锁定.
void up( struct semaphore *sem );
//
DECLARE_MUTEX( mutex_sem );
down( &mutex_sem );
...
critical section
...
up( &mutex_sem );
读写信号量:
struct rw_semaphore rw_sem;
init_rwsem( &rw_sem );
down_read( &rw_sem );
...
up_read( &rw_sem );
down_write( &rw_sem );
...
up_write( &rw_sem );
完成量:linux还提供一种比信号量更好的机制completion,用于同步
struct completion my_completion;
init_completion( &my_completion ); DECLARE_COMPLETION( my_completion );
void wait_for_completion( struct completion *c );
void complete( struct completion *c ); 唤醒wait_for_completion等待
void complete_all( struct completion *c );
互斥体:linux内核中还存在mutex
struct mutex my_mutex;
mutex_init( &my_mutex );
mutex_lock( &my_mutex );
...
mutex_unlock( &my_mutex );
自旋锁和信号量的使用:
信号量和互斥体属于不同层次的互斥,前者的实现依赖于后者,为保证信号量操作的原子性,SMP中要用自旋锁互斥.
1,临界区小时用自旋锁.2,信号量可以阻塞.3,中断中只能用自旋锁.
二,阻塞和轮询
等待队列
当不能获得资源时,进程可以选择阻塞或非阻塞方式,linux驱动程序中,应用等待队列(wait queue)来唤醒阻塞的进程.
wait_queue_head_t my_queue; 定义等待队列头
init_waitqueue_head( &my_queue ) DECLARE_WAIT_QUEUE_HEAD( name )初始化等待队列头
DECLARE_WAITQUEUE( name, tsk ) 定义等待队列
void fastcall add_wait_queue( wait_queue_head_t *q, wait_queue_t *wait ) 添加等待队列
void fastcall remove_wait_queue( wait_queue_head_t *q, wait_queue_t *wait )删除等待队列
wait_event( queue, condition ) 等待事件
wait_event_interruptible( queue, condition )
wait_event_timeout( queue, condition, timeout )
wait_event_interruptible_timeout( queue, condition, timeout )
void wake_up( wait_queue_head_t *queue ) 唤醒队列
void wake_up_interruptible( wait_queue_head_t *queue )
sleep_on( wait_queue_head_t *q ) 在等待队列上睡眠
interruptible_sleep_on( wait_queue_head_t *q )
等待队列实例:
struct globalfifo_dev
{
...
wait_queue_head_t r_wait;
wait_queue_head_t w_wait;
}
int globalfifo_init( void )
{
...
init_waitqueue_head( &globalfifo_devp->r_wait );
init_waitqueue_head( &globalfifo_devp->w_wait );
}
static ssize_t globalfifo_read( struct file *filp, char __user *buf, size_t count, loff_t *ppos )
{
DECLARE_WAITQUEUE( wait, current );
add_wait_queue( &dev->r_wait, &wait );
在成功读出后唤醒写队列.
wake_up_interruptible( &dev->w_wait );
out2:remove_wait_queue( &dev->r_wait, &wait );
}
static ssize_t globalfifo_write( struct file *filp, const char __user *buf, size_t count, loff_t *ppos )
{
DECLARE_WAITQUEUE( wait, current );
add_wait_queue( &dev->w_wait, &wait );
如果写满dev->current_len==GLOBALFIFO_SIZE
__set_current_state( TASK_INTERRUPTIBLE );
schedule();
if ( signal_pending( current ) )
{
ret = -ERESTARTSYS;
goto out2;
}
不满则写
wake_up_interruptible( &dev->r_wait );
}
轮询操作:非阻塞方式I/O,poll操作本身不被阻塞,但会等待,直到文件描述符集中至少有一个文件可访问或者超时.
在用户态,使用BSD UNIX select.
int select( int numfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout );
在内核,使用System V poll.
unsigned int (*poll)( struct file *filp, struct poll_table_struct *wait );
void poll_wait( struct file *filp, wait_queue_head_t *queue, poll_table *wait ); 注册poll_table到等待队列
模板:
static unsigned int xxx_poll( struct file *filp, poll_table *wait )
{
unsigned int mask = 0;
struct xxx_dev *dev = filp -> private_data; 获得设备结构体指针
...
poll_wait( filp, &dev->r_wait, wait ); 加读等待队列头
poll_wait( filp, &dev->w_wait, wait ); 加写等待队列头
if( ... )
{
mask |= POLLOUT | POLLWRNORM; 标示数据可写入
}
...
return mask;
}
三,异步通知和异步I/O
异步通知是设备驱动程序模拟一个中断通知应用程序,进行读写.属于异步通知的概念.
阻塞IO,非阻塞IO和异步通知本身不存在优劣,应该根据不同的场景来选择.
SIGNAL:
64种,32个已定义,32个自定义.
除了SIGSTOP和SIGKILL外,进程可以忽略或捕获其它的全部信号.捕获的意思是当信号到达进程时有相应的代码处理它.如果进程忽略这个信号,
内核将采用默认行为处理.
用户程序处理SIGNAL:
void ( *signal( int signum, void (*handle)(int)) )( int );
int sigaction( int signum, const struct sigaction *act, struct sigaction *oldact );
实例:
main()
{
int oflags;
signal( SIGIO, input_handler ); 连接本进程的SIGIO到input_handler.
fcntl( STDIN_FILENO, F_SETOWN, getpid() ); 通过F_SETOWN命令连接STDIN_FILENO设备文件到本进程.
oflags = fcntl( STDIN_FILENO, F_GETFL );
fcntl( STDIN_FILENO, F_SETFL, oflags|FASYNC); 设置设备文件支持FASYNC
while(1);
}
内核和驱动发送SIGNAL:
要使驱动程序支持异步通知机制
1,支持F_SETOWN命令,内核完成filp->f_owner.对应于用户fcntl( fd, F_SETOWN, getpid() ).
2,支持F_SETFL命令,每当FASYNC标志改变时,驱动中的fasync()将执行.因此驱动要有fasync()函数.对应于fcntl(fd,F_SETFL).
3,当资源可用时,要有kill_fasync()函数发出signal.对应于signal().
设备驱动中异步通知编程,主要用到一个数据结构和两个函数.
struct fasync_struct
int fasync_helper( int fd, struct file *filp, int mode, struct fasync_struct **fa );
void kill_fasync( struct fasync_struct **fa, int sig, int band );
模板:
1,将fasync_struct结构体指针放入设备结构体中
struct xxx_dev
{
struct cdev cdev;
...
struct fasync_struct *async_queue; 异步结构体
}
2,支持fasync操作
static int xxx_fasync( int fd, struct file *filp, int mode )
{
struct xxx_dev *dev = filp->private_data;
return fasync_helper( fd, filp, mode, &dev->async_queue );
}
3,写时发送SIGNAL
static ssize_t xxx_write( struct file *filp, const char __user *buf, size_t count, loff_t *f_pos )
{
struct xxx_dev *dev = filp -> private_data;
...
if ( dev -> async_queue )
kill_fasync( &dev->async_queue, SIGIO, POLL_IN );
...
}
4,在文件关闭时,将文件从异步通知列表删除
static int xxx_release( struct inode *inode, struct file *filp )
{
struct xxx_dev *dev = filp->private_data;
xxx_fasync( -1, filp, 0); 将文件从异步通知列表中删除.
...
return 0;
}
POSIX异步IO,AIO
在2.6中引进内核,2.4中也有补丁.它不同于同步IO的阻塞等待,而是同时发起多个IO,每个IO有唯一的上下文.aiocb(AIO Control Block),
它包括buffer,当IO完成时,它用来标识完成的IO操作.
AIO和SELECT相似,Select对通知事件阻塞,AIO对IO调用阻塞.
AIO的API:被GNU C lib包含,符合POSIX.1b的要求.
int aio_read( struct aiocb *aiocbp ) 对一个文件描述符(文件,socket,pipe..)进行异步读操作
int aio_write( struct aiocb *aiocbp )
int aio_error( struct aiocb *aiocbp ) EINPROGRESS,ECANCELLED,-1
ssize_t aio_return( struct aiocb *aiocbp ) 异步操作不能直接得到返回值,要等到aio_error!=EINPROGRESS.再用aio_return返回值.
int aio_suspend( const struct aiocb *const cblist[], int n, const struct timespec *timeout );
阻塞进程,直到异步IO完成返回结果,此时会产生一个信号,或发生其他超时操作.
int aio_cancel( int fd, struct aiocb *aiocbp );ret:AIO_CANCELLED,AIO_NOTCANCELED,AIO_ALLDONE,然后可以用aio_error来验证.
struct aiocb *cblist[MAX_LIST];
bzero( (char*)cblist, sizeof(cblist) );
cblist[0] = &my_aiocb;
ret = aio_read( &my_aiocb );
ret = aio_suspend( cblist, MAX_LIST, NULL );
ret = aio_error( &my_aiocb );
int lio_listio( int mode, struct aiocb *list[], int nent, struct sigevent *sig ); 在一个上下文中启动大量异步IO操作.
struct aiocb aiocb1, aiocb2;
struct aiocb *list[MAX_LIST];
...初始化第一个aiocb
aiocb1.aio_fildes = fd;
aiocb1.aio_buf = malloc( BUFSIZE+1 );
aiocb1.aio_nbytes = BUFSIZE;
aiocb1.aio_offset = next_offset;
aiocb1.aio_lio_opcode = LIO_READ;
...初始化多个aiocb
bzero( (char*)list, sizeof(list) );
list[0] = &aiocb1;
list[1] = &aiocb2;
...
ret = lio_listio( LIO_WAIT, list, MAX_LIST, NULL );
使用signal作为通知来返回AIO结果:
void setup_io(...) 设置异步IO请求
{
int fd;
struct sigaction sig_act;
struct aiocb my_aiocb;
...
设置信号处理函数
sigemptyset( &sig_act.sa_mask );
sig_act.sa_flags = SA_SIGINFO;
sig_act.sa_sigaction = aio_completion_handler;
设置AIO请求
bzero( (char*)&my_aiocb, sizeof( struct aiocb ) );
my_aiocb.aio_fildes = fd;
my_aiocb.aio_buf = malloc( BUF_SIZE + 1 );
my_aiocb.aio_nbytes = BUF_SIZE;
my_aiocb.aio_offset = next_offset;
连接AIO请求和信号处理函数
my_aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
my_aiocb.aio_sigevent.sigev_signo = SIGIO;
my_aiocb.aio_sigevent.sigev_value.sival_ptr = &my_aiocb;
将信号与信号处理函数绑定
ret = sigaction( SIGIO, &sig_act, NULL);
...
ret = aio_read( &my_aiocb); 开始异步读
}
信号处理函数
void aio_completion_handler( int signo, siginfo_t *info, void *context )
{
struct aiocb *req;
if ( info->si_signo == SIGIO )
{
req = (struct aiocb*)info->si_value.sival_ptr; 获得aiocb
if ( aio_error(req) == 0 )
{
ret = aio_return( req );
}
}
return;
}
使用回调函数(Callback)作为通知来返回AIO结果:
/proc/sys/fs/aio-nr 文件,系统范围异步IO请求的数目
/proc/sys/fs/aio-max-nr 文件,允许并发请求的最大数目
void setup_io(...) 设置异步IO请求
{
int fd;
struct aiocb my_aiocb;
...
设置AIO请求
连接AIO请求和线程回调函数
my_aiocb.aio_sigevent.sigev_notify = SIGEV_THREAD;
my_aiocb.aio_sigevent.notify_function = aio_completion_handler;
my_aiocb.aio_sigevent.notify_attributes = NULL;
my_aiocb.aio_sigevent.sigev_value.sival_ptr = &my_aiocb;
...
ret = aio_read( &my_aiocb); 开始异步读
}
AIO与设备驱动:
kiocb结构体中,ki_filp是file的指针.is_sync_kiocb()可以判断是否同步IO请求.
块设备和网络设备驱动本身就是异步的,AIO用在字符型设备驱动中,用于改善异步IO的性能.例如磁带机.
在file_operations中,用到3个AIO函数.
ssize_t (*aio_read)(struct kiocb *iocb, char *buffer, size_t count, loff_t offset);
ssize_t (*aio_write)(struct kiocb *iocb, const char *buffer, size_t count, loff_t offset);
int (*aio_fsync)(struct kiocb *iocb, int datasync);
异步读
static ssize_t xxx_aio_read( struct kiocb *iocb, char *buf, size_t count, loff_t pos )
{
return xxx_defer_op( 0, iocb, buf, count, pos );
}
异步写
static ssize_t xxx_aio_write( struct kiocb *iocb, const char *buf, size_t count, loff_t pos )
{
return xxx_defer_op( 1, iocb, (char*)buf, count, pos );
}
初始化异步IO
static int xxx_defer_op( int write, struct kiocb *iocb, char *buf, size_t count, loff_t pos )
{
struct async_work *async_wk;
int result;
if ( write )
result = xxx_write( iocb-> ki_filp, buf, count, &pos );
else
result = xxx_read( iocb->ki_filp, buf, count, &pos );
if ( is_sync_kiocb(iocb) )
return result;
否则是异步IO:
async_wk = kmalloc( sizeof(*async_wk), GFP_KERNEL ); async_work(异步工作)结构体将操作延后执行
if (async_wk == NULL)
return result;
调度延迟的工作
async_wk->iocb = iocb;
async_wk->result = result;
INIT_WORK( &async_wk->work, xxx_do_deferred_op, async_wk );
schedule_delayed_work( &async_wk->work, HZ/100 ); 调度async_work执行
return - EIOCBQUEUED;
}
static void xxx_do_deferred_op( void *p )
{
struct async_work *async_wk = (struct async_work*)p;
aio_complete( async_wk->iocb, async_wk->result, 0 ); 通知内核驱动程序完成了操作
kfree( async_wk );
}
struct async_work
{
struct kiocb *iocb;
int result; 执行结果
struct work_struct work; 工作结构体
}
四,中断和时钟
中断编程:
内部中断,外部中断;可屏蔽中断,不可屏蔽中断;向量中断,非向量中断(软件根据寄存器分地址).linux将中断分成顶半部,底半部.
顶半部主要读取寄存器中的中断状态并清除中断标志.再登记中断(将底半部处理程序挂到该设备的底半部执行队列.)
VxWorks中提供中断上下文和进程上下文相结合的机制.通过netJobAdd()将网络包的接收和上传交给tNetTask任务去执行.
/proc/interrupts 文件中有中断状态的描述.
int request_irq( unsigned int irq, 硬件中断号
void (*handler)(int irq, void *dev_id, struct pt_regs *regs), 中断处理函数
unsigned long irqflags, 中断处理的属性,SA_INTERRUPT,SA_SHIRQ
const char *devname,
void *dev_id );
void free_irq( unsigned int irq, void *dev_id );
void enable_irq( int irq );
void disable_irq_nosync( int irq ); 立即返回
void disable_irq( int irq ); 等当前中断处理完后返回
void local_irq_save( unsigned long flags ); 屏蔽本CPU的中断并保存进flags
void local_irq_restore( unsigned long flags );
void local_irq_disable( void );
void local_irq_enable( void );
底半部机制:
1,tasklet模板:
DECLARE_TASKLET( xxx_tasklet, xxx_do_tasklet, 0 );
xxx_do_tasklet( unsigned long )
{...}
irqreturn_t xxx_interrupt( int irq, void *dev_id, struct pt_regs *regs )
{
...
tasklet_schedule( &xxx_tasklet );
...
}
int __init xxx_init( void )
{
...
result = request_irq( xxx_irq, xxx_interrupt, SA_INTERRUPT, "xxx", NULL );
...
}
void __exit xxx_exit( void )
{
...
free_irq( xxx_irq, xxx_interrupt );
...
}
2,工作队列模板:
struct work_struct xxx_wq;
void xxx_do_work( unsigned long )
{...}
irqreturn_t xxx_interrupt( int irq, void *dev_id, struct pt_regs *regs )
{
...
schedule_work( &xxx_wq );
...
}
int xxx_init( void )
{
...
result = request_irq( xxx_irq, xxx_interrupt, SA_INTERRUPT, "xxx", NULL );
...
INIT_WORK( &xxx_wq, (void (*)(void *))xxx_do_work, NULL );
...
}
void xxx_exit( void )
{
...
free_irq( xxx_irq, xxx_interrupt );
...
}
3,软中断底半部
硬中断是外部设备对CPU的中断;
软中断是硬中断处理程序对内核的中断;
信号是内核对某个进程的中断;
tasklet基于软中断实现;
softirq结构体包括软中断处理函数及其参数.
open_softirq注册软中断对应的处理函数,raise_softirq触发一个软中断.
共享中断
linux2.6支持共享中断,单根硬件中断线上支持多个设备.
irqreturn_t xxx_interrupt( int irq, void *dev_id, struct pt_regs *regs )
{
...
int status = read_int_status();
if ( !is_myint(dev_id_status) ) 所有共享的中断处理程序都会执行,要迅速判断执行
{
return IRQ_NONE;
}
...
return IRQ_HANDLED;
}
int xxx_init( void )
{
...
result = request_irq( sh_irq, xxx_interrupt, SA_SHIRQ, "xxx", xxx_dev ); 所有中断都要设为SA_SHIRQ,传入xxx_dev.
...
}
内核定时器使用
1,timer_list 结构
struct timer_list{
struct list_head entry; 定时器列表
unsigned long expires; 定时器到期时间jiffies
void (*function)(unsigned long); 定时器处理函数
unsigned long data; 定时器处理函数参数
struct timer_base_s *base;
};
struct timer_list my_list;
2,void init_timer( struct timer_list *timer );
TIMER_INITIALIZER( _function, _expires, _data );
DEFINE_TIMER( _name, _function, _expires, _data );
static inline void setup_timer( struct timer_list *timer, void(*function)(unsigned long), unsigned long data );
3,void add_timer( struct timer_list *timer ); 注册内核定时器
4,int del_timer( struct timer_list *timer );
5,int mod_timer( struct timer_list *timer, unsigned long expires );
模板:
struct xxx_dev{
struct cdev cdev;
...
struct timer_list xxx_timer; 在设备结构体中添加定时器
}
xxx_fcntl(...)
{
struct xxx_dev *dev = filp->private_data;
...
init_timer( &dev->xxx_timer );
dev->xxx_timer.function = &xxx_do_timer;
dev->xxx_timer.data = (unsigned long)dev;
dev->xxx_timer.expires = jiffies + delay; 初始化定时器结构体
...
add_timer( &dev->xxx_timer );
...
}
xxx_func2(...)
{
...
del_timer( &dev->xxx_timer);
...
}
static void xxx_do_timer( unsigned long arg )
{
struct xxx_device *dev = (struct xxx_device *)(arg);
...
dev->xxx_timer.expires = jiffies + delay; 重设时间
add_timer( &dev->xxx_timer ); 再次注册
...
}
内核延时
短延时
void ndelay( unsigned long nsecs ); CPU等待
void udelay( unsigned long usecs );
void mdelay( unsigned long msecs );
void msleep( unsigned int millisecs ); 睡眠相应的时间
unsigned long msleep_interruptible( unsigned int millisecs );
void ssleep( unsigned int second );
长延时
unsigned long delay = jiffies + 100; jiffies+2*HZ
while ( time_before( jiffies, delay ) ); time_after(a,b),用来比较时间
睡着延时
void msleep( unsigned int msecs )
{
unsigned long timeout = msecs_to_jiffies( msecs ) + 1;
while( timeout )
timeout = schedule_timeout_uninterruptible( timeout ); 调度到睡眠状态
}
unsigned long msleep_interruptible( unsigned int msecs )
{
unsigned long timeout = msecs_to_jiffies(msecs) + 1;
while ( timeout && !signal_pending(current) )
timeout = schedule_timeout_interruptible( timeout );
return jiffies_to_msecs( timeout );
}
或将当前进程添加到等待队列:
sleep_on_timeout( wait_queue_head_t *q, unsigned long timeout );
interruptible_sleep_on_timeout( wait_queue_head_t *q, unsigned long timeout );
五,内存管理和I/O
硬件基础
内存空间和I/O空间
X86中有I/O空间,ARM,MIPS上没有,只有内存空间,I/O空间可选. IN 累加器,{端口号|DX}; OUT {端口号|DX},累加器
typedef void (*lpFunction)();
lpFunction lpReset = (lpFunction)0xF000FFF0;
lpReset(); 相当于软启动
MMU
内存管理单元,用来实现虚拟和物理地址间的转换,内存访问权限保护,Cache缓存控制.它包括:
TLB:Translation Lookaside Buffer,其中C位用于控制对应地址高速缓存,B位写缓存,访问权限和域位控制读写允许,不允许则发送一个存储器异常信号.
TTW:Translation Table walk,如TLB中没有找到虚拟地址入口,则通过TTW遍历获取 地址转换信息和权限,放入到TLB中的空位或替换一个入口.
当访问权限允许时,对物理地址的访问发生在Cache或内存.
ARM7TDMI中的S3C44B0X,Dragonball,ColdFire,Hitachi H8/300不带MMU,使用uClinux.
S3C2410,2440带MMU.vivi中会建立一个一级页表.
void mem_map_init(void)
{
#ifdef CONFIG_S3C2410_NAND_BOOT
mem_map_nand_boot(); 最终会调用mem_mapping_linear,建立一级页表.
#else
mem_map_nor();
#endif
cache_clean_invalidate(); 清空cache,使cache无效
tlb_invalidate(); 使快表tlb无效
}
static inline void mem_mapping_linear(void)
{
unsigned long pageoffset, sectionNumber;
putstr( "MMU table base address = 0x%", (unsigned long)mmu_table_base );
//使用ARM920T内存映射的Section模式,分成4096*1MB,mmu_table转换表大小16KB.
//内存映射模式包括:fault(无映射),Coarse Page(粗页表),Section(段),Fine Page(细页表).
for( sectionNumber = 0; sectionNumber < 4096; sectionNumber++ )
{
pageoffset = ( sectionNumber << 20 );
*( mmu_table_base + ( pageoffset>>20 ) ) = pageoffset | MMU_SECDESC;
}
//使SDRAM区域可缓存,0x30000000-0x33ffffff. DRAM_BASE = 0x30000000, DRAM_SIZE=64M.
for ( pageoffset = DRAM_BASE; pageoffset < (DRAM_BASE+DRAM_SIZE); pageoffset += SZ_1M )
{
*(mmu_table_base + ( pageoffset >> 20 )) = pageoffset | MMU_SECDESC | MMU_CACHEABLE;
}
}
linux内存管理
用户空间0到3GB 0xC0000000,内核空间3GB到4GB.都有相应的页表.内核1GB空间分为
3G 物理内存映射区; MAX 896MB
隔离带;
虚拟内存分配区; VMALLOC_START~VMALLOC_END,vmalloc()
隔离带;
高端页面映射区; PKMAP_BASE
专用页面映射区; FIXADDR_START~FIXADDR_TOP
4G 保留区;
对大于4GB的物理内存,使用CPU的扩展分页(PAE)模式,64位页目录项.这需要CPU支持(Intel pentium Pro).
用户空间申请和释放内存
char *p = malloc(...);
free( p );
内核空间申请和释放内存
void *kmalloc( size_t size, int flags );会阻塞,所以不能用于 中断上下文,自旋锁,中断处理函数,tasklet和内核定时器.这些用GFP_ATOMIC
GFP_KERNEL: 在内核空间的进程中申请内存.底层依靠__get_free_pages()实现.
GFP_USER: 为用户空间页分配内存.
GFP_HIGHUSER: 为用户空间页分配内存,但是从高端内存分配.
GFP_NOIO: 不允许任何I/O初始化.
GFP_NOFS: 不允许任何文件系统调用.
__GFP_DMA: 要求分配在能够DMA的内存区.
__GFP_HIGHMEM:分配的内存可以位于高端内存.
__GFP_COLD: 请求一个较长时间不访问的页.
__GFP_NOWARN: 当一个分配无法满足时,阻止内核发出警告.
__GFP_HIGH: 高优先级请求,要求分配保留页.
__GFP_REPEAT: 分配失败则重复尝试.
__GFP_NOFAIL: 标示只能成功.
__GFP_NORETRY:申请不到,则立即放弃.
__get_free_pages( unsigned int flags, unsigned int order ); order为0到11,1页到2048页.2的order次方.
__get_free_page( int unsigned flag );
get_zeroed_page( unsigned int flags );
struct page *alloc_pages( int gfp_mask, unsigned long order ); 返回 分配的页描述符而非首地址.
void free_page( unsigned long addr );
void free_pages( unsigned long addr, unsigned long order );
void *vmalloc( unsigned long size ); 会新建页表,开销大.用于软件中较大的顺序缓冲区.小的页分配用kmalloc.
void vfree( void *addr ); 内部调用kmalloc,GFP_KERNEL. 例如create_module()会用到.
slab机制操作内存:
struct kmem_cache *kmem_cache_create( const char *name, size_t size,
size_t align, unsigned long flags,
void (*ctor)(void*, struct kmem_cache *, unsigned long),
void (*dtor)(void*, struct kmem_cache *, unsigned long) );
创建一个slab缓存,可以驻留任意数目大小一样的后备缓存.size是分配的数据结构的大小,flags是如何分配的位掩码.
SLAB_NO_REAP :内存紧缺也不自动收缩这块缓存.
SLAB_HWCACHE_ALIGN :数据对象对齐到一个缓存行.
SLAB_CACHE_DMA :数据对象在DMA内存区分配.
void *kmem_cache_alloc( struct kmem_cache *cachep, gfp_t flags );
在kmem_cache_create()创建的slab后备缓冲中分配一块并返回首地址指针.
void kmem_cache_free( struct kmem_cache *cachep, void *objp );
int kmem_cache_destroy( struct kmem_cache *cachep );
static kmem_cache_t *xxx_cachep;
xxx_cachep = kmem_cache_create( "xxx", sizeof(struct xxx), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
struct xxx *ctx;
ctx = kmem_cache_alloc( xxx_cachep, GFP_KERNEL );
...
kmem_cache_free( xxx_cachep, ctx );
kmem_cache_destroy( xxx_cachep );
内存池机制操作内存:
创建内存池;
mempool_t *mempool_create( int min_nr, 需要预分配对象的数目.
mempool_alloc_t *alloc_fn, 指向内存池机制提供的标准对象的分配函数指针.
mempool_alloc_t *free_fn, 指向内存池机制提供的标准对象的回收函数指针.
void *pool_data 分配和回收函数用到的指针.
);
alloc_fn的原型是: typedef void *(mempool_alloc_t)( int gfp_mask, void *pool_data );
free_fn的原型是: typedef void (mempool_free_t)( void *element, void *pool_data );
void *mempool_alloc( mempool_t *pool, int gfp_mask ); 分配对象
void mempool_free( void *element, mempool_t *pool ); 回收对象
void mempool_destroy( mempool_t *pool ); 回收内存池
虚拟地址与物理地址的转换
#define __pa(x) ( (unsigned long)(x) - PAGE_OFFSET )
extern inline unsigned long virt_to_phys( volatile void *address )
{
return __pa( address );
}
#define __va(x) ( (void *)(unsigned long)(x) + PAGE_OFFSET ) PAGE_OFFSET=3GB
extern inline void *phys_to_virt( unsigned long address )
{
return __va( address );
}
上述方法只适用于常规内存,高端内存的虚拟地址和物理地址不适用这样简单的换算关系.
设备I/O端口和I/O内存的访问
设备通常有一组寄存器,包括设备读,写和状态.即控制寄存器,数据寄存器和状态寄存器.
当这些寄存器位于I/O空间,就叫IO端口.当这些寄存器位于内存空间,就叫IO内存.