Linux内核中的seq操作-Godbach-ChinaUnix博客

哥德巴赫猜想godbach.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

godbach

博客访问： 2400789
博文数量： 145
博客积分： 8668
博客等级：中将
技术积分： 3922
用户组：普通用户
注册时间： 2007-03-09 21:21

个人简介

work hard

文章分类

全部博文（145）

HAProxy（9）
内核研究（72）

系统安全（7）

流量管理（9）

网络安全（12）
程序设计（22）

Perl（2）

VC（1）

C编程（8）

Shell（11）
SSL（3）
版本管理（6）
Linux（13）
Unix（3）
随笔（14）

历史（3）

影视（2）
杂项（3）
未分配的博文（0）

文章存档

2016年（1）

2015年（1）

2014年（1）

2013年（12）

2012年（3）

2011年（9）

2010年（34）

2009年（55）

2008年（20）

2007年（9）

我的朋友

相关博文

Linux内核中的seq操作

分类： LINUX

2009-12-08 17:47:32

本文档的Copyleft归yfydz所有，使用GPL发布，可以自由拷贝，转载，转载时请保持文档的完整性，严禁用于任何商业用途。
msn:
来源：http://yfydz.cublog.cn

1. 前言

在fs/seq_file.c中定义了一系列用于顺序读取的seq操作函数，这些函数早在2001年就已引入，但以前内核中一直用得不多，而到了2.6内核后，许多/proc的只读文件都大量使用seq函数来处理。

以下内核源码版本为2.6.17.11。

2. seq相关数据结构

2.1 seq文件结构

/*
 * State of one seq stream: the output buffer, how much of it is still
 * waiting to be delivered, and the callbacks used to produce records.
 */
struct seq_file {
/* buffer that holds the formatted output of the stream */
char *buf;
/* size of buf in bytes */
size_t size;
/* offset in buf of the first byte that still has to be shown */
size_t from;
/* number of bytes of data currently held in buf */
size_t count;
/* index of the current data record */
loff_t index;
/* copy of the version number kept in struct file */
loff_t version;
/* lock protecting the stream */
struct mutex lock;
/* callbacks that define how the data is produced and shown */
struct seq_operations *op;
/* private data for the user of the stream */
void *private;
};

struct seq_file描述了seq处理的缓冲区及处理方法，buf是动态分配的，大小不小于PAGE_SIZE，通常这个结构是通过struct file结构中的private_data来指向的。

char *buf：seq流的缓冲区
size_t size：缓冲区大小
size_t from：from指向当前要显示的数据头位置
size_t count：缓冲区中已有的数据长度
loff_t index：数据记录索引值
loff_t version：版本号，是struct file的版本号的拷贝
struct mutex lock：seq锁
struct seq_operations *op：seq操作结构，定义数据显示的操作函数
void *private：私有数据

2.2 seq操作结构

seq的操作结构比较简单，就是4个操作函数，完成开始、停止、显示和取下一个操作：

/* include/linux/seq_file.h */

/*
 * The four callbacks a seq stream user supplies: start, stop, fetch the
 * next record, and show a record.
 */
struct seq_operations {
/* begin a pass at position *pos; the result is handed to show() */
void * (*start) (struct seq_file *m, loff_t *pos);
/* end a pass; v is the last value obtained from start()/next() */
void (*stop) (struct seq_file *m, void *v);
/* move from record v to the following record, updating *pos */
void * (*next) (struct seq_file *m, void *v, loff_t *pos);
/* write record v into the stream; a non-zero result is an error */
int (*show) (struct seq_file *m, void *v);
};

3. seq操作函数

seq操作包括以下一系列函数：

int seq_open(struct file *, struct seq_operations *);
打开seq流，为struct file分配struct seq_file结构，并定义seq_file的操作；

ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
从seq流中读数据到用户空间，其中通过struct seq_file的op指针循环调用struct seq_operations中的各个函数来读数据；

/*
 * Read from a seq stream into a user-space buffer.
 *
 * file: open file whose private_data points to the struct seq_file
 * buf:  user-space destination buffer
 * size: maximum number of bytes to copy
 * ppos: file position, advanced by the number of bytes copied
 *
 * Returns the number of bytes copied to user space. If nothing was
 * copied, the value of err is returned instead (0, or an error such as
 * -ENOMEM / -EFAULT / whatever start(), next() or show() reported).
 * The seq_file lock is held for the whole call.
 */
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
struct seq_file *m = (struct seq_file *)file->private_data;
size_t copied = 0;
loff_t pos;
size_t n;
void *p;
int err = 0;

// Take the lock first.
mutex_lock(&m->lock);
/*
* seq_file->op->..m_start/m_stop/m_next may do special actions
* or optimisations based on the file->f_version, so we want to
* pass the file->f_version to those methods.
*
* seq_file->version is just copy of f_version, and seq_file
* methods can treat it simply as file version.
* It is copied in first and copied out after all operations.
* It is convenient to have it as part of structure to avoid the
* need of passing another argument to all the seq_file methods.
*/
m->version = file->f_version;
/* grab buffer if we didn't have one */
// If the seq_file has no buffer yet, allocate one of PAGE_SIZE bytes.
if (!m->buf) {
  m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
  if (!m->buf)
   goto Enomem;
}
/* if not empty - flush it first */
// count is the amount of data not yet handed to user space;
// deliver as much of it as possible before producing anything new.
if (m->count) {
  n = min(m->count, size);
  err = copy_to_user(buf, m->buf + m->from, n);
  if (err)
   goto Efault;
  m->count -= n;
  m->from += n;
  size -= n;
  buf += n;
  copied += n;
// Buffer fully drained: move on to the next record index.
  if (!m->count)
   m->index++;
// The caller's buffer is full: nothing more to do in this call.
  if (!size)
   goto Done;
}
// Main transfer: the buffer must end up holding at least one whole record.
/* we need at least one record in buffer */
while (1) {
// Position of the record to produce.
  pos = m->index;
// Start the pass; the result is the object pointer for this record.
  p = m->op->start(m, &pos);
  err = PTR_ERR(p);
  if (!p || IS_ERR(p))
   break;
// Format the record into the buffer.
  err = m->op->show(m, p);
  if (err)
   break;
// The record left room in the buffer: go on and try to add more records.
  if (m->count < m->size)
   goto Fill;
// Otherwise a single record was too big for the buffer. Stop the pass,
// replace the buffer with one twice as large and start over, so that the
// buffer ends up larger than one record.
  m->op->stop(m, p);
  kfree(m->buf);
  m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
  if (!m->buf)
   goto Enomem;
  m->count = 0;
  m->version = 0;
}
// Reached via break: start() returned nothing/an error or show() failed.
m->op->stop(m, p);
m->count = 0;
goto Done;
Fill:
// Keep reading records into the buffer.
/* they want more? let's try to get some more */
while (m->count < size) {
// Remember where this record starts so it can be rolled back.
  size_t offs = m->count;
  loff_t next = pos;
  p = m->op->next(m, p, &next);
  if (!p || IS_ERR(p)) {
   err = PTR_ERR(p);
   break;
  }
  err = m->op->show(m, p);
// show() failed or the buffer filled up: drop this record's output by
// restoring count to its value before the record.
  if (err || m->count == m->size) {
   m->count = offs;
   break;
  }
  pos = next;
}
// Stop the seq pass.
m->op->stop(m, p);
n = min(m->count, size);
// Copy the data to user space.
err = copy_to_user(buf, m->buf, n);
if (err)
  goto Efault;
copied += n;
m->count -= n;
// Data left over: remember where it starts for the next call;
// otherwise step to the following record.
if (m->count)
  m->from = n;
else
  pos++;
m->index = pos;
Done:
// Nothing copied: report err; otherwise advance the file position.
if (!copied)
  copied = err;
else
  *ppos += copied;
file->f_version = m->version;
mutex_unlock(&m->lock);
return copied;
Enomem:
err = -ENOMEM;
goto Done;
Efault:
err = -EFAULT;
goto Done;
}

loff_t seq_lseek(struct file *, loff_t, int);
定位seq流当前指针偏移；

int seq_release(struct inode *, struct file *);
释放seq流所分配的动态内存空间，即struct seq_file的buf及其本身；

int seq_escape(struct seq_file *, const char *, const char *);
将字符串写入seq流，其中需要转义的字符（由第三个参数指定）被转换为8进制转义序列；

int seq_putc(struct seq_file *m, char c);
向seq流中写一个字符

int seq_puts(struct seq_file *m, const char *s);
向seq流中写一个字符串

int seq_printf(struct seq_file *, const char *, ...)
__attribute__ ((format (printf,2,3)));
以格式化方式向seq流中写信息；

int seq_path(struct seq_file *, struct vfsmount *, struct dentry *, char *);
在seq流中添加路径信息，路径中需要转义的字符（由最后一个参数指定）都转换为8进制数。

int seq_release_private(struct inode *, struct file *);
释放seq_file的private然后进行seq_release

3. 用seq流填写/proc文件

以下使用文件/proc/net/ip_conntrack的生成代码来说明seq流的使用：

3.1 创立文件

以前2.4版本中使用proc_net_create()来建立/proc/net下的文件，现在使用seq流时要使用proc_net_fops_create()函数来创建，区别在于函数的最后一个参数：proc_net_create()的是一个函数指针，而proc_net_fops_create()的是一个文件操作结构指针：

......
proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
......

proc_net_fops_create()函数其实也很简单，调用create_proc_entry()函数建立/proc文件项，然后将文件项的操作结构指针指向所提供的文件操作指针：

/*
 * Create a proc entry called name with permission bits mode below
 * proc_net and install fops as its file operations.
 *
 * Returns whatever create_proc_entry() returned; when that is a null
 * pointer nothing else is done.
 */
static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
	mode_t mode, const struct file_operations *fops)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry(name, mode, proc_net);
	if (!entry)
		return entry;

	/* Entry exists: point it at the caller's file operations. */
	entry->proc_fops = fops;
	return entry;
}

3.2 文件操作结构

/proc/net/ip_conntrack所用的文件结构如下：

/*
 * File operations of /proc/net/ip_conntrack. Only open is specific to
 * this file; read, llseek and release are the stock seq functions.
 */
static struct file_operations ct_file_ops = {
	.owner = THIS_MODULE,
	.open = ct_open,
	.release = seq_release_private,
	.llseek = seq_lseek,
	.read = seq_read,
};
可见，结构中除了open()函数是需要自定义外，其他的读、定位、释放函数都可以用seq标准函数。

3.3 open函数定义

open函数主要就是调用seq_open()函数将一个struct seq_operations结构和struct file链接起来，如果需要有私有数据的话，需要分配出动态空间作为struct seq_file的私有数据：

/*
 * open() handler: allocate the iterator state, bind ct_seq_ops to the
 * file with seq_open(), and hang the zeroed state off seq_file->private.
 *
 * Returns -ENOMEM if the state cannot be allocated, otherwise the
 * result of seq_open(); on a seq_open() failure the state is freed.
 */
static int ct_open(struct inode *inode, struct file *file)
{
	struct ct_iter_state *state;
	struct seq_file *m;
	int err;

	state = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	err = seq_open(file, &ct_seq_ops);
	if (err) {
		/* No seq_file to attach to: give the state back. */
		kfree(state);
		return err;
	}

	/* seq_open() left the new seq_file in file->private_data. */
	m = file->private_data;
	m->private = state;
	memset(state, 0, sizeof(struct ct_iter_state));
	return err;
}

简单的如exp_open()函数，就只调用seq_open()函数就完了：

/*
 * open() handler with no private state: just bind exp_seq_ops to the
 * file and pass the result of seq_open() back to the caller.
 */
static int exp_open(struct inode *inode, struct file *file)
{
	int err = seq_open(file, &exp_seq_ops);

	return err;
}

3.4 seq操作结构

/* seq callbacks used to produce /proc/net/ip_conntrack. */
static struct seq_operations ct_seq_ops = {
	.start = ct_seq_start,
	.stop = ct_seq_stop,
	.next = ct_seq_next,
	.show = ct_seq_show,
};

这个结构就是填写4个操作函数：

start()函数完成读数据前的一些预先操作，通常如加锁，定位数据记录位置等，该函数返回值就是show()函数第二个参数：

/*
 * start() callback: take the ip_conntrack_lock read lock, then look up
 * the record at position *pos. The result is what show() receives.
 */
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
	void *entry;

	read_lock_bh(&ip_conntrack_lock);
	entry = ct_get_idx(seq, *pos);
	return entry;
}

stop()函数完成读数据后的一些恢复操作，如解锁等：

/*
 * stop() callback: release the ip_conntrack_lock read lock. Neither
 * argument is used.
 */
static void ct_seq_stop(struct seq_file *s, void *v)
{
read_unlock_bh(&ip_conntrack_lock);
}

next()函数定位数据下一项：

/*
 * next() callback: advance the position by one and return the record
 * that follows v.
 */
static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	*pos += 1;
	return ct_get_next(s, v);
}

show()函数实现读数据过程，将要输出的数据直接用seq_printf()函数打印到seq流缓冲区中，再由seq_read()函数通过copy_to_user()拷贝到用户空间：

/*
 * show() callback: print one conntrack entry into the seq stream.
 *
 * s: seq stream to print into
 * v: record pointer obtained from start()/next()
 *
 * Returns 0 when the entry was printed or skipped (non-original
 * direction), and -ENOSPC when seq_printf() reports a failure.
 */
static int ct_seq_show(struct seq_file *s, void *v)
{
// start() actually returns a struct list_head pointer, but the first
// member of struct ip_conntrack_tuple_hash is a struct list_head, so the
// pointer can be converted directly without computing an offset.
const struct ip_conntrack_tuple_hash *hash = v;
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;

ASSERT_READ_LOCK(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack);

/* we only want to print DIR_ORIGINAL */
if (DIRECTION(hash))
return 0;

proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
IP_NF_ASSERT(proto);
// Print the connection and protocol information: protocol name, protocol
// number, and the time left on the timeout timer (0 if it is not pending).
if (seq_printf(s, "%-8s %u %ld ",
        proto->name,
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
        timer_pending(&conntrack->timeout)
        ? (long)(conntrack->timeout.expires - jiffies)/HZ
        : 0) != 0)
  return -ENOSPC;

// NOTE(review): the dotted line below is in the source as-is; it appears
// to mark code the article left out.
......

if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
return -ENOSPC;

return 0;
}

4. 结论

seq流函数的使用保证了数据能顺序输出，这也就是/proc只读文件中使用它的最大原因吧。

阅读(2453) | 评论(0) | 转发(1) |

上一篇：VI命令集锦

下一篇：《中国人史纲》之苻坚淝水之战

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6