搞过网络的人,一般都会用到抓包分析工具:在windows下一般是wireshark,在linux下一般是系统自带的tcpdump. 这里我们就说说tcpdump. 至于它如何使用,请自行查阅资料;它支持的选项本身并不复杂,复杂的是它支持的过滤表达式. 不论wireshark还是tcpdump,我觉得最重要的有两点:一是它们的基本原理,二是强大的解码系统.
可以从tcpdump官方网站(www.tcpdump.org)下载最新的tcpdump源码和libpcap库.
参考代码:tcpdump4.5.1 libpcap1.5.3
我们看tcpdump主函数代码 tcpdump.c :
-
int
-
main(int argc, char **argv)
-
{
-
register int cnt, op, i;
-
bpf_u_int32 localnet, netmask;
-
register char *cp, *infile, *cmdbuf, *device, *RFileName, *VFileName, *WFileName;
-
pcap_handler callback;
-
int type;
-
int dlt;
-
int new_dlt;
-
const char *dlt_name;
-
struct bpf_program fcode;
-
#ifndef WIN32
-
RETSIGTYPE (*oldhandler)(int);
-
#endif
-
struct print_info printinfo;
-
struct dump_info dumpinfo;
-
u_char *pcap_userdata;
-
char ebuf[PCAP_ERRBUF_SIZE];
-
char VFileLine[PATH_MAX + 1];
-
char *username = NULL;
-
char *chroot_dir = NULL;
-
char *ret = NULL;
-
char *end;
-
#ifdef HAVE_PCAP_FINDALLDEVS
-
pcap_if_t *devpointer;
-
int devnum;
-
#endif
-
int status;
-
FILE *VFile;
-
#ifdef WIN32
-
if(wsockinit() != 0) return 1;
-
#endif /* WIN32 */
-
-
jflag=-1; /* not set */
-
gndo->ndo_Oflag=1;
-
gndo->ndo_Rflag=1;
-
gndo->ndo_dlt=-1;
-
gndo->ndo_default_print=ndo_default_print;
-
gndo->ndo_printf=tcpdump_printf;
-
gndo->ndo_error=ndo_error;
-
gndo->ndo_warning=ndo_warning;
-
gndo->ndo_snaplen = DEFAULT_SNAPLEN;
-
-
cnt = -1;
-
device = NULL;
-
infile = NULL;
-
RFileName = NULL;
-
VFileName = NULL;
-
VFile = NULL;
-
WFileName = NULL;
-
dlt = -1;
-
if ((cp = strrchr(argv[0], '/')) != NULL)
-
program_name = cp + 1;
-
else
-
program_name = argv[0];
-
-
if (abort_on_misalignment(ebuf, sizeof(ebuf)) < 0)
-
error("%s", ebuf);
-
-
#ifdef LIBSMI
-
smiInit("tcpdump");
-
#endif
-
-
while ( // 对命令参数的处理 ,解析后,会设置flags,然后针对flags做相应的处理.
-
(op = getopt(argc, argv, "aAb" B_FLAG "c:C:d" D_FLAG "eE:fF:G:hHi:" I_FLAG j_FLAG J_FLAG "KlLm:M:nNOp" P_FLAG "qr:Rs:StT:u" U_FLAG "vV:w:W:xXy:Yz:Z:")) != -1)
-
switch (op) {
-
-
case 'a':
-
/* compatibility for old -a */
-
break;
-
-
case 'A':
-
++Aflag;
-
break;
我们还需要看一个头文件:netdissect.h里定义了一个数据结构struct netdissect_options,用来描述tcpdump支持的所有参数动作. 每一个参数有对应的flag;在tcpdump的main里面,会根据用户传入的参数来增加相应flag的数值,最后根据这些flag数值来实现特定动作. 各个参数的含义请参考源代码注释.
-
/*
 * Option and state block shared by the tcpdump dissectors: one int flag
 * per command-line option, dump-file rotation bookkeeping, ESP/signature
 * secrets, the bounds of the packet currently being printed, and the
 * output/error/warning callbacks.
 *
 * The struct declaration is terminated with ';' (the closing brace alone
 * is not a complete declaration in C).
 */
struct netdissect_options {
-
int ndo_aflag; /* translate network and broadcast addresses */
-
int ndo_bflag; /* print 4 byte ASes in ASDOT notation */
-
int ndo_eflag; /* print ethernet header */
-
int ndo_fflag; /* don't translate "foreign" IP address */
-
int ndo_Kflag; /* don't check TCP checksums */
-
int ndo_nflag; /* leave addresses as numbers */
-
int ndo_Nflag; /* remove domains from printed host names */
-
int ndo_qflag; /* quick (shorter) output */
-
int ndo_Rflag; /* print sequence # field in AH/ESP*/
-
int ndo_sflag; /* use the libsmi to translate OIDs */
-
int ndo_Sflag; /* print raw TCP sequence numbers */
-
int ndo_tflag; /* print packet arrival time */
-
int ndo_Uflag; /* "unbuffered" output of dump files */
-
int ndo_uflag; /* Print undecoded NFS handles */
-
int ndo_vflag; /* verbose */
-
int ndo_xflag; /* print packet in hex */
-
int ndo_Xflag; /* print packet in hex/ascii */
-
int ndo_Aflag; /* print packet only in ascii observing TAB,
-
* LF, CR and SPACE as graphical chars
-
*/
-
int ndo_Bflag; /* buffer size */
-
int ndo_Iflag; /* rfmon (monitor) mode */
-
int ndo_Oflag; /* run filter code optimizer */
-
int ndo_dlt; /* if != -1, ask libpcap for the DLT it names*/
-
int ndo_jflag; /* packet time stamp source */
-
int ndo_pflag; /* don't go promiscuous */
-
-
int ndo_Cflag; /* rotate dump files after this many bytes */
-
int ndo_Cflag_count; /* Keep track of which file number we're writing */
-
int ndo_Gflag; /* rotate dump files after this many seconds */
-
int ndo_Gflag_count; /* number of files created with Gflag rotation */
-
time_t ndo_Gflag_time; /* The last time_t the dump file was rotated. */
-
int ndo_Wflag; /* recycle output files after this number of files */
-
int ndo_WflagChars;
-
int ndo_Hflag; /* dissect 802.11s draft mesh standard */
-
int ndo_suppress_default_print; /* don't use default_print() for unknown packet types */
-
const char *ndo_dltname;
-
-
char *ndo_espsecret;
-
struct sa_list *ndo_sa_list_head; /* used by print-esp.c */
-
struct sa_list *ndo_sa_default;
-
-
char *ndo_sigsecret; /* Signature verification secret key */
-
-
struct esp_algorithm *ndo_espsecret_xform; /* cache of decoded */
-
char *ndo_espsecret_key;
-
-
int ndo_packettype; /* as specified by -T */
-
-
char *ndo_program_name; /*used to generate self-identifying messages */
-
-
int32_t ndo_thiszone; /* seconds offset from gmt to local time */
-
-
int ndo_snaplen;
-
-
/*global pointers to beginning and end of current packet (during printing) */
-
const u_char *ndo_packetp;
-
const u_char *ndo_snapend;
-
-
/* bookkeeping for ^T output */
-
int ndo_infodelay;
-
-
/* pointer to void function to output stuff */
-
void (*ndo_default_print)(netdissect_options *,
-
register const u_char *bp, register u_int length);
-
void (*ndo_info)(netdissect_options *, int verbose);
-
-
int (*ndo_printf)(netdissect_options *,
-
const char *fmt, ...)
-
#ifdef __ATTRIBUTE___FORMAT_OK_FOR_FUNCTION_POINTERS
-
__attribute__ ((format (printf, 2, 3)))
-
#endif
-
;
-
void (*ndo_error)(netdissect_options *,
-
const char *fmt, ...)
-
#ifdef __ATTRIBUTE___NORETURN_OK_FOR_FUNCTION_POINTERS
-
__attribute__ ((noreturn))
-
#endif /* __ATTRIBUTE___NORETURN_OK_FOR_FUNCTION_POINTERS */
-
#ifdef __ATTRIBUTE___FORMAT_OK_FOR_FUNCTION_POINTERS
-
__attribute__ ((format (printf, 2, 3)))
-
#endif /* __ATTRIBUTE___FORMAT_OK_FOR_FUNCTION_POINTERS */
-
;
-
void (*ndo_warning)(netdissect_options *,
-
const char *fmt, ...)
-
#ifdef __ATTRIBUTE___FORMAT_OK_FOR_FUNCTION_POINTERS
-
__attribute__ ((format (printf, 2, 3)))
-
#endif
-
;
-
};
而在tcpdump.c中定义了一个全局的变量:
-
/* The single global option block, and the pointer through which it is accessed. */
netdissect_options Gndo;
-
netdissect_options *gndo = &Gndo;
而在interface.h 又定义了很多宏,方便gndo里参数的调用:
-
/*
 * Shorthand names for the option block: each macro below expands to the
 * same-named ndo_ field reached through the global gndo pointer, so code
 * can write e.g. "snaplen" instead of "gndo->ndo_snaplen".
 */
extern netdissect_options *gndo;
-
-
#define bflag gndo->ndo_bflag
-
#define eflag gndo->ndo_eflag
-
#define fflag gndo->ndo_fflag
-
#define jflag gndo->ndo_jflag
-
#define Kflag gndo->ndo_Kflag
-
#define nflag gndo->ndo_nflag
-
#define Nflag gndo->ndo_Nflag
-
#define Oflag gndo->ndo_Oflag
-
#define pflag gndo->ndo_pflag
-
#define qflag gndo->ndo_qflag
-
#define Rflag gndo->ndo_Rflag
-
#define sflag gndo->ndo_sflag
-
#define Sflag gndo->ndo_Sflag
-
#define tflag gndo->ndo_tflag
-
#define Uflag gndo->ndo_Uflag
-
#define uflag gndo->ndo_uflag
-
#define vflag gndo->ndo_vflag
-
#define xflag gndo->ndo_xflag
-
#define Xflag gndo->ndo_Xflag
-
#define Cflag gndo->ndo_Cflag
-
#define Gflag gndo->ndo_Gflag
-
#define Aflag gndo->ndo_Aflag
-
#define Bflag gndo->ndo_Bflag
-
#define Iflag gndo->ndo_Iflag
-
#define suppress_default_print gndo->ndo_suppress_default_print
-
#define packettype gndo->ndo_packettype
-
#define sigsecret gndo->ndo_sigsecret
-
#define Wflag gndo->ndo_Wflag
-
#define WflagChars gndo->ndo_WflagChars
-
#define Cflag_count gndo->ndo_Cflag_count
-
#define Gflag_count gndo->ndo_Gflag_count
-
#define Gflag_time gndo->ndo_Gflag_time
-
#define Hflag gndo->ndo_Hflag
-
#define snaplen gndo->ndo_snaplen
-
#define snapend gndo->ndo_snapend
这里不解释各个参数的使用及其调用.
如果我们要监控一个网络接口,一般我们会指定-i选项 后面是我们的接口ethX.
然后直接调用到:
-
#else
-
*ebuf = '\0';
-
pd = pcap_open_live(device, snaplen, !pflag, 1000, ebuf);
-
if (pd == NULL)
-
error("%s", ebuf);
-
else if (*ebuf)
-
warning("%s", ebuf);
-
#endif /* HAVE_PCAP_CREATE */
如果没有指定接口那么会调用pcap_lookupdev函数来查询一个.比如从用户空间查询/proc/net/dev
-
else {
-
/*
-
* We're doing a live capture.
-
*/
-
if (device == NULL) {
-
device = pcap_lookupdev(ebuf);
-
if (device == NULL)
-
error("%s", ebuf);
-
}
它会调用libpcap库来查找设备链表,和内核设备链表类似.本质上它查询的是/proc/net/dev
-
#if !defined(WIN32) && !defined(MSDOS)
-
-
/*
-
* Return the name of a network interface attached to the system, or NULL
-
* if none can be found. The interface must be configured up; the
-
* lowest unit number is preferred; loopback is ignored.
-
*/
-
/*
 * errbuf: caller-supplied buffer that receives "no suitable device found"
 * (copied with a PCAP_ERRBUF_SIZE bound) when the device list is empty or
 * starts with a loopback device.
 *
 * Returns NULL on failure, otherwise a pointer to the function-static
 * buffer "device"; a later call overwrites that buffer.
 */
char *
-
pcap_lookupdev(errbuf)
-
register char *errbuf;
-
{
-
pcap_if_t *alldevs;
-
/* for old BSD systems, including bsdi3 */
-
#ifndef IF_NAMESIZE
-
#define IF_NAMESIZE IFNAMSIZ
-
#endif
-
static char device[IF_NAMESIZE + 1];
-
char *ret;
-
-
/* NOTE(review): presumably pcap_findalldevs() fills errbuf itself on -1 - confirm. */
if (pcap_findalldevs(&alldevs, errbuf) == -1)
-
return (NULL);
-
-
if (alldevs == NULL || (alldevs->flags & PCAP_IF_LOOPBACK)) {
-
/*
-
* There are no devices on the list, or the first device
-
* on the list is a loopback device, which means there
-
* are no non-loopback devices on the list. This means
-
* we can't return any device.
-
*
-
* XXX - why not return a loopback device? If we can't
-
* capture on it, it won't be on the list, and if it's
-
* on the list, there aren't any non-loopback devices,
-
* so why not just supply it as the default device?
-
*/
-
(void)strlcpy(errbuf, "no suitable device found",
-
PCAP_ERRBUF_SIZE);
-
ret = NULL;
-
} else {
-
/*
-
* Return the name of the first device on the list.
-
*/
-
/* Copy the name out before the list is freed below. */
(void)strlcpy(device, alldevs->name, sizeof(device));
-
ret = device;
-
}
-
-
pcap_freealldevs(alldevs);
-
return (ret);
-
}
处理完参数,它会调用到pcap_open_live函数
-
/*
 * Convenience wrapper around the create/set/activate sequence: creates a
 * handle for "source", applies snaplen, promisc and the to_ms timeout,
 * then activates it.
 *
 * Returns the activated handle, or NULL on failure. When a set or
 * activate step fails, a message prefixed with "source" is written to
 * errbuf (PCAP_ERRBUF_SIZE bound) and the handle is closed.
 */
pcap_t *
-
pcap_open_live(const char *source, int snaplen, int promisc, int to_ms, char *errbuf)
-
{
-
pcap_t *p;
-
int status;
-
-
/* NOTE(review): presumably pcap_create() has already filled errbuf when it returns NULL - confirm. */
p = pcap_create(source, errbuf);
-
if (p == NULL)
-
return (NULL);
-
status = pcap_set_snaplen(p, snaplen);
-
if (status < 0)
-
goto fail;
-
status = pcap_set_promisc(p, promisc);
-
if (status < 0)
-
goto fail;
-
status = pcap_set_timeout(p, to_ms);
-
if (status < 0)
-
goto fail;
-
/*
-
* Mark this as opened with pcap_open_live(), so that, for
-
* example, we show the full list of DLT_ values, rather
-
* than just the ones that are compatible with capturing
-
* when not in monitor mode. That allows existing applications
-
* to work the way they used to work, but allows new applications
-
* that know about the new open API to, for example, find out the
-
* DLT_ values that they can select without changing whether
-
* the adapter is in monitor mode or not.
-
*/
-
p->oldstyle = 1;
-
status = pcap_activate(p);
-
if (status < 0)
-
goto fail;
-
return (p);
-
fail:
-
/*
 * Three message shapes: the handle's own errbuf for PCAP_ERROR, status
 * text plus errbuf for the device/permission errors, status text alone
 * for everything else.
 */
if (status == PCAP_ERROR)
-
snprintf(errbuf, PCAP_ERRBUF_SIZE, "%s: %s", source,
-
p->errbuf);
-
else if (status == PCAP_ERROR_NO_SUCH_DEVICE ||
-
status == PCAP_ERROR_PERM_DENIED ||
-
status == PCAP_ERROR_PROMISC_PERM_DENIED)
-
snprintf(errbuf, PCAP_ERRBUF_SIZE, "%s: %s (%s)", source,
-
pcap_statustostr(status), p->errbuf);
-
else
-
snprintf(errbuf, PCAP_ERRBUF_SIZE, "%s: %s", source,
-
pcap_statustostr(status));
-
pcap_close(p);
-
return (NULL);
-
}
这个函数做了最重要的工作,它完成了和内核底层的通信.
我们看pcap_activate函数:
-
/*
 * Activate a handle by calling its activate_op callback.
 *
 * Returns PCAP_ERROR_ACTIVATED if pcap_check_activated() reports the
 * handle as already active; otherwise returns the callback's status.
 * A status >= 0 marks the handle activated; a negative status ensures
 * p->errbuf holds a message and resets the operation pointers.
 */
int
-
pcap_activate(pcap_t *p)
-
{
-
int status;
-
-
/*
-
* Catch attempts to re-activate an already-activated
-
* pcap_t; this should, for example, catch code that
-
* calls pcap_open_live() followed by pcap_activate(),
-
* as some code that showed up in a Stack Exchange
-
* question did.
-
*/
-
if (pcap_check_activated(p))
-
return (PCAP_ERROR_ACTIVATED);
-
/* Platform-specific work happens behind this function pointer. */
status = p->activate_op(p);
-
if (status >= 0)
-
p->activated = 1;
-
else {
-
if (p->errbuf[0] == '\0') {
-
/*
-
* No error message supplied by the activate routine;
-
* for the benefit of programs that don't specially
-
* handle errors other than PCAP_ERROR, return the
-
* error message corresponding to the status.
-
*/
-
snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "%s",
-
pcap_statustostr(status));
-
}
-
-
/*
-
* Undo any operation pointer setting, etc. done by
-
* the activate operation.
-
*/
-
initialize_ops(p);
-
}
-
return (status);
-
}
这里面最主要的调用就是 p->activate_op(p); 但是activate_op被初始化成了什么呢?它是在pcap_create所调用的pcap_create_interface里被赋值的:
-
pcap_t *
-
pcap_create_interface(const char *device, char *ebuf)
-
{
-
pcap_t *handle;
-
-
handle = pcap_create_common(device, ebuf, sizeof (struct pcap_linux));
-
if (handle == NULL)
-
return NULL;
-
-
handle->activate_op = pcap_activate_linux;
-
handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
我们看到被赋值为了pcap_activate_linux,那么它到底做了什么呢?在这个函数里它调用了一个很关键的函数activate_new
-
/* ===== Functions to interface to the newer kernels ================== */
-
-
/*
-
* Try to open a packet socket using the new kernel PF_PACKET interface.
-
* Returns 1 on success, 0 on an error that means the new interface isn't
-
* present (so the old SOCK_PACKET interface should be tried), and a
-
* PCAP_ERROR_ value on an error that means that the old mechanism won't
-
* work either (so it shouldn't be tried).
-
*/
-
static int
-
activate_new(pcap_t *handle)
-
{
-
#ifdef HAVE_PF_PACKET_SOCKETS
-
struct pcap_linux *handlep = handle->priv;
-
const char *device = handle->opt.source;
-
int is_any_device = (strcmp(device, "any") == 0);
-
int sock_fd = -1, arptype;
-
#ifdef HAVE_PACKET_AUXDATA
-
int val;
-
#endif
-
int err = 0;
-
struct packet_mreq mr;
-
-
/*
-
* Open a socket with protocol family packet. If the
-
* "any" device was specified, we open a SOCK_DGRAM
-
* socket for the cooked interface, otherwise we first
-
* try a SOCK_RAW socket for the raw interface.
-
*/
-
sock_fd = is_any_device ?
-
socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)) :
-
socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
我们看到,本质上是建立了一个协议族为PF_PACKET、类型为SOCK_RAW(指定"any"设备时为SOCK_DGRAM)、协议类型为ETH_P_ALL的socket. 后面还有setsockopt的调用来设置一些选项,它也会触发内核初始化一些东西.
就让我们看看socket系统调用的实现:
在socket.c中:
-
/*
 * socket() system call entry point.
 *
 * Splits "type" into the socket type proper and the extra flag bits
 * outside SOCK_TYPE_MASK, rejects any flag other than SOCK_CLOEXEC or
 * SOCK_NONBLOCK with -EINVAL, creates the socket through sock_create()
 * and maps it to a descriptor through sock_map_fd().
 *
 * Returns the result of sock_map_fd() on success, or a negative error
 * code; the socket is released if the mapping step fails.
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
-
{
-
int retval;
-
struct socket *sock;
-
int flags;
-
-
/* Check the SOCK_* constants for consistency. */
-
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
-
BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
-
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
-
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
-
-
flags = type & ~SOCK_TYPE_MASK;
-
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-
return -EINVAL;
-
type &= SOCK_TYPE_MASK;
-
-
/* Where the two constants differ, translate SOCK_NONBLOCK into O_NONBLOCK. */
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
-
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
-
-
retval = sock_create(family, type, protocol, &sock);
-
if (retval < 0)
-
goto out;
-
-
retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
-
if (retval < 0)
-
goto out_release;
-
-
out:
-
/* It may be already another descriptor 8) Not kernel problem. */
-
return retval;
-
-
out_release:
-
sock_release(sock);
-
return retval;
-
}
这里我们跟踪sock_create:它调用了__sock_create:
-
/*
 * Create a socket of the given family/type/protocol and store it in *res.
 *
 * Steps, in order: range-check family and type; remap the obsolete
 * (PF_INET, SOCK_PACKET) pair to PF_PACKET with a one-time log message;
 * call the security_socket_create() hook; allocate the socket; with
 * CONFIG_MODULES, request module "net-pf-<family>" if no family is
 * registered; look up net_families[family] under rcu_read_lock() and call
 * its ->create callback; call the security_socket_post_create() hook.
 *
 * Returns 0 on success. On failure returns a negative error code
 * (-EAFNOSUPPORT, -EINVAL, -ENFILE, or whatever a hook or ->create
 * returned); once the socket has been allocated, every failure path
 * releases it.
 */
static int __sock_create(struct net *net, int family, int type, int protocol,
-
struct socket **res, int kern)
-
{
-
int err;
-
struct socket *sock;
-
const struct net_proto_family *pf;
-
-
/*
-
* Check protocol is in range
-
*/
-
if (family < 0 || family >= NPROTO)
-
return -EAFNOSUPPORT;
-
if (type < 0 || type >= SOCK_MAX)
-
return -EINVAL;
-
-
/* Compatibility.
-
-
This uglymoron is moved from INET layer to here to avoid
-
deadlock in module load.
-
*/
-
if (family == PF_INET && type == SOCK_PACKET) {
-
static int warned;
-
if (!warned) {
-
warned = 1;
-
printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
-
current->comm);
-
}
-
family = PF_PACKET;
-
}
-
-
err = security_socket_create(family, type, protocol, kern);
-
if (err)
-
return err;
-
-
/*
-
* Allocate the socket and allow the family to set things up. if
-
* the protocol is 0, the family is instructed to select an appropriate
-
* default.
-
*/
-
sock = sock_alloc();
-
if (!sock) {
-
if (net_ratelimit())
-
printk(KERN_WARNING "socket: no more sockets\n");
-
return -ENFILE; /* Not exactly a match, but its the
-
closest posix thing */
-
}
-
-
sock->type = type;
-
-
#ifdef CONFIG_MODULES
-
/* Attempt to load a protocol module if the find failed.
-
*
-
* 12/09/1996 Marcin: this makes REALLY only sense, if the user
-
* requested real, full-featured networking support upon configuration.
-
* Otherwise module support will
-
*/
-
if (net_families[family] == NULL)
-
request_module("net-pf-%d", family);
-
#endif
-
-
rcu_read_lock();
-
pf = rcu_dereference(net_families[family]); // look up the registered protocol-family table
-
err = -EAFNOSUPPORT;
-
if (!pf)
-
goto out_release;
-
-
/*
-
* We will call the ->create function, that possibly is in a loadable
-
* module, so we have to bump that loadable module refcnt first.
-
*/
-
if (!try_module_get(pf->owner))
-
goto out_release;
-
-
/* Now protected by module ref count */
-
rcu_read_unlock();
-
-
err = pf->create(net, sock, protocol); // call the create function of the registered family
-
if (err < 0)
-
goto out_module_put;
-
-
/*
-
* Now to bump the refcnt of the [loadable] module that owns this
-
* socket at sock_release time we decrement its refcnt.
-
*/
-
if (!try_module_get(sock->ops->owner))
-
goto out_module_busy;
-
-
/*
-
* Now that we're done with the ->create function, the [loadable]
-
* module can have its refcnt decremented
-
*/
-
module_put(pf->owner);
-
err = security_socket_post_create(sock, family, type, protocol, kern); // kernel security hook; not analysed further in this article
-
if (err)
-
goto out_sock_release;
-
*res = sock;
-
-
return 0;
-
-
/* Error labels fall through: busy -> put module ref -> release socket. */
out_module_busy:
-
err = -EAFNOSUPPORT;
-
out_module_put:
-
sock->ops = NULL;
-
module_put(pf->owner);
-
out_sock_release:
-
sock_release(sock);
-
return err;
-
-
/* Reached only while rcu_read_lock() is still held. */
out_release:
-
rcu_read_unlock();
-
goto out_sock_release;
-
}
这里查询net_families,找到内核注册的协议,并调用create函数
-
err = pf->create(net, sock, protocol);
我们看看PF_PACKET的注册,在文件af_packet.c中:
-
/*
 * Module init: registers packet_proto, then the PF_PACKET socket family,
 * the per-net operations and the netdevice notifier.
 *
 * Returns the result of proto_register(); a non-zero result skips the
 * remaining registrations.
 *
 * NOTE(review): the return values of sock_register(),
 * register_pernet_subsys() and register_netdevice_notifier() are not
 * checked in this version.
 */
static int __init packet_init(void)
-
{
-
int rc = proto_register(&packet_proto, 0);
-
-
if (rc != 0)
-
goto out;
-
-
sock_register(&packet_family_ops);
-
register_pernet_subsys(&packet_net_ops);
-
register_netdevice_notifier(&packet_netdev_notifier);
-
out:
-
return rc;
-
}
packet_proto:
-
/*
 * Protocol descriptor named "PACKET"; obj_size is sizeof(struct
 * packet_sock). It is registered in packet_init() and passed to
 * sk_alloc() in packet_create().
 */
static struct proto packet_proto = {
-
.name = "PACKET",
-
.owner = THIS_MODULE,
-
.obj_size = sizeof(struct packet_sock),
-
};
这里关键的是sock_register(&packet_family_ops);的注册
-
/*
 * Family descriptor for PF_PACKET, registered through sock_register();
 * its .create callback is packet_create.
 */
static struct net_proto_family packet_family_ops = {
-
.family = PF_PACKET,
-
.create = packet_create,
-
.owner = THIS_MODULE,
-
};
上面调用的create函数就在这里.即packet_create函数:
-
/*
-
* Create a packet of type SOCK_PACKET.
-
*/
-
-
/*
 * ->create callback for PF_PACKET sockets.
 *
 * Requires CAP_NET_RAW (-EPERM otherwise) and a socket type of
 * SOCK_DGRAM, SOCK_RAW or SOCK_PACKET (-ESOCKTNOSUPPORT otherwise).
 * Allocates the sock (-ENOBUFS on failure), selects the proto_ops and the
 * receive hook by socket type, and - when "protocol" is non-zero -
 * registers the hook with dev_add_pack(). Finally adds the sock to the
 * per-net packet socket list.
 *
 * Returns 0 on success or a negative error code.
 */
static int packet_create(struct net *net, struct socket *sock, int protocol)
-
{
-
struct sock *sk;
-
struct packet_sock *po;
-
__be16 proto = (__force __be16)protocol; /* weird, but documented */
-
int err;
-
-
if (!capable(CAP_NET_RAW))
-
return -EPERM;
-
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
-
sock->type != SOCK_PACKET)
-
return -ESOCKTNOSUPPORT;
-
-
sock->state = SS_UNCONNECTED;
-
-
err = -ENOBUFS;
-
sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
-
if (sk == NULL)
-
goto out;
-
-
/* Default ops; SOCK_PACKET sockets get the _spkt variant instead. */
sock->ops = &packet_ops;
-
if (sock->type == SOCK_PACKET)
-
sock->ops = &packet_ops_spkt;
-
-
sock_init_data(sock, sk);
-
-
po = pkt_sk(sk);
-
sk->sk_family = PF_PACKET;
-
po->num = proto;
-
-
sk->sk_destruct = packet_sock_destruct;
-
sk_refcnt_debug_inc(sk);
-
-
/*
-
* Attach a protocol block
-
*/
-
-
spin_lock_init(&po->bind_lock);
-
mutex_init(&po->pg_vec_lock);
-
/* Default receive hook; SOCK_PACKET sockets use packet_rcv_spkt. */
po->prot_hook.func = packet_rcv;
-
-
if (sock->type == SOCK_PACKET)
-
po->prot_hook.func = packet_rcv_spkt;
-
-
/* Lets the hook find its socket again (compared against skb->sk in dev_queue_xmit_nit). */
po->prot_hook.af_packet_priv = sk;
-
-
/* Only a non-zero protocol registers the hook at creation time. */
if (proto) {
-
po->prot_hook.type = proto;
-
dev_add_pack(&po->prot_hook);
-
sock_hold(sk);
-
po->running = 1;
-
}
-
-
write_lock_bh(&net->packet.sklist_lock);
-
sk_add_node(sk, &net->packet.sklist);
-
sock_prot_inuse_add(net, &packet_proto, 1);
-
write_unlock_bh(&net->packet.sklist_lock);
-
return 0;
-
out:
-
return err;
-
}
其实PF_PACKET是一个特殊的协议族,这种socket是内核专门提供给用户空间用来嗅探数据报文的,方便调试用.
它对sock的ops重新初始化:
-
/*
 * Socket operation table installed by packet_create() for PF_PACKET
 * sockets other than SOCK_PACKET. Operations that have no packet_
 * implementation here are wired to the generic sock_no_* stubs.
 *
 * The initializer is terminated with ';' (the closing brace alone is not
 * a complete declaration in C).
 */
static const struct proto_ops packet_ops = {
-
.family = PF_PACKET,
-
.owner = THIS_MODULE,
-
.release = packet_release,
-
.bind = packet_bind,
-
.connect = sock_no_connect,
-
.socketpair = sock_no_socketpair,
-
.accept = sock_no_accept,
-
.getname = packet_getname,
-
.poll = packet_poll,
-
.ioctl = packet_ioctl,
-
.listen = sock_no_listen,
-
.shutdown = sock_no_shutdown,
-
.setsockopt = packet_setsockopt,
-
.getsockopt = packet_getsockopt,
-
.sendmsg = packet_sendmsg,
-
.recvmsg = packet_recvmsg,
-
.mmap = packet_mmap,
-
.sendpage = sock_no_sendpage,
-
};
并初始化特殊操作函数:
-
po->prot_hook.func = packet_rcv;
-
-
if (sock->type == SOCK_PACKET)
-
po->prot_hook.func = packet_rcv_spkt;
在create的时候默认是packet_rcv函数,然后调用协议注册函数dev_add_pack:
-
if (proto) {
-
po->prot_hook.type = proto;
-
dev_add_pack(&po->prot_hook);
-
sock_hold(sk);
-
po->running = 1;
-
}
然而我们知道这个socket的协议类型是ETH_P_ALL,我们之前在帧的接收和发送的时候讲过,它会注册到ptype_all的链表.嗅探器会用到.
这里我们再回顾一下:
在报文接收的时候,在dev.c的netif_receive_skb中:
-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
-
if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-
ptype->dev == orig_dev) {
-
if (pt_prev)
-
ret = deliver_skb(skb, pt_prev, orig_dev);
-
pt_prev = ptype;
-
}
-
}
它就会查询注册的协议,调用处理函数,这里是packet_rcv
而发送的时候是在dev_hard_start_xmit中调用dev_queue_xmit_nit
-
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
const struct net_device_ops *ops = dev->netdev_ops;
-
int rc;
-
-
if (likely(!skb->next)) {
-
if (!list_empty(&ptype_all))
-
dev_queue_xmit_nit(skb, dev);
-
/*
-
* Support routine. Sends outgoing frames to any network
-
* taps currently in use.
-
*/
-
-
/*
 * Hand an outgoing skb to every handler on the ptype_all list.
 *
 * For each handler that is bound to "dev" or to no device, and whose
 * af_packet_priv is either NULL or a socket other than skb->sk, the skb
 * is cloned, its header offsets are fixed up, it is marked
 * PACKET_OUTGOING and passed to the handler's func callback.
 *
 * The walk stops at the first clone failure, so the remaining handlers
 * do not see this packet.
 */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
-
{
-
struct packet_type *ptype;
-
-
#ifdef CONFIG_NET_CLS_ACT
-
if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-
net_timestamp(skb);
-
#else
-
net_timestamp(skb);
-
#endif
-
-
rcu_read_lock();
-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
-
/* Never send packets back to the socket
-
* they originated from - MvS (miquels@drinkel.ow.org)
-
*/
-
if ((ptype->dev == dev || !ptype->dev) &&
-
(ptype->af_packet_priv == NULL ||
-
(struct sock *)ptype->af_packet_priv != skb->sk)) {
-
/* Each handler receives its own clone (GFP_ATOMIC allocation). */
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
-
if (!skb2)
-
break;
-
-
/* skb->nh should be correctly
-
set by sender, so that the second statement is
-
just protection against buggy protocols.
-
*/
-
skb_reset_mac_header(skb2);
-
-
if (skb_network_header(skb2) < skb2->data ||
-
skb2->network_header > skb2->tail) {
-
if (net_ratelimit())
-
printk(KERN_CRIT "protocol %04x is "
-
"buggy, dev %s\n",
-
skb2->protocol, dev->name);
-
skb_reset_network_header(skb2);
-
}
-
-
skb2->transport_header = skb2->network_header;
-
skb2->pkt_type = PACKET_OUTGOING;
-
ptype->func(skb2, skb->dev, ptype, skb->dev);
-
}
-
}
-
rcu_read_unlock();
-
}
把报文复制一份,然后调用packet_rcv传递给上层.
当然,在调用setsockopt时(即内核中的packet_setsockopt函数),会根据传入的选项重新设置接收处理函数.
其中packet_set_ring会建立环形缓冲区,使报文处理更高效:
-
po->prot_hook.func = (po->rx_ring.pg_vec) ?
-
tpacket_rcv : packet_rcv;
判断之后,重新调用dev_add_pack注册,在实际中会变成tpacket_rcv函数.至于为什么,原因这里就不分析了.
在实际应用中我们可以看一个例子:
# cat /proc/net/ptype
Type Device Function
0800 ip_rcv+0x0/0x510
0011 llc_rcv+0x0/0x3cc
8863 pppoe_disc_rcv+0x0/0x200
0004 llc_rcv+0x0/0x3cc
8864 pppoe_rcv+0x0/0x240
0806 arp_rcv+0x0/0x16c
88d9 br0 packet_rcv+0x0/0x20
886c br0 packet_rcv+0x0/0x20
86dd ipv6_rcv+0x0/0x68c
#
#
#
# tcpdump -i eth2 &
# tcpdump: WARNING: eth2: no IPv4 address assigned
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth2, link-type EN10MB (Ethernet), capture size 65535 bytes
#
# cat /proc/net/ptype
Type Device Function
ALL eth2 tpacket_rcv+0x0/0x20
0800 ip_rcv+0x0/0x510
0011 llc_rcv+0x0/0x3cc
8863 pppoe_disc_rcv+0x0/0x200
0004 llc_rcv+0x0/0x3cc
8864 pppoe_rcv+0x0/0x240
0806 arp_rcv+0x0/0x16c
88d9 br0 packet_rcv+0x0/0x20
886c br0 packet_rcv+0x0/0x20
86dd ipv6_rcv+0x0/0x68c
#
我们会发现是tpacket_rcv函数.这需要注意一下.
这里的原因来自libpcap调用的时候的
setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,(void *) &req, sizeof(req))
这个PACKET_RX_RING的标志!它触发了内核的新初始化.
然后我们回到tcpdump的主函数,最后会调用pcap_loop来无限处理报文.
在内核调用接收报文函数的时候里面会有一个run_filter,它就是报文过滤规则,这里不多说,需要专门来分析.包括过滤规则和解码的以后分析吧
阅读(13285) | 评论(4) | 转发(5) |