1、数据帧收发主要函数及netdevice 结构 LinuxTCP/IP协议栈学习2数据帧收发主要函数及netdevice结构 /* * netif_rx - post buffer to the network code * skb: buffer to post * * this function receives a packet from a device driver and queues it for * the upper (protocol) levels to process. it always succeeds. the buffer * may be dropped dur
2、ing processing for congestion control or by the * protocol layers. * * return values: * net_rx_success (no congestion) * net_rx_drop (packet was dropped) * */ int netif_rx( struct sk_buff * skb) struct softnet_data * queue; unsigned long flags ; /* if netpoll wants it, pretend we never saw it */ if
3、(netpoll_rx(skb)) return net_rx_drop; if (!skb->tstamp.tv64) /* 得到帧接收的时间 */ net_timestamp(skb); /* * the code is rearranged so that the path is the most * short when cpu is congested, but is still operating. */ local_irq_save(flags); queue = &__get_cpu_var(softnet_data); /* 获取当前CPU的 softnet_data 数据 */ __get_
4、cpu_var(netdev_rx_stat).total++; /* 当前CPU接收的帧数+1 */ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { enqueue: /* 将该帧加入到softnet_data队列 */ __skb_queue_tail(&queue->input_pkt_queue, skb); local_irq_restore(flags); return net_rx_success; } /* 当队列是空的时候,表明这个队列并没有被软中断所schedule,因此我们需要将此队列加入到软中断的处理链表中。可以看到加入的正好是back
5、log,由于调用netif_rx的是非NAPI的驱动,因此backlog就是初始化时的process_backlog函数。 */ napi_schedule(&queue->backlog); goto enqueue; } __get_cpu_var(netdev_rx_stat).dropped++; local_irq_restore(flags); kfree_skb(skb); return net_rx_drop; 上面代码中用到一个关键的数据结构 softnet_data ,在网卡收发数据的时候,需要维护一个缓冲区队列,来缓存可能存在的突发数据,在协议栈中用一个队列层来表示该缓冲区,队列
6、层位于数据链路层和网络层之间。softnet_data 就是数据链路层中的数据结构,它是一个per-cpu变量,每个cpu都有一个 /* * netif_receive_skb - process receive buffer from network * skb: buffer to process * * netif_receive_skb() is the main receive data processing function. * it always succeeds. the buffer may be dropped during processing * for conges
7、tion control or by the protocol layers. * * this function may only be called from softirq context and interrupts * should be enabled. * * return values (usually ignored): * net_rx_success: no congestion * net_rx_drop: packet was dropped */netif_receive_skb 是对于 netif_rx 的 napi 对等函数; 它递交一个报文给内核. 当一个 n
8、api 兼容的驱动已耗尽接收报文的供应, 它应当重开中断, 并且调用 netif_rx_complete(现在是 __napi_complete())来停止轮询. int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; struct net_device *null_or_bond; int ret =
9、 net_rx_drop; __be16 type; if (!skb->tstamp.tv64) net_timestamp(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return net_rx_success; /* if we've gotten here through napi, check netpoll */ if (netpoll_receive_skb(skb)) return net_rx_drop; if (!skb->skb_iif) skb->skb_iif = skb->de
10、v->ifindex; /* 记录帧的入口 */ null_or_orig = null; orig_dev = skb->dev; master = access_once(orig_dev->master); if (master) if (skb_bond_should_drop(skb, master)) null_or_orig = orig_dev; /* deliver only exact match */ else skb->dev = master; __get_cpu_var(netdev_rx_stat).total++; skb_reset_network_header(
11、skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; pt_prev = null; rcu_read_lock(); #ifdef config_net_cls_act if (skb->tc_verd & tc_ncls) { skb->tc_verd = clr_tc_ncls(skb->tc_verd); goto ncls; } #endif /* 处理 ptype_all 上所有的 packet_type->func() ,这里先提一下Linux 是根据packet_t
12、ype 通过 dev_add_pack() 函数来注册相应的处理函数,后面会讲如何注册,每种包对应哪个处理函数 */ /* static struct list_head ptype_all __read_mostly; */ list_for_each_entry_rcu(ptype, &ptype_all, list) { if (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); /* 调用相应的包处
13、理函数 */ pt_prev = ptype; } } #ifdef config_net_cls_act skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; ncls: #endif /* 若编译内核时选上bridge,下面会执行网桥模块 */ skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; /* 编译内核时选上mac_vlan模块,下面才会执行 */ skb = handle_macvlan(skb, &pt_prev,
14、&ret , orig_dev ); if (!skb) goto out; /* * make sure frames received on vlan interfaces stacked on * bonding interfaces still make their way to any base bonding * device that may have registered for a specific ptype. the * handler may have to adjust skb-dev and orig_dev. */ null_or_bond = null; if
15、((skb->dev->priv_flags & iff_802_1q_vlan) && (vlan_dev_real_dev(skb->dev)->priv_flags & iff_bonding)) null_or_bond = vlan_dev_real_dev(skb->dev); /* 最后 type = skb->protocol; &ptype_base[ntohs(type)&15]: 处理ptype_base[ntohs(type)&15]上的所有的 packet_type->func(),根据第二层不同协议来进入不同的钩子函数,重要的有:ip_rcv(), arp_rcv() */ type = skb->pr
16、otocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & ptype_hash_mask], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || ptype->dev == null_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_p
17、rev) { ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ ret = net_rx_drop; } out: rcu_read_unlock(); return ret; /* * dev_queue_xmit - transmit a buffer * skb: buffer to transmi
18、t * * queue a buffer for transmission to a network device. the caller must * have set the device and priority and built the buffer before calling * this function. the function can be called from an interrupt. * * a negative errno code is returned on a failure. a success does not * guarantee the fram
19、e will be transmitted as it may be dropped due * to congestion or traffic shaping. * * - * i notice this method can also return errors from the queue disciplines, * including net_xmit_drop, which is a positive value. so, errors can also * be positive. * * regardless of the return value, the skb is c
20、onsumed, so it is currently * difficult to retry a send to this method. (you can bump the ref count * before sending to hold a reference for retry if you are careful.) * * when calling this method, interrupts must be enabled. this is because * the bh enable code must have irqs enabled so that it wil
21、l not deadlock. * -blg */ int dev_queue_xmit(struct sk_buff *skb) struct net_device *dev = skb->dev; struct netdev_queue *txq; struct qdisc *q; int rc = -enomem; /* gso will handle the following emulations directly. */ if (netif_needs_gso(dev, skb)) /* 如果是gso数据包,且设备支持gso数据包的处理 */ goto gso; /* conver
22、t a paged skb to linear, if required */ if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) goto out_kfree_skb; /* if packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == checksum_partial) { skb_set_transport_
23、header(skb, skb->csum_start - skb_headroom(skb)); if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) goto out_kfree_skb; } gso: /* disable soft irqs for various locks below. also * stops preemption for rcu. */ rcu_read_lock_bh(); txq = dev_pick_tx(dev, skb); q = rcu_dereference_bh(txq->qdisc
24、); #ifdef config_net_cls_act skb->tc_verd = set_tc_at(skb->tc_verd, at_egress); #endif if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; } /* the device has no queue. common case for software devices: loopback, all the sorts of tunnels. really, it is unlikely that netif_tx_lock prot
25、ection is necessary here. (f.e. loopback and ip tunnels are clean ignoring statistics counters.) however, it is possible, that they rely on protection made by us here. check this and shot the lock. it is not prone from deadlocks. either shot noqueue qdisc, it is even simpler 8) */ if (dev-flags & if
26、f_up) { int cpu = smp_processor_id(); /* ok because bhs are off */ if (txq->xmit_lock_owner != cpu) { hard_tx_lock(dev, txq, cpu); if (!netif_tx_queue_stopped(txq)) { rc = dev_hard_start_xmit(skb, dev, txq); if (dev_xmit_complete(rc)) { hard_tx_unlock(dev, txq); goto out; } } hard_tx_unlock(dev, txq); if (
27、net_ratelimit()) printk(kern_crit "virtual device %s asks to queue packet!\n", dev->name); } else { /* recursion is detected! it is possible, * unfortunately */ if (net_ratelimit()) printk(kern_crit "dead loop on virtual device %s, fix it urgently!\n", dev->name); } } rc = -enetdown; rcu_read_unlock_bh(); out_
28、kfree_skb: kfree_skb(skb); return rc;out: rcu_read_unlock_bh(); return rc; 数据链路层不得不谈到 struct net_device 相关结构,在2.6.29之后 net_device 结构进行了调整,操作函数被重构到了 net_device_ops 中。下面简要分析一下:struct net_device /*this first field, name, is the beginning of the visible part of this structure. it contains the string tha
29、t is the name of the interface. by visible, we mean that this part of the data structure is generic and doesn't contain any private areas specific to a particular type of device.*/ char name[ifnamsiz]; /* device name hash chain */ struct hlist_node name_hlist; /* snmp alias */ char *ifalias; /* * i/o specific fields * fixme: merge these and struct ifmap into one */ unsigned long mem_end; /* shared mem end */ unsigned long mem_start; /* shared mem start */ unsigned long base_addr; /* device i/o address */
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1