Linux E1000网络驱动源代码分析
一,初始化
e1000_init_module下pci_register_driver注册驱动,
/*
 * PCI driver descriptor for e1000 (excerpt: only the callbacks are shown).
 * Per the article it is the object handed to pci_register_driver() from
 * e1000_init_module.
 */
static struct pci_driver e1000_driver = {
	.probe    = e1000_probe,	/* device bring-up; walked through in the text below */
	.remove   = e1000_remove,
	.shutdown = e1000_shutdown,
};
在e1000_probe下,e1000_is_need_ioport为真时,执行
//PCI 6个bar空间中选择IORESOURCE_MEM和IORESOURCE_IO
bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO);
err = pci_enable_device(pdev);
//请求PCI的bar资源
pci_request_selected_regions(pdev, bars, e1000_driver_name);
//使能设备的总线主控(bus master)功能,本质就是置位PCI_COMMAND_MASTER后执行pci_write_config_word(dev, PCI_COMMAND, cmd)
pci_set_master(pdev);
//将相关数据保存在dev->saved_config_space中
pci_save_state(pdev);
//分配网络设备
netdev = alloc_etherdev(sizeof(struct e1000_adapter));
//将PCI设备data指向netdev
pci_set_drvdata(pdev, netdev);
//将PCI的bar0对应的MMIO寄存器空间映射到内核虚拟地址空间
hw->hw_addr = pci_ioremap_bar(pdev, BAR_0);
//依次检查其余5个bar,找到第一个IO类型的bar,将其起始地址记录到hw->io_base
for (i = BAR_1; i <= BAR_5; i++) {
if (pci_resource_len(pdev, i) == 0)
continue;
if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
hw->io_base = pci_resource_start(pdev, i);
break;
}
}
//初始化e1000_hw内容
e1000_init_hw_struct(adapter, hw);
pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
//配置DMA掩码
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
netdev->netdev_ops = &e1000_netdev_ops;
//设置ethtool的ops
e1000_set_ethtool_ops(netdev);
这两个ops如下:
/*
 * net_device callback table for e1000; the probe excerpt above installs it
 * with "netdev->netdev_ops = &e1000_netdev_ops".  The article later follows
 * .ndo_open (e1000_open) and .ndo_start_xmit (e1000_xmit_frame) in detail.
 */
static const struct net_device_ops e1000_netdev_ops = {
	.ndo_open             = e1000_open,
	.ndo_stop             = e1000_close,
	.ndo_start_xmit       = e1000_xmit_frame,
	.ndo_get_stats        = e1000_get_stats,
	.ndo_set_rx_mode      = e1000_set_rx_mode,
	.ndo_set_mac_address  = e1000_set_mac,
	.ndo_tx_timeout       = e1000_tx_timeout,
	.ndo_change_mtu       = e1000_change_mtu,
	.ndo_do_ioctl         = e1000_ioctl,
	.ndo_validate_addr    = eth_validate_addr,
	.ndo_vlan_rx_add_vid  = e1000_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
	.ndo_fix_features     = e1000_fix_features,
	.ndo_set_features     = e1000_set_features,
};
static const struct ethtool_ops e1000_ethtool_ops = {
.get_settings= e1000_get_settings,
.set_settings= e1000_set_settings,
.get_drvinfo= e1000_get_drvinfo,
.get_regs_len= e1000_get_regs_len,
.get_regs= e1000_get_regs,
.get_wol= e1000_get_wol,
.set_wol= e1000_set_wol,
.get_msglevel= e1000_get_msglevel,
.set_msglevel= e1000_set_msglevel,
.nway_reset= e1000_nway_reset,
.get_link= e1000_get_link,
.get_eeprom_len= e1000_get_eeprom_len,
.get_eeprom= e1000_get_eeprom,
.set_eeprom= e1000_set_eeprom,
.get_ringparam= e1000_get_ringparam,
.set_ringparam= e1000_set_ringparam,
.get_pauseparam= e1000_get_pauseparam,
.set_pauseparam= e1000_set_pauseparam,
.self_test= e1000_diag_test,
.get_strings= e1000_get_strings,
.set_phys_id= e1000_set_phys_id,
.get_ethtool_stats= e1000_get_ethtool_stats,
.get_sset_count= e1000_get_sset_count,
.get_coalesce= e1000_get_coalesce,
.set_coalesce= e1000_set_coalesce,
.get_ts_info= ethtool_op_get_ts_info,
};
//初始化adapter
e1000_sw_init(adapter);
二,网络设备打开
e1000_open是属于e1000_netdev_ops,
struct e1000_adapter *adapter = netdev_priv(netdev); //获取e1000_adapter
//关闭载波信号
netif_carrier_off(netdev);
//初始化发送缓冲区
e1000_setup_all_tx_resources(adapter);
//初始化接收缓冲区
e1000_setup_all_rx_resources(adapter);
//配置网卡参数
e1000_configure(adapter);
//注册网卡中断
e1000_request_irq(adapter);
//使能中断
e1000_irq_enable(adapter);
//允许包开始传输
netif_start_queue(netdev);
在e1000_setup_all_tx_resources下,
for (i = 0; i < adapter->num_tx_queues; i++) {
err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);
//这样可以看e1000_adapter
/*
 * Abbreviated view of the adapter state: only the ring-related members
 * discussed in the article are listed.
 */
struct e1000_adapter {
	/* transmit ring array; indexed as tx_ring[i] by e1000_setup_all_tx_resources */
	struct e1000_tx_ring *tx_ring;
	/* receive ring array */
	struct e1000_rx_ring *rx_ring;
	/* number of transmit / receive rings */
	int num_tx_queues;
	int num_rx_queues;
};	/* the terminating ';' was missing, which made the definition ill-formed */
/*
 * Bookkeeping for one transmit descriptor ring.  The setup excerpt in the
 * article fills in buffer_info, size, desc and dma for each ring.
 */
struct e1000_tx_ring {
	void *desc;				/* descriptor ring memory */
	dma_addr_t dma;				/* physical address of the descriptor ring */
	unsigned int size;			/* descriptor ring length, in bytes */
	unsigned int count;			/* how many descriptors the ring holds */
	unsigned int next_to_use;		/* next descriptor to attach a buffer to */
	unsigned int next_to_clean;		/* next descriptor to poll for the DD status bit */
	struct e1000_buffer *buffer_info;	/* array of per-descriptor buffer info structs */
	/* NOTE(review): tdh/tdt are presumably the TX head/tail register offsets -- confirm */
	u16 tdh;
	u16 tdt;
	bool last_tx_tso;
};
/*
 * Bookkeeping for one receive descriptor ring; the leading members mirror
 * those of struct e1000_tx_ring.
 */
struct e1000_rx_ring {
	void *desc;				/* descriptor ring memory */
	dma_addr_t dma;				/* physical address of the descriptor ring */
	unsigned int size;			/* descriptor ring length, in bytes */
	unsigned int count;			/* how many descriptors the ring holds */
	unsigned int next_to_use;		/* next descriptor to attach a buffer to */
	unsigned int next_to_clean;		/* next descriptor to poll for the DD status bit */
	struct e1000_buffer *buffer_info;	/* array of per-descriptor buffer info structs */
	struct sk_buff *rx_skb_top;
	int cpu;				/* cpu for this rx queue */
	/* NOTE(review): rdh/rdt are presumably the RX head/tail register offsets -- confirm */
	u16 rdh;
	u16 rdt;
};
e1000_tx_ring和e1000_rx_ring各字段的含义见上面代码中的注释,已经写得很详细了。
那么在e1000_setup_tx_resources下,
size = sizeof(struct e1000_buffer) * txdr->count;
txdr->buffer_info = vzalloc(size);
txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
//4K对齐
txdr->size = ALIGN(txdr->size, 4096);
//为真正的ring分配内存
txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, GFP_KERNEL);
从dma_alloc_coherent到dma_alloc_attrs,先通过dma_alloc_from_coherent进行内存申请,
//先按buddy系统的方式算出size对应的order
int order = get_order(size);
mem = dev->dma_mem;
pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
//下面物理地址和虚拟地址都是根据dev->dma_mem算出来的
*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
*ret = mem->virt_base + (pageno << PAGE_SHIFT);
而在dma_declare_coherent_memory中,
mem_base = ioremap(phys_addr, size); dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; dev->dma_mem->pfn_base = PFN_DOWN(phys_addr); dev->dma_mem->size = pages; dev->dma_mem->flags = flags;
调用dma_declare_coherent_memory的只有ohci_hcd_sm501_drv_probe等少数驱动,E1000并没有调用它,因此dev->dma_mem为空,在dma_alloc_from_coherent中会执行if (!mem) return 0,也就是说dma_alloc_from_coherent不会分配到内存,dma_alloc_attrs转而调用下面的ops->alloc:
memory = ops->alloc(dev, size, dma_handle,
dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
/*
 * DMA mapping operations table.  Per the article, the ops->alloc(...) call
 * quoted just above ends up in .alloc, i.e. intel_alloc_coherent.
 */
struct dma_map_ops intel_dma_ops = {
	.alloc         = intel_alloc_coherent,
	.free          = intel_free_coherent,
	.map_sg        = intel_map_sg,
	.unmap_sg      = intel_unmap_sg,
	.map_page      = intel_map_page,
	.unmap_page    = intel_unmap_page,
	.mapping_error = intel_mapping_error,
};
在intel_alloc_coherent下,
size = PAGE_ALIGN(size);
order = get_order(size);
//此处很明显申请的是RAM
page = alloc_pages(flags, order);
*dma_handle = __intel_map_single(dev, page_to_phys(page), size, DMA_BIDIRECTIONAL, dev->coherent_dma_mask);
回到e1000_setup_rx_resources,接收缓存的处理方式与上面发送缓存的处理相同。
e1000_configure配置网卡数据,其中e1000_configure_tx,e1000_setup_rctl,e1000_configure_rx初始化了很多寄存器,这对于理解虚拟化下E1000的实现很重要。
看e1000中断的申请和使能
e1000_request_irq申请中断
//中断回调函数
irq_handler_t handler = e1000_intr;
request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,netdev);
e1000_irq_enable使能中断,本质就是写e1000的IMS寄存器。
ew32(IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH();
三,e1000_xmit_frame发包接口
e1000_xmit_frame是e1000_netdev_ops下的调用函数,struct net_device_ops上的注释已经说明了
/*
 * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
 *                               struct net_device *dev);
 *	Called when a packet needs to be transmitted.
 *	Must return NETDEV_TX_OK , NETDEV_TX_BUSY.
 *	(can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
 *	Required can not be NULL.
 */

网络设备的发包是由dev_queue_xmit开始的,由__dev_queue_xmit调用dev_hard_start_xmit,执行ops->ndo_start_xmit(skb, dev)完成e1000下的发包动作。e1000_xmit_frame下关注的点是e1000_tx_map,其他的忽略,在e1000_tx_map下,填充tx_ring->buffer_info。
buffer_info = &tx_ring->buffer_info[i];
size = min(len, max_per_txd);
buffer_info->length = size;
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
//将skb->data的虚拟地址转换成PCI域的物理地址
buffer_info->dma = dma_map_single(&pdev->dev, skb->data + offset, size, DMA_TO_DEVICE);
然后就是让网卡直接从该地址读取memory内容,即为网卡的DMA读取。
另外一个函数是e1000_tx_queue,
do {
buffer_info = &tx_ring->buffer_info[i];
tx_desc = E1000_TX_DESC(*tx_ring, i);
tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length);
tx_desc->upper.data = cpu_to_le32(txd_upper);
i++;
if (i == tx_ring->count) i = 0;
} while (--count > 0);
e1000_tx_map之后,e1000_tx_queue将获取的dma地址存放到tx ring的发送描述符tx_desc->buffer_addr中,剩下的工作就是硬件取包并发送了。
四,e1000_intr中断函数
屏蔽中断,然后刷新。E1000_WRITE_FLUSH本质是读取STATUS寄存器:对设备寄存器的MMIO写在PCI上属于Posted(投递)写,可能暂时停留在桥的写缓冲中;而读操作是非Posted事务(即PCI规范中的Delayed传送方式),读完成之前必须先把它前面的Posted写推送到设备,所以读一次寄存器就起到了刷新的作用。
ew32(IMC, ~0); E1000_WRITE_FLUSH();
将e1000包的计数清空
if (likely(napi_schedule_prep(&adapter->napi))) {
adapter->total_tx_bytes = 0;
adapter->total_tx_packets = 0;
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(&adapter->napi);
}
e1000_intr是在e1000_request_irq中注册的中断处理函数;除了硬件中断之外,e1000_netpoll也会直接调用e1000_intr,而e1000_netpoll归netpoll_poll_dev调用。e1000_intr在收到网卡中断后调用__napi_schedule(&adapter->napi),交给napi轮询进行收包:__napi_schedule调用____napi_schedule,然后执行__raise_softirq_irqoff(NET_RX_SOFTIRQ),
在net_dev_init下
open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action);
那么调用的函数是net_rx_action,下面执行work = n->poll(n, weight),在e1000_probe下时
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
那么最终执行的是e1000_clean。
五,e1000_clean收包接口
e1000_clean_tx_irq负责清理发送队列,将之前发送队列的map数据unmap,为下一次发送准备。
/*
 * Tear down the DMA mapping recorded in one TX buffer_info slot.
 *
 * @adapter:     adapter whose PCI device owns the mapping
 * @buffer_info: TX slot to release; its dma field is reset to 0 afterwards
 *
 * Slots with no mapping (dma == 0) are left untouched.  Only the unmap step
 * is shown in this excerpt.
 */
static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter,
					     struct e1000_buffer *buffer_info)
{
	/* Nothing was mapped for this slot. */
	if (!buffer_info->dma)
		return;

	/* Use the unmap call that matches how the buffer was mapped. */
	if (buffer_info->mapped_as_page) {
		dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
			       buffer_info->length, DMA_TO_DEVICE);
	} else {
		dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
				 buffer_info->length, DMA_TO_DEVICE);
	}

	buffer_info->dma = 0;
}
后面是adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget)
在e1000_configure_rx下,
if (adapter->netdev->mtu > ETH_DATA_LEN) {
rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc);
adapter->clean_rx = e1000_clean_jumbo_rx_irq;
adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
} else {
rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc);
adapter->clean_rx = e1000_clean_rx_irq;
adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
}
同时在e1000_configure下,
for (i = 0; i < adapter->num_rx_queues; i++) {
struct e1000_rx_ring *ring = &adapter->rx_ring[i];
adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring));
}
接收队列被初始化
buffer_info = &rx_ring->buffer_info[i];
while (cleaned_count--) {
//提前为skb分配好空间
skb = netdev_alloc_skb_ip_align(netdev, bufsz);
buffer_info->skb = skb;
buffer_info->length = adapter->rx_buffer_len;
//将分配的mem地址映射给buffer
buffer_info->dma = dma_map_single(&pdev->dev, skb->data, buffer_info->length, DMA_FROM_DEVICE);
}
rx_desc = E1000_RX_DESC(*rx_ring, i);
//将DMA地址写入到ring的描述符里面,如此硬件就可以直接DMA写入memory了
rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
继续看e1000_clean_rx_irq,这个时候DMA已经完成了,data已经写入到指定地址了,
dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_buffer_len, DMA_FROM_DEVICE);
//只需要填充skb的其他属性
skb_put(skb, length);
//继续往上推送skb
e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan);
Linux E1000网络驱动源代码分析来自于OENHAN
链接为:https://oenhan.com/linux-e1000-networking-driver/