Linux E1000网络驱动源代码分析
一,初始化
e1000_init_module下pci_register_driver注册驱动,
static struct pci_driver e1000_driver = { .probe = e1000_probe, .remove = e1000_remove, .shutdown = e1000_shutdown, };
在e1000_probe下,e1000_is_need_ioport为真时,执行
//PCI 6个bar空间中选择IORESOURCE_MEM和IORESOURCE_IO bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO); err = pci_enable_device(pdev); //请求PCI的bar资源 pci_request_selected_regions(pdev, bars, e1000_driver_name); //将当前设备设置为主PCI,本质就是pci_write_config_word(dev, PCI_COMMAND, cmd) pci_set_master(pdev); //将相关数据保存在dev->saved_config_space中 pci_save_state(pdev); //分配网络设备 netdev = alloc_etherdev(sizeof(struct e1000_adapter)); //将PCI设备data指向netdev pci_set_drvdata(pdev, netdev); //将PCI的bar0寄存器映射到主存 hw->hw_addr = pci_ioremap_bar(pdev, BAR_0); //一次申请并初始化其他5个bar的资源 for (i = BAR_1; i <= BAR_5; i++) { if (pci_resource_len(pdev, i) == 0) continue; if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { hw->io_base = pci_resource_start(pdev, i); break; } } //初始化e1000_hw内容 e1000_init_hw_struct(adapter, hw); pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word); //配置DMA掩码 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); netdev->netdev_ops = &e1000_netdev_ops; //设置ethtool的ops e1000_set_ethtool_ops(netdev);
这两个ops如下:
static const struct net_device_ops e1000_netdev_ops = { .ndo_open= e1000_open, .ndo_stop= e1000_close, .ndo_start_xmit= e1000_xmit_frame, .ndo_get_stats= e1000_get_stats, .ndo_set_rx_mode= e1000_set_rx_mode, .ndo_set_mac_address= e1000_set_mac, .ndo_tx_timeout= e1000_tx_timeout, .ndo_change_mtu= e1000_change_mtu, .ndo_do_ioctl= e1000_ioctl, .ndo_validate_addr= eth_validate_addr, .ndo_vlan_rx_add_vid= e1000_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid= e1000_vlan_rx_kill_vid, .ndo_fix_features= e1000_fix_features, .ndo_set_features= e1000_set_features, }; static const struct ethtool_ops e1000_ethtool_ops = { .get_settings= e1000_get_settings, .set_settings= e1000_set_settings, .get_drvinfo= e1000_get_drvinfo, .get_regs_len= e1000_get_regs_len, .get_regs= e1000_get_regs, .get_wol= e1000_get_wol, .set_wol= e1000_set_wol, .get_msglevel= e1000_get_msglevel, .set_msglevel= e1000_set_msglevel, .nway_reset= e1000_nway_reset, .get_link= e1000_get_link, .get_eeprom_len= e1000_get_eeprom_len, .get_eeprom= e1000_get_eeprom, .set_eeprom= e1000_set_eeprom, .get_ringparam= e1000_get_ringparam, .set_ringparam= e1000_set_ringparam, .get_pauseparam= e1000_get_pauseparam, .set_pauseparam= e1000_set_pauseparam, .self_test= e1000_diag_test, .get_strings= e1000_get_strings, .set_phys_id= e1000_set_phys_id, .get_ethtool_stats= e1000_get_ethtool_stats, .get_sset_count= e1000_get_sset_count, .get_coalesce= e1000_get_coalesce, .set_coalesce= e1000_set_coalesce, .get_ts_info= ethtool_op_get_ts_info, }; //初始化adapter e1000_sw_init(adapter);
二,网络设备打开
e1000_open是属于e1000_netdev_ops,
struct e1000_adapter *adapter = netdev_priv(netdev); //获取e1000_adapter关闭载波信号 netif_carrier_off(netdev); //初始化发送缓冲区 e1000_setup_all_tx_resources(adapter); //初始化接受缓冲区 e1000_setup_all_rx_resources(adapter); //配置网卡参数 e1000_configure(adapter); //注册网卡中断 e1000_request_irq(adapter); //使能中断 e1000_irq_enable(adapter); //允许包开始传输 netif_start_queue(netdev);
在e1000_setup_all_tx_resources下,
for (i = 0; i < adapter->num_tx_queues; i++) { err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); //这样可以看e1000_adapter struct e1000_adapter { //发送缓存ring struct e1000_tx_ring *tx_ring; //接受缓存ring struct e1000_rx_ring *rx_ring; //接受发送的ring个数 int num_tx_queues; int num_rx_queues; }
struct e1000_tx_ring { /* pointer to the descriptor ring memory */ void *desc; /* physical address of the descriptor ring */ dma_addr_t dma; /* length of descriptor ring in bytes */ unsigned int size; /* number of descriptors in the ring */ unsigned int count; /* next descriptor to associate a buffer with */ unsigned int next_to_use; /* next descriptor to check for DD status bit */ unsigned int next_to_clean; /* array of buffer information structs */ struct e1000_buffer *buffer_info; u16 tdh; u16 tdt; bool last_tx_tso; }; struct e1000_rx_ring { /* pointer to the descriptor ring memory */ void *desc; /* physical address of the descriptor ring */ dma_addr_t dma; /* length of descriptor ring in bytes */ unsigned int size; /* number of descriptors in the ring */ unsigned int count; /* next descriptor to associate a buffer with */ unsigned int next_to_use; /* next descriptor to check for DD status bit */ unsigned int next_to_clean; /* array of buffer information structs */ struct e1000_buffer *buffer_info; struct sk_buff *rx_skb_top; /* cpu for rx queue */ int cpu; u16 rdh; u16 rdt; };
e1000_tx_ring和e1000_rx_ring各成员的含义见上面代码中的注释,已经写得很详细了。
那么在e1000_setup_tx_resources下,
size = sizeof(struct e1000_buffer) * txdr->count; txdr->buffer_info = vzalloc(size); txdr->size = txdr->count * sizeof(struct e1000_tx_desc); //4K对齐 txdr->size = ALIGN(txdr->size, 4096); //为真正的ring分配内存 txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, GFP_KERNEL);
从dma_alloc_coherent到dma_alloc_attrs,先通过dma_alloc_from_coherent进行内存申请,
//先根据buddy系统算一算 int order = get_order(size); mem = dev->dma_mem; pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); //下面物理地址和虚拟地址都是根据dev->dma_mem算出来的 *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); *ret = mem->virt_base + (pageno << PAGE_SHIFT);
而在dma_declare_coherent_memory中,
mem_base = ioremap(phys_addr, size); dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; dev->dma_mem->pfn_base = PFN_DOWN(phys_addr); dev->dma_mem->size = pages; dev->dma_mem->flags = flags;
调用dma_declare_coherent_memory的只有ohci_hcd_sm501_drv_probe等少数驱动,E1000并没有调用,因此dev->dma_mem为空,会执行if (!mem) return 0,dma_alloc_from_coherent申请失败,转而通过下面的ops->alloc申请内存。
memory = ops->alloc(dev, size, dma_handle, dma_alloc_coherent_gfp_flags(dev, gfp), attrs); struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, .map_sg = intel_map_sg, .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, .mapping_error = intel_mapping_error, };
在intel_alloc_coherent下,
size = PAGE_ALIGN(size); order = get_order(size); //此处很明显申请的是RAM page = alloc_pages(flags, order); *dma_handle = __intel_map_single(dev, page_to_phys(page), size, DMA_BIDIRECTIONAL, dev->coherent_dma_mask);
退回到e1000_setup_rx_resources,接收缓存的处理与上面发送缓存的处理类似。
e1000_configure配置网卡数据,其中e1000_configure_tx,e1000_setup_rctl,e1000_configure_rx初始化了很多寄存器,这对于理解虚拟化下e1000的实现很重要。
看e1000中断的申请和使能
e1000_request_irq申请中断
//中断回调函数 irq_handler_t handler = e1000_intr; request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,netdev);
e1000_irq_enable使能中断,本质就是写e1000的IMS寄存器。
ew32(IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH();
三,e1000_xmit_frame发包接口
e1000_xmit_frame是e1000_netdev_ops下的调用函数,struct net_device_ops上的注释已经说明了
/* netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. */
网络设备的发包是由dev_queue_xmit开始的,由__dev_queue_xmit调用dev_hard_start_xmit,执行ops->ndo_start_xmit(skb, dev)完成e1000下的发包动作。e1000_xmit_frame下关注的点是e1000_tx_map,其他的忽略,在e1000_tx_map下,填充tx_ring->buffer_info。
buffer_info = &tx_ring->buffer_info[i]; size = min(len, max_per_txd); buffer_info->length = size; buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; //将skb->data的虚拟地址转换成PCI域的物理地址 buffer_info->dma = dma_map_single(&pdev->dev, skb->data + offset, size, DMA_TO_DEVICE);
然后就是让网卡直接从该地址读取memory内容,即为网卡的DMA读取。
另外一个函数是e1000_tx_queue,
do { buffer_info = &tx_ring->buffer_info[i]; tx_desc = E1000_TX_DESC(*tx_ring, i); tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length); tx_desc->upper.data = cpu_to_le32(txd_upper); i++; if (i == tx_ring->count) i = 0; } while (--count > 0);
e1000_tx_map之后,e1000_tx_queue将获取的dma地址存放到发送ring的包描述符tx_desc->buffer_addr中,剩下的工作就是硬件取包并发送了。
四,e1000_intr中断函数
屏蔽中断,然后刷新,E1000_WRITE_FLUSH本质是读取STATUS寄存器的状态,也就是PCI规范中的Delayed传送方式。如果是Posted则无需刷新。
ew32(IMC, ~0); E1000_WRITE_FLUSH();
将e1000包的计数清空
if (likely(napi_schedule_prep(&adapter->napi))) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; __napi_schedule(&adapter->napi); }
除硬件中断外,直接调用网卡中断处理函数e1000_intr的是e1000_netpoll:e1000_intr是在e1000_request_irq下注册的中断处理函数,e1000_netpoll则归netpoll_poll_dev调用。e1000_intr在收到网卡中断后调用__napi_schedule(&adapter->napi),通过napi轮询进行收包,__napi_schedule调用____napi_schedule,然后执行__raise_softirq_irqoff(NET_RX_SOFTIRQ),
在net_dev_init下
open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action);
那么调用的函数是net_rx_action,下面执行work = n->poll(n, weight),而在e1000_probe下有
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
那么最终执行的是e1000_clean。
五,e1000_clean收包接口
e1000_clean_tx_irq负责清理发送队列,将之前发送队列的map数据unmap,为下一次发送准备。
static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, struct e1000_buffer *buffer_info) { if (buffer_info->dma) { if (buffer_info->mapped_as_page) dma_unmap_page(&adapter->pdev->dev, buffer_info->dma, buffer_info->length, DMA_TO_DEVICE); else dma_unmap_single(&adapter->pdev->dev, buffer_info->dma, buffer_info->length, DMA_TO_DEVICE); buffer_info->dma = 0; }}
后面是adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget)
在e1000_configure_rx下,
if (adapter->netdev->mtu > ETH_DATA_LEN) { rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc); adapter->clean_rx = e1000_clean_jumbo_rx_irq; adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers; } else { rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc); adapter->clean_rx = e1000_clean_rx_irq; adapter->alloc_rx_buf = e1000_alloc_rx_buffers; }
同时在e1000_configure下,
for (i = 0; i < adapter->num_rx_queues; i++) { struct e1000_rx_ring *ring = &adapter->rx_ring[i]; adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); }
接收队列被初始化
buffer_info = &rx_ring->buffer_info[i]; while (cleaned_count--) { //提前为skb分配好空间 skb = netdev_alloc_skb_ip_align(netdev, bufsz); buffer_info->skb = skb; buffer_info->length = adapter->rx_buffer_len; //将分配的mem地址映射给buffer buffer_info->dma = dma_map_single(&pdev->dev, skb->data, buffer_info->length, DMA_FROM_DEVICE); } rx_desc = E1000_RX_DESC(*rx_ring, i); //将DMA地址写入到ring的描述符里面,如此硬件就可以直接DMA写入memory了 rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
继续看e1000_clean_rx_irq,这个时候DMA已经完成了,data已经写入到指定地址了,
dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_buffer_len, DMA_FROM_DEVICE); //只需要填充skb的其他属性 skb_put(skb, length); //继续往上推送skb e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan);
Linux E1000网络驱动源代码分析来自于OenHan
链接为:https://oenhan.com/linux-e1000-networking-driver