kernel源代码版本:git-v3.16.39

一,初始化

e1000_init_module下pci_register_driver注册驱动,

static struct pci_driver e1000_driver = {

.probe    = e1000_probe,

.remove   = e1000_remove,

.shutdown = e1000_shutdown,

};

在e1000_probe下,e1000_is_need_ioport为真时,执行

//PCI 6个bar空间中选择IORESOURCE_MEM和IORESOURCE_IO

bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO);

err = pci_enable_device(pdev);

//请求PCI的bar资源

pci_request_selected_regions(pdev, bars, e1000_driver_name);

//使能设备的总线主控(bus master)功能,本质就是置位PCI_COMMAND_MASTER后执行pci_write_config_word(dev, PCI_COMMAND, cmd)

pci_set_master(pdev);

//将相关数据保存在dev->saved_config_space中

pci_save_state(pdev);

//分配网络设备

netdev = alloc_etherdev(sizeof(struct e1000_adapter));

//将PCI设备data指向netdev

pci_set_drvdata(pdev, netdev);

//将PCI的bar0对应的寄存器空间(MMIO)映射到内核虚拟地址空间


hw->hw_addr = pci_ioremap_bar(pdev, BAR_0);

//依次遍历其余5个bar,找到第一个IO端口类型的bar,并将其起始地址记录到hw->io_base

for (i = BAR_1; i <= BAR_5; i++) {

    if (pci_resource_len(pdev, i) == 0)

        continue;

    if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {

hw->io_base = pci_resource_start(pdev, i);

break;

    }

}

//初始化e1000_hw内容

e1000_init_hw_struct(adapter, hw);

pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);

//配置DMA掩码

dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));

netdev->netdev_ops = &e1000_netdev_ops;

//设置ethtool的ops

e1000_set_ethtool_ops(netdev);

这两个ops如下:

static const struct net_device_ops e1000_netdev_ops = {

.ndo_open= e1000_open,

.ndo_stop= e1000_close,

.ndo_start_xmit= e1000_xmit_frame,

.ndo_get_stats= e1000_get_stats,

.ndo_set_rx_mode= e1000_set_rx_mode,

.ndo_set_mac_address= e1000_set_mac,

.ndo_tx_timeout= e1000_tx_timeout,

.ndo_change_mtu= e1000_change_mtu,

.ndo_do_ioctl= e1000_ioctl,

.ndo_validate_addr= eth_validate_addr,

.ndo_vlan_rx_add_vid= e1000_vlan_rx_add_vid,

.ndo_vlan_rx_kill_vid= e1000_vlan_rx_kill_vid,

.ndo_fix_features= e1000_fix_features,

.ndo_set_features= e1000_set_features,

};

static const struct ethtool_ops e1000_ethtool_ops = {

.get_settings= e1000_get_settings,

.set_settings= e1000_set_settings,

.get_drvinfo= e1000_get_drvinfo,

.get_regs_len= e1000_get_regs_len,

.get_regs= e1000_get_regs,

.get_wol= e1000_get_wol,

.set_wol= e1000_set_wol,

.get_msglevel= e1000_get_msglevel,

.set_msglevel= e1000_set_msglevel,

.nway_reset= e1000_nway_reset,

.get_link= e1000_get_link,

.get_eeprom_len= e1000_get_eeprom_len,

.get_eeprom= e1000_get_eeprom,

.set_eeprom= e1000_set_eeprom,

.get_ringparam= e1000_get_ringparam,

.set_ringparam= e1000_set_ringparam,

.get_pauseparam= e1000_get_pauseparam,

.set_pauseparam= e1000_set_pauseparam,

.self_test= e1000_diag_test,

.get_strings= e1000_get_strings,

.set_phys_id= e1000_set_phys_id,

.get_ethtool_stats= e1000_get_ethtool_stats,

.get_sset_count= e1000_get_sset_count,

.get_coalesce= e1000_get_coalesce,

.set_coalesce= e1000_set_coalesce,

.get_ts_info= ethtool_op_get_ts_info,

};

//初始化adapter

e1000_sw_init(adapter);

二,网络设备打开

e1000_open是属于e1000_netdev_ops,

struct e1000_adapter *adapter = netdev_priv(netdev);

//上面获取了e1000_adapter,这里关闭载波信号

netif_carrier_off(netdev);

//初始化发送缓冲区

e1000_setup_all_tx_resources(adapter);

//初始化接收缓冲区

e1000_setup_all_rx_resources(adapter);

//配置网卡参数

e1000_configure(adapter);

//注册网卡中断

e1000_request_irq(adapter);

//使能中断

e1000_irq_enable(adapter);

//允许包开始传输

netif_start_queue(netdev);

在e1000_setup_all_tx_resources下,

for (i = 0; i < adapter->num_tx_queues; i++) {

err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);

//这样可以看e1000_adapter

struct e1000_adapter {

//发送缓存ring

struct e1000_tx_ring *tx_ring;

//接收缓存ring

struct e1000_rx_ring *rx_ring;

//接收和发送的ring个数

int num_tx_queues;

int num_rx_queues;

}
struct e1000_tx_ring {

/* pointer to the descriptor ring memory */
void *desc;

/* physical address of the descriptor ring */
dma_addr_t dma;

/* length of descriptor ring in bytes */
unsigned int size;

/* number of descriptors in the ring */
unsigned int count;

/* next descriptor to associate a buffer with */
unsigned int next_to_use;

/* next descriptor to check for DD status bit */
unsigned int next_to_clean;

/* array of buffer information structs */
struct e1000_buffer *buffer_info;

u16 tdh;

u16 tdt;

bool last_tx_tso;

};

struct e1000_rx_ring {

/* pointer to the descriptor ring memory */
void *desc;

/* physical address of the descriptor ring */
dma_addr_t dma;

/* length of descriptor ring in bytes */
unsigned int size;

/* number of descriptors in the ring */
unsigned int count;

/* next descriptor to associate a buffer with */
unsigned int next_to_use;

/* next descriptor to check for DD status bit */
unsigned int next_to_clean;

/* array of buffer information structs */
struct e1000_buffer *buffer_info;

struct sk_buff *rx_skb_top;

/* cpu for rx queue */
int cpu;

u16 rdh;

u16 rdt;

};

e1000_tx_ring和e1000_rx_ring各成员的含义见上面的注释,已经写得很详细了。

那么在e1000_setup_tx_resources下,

size = sizeof(struct e1000_buffer) * txdr->count;

txdr->buffer_info = vzalloc(size);

txdr->size = txdr->count * sizeof(struct e1000_tx_desc);

//4K对齐

txdr->size = ALIGN(txdr->size, 4096);

//为真正的ring分配内存

txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma, GFP_KERNEL);

从dma_alloc_coherent到dma_alloc_attrs,先通过dma_alloc_from_coherent进行内存申请

//先计算size对应的order,即按buddy系统的方式需要2^order个连续页

int order = get_order(size);

mem = dev->dma_mem;

pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);

//下面物理地址和虚拟地址都是根据dev->dma_mem算出来的

*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);

*ret = mem->virt_base + (pageno << PAGE_SHIFT);

而在dma_declare_coherent_memory中,

mem_base = ioremap(phys_addr, size);

dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);

dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);

dev->dma_mem->virt_base = mem_base;

dev->dma_mem->device_base = device_addr;

dev->dma_mem->pfn_base = PFN_DOWN(phys_addr);

dev->dma_mem->size = pages;

dev->dma_mem->flags = flags;

调用dma_declare_coherent_memory的只有ohci_hcd_sm501_drv_probe等少数驱动,所以对于E1000,dev->dma_mem为空,会执行if (!mem) return 0,dma_alloc_from_coherent申请失败,于是继续执行下面的ops->alloc

memory = ops->alloc(dev, size, dma_handle,

dma_alloc_coherent_gfp_flags(dev, gfp), attrs);

struct dma_map_ops intel_dma_ops = {

.alloc = intel_alloc_coherent,

.free = intel_free_coherent,

.map_sg = intel_map_sg,

.unmap_sg = intel_unmap_sg,

.map_page = intel_map_page,

.unmap_page = intel_unmap_page,

.mapping_error = intel_mapping_error,

};

在intel_alloc_coherent下,

size = PAGE_ALIGN(size);

order = get_order(size);

//此处很明显申请的是RAM

page = alloc_pages(flags, order);


*dma_handle = __intel_map_single(dev, page_to_phys(page), size,

DMA_BIDIRECTIONAL,

dev->coherent_dma_mask);

退回到e1000_setup_rx_resources,接收缓存的处理与上面发送缓存的处理类似。

e1000_configure配置网卡数据,其中 e1000_configure_tx,e1000_setup_rctl,e1000_configure_rx初始化了很多寄存器。对于看虚拟化下的实现很重要。

看e1000中断的申请和使能

e1000_request_irq申请中断

//中断回调函数

irq_handler_t handler = e1000_intr;

request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,netdev);

e1000_irq_enable使能中断,本质就是写e1000的IMS寄存器

ew32(IMS, IMS_ENABLE_MASK);

E1000_WRITE_FLUSH();

三,e1000_xmit_frame发包接口

e1000_xmit_frame是e1000_netdev_ops下的调用函数,struct net_device_ops上的注释已经说明了

 /* netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,

 *                               struct net_device *dev);

 * Called when a packet needs to be transmitted.

 * Must return NETDEV_TX_OK , NETDEV_TX_BUSY.

 *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)

 * Required can not be NULL.

*/

网络设备的发包是由dev_queue_xmit开始的,由__dev_queue_xmit调用dev_hard_start_xmit,执行ops->ndo_start_xmit(skb, dev)完成e1000下的发包动作。e1000_xmit_frame下关注的点是e1000_tx_map,其他的忽略,在e1000_tx_map下,填充tx_ring->buffer_info。

buffer_info = &tx_ring->buffer_info[i];

size = min(len, max_per_txd);

buffer_info->length = size;


buffer_info->time_stamp = jiffies;

buffer_info->next_to_watch = i;


//将skb->data的虚拟地址映射成设备可访问的DMA(总线)地址

buffer_info->dma = dma_map_single(&pdev->dev, skb->data + offset, size, DMA_TO_DEVICE);

然后就是让网卡直接从该地址读取memory内容,即为网卡的DMA读取

另外一个函数是e1000_tx_queue,

do {

buffer_info = &tx_ring->buffer_info[i];

tx_desc = E1000_TX_DESC(*tx_ring, i);

tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);

tx_desc->lower.data = cpu_to_le32(txd_lower | buffer_info->length);

tx_desc->upper.data = cpu_to_le32(txd_upper);

i++;


if (i == tx_ring->count) i = 0;

} while (--count > 0);

e1000_tx_map之后,e1000_tx_queue将获取到的dma地址存放到tx ring的发送描述符tx_desc->buffer_addr中,剩下的工作就是硬件取包并发送了。

四,e1000_intr中断函数

屏蔽中断,然后刷新。E1000_WRITE_FLUSH本质是读取STATUS寄存器:对寄存器的写是Posted方式,可能滞留在桥的缓冲中,而读操作属于PCI规范中的delayed(non-posted)传送方式,必须等前面的Posted写完成后才能返回,因此通过一次读即可把写操作刷到设备上。

ew32(IMC, ~0);

E1000_WRITE_FLUSH();

将e1000包的计数清空

if (likely(napi_schedule_prep(&adapter->napi))) {

adapter->total_tx_bytes = 0;

adapter->total_tx_packets = 0;

adapter->total_rx_bytes = 0;

adapter->total_rx_packets = 0;

__napi_schedule(&adapter->napi);

}

直接调用e1000_intr的还有e1000_netpoll,它归netpoll_poll_dev调用;正常情况下e1000_intr是在e1000_request_irq下注册的中断处理函数。e1000_intr在收到网卡中断后调用__napi_schedule(&adapter->napi),通过napi轮询进行收包:__napi_schedule调用____napi_schedule,然后执行__raise_softirq_irqoff(NET_RX_SOFTIRQ),

在net_dev_init下

open_softirq(NET_TX_SOFTIRQ, net_tx_action);

open_softirq(NET_RX_SOFTIRQ, net_rx_action);

那么调用的函数是net_rx_action,其中执行work = n->poll(n, weight),而poll函数是在e1000_probe中注册的:

netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);

那么最终执行的是e1000_clean。

五,e1000_clean收包接口

e1000_clean_tx_irq负责清理发送队列,将之前发送队列的map数据unmap,为下一次发送准备。

static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter,  struct e1000_buffer *buffer_info)

{ if (buffer_info->dma) {

if (buffer_info->mapped_as_page)

dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,

      buffer_info->length, DMA_TO_DEVICE);

else

dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,

buffer_info->length,

DMA_TO_DEVICE);

buffer_info->dma = 0;

}}

后面是adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget)

在e1000_configure_rx下,

if (adapter->netdev->mtu > ETH_DATA_LEN) {

rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc);

adapter->clean_rx = e1000_clean_jumbo_rx_irq;

adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;

} else {

rdlen = adapter->rx_ring[0].count * sizeof(struct e1000_rx_desc);

adapter->clean_rx = e1000_clean_rx_irq;

adapter->alloc_rx_buf = e1000_alloc_rx_buffers;

}

同时在e1000_configure下,

for (i = 0; i < adapter->num_rx_queues; i++) {

struct e1000_rx_ring *ring = &adapter->rx_ring[i];

adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring));

}

接收队列被初始化,以e1000_alloc_rx_buffers为例:

buffer_info = &rx_ring->buffer_info[i];

while (cleaned_count--) {

//提前为skb分配好空间

skb = netdev_alloc_skb_ip_align(netdev, bufsz);

buffer_info->skb = skb;

buffer_info->length = adapter->rx_buffer_len;

//将分配的mem地址映射给buffer

buffer_info->dma = dma_map_single(&pdev->dev,  skb->data, buffer_info->length, DMA_FROM_DEVICE);

}

rx_desc = E1000_RX_DESC(*rx_ring, i);

//将DMA地址写入到ring的描述符里面,如此硬件就可以直接DMA写入memory了

rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);

继续看e1000_clean_rx_irq,这个时候DMA已经完成了,data已经写入到指定地址了,

dma_unmap_single(&pdev->dev, buffer_info->dma, adapter->rx_buffer_len, DMA_FROM_DEVICE);

//只需要填充skb的其他属性

skb_put(skb, length);

//继续往上推送skb

e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan);

Linux E1000网络驱动源代码分析来自于OenHan

链接为:https://oenhan.com/linux-e1000-networking-driver

发表回复