sriov功能的简单理解

B站上有两个视频讲得挺好:从普通模型的收包处理流程(两次中断和一次复制),到 VMDq(一次中断和一次复制),再到 SR-IOV(一次中断),讲述了收包模型一步步的优化过程。参考视频:sriov简单理解

其它参考文章: 1:基于sriov寄存器原理讲解的 2:https://projectacrn.github.io/latest/tutorials/sriov_virtualization.html#sr 3:https://access.redhat.com/documentation/zh-cn/red_hat_enterprise_linux_openstack_platform/7/html/networking_guide/sec-sr-iov

sriov功能涉及的api主要如下所示

#ifdef CONFIG_PCI_IOV

/* Bus number / devfn of VF number @id (zero-based) behind PF @dev. */
int pci_iov_virtfn_bus(struct pci_dev *dev, int id);

int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);

/* Enable @nr_virtfn VFs on PF @dev, or tear all of them down again. */
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);

void pci_disable_sriov(struct pci_dev *dev);

/* Create / destroy the struct pci_dev that represents a single VF. */
int pci_iov_add_virtfn(struct pci_dev *dev, int id);

void pci_iov_remove_virtfn(struct pci_dev *dev, int id);

/* Number of VFs currently enabled on @dev (0 when @dev is not a PF). */
int pci_num_vf(struct pci_dev *dev);

/* Number of @dev's VFs that are currently marked as assigned. */
int pci_vfs_assigned(struct pci_dev *dev);

/* Set / read the driver-imposed upper bound on the VF count. */
int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);

int pci_sriov_get_totalvfs(struct pci_dev *dev);

/* Enable/disable helper; returns @nr_virtfn on success, 0 after a disable. */
int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn);

/* Size of one VF's slice of the PF's IOV BAR @resno. */
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);

/* Store the drivers_autoprobe flag for the VFs of @dev. */
void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);

/* Arch may override these (weak) */

int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);

int pcibios_sriov_disable(struct pci_dev *pdev);

resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);

#else

static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)

{

return -ENOSYS;

}

参考:sriov主要api

sriov api理解分析

#include <linux/pci.h>

#include <linux/slab.h>

#include <linux/export.h>

#include <linux/string.h>

#include <linux/delay.h>

#include "pci.h"

#define VIRTFN_ID_LEN 16

/*

FirstVF Offset:第一个VF相对PF的Routing ID的偏移量

VF Stride: 相邻VF之间的Routing ID的偏移量(步进值)

8bit bus number、5bitdevice number、3bit function number

Routing ID = (PF Routing ID + First VF Offset + (N-1) * VF Stride)

vf_bus = pf_bus + (pf_devfn + offset + stride * vf_id) >> 8

vf_devfn = (pf_devfn + offset + stride * vf_id) & 0xff

*/

int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)

{

if (!dev->is_physfn)

return -EINVAL;

return dev->bus->number + ((dev->devfn + dev->sriov->offset +

dev->sriov->stride * vf_id) >> 8);

}

int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)

{

if (!dev->is_physfn)

return -EINVAL;

return (dev->devfn + dev->sriov->offset +

dev->sriov->stride * vf_id) & 0xff;

}

/*
 * Write @nr_virtfn into the NumVFs register, then re-read First VF Offset
 * and VF Stride from config space into the cached iov->offset / iov->stride.
 */
static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
{
	struct pci_sriov *iov = dev->sriov;
	const int cap = iov->pos;

	pci_write_config_word(dev, cap + PCI_SRIOV_NUM_VF, nr_virtfn);
	pci_read_config_word(dev, cap + PCI_SRIOV_VF_OFFSET, &iov->offset);
	pci_read_config_word(dev, cap + PCI_SRIOV_VF_STRIDE, &iov->stride);
}

/*
 * Work out the highest bus number any VF of @dev could occupy and record it
 * in iov->max_VF_buses.
 *
 * Every NumVFs value from total_VFs down to 1 is programmed in turn, because
 * offset and stride are re-read after each write; the bus of the last VF is
 * then computed for that setting.  NumVFs is always reset to 0 on the way out.
 *
 * Returns 0 on success, -EIO if the device reports a zero offset, or a zero
 * stride while more than one VF is configured.
 */
static int compute_max_vf_buses(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;
	int count = iov->total_VFs;
	int rc = 0;

	while (count) {
		int last_bus;

		pci_iov_set_numvfs(dev, count);
		if (!iov->offset || (count > 1 && !iov->stride)) {
			rc = -EIO;
			break;
		}

		last_bus = pci_iov_virtfn_bus(dev, count - 1);
		if (last_bus > iov->max_VF_buses)
			iov->max_VF_buses = last_bus;

		count--;
	}

	pci_iov_set_numvfs(dev, 0);
	return rc;
}

/********************************************************************************************/

/*
 * pci_iov_resource_size - size of a single VF's share of IOV BAR @resno.
 *
 * @resno is an index into dev->resource[] (PCI_IOV_RESOURCES-based).
 * Returns 0 when @dev is not a physical function.
 */
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
{
	return dev->is_physfn ? dev->sriov->barsz[resno - PCI_IOV_RESOURCES] : 0;
}

/*
 * Weak default for the alignment of IOV BAR @resno: the per-VF BAR size.
 * Declared __weak so that an architecture can supply its own version.
 */
resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,int resno)
{
	return pci_iov_resource_size(dev, resno);
}

/*
 * pci_sriov_resource_alignment - alignment required for IOV BAR @resno.
 *
 * Simply forwards to pcibios_iov_resource_alignment(), which may be
 * overridden per architecture.
 */
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
{
	return pcibios_iov_resource_alignment(dev, resno);
}

/*****************************************************************************************/

/* sysfs read handler: prints the value of pci_sriov_get_totalvfs(). */
static ssize_t sriov_totalvfs_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(to_pci_dev(dev)));
}

/* sysfs read handler: prints the number of VFs currently enabled. */
static ssize_t sriov_numvfs_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct pci_dev *pf = to_pci_dev(dev);
	u16 enabled;

	/*
	 * Sample num_VFs under the device lock so the read is serialized
	 * against sriov_numvfs_store(), which holds the same lock.
	 */
	device_lock(&pf->dev);
	enabled = pf->sriov->num_VFs;
	device_unlock(&pf->dev);

	return sprintf(buf, "%u\n", enabled);
}

static ssize_t sriov_offset_show(struct device *dev,

struct device_attribute *attr, char *buf)

{

struct pci_dev *pdev = to_pci_dev(dev);

return sprintf(buf, "%u\n", pdev->sriov->offset);

}

static ssize_t sriov_stride_show(struct device *dev,

struct device_attribute *attr, char *buf)

{

struct pci_dev *pdev = to_pci_dev(dev);

return sprintf(buf, "%u\n", pdev->sriov->stride);

}

static ssize_t sriov_vf_device_show(struct device *dev,

struct device_attribute *attr, char *buf)

{

struct pci_dev *pdev = to_pci_dev(dev);

return sprintf(buf, "%x\n", pdev->sriov->vf_device);

}

static ssize_t sriov_drivers_autoprobe_show(struct device *dev,

struct device_attribute *attr, char *buf)

{

struct pci_dev *pdev = to_pci_dev(dev);

return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);

}

/***************************************************************************/

/*
 * sysfs attribute objects (dev_attr_<name>) for the SR-IOV files of a PF.
 * The _RO/_RW variants are expected to pick up the matching <name>_show
 * (and, for _RW, <name>_store) handlers defined in this file.
 */
static DEVICE_ATTR_RO(sriov_totalvfs);
static DEVICE_ATTR_RW(sriov_numvfs);
static DEVICE_ATTR_RO(sriov_offset);
static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);

/* NULL-terminated list of the SR-IOV attributes, used by sriov_dev_attr_group. */
static struct attribute *sriov_dev_attrs[] = {
	&dev_attr_sriov_totalvfs.attr,
	&dev_attr_sriov_numvfs.attr,
	&dev_attr_sriov_offset.attr,
	&dev_attr_sriov_stride.attr,
	&dev_attr_sriov_vf_device.attr,
	&dev_attr_sriov_drivers_autoprobe.attr,
	NULL,
};

/*
 * Visibility callback for the SR-IOV attribute group: the attributes keep
 * their declared mode on a PF and are hidden (mode 0) on any other device.
 */
static umode_t sriov_attrs_are_visible(struct kobject *kobj,
				       struct attribute *a, int n)
{
	return dev_is_pf(kobj_to_dev(kobj)) ? a->mode : 0;
}

/*
 * Attribute group bundling the SR-IOV sysfs files; .is_visible hides them
 * on devices that are not physical functions.
 */
const struct attribute_group sriov_dev_attr_group = {
	.attrs = sriov_dev_attrs,
	.is_visible = sriov_attrs_are_visible,
};

/**************************************************使能sriov功能***************************************************/

/*
 * sysfs write handler for sriov_numvfs.
 *
 * Parses the requested VF count from @buf and asks the PF driver's
 * sriov_configure() callback to apply it.
 *
 * Returns @count on success (including the no-op case where the requested
 * count is already active), or a negative errno:
 *   - the kstrtou16() error when @buf is not a valid u16,
 *   - -ERANGE when the request exceeds pci_sriov_get_totalvfs(),
 *   - -ENOENT when no driver is bound or it has no sriov_configure(),
 *   - -EBUSY  when VFs are already enabled and a different non-zero count
 *             is requested (they must be disabled first),
 *   - whatever negative value sriov_configure() returns.
 */
static ssize_t sriov_numvfs_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct pci_dev *pf = to_pci_dev(dev);
	u16 wanted;
	int rc;

	rc = kstrtou16(buf, 0, &wanted);
	if (rc < 0)
		return rc;

	if (wanted > pci_sriov_get_totalvfs(pf))
		return -ERANGE;

	device_lock(&pf->dev);

	if (wanted == pf->sriov->num_VFs) {
		/* Already at the requested count: nothing to change. */
	} else if (!pf->driver || !pf->driver->sriov_configure) {
		/* The PF driver must provide the callback that does the work. */
		pci_info(pf, "Driver does not support SRIOV configuration via sysfs\n");
		rc = -ENOENT;
	} else if (wanted == 0) {
		/* Writing 0 disables all VFs. */
		rc = pf->driver->sriov_configure(pf, 0);
	} else if (pf->sriov->num_VFs) {
		/* Changing a non-zero count requires disabling first. */
		pci_warn(pf, "%d VFs already enabled. Disable before enabling %d VFs\n",
			 pf->sriov->num_VFs, wanted);
		rc = -EBUSY;
	} else {
		/* On success the callback returns the number of VFs enabled. */
		rc = pf->driver->sriov_configure(pf, wanted);
		if (rc >= 0 && rc != wanted)
			pci_warn(pf, "%d VFs requested; only %d enabled\n",
				 wanted, rc);
	}

	device_unlock(&pf->dev);

	if (rc < 0)
		return rc;

	return count;
}

/*
 * pci_sriov_configure_simple - enable or disable SR-IOV on a PF.
 * @dev: the physical function
 * @nr_virtfn: number of VFs to enable, or 0 to disable SR-IOV
 *
 * May sleep.  Returns @nr_virtfn after a successful enable and 0 after a
 * disable.  Fails with -ENODEV when @dev is not a PF, -EPERM when
 * pci_vfs_assigned() reports that some of its VFs are still assigned, or
 * with the negative error from sriov_enable().
 */
int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
{
	int err;

	might_sleep();

	if (!dev->is_physfn)
		return -ENODEV;

	if (pci_vfs_assigned(dev)) {
		pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
		return -EPERM;
	}

	if (!nr_virtfn) {
		sriov_disable(dev);
		return 0;
	}

	err = sriov_enable(dev, nr_virtfn);

	return err < 0 ? err : nr_virtfn;
}
EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);

/*
 * pci_enable_sriov - enable SR-IOV on a physical function.
 * @dev: the PCI device (must be a PF)
 * @nr_virtfn: number of VFs to enable
 *
 * May sleep.  Returns the result of sriov_enable(), or -ENOSYS when @dev is
 * not a physical function: only a PF can have SR-IOV switched on.
 */
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
	might_sleep();

	return dev->is_physfn ? sriov_enable(dev, nr_virtfn) : -ENOSYS;
}
EXPORT_SYMBOL_GPL(pci_enable_sriov);

/*
 * sriov_enable - turn on @nr_virtfn VFs behind PF @dev.
 *
 * Validates the request against the device's InitialVFs/TotalVFs, checks
 * that the IOV BARs and the VF bus range are available, programs NumVFs,
 * sets the VF Enable and VF Memory Space Enable control bits, and finally
 * creates the VF devices.
 *
 * Returns 0 on success (also when @nr_virtfn is 0, in which case nothing is
 * done) or a negative errno.  Failures after pcibios_sriov_enable() has been
 * attempted are unwound at err_pcibios.
 */
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
	int rc;
	int i;
	int nres;
	u16 initial;
	struct resource *res;
	struct pci_dev *pdev;
	/* Allocated and filled in by sriov_init(). */
	struct pci_sriov *iov = dev->sriov;
	int bars = 0;
	int bus;

	/* Nothing to enable. */
	if (!nr_virtfn)
		return 0;

	/* VFs are already enabled; the caller has to disable them first. */
	if (iov->num_VFs)
		return -EINVAL;

	/*
	 * Sanity-check InitialVFs: it may never exceed TotalVFs, and while
	 * the PCI_SRIOV_CAP_VFM capability bit is clear it must equal
	 * TotalVFs.
	 */
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
	if (initial > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
		return -EIO;

	/*
	 * The request must be within [0, TotalVFs], and within InitialVFs
	 * while the VFM capability bit is clear.
	 */
	if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
		return -EINVAL;

	/*
	 * Build a bitmask covering every IOV BAR slot and count how many of
	 * them are actually claimed in the resource tree (res->parent set).
	 */
	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		bars |= (1 << (i + PCI_IOV_RESOURCES));
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (res->parent)
			nres++;
	}
	/* Every BAR that sriov_init() counted must have been assigned. */
	if (nres != iov->nres) {
		pci_err(dev, "not enough MMIO resources for SR-IOV\n");
		return -ENOMEM;
	}

	/* The bus of the last VF must fit into the parent bus' number range. */
	bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
	if (bus > dev->bus->busn_res.end) {
		pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
			nr_virtfn, bus, &dev->bus->busn_res);
		return -ENOMEM;
	}

	/* Enable the IOV BAR resources selected above. */
	if (pci_enable_resources(dev, bars)) {
		pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
		return -ENOMEM;
	}

	/*
	 * If the Function Dependency Link names a different function, that
	 * function must exist and be a PF; expose the relation in sysfs as
	 * the "dep_link" symlink.
	 */
	if (iov->link != dev->devfn) {
		pdev = pci_get_slot(dev->bus, iov->link);
		if (!pdev)
			return -ENODEV;

		if (!pdev->is_physfn) {
			pci_dev_put(pdev);
			return -ENOSYS;
		}

		rc = sysfs_create_link(&dev->dev.kobj,
				       &pdev->dev.kobj, "dep_link");
		pci_dev_put(pdev);
		if (rc)
			return rc;
	}

	/* Remember what the hardware reported ... */
	iov->initial_VFs = initial;
	/* ... and from here on use min(initial, nr_virtfn) as the VF count. */
	if (nr_virtfn < initial)
		initial = nr_virtfn;

	/* Architecture hook (weak default returns 0). */
	rc = pcibios_sriov_enable(dev, initial);
	if (rc) {
		pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
		goto err_pcibios;
	}

	/* Program NumVFs; this also refreshes iov->offset and iov->stride. */
	pci_iov_set_numvfs(dev, nr_virtfn);
	/* Set VF Enable and VF Memory Space Enable on top of the cached ctrl. */
	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	/* Wait 100 ms with config access locked before touching the VFs. */
	msleep(100);
	pci_cfg_access_unlock(dev);

	/* Create a pci_dev for each VF (skipped when dev->no_vf_scan is set). */
	rc = sriov_add_vfs(dev, initial);
	if (rc)
		goto err_pcibios;

	/* Tell user space that the PF changed. */
	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
	iov->num_VFs = nr_virtfn;

	return 0;

err_pcibios:
	/* Undo: clear VFE/MSE, run the arch hook, drop dep_link, zero NumVFs. */
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	pci_iov_set_numvfs(dev, 0);
	return rc;
}

/*
 * Weak default of the architecture hook called by sriov_enable() before the
 * VFs are switched on.  Does nothing and reports success.
 */
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	return 0;
}

/*****************************************delete*********************************************************************/

/* Remove every currently enabled VF of @dev, from VF 0 up to num_VFs - 1. */
static void sriov_del_vfs(struct pci_dev *dev)
{
	int vf = 0;

	while (vf < dev->sriov->num_VFs) {
		pci_iov_remove_virtfn(dev, vf);
		vf++;
	}
}

/***************************************release****************************************/

static void sriov_release(struct pci_dev *dev)

{

/*调试*/

BUG_ON(dev->sriov->num_VFs);

if (dev != dev->sriov->dev)

pci_dev_put(dev->sriov->dev);

/*在初始化阶段申请的iov*/

kfree(dev->sriov);

dev->sriov = NULL;

}

/* Release the SR-IOV state of @dev; does nothing unless @dev is a PF. */
void pci_iov_release(struct pci_dev *dev)
{
	if (!dev->is_physfn)
		return;

	sriov_release(dev);
}

/********************************remove(模块卸载或者设备移除时执行)*********************************************************/

/*
 * pci_iov_remove - SR-IOV housekeeping for a PF that is being removed.
 *
 * Resets the driver-imposed VF limit back to TotalVFs and warns if VFs are
 * still enabled at this point.  No-op for non-PF devices.
 */
void pci_iov_remove(struct pci_dev *dev)
{
	struct pci_sriov *iov;

	if (!dev->is_physfn)
		return;

	iov = dev->sriov;
	iov->driver_max_VFs = iov->total_VFs;

	if (iov->num_VFs)
		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
}

/*
 * Remove the bus a VF lived on, but only if it is a separate bus from the
 * PF's and no devices are left on it.
 */
static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
{
	if (physbus == virtbus)
		return;

	if (!list_empty(&virtbus->devices))
		return;

	pci_remove_bus(virtbus);
}

/*
 * pci_iov_remove_virtfn - tear down VF number @id of PF @dev.
 *
 * Looks the VF up by its computed bus/devfn, removes the "virtfn<id>" and
 * "physfn" sysfs links, removes the device, and removes its bus if that bus
 * became empty.  Silently returns when no such device exists.
 */
void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
{
	char link_name[VIRTFN_ID_LEN];
	struct pci_dev *vf;
	int vf_bus = pci_iov_virtfn_bus(dev, id);
	int vf_devfn = pci_iov_virtfn_devfn(dev, id);

	vf = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
					 vf_bus, vf_devfn);
	if (!vf)
		return;

	sprintf(link_name, "virtfn%u", id);
	sysfs_remove_link(&dev->dev.kobj, link_name);

	/* Only touch the VF's sysfs directory while it still exists. */
	if (vf->dev.kobj.sd)
		sysfs_remove_link(&vf->dev.kobj, "physfn");

	pci_stop_and_remove_bus_device(vf);
	virtfn_remove_bus(dev->bus, vf->bus);

	/* Balances the reference taken by pci_get_domain_bus_and_slot(). */
	pci_dev_put(vf);
	/* Drops a reference on the PF (pci_iov_add_virtfn() takes one per VF). */
	pci_dev_put(dev);
}

/****************************sriov disable**********************************************************/

/*
 * Weak default of the architecture hook called after the VFs have been
 * switched off.  Does nothing and reports success.
 */
int __weak pcibios_sriov_disable(struct pci_dev *pdev)
{
	return 0;
}

/*
 * sriov_disable - switch off all VFs of PF @dev.
 *
 * No-op when no VFs are enabled.  Otherwise the VF devices are removed
 * first, then VF Enable / VF Memory Space Enable are cleared in hardware,
 * the architecture hook is run, the "dep_link" symlink (if one was created)
 * is removed, and NumVFs is reset to 0.
 */
static void sriov_disable(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;

	if (!iov->num_VFs)
		return;

	/* Remove the VF devices while iov->num_VFs still holds their count. */
	sriov_del_vfs(dev);
	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_cfg_access_lock(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	/* Keep config access locked for one second after clearing VFE. */
	ssleep(1);
	pci_cfg_access_unlock(dev);

	pcibios_sriov_disable(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	iov->num_VFs = 0;
	pci_iov_set_numvfs(dev, 0);
}

/*
 * pci_disable_sriov - disable SR-IOV on a physical function.
 *
 * May sleep.  Does nothing when @dev is not a PF.
 */
void pci_disable_sriov(struct pci_dev *dev)
{
	might_sleep();

	if (dev->is_physfn)
		sriov_disable(dev);
}
EXPORT_SYMBOL_GPL(pci_disable_sriov);

/****************************************init*************************************************************/

/*
 * sriov_init - discover and record the SR-IOV capability of @dev.
 * @dev: candidate physical function
 * @pos: config-space offset of the SR-IOV extended capability
 *
 * Summary: initialization sizes the VF BARs, records how many VFs the PF
 * supports and caches the capability fields in a freshly allocated
 * struct pci_sriov.  No VF is enabled here; a later pci_enable_sriov() call
 * decides how many VFs are actually switched on.
 *
 * Returns 0 on success (also when TotalVFs is 0, in which case @dev is not
 * marked as a PF) or a negative errno.
 */
static int sriov_init(struct pci_dev *dev, int pos)
{
	int i, bar64;
	int rc;
	int nres;
	u32 pgsz;
	u16 ctrl, total;
	struct pci_sriov *iov;
	struct resource *res;
	struct pci_dev *pdev;

	/*
	 * VFs must not be enabled during initialization: if VF Enable is
	 * found set, clear the whole control register and wait a second.
	 */
	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE) {
		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
		ssleep(1);
	}

	/*
	 * Look for a PF that is already registered on this bus.  If one is
	 * found, jump to "found" with pdev pointing at it and ctrl left at 0.
	 * Otherwise this is the first PF on the bus: pdev is cleared and the
	 * ARI bit is requested when ARI is enabled on the bus.
	 */
	ctrl = 0;
	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
		if (pdev->is_physfn)
			goto found;

	pdev = NULL;
	if (pci_ari_enabled(dev->bus))
		ctrl |= PCI_SRIOV_CTRL_ARI;

found:
	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);

	/* How many VFs does the device support at most? */
	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
	if (!total)
		return 0;

	/*
	 * Supported Page Sizes is a bitmap: bit n set means a page size of
	 * 2^(n + 12) bytes is supported (e.g. 0x00000553 = 4KB, 8KB, 64KB,
	 * 256KB, 1MB, 4MB).  Drop every size smaller than the kernel's
	 * PAGE_SIZE; fail if nothing remains.
	 */
	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
	pgsz &= ~((1 << i) - 1);
	if (!pgsz)
		return -EIO;

	/*
	 * System Page Size takes exactly one bit out of the supported set
	 * (value 1 = 4KB); VF BARs are aligned to it.  Keep only the lowest
	 * remaining bit, i.e. the smallest usable page size.
	 */
	pgsz &= ~(pgsz - 1);
	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);

	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
	if (!iov)
		return -ENOMEM;

	/* Size each VF BAR and grow the PF's IOV resource to hold all VFs. */
	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		/*
		 * bar64 is non-zero for a 64-bit BAR, which occupies two
		 * consecutive BAR slots (see "i += bar64" below).  Fixed
		 * resources are taken from their flags; anything else is
		 * probed from the VF BAR register.
		 */
		if (res->flags & IORESOURCE_PCI_FIXED)
			bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
		else
			bar64 = __pci_read_base(dev, pci_bar_unknown, res, pos + PCI_SRIOV_BAR + i * 4);
		/* No flags: presumably the BAR is not implemented; skip it. */
		if (!res->flags)
			continue;
		/* The per-VF BAR size must be a multiple of PAGE_SIZE. */
		if (resource_size(res) & (PAGE_SIZE - 1)) {
			rc = -EIO;
			goto failed;
		}
		/* Remember the size of a single VF's BAR ... */
		iov->barsz[i] = resource_size(res);
		/*
		 * ... and stretch the resource to total * size, so that BAR i
		 * of VF0..VF(total-1) is carved out of it back to back.
		 */
		res->end = res->start + resource_size(res) * total - 1;
		pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n", i, res, i, total);
		i += bar64;
		/* Number of IOV BARs in use; checked again in sriov_enable(). */
		nres++;
	}

	/* Cache the capability fields. */
	iov->pos = pos;
	iov->nres = nres;
	iov->ctrl = ctrl;
	iov->total_VFs = total;
	iov->driver_max_VFs = total;
	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
	iov->pgsz = pgsz;
	iov->self = dev;
	iov->drivers_autoprobe = true;
	/* SR-IOV Capabilities register. */
	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
	/* Function Dependency Link. */
	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
	/* For RC-integrated endpoints combine the link with the PF's slot. */
	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
		iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);

	/*
	 * iov->dev is the first PF found on the bus (holding a reference),
	 * or this device itself when it is the first one.
	 */
	if (pdev)
		iov->dev = pci_dev_get(pdev);
	else
		iov->dev = dev;

	/* Attach the state and mark the device as a physical function. */
	dev->sriov = iov;
	dev->is_physfn = 1;
	/* Fills in iov->max_VF_buses. */
	rc = compute_max_vf_buses(dev);
	if (rc)
		goto fail_max_buses;

	return 0;

fail_max_buses:
	/*
	 * NOTE(review): the reference taken by pci_dev_get(pdev) above does
	 * not appear to be dropped on this path -- confirm.
	 */
	dev->sriov = NULL;
	dev->is_physfn = 0;
failed:
	/* Invalidate all IOV resources again. */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		res->flags = 0;
	}

	kfree(iov);
	return rc;
}

/*
 * pci_iov_init - initialize SR-IOV support for @dev if it has any.
 *
 * Returns -ENODEV when @dev is not a PCIe device or carries no SR-IOV
 * extended capability; otherwise the result of sriov_init().
 */
int pci_iov_init(struct pci_dev *dev)
{
	int cap_pos;

	/* SR-IOV is looked up as a PCIe extended capability. */
	if (!pci_is_pcie(dev))
		return -ENODEV;

	/* A non-zero result is the capability's offset in config space. */
	cap_pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
	if (!cap_pos)
		return -ENODEV;

	return sriov_init(dev, cap_pos);
}

/******************************************************************************************/

/*
 * pci_iov_update_resource - write an IOV BAR resource back to hardware.
 * @dev: the physical function
 * @resno: index into dev->resource[] of the IOV BAR to program
 *
 * Translates the resource to a bus address and writes it into the matching
 * VF BAR register of the SR-IOV capability (two dwords for a 64-bit BAR).
 *
 * Nothing is written when @dev is not a PF, when the resource has no flags,
 * is unset or is fixed, or when VF Enable and VF Memory Space Enable are
 * both set -- in that last case a warning is emitted instead.
 */
void pci_iov_update_resource(struct pci_dev *dev, int resno)
{
	struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
	struct resource *res = dev->resource + resno;
	int vf_bar = resno - PCI_IOV_RESOURCES;
	struct pci_bus_region region;
	u16 cmd;
	u32 new;
	int reg;

	if (!iov)
		return;

	/* Refuse to move a VF BAR while the VFs are live and decoding memory. */
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
	if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
		dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
			 vf_bar, res);
		return;
	}

	if (!res->flags)
		return;

	if (res->flags & IORESOURCE_UNSET)
		return;

	if (res->flags & IORESOURCE_PCI_FIXED)
		return;

	/* Convert the CPU-side resource into the address seen on the bus. */
	pcibios_resource_to_bus(dev->bus, &region, res);

	/* Low dword: bus address plus the flag bits outside the address mask. */
	new = region.start;
	new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;

	reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
	pci_write_config_dword(dev, reg, new);

	/* 64-bit BAR: the upper 32 address bits go into the next register. */
	if (res->flags & IORESOURCE_MEM_64) {
		new = region.start >> 16 >> 16;
		pci_write_config_dword(dev, reg + 4, new);
	}
}

/****************************************************************************/

/*
 * sriov_restore_state - re-program the SR-IOV capability from cached state.
 *
 * Does nothing if the hardware still has VF Enable set.  Otherwise restores
 * the ARI bit, the IOV BARs, the system page size, NumVFs and finally the
 * full cached control word.
 */
static void sriov_restore_state(struct pci_dev *dev)
{
	int i;
	u16 ctrl;
	struct pci_sriov *iov = dev->sriov;

	/* VF Enable still set: the capability is left untouched. */
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE)
		return;

	/*
	 * Restore only the ARI bit first.  pci_iov_set_numvfs() below
	 * re-reads offset and stride; presumably those depend on ARI.
	 */
	ctrl &= ~PCI_SRIOV_CTRL_ARI;
	ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);

	/* Write every IOV BAR back. */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		pci_update_resource(dev, i + PCI_IOV_RESOURCES);

	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
	pci_iov_set_numvfs(dev, iov->num_VFs);
	/* Finally restore the complete control word (possibly including VFE). */
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	/* If VFs were re-enabled, wait 100 ms as sriov_enable() does. */
	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
		msleep(100);
}

/* Restore the SR-IOV capability state of @dev; no-op unless @dev is a PF. */
void pci_restore_iov_state(struct pci_dev *dev)
{
	if (!dev->is_physfn)
		return;

	sriov_restore_state(dev);
}

/**********************************************************************************/

/*
 * pci_vf_drivers_autoprobe - set the drivers_autoprobe flag of a PF.
 * @dev: the physical function
 * @auto_probe: new value of the flag
 *
 * Ignored when @dev is not a PF.
 */
void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
{
	if (!dev->is_physfn)
		return;

	dev->sriov->drivers_autoprobe = auto_probe;
}

/*
 * sysfs write handler for sriov_drivers_autoprobe.
 *
 * Parses a boolean from @buf and stores it in the PF's drivers_autoprobe
 * flag.  Returns @count on success, -EINVAL if @buf is not a valid boolean.
 */
static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
					     struct device_attribute *attr,
					     const char *buf, size_t count)
{
	bool enable;

	if (kstrtobool(buf, &enable) < 0)
		return -EINVAL;

	to_pci_dev(dev)->sriov->drivers_autoprobe = enable;

	return count;
}

/**************************************************************************/

/*
 * pci_iov_bus_range - number of extra buses the VFs on @bus may need.
 *
 * Takes the largest max_VF_buses value over all PFs on @bus and returns its
 * distance from @bus's own number, or 0 when no PF reports a non-zero value.
 */
int pci_iov_bus_range(struct pci_bus *bus)
{
	struct pci_dev *pf;
	int highest = 0;

	list_for_each_entry(pf, &bus->devices, bus_list) {
		if (pf->is_physfn && pf->sriov->max_VF_buses > highest)
			highest = pf->sriov->max_VF_buses;
	}

	if (!highest)
		return 0;

	return highest - bus->number;
}

/***************************************************************************************/

/*
 * pci_sriov_set_totalvfs - let a PF driver lower the usable VF count.
 * @dev: the physical function
 * @numvfs: new upper bound, at most the hardware's TotalVFs
 *
 * The bound is stored in driver_max_VFs, separate from the hardware value
 * total_VFs; driver_max_VFs is what pci_sriov_get_totalvfs() reports.
 *
 * Returns 0 on success, -ENOSYS when @dev is not a PF, -EINVAL when @numvfs
 * exceeds total_VFs, and -EBUSY when VF Enable is set in the cached control
 * word (the limit should not change while VFs are enabled).
 */
int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
{
	struct pci_sriov *iov;

	if (!dev->is_physfn)
		return -ENOSYS;

	iov = dev->sriov;

	if (numvfs > iov->total_VFs)
		return -EINVAL;

	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
		return -EBUSY;

	iov->driver_max_VFs = numvfs;

	return 0;
}
EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);

/*
 * pci_sriov_get_totalvfs - maximum number of VFs usable on @dev.
 *
 * Returns the driver-imposed limit (driver_max_VFs), or 0 when @dev is not
 * a physical function.
 */
int pci_sriov_get_totalvfs(struct pci_dev *dev)
{
	return dev->is_physfn ? dev->sriov->driver_max_VFs : 0;
}
EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);

/*
 * pci_num_vf - number of VFs currently enabled on @dev.
 *
 * Returns 0 when @dev is not a physical function.
 */
int pci_num_vf(struct pci_dev *dev)
{
	return dev->is_physfn ? dev->sriov->num_VFs : 0;
}
EXPORT_SYMBOL_GPL(pci_num_vf);

/**************************************************************************************/

/*
 * pci_vfs_assigned - count the VFs of @dev that are currently assigned.
 *
 * Walks every PCI device matching the PF's vendor ID and VF device ID and
 * counts those that are VFs of @dev for which pci_is_dev_assigned() is true.
 * Returns 0 when @dev is not a physical function.
 */
int pci_vfs_assigned(struct pci_dev *dev)
{
	unsigned int assigned = 0;
	unsigned short vf_id;
	struct pci_dev *vf;

	/* Only a PF can own VFs. */
	if (!dev->is_physfn)
		return 0;

	vf_id = dev->sriov->vf_device;

	for (vf = pci_get_device(dev->vendor, vf_id, NULL); vf;
	     vf = pci_get_device(dev->vendor, vf_id, vf)) {
		/* Skip devices that are not VFs of this particular PF. */
		if (!vf->is_virtfn || vf->physfn != dev)
			continue;

		if (pci_is_dev_assigned(vf))
			assigned++;
	}

	return assigned;
}
EXPORT_SYMBOL_GPL(pci_vfs_assigned);

/*
 * Create VF devices 0 .. @num_vfs - 1 for PF @dev.
 *
 * Skipped entirely (returning 0) when dev->no_vf_scan is set.  If adding a
 * VF fails, the VFs added so far are removed again in reverse order and the
 * error is returned.
 */
static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
{
	unsigned int added;
	int rc = 0;

	if (dev->no_vf_scan)
		return 0;

	for (added = 0; added < num_vfs; added++) {
		rc = pci_iov_add_virtfn(dev, added);
		if (rc)
			break;
	}

	if (!rc)
		return 0;

	/* Roll back: "added" is the index of the VF that failed. */
	while (added > 0)
		pci_iov_remove_virtfn(dev, --added);

	return rc;
}

/*
 * Return the pci_bus with number @busnr on which a VF is to be placed.
 *
 * That is @bus itself when the numbers match, an already existing bus with
 * that number in the same domain, or otherwise a newly added child bus of
 * @bus whose bus-number resource is set to exactly @busnr.  Returns NULL if
 * the new bus cannot be created.
 */
static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
{
	struct pci_bus *vbus;

	if (busnr == bus->number)
		return bus;

	vbus = pci_find_bus(pci_domain_nr(bus), busnr);
	if (!vbus) {
		vbus = pci_add_new_bus(bus, NULL, busnr);
		if (vbus)
			pci_bus_insert_busn_res(vbus, busnr, busnr);
	}

	return vbus;
}

static void pci_read_vf_config_common(struct pci_dev *virtfn)

{

struct pci_dev *physfn = virtfn->physfn;

pci_read_config_dword(virtfn, PCI_CLASS_REVISION,

&physfn->sriov->class);

pci_read_config_byte(virtfn, PCI_HEADER_TYPE,

&physfn->sriov->hdr_type);

pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,

&physfn->sriov->subsystem_vendor);

pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,

&physfn->sriov->subsystem_device);

}

/*
 * pci_iov_sysfs_link - create the sysfs links between a PF and one VF.
 * @dev: the physical function
 * @virtfn: the virtual function
 * @id: zero-based VF number, used in the link name
 *
 * Creates "virtfn<id>" under the PF pointing at the VF and "physfn" under
 * the VF pointing back at the PF, then sends a KOBJ_CHANGE uevent for the
 * VF.  If the second link cannot be created the first one is removed again.
 * Returns 0 or the error from sysfs_create_link().
 */
int pci_iov_sysfs_link(struct pci_dev *dev,
		       struct pci_dev *virtfn, int id)
{
	char link_name[VIRTFN_ID_LEN];
	int err;

	sprintf(link_name, "virtfn%u", id);

	err = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, link_name);
	if (err)
		return err;

	err = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
	if (err) {
		sysfs_remove_link(&dev->dev.kobj, link_name);
		return err;
	}

	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);

	return 0;
}

/*
 * pci_iov_add_virtfn - create and register the pci_dev for one VF.
 * @dev: the physical function
 * @id: zero-based VF number
 *
 * Finds or creates the bus the VF lives on, allocates a pci_dev for it,
 * fills in its identity from the PF's SR-IOV state, carves its BARs out of
 * the PF's IOV resources, and adds it to the bus together with the
 * PF <-> VF sysfs links.
 *
 * Returns 0 on success or a negative errno (-ENOMEM when the bus or the
 * device cannot be allocated).
 */
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
{
	int i;
	int rc = -ENOMEM;
	u64 size;
	struct pci_dev *virtfn;
	struct resource *res;
	struct pci_sriov *iov = dev->sriov;
	struct pci_bus *bus;

	/* Bus that carries this VF; may be the PF's own bus. */
	bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
	if (!bus)
		goto failed;

	/* Allocate a pci_dev on that bus. */
	virtfn = pci_alloc_dev(bus);
	if (!virtfn)
		goto failed0;

	/* Identity: computed devfn, the PF's vendor ID, the VF device ID. */
	virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
	virtfn->vendor = dev->vendor;
	virtfn->device = iov->vf_device;
	/* Mark it as a virtual function ... */
	virtfn->is_virtfn = 1;
	/* ... owned by this PF; takes a reference on the PF. */
	virtfn->physfn = pci_dev_get(dev);
	virtfn->no_command_memory = 1;

	/*
	 * The common config values are cached in the PF's sriov struct, so
	 * they are read only once, from VF 0 (presumably they are identical
	 * for all VFs of a PF).
	 */
	if (id == 0)
		pci_read_vf_config_common(virtfn);

	rc = pci_setup_device(virtfn);
	if (rc)
		goto failed1;

	/* The VF gets the same parent device as its PF. */
	virtfn->dev.parent = dev->dev.parent;
	/* Treated as a single-function device. */
	virtfn->multifunction = 0;

	/*
	 * Give the VF its slice of each assigned IOV BAR: slice number @id of
	 * the per-VF size recorded by sriov_init().
	 */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (!res->parent)
			continue;
		virtfn->resource[i].name = pci_name(virtfn);
		virtfn->resource[i].flags = res->flags;
		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
		virtfn->resource[i].start = res->start + size * id;
		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
		/* The slice lies inside the PF resource, so this must succeed. */
		rc = request_resource(res, &virtfn->resource[i]);
		BUG_ON(rc);
	}

	/* Register the device on its bus, then create the sysfs links. */
	pci_device_add(virtfn, virtfn->bus);
	rc = pci_iov_sysfs_link(dev, virtfn, id);
	if (rc)
		goto failed1;

	/* Final step of making the VF available on the bus. */
	pci_bus_add_device(virtfn);

	return 0;

failed1:
	/*
	 * NOTE(review): this label is also reached when pci_setup_device()
	 * fails, i.e. before pci_device_add() has run -- confirm that
	 * pci_stop_and_remove_bus_device() is safe in that state.
	 */
	pci_stop_and_remove_bus_device(virtfn);
	/* Drop the PF reference taken through pci_dev_get() above. */
	pci_dev_put(dev);
failed0:
	virtfn_remove_bus(dev->bus, bus);
failed:
	return rc;
}

总结:这部分代码量比较庞大,涉及的东西比较多。通过阅读,要真正掌握这部分代码,我的感受有以下几点。 1:由于在 enable sriov 时需要指定 num_vfs,这里在添加一个 vf 设备时涉及的过程和 pci 设备枚举是一样的;由于自己对这部分不是非常熟悉,所以有些部分的代码无法吃透。后续我可能会先去阅读 pcie 枚举部分的代码,再回来看 sriov。

2:在阅读时,大部分操作涉及寄存器的赋值,这里推荐下面这个网址: https://www.intel.cn/content/www/cn/zh/docs/programmable/683111/17-1/initial-vfs-and-total-vfs-registers.html

3:有些细节问题需要多加打印语句才能理解,在阅读这部分代码时我没有调试,这是我后续需要改进的地方。

精彩文章

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。