vhost,virtio: fixes, features

Hardening work by Jason
 vdpa driver for Alibaba ENI
 Performance tweaks for virtio blk
 virtio rng rework using an internal buffer
 mac/mtu programming for mlx5 vdpa
 Misc fixes, cleanups
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmF/su8PHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpo+sIAJjBTvbET+d0KuIMt8YBGsU+NWgHaJxW06hm
 GHZzinueZYWaS20MPMYMPfcHUgigWj0kk7F0gMljG5rtDFzR85JkOi7+e5V6RVJW
 8SQc7JjA1Krde6EiPJdlv3mVkLz/5VbrGUgjAW9di/O04Xc/jAc9kmJ41wTYr0mL
 E5IsUi9QBmgHtKQ+2ofb9QEWuebJclGK+NVd3Kp08BtAQOrdn5UrIzY30/sjkZwE
 ul2ll6v/k7T3dCQbHD/wMgfFycPvoZVfxaVj+FWQnwdWkqZwzMRmpKPRes4KJfww
 Lfx1qox10mJzH7XJCs7xmOM8f7HgNNWi0gD5FxNlfkiz1AwHYOk=
 =DTXY
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "vhost and virtio fixes and features:

   - Hardening work by Jason

   - vdpa driver for Alibaba ENI

   - Performance tweaks for virtio blk

   - virtio rng rework using an internal buffer

   - mac/mtu programming for mlx5 vdpa

   - Misc fixes, cleanups"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (45 commits)
  vdpa/mlx5: Forward only packets with allowed MAC address
  vdpa/mlx5: Support configuration of MAC
  vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit
  vdpa_sim_net: Enable user to set mac address and mtu
  vdpa: Enable user to set mac and mtu of vdpa device
  vdpa: Use kernel coding style for structure comments
  vdpa: Introduce query of device config layout
  vdpa: Introduce and use vdpa device get, set config helpers
  virtio-scsi: don't let virtio core to validate used buffer length
  virtio-blk: don't let virtio core to validate used length
  virtio-net: don't let virtio core to validate used length
  virtio_ring: validate used buffer length
  virtio_blk: correct types for status handling
  virtio_blk: allow 0 as num_request_queues
  i2c: virtio: Add support for zero-length requests
  virtio-blk: fixup coccinelle warnings
  virtio_ring: fix typos in vring_desc_extra
  virtio-pci: harden INTX interrupts
  virtio_pci: harden MSI-X interrupts
  virtio_config: introduce a new .enable_cbs method
  ...
Linus Torvalds 2021-11-03 15:00:39 -07:00
commit 43e1b12927
36 changed files with 1779 additions and 295 deletions
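Context for the hardening items above: this series has virtio_ring validate the used buffer length reported by the device, and drivers that still perform their own checks opt out through the new suppress_used_validation field in struct virtio_driver (visible in the virtio-blk/net/scsi hunks below). A minimal, illustrative driver declaration using it could look like the sketch below; the driver name and callbacks are hypothetical, not taken from this merge:

static struct virtio_driver example_virtio_driver = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	/* this driver validates used length itself, so skip the core check */
	.suppress_used_validation	= true,
	.driver.name			= KBUILD_MODNAME,
	.driver.owner			= THIS_MODULE,
	.id_table			= id_table,
	.probe				= example_probe,
	.remove				= example_remove,
};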

MAINTAINERS

@ -20083,6 +20083,13 @@ S: Maintained
F: drivers/i2c/busses/i2c-virtio.c
F: include/uapi/linux/virtio_i2c.h
VIRTIO PMEM DRIVER
M: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/nvdimm/virtio_pmem.c
F: drivers/nvdimm/nd_virtio.c
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>

drivers/block/Kconfig

@ -371,6 +371,7 @@ config XEN_BLKDEV_BACKEND
config VIRTIO_BLK
tristate "Virtio block driver"
depends on VIRTIO
select SG_POOL
help
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.

drivers/block/virtio_blk.c

@ -24,6 +24,19 @@
/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define VIRTIO_BLK_INLINE_SG_CNT 0
#else
#define VIRTIO_BLK_INLINE_SG_CNT 2
#endif
static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
"Limit the number of request queues to use for blk device. "
"0 for no limit. "
"Values > nr_cpu_ids truncated to nr_cpu_ids.");
static int major;
static DEFINE_IDA(vd_index_ida);
@ -77,6 +90,7 @@ struct virtio_blk {
struct virtblk_req {
struct virtio_blk_outhdr out_hdr;
u8 status;
struct sg_table sg_table;
struct scatterlist sg[];
};
@ -162,12 +176,93 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
return 0;
}
static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
{
if (blk_rq_nr_phys_segments(req))
sg_free_table_chained(&vbr->sg_table,
VIRTIO_BLK_INLINE_SG_CNT);
}
static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
struct virtblk_req *vbr)
{
int err;
if (!blk_rq_nr_phys_segments(req))
return 0;
vbr->sg_table.sgl = vbr->sg;
err = sg_alloc_table_chained(&vbr->sg_table,
blk_rq_nr_phys_segments(req),
vbr->sg_table.sgl,
VIRTIO_BLK_INLINE_SG_CNT);
if (unlikely(err))
return -ENOMEM;
return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
}
static void virtblk_cleanup_cmd(struct request *req)
{
if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
kfree(bvec_virt(&req->special_vec));
}
static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
struct request *req,
struct virtblk_req *vbr)
{
bool unmap = false;
u32 type;
vbr->out_hdr.sector = 0;
switch (req_op(req)) {
case REQ_OP_READ:
type = VIRTIO_BLK_T_IN;
vbr->out_hdr.sector = cpu_to_virtio64(vdev,
blk_rq_pos(req));
break;
case REQ_OP_WRITE:
type = VIRTIO_BLK_T_OUT;
vbr->out_hdr.sector = cpu_to_virtio64(vdev,
blk_rq_pos(req));
break;
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
case REQ_OP_DISCARD:
type = VIRTIO_BLK_T_DISCARD;
break;
case REQ_OP_WRITE_ZEROES:
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
default:
WARN_ON_ONCE(1);
return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
if (virtblk_setup_discard_write_zeroes(req, unmap))
return BLK_STS_RESOURCE;
}
return 0;
}
static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
kfree(bvec_virt(&req->special_vec));
virtblk_unmap_data(req, vbr);
virtblk_cleanup_cmd(req);
blk_mq_end_request(req, virtblk_result(vbr));
}
@ -223,59 +318,26 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
unsigned long flags;
unsigned int num;
int qid = hctx->queue_num;
int err;
bool notify = false;
bool unmap = false;
u32 type;
blk_status_t status;
int err;
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
switch (req_op(req)) {
case REQ_OP_READ:
case REQ_OP_WRITE:
type = 0;
break;
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
case REQ_OP_DISCARD:
type = VIRTIO_BLK_T_DISCARD;
break;
case REQ_OP_WRITE_ZEROES:
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP);
break;
case REQ_OP_DRV_IN:
type = VIRTIO_BLK_T_GET_ID;
break;
default:
WARN_ON_ONCE(1);
return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
vbr->out_hdr.sector = type ?
0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
status = virtblk_setup_cmd(vblk->vdev, req, vbr);
if (unlikely(status))
return status;
blk_mq_start_request(req);
if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
err = virtblk_setup_discard_write_zeroes(req, unmap);
if (err)
return BLK_STS_RESOURCE;
}
num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
if (num) {
if (rq_data_dir(req) == WRITE)
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
else
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
num = virtblk_map_data(hctx, req, vbr);
if (unlikely(num < 0)) {
virtblk_cleanup_cmd(req);
return BLK_STS_RESOURCE;
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num);
if (err) {
virtqueue_kick(vblk->vqs[qid].vq);
/* Don't stop the queue if -ENOMEM: we may have failed to
@ -284,6 +346,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
if (err == -ENOSPC)
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
virtblk_unmap_data(req, vbr);
virtblk_cleanup_cmd(req);
switch (err) {
case -ENOSPC:
return BLK_STS_DEV_RESOURCE;
@ -497,8 +561,14 @@ static int init_vq(struct virtio_blk *vblk)
&num_vqs);
if (err)
num_vqs = 1;
if (!err && !num_vqs) {
dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
return -EINVAL;
}
num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
num_vqs = min_t(unsigned int,
min_not_zero(num_request_queues, nr_cpu_ids),
num_vqs);
vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
if (!vblk->vqs)
@ -624,7 +694,7 @@ cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
u8 writeback = virtblk_get_cache_mode(vblk->vdev);
BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
}
static DEVICE_ATTR_RW(cache_type);
@ -660,16 +730,6 @@ static const struct attribute_group *virtblk_attr_groups[] = {
NULL,
};
static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
struct virtio_blk *vblk = set->driver_data;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
sg_init_table(vbr->sg, vblk->sg_elems);
return 0;
}
static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
@ -682,7 +742,6 @@ static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
.map_queues = virtblk_map_queues,
};
@ -762,7 +821,7 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
vblk->tag_set.cmd_size =
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
vblk->tag_set.driver_data = vblk;
vblk->tag_set.nr_hw_queues = vblk->num_vqs;
@ -990,6 +1049,7 @@ static struct virtio_driver virtio_blk = {
.feature_table_size = ARRAY_SIZE(features),
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

drivers/char/hw_random/virtio-rng.c

@ -18,13 +18,20 @@ static DEFINE_IDA(rng_index_ida);
struct virtrng_info {
struct hwrng hwrng;
struct virtqueue *vq;
struct completion have_data;
char name[25];
unsigned int data_avail;
int index;
bool busy;
bool hwrng_register_done;
bool hwrng_removed;
/* data transfer */
struct completion have_data;
unsigned int data_avail;
unsigned int data_idx;
/* minimal size returned by rng_buffer_size() */
#if SMP_CACHE_BYTES < 32
u8 data[32];
#else
u8 data[SMP_CACHE_BYTES];
#endif
};
static void random_recv_done(struct virtqueue *vq)
@ -35,54 +42,88 @@ static void random_recv_done(struct virtqueue *vq)
if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
return;
vi->data_idx = 0;
complete(&vi->have_data);
}
/* The host will fill any buffer we give it with sweet, sweet randomness. */
static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
static void request_entropy(struct virtrng_info *vi)
{
struct scatterlist sg;
sg_init_one(&sg, buf, size);
reinit_completion(&vi->have_data);
vi->data_avail = 0;
vi->data_idx = 0;
sg_init_one(&sg, vi->data, sizeof(vi->data));
/* There should always be room for one buffer. */
virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL);
virtqueue_kick(vi->vq);
}
static unsigned int copy_data(struct virtrng_info *vi, void *buf,
unsigned int size)
{
size = min_t(unsigned int, size, vi->data_avail);
memcpy(buf, vi->data + vi->data_idx, size);
vi->data_idx += size;
vi->data_avail -= size;
if (vi->data_avail == 0)
request_entropy(vi);
return size;
}
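/*
 * Editorial illustration, not part of this file: with SMP_CACHE_BYTES == 64
 * the host can fill up to all 64 bytes of vi->data in one request.  A reader
 * asking for 16 bytes is then served straight from the internal buffer
 * (data_idx 0 -> 16, data_avail 64 -> 48) without touching the virtqueue;
 * only when data_avail reaches 0 does copy_data() queue a fresh request via
 * request_entropy().
 */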
static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
{
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
unsigned int chunk;
size_t read;
if (vi->hwrng_removed)
return -ENODEV;
if (!vi->busy) {
vi->busy = true;
reinit_completion(&vi->have_data);
register_buffer(vi, buf, size);
read = 0;
/* copy available data */
if (vi->data_avail) {
chunk = copy_data(vi, buf, size);
size -= chunk;
read += chunk;
}
if (!wait)
return 0;
return read;
ret = wait_for_completion_killable(&vi->have_data);
if (ret < 0)
return ret;
/* We have already copied available entropy,
* so either size is 0 or data_avail is 0
*/
while (size != 0) {
/* data_avail is 0 but a request is pending */
ret = wait_for_completion_killable(&vi->have_data);
if (ret < 0)
return ret;
/* if vi->data_avail is 0, we have been interrupted
* by a cleanup, but buffer stays in the queue
*/
if (vi->data_avail == 0)
return read;
vi->busy = false;
chunk = copy_data(vi, buf + read, size);
size -= chunk;
read += chunk;
}
return vi->data_avail;
return read;
}
static void virtio_cleanup(struct hwrng *rng)
{
struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
if (vi->busy)
wait_for_completion(&vi->have_data);
complete(&vi->have_data);
}
static int probe_common(struct virtio_device *vdev)
@ -118,6 +159,9 @@ static int probe_common(struct virtio_device *vdev)
goto err_find;
}
/* we always have a pending entropy request */
request_entropy(vi);
return 0;
err_find:
@ -133,9 +177,9 @@ static void remove_common(struct virtio_device *vdev)
vi->hwrng_removed = true;
vi->data_avail = 0;
vi->data_idx = 0;
complete(&vi->have_data);
vdev->config->reset(vdev);
vi->busy = false;
if (vi->hwrng_register_done)
hwrng_unregister(&vi->hwrng);
vdev->config->del_vqs(vdev);

drivers/char/virtio_console.c

@ -28,6 +28,7 @@
#include "../tty/hvc/hvc_console.h"
#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC)
#define VIRTCONS_MAX_PORTS 0x8000
/*
* This is a global struct for storing common data for all the devices
@ -2036,6 +2037,14 @@ static int virtcons_probe(struct virtio_device *vdev)
virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT,
struct virtio_console_config, max_nr_ports,
&portdev->max_nr_ports) == 0) {
if (portdev->max_nr_ports == 0 ||
portdev->max_nr_ports > VIRTCONS_MAX_PORTS) {
dev_err(&vdev->dev,
"Invalidate max_nr_ports %d",
portdev->max_nr_ports);
err = -EINVAL;
goto free;
}
multiport = true;
}

drivers/i2c/busses/i2c-virtio.c

@ -62,35 +62,33 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
for (i = 0; i < num; i++) {
int outcnt = 0, incnt = 0;
/*
* We don't support 0 length messages and so filter out
* 0 length transfers by using i2c_adapter_quirks.
*/
if (!msgs[i].len)
break;
/*
* Only 7-bit mode supported for this moment. For the address
* format, Please check the Virtio I2C Specification.
*/
reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1);
if (msgs[i].flags & I2C_M_RD)
reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_M_RD);
if (i != num - 1)
reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT);
sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr));
sgs[outcnt++] = &out_hdr;
reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
if (!reqs[i].buf)
break;
if (msgs[i].len) {
reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1);
if (!reqs[i].buf)
break;
sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len);
if (msgs[i].flags & I2C_M_RD)
sgs[outcnt + incnt++] = &msg_buf;
else
sgs[outcnt++] = &msg_buf;
if (msgs[i].flags & I2C_M_RD)
sgs[outcnt + incnt++] = &msg_buf;
else
sgs[outcnt++] = &msg_buf;
}
sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr));
sgs[outcnt + incnt++] = &in_hdr;
@ -191,7 +189,7 @@ static int virtio_i2c_setup_vqs(struct virtio_i2c *vi)
static u32 virtio_i2c_func(struct i2c_adapter *adap)
{
return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
}
static struct i2c_algorithm virtio_algorithm = {
@ -199,15 +197,16 @@ static struct i2c_algorithm virtio_algorithm = {
.functionality = virtio_i2c_func,
};
static const struct i2c_adapter_quirks virtio_i2c_quirks = {
.flags = I2C_AQ_NO_ZERO_LEN,
};
static int virtio_i2c_probe(struct virtio_device *vdev)
{
struct virtio_i2c *vi;
int ret;
if (!virtio_has_feature(vdev, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST)) {
dev_err(&vdev->dev, "Zero-length request feature is mandatory\n");
return -EINVAL;
}
vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL);
if (!vi)
return -ENOMEM;
@ -225,7 +224,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev)
snprintf(vi->adap.name, sizeof(vi->adap.name),
"i2c_virtio at virtio bus %d", vdev->index);
vi->adap.algo = &virtio_algorithm;
vi->adap.quirks = &virtio_i2c_quirks;
vi->adap.dev.parent = &vdev->dev;
vi->adap.dev.of_node = vdev->dev.of_node;
i2c_set_adapdata(&vi->adap, vi);
@ -270,11 +268,17 @@ static int virtio_i2c_restore(struct virtio_device *vdev)
}
#endif
static const unsigned int features[] = {
VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
};
static struct virtio_driver virtio_i2c_driver = {
.id_table = id_table,
.probe = virtio_i2c_probe,
.remove = virtio_i2c_remove,
.driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.id_table = id_table,
.probe = virtio_i2c_probe,
.remove = virtio_i2c_remove,
.driver = {
.name = "i2c_virtio",
},
#ifdef CONFIG_PM_SLEEP

drivers/net/virtio_net.c

@ -408,12 +408,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
* add_recvbuf_mergeable() + get_mergeable_buf_len()
*/
truesize = headroom ? PAGE_SIZE : truesize;
tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len);
tailroom = truesize - headroom;
buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
tailroom -= hdr_padded_len + len;
shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@ -3422,6 +3423,7 @@ static struct virtio_driver virtio_net_driver = {
.feature_table_size = ARRAY_SIZE(features),
.feature_table_legacy = features_legacy,
.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

drivers/scsi/virtio_scsi.c

@ -978,6 +978,7 @@ static unsigned int features[] = {
static struct virtio_driver virtio_scsi_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,

drivers/vdpa/Kconfig

@ -78,4 +78,12 @@ config VP_VDPA
help
This kernel module bridges virtio PCI device to vDPA bus.
config ALIBABA_ENI_VDPA
tristate "vDPA driver for Alibaba ENI"
select VIRTIO_PCI_LIB_LEGACY
depends on PCI_MSI && X86
help
VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon
virtio 0.9.5 specification.
endif # VDPA

drivers/vdpa/Makefile

@ -5,3 +5,4 @@ obj-$(CONFIG_VDPA_USER) += vdpa_user/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
obj-$(CONFIG_VP_VDPA) += virtio_pci/
obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/

drivers/vdpa/alibaba/Makefile

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ALIBABA_ENI_VDPA) += eni_vdpa.o

drivers/vdpa/alibaba/eni_vdpa.c

@ -0,0 +1,553 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* vDPA bridge driver for Alibaba ENI(Elastic Network Interface)
*
* Copyright (c) 2021, Alibaba Inc. All rights reserved.
* Author: Wu Zongyong <wuzongyong@linux.alibaba.com>
*
*/
#include "linux/bits.h"
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/vdpa.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_legacy.h>
#include <uapi/linux/virtio_net.h>
#define ENI_MSIX_NAME_SIZE 256
#define ENI_ERR(pdev, fmt, ...) \
dev_err(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
#define ENI_DBG(pdev, fmt, ...) \
dev_dbg(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
#define ENI_INFO(pdev, fmt, ...) \
dev_info(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__)
struct eni_vring {
void __iomem *notify;
char msix_name[ENI_MSIX_NAME_SIZE];
struct vdpa_callback cb;
int irq;
};
struct eni_vdpa {
struct vdpa_device vdpa;
struct virtio_pci_legacy_device ldev;
struct eni_vring *vring;
struct vdpa_callback config_cb;
char msix_name[ENI_MSIX_NAME_SIZE];
int config_irq;
int queues;
int vectors;
};
static struct eni_vdpa *vdpa_to_eni(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct eni_vdpa, vdpa);
}
static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
return &eni_vdpa->ldev;
}
static u64 eni_vdpa_get_features(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
u64 features = vp_legacy_get_features(ldev);
features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
features |= BIT_ULL(VIRTIO_F_ORDER_PLATFORM);
return features;
}
static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
if (!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) && features) {
ENI_ERR(ldev->pci_dev,
"VIRTIO_NET_F_MRG_RXBUF is not negotiated\n");
return -EINVAL;
}
vp_legacy_set_features(ldev, (u32)features);
return 0;
}
static u8 eni_vdpa_get_status(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_status(ldev);
}
static int eni_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
int irq = eni_vdpa->vring[idx].irq;
if (irq == VIRTIO_MSI_NO_VECTOR)
return -EINVAL;
return irq;
}
static void eni_vdpa_free_irq(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
struct pci_dev *pdev = ldev->pci_dev;
int i;
for (i = 0; i < eni_vdpa->queues; i++) {
if (eni_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
vp_legacy_queue_vector(ldev, i, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, eni_vdpa->vring[i].irq,
&eni_vdpa->vring[i]);
eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
}
}
if (eni_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
vp_legacy_config_vector(ldev, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, eni_vdpa->config_irq, eni_vdpa);
eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
}
if (eni_vdpa->vectors) {
pci_free_irq_vectors(pdev);
eni_vdpa->vectors = 0;
}
}
static irqreturn_t eni_vdpa_vq_handler(int irq, void *arg)
{
struct eni_vring *vring = arg;
if (vring->cb.callback)
return vring->cb.callback(vring->cb.private);
return IRQ_HANDLED;
}
static irqreturn_t eni_vdpa_config_handler(int irq, void *arg)
{
struct eni_vdpa *eni_vdpa = arg;
if (eni_vdpa->config_cb.callback)
return eni_vdpa->config_cb.callback(eni_vdpa->config_cb.private);
return IRQ_HANDLED;
}
static int eni_vdpa_request_irq(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
struct pci_dev *pdev = ldev->pci_dev;
int i, ret, irq;
int queues = eni_vdpa->queues;
int vectors = queues + 1;
ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
if (ret != vectors) {
ENI_ERR(pdev,
"failed to allocate irq vectors want %d but %d\n",
vectors, ret);
return ret;
}
eni_vdpa->vectors = vectors;
for (i = 0; i < queues; i++) {
snprintf(eni_vdpa->vring[i].msix_name, ENI_MSIX_NAME_SIZE,
"eni-vdpa[%s]-%d\n", pci_name(pdev), i);
irq = pci_irq_vector(pdev, i);
ret = devm_request_irq(&pdev->dev, irq,
eni_vdpa_vq_handler,
0, eni_vdpa->vring[i].msix_name,
&eni_vdpa->vring[i]);
if (ret) {
ENI_ERR(pdev, "failed to request irq for vq %d\n", i);
goto err;
}
vp_legacy_queue_vector(ldev, i, i);
eni_vdpa->vring[i].irq = irq;
}
snprintf(eni_vdpa->msix_name, ENI_MSIX_NAME_SIZE, "eni-vdpa[%s]-config\n",
pci_name(pdev));
irq = pci_irq_vector(pdev, queues);
ret = devm_request_irq(&pdev->dev, irq, eni_vdpa_config_handler, 0,
eni_vdpa->msix_name, eni_vdpa);
if (ret) {
ENI_ERR(pdev, "failed to request irq for config vq %d\n", i);
goto err;
}
vp_legacy_config_vector(ldev, queues);
eni_vdpa->config_irq = irq;
return 0;
err:
eni_vdpa_free_irq(eni_vdpa);
return ret;
}
static void eni_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u8 s = eni_vdpa_get_status(vdpa);
if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
!(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
eni_vdpa_request_irq(eni_vdpa);
}
vp_legacy_set_status(ldev, status);
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
(s & VIRTIO_CONFIG_S_DRIVER_OK))
eni_vdpa_free_irq(eni_vdpa);
}
static int eni_vdpa_reset(struct vdpa_device *vdpa)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u8 s = eni_vdpa_get_status(vdpa);
vp_legacy_set_status(ldev, 0);
if (s & VIRTIO_CONFIG_S_DRIVER_OK)
eni_vdpa_free_irq(eni_vdpa);
return 0;
}
static u16 eni_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_size(ldev, 0);
}
static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_size(ldev, 0);
}
static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
struct vdpa_vq_state *state)
{
return -EOPNOTSUPP;
}
static int eni_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
const struct vdpa_vq_state *state)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
const struct vdpa_vq_state_split *split = &state->split;
/* ENI is build upon virtio-pci specfication which not support
* to set state of virtqueue. But if the state is equal to the
* device initial state by chance, we can let it go.
*/
if (!vp_legacy_get_queue_enable(ldev, qid)
&& split->avail_index == 0)
return 0;
return -EOPNOTSUPP;
}
static void eni_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
struct vdpa_callback *cb)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
eni_vdpa->vring[qid].cb = *cb;
}
static void eni_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 qid,
bool ready)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
/* ENI is a legacy virtio-pci device. This is not supported
* by specification. But we can disable virtqueue by setting
* address to 0.
*/
if (!ready)
vp_legacy_set_queue_address(ldev, qid, 0);
}
static bool eni_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return vp_legacy_get_queue_enable(ldev, qid);
}
static void eni_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
u32 num)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
struct pci_dev *pdev = ldev->pci_dev;
u16 n = vp_legacy_get_queue_size(ldev, qid);
/* ENI is a legacy virtio-pci device which not allow to change
* virtqueue size. Just report a error if someone tries to
* change it.
*/
if (num != n)
ENI_ERR(pdev,
"not support to set vq %u fixed num %u to %u\n",
qid, n, num);
}
static int eni_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
u64 desc_area, u64 driver_area,
u64 device_area)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
u32 pfn = desc_area >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
vp_legacy_set_queue_address(ldev, qid, pfn);
return 0;
}
static void eni_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
iowrite16(qid, eni_vdpa->vring[qid].notify);
}
static u32 eni_vdpa_get_device_id(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return ldev->id.device;
}
static u32 eni_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa);
return ldev->id.vendor;
}
static u32 eni_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
return VIRTIO_PCI_VRING_ALIGN;
}
static size_t eni_vdpa_get_config_size(struct vdpa_device *vdpa)
{
return sizeof(struct virtio_net_config);
}
static void eni_vdpa_get_config(struct vdpa_device *vdpa,
unsigned int offset,
void *buf, unsigned int len)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
void __iomem *ioaddr = ldev->ioaddr +
VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
offset;
u8 *p = buf;
int i;
for (i = 0; i < len; i++)
*p++ = ioread8(ioaddr + i);
}
static void eni_vdpa_set_config(struct vdpa_device *vdpa,
unsigned int offset, const void *buf,
unsigned int len)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
void __iomem *ioaddr = ldev->ioaddr +
VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) +
offset;
const u8 *p = buf;
int i;
for (i = 0; i < len; i++)
iowrite8(*p++, ioaddr + i);
}
static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa,
struct vdpa_callback *cb)
{
struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa);
eni_vdpa->config_cb = *cb;
}
static const struct vdpa_config_ops eni_vdpa_ops = {
.get_features = eni_vdpa_get_features,
.set_features = eni_vdpa_set_features,
.get_status = eni_vdpa_get_status,
.set_status = eni_vdpa_set_status,
.reset = eni_vdpa_reset,
.get_vq_num_max = eni_vdpa_get_vq_num_max,
.get_vq_num_min = eni_vdpa_get_vq_num_min,
.get_vq_state = eni_vdpa_get_vq_state,
.set_vq_state = eni_vdpa_set_vq_state,
.set_vq_cb = eni_vdpa_set_vq_cb,
.set_vq_ready = eni_vdpa_set_vq_ready,
.get_vq_ready = eni_vdpa_get_vq_ready,
.set_vq_num = eni_vdpa_set_vq_num,
.set_vq_address = eni_vdpa_set_vq_address,
.kick_vq = eni_vdpa_kick_vq,
.get_device_id = eni_vdpa_get_device_id,
.get_vendor_id = eni_vdpa_get_vendor_id,
.get_vq_align = eni_vdpa_get_vq_align,
.get_config_size = eni_vdpa_get_config_size,
.get_config = eni_vdpa_get_config,
.set_config = eni_vdpa_set_config,
.set_config_cb = eni_vdpa_set_config_cb,
.get_vq_irq = eni_vdpa_get_vq_irq,
};
static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa)
{
struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev;
u32 features = vp_legacy_get_features(ldev);
u16 num = 2;
if (features & BIT_ULL(VIRTIO_NET_F_MQ)) {
__virtio16 max_virtqueue_pairs;
eni_vdpa_get_config(&eni_vdpa->vdpa,
offsetof(struct virtio_net_config, max_virtqueue_pairs),
&max_virtqueue_pairs,
sizeof(max_virtqueue_pairs));
num = 2 * __virtio16_to_cpu(virtio_legacy_is_little_endian(),
max_virtqueue_pairs);
}
if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
num += 1;
return num;
}
static void eni_vdpa_free_irq_vectors(void *data)
{
pci_free_irq_vectors(data);
}
static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct eni_vdpa *eni_vdpa;
struct virtio_pci_legacy_device *ldev;
int ret, i;
ret = pcim_enable_device(pdev);
if (ret)
return ret;
eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa,
dev, &eni_vdpa_ops, NULL, false);
if (IS_ERR(eni_vdpa)) {
ENI_ERR(pdev, "failed to allocate vDPA structure\n");
return PTR_ERR(eni_vdpa);
}
ldev = &eni_vdpa->ldev;
ldev->pci_dev = pdev;
ret = vp_legacy_probe(ldev);
if (ret) {
ENI_ERR(pdev, "failed to probe legacy PCI device\n");
goto err;
}
pci_set_master(pdev);
pci_set_drvdata(pdev, eni_vdpa);
eni_vdpa->vdpa.dma_dev = &pdev->dev;
eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa);
ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev);
if (ret) {
ENI_ERR(pdev,
"failed for adding devres for freeing irq vectors\n");
goto err;
}
eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues,
sizeof(*eni_vdpa->vring),
GFP_KERNEL);
if (!eni_vdpa->vring) {
ret = -ENOMEM;
ENI_ERR(pdev, "failed to allocate virtqueues\n");
goto err;
}
for (i = 0; i < eni_vdpa->queues; i++) {
eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
eni_vdpa->vring[i].notify = ldev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
}
eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues);
if (ret) {
ENI_ERR(pdev, "failed to register to vdpa bus\n");
goto err;
}
return 0;
err:
put_device(&eni_vdpa->vdpa.dev);
return ret;
}
static void eni_vdpa_remove(struct pci_dev *pdev)
{
struct eni_vdpa *eni_vdpa = pci_get_drvdata(pdev);
vdpa_unregister_device(&eni_vdpa->vdpa);
vp_legacy_remove(&eni_vdpa->ldev);
}
static struct pci_device_id eni_pci_ids[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET,
VIRTIO_TRANS_ID_NET,
PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
VIRTIO_ID_NET) },
{ 0 },
};
static struct pci_driver eni_vdpa_driver = {
.name = "alibaba-eni-vdpa",
.id_table = eni_pci_ids,
.probe = eni_vdpa_probe,
.remove = eni_vdpa_remove,
};
module_pci_driver(eni_vdpa_driver);
MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba ENI vDPA driver");
MODULE_LICENSE("GPL v2");

drivers/vdpa/ifcvf/ifcvf_main.c

@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev)
return dev_type;
}
static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct ifcvf_adapter *adapter;

drivers/vdpa/mlx5/core/mlx5_vdpa.h

@ -63,7 +63,7 @@ struct mlx5_control_vq {
unsigned short head;
};
struct mlx5_ctrl_wq_ent {
struct mlx5_vdpa_wq_ent {
struct work_struct work;
struct mlx5_vdpa_dev *mvdev;
};

drivers/vdpa/mlx5/net/mlx5_vnet.c

@ -6,6 +6,7 @@
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
@ -157,10 +158,12 @@ struct mlx5_vdpa_net {
struct mutex reslock;
struct mlx5_flow_table *rxft;
struct mlx5_fc *rx_counter;
struct mlx5_flow_handle *rx_rule;
struct mlx5_flow_handle *rx_rule_ucast;
struct mlx5_flow_handle *rx_rule_mcast;
bool setup;
u16 mtu;
u32 cur_num_vqs;
struct notifier_block nb;
struct vdpa_callback config_cb;
};
static void free_resources(struct mlx5_vdpa_net *ndev);
@ -1381,21 +1384,33 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
u8 *dmac_c;
u8 *dmac_v;
int err;
/* for now, one entry, match all, forward to tir */
ft_attr.max_fte = 1;
ft_attr.autogroup.max_num_groups = 1;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
ft_attr.max_fte = 2;
ft_attr.autogroup.max_num_groups = 2;
ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
if (!ns) {
mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
return -EOPNOTSUPP;
mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
err = -EOPNOTSUPP;
goto err_ns;
}
ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ndev->rxft))
return PTR_ERR(ndev->rxft);
if (IS_ERR(ndev->rxft)) {
err = PTR_ERR(ndev->rxft);
goto err_ns;
}
ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
if (IS_ERR(ndev->rx_counter)) {
@ -1403,37 +1418,64 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
goto err_fc;
}
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
memset(dmac_c, 0xff, ETH_ALEN);
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
ether_addr_copy(dmac_v, ndev->config.mac);
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dest[0].tir_num = ndev->res.tirn;
dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
if (IS_ERR(ndev->rx_rule)) {
err = PTR_ERR(ndev->rx_rule);
ndev->rx_rule = NULL;
goto err_rule;
ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2);
if (IS_ERR(ndev->rx_rule_ucast)) {
err = PTR_ERR(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
goto err_rule_ucast;
}
memset(dmac_c, 0, ETH_ALEN);
memset(dmac_v, 0, ETH_ALEN);
dmac_c[0] = 1;
dmac_v[0] = 1;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1);
if (IS_ERR(ndev->rx_rule_mcast)) {
err = PTR_ERR(ndev->rx_rule_mcast);
ndev->rx_rule_mcast = NULL;
goto err_rule_mcast;
}
kvfree(spec);
return 0;
err_rule:
err_rule_mcast:
mlx5_del_flow_rules(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
err_rule_ucast:
mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
err_fc:
mlx5_destroy_flow_table(ndev->rxft);
err_ns:
kvfree(spec);
return err;
}
static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
{
if (!ndev->rx_rule)
if (!ndev->rx_rule_ucast)
return;
mlx5_del_flow_rules(ndev->rx_rule);
mlx5_del_flow_rules(ndev->rx_rule_mcast);
ndev->rx_rule_mcast = NULL;
mlx5_del_flow_rules(ndev->rx_rule_ucast);
ndev->rx_rule_ucast = NULL;
mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
mlx5_destroy_flow_table(ndev->rxft);
ndev->rx_rule = NULL;
}
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
@ -1557,14 +1599,14 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
{
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
struct virtio_net_ctrl_hdr ctrl;
struct mlx5_ctrl_wq_ent *wqent;
struct mlx5_vdpa_wq_ent *wqent;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_control_vq *cvq;
struct mlx5_vdpa_net *ndev;
size_t read, write;
int err;
wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
mvdev = wqent->mvdev;
ndev = to_mlx5_vdpa_ndev(mvdev);
cvq = &mvdev->cvq;
@ -1616,7 +1658,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
struct mlx5_ctrl_wq_ent *wqent;
struct mlx5_vdpa_wq_ent *wqent;
if (!is_index_valid(mvdev, idx))
return;
@ -1852,6 +1894,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
print_features(mvdev, ndev->mvdev.mlx_features, false);
return ndev->mvdev.mlx_features;
@ -1942,16 +1985,16 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
return err;
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
update_cvq_info(mvdev);
return err;
}
static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
{
/* not implemented */
mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
ndev->config_cb = *cb;
}
#define MLX5_VDPA_MAX_VQ_ENTRIES 256
@ -2192,7 +2235,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
ndev->mvdev.mlx_features = 0;
memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
ndev->mvdev.actual_features = 0;
++mvdev->generation;
@ -2404,7 +2446,84 @@ struct mlx5_vdpa_mgmtdev {
struct mlx5_vdpa_net *ndev;
};
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
{
u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
int err;
MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
MLX5_SET(query_vport_state_in, in, op_mod, opmod);
MLX5_SET(query_vport_state_in, in, vport_number, vport);
if (vport)
MLX5_SET(query_vport_state_in, in, other_vport, 1);
err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
if (err)
return 0;
return MLX5_GET(query_vport_state_out, out, state);
}
static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
{
if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
VPORT_STATE_UP)
return true;
return false;
}
static void update_carrier(struct work_struct *work)
{
struct mlx5_vdpa_wq_ent *wqent;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
mvdev = wqent->mvdev;
ndev = to_mlx5_vdpa_ndev(mvdev);
if (get_link_state(mvdev))
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
else
ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
if (ndev->config_cb.callback)
ndev->config_cb.callback(ndev->config_cb.private);
kfree(wqent);
}
static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
{
struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
struct mlx5_eqe *eqe = param;
int ret = NOTIFY_DONE;
struct mlx5_vdpa_wq_ent *wqent;
if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
if (!wqent)
return NOTIFY_DONE;
wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work);
ret = NOTIFY_OK;
break;
default:
return NOTIFY_DONE;
}
return ret;
}
return ret;
}
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
const struct vdpa_dev_set_config *add_config)
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct virtio_net_config *config;
@ -2413,6 +2532,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
struct mlx5_vdpa_net *ndev;
struct mlx5_core_dev *mdev;
u32 max_vqs;
u16 mtu;
int err;
if (mgtdev->ndev)
@ -2440,13 +2560,24 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
init_mvqs(ndev);
mutex_init(&ndev->reslock);
config = &ndev->config;
err = query_mtu(mdev, &ndev->mtu);
err = query_mtu(mdev, &mtu);
if (err)
goto err_mtu;
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_mtu;
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
if (get_link_state(mvdev))
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
else
ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
} else {
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_mtu;
}
if (!is_zero_ether_addr(config->mac)) {
pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
@ -2473,12 +2604,14 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
if (err)
goto err_mr;
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
if (!mvdev->wq) {
err = -ENOMEM;
goto err_res2;
}
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
@ -2509,7 +2642,9 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
destroy_workqueue(mvdev->wq);
_vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
@ -2541,6 +2676,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
mgtdev->mgtdev.ops = &mdev_ops;
mgtdev->mgtdev.device = mdev->device;
mgtdev->mgtdev.id_table = id_table;
mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
mgtdev->madev = madev;
err = vdpa_mgmtdev_register(&mgtdev->mgtdev);

drivers/vdpa/vdpa.c

@ -14,6 +14,7 @@
#include <uapi/linux/vdpa.h>
#include <net/genetlink.h>
#include <linux/mod_devicetable.h>
#include <linux/virtio_ids.h>
static LIST_HEAD(mdev_head);
/* A global mutex that protects vdpa management device and device level operations. */
@ -26,8 +27,16 @@ static int vdpa_dev_probe(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver);
const struct vdpa_config_ops *ops = vdev->config;
u32 max_num, min_num = 1;
int ret = 0;
max_num = ops->get_vq_num_max(vdev);
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdev);
if (max_num < min_num)
return -EINVAL;
if (drv && drv->probe)
ret = drv->probe(vdev);
@ -58,6 +67,7 @@ static void vdpa_release_dev(struct device *d)
ops->free(vdev);
ida_simple_remove(&vdpa_index_ida, vdev->index);
mutex_destroy(&vdev->cf_mutex);
kfree(vdev);
}
@ -119,6 +129,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
if (err)
goto err_name;
mutex_init(&vdev->cf_mutex);
device_initialize(&vdev->dev);
return vdev;
@ -289,6 +300,46 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
}
EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
/**
* vdpa_get_config - Get one or more device configuration fields.
* @vdev: vdpa device to operate on
* @offset: starting byte offset of the field
* @buf: buffer pointer to read to
* @len: length of the configuration fields in bytes
*/
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len)
{
const struct vdpa_config_ops *ops = vdev->config;
mutex_lock(&vdev->cf_mutex);
/*
* Config accesses aren't supposed to trigger before features are set.
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
vdpa_set_features(vdev, 0);
ops->get_config(vdev, offset, buf, len);
mutex_unlock(&vdev->cf_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_get_config);
/**
* vdpa_set_config - Set one or more device configuration fields.
* @vdev: vdpa device to operate on
* @offset: starting byte offset of the field
* @buf: buffer pointer to read from
* @length: length of the configuration fields in bytes
*/
void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
const void *buf, unsigned int length)
{
mutex_lock(&vdev->cf_mutex);
vdev->config->set_config(vdev, offset, buf, length);
mutex_unlock(&vdev->cf_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_set_config);
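/*
 * Editorial sketch, not part of this file: a typical consumer of the helpers
 * above.  vdpa_get_config() serialises on cf_mutex and, for a legacy guest
 * that touches config space before feature negotiation, first forces
 * vdpa_set_features(vdev, 0).  The function name below is hypothetical.
 */
static u16 example_read_net_mtu(struct vdpa_device *vdev)
{
	struct virtio_net_config config;

	vdpa_get_config(vdev, 0, &config, sizeof(config));
	return le16_to_cpu(config.mtu);
}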
static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
const char *busname, const char *devname)
{
@ -428,9 +479,15 @@ out:
return msg->len;
}
#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
(1 << VDPA_ATTR_DEV_NET_CFG_MTU))
static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_dev_set_config config = {};
struct nlattr **nl_attrs = info->attrs;
struct vdpa_mgmt_dev *mdev;
const u8 *macaddr;
const char *name;
int err = 0;
@ -439,6 +496,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
}
if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
config.net.mtu =
nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
}
/* Skip checking capability if user didn't prefer to configure any
* device networking attributes. It is likely that user might have used
* a device specific method to configure such attributes or using device
* default attributes.
*/
if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
mutex_lock(&vdpa_dev_mutex);
mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
if (IS_ERR(mdev)) {
@ -446,8 +523,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
err = PTR_ERR(mdev);
goto err;
}
if ((config.mask & mdev->config_attr_mask) != config.mask) {
NL_SET_ERR_MSG_MOD(info->extack,
"All provided attributes are not supported");
err = -EOPNOTSUPP;
goto err;
}
err = mdev->ops->dev_add(mdev, name);
err = mdev->ops->dev_add(mdev, name, &config);
err:
mutex_unlock(&vdpa_dev_mutex);
return err;
@ -492,6 +575,7 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
int flags, struct netlink_ext_ack *extack)
{
u16 max_vq_size;
u16 min_vq_size = 1;
u32 device_id;
u32 vendor_id;
void *hdr;
@ -508,6 +592,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
device_id = vdev->config->get_device_id(vdev);
vendor_id = vdev->config->get_vendor_id(vdev);
max_vq_size = vdev->config->get_vq_num_max(vdev);
if (vdev->config->get_vq_num_min)
min_vq_size = vdev->config->get_vq_num_min(vdev);
err = -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
@ -520,6 +606,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq
goto msg_err;
if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
goto msg_err;
if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size))
goto msg_err;
genlmsg_end(msg, hdr);
return 0;
@ -612,10 +700,175 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba
return msg->len;
}
static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
struct sk_buff *msg, u64 features,
const struct virtio_net_config *config)
{
u16 val_u16;
if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
return 0;
val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
}
static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
{
struct virtio_net_config config = {};
u64 features;
u16 val_u16;
vdpa_get_config(vdev, 0, &config, sizeof(config));
if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
config.mac))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.status);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
return -EMSGSIZE;
val_u16 = le16_to_cpu(config.mtu);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE;
features = vdev->config->get_features(vdev);
return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
}
static int
vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct netlink_ext_ack *extack)
{
u32 device_id;
void *hdr;
int err;
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET);
if (!hdr)
return -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
err = -EMSGSIZE;
goto msg_err;
}
device_id = vdev->config->get_device_id(vdev);
if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
err = -EMSGSIZE;
goto msg_err;
}
switch (device_id) {
case VIRTIO_ID_NET:
err = vdpa_dev_net_config_fill(vdev, msg);
break;
default:
err = -EOPNOTSUPP;
break;
}
if (err)
goto msg_err;
genlmsg_end(msg, hdr);
return 0;
msg_err:
genlmsg_cancel(msg, hdr);
return err;
}
static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_device *vdev;
struct sk_buff *msg;
const char *devname;
struct device *dev;
int err;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
return -EINVAL;
devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
mutex_lock(&vdpa_dev_mutex);
dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
if (!dev) {
NL_SET_ERR_MSG_MOD(info->extack, "device not found");
err = -ENODEV;
goto dev_err;
}
vdev = container_of(dev, struct vdpa_device, dev);
if (!vdev->mdev) {
NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
err = -EINVAL;
goto mdev_err;
}
err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq,
0, info->extack);
if (!err)
err = genlmsg_reply(msg, info);
mdev_err:
put_device(dev);
dev_err:
mutex_unlock(&vdpa_dev_mutex);
if (err)
nlmsg_free(msg);
return err;
}
static int vdpa_dev_config_dump(struct device *dev, void *data)
{
struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
struct vdpa_dev_dump_info *info = data;
int err;
if (!vdev->mdev)
return 0;
if (info->idx < info->start_idx) {
info->idx++;
return 0;
}
err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
info->cb->nlh->nlmsg_seq, NLM_F_MULTI,
info->cb->extack);
if (err)
return err;
info->idx++;
return 0;
}
static int
vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
{
struct vdpa_dev_dump_info info;
info.msg = msg;
info.cb = cb;
info.start_idx = cb->args[0];
info.idx = 0;
mutex_lock(&vdpa_dev_mutex);
bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump);
mutex_unlock(&vdpa_dev_mutex);
cb->args[0] = info.idx;
return msg->len;
}
static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
};
static const struct genl_ops vdpa_nl_ops[] = {
@ -643,6 +896,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
.doit = vdpa_nl_cmd_dev_get_doit,
.dumpit = vdpa_nl_cmd_dev_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_CONFIG_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_config_get_doit,
.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
},
};
static struct genl_family vdpa_nl_family __ro_after_init = {

drivers/vdpa/vdpa_sim/vdpa_sim_blk.c

@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = {
.release = vdpasim_blk_mgmtdev_release,
};
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;

drivers/vdpa/vdpa_sim/vdpa_sim_net.c

@ -16,6 +16,7 @@
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/vdpa.h>
#include "vdpa_sim.h"
@ -29,12 +30,6 @@
#define VDPASIM_NET_VQ_NUM 2
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
static u8 macaddr_buf[ETH_ALEN];
static void vdpasim_net_work(struct work_struct *work)
{
struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
@ -112,9 +107,21 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
struct virtio_net_config *net_config = config;
net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
}
static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
const struct vdpa_dev_set_config *config)
{
struct virtio_net_config *vio_config = vdpasim->config;
if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
else
/* Setup default MTU to be 1500 */
vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}
static void vdpasim_net_mgmtdev_release(struct device *dev)
@ -126,7 +133,8 @@ static struct device vdpasim_net_mgmtdev = {
.release = vdpasim_net_mgmtdev_release,
};
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;
@ -146,6 +154,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
if (IS_ERR(simdev))
return PTR_ERR(simdev);
vdpasim_net_setup_config(simdev, config);
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
if (ret)
goto reg_err;
@ -179,20 +189,14 @@ static struct vdpa_mgmt_dev mgmt_dev = {
.device = &vdpasim_net_mgmtdev,
.id_table = id_table,
.ops = &vdpasim_net_mgmtdev_ops,
.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
1 << VDPA_ATTR_DEV_NET_CFG_MTU),
};
static int __init vdpasim_net_init(void)
{
int ret;
if (macaddr) {
mac_pton(macaddr, macaddr_buf);
if (!is_valid_ether_addr(macaddr_buf))
return -EADDRNOTAVAIL;
} else {
eth_random_addr(macaddr_buf);
}
ret = device_register(&vdpasim_net_mgmtdev);
if (ret)
return ret;

drivers/vdpa/vdpa_user/vduse_dev.c

@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
return 0;
}
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vduse_dev *dev;
int ret;

drivers/vdpa/virtio_pci/vp_vdpa.c

@ -76,6 +76,17 @@ static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
return vp_modern_get_status(mdev);
}
static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
int irq = vp_vdpa->vring[idx].irq;
if (irq == VIRTIO_MSI_NO_VECTOR)
return -EINVAL;
return irq;
}
static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa)
{
struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
@ -427,6 +438,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = {
.get_config = vp_vdpa_get_config,
.set_config = vp_vdpa_set_config,
.set_config_cb = vp_vdpa_set_config_cb,
.get_vq_irq = vp_vdpa_get_vq_irq,
};
static void vp_vdpa_free_irq_vectors(void *data)

View File

@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
struct vhost_vdpa_config __user *c)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
struct vhost_vdpa_config config;
unsigned long size = offsetof(struct vhost_vdpa_config, buf);
u8 *buf;
@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
if (IS_ERR(buf))
return PTR_ERR(buf);
ops->set_config(vdpa, config.off, buf, config.len);
vdpa_set_config(vdpa, config.off, buf, config.len);
kvfree(buf);
return 0;
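
The hunk above switches vhost-vdpa from calling ops->set_config() directly to the new vdpa_set_config() core helper. The helpers are only declared in this merge's vdpa.h hunk further down; the sketch below is an illustrative guess at their shape, combining the new cf_mutex field with the legacy-guest feature fixup that the removed inline vdpa_get_config() used to carry. It is not the exact upstream implementation.

#include <linux/mutex.h>
#include <linux/vdpa.h>

/* Sketch only: serialize config accesses on the device's cf_mutex. */
void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
                     const void *buf, unsigned int length)
{
        mutex_lock(&vdev->cf_mutex);
        vdev->config->set_config(vdev, offset, buf, length);
        mutex_unlock(&vdev->cf_mutex);
}

void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
                     void *buf, unsigned int len)
{
        mutex_lock(&vdev->cf_mutex);
        /* Config reads before feature negotiation imply a legacy guest,
         * as the removed inline helper assumed. */
        if (!vdev->features_valid)
                vdpa_set_features(vdev, 0);
        vdev->config->get_config(vdev, offset, buf, len);
        mutex_unlock(&vdev->cf_mutex);
}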

View File

@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB
PCI device with possible vendor specific extensions. Any
module that selects this module must depend on PCI.
config VIRTIO_PCI_LIB_LEGACY
tristate
help
Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device)
implementation.
This module implements the basic probe and control for devices
which are based on the legacy PCI interface. Any module that selects
this module must depend on PCI.
menuconfig VIRTIO_MENU
bool "Virtio drivers"
default y
@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY
bool "Support for legacy virtio draft 0.9.X and older devices"
default y
depends on VIRTIO_PCI
select VIRTIO_PCI_LIB_LEGACY
help
Virtio PCI Card 0.9.X Draft (circa 2014) and older device support.

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o
obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o
obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o

View File

@ -24,17 +24,46 @@ MODULE_PARM_DESC(force_legacy,
"Force legacy mode for transitional virtio 1 devices");
#endif
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
/* disable irq handlers */
void vp_disable_cbs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;
if (vp_dev->intx_enabled)
if (vp_dev->intx_enabled) {
/*
* The below synchronize_irq() ensures that any
* interrupt handler for this line that runs after
* synchronize_irq() has completed will see
* intx_soft_enabled == false.
*/
WRITE_ONCE(vp_dev->intx_soft_enabled, false);
synchronize_irq(vp_dev->pci_dev->irq);
}
for (i = 0; i < vp_dev->msix_vectors; ++i)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
disable_irq(pci_irq_vector(vp_dev->pci_dev, i));
}
/* enable irq handlers */
void vp_enable_cbs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;
if (vp_dev->intx_enabled) {
disable_irq(vp_dev->pci_dev->irq);
/*
* The above disable_irq() provides TSO ordering and
* as such promotes the below store to store-release.
*/
WRITE_ONCE(vp_dev->intx_soft_enabled, true);
enable_irq(vp_dev->pci_dev->irq);
return;
}
for (i = 0; i < vp_dev->msix_vectors; ++i)
enable_irq(pci_irq_vector(vp_dev->pci_dev, i));
}
/* the notify function used when creating a virt queue */
@ -84,6 +113,9 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
struct virtio_pci_device *vp_dev = opaque;
u8 isr;
if (!READ_ONCE(vp_dev->intx_soft_enabled))
return IRQ_NONE;
/* reading the ISR has the effect of also clearing it so it's very
* important to save off the value. */
isr = ioread8(vp_dev->isr);
@ -141,7 +173,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-config", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_config_changed, 0, vp_dev->msix_names[v],
vp_config_changed, IRQF_NO_AUTOEN,
vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
@ -160,7 +193,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-virtqueues", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_vring_interrupt, 0, vp_dev->msix_names[v],
vp_vring_interrupt, IRQF_NO_AUTOEN,
vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
@ -337,7 +371,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
vring_interrupt, 0,
vring_interrupt, IRQF_NO_AUTOEN,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err)
@ -549,6 +583,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
pci_set_master(pci_dev);
vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
rc = register_virtio_device(&vp_dev->vdev);
reg_dev = vp_dev;
if (rc)
@ -557,10 +593,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
return 0;
err_register:
if (vp_dev->ioaddr)
virtio_pci_legacy_remove(vp_dev);
if (vp_dev->is_legacy)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);
virtio_pci_modern_remove(vp_dev);
err_probe:
pci_disable_device(pci_dev);
err_enable_device:
@ -587,7 +623,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
unregister_virtio_device(&vp_dev->vdev);
if (vp_dev->ioaddr)
if (vp_dev->is_legacy)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);
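
With the hardening above, MSI-X handlers are requested with IRQF_NO_AUTOEN and INTx delivery is gated on intx_soft_enabled, so no virtqueue or config callback can run before the core calls the new .enable_cbs hook from virtio_device_ready(). Nothing changes for well-behaved drivers except the point at which callbacks go live; a minimal sketch of that ordering, with a hypothetical driver and names:

#include <linux/virtio.h>
#include <linux/virtio_config.h>

static void demo_recv_done(struct virtqueue *vq)
{
        /* With the hardening, this cannot fire before
         * virtio_device_ready() is called in probe below. */
}

static int demo_probe(struct virtio_device *vdev)
{
        static const char * const names[] = { "demo.rx" };
        vq_callback_t *callbacks[] = { demo_recv_done };
        struct virtqueue *vq;
        int err;

        /* Vectors are requested with IRQF_NO_AUTOEN, so the callback
         * stays disabled even though the vq exists from here on. */
        err = virtio_find_vqs(vdev, 1, &vq, callbacks, names, NULL);
        if (err)
                return err;

        /* ... allocate and queue receive buffers ... */

        /* enable_cbs() runs first, then DRIVER_OK is set */
        virtio_device_ready(vdev);
        return 0;
}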

View File

@ -25,6 +25,7 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_legacy.h>
#include <linux/virtio_pci_modern.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
@ -44,16 +45,14 @@ struct virtio_pci_vq_info {
struct virtio_pci_device {
struct virtio_device vdev;
struct pci_dev *pci_dev;
struct virtio_pci_legacy_device ldev;
struct virtio_pci_modern_device mdev;
/* In legacy mode, these two point to within ->legacy. */
bool is_legacy;
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* Legacy only field */
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
@ -64,6 +63,7 @@ struct virtio_pci_device {
/* MSI-X support */
int msix_enabled;
int intx_enabled;
bool intx_soft_enabled;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
@ -102,8 +102,10 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
return container_of(vdev, struct virtio_pci_device, vdev);
}
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev);
/* disable irq handlers */
void vp_disable_cbs(struct virtio_device *vdev);
/* enable irq handlers */
void vp_enable_cbs(struct virtio_device *vdev);
/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq);
/* the config->del_vqs() implementation */

View File

@ -14,6 +14,7 @@
* Michael S. Tsirkin <mst@redhat.com>
*/
#include "linux/virtio_pci_legacy.h"
#include "virtio_pci_common.h"
/* virtio config->get_features() implementation */
@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev)
/* When someone needs more than 32 feature bits, we'll need to
* steal a bit to indicate that the rest are somewhere else. */
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
return vp_legacy_get_features(&vp_dev->ldev);
}
/* virtio config->finalize_features() implementation */
@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev)
BUG_ON((u32)vdev->features != vdev->features);
/* We only support 32 feature bits. */
iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
vp_legacy_set_features(&vp_dev->ldev, vdev->features);
return 0;
}
@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
void __iomem *ioaddr = vp_dev->ldev.ioaddr +
VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
offset;
u8 *ptr = buf;
@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
void __iomem *ioaddr = vp_dev->ldev.ioaddr +
VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
offset;
const u8 *ptr = buf;
@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
return vp_legacy_get_status(&vp_dev->ldev);
}
static void vp_set_status(struct virtio_device *vdev, u8 status)
@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* We should never be setting status to 0. */
BUG_ON(status == 0);
iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
vp_legacy_set_status(&vp_dev->ldev, status);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* 0 status means a reset. */
iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
vp_legacy_set_status(&vp_dev->ldev, 0);
/* Flush out the status write, and flush in device writes,
* including MSI-X interrupts, if any. */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
vp_legacy_get_status(&vp_dev->ldev);
/* Disable VQ/configuration callbacks. */
vp_disable_cbs(vdev);
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
/* Setup the vector used for configuration events */
iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
return vp_legacy_config_vector(&vp_dev->ldev, vector);
}
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
int err;
u64 q_pfn;
/* Select the queue we're interested in */
iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
/* Check if queue is either not available or already active. */
num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
num = vp_legacy_get_queue_size(&vp_dev->ldev, index);
if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
return ERR_PTR(-ENOENT);
info->msix_vector = msix_vec;
@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
}
/* activate the queue */
iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn);
vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto out_deactivate;
@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return vq;
out_deactivate:
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, index, 0);
out_del_vq:
vring_del_virtqueue(vq);
return ERR_PTR(err);
@ -178,22 +171,21 @@ static void del_vq(struct virtio_pci_vq_info *info)
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
vp_legacy_queue_vector(&vp_dev->ldev, vq->index,
VIRTIO_MSI_NO_VECTOR);
/* Flush the write out to device */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR);
}
/* Select and deactivate the queue */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0);
vring_del_virtqueue(vq);
}
static const struct virtio_config_ops virtio_pci_config_ops = {
.enable_cbs = vp_enable_cbs,
.get = vp_get,
.set = vp_set,
.get_status = vp_get_status,
@ -211,51 +203,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
/* the PCI probing function */
int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int rc;
/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
return -ENODEV;
ldev->pci_dev = pci_dev;
if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
return -ENODEV;
}
rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
if (rc) {
rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
} else {
/*
* The virtio ring base address is expressed as a 32-bit PFN,
* with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
*/
dma_set_coherent_mask(&pci_dev->dev,
DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
}
if (rc)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
rc = vp_legacy_probe(ldev);
if (rc)
return rc;
rc = -ENOMEM;
vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
if (!vp_dev->ioaddr)
goto err_iomap;
vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR;
/* we use the subsystem vendor/device id as the virtio vendor/device
* id. this allows us to use the same PCI vendor/device id for all
* virtio devices and to identify the particular virtio driver by
* the subsystem ids */
vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
vp_dev->vdev.id.device = pci_dev->subsystem_device;
vp_dev->isr = ldev->isr;
vp_dev->vdev.id = ldev->id;
vp_dev->vdev.config = &virtio_pci_config_ops;
@ -264,16 +223,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
vp_dev->del_vq = del_vq;
return 0;
err_iomap:
pci_release_region(pci_dev, 0);
return rc;
}
void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev)
{
struct pci_dev *pci_dev = vp_dev->pci_dev;
struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
pci_iounmap(pci_dev, vp_dev->ioaddr);
pci_release_region(pci_dev, 0);
vp_legacy_remove(ldev);
}

View File

@ -0,0 +1,220 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "linux/virtio_pci.h"
#include <linux/virtio_pci_legacy.h>
#include <linux/module.h>
#include <linux/pci.h>
/*
* vp_legacy_probe - probe the legacy virtio pci device; note that the
* caller is required to enable the PCI device before calling this function.
* @ldev: the legacy virtio-pci device
*
* Return 0 on success, or a negative error code on failure
*/
int vp_legacy_probe(struct virtio_pci_legacy_device *ldev)
{
struct pci_dev *pci_dev = ldev->pci_dev;
int rc;
/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
return -ENODEV;
if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION)
return -ENODEV;
rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
if (rc) {
rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
} else {
/*
* The virtio ring base address is expressed as a 32-bit PFN,
* with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
*/
dma_set_coherent_mask(&pci_dev->dev,
DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
}
if (rc)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
if (rc)
return rc;
ldev->ioaddr = pci_iomap(pci_dev, 0, 0);
if (!ldev->ioaddr)
goto err_iomap;
ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR;
ldev->id.vendor = pci_dev->subsystem_vendor;
ldev->id.device = pci_dev->subsystem_device;
return 0;
err_iomap:
pci_release_region(pci_dev, 0);
return rc;
}
EXPORT_SYMBOL_GPL(vp_legacy_probe);
/*
* vp_legacy_remove - remove and clean up the legacy virtio pci device
* @ldev: the legacy virtio-pci device
*/
void vp_legacy_remove(struct virtio_pci_legacy_device *ldev)
{
struct pci_dev *pci_dev = ldev->pci_dev;
pci_iounmap(pci_dev, ldev->ioaddr);
pci_release_region(pci_dev, 0);
}
EXPORT_SYMBOL_GPL(vp_legacy_remove);
/*
* vp_legacy_get_features - get features from device
* @ldev: the legacy virtio-pci device
*
* Returns the features read from the device
*/
u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev)
{
return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_features);
/*
* vp_legacy_get_driver_features - get driver features from device
* @ldev: the legacy virtio-pci device
*
* Returns the driver features read from the device
*/
u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev)
{
return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features);
/*
* vp_legacy_set_features - set features to device
* @ldev: the legacy virtio-pci device
* @features: the features set to device
*/
void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
u32 features)
{
iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_features);
/*
* vp_legacy_get_status - get the device status
* @ldev: the legacy virtio-pci device
*
* Returns the status read from device
*/
u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev)
{
return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_status);
/*
* vp_legacy_set_status - set status to device
* @ldev: the legacy virtio-pci device
* @status: the status set to device
*/
void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
u8 status)
{
iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_status);
/*
* vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue
* @ldev: the legacy virtio-pci device
* @index: queue index
* @vector: the MSI-X vector to use for this virtqueue
*
* Returns the vector read back from the device
*/
u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
u16 index, u16 vector)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
}
EXPORT_SYMBOL_GPL(vp_legacy_queue_vector);
/*
* vp_legacy_config_vector - set the vector for config interrupt
* @ldev: the legacy virtio-pci device
* @vector: the config vector
*
* Returns the config vector read from the device
*/
u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
u16 vector)
{
/* Setup the vector used for configuration events */
iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
}
EXPORT_SYMBOL_GPL(vp_legacy_config_vector);
/*
* vp_legacy_set_queue_address - set the virtqueue address
* @ldev: the legacy virtio-pci device
* @index: the queue index
* @queue_pfn: pfn of the virtqueue
*/
void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
u16 index, u32 queue_pfn)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
}
EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address);
/*
* vp_legacy_get_queue_enable - get whether a virtqueue is enabled
* @ldev: the legacy virtio-pci device
* @index: the queue index
*
* Returns whether a virtqueue is enabled or not
*/
bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
u16 index)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable);
/*
* vp_legacy_get_queue_size - get size for a virtqueue
* @ldev: the legacy virtio-pci device
* @index: the queue index
*
* Returns the size of the virtqueue
*/
u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
u16 index)
{
iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
}
EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Legacy Virtio PCI Device");
MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
MODULE_LICENSE("GPL");
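
Splitting the legacy register accessors into this library is what lets drivers other than virtio-pci (for instance a vdpa parent for a transitional device, such as the Alibaba ENI driver in this merge) reuse them. Below is a hedged sketch of a consumer built only on the helpers exported above; the demo_* function and the caller-provided ring PFN are hypothetical.

#include <linux/pci.h>
#include <linux/virtio_pci_legacy.h>

/* Sketch: map a legacy device and activate queue 0 at ring_pfn.
 * The pci_dev must already be enabled by the caller. */
static int demo_legacy_setup(struct virtio_pci_legacy_device *ldev,
                             struct pci_dev *pci_dev, u32 ring_pfn)
{
        u16 num;
        int rc;

        ldev->pci_dev = pci_dev;
        rc = vp_legacy_probe(ldev);     /* maps BAR0, checks the 0.9.x ABI */
        if (rc)
                return rc;

        num = vp_legacy_get_queue_size(ldev, 0);
        if (!num || vp_legacy_get_queue_enable(ldev, 0)) {
                vp_legacy_remove(ldev);
                return -ENOENT;         /* queue absent or already active */
        }
        vp_legacy_set_queue_address(ldev, 0, ring_pfn);

        /* ... feature negotiation, status handshake, vring allocation ... */
        return 0;
}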

View File

@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev)
*/
while (vp_modern_get_status(mdev))
msleep(1);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
/* Disable VQ/configuration callbacks. */
vp_disable_cbs(vdev);
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
@ -380,6 +380,7 @@ static bool vp_get_shm_region(struct virtio_device *vdev,
}
static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.enable_cbs = vp_enable_cbs,
.get = NULL,
.set = NULL,
.generation = vp_generation,
@ -397,6 +398,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
};
static const struct virtio_config_ops virtio_pci_config_ops = {
.enable_cbs = vp_enable_cbs,
.get = vp_get,
.set = vp_set,
.generation = vp_generation,

View File

@ -14,6 +14,9 @@
#include <linux/spinlock.h>
#include <xen/xen.h>
static bool force_used_validation = false;
module_param(force_used_validation, bool, 0444);
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
@ -79,8 +82,8 @@ struct vring_desc_state_packed {
};
struct vring_desc_extra {
dma_addr_t addr; /* Buffer DMA addr. */
u32 len; /* Buffer length. */
dma_addr_t addr; /* Descriptor DMA addr. */
u32 len; /* Descriptor length. */
u16 flags; /* Descriptor flags. */
u16 next; /* The next desc state in a list. */
};
@ -182,6 +185,9 @@ struct vring_virtqueue {
} packed;
};
/* Per-descriptor in buffer length */
u32 *buflen;
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
unsigned int i, n, avail, descs_used, prev, err_idx;
int head;
bool indirect;
u32 buflen = 0;
START_USE(vq);
@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
indirect);
buflen += sg->length;
}
}
/* Last one doesn't continue. */
@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
else
vq->split.desc_state[head].indir_desc = ctx;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[head] = buflen;
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
BAD_RING(vq, "id %u is not a head!\n", i);
return NULL;
}
if (vq->buflen && unlikely(*len > vq->buflen[i])) {
BAD_RING(vq, "used len %d is larger than in buflen %u\n",
*len, vq->buflen[i]);
return NULL;
}
/* detach_buf_split clears data, so grab it now. */
ret = vq->split.desc_state[i].data;
@ -1050,21 +1067,24 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
}
static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
struct scatterlist *sgs[],
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
struct scatterlist *sgs[],
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
{
struct vring_packed_desc *desc;
struct scatterlist *sg;
unsigned int i, n, err_idx;
u16 head, id;
dma_addr_t addr;
u32 buflen = 0;
head = vq->packed.next_avail_idx;
desc = alloc_indirect_packed(total_sg, gfp);
if (!desc)
return -ENOMEM;
if (unlikely(vq->vq.num_free < 1)) {
pr_debug("Can't add buf len 1 - avail = 0\n");
@ -1089,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
desc[i].addr = cpu_to_le64(addr);
desc[i].len = cpu_to_le32(sg->length);
i++;
if (n >= out_sgs)
buflen += sg->length;
}
}
@ -1142,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
vq->packed.desc_state[id].indir_desc = desc;
vq->packed.desc_state[id].last = id;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[id] = buflen;
vq->num_added += 1;
pr_debug("Added buffer head %i to %p\n", head, vq);
@ -1176,6 +1202,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unsigned int i, n, c, descs_used, err_idx;
__le16 head_flags, flags;
u16 head, id, prev, curr, avail_used_flags;
int err;
u32 buflen = 0;
START_USE(vq);
@ -1191,9 +1219,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
BUG_ON(total_sg == 0);
if (virtqueue_use_indirect(_vq, total_sg))
return virtqueue_add_indirect_packed(vq, sgs, total_sg,
out_sgs, in_sgs, data, gfp);
if (virtqueue_use_indirect(_vq, total_sg)) {
err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
in_sgs, data, gfp);
if (err != -ENOMEM)
return err;
/* fall back on direct */
}
head = vq->packed.next_avail_idx;
avail_used_flags = vq->packed.avail_used_flags;
@ -1250,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
1 << VRING_PACKED_DESC_F_AVAIL |
1 << VRING_PACKED_DESC_F_USED;
}
if (n >= out_sgs)
buflen += sg->length;
}
}
@ -1269,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
vq->packed.desc_state[id].indir_desc = ctx;
vq->packed.desc_state[id].last = prev;
/* Store in buffer length if necessary */
if (vq->buflen)
vq->buflen[id] = buflen;
/*
* A driver MUST NOT make the first descriptor in the list
* available before all subsequent descriptors comprising
@ -1455,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
BAD_RING(vq, "id %u is not a head!\n", id);
return NULL;
}
if (vq->buflen && unlikely(*len > vq->buflen[id])) {
BAD_RING(vq, "used len %d is larger than in buflen %u\n",
*len, vq->buflen[id]);
return NULL;
}
/* detach_buf_packed clears data, so grab it now. */
ret = vq->packed.desc_state[id].data;
@ -1660,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
struct vring_virtqueue *vq;
struct vring_packed_desc *ring;
struct vring_packed_desc_event *driver, *device;
struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
size_t ring_size_in_bytes, event_size_in_bytes;
@ -1749,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed(
if (!vq->packed.desc_extra)
goto err_desc_extra;
if (!drv->suppress_used_validation || force_used_validation) {
vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
GFP_KERNEL);
if (!vq->buflen)
goto err_buflen;
} else {
vq->buflen = NULL;
}
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@ -1761,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_buflen:
kfree(vq->packed.desc_extra);
err_desc_extra:
kfree(vq->packed.desc_state);
err_desc_state:
@ -2168,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *),
const char *name)
{
struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
struct vring_virtqueue *vq;
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@ -2227,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
if (!vq->split.desc_extra)
goto err_extra;
if (!drv->suppress_used_validation || force_used_validation) {
vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
GFP_KERNEL);
if (!vq->buflen)
goto err_buflen;
} else {
vq->buflen = NULL;
}
/* Put everything in free lists. */
vq->free_head = 0;
memset(vq->split.desc_state, 0, vring.num *
@ -2237,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
spin_unlock(&vdev->vqs_list_lock);
return &vq->vq;
err_buflen:
kfree(vq->split.desc_extra);
err_extra:
kfree(vq->split.desc_state);
err_state:
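
The effect of the validation added above is that virtqueue_get_buf() now rejects (and marks the ring broken on) any used entry whose reported length exceeds the writable space the driver queued for that buffer, so a completion path can trust the returned length. A short hedged sketch; the demo_* names are hypothetical:

#include <linux/virtio.h>

static void demo_handle_packet(void *buf, unsigned int len);   /* hypothetical */

static void demo_drain_rx(struct virtqueue *vq)
{
        unsigned int len;
        void *buf;

        while ((buf = virtqueue_get_buf(vq, &len)) != NULL) {
                /* len is bounded by the in-buffer space queued for buf */
                demo_handle_packet(buf, len);
        }
}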

View File

@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct vdpa_device *vdpa = vd_get_vdpa(vdev);
const struct vdpa_config_ops *ops = vdpa->config;
ops->set_config(vdpa, offset, buf, len);
vdpa_set_config(vdpa, offset, buf, len);
}
static u32 virtio_vdpa_generation(struct virtio_device *vdev)
@ -145,7 +144,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
/* Assume split virtqueue, switch to packed if necessary */
struct vdpa_vq_state state = {0};
unsigned long flags;
u32 align, num;
u32 align, max_num, min_num = 1;
bool may_reduce_num = true;
int err;
if (!name)
@ -163,16 +163,21 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
if (!info)
return ERR_PTR(-ENOMEM);
num = ops->get_vq_num_max(vdpa);
if (num == 0) {
max_num = ops->get_vq_num_max(vdpa);
if (max_num == 0) {
err = -ENOENT;
goto error_new_virtqueue;
}
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdpa);
may_reduce_num = (max_num == min_num) ? false : true;
/* Create the vring */
align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, num, align, vdev,
true, true, ctx,
vq = vring_create_virtqueue(index, max_num, align, vdev,
true, may_reduce_num, ctx,
virtio_vdpa_notify, callback, name);
if (!vq) {
err = -ENOMEM;
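
The new get_vq_num_min op lets a parent whose hardware cannot resize its rings report a fixed queue size; when min equals max, the bridge above passes may_reduce_num = false so vring_create_virtqueue() will not silently shrink the ring on allocation failure. A hedged sketch of such a parent, for a hypothetical device with fixed 256-entry rings:

#include <linux/vdpa.h>

static u16 demo_vdpa_get_vq_num_max(struct vdpa_device *vdev)
{
        return 256;             /* hardware ring size, fixed */
}

static u16 demo_vdpa_get_vq_num_min(struct vdpa_device *vdev)
{
        return 256;             /* same as max: the ring must not shrink */
}

static const struct vdpa_config_ops demo_vdpa_ops = {
        /* ... the remaining mandatory ops ... */
        .get_vq_num_max = demo_vdpa_get_vq_num_max,
        .get_vq_num_min = demo_vdpa_get_vq_num_min,
};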

View File

@ -6,6 +6,8 @@
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/vhost_iotlb.h>
#include <linux/virtio_net.h>
#include <linux/if_ether.h>
/**
* struct vdpa_callback - vDPA callback definition.
@ -63,6 +65,7 @@ struct vdpa_mgmt_dev;
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
* @cf_mutex: Protects get and set access to configuration layout.
* @index: device index
* @features_valid: were features initialized? for legacy guests
* @use_va: indicate whether virtual address must be used by this device
@ -74,6 +77,7 @@ struct vdpa_device {
struct device dev;
struct device *dma_dev;
const struct vdpa_config_ops *config;
struct mutex cf_mutex; /* Protects get/set config */
unsigned int index;
bool features_valid;
bool use_va;
@ -91,6 +95,14 @@ struct vdpa_iova_range {
u64 last;
};
struct vdpa_dev_set_config {
struct {
u8 mac[ETH_ALEN];
u16 mtu;
} net;
u64 mask;
};
/**
* Corresponding file area for device memory mapping
* @file: vma->vm_file for the mapping
@ -171,6 +183,9 @@ struct vdpa_map_file {
* @get_vq_num_max: Get the max size of virtqueue
* @vdev: vdpa device
* Returns u16: max size of virtqueue
* @get_vq_num_min: Get the min size of virtqueue (optional)
* @vdev: vdpa device
* Returns u16: min size of virtqueue
* @get_device_id: Get virtio device id
* @vdev: vdpa device
* Returns u32: virtio device id
@ -257,7 +272,7 @@ struct vdpa_config_ops {
struct vdpa_notification_area
(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
/* vq irq is not expected to be changed once DRIVER_OK is set */
int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx);
int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx);
/* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev);
@ -266,6 +281,7 @@ struct vdpa_config_ops {
void (*set_config_cb)(struct vdpa_device *vdev,
struct vdpa_callback *cb);
u16 (*get_vq_num_max)(struct vdpa_device *vdev);
u16 (*get_vq_num_min)(struct vdpa_device *vdev);
u32 (*get_device_id)(struct vdpa_device *vdev);
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
@ -382,26 +398,16 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
return ops->set_features(vdev, features);
}
static inline void vdpa_get_config(struct vdpa_device *vdev,
unsigned int offset, void *buf,
unsigned int len)
{
const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
* If it does happen we assume a legacy guest.
*/
if (!vdev->features_valid)
vdpa_set_features(vdev, 0);
ops->get_config(vdev, offset, buf, len);
}
void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
const void *buf, unsigned int length);
/**
* struct vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
* @mdev: parent device to use for device addition
* @name: name of the new vdpa device
* @config: config attributes to apply to the device under creation
* Driver needs to add a new device using _vdpa_register_device()
* after fully initializing the vdpa device. Driver must return 0
* on success or an appropriate error code.
@ -412,14 +418,25 @@ static inline void vdpa_get_config(struct vdpa_device *vdev,
* _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config);
void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
};
/**
* struct vdpa_mgmt_dev - vdpa management device
* @device: Management parent device
* @ops: operations supported by management device
* @id_table: Pointer to device id table of supported ids
* @config_attr_mask: bit mask of attributes of type enum vdpa_attr that
* management device support during dev_add callback
* @list: list entry
*/
struct vdpa_mgmt_dev {
struct device *device;
const struct vdpa_mgmtdev_ops *ops;
const struct virtio_device_id *id_table; /* supported ids */
const struct virtio_device_id *id_table;
u64 config_attr_mask;
struct list_head list;
};

View File

@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
* @feature_table_size: number of entries in the feature table array.
* @feature_table_legacy: same as feature_table but when working in legacy mode.
* @feature_table_size_legacy: number of entries in feature table legacy array.
* @suppress_used_validation: set to have the core skip validating the used buffer length
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @scan: optional function to call after successful probe; intended
* for virtio-scsi to invoke a scan.
@ -168,6 +169,7 @@ struct virtio_driver {
unsigned int feature_table_size;
const unsigned int *feature_table_legacy;
unsigned int feature_table_size_legacy;
bool suppress_used_validation;
int (*validate)(struct virtio_device *dev);
int (*probe)(struct virtio_device *dev);
void (*scan)(struct virtio_device *dev);
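
Drivers that already check (or deliberately tolerate) device-reported used lengths opt out of the new core validation by setting this flag, which is what the virtio-blk, virtio-net and virtio-scsi patches in this series do. A minimal hedged sketch of such a declaration; the id table, probe and remove callbacks are placeholders:

#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>

static const struct virtio_device_id demo_id_table[] = {
        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },   /* placeholder device id */
        { 0 },
};

static int demo_probe(struct virtio_device *vdev);      /* placeholder */
static void demo_remove(struct virtio_device *vdev);    /* placeholder */

static struct virtio_driver demo_virtio_driver = {
        .driver.name              = "demo-virtio",
        .driver.owner             = THIS_MODULE,
        .id_table                 = demo_id_table,
        /* this driver validates used lengths itself */
        .suppress_used_validation = true,
        .probe                    = demo_probe,
        .remove                   = demo_remove,
};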

View File

@ -23,6 +23,8 @@ struct virtio_shm_region {
* any of @get/@set, @get_status/@set_status, or @get_features/
* @finalize_features are NOT safe to be called from an atomic
* context.
* @enable_cbs: enable the callbacks
* vdev: the virtio_device
* @get: read the value of a configuration field
* vdev: the virtio_device
* offset: the offset of the configuration field
@ -75,6 +77,7 @@ struct virtio_shm_region {
*/
typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops {
void (*enable_cbs)(struct virtio_device *vdev);
void (*get)(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len);
void (*set)(struct virtio_device *vdev, unsigned offset,
@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev)
{
unsigned status = dev->config->get_status(dev);
if (dev->config->enable_cbs)
dev->config->enable_cbs(dev);
BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
}

View File

@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VIRTIO_PCI_LEGACY_H
#define _LINUX_VIRTIO_PCI_LEGACY_H
#include "linux/mod_devicetable.h"
#include <linux/pci.h>
#include <linux/virtio_pci.h>
struct virtio_pci_legacy_device {
struct pci_dev *pci_dev;
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* The IO mapping for the PCI config space (legacy mode only) */
void __iomem *ioaddr;
struct virtio_device_id id;
};
u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev);
u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev);
void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
u32 features);
u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev);
void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
u8 status);
u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
u16 idx, u16 vector);
u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
u16 vector);
void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
u16 index, u32 queue_pfn);
bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
u16 idx);
void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev,
u16 idx, u16 size);
u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
u16 idx);
int vp_legacy_probe(struct virtio_pci_legacy_device *ldev);
void vp_legacy_remove(struct virtio_pci_legacy_device *ldev);
#endif

View File

@ -17,6 +17,7 @@ enum vdpa_command {
VDPA_CMD_DEV_NEW,
VDPA_CMD_DEV_DEL,
VDPA_CMD_DEV_GET, /* can dump */
VDPA_CMD_DEV_CONFIG_GET, /* can dump */
};
enum vdpa_attr {
@ -32,6 +33,12 @@ enum vdpa_attr {
VDPA_ATTR_DEV_VENDOR_ID, /* u32 */
VDPA_ATTR_DEV_MAX_VQS, /* u32 */
VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */
VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */
VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */
VDPA_ATTR_DEV_NET_STATUS, /* u8 */
VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */
VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,

View File

@ -11,9 +11,15 @@
#include <linux/const.h>
#include <linux/types.h>
/* Virtio I2C Feature bits */
#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0
/* Bit 0 of @virtio_i2c_out_hdr.flags, used to group requests */
#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0)
/* Bit 1 of @virtio_i2c_out_hdr.flags, used to mark a buffer as a read */
#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1)
/**
* struct virtio_i2c_out_hdr - the virtio I2C message OUT header
* @addr: the controlled device address