linux-stable/drivers/crypto/virtio/virtio_crypto_common.h
zhenwei pi 977231e8d4 virtio-crypto: wait ctrl queue instead of busy polling
Originally, after submitting a request to the virtio crypto control
queue, the guest side busy-polls the virtqueue for the result. This
works as follows:
    CPU0   CPU1               ...             CPUx  CPUy
     |      |                                  |     |
     \      \                                  /     /
      \--------spin_lock(&vcrypto->ctrl_lock)-------/
                           |
                 virtqueue add & kick
                           |
                  busy poll virtqueue
                           |
              spin_unlock(&vcrypto->ctrl_lock)
                          ...

There are two problems:
1. The queue depth is always 1, so the performance of a virtio crypto
   device is limited. Multiple user processes share a single control
   queue and contend for the control queue spin lock. Tested on an Intel
   Platinum 8260, a single worker gets ~35K/s create/close session
   operations, and 8 workers get ~40K/s operations with 800% CPU
   utilization.
2. A control request is supposed to be handled immediately, but
   in the current implementation of QEMU (v6.2), the vCPU thread kicks
   another thread to do this work, so the latency also becomes unstable.
   Tracking the latency of virtio_crypto_alg_akcipher_close_session over 5s:
        usecs               : count     distribution
         0 -> 1          : 0        |                        |
         2 -> 3          : 7        |                        |
         4 -> 7          : 72       |                        |
         8 -> 15         : 186485   |************************|
        16 -> 31         : 687      |                        |
        32 -> 63         : 5        |                        |
        64 -> 127        : 3        |                        |
       128 -> 255        : 1        |                        |
       256 -> 511        : 0        |                        |
       512 -> 1023       : 0        |                        |
      1024 -> 2047       : 0        |                        |
      2048 -> 4095       : 0        |                        |
      4096 -> 8191       : 0        |                        |
      8192 -> 16383      : 2        |                        |
This means that a CPU may hold vcrypto->ctrl_lock for as long as 8192~16383 us.

To improve the performance of the control queue, a request on the control
queue now waits for completion instead of busy polling, which reduces lock
contention; the request is completed by the control queue callback.
    CPU0   CPU1               ...             CPUx  CPUy
     |      |                                  |     |
     \      \                                  /     /
      \--------spin_lock(&vcrypto->ctrl_lock)-------/
                           |
                 virtqueue add & kick
                           |
      ---------spin_unlock(&vcrypto->ctrl_lock)------
     /      /                                  \     \
     |      |                                  |     |
    wait   wait                               wait  wait

With this patch applied, the guest side gets ~200K/s operations with 300%
CPU utilization.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Message-Id: <20220506131627.180784-4-pizhenwei@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2022-05-31 12:45:09 -04:00

149 lines
4.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Common header for Virtio crypto device.
*
* Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD.
*/
#ifndef _VIRTIO_CRYPTO_COMMON_H
#define _VIRTIO_CRYPTO_COMMON_H
#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/crypto.h>
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/topology.h>
#include <linux/types.h>
#include <linux/virtio.h>
#include <crypto/aead.h>
#include <crypto/aes.h>
#include <crypto/engine.h>
#include <uapi/linux/virtio_crypto.h>
/**
 * struct data_queue - internal representation of a data virtqueue
 * @vq:     virtqueue associated with this data queue
 * @lock:   protects the vq operations for this dataq
 * @name:   name of the tx queue, of the form dataq.$index
 * @engine: crypto engine pointer kept alongside the queue
 */
struct data_queue {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[32];
	struct crypto_engine *engine;
};
/**
 * struct virtio_crypto - driver state for one virtio crypto device
 * @vdev:               associated struct virtio_device
 * @ctrl_vq:            control virtqueue
 * @data_vq:            data queue state (struct data_queue)
 * @ctrl_lock:          protects the vq operations for the controlq
 * @max_data_queues:    maximum number of data queues supported by the device
 * @curr_queue:         number of queues currently used by the driver
 * @crypto_services:    mask of the services the device supports,
 *                      see VIRTIO_CRYPTO_SERVICE_*
 * @cipher_algo_l:      detailed cipher algorithm mask
 *                      (presumably the low 32 bits - confirm)
 * @cipher_algo_h:      detailed cipher algorithm mask
 *                      (presumably the high 32 bits - confirm)
 * @hash_algo:          detailed hash algorithm mask
 * @mac_algo_l:         detailed MAC algorithm mask
 *                      (presumably the low 32 bits - confirm)
 * @mac_algo_h:         detailed MAC algorithm mask
 *                      (presumably the high 32 bits - confirm)
 * @aead_algo:          detailed AEAD algorithm mask
 * @akcipher_algo:      detailed akcipher algorithm mask
 * @max_cipher_key_len: maximum length of a cipher key
 * @max_auth_key_len:   maximum length of an authenticated key
 * @max_size:           maximum size of a single request
 * @status:             device status word; its encoding is not defined in
 *                      this header
 * @ref_count:          atomic reference counter
 *                      NOTE(review): presumably driven by
 *                      virtcrypto_dev_get()/virtcrypto_dev_put() - confirm
 * @list:               list linkage
 *                      NOTE(review): presumably the device manager list -
 *                      confirm
 * @owner:              module pointer stored for this device
 * @dev_id:             8-bit device identifier
 * @affinity_hint_set:  whether the affinity hint has been set for the
 *                      virtqueues
 */
struct virtio_crypto {
	struct virtio_device *vdev;
	struct virtqueue *ctrl_vq;
	struct data_queue *data_vq;
	spinlock_t ctrl_lock;

	u32 max_data_queues;
	u32 curr_queue;
	u32 crypto_services;

	u32 cipher_algo_l;
	u32 cipher_algo_h;
	u32 hash_algo;
	u32 mac_algo_l;
	u32 mac_algo_h;
	u32 aead_algo;
	u32 akcipher_algo;

	u32 max_cipher_key_len;
	u32 max_auth_key_len;
	u64 max_size;

	unsigned long status;
	atomic_t ref_count;
	struct list_head list;
	struct module *owner;
	uint8_t dev_id;
	bool affinity_hint_set;
};
/**
 * struct virtio_crypto_sym_session_info - symmetric session bookkeeping
 * @session_id: backend session id, which comes from the host side
 */
struct virtio_crypto_sym_session_info {
	__u64 session_id;
};
/**
 * struct virtio_crypto_ctrl_request - one request on the control queue
 * @ctrl:        control request (struct virtio_crypto_op_ctrl_req)
 * @input:       session input (struct virtio_crypto_session_input)
 * @ctrl_status: status header (struct virtio_crypto_inhdr)
 * @compl:       completion object embedded in the request
 *
 * Note: the request contains padding fields. Clear them to zero before
 * sending the request to the host so that no information is leaked.
 * Example: virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48]
 */
struct virtio_crypto_ctrl_request {
	struct virtio_crypto_op_ctrl_req ctrl;
	struct virtio_crypto_session_input input;
	struct virtio_crypto_inhdr ctrl_status;
	struct completion compl;
};
/* Forward declaration so the callback type below can refer to the request. */
struct virtio_crypto_request;

/*
 * Signature of the callback stored in virtio_crypto_request::alg_cb.
 * It receives the request and an integer length.
 * NOTE(review): the exact meaning of @len is set by the callers, which are
 * not visible in this header.
 */
typedef void (*virtio_crypto_data_callback)(struct virtio_crypto_request *vc_req,
					    int len);

/**
 * struct virtio_crypto_request - one request on a data queue
 * @status:   8-bit status byte
 * @req_data: pointer to the data request (struct virtio_crypto_op_data_req)
 * @sgs:      array of scatterlist pointers
 * @dataq:    data queue this request refers to
 * @alg_cb:   callback of type virtio_crypto_data_callback
 */
struct virtio_crypto_request {
	uint8_t status;
	struct virtio_crypto_op_data_req *req_data;
	struct scatterlist **sgs;
	struct data_queue *dataq;
	virtio_crypto_data_callback alg_cb;
};
/*
 * virtcrypto_devmgr_* and virtcrypto_dev_* interface.
 * Declarations only; the definitions live outside this header.
 */
int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev);
struct list_head *virtcrypto_devmgr_get_head(void);
void virtcrypto_devmgr_rm_dev(struct virtio_crypto *vcrypto_dev);
struct virtio_crypto *virtcrypto_devmgr_get_first(void);

int virtcrypto_dev_in_use(struct virtio_crypto *vcrypto_dev);
int virtcrypto_dev_get(struct virtio_crypto *vcrypto_dev);
void virtcrypto_dev_put(struct virtio_crypto *vcrypto_dev);
int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev);

/* Lookups parameterised by a service id and an algorithm id. */
bool virtcrypto_algo_is_supported(struct virtio_crypto *vcrypto_dev,
				  uint32_t service, uint32_t algo);
struct virtio_crypto *virtcrypto_get_dev_node(int node, uint32_t service,
					      uint32_t algo);

/* Device start/stop entry points. */
int virtcrypto_dev_start(struct virtio_crypto *vcrypto);
void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);

/* Request-level helpers taking a crypto engine or a virtio_crypto_request. */
int virtio_crypto_skcipher_crypt_req(struct crypto_engine *engine, void *vreq);
void virtcrypto_clear_request(struct virtio_crypto_request *vc_req);
/*
 * Return the physical package id of the CPU this code is currently
 * running on; callers use the value as a "node" number.
 *
 * The topology lookup is bracketed by get_cpu()/put_cpu(); in the kernel
 * these disable and re-enable preemption, so the CPU id stays valid while
 * it is used.
 */
static inline int virtio_crypto_get_current_node(void)
{
	const int cpu = get_cpu();
	const int node = topology_physical_package_id(cpu);

	put_cpu();

	return node;
}
/* Register/unregister pairs for the skcipher and akcipher algorithm sets. */
int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto);
void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto);
int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto);
void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto);

/*
 * Control virtqueue request entry point. Takes the device, an array of
 * scatterlists, the number of out and in entries, and the control request.
 * NOTE(review): blocking behaviour and return values are defined by the
 * implementation, which is not visible in this header.
 */
int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto,
				  struct scatterlist *sgs[],
				  unsigned int out_sgs, unsigned int in_sgs,
				  struct virtio_crypto_ctrl_request *vc_ctrl_req);
#endif /* _VIRTIO_CRYPTO_COMMON_H */