- SGX2 ISA support which makes enclave memory management much more

dynamic.  For instance, enclaves can now change enclave page
    permissions on the fly.
  - Removal of an unused structure member
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmLq2M8ACgkQaDWVMHDJ
 krCbAw/+J4nHXxZNMQQX1c8CYJ7XHIr+YtsqNFYwH58rJJstHO/YwQf+mesVOeeu
 08BYn+T5cdAbShKcxdkowPB17S6w/WzACtUfVhaoRQC7Md40cBiyc45UiC2e1u9g
 W3Osk5+fTVcSYA9WiizPntIQkjVs9e7hcNKjTyVPnSw8W8mFCLg+ZiPb7YvKERTO
 o8Wi2+zzX1BGDNOyBEqvnstz9uXDbCbFUTYX6zToBUk+Y1ZPXHwuHgNTtrAqGYaL
 qyi0O2zoWnfOUmplzjJ/1aPmzPJDPgDNImC+gjTpYXGmg05Ryds+VZAc64IIjqYn
 K+/5674PZFdsp5/YfctubdsQm0l0xen94sccAacd7KfsVurcHs3E2bdQPDw0htxv
 svCX0Sai/qv52tPNzw+n9EJRcQsiwd9Pn0rWwx2i8hQcgMFiwCus6DBKhU7uh2Jp
 oTwlspqJy2NHu9bici78tmsOio9CORjrh1WOfWX+yHEux4dtQAl889Gw5qzId6V1
 Bh1MgoAu/pQ78feo96f3h5yOultOtpbTGyXEC8t4MTSpIVgZ2NzfUxe4RhOCBnhA
 kdftVNfZLGOzwBbgFy0gYTe/ukt1DkP4BNHQilf2I+bUP/kZFlN8wfxBipWzr0bs
 Skrz4+brBIaTdGoFgzhgt3g5YH16DSasmy/HCkIeV7eaAHFRLoE=
 =Y7YA
 -----END PGP SIGNATURE-----

Merge tag 'x86_sgx_for_v6.0-2022-08-03.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SGX updates from Dave Hansen:
 "A set of x86/sgx changes focused on implementing the "SGX2" features,
  plus a minor cleanup:

   - SGX2 ISA support which makes enclave memory management much more
     dynamic. For instance, enclaves can now change enclave page
     permissions on the fly.

   - Removal of an unused structure member"

* tag 'x86_sgx_for_v6.0-2022-08-03.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits)
  x86/sgx: Drop 'page_index' from sgx_backing
  selftests/sgx: Page removal stress test
  selftests/sgx: Test reclaiming of untouched page
  selftests/sgx: Test invalid access to removed enclave page
  selftests/sgx: Test faulty enclave behavior
  selftests/sgx: Test complete changing of page type flow
  selftests/sgx: Introduce TCS initialization enclave operation
  selftests/sgx: Introduce dynamic entry point
  selftests/sgx: Test two different SGX2 EAUG flows
  selftests/sgx: Add test for TCS page permission changes
  selftests/sgx: Add test for EPCM permission changes
  Documentation/x86: Introduce enclave runtime management section
  x86/sgx: Free up EPC pages directly to support large page ranges
  x86/sgx: Support complete page removal
  x86/sgx: Support modifying SGX page type
  x86/sgx: Tighten accessible memory range after enclave initialization
  x86/sgx: Support adding of pages to an initialized enclave
  x86/sgx: Support restricting of enclave page permissions
  x86/sgx: Support VA page allocation without reclaiming
  x86/sgx: Export sgx_encl_page_alloc()
  ...
This commit is contained in:
Linus Torvalds 2022-08-05 10:47:40 -07:00
commit 9e2f402336
15 changed files with 2627 additions and 130 deletions

View file

@ -100,6 +100,21 @@ pages and establish enclave page permissions.
sgx_ioc_enclave_init
sgx_ioc_enclave_provision
Enclave runtime management
--------------------------
Systems supporting SGX2 additionally support changes to initialized
enclaves: modifying enclave page permissions and type, and dynamically
adding and removing of enclave pages. When an enclave accesses an address
within its address range that does not have a backing page then a new
regular page will be dynamically added to the enclave. The enclave is
still required to run EACCEPT on the new page before it can be used.
.. kernel-doc:: arch/x86/kernel/cpu/sgx/ioctl.c
:functions: sgx_ioc_enclave_restrict_permissions
sgx_ioc_enclave_modify_types
sgx_ioc_enclave_remove_pages
Enclave vDSO
------------

View file

@ -65,17 +65,22 @@ enum sgx_encls_function {
/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
* %SGX_EPC_PAGE_CONFLICT: Page is being written by other ENCLS function.
* %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
* %SGX_CHILD_PRESENT SECS has child pages present in the EPC.
* %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
* %SGX_PAGE_NOT_MODIFIABLE: The EPC page cannot be modified because it
* is in the PENDING or MODIFIED state.
* %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
*/
enum sgx_return_code {
SGX_EPC_PAGE_CONFLICT = 7,
SGX_NOT_TRACKED = 11,
SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
SGX_PAGE_NOT_MODIFIABLE = 20,
SGX_UNMASKED_EVENT = 128,
};
@ -234,6 +239,9 @@ struct sgx_pageinfo {
* %SGX_PAGE_TYPE_REG: a regular page
* %SGX_PAGE_TYPE_VA: a VA page
* %SGX_PAGE_TYPE_TRIM: a page in trimmed state
*
* Make sure when making changes to this enum that its values can still fit
* in the bitfield within &struct sgx_encl_page
*/
enum sgx_page_type {
SGX_PAGE_TYPE_SECS,

View file

@ -29,6 +29,12 @@ enum sgx_page_flags {
_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
#define SGX_IOC_VEPC_REMOVE_ALL \
_IO(SGX_MAGIC, 0x04)
#define SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS \
_IOWR(SGX_MAGIC, 0x05, struct sgx_enclave_restrict_permissions)
#define SGX_IOC_ENCLAVE_MODIFY_TYPES \
_IOWR(SGX_MAGIC, 0x06, struct sgx_enclave_modify_types)
#define SGX_IOC_ENCLAVE_REMOVE_PAGES \
_IOWR(SGX_MAGIC, 0x07, struct sgx_enclave_remove_pages)
/**
* struct sgx_enclave_create - parameter structure for the
@ -76,6 +82,62 @@ struct sgx_enclave_provision {
__u64 fd;
};
/**
* struct sgx_enclave_restrict_permissions - parameters for ioctl
* %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
* @offset: starting page offset (page aligned relative to enclave base
* address defined in SECS)
* @length: length of memory (multiple of the page size)
* @permissions:new permission bits for pages in range described by @offset
* and @length
* @result: (output) SGX result code of ENCLS[EMODPR] function
* @count: (output) bytes successfully changed (multiple of page size)
*/
struct sgx_enclave_restrict_permissions {
__u64 offset;
__u64 length;
__u64 permissions;
__u64 result;
__u64 count;
};
/**
* struct sgx_enclave_modify_types - parameters for ioctl
* %SGX_IOC_ENCLAVE_MODIFY_TYPES
* @offset: starting page offset (page aligned relative to enclave base
* address defined in SECS)
* @length: length of memory (multiple of the page size)
* @page_type: new type for pages in range described by @offset and @length
* @result: (output) SGX result code of ENCLS[EMODT] function
* @count: (output) bytes successfully changed (multiple of page size)
*/
struct sgx_enclave_modify_types {
__u64 offset;
__u64 length;
__u64 page_type;
__u64 result;
__u64 count;
};
/**
* struct sgx_enclave_remove_pages - %SGX_IOC_ENCLAVE_REMOVE_PAGES parameters
* @offset: starting page offset (page aligned relative to enclave base
* address defined in SECS)
* @length: length of memory (multiple of the page size)
* @count: (output) bytes successfully changed (multiple of page size)
*
* Regular (PT_REG) or TCS (PT_TCS) can be removed from an initialized
* enclave if the system supports SGX2. First, the %SGX_IOC_ENCLAVE_MODIFY_TYPES
* ioctl() should be used to change the page type to PT_TRIM. After that
* succeeds ENCLU[EACCEPT] should be run from within the enclave and then
* %SGX_IOC_ENCLAVE_REMOVE_PAGES can be used to complete the page removal.
*/
struct sgx_enclave_remove_pages {
__u64 offset;
__u64 length;
__u64 count;
};
struct sgx_enclave_run;
/**

View file

@ -232,25 +232,10 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
return epc_page;
}
static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
unsigned long addr,
unsigned long vm_flags)
static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
struct sgx_encl_page *entry)
{
unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
struct sgx_epc_page *epc_page;
struct sgx_encl_page *entry;
entry = xa_load(&encl->page_array, PFN_DOWN(addr));
if (!entry)
return ERR_PTR(-EFAULT);
/*
* Verify that the faulted page has equal or higher build time
* permissions than the VMA permissions (i.e. the subset of {VM_READ,
* VM_WRITE, VM_EXECUTE} in vma->vm_flags).
*/
if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
return ERR_PTR(-EFAULT);
/* Entry successfully located. */
if (entry->epc_page) {
@ -276,6 +261,146 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
return entry;
}
static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
unsigned long addr,
unsigned long vm_flags)
{
unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
struct sgx_encl_page *entry;
entry = xa_load(&encl->page_array, PFN_DOWN(addr));
if (!entry)
return ERR_PTR(-EFAULT);
/*
* Verify that the page has equal or higher build time
* permissions than the VMA permissions (i.e. the subset of {VM_READ,
* VM_WRITE, VM_EXECUTE} in vma->vm_flags).
*/
if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
return ERR_PTR(-EFAULT);
return __sgx_encl_load_page(encl, entry);
}
struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
unsigned long addr)
{
struct sgx_encl_page *entry;
entry = xa_load(&encl->page_array, PFN_DOWN(addr));
if (!entry)
return ERR_PTR(-EFAULT);
return __sgx_encl_load_page(encl, entry);
}
/**
* sgx_encl_eaug_page() - Dynamically add page to initialized enclave
* @vma: VMA obtained from fault info from where page is accessed
* @encl: enclave accessing the page
* @addr: address that triggered the page fault
*
* When an initialized enclave accesses a page with no backing EPC page
* on a SGX2 system then the EPC can be added dynamically via the SGX2
* ENCLS[EAUG] instruction.
*
* Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
* successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
*/
static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
struct sgx_encl *encl, unsigned long addr)
{
vm_fault_t vmret = VM_FAULT_SIGBUS;
struct sgx_pageinfo pginfo = {0};
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
struct sgx_va_page *va_page;
unsigned long phys_addr;
u64 secinfo_flags;
int ret;
if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
return VM_FAULT_SIGBUS;
/*
* Ignore internal permission checking for dynamically added pages.
* They matter only for data added during the pre-initialization
* phase. The enclave decides the permissions by the means of
* EACCEPT, EACCEPTCOPY and EMODPE.
*/
secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
if (IS_ERR(encl_page))
return VM_FAULT_OOM;
mutex_lock(&encl->lock);
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
vmret = VM_FAULT_NOPAGE;
goto err_out_unlock;
}
va_page = sgx_encl_grow(encl, false);
if (IS_ERR(va_page))
goto err_out_epc;
if (va_page)
list_add(&va_page->list, &encl->va_pages);
ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
encl_page, GFP_KERNEL);
/*
* If ret == -EBUSY then page was created in another flow while
* running without encl->lock
*/
if (ret)
goto err_out_shrink;
pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.metadata = 0;
ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
if (ret)
goto err_out;
encl_page->encl = encl;
encl_page->epc_page = epc_page;
encl_page->type = SGX_PAGE_TYPE_REG;
encl->secs_child_cnt++;
sgx_mark_page_reclaimable(encl_page->epc_page);
phys_addr = sgx_get_epc_phys_addr(epc_page);
/*
* Do not undo everything when creating PTE entry fails - next #PF
* would find page ready for a PTE.
*/
vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
if (vmret != VM_FAULT_NOPAGE) {
mutex_unlock(&encl->lock);
return VM_FAULT_SIGBUS;
}
mutex_unlock(&encl->lock);
return VM_FAULT_NOPAGE;
err_out:
xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
err_out_shrink:
sgx_encl_shrink(encl, va_page);
err_out_epc:
sgx_encl_free_epc_page(epc_page);
err_out_unlock:
mutex_unlock(&encl->lock);
kfree(encl_page);
return vmret;
}
static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
{
unsigned long addr = (unsigned long)vmf->address;
@ -295,9 +420,20 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
if (unlikely(!encl))
return VM_FAULT_SIGBUS;
/*
* The page_array keeps track of all enclave pages, whether they
* are swapped out or not. If there is no entry for this page and
* the system supports SGX2 then it is possible to dynamically add
* a new enclave page. This is only possible for an initialized
* enclave that will be checked for right away.
*/
if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
(!xa_load(&encl->page_array, PFN_DOWN(addr))))
return sgx_encl_eaug_page(vma, encl, addr);
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
if (IS_ERR(entry)) {
mutex_unlock(&encl->lock);
@ -367,6 +503,11 @@ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
/* Disallow mapping outside enclave's address range. */
if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
(start < encl->base || end > encl->base + encl->size))
return -EACCES;
/*
* Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
* conflict with the enclave page permissions.
@ -445,7 +586,7 @@ static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
for ( ; ; ) {
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr, vm_flags);
entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
if (PTR_ERR(entry) != -EBUSY)
break;
@ -687,7 +828,7 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
spin_lock(&encl->mm_lock);
list_add_rcu(&encl_mm->list, &encl->mm_list);
/* Pairs with smp_rmb() in sgx_reclaimer_block(). */
/* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
smp_wmb();
encl->mm_list_version++;
spin_unlock(&encl->mm_lock);
@ -695,6 +836,73 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
return 0;
}
/**
* sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
* @encl: the enclave
*
* Some SGX functions require that no cached linear-to-physical address
* mappings are present before they can succeed. For example, ENCLS[EWB]
* copies a page from the enclave page cache to regular main memory but
* it fails if it cannot ensure that there are no cached
* linear-to-physical address mappings referring to the page.
*
* SGX hardware flushes all cached linear-to-physical mappings on a CPU
* when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
* Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
* address mappings are cleared but coordination with the tracking done within
* the SGX hardware is needed to support the SGX functions that depend on this
* cache clearing.
*
* When the ENCLS[ETRACK] function is issued on an enclave the hardware
* tracks threads operating inside the enclave at that time. The SGX
* hardware tracking require that all the identified threads must have
* exited the enclave in order to flush the mappings before a function such
* as ENCLS[EWB] will be permitted
*
* The following flow is used to support SGX functions that require that
* no cached linear-to-physical address mappings are present:
* 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
* 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
* accessing the enclave.
* 3) Send IPI to identified CPUs, kicking them out of the enclave and
* thus flushing all locally cached linear-to-physical address mappings.
* 4) Execute SGX function.
*
* Context: It is required to call this function after ENCLS[ETRACK].
* This will ensure that if any new mm appears (racing with
* sgx_encl_mm_add()) then the new mm will enter into the
* enclave with fresh linear-to-physical address mappings.
*
* It is required that all IPIs are completed before a new
* ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
* of the above flow with the enclave's mutex.
*
* Return: cpumask of CPUs that might be accessing @encl
*/
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
{
cpumask_t *cpumask = &encl->cpumask;
struct sgx_encl_mm *encl_mm;
int idx;
cpumask_clear(cpumask);
idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
mmput_async(encl_mm->mm);
}
srcu_read_unlock(&encl->srcu, idx);
return cpumask;
}
static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
pgoff_t index)
{
@ -735,7 +943,6 @@ static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
return PTR_ERR(pcmd);
}
backing->page_index = page_index;
backing->contents = contents;
backing->pcmd = pcmd;
backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
@ -902,8 +1109,85 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
return ret;
}
struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
unsigned long offset,
u64 secinfo_flags)
{
struct sgx_encl_page *encl_page;
unsigned long prot;
encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
if (!encl_page)
return ERR_PTR(-ENOMEM);
encl_page->desc = encl->base + offset;
encl_page->encl = encl;
prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
_calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
_calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
/*
* TCS pages must always RW set for CPU access while the SECINFO
* permissions are *always* zero - the CPU ignores the user provided
* values and silently overwrites them with zero permissions.
*/
if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
prot |= PROT_READ | PROT_WRITE;
/* Calculate maximum of the VM flags for the page. */
encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
return encl_page;
}
/**
* sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
* @encl: the enclave
* @addr: page aligned pointer to single page for which PTEs will be removed
*
* Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
* @addr from each VMA. Ensure that page fault handler is ready to handle
* new mappings of @addr before calling this function.
*/
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
{
unsigned long mm_list_version;
struct sgx_encl_mm *encl_mm;
struct vm_area_struct *vma;
int idx, ret;
do {
mm_list_version = encl->mm_list_version;
/* Pairs with smp_wmb() in sgx_encl_mm_add(). */
smp_rmb();
idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
mmap_read_lock(encl_mm->mm);
ret = sgx_encl_find(encl_mm->mm, addr, &vma);
if (!ret && encl == vma->vm_private_data)
zap_vma_ptes(vma, addr, PAGE_SIZE);
mmap_read_unlock(encl_mm->mm);
mmput_async(encl_mm->mm);
}
srcu_read_unlock(&encl->srcu, idx);
} while (unlikely(encl->mm_list_version != mm_list_version));
}
/**
* sgx_alloc_va_page() - Allocate a Version Array (VA) page
* @reclaim: Reclaim EPC pages directly if none available. Enclave
* mutex should not be held if this is set.
*
* Allocate a free EPC page and convert it to a Version Array (VA) page.
*
@ -911,12 +1195,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
* a VA page,
* -errno otherwise
*/
struct sgx_epc_page *sgx_alloc_va_page(void)
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
{
struct sgx_epc_page *epc_page;
int ret;
epc_page = sgx_alloc_epc_page(NULL, true);
epc_page = sgx_alloc_epc_page(NULL, reclaim);
if (IS_ERR(epc_page))
return ERR_CAST(epc_page);

View file

@ -27,7 +27,8 @@
struct sgx_encl_page {
unsigned long desc;
unsigned long vm_max_prot_bits;
unsigned long vm_max_prot_bits:8;
enum sgx_page_type type:16;
struct sgx_epc_page *epc_page;
struct sgx_encl *encl;
struct sgx_va_page *va_page;
@ -78,7 +79,6 @@ struct sgx_va_page {
};
struct sgx_backing {
pgoff_t page_index;
struct page *contents;
struct page *pcmd;
unsigned long pcmd_offset;
@ -106,6 +106,7 @@ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
bool current_is_ksgxd(void);
void sgx_encl_release(struct kref *ref);
int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl);
int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
struct sgx_backing *backing);
int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
@ -113,11 +114,18 @@ int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
void sgx_encl_put_backing(struct sgx_backing *backing);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);
struct sgx_epc_page *sgx_alloc_va_page(void);
struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
unsigned long offset,
u64 secinfo_flags);
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
struct sgx_epc_page *sgx_alloc_va_page(bool reclaim);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
void sgx_encl_free_epc_page(struct sgx_epc_page *page);
struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
unsigned long addr);
struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim);
void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
#endif /* _X86_ENCL_H */

View file

@ -136,57 +136,71 @@ static inline bool encls_failed(int ret)
ret; \
})
/* Initialize an EPC page into an SGX Enclave Control Structure (SECS) page. */
static inline int __ecreate(struct sgx_pageinfo *pginfo, void *secs)
{
return __encls_2(ECREATE, pginfo, secs);
}
/* Hash a 256 byte region of an enclave page to SECS:MRENCLAVE. */
static inline int __eextend(void *secs, void *addr)
{
return __encls_2(EEXTEND, secs, addr);
}
/*
* Associate an EPC page to an enclave either as a REG or TCS page
* populated with the provided data.
*/
static inline int __eadd(struct sgx_pageinfo *pginfo, void *addr)
{
return __encls_2(EADD, pginfo, addr);
}
/* Finalize enclave build, initialize enclave for user code execution. */
static inline int __einit(void *sigstruct, void *token, void *secs)
{
return __encls_ret_3(EINIT, sigstruct, secs, token);
}
/* Disassociate EPC page from its enclave and mark it as unused. */
static inline int __eremove(void *addr)
{
return __encls_ret_1(EREMOVE, addr);
}
/* Copy data to an EPC page belonging to a debug enclave. */
static inline int __edbgwr(void *addr, unsigned long *data)
{
return __encls_2(EDGBWR, *data, addr);
}
/* Copy data from an EPC page belonging to a debug enclave. */
static inline int __edbgrd(void *addr, unsigned long *data)
{
return __encls_1_1(EDGBRD, *data, addr);
}
/* Track that software has completed the required TLB address clears. */
static inline int __etrack(void *addr)
{
return __encls_ret_1(ETRACK, addr);
}
/* Load, verify, and unblock an EPC page. */
static inline int __eldu(struct sgx_pageinfo *pginfo, void *addr,
void *va)
{
return __encls_ret_3(ELDU, pginfo, addr, va);
}
/* Make EPC page inaccessible to enclave, ready to be written to memory. */
static inline int __eblock(void *addr)
{
return __encls_ret_1(EBLOCK, addr);
}
/* Initialize an EPC page into a Version Array (VA) page. */
static inline int __epa(void *addr)
{
unsigned long rbx = SGX_PAGE_TYPE_VA;
@ -194,10 +208,29 @@ static inline int __epa(void *addr)
return __encls_2(EPA, rbx, addr);
}
/* Invalidate an EPC page and write it out to main memory. */
static inline int __ewb(struct sgx_pageinfo *pginfo, void *addr,
void *va)
{
return __encls_ret_3(EWB, pginfo, addr, va);
}
/* Restrict the EPCM permissions of an EPC page. */
static inline int __emodpr(struct sgx_secinfo *secinfo, void *addr)
{
return __encls_ret_2(EMODPR, secinfo, addr);
}
/* Change the type of an EPC page. */
static inline int __emodt(struct sgx_secinfo *secinfo, void *addr)
{
return __encls_ret_2(EMODT, secinfo, addr);
}
/* Zero a page of EPC memory and add it to an initialized enclave. */
static inline int __eaug(struct sgx_pageinfo *pginfo, void *addr)
{
return __encls_2(EAUG, pginfo, addr);
}
#endif /* _X86_ENCLS_H */

View file

@ -17,7 +17,7 @@
#include "encl.h"
#include "encls.h"
static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
{
struct sgx_va_page *va_page = NULL;
void *err;
@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
if (!va_page)
return ERR_PTR(-ENOMEM);
va_page->epc_page = sgx_alloc_va_page();
va_page->epc_page = sgx_alloc_va_page(reclaim);
if (IS_ERR(va_page->epc_page)) {
err = ERR_CAST(va_page->epc_page);
kfree(va_page);
@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
return va_page;
}
static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
{
encl->page_cnt--;
@ -64,7 +64,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
struct file *backing;
long ret;
va_page = sgx_encl_grow(encl);
va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page))
return PTR_ERR(va_page);
else if (va_page)
@ -107,6 +107,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
set_bit(SGX_ENCL_DEBUG, &encl->flags);
encl->secs.encl = encl;
encl->secs.type = SGX_PAGE_TYPE_SECS;
encl->base = secs->base;
encl->size = secs->size;
encl->attributes = secs->attributes;
@ -168,38 +169,6 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
return ret;
}
static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
unsigned long offset,
u64 secinfo_flags)
{
struct sgx_encl_page *encl_page;
unsigned long prot;
encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
if (!encl_page)
return ERR_PTR(-ENOMEM);
encl_page->desc = encl->base + offset;
encl_page->encl = encl;
prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
_calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
_calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
/*
* TCS pages must always RW set for CPU access while the SECINFO
* permissions are *always* zero - the CPU ignores the user provided
* values and silently overwrites them with zero permissions.
*/
if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
prot |= PROT_READ | PROT_WRITE;
/* Calculate maximum of the VM flags for the page. */
encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
return encl_page;
}
static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
{
u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
@ -306,7 +275,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
return PTR_ERR(epc_page);
}
va_page = sgx_encl_grow(encl);
va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page)) {
ret = PTR_ERR(va_page);
goto err_out_free;
@ -344,6 +313,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
*/
encl_page->encl = encl;
encl_page->epc_page = epc_page;
encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
encl->secs_child_cnt++;
if (flags & SGX_PAGE_MEASURE) {
@ -372,6 +342,26 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
return ret;
}
/*
* Ensure user provided offset and length values are valid for
* an enclave.
*/
static int sgx_validate_offset_length(struct sgx_encl *encl,
unsigned long offset,
unsigned long length)
{
if (!IS_ALIGNED(offset, PAGE_SIZE))
return -EINVAL;
if (!length || !IS_ALIGNED(length, PAGE_SIZE))
return -EINVAL;
if (offset + length - PAGE_SIZE >= encl->size)
return -EINVAL;
return 0;
}
/**
* sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
* @encl: an enclave pointer
@ -425,14 +415,10 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
return -EFAULT;
if (!IS_ALIGNED(add_arg.offset, PAGE_SIZE) ||
!IS_ALIGNED(add_arg.src, PAGE_SIZE))
if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
return -EINVAL;
if (!add_arg.length || add_arg.length & (PAGE_SIZE - 1))
return -EINVAL;
if (add_arg.offset + add_arg.length - PAGE_SIZE >= encl->size)
if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
return -EINVAL;
if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
@ -674,6 +660,565 @@ static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
return sgx_set_attribute(&encl->attributes_mask, params.fd);
}
/*
* Ensure enclave is ready for SGX2 functions. Readiness is checked
* by ensuring the hardware supports SGX2 and the enclave is initialized
* and thus able to handle requests to modify pages within it.
*/
static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
{
if (!(cpu_feature_enabled(X86_FEATURE_SGX2)))
return -ENODEV;
if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
return -EINVAL;
return 0;
}
/*
* Some SGX functions require that no cached linear-to-physical address
* mappings are present before they can succeed. Collaborate with
* hardware via ENCLS[ETRACK] to ensure that all cached
* linear-to-physical address mappings belonging to all threads of
* the enclave are cleared. See sgx_encl_cpumask() for details.
*
* Must be called with enclave's mutex held from the time the
* SGX function requiring that no cached linear-to-physical mappings
* are present is executed until this ETRACK flow is complete.
*/
static int sgx_enclave_etrack(struct sgx_encl *encl)
{
void *epc_virt;
int ret;
epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
ret = __etrack(epc_virt);
if (ret) {
/*
* ETRACK only fails when there is an OS issue. For
* example, two consecutive ETRACK was sent without
* completed IPI between.
*/
pr_err_once("ETRACK returned %d (0x%x)", ret, ret);
/*
* Send IPIs to kick CPUs out of the enclave and
* try ETRACK again.
*/
on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
ret = __etrack(epc_virt);
if (ret) {
pr_err_once("ETRACK repeat returned %d (0x%x)",
ret, ret);
return -EFAULT;
}
}
on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
return 0;
}
/**
* sgx_enclave_restrict_permissions() - Restrict EPCM permissions
* @encl: Enclave to which the pages belong.
* @modp: Checked parameters from user on which pages need modifying and
* their new permissions.
*
* Return:
* - 0: Success.
* - -errno: Otherwise.
*/
static long
sgx_enclave_restrict_permissions(struct sgx_encl *encl,
struct sgx_enclave_restrict_permissions *modp)
{
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
addr = encl->base + modp->offset + c;
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
/*
* Changing EPCM permissions is only supported on regular
* SGX pages. Attempting this change on other pages will
* result in #PF.
*/
if (entry->type != SGX_PAGE_TYPE_REG) {
ret = -EINVAL;
goto out_unlock;
}
/*
* Apart from ensuring that read-access remains, do not verify
* the permission bits requested. Kernel has no control over
* how EPCM permissions can be relaxed from within the enclave.
* ENCLS[EMODPR] can only remove existing EPCM permissions,
* attempting to set new permissions will be ignored by the
* hardware.
*/
/* Change EPCM permissions. */
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodpr(&secinfo, epc_virt);
if (encls_faulted(ret)) {
/*
* All possible faults should be avoidable:
* parameters have been checked, will only change
* permissions of a regular page, and no concurrent
* SGX1/SGX2 ENCLS instructions since these
* are protected with mutex.
*/
pr_err_once("EMODPR encountered exception %d\n",
ENCLS_TRAPNR(ret));
ret = -EFAULT;
goto out_unlock;
}
if (encls_failed(ret)) {
modp->result = ret;
ret = -EFAULT;
goto out_unlock;
}
ret = sgx_enclave_etrack(encl);
if (ret) {
ret = -EFAULT;
goto out_unlock;
}
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_unlock:
mutex_unlock(&encl->lock);
out:
modp->count = c;
return ret;
}
/**
* sgx_ioc_enclave_restrict_permissions() - handler for
* %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
* @encl: an enclave pointer
* @arg: userspace pointer to a &struct sgx_enclave_restrict_permissions
* instance
*
* SGX2 distinguishes between relaxing and restricting the enclave page
* permissions maintained by the hardware (EPCM permissions) of pages
* belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
*
* EPCM permissions cannot be restricted from within the enclave, the enclave
* requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
* and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
* will be ignored by the hardware.
*
* Return:
* - 0: Success
* - -errno: Otherwise
*/
static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
void __user *arg)
{
struct sgx_enclave_restrict_permissions params;
long ret;
ret = sgx_ioc_sgx2_ready(encl);
if (ret)
return ret;
if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
if (sgx_validate_offset_length(encl, params.offset, params.length))
return -EINVAL;
if (params.permissions & ~SGX_SECINFO_PERMISSION_MASK)
return -EINVAL;
/*
* Fail early if invalid permissions requested to prevent ENCLS[EMODPR]
* from faulting later when the CPU does the same check.
*/
if ((params.permissions & SGX_SECINFO_W) &&
!(params.permissions & SGX_SECINFO_R))
return -EINVAL;
if (params.result || params.count)
return -EINVAL;
ret = sgx_enclave_restrict_permissions(encl, &params);
if (copy_to_user(arg, &params, sizeof(params)))
return -EFAULT;
return ret;
}
/**
* sgx_enclave_modify_types() - Modify type of SGX enclave pages
* @encl: Enclave to which the pages belong.
* @modt: Checked parameters from user about which pages need modifying
* and their new page type.
*
* Return:
* - 0: Success
* - -errno: Otherwise
*/
static long sgx_enclave_modify_types(struct sgx_encl *encl,
struct sgx_enclave_modify_types *modt)
{
unsigned long max_prot_restore;
enum sgx_page_type page_type;
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long prot;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
page_type = modt->page_type & SGX_PAGE_TYPE_MASK;
/*
* The only new page types allowed by hardware are PT_TCS and PT_TRIM.
*/
if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
return -EINVAL;
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = page_type << 8;
for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
addr = encl->base + modt->offset + c;
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
/*
* Borrow the logic from the Intel SDM. Regular pages
* (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
* or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
* CET pages not supported yet.
*/
if (!(entry->type == SGX_PAGE_TYPE_REG ||
(entry->type == SGX_PAGE_TYPE_TCS &&
page_type == SGX_PAGE_TYPE_TRIM))) {
ret = -EINVAL;
goto out_unlock;
}
max_prot_restore = entry->vm_max_prot_bits;
/*
* Once a regular page becomes a TCS page it cannot be
* changed back. So the maximum allowed protection reflects
* the TCS page that is always RW from kernel perspective but
* will be inaccessible from within enclave. Before doing
* so, do make sure that the new page type continues to
* respect the originally vetted page permissions.
*/
if (entry->type == SGX_PAGE_TYPE_REG &&
page_type == SGX_PAGE_TYPE_TCS) {
if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
ret = -EPERM;
goto out_unlock;
}
prot = PROT_READ | PROT_WRITE;
entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
/*
* Prevent page from being reclaimed while mutex
* is released.
*/
if (sgx_unmark_page_reclaimable(entry->epc_page)) {
ret = -EAGAIN;
goto out_entry_changed;
}
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
*/
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
sgx_mark_page_reclaimable(entry->epc_page);
}
/* Change EPC type */
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodt(&secinfo, epc_virt);
if (encls_faulted(ret)) {
/*
* All possible faults should be avoidable:
* parameters have been checked, will only change
* valid page types, and no concurrent
* SGX1/SGX2 ENCLS instructions since these are
* protected with mutex.
*/
pr_err_once("EMODT encountered exception %d\n",
ENCLS_TRAPNR(ret));
ret = -EFAULT;
goto out_entry_changed;
}
if (encls_failed(ret)) {
modt->result = ret;
ret = -EFAULT;
goto out_entry_changed;
}
ret = sgx_enclave_etrack(encl);
if (ret) {
ret = -EFAULT;
goto out_unlock;
}
entry->type = page_type;
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_entry_changed:
entry->vm_max_prot_bits = max_prot_restore;
out_unlock:
mutex_unlock(&encl->lock);
out:
modt->count = c;
return ret;
}
/**
* sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
* @encl: an enclave pointer
* @arg: userspace pointer to a &struct sgx_enclave_modify_types instance
*
* Ability to change the enclave page type supports the following use cases:
*
* * It is possible to add TCS pages to an enclave by changing the type of
* regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
* With this support the number of threads supported by an initialized
* enclave can be increased dynamically.
*
* * Regular or TCS pages can dynamically be removed from an initialized
* enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
* page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
* removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
* after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
* enclave.
*
* Return:
* - 0: Success
* - -errno: Otherwise
*/
static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
void __user *arg)
{
struct sgx_enclave_modify_types params;
long ret;
ret = sgx_ioc_sgx2_ready(encl);
if (ret)
return ret;
if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
if (sgx_validate_offset_length(encl, params.offset, params.length))
return -EINVAL;
if (params.page_type & ~SGX_PAGE_TYPE_MASK)
return -EINVAL;
if (params.result || params.count)
return -EINVAL;
ret = sgx_enclave_modify_types(encl, &params);
if (copy_to_user(arg, &params, sizeof(params)))
return -EFAULT;
return ret;
}
/**
* sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
* @encl: Enclave to which the pages belong
* @params: Checked parameters from user on which pages need to be removed
*
* Return:
* - 0: Success.
* - -errno: Otherwise.
*/
static long sgx_encl_remove_pages(struct sgx_encl *encl,
struct sgx_enclave_remove_pages *params)
{
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
for (c = 0 ; c < params->length; c += PAGE_SIZE) {
addr = encl->base + params->offset + c;
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
if (entry->type != SGX_PAGE_TYPE_TRIM) {
ret = -EPERM;
goto out_unlock;
}
/*
* ENCLS[EMODPR] is a no-op instruction used to inform if
* ENCLU[EACCEPT] was run from within the enclave. If
* ENCLS[EMODPR] is run with RWX on a trimmed page that is
* not yet accepted then it will return
* %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
* accepted the instruction will encounter a page fault.
*/
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodpr(&secinfo, epc_virt);
if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
ret = -EPERM;
goto out_unlock;
}
if (sgx_unmark_page_reclaimable(entry->epc_page)) {
ret = -EBUSY;
goto out_unlock;
}
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
*/
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
sgx_encl_free_epc_page(entry->epc_page);
encl->secs_child_cnt--;
entry->epc_page = NULL;
xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
sgx_encl_shrink(encl, NULL);
kfree(entry);
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_unlock:
mutex_unlock(&encl->lock);
out:
params->count = c;
return ret;
}
/**
* sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
* @encl: an enclave pointer
* @arg: userspace pointer to &struct sgx_enclave_remove_pages instance
*
* Final step of the flow removing pages from an initialized enclave. The
* complete flow is:
*
* 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
* using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
* 2) User approves the page removal by running ENCLU[EACCEPT] from within
* the enclave.
* 3) User initiates actual page removal using the
* %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
*
* First remove any page table entries pointing to the page and then proceed
* with the actual removal of the enclave page and data in support of it.
*
* VA pages are not affected by this removal. It is thus possible that the
* enclave may end up with more VA pages than needed to support all its
* pages.
*
* Return:
* - 0: Success
* - -errno: Otherwise
*/
static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
void __user *arg)
{
struct sgx_enclave_remove_pages params;
long ret;
ret = sgx_ioc_sgx2_ready(encl);
if (ret)
return ret;
if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
if (sgx_validate_offset_length(encl, params.offset, params.length))
return -EINVAL;
if (params.count)
return -EINVAL;
ret = sgx_encl_remove_pages(encl, &params);
if (copy_to_user(arg, &params, sizeof(params)))
return -EFAULT;
return ret;
}
long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct sgx_encl *encl = filep->private_data;
@ -695,6 +1240,16 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
case SGX_IOC_ENCLAVE_PROVISION:
ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
ret = sgx_ioc_enclave_restrict_permissions(encl,
(void __user *)arg);
break;
case SGX_IOC_ENCLAVE_MODIFY_TYPES:
ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_REMOVE_PAGES:
ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
break;
default:
ret = -ENOIOCTLCMD;
break;

View file

@ -137,36 +137,9 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
struct sgx_encl_page *page = epc_page->owner;
unsigned long addr = page->desc & PAGE_MASK;
struct sgx_encl *encl = page->encl;
unsigned long mm_list_version;
struct sgx_encl_mm *encl_mm;
struct vm_area_struct *vma;
int idx, ret;
int ret;
do {
mm_list_version = encl->mm_list_version;
/* Pairs with smp_rmb() in sgx_encl_mm_add(). */
smp_rmb();
idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
mmap_read_lock(encl_mm->mm);
ret = sgx_encl_find(encl_mm->mm, addr, &vma);
if (!ret && encl == vma->vm_private_data)
zap_vma_ptes(vma, addr, PAGE_SIZE);
mmap_read_unlock(encl_mm->mm);
mmput_async(encl_mm->mm);
}
srcu_read_unlock(&encl->srcu, idx);
} while (unlikely(encl->mm_list_version != mm_list_version));
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
@ -201,39 +174,10 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
return ret;
}
static void sgx_ipi_cb(void *info)
void sgx_ipi_cb(void *info)
{
}
static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
{
cpumask_t *cpumask = &encl->cpumask;
struct sgx_encl_mm *encl_mm;
int idx;
/*
* Can race with sgx_encl_mm_add(), but ETRACK has already been
* executed, which means that the CPUs running in the new mm will enter
* into the enclave with a fresh epoch.
*/
cpumask_clear(cpumask);
idx = srcu_read_lock(&encl->srcu);
list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
if (!mmget_not_zero(encl_mm->mm))
continue;
cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
mmput_async(encl_mm->mm);
}
srcu_read_unlock(&encl->srcu, idx);
return cpumask;
}
/*
* Swap page to the regular memory transformed to the blocked state by using
* EBLOCK, which means that it can no longer be referenced (no new TLB entries).
@ -280,7 +224,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
* miss cpus that entered the enclave between
* generating the mask and incrementing epoch.
*/
on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
on_each_cpu_mask(sgx_encl_cpumask(encl),
sgx_ipi_cb, NULL, 1);
ret = __sgx_encl_ewb(epc_page, va_slot, backing);
}
@ -431,6 +375,17 @@ static bool sgx_should_reclaim(unsigned long watermark)
!list_empty(&sgx_active_page_list);
}
/*
* sgx_reclaim_direct() should be called (without enclave's mutex held)
* in locations where SGX memory resources might be low and might be
* needed in order to make forward progress.
*/
void sgx_reclaim_direct(void)
{
if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
sgx_reclaim_pages();
}
static int ksgxd(void *p)
{
set_freezable();

View file

@ -86,10 +86,13 @@ static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page)
struct sgx_epc_page *__sgx_alloc_epc_page(void);
void sgx_free_epc_page(struct sgx_epc_page *page);
void sgx_reclaim_direct(void);
void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
void sgx_ipi_cb(void *info);
#ifdef CONFIG_X86_SGX_KVM
int __init sgx_vepc_init(void);
#else

View file

@ -24,6 +24,9 @@ enum encl_op_type {
ENCL_OP_PUT_TO_ADDRESS,
ENCL_OP_GET_FROM_ADDRESS,
ENCL_OP_NOP,
ENCL_OP_EACCEPT,
ENCL_OP_EMODPE,
ENCL_OP_INIT_TCS_PAGE,
ENCL_OP_MAX,
};
@ -53,4 +56,24 @@ struct encl_op_get_from_addr {
uint64_t addr;
};
struct encl_op_eaccept {
struct encl_op_header header;
uint64_t epc_addr;
uint64_t flags;
uint64_t ret;
};
struct encl_op_emodpe {
struct encl_op_header header;
uint64_t epc_addr;
uint64_t flags;
};
struct encl_op_init_tcs_page {
struct encl_op_header header;
uint64_t tcs_page;
uint64_t ssa;
uint64_t entry;
};
#endif /* DEFINES_H */

View file

@ -130,6 +130,47 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
return true;
}
/*
* Parse the enclave code's symbol table to locate and return address of
* the provided symbol
*/
uint64_t encl_get_entry(struct encl *encl, const char *symbol)
{
Elf64_Shdr *sections;
Elf64_Sym *symtab;
Elf64_Ehdr *ehdr;
char *sym_names;
int num_sym;
int i;
ehdr = encl->bin;
sections = encl->bin + ehdr->e_shoff;
for (i = 0; i < ehdr->e_shnum; i++) {
if (sections[i].sh_type == SHT_SYMTAB) {
symtab = (Elf64_Sym *)((char *)encl->bin + sections[i].sh_offset);
num_sym = sections[i].sh_size / sections[i].sh_entsize;
break;
}
}
for (i = 0; i < ehdr->e_shnum; i++) {
if (sections[i].sh_type == SHT_STRTAB) {
sym_names = (char *)encl->bin + sections[i].sh_offset;
break;
}
}
for (i = 0; i < num_sym; i++) {
Elf64_Sym *sym = &symtab[i];
if (!strcmp(symbol, sym_names + sym->st_name))
return (uint64_t)sym->st_value;
}
return 0;
}
bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
{
const char device_path[] = "/dev/sgx_enclave";

File diff suppressed because it is too large Load diff

View file

@ -38,6 +38,7 @@ void encl_delete(struct encl *ctx);
bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
bool encl_measure(struct encl *encl);
bool encl_build(struct encl *encl);
uint64_t encl_get_entry(struct encl *encl, const char *symbol);
int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
struct sgx_enclave_run *run);

View file

@ -11,6 +11,42 @@
*/
static uint8_t encl_buffer[8192] = { 1 };
enum sgx_enclu_function {
EACCEPT = 0x5,
EMODPE = 0x6,
};
static void do_encl_emodpe(void *_op)
{
struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0};
struct encl_op_emodpe *op = _op;
secinfo.flags = op->flags;
asm volatile(".byte 0x0f, 0x01, 0xd7"
:
: "a" (EMODPE),
"b" (&secinfo),
"c" (op->epc_addr));
}
static void do_encl_eaccept(void *_op)
{
struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0};
struct encl_op_eaccept *op = _op;
int rax;
secinfo.flags = op->flags;
asm volatile(".byte 0x0f, 0x01, 0xd7"
: "=a" (rax)
: "a" (EACCEPT),
"b" (&secinfo),
"c" (op->epc_addr));
op->ret = rax;
}
static void *memcpy(void *dest, const void *src, size_t n)
{
size_t i;
@ -21,6 +57,35 @@ static void *memcpy(void *dest, const void *src, size_t n)
return dest;
}
static void *memset(void *dest, int c, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
((char *)dest)[i] = c;
return dest;
}
static void do_encl_init_tcs_page(void *_op)
{
struct encl_op_init_tcs_page *op = _op;
void *tcs = (void *)op->tcs_page;
uint32_t val_32;
memset(tcs, 0, 16); /* STATE and FLAGS */
memcpy(tcs + 16, &op->ssa, 8); /* OSSA */
memset(tcs + 24, 0, 4); /* CSSA */
val_32 = 1;
memcpy(tcs + 28, &val_32, 4); /* NSSA */
memcpy(tcs + 32, &op->entry, 8); /* OENTRY */
memset(tcs + 40, 0, 24); /* AEP, OFSBASE, OGSBASE */
val_32 = 0xFFFFFFFF;
memcpy(tcs + 64, &val_32, 4); /* FSLIMIT */
memcpy(tcs + 68, &val_32, 4); /* GSLIMIT */
memset(tcs + 72, 0, 4024); /* Reserved */
}
static void do_encl_op_put_to_buf(void *op)
{
struct encl_op_put_to_buf *op2 = op;
@ -62,6 +127,9 @@ void encl_body(void *rdi, void *rsi)
do_encl_op_put_to_addr,
do_encl_op_get_from_addr,
do_encl_op_nop,
do_encl_eaccept,
do_encl_emodpe,
do_encl_init_tcs_page,
};
struct encl_op_header *op = (struct encl_op_header *)rdi;

View file

@ -45,6 +45,12 @@ encl_entry:
# TCS #2. By adding the value of encl_stack to it, we get
# the absolute address for the stack.
lea (encl_stack)(%rbx), %rax
jmp encl_entry_core
encl_dyn_entry:
# Entry point for dynamically created TCS page expected to follow
# its stack directly.
lea -1(%rbx), %rax
encl_entry_core:
xchg %rsp, %rax
push %rax