staging: add Lustre file system client support

Lustre is the most deployed distributed file system
in the HPC (High Performance Computing) world. The patch
adds its client side support.

The code is not very clean and needs to live in drivers/staging
for some time for continuing cleanup work. See
drivers/staging/lustre/TODO for details.

The code is based on Lustre master commit faefbfc04

commit faefbfc0460bc00f2ee4c1c1c86aa1e39b9eea49
Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Date:   Tue Apr 30 23:05:21 2013 +0400

    LU-3244 utils: tunefs.lustre should preserve virgin label

Plus a few under-review patches on Whamcloud gerrit:
3.8 kernel support:
http://review.whamcloud.com/#change,5973
http://review.whamcloud.com/#change,5974
http://review.whamcloud.com/#change,5768
http://review.whamcloud.com/#change,5781
http://review.whamcloud.com/#change,5763
http://review.whamcloud.com/#change,5613
http://review.whamcloud.com/#change,5655

3.9 kernel support:
http://review.whamcloud.com/#change,5898
http://review.whamcloud.com/#change,5899

Kconfig/Kbuild:
http://review.whamcloud.com/#change,4646
http://review.whamcloud.com/#change,4644

libcfs cleanup:
http://review.whamcloud.com/#change,2831
http://review.whamcloud.com/#change,4775
http://review.whamcloud.com/#change,4776
http://review.whamcloud.com/#change,4777
http://review.whamcloud.com/#change,4778
http://review.whamcloud.com/#change,4779
http://review.whamcloud.com/#change,4780

All starting/trailing whitespaces are removed, to match kernel
coding style. Also ran scripts/cleanfile on all lustre source files.

[marked the Kconfig as depending on BROKEN, as the recent procfs changes cause
this to fail - gregkh]

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Peng Tao 2013-05-02 16:46:55 +08:00 committed by Greg Kroah-Hartman
parent 2339b79d09
commit d7e09d0397
427 changed files with 258994 additions and 0 deletions

View file

@ -140,4 +140,6 @@ source "drivers/staging/netlogic/Kconfig"
source "drivers/staging/dwc2/Kconfig"
source "drivers/staging/lustre/Kconfig"
endif # STAGING

View file

@ -62,3 +62,4 @@ obj-$(CONFIG_FIREWIRE_SERIAL) += fwserial/
obj-$(CONFIG_ZCACHE) += zcache/
obj-$(CONFIG_GOLDFISH) += goldfish/
obj-$(CONFIG_USB_DWC2) += dwc2/
obj-$(CONFIG_LUSTRE_FS) += lustre/

View file

@ -0,0 +1,3 @@
source "drivers/staging/lustre/lustre/Kconfig"
source "drivers/staging/lustre/lnet/Kconfig"

View file

@ -0,0 +1,4 @@
subdir-ccflags-y := -I$(src)/include/
obj-$(CONFIG_LUSTRE_FS) += lustre/
obj-$(CONFIG_LNET) += lnet/

View file

@ -0,0 +1,13 @@
* Possible remaining coding style fix.
* Remove deadcode.
* Separate client/server functionality. Functions only used by the server can be
removed from client.
* Clean up libcfs layer. Ideally we can remove include/linux/libcfs entirely.
* Clean up CLIO layer. Lustre client readahead/writeback control needs to be
adapted to better fit the facilities the kernel provides.
* Add documents in Documentation.
* Other minor misc cleanups...
Please send any patches to Greg Kroah-Hartman <greg@kroah.com>, Andreas Dilger
<andreas.dilger@intel.com> and Peng Tao <tao.peng@emc.com>. CCing
hpdd-discuss <hpdd-discuss@lists.01.org> would be great too.

View file

@ -0,0 +1,111 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef _LIBCFS_BITMAP_H_
#define _LIBCFS_BITMAP_H_
/*
 * Variable-sized bitmap: a bit count followed by the bit words.
 * Allocate with CFS_ALLOCATE_BITMAP(), free with CFS_FREE_BITMAP().
 */
typedef struct {
	int		size;		/* number of valid bits */
	unsigned long	data[0];	/* bit words (zero-length array;
					 * payload allocated past the end) */
} cfs_bitmap_t;

/*
 * Bytes needed for a bitmap of @nbits bits: header plus enough longs to
 * hold the bits (rounds up by one extra long).  @nbits is parenthesized
 * so expression arguments expand correctly.
 */
#define CFS_BITMAP_SIZE(nbits) \
	((((nbits) / BITS_PER_LONG) + 1) * sizeof(long) + sizeof(cfs_bitmap_t))
/*
 * Allocate a bitmap capable of holding @size bits.
 * Returns NULL on allocation failure (OBD_ALLOC leaves the pointer NULL
 * in that case, as the original NULL check relied on).
 */
static inline
cfs_bitmap_t *CFS_ALLOCATE_BITMAP(int size)
{
	cfs_bitmap_t *map;

	OBD_ALLOC(map, CFS_BITMAP_SIZE(size));
	if (map != NULL)
		map->size = size;

	RETURN(map);
}
#define CFS_FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size))
/* Set bit number @nbit in @bitmap. */
static inline void cfs_bitmap_set(cfs_bitmap_t *bitmap, int nbit)
{
	set_bit(nbit, bitmap->data);
}
/*
 * Clear bit number @nbit in @bitmap.
 * The previous bit value was discarded by the caller anyway, so use
 * plain clear_bit() instead of test_and_clear_bit(); callers that need
 * the old value have cfs_bitmap_test_and_clear().
 */
static inline
void cfs_bitmap_clear(cfs_bitmap_t *bitmap, int nbit)
{
	clear_bit(nbit, bitmap->data);
}
/* Return non-zero when bit @nbit is set in @bitmap, 0 otherwise. */
static inline int cfs_bitmap_check(cfs_bitmap_t *bitmap, int nbit)
{
	return test_bit(nbit, bitmap->data);
}
/* Clear bit @nbit in @bitmap and return its previous value. */
static inline
int cfs_bitmap_test_and_clear(cfs_bitmap_t *bitmap, int nbit)
{
	return test_and_clear_bit(nbit, bitmap->data);
}
/* Return non-zero when @bitmap has no bits set, 0 otherwise.
 * (find_first_bit() returns @size when no set bit exists.)
 * The original comment claimed the opposite polarity; corrected here. */
static inline
int cfs_bitmap_check_empty(cfs_bitmap_t *bitmap)
{
	return find_first_bit(bitmap->data, bitmap->size) == bitmap->size;
}
/*
 * Copy the contents of @old into @new.  @new must have at least as many
 * bits as @old; @new's capacity (size field) is preserved.
 */
static inline
void cfs_bitmap_copy(cfs_bitmap_t *new, cfs_bitmap_t *old)
{
	int save;

	LASSERT(new->size >= old->size);
	/* memcpy overwrites new->size with old's smaller value; restore it */
	save = new->size;
	memcpy(new, old, CFS_BITMAP_SIZE(old->size));
	new->size = save;
}
/* Iterate @pos over every set bit in @bitmap.  All macro-argument uses
 * are parenthesized (the original left one "bitmap->size" bare). */
#define cfs_foreach_bit(bitmap, pos) \
	for ((pos) = find_first_bit((bitmap)->data, (bitmap)->size); \
	     (pos) < (bitmap)->size; \
	     (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos) + 1))
#endif

View file

@ -0,0 +1,110 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/curproc.h
*
* Lustre curproc API declaration
*
* Author: Nikita Danilov <nikita@clusterfs.com>
*/
#ifndef __LIBCFS_CURPROC_H__
#define __LIBCFS_CURPROC_H__
/*
* Portable API to access common characteristics of "current" UNIX process.
*
* Implemented in portals/include/libcfs/<os>/
*/
int cfs_curproc_groups_nr(void);
int current_is_in_group(gid_t group);
void cfs_curproc_groups_dump(gid_t *array, int size);
/*
* Plus, platform-specific constant
*
* CFS_CURPROC_COMM_MAX,
*
* and opaque scalar type
*
* kernel_cap_t
*/
/* check if task is running in compat mode.*/
int current_is_32bit(void);
#define current_pid() (current->pid)
#define current_comm() (current->comm)
int cfs_get_environ(const char *key, char *value, int *val_len);
/* Compact capability-set representation used by libcfs. */
typedef __u32 cfs_cap_t;

/* Capability bit numbers.  NOTE(review): the values appear to mirror
 * the Linux CAP_* numbering (CAP_CHOWN == 0, CAP_SYS_ADMIN == 21, ...)
 * - confirm against include/uapi/linux/capability.h before relying
 * on the correspondence. */
#define CFS_CAP_CHOWN 0
#define CFS_CAP_DAC_OVERRIDE 1
#define CFS_CAP_DAC_READ_SEARCH 2
#define CFS_CAP_FOWNER 3
#define CFS_CAP_FSETID 4
#define CFS_CAP_LINUX_IMMUTABLE 9
#define CFS_CAP_SYS_ADMIN 21
#define CFS_CAP_SYS_BOOT 23
#define CFS_CAP_SYS_RESOURCE 24

/* Mask covering all of the capability bits defined above. */
#define CFS_CAP_FS_MASK ((1 << CFS_CAP_CHOWN) | \
(1 << CFS_CAP_DAC_OVERRIDE) | \
(1 << CFS_CAP_DAC_READ_SEARCH) | \
(1 << CFS_CAP_FOWNER) | \
(1 << CFS_CAP_FSETID ) | \
(1 << CFS_CAP_LINUX_IMMUTABLE) | \
(1 << CFS_CAP_SYS_ADMIN) | \
(1 << CFS_CAP_SYS_BOOT) | \
(1 << CFS_CAP_SYS_RESOURCE))

/* Capability manipulation helpers; implemented per-platform (see the
 * "Implemented in portals/include/libcfs/<os>/" note above). */
void cfs_cap_raise(cfs_cap_t cap);
void cfs_cap_lower(cfs_cap_t cap);
int cfs_cap_raised(cfs_cap_t cap);
cfs_cap_t cfs_curproc_cap_pack(void);
void cfs_curproc_cap_unpack(cfs_cap_t cap);
int cfs_capable(cfs_cap_t cap);
/* __LIBCFS_CURPROC_H__ */
#endif
/*
* Local variables:
* c-indentation-style: "K&R"
* c-basic-offset: 8
* tab-width: 8
* fill-column: 80
* scroll-step: 1
* End:
*/

View file

@ -0,0 +1,286 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LIBCFS_LIBCFS_H__
#define __LIBCFS_LIBCFS_H__
#if !__GNUC__
#define __attribute__(x)
#endif
#include <linux/libcfs/linux/libcfs.h>
#include "curproc.h"
#ifndef offsetof
# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb)))
#endif
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0])))
#endif
#if !defined(swap)
#define swap(x,y) do { typeof(x) z = x; x = y; y = z; } while (0)
#endif
#if !defined(container_of)
/* given a pointer @ptr to the field @member embedded into type (usually
* struct) @type, return pointer to the embedding instance of @type. */
#define container_of(ptr, type, member) \
((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
#endif
/*
 * True iff @val has at most one bit set.
 * NOTE(review): also returns 1 for val == 0, which is not a power of
 * two - confirm callers expect that before tightening.
 */
static inline int __is_po2(unsigned long long val)
{
	return (val & (val - 1)) == 0;
}

#define IS_PO2(val) __is_po2((unsigned long long)(val))
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
/*
* Lustre Error Checksum: calculates checksum
* of Hex number by XORing each bit.
*/
#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
((hexnum) >> 8 & 0xf))
/*
* Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
* Lustre RETURN(NULL) macro.
*/
#if defined(NULL)
#undef NULL
#endif
#define NULL ((void *)0)
#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID
#include <linux/list.h>
#ifndef cfs_for_each_possible_cpu
# error cfs_for_each_possible_cpu is not supported by kernel!
#endif
/* libcfs tcpip */
int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
int libcfs_ipif_enumerate(char ***names);
void libcfs_ipif_free_enumeration(char **names, int n);
int libcfs_sock_listen(socket_t **sockp, __u32 ip, int port, int backlog);
int libcfs_sock_accept(socket_t **newsockp, socket_t *sock);
void libcfs_sock_abort_accept(socket_t *sock);
int libcfs_sock_connect(socket_t **sockp, int *fatal,
__u32 local_ip, int local_port,
__u32 peer_ip, int peer_port);
int libcfs_sock_setbuf(socket_t *socket, int txbufsize, int rxbufsize);
int libcfs_sock_getbuf(socket_t *socket, int *txbufsize, int *rxbufsize);
int libcfs_sock_getaddr(socket_t *socket, int remote, __u32 *ip, int *port);
int libcfs_sock_write(socket_t *sock, void *buffer, int nob, int timeout);
int libcfs_sock_read(socket_t *sock, void *buffer, int nob, int timeout);
void libcfs_sock_release(socket_t *sock);
/* libcfs watchdogs */
struct lc_watchdog;
/* Add a watchdog which fires after "time" milliseconds of delay. You have to
* touch it once to enable it. */
struct lc_watchdog *lc_watchdog_add(int time,
void (*cb)(pid_t pid, void *),
void *data);
/* Enables a watchdog and resets its timer. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout);
#define CFS_GET_TIMEOUT(svc) (max_t(int, obd_timeout, \
AT_OFF ? 0 : at_get(&svc->srv_at_estimate)) * \
svc->srv_watchdog_factor)
/* Disable a watchdog; touch it to restart it. */
void lc_watchdog_disable(struct lc_watchdog *lcw);
/* Clean up the watchdog */
void lc_watchdog_delete(struct lc_watchdog *lcw);
/* Dump a debug log */
void lc_watchdog_dumplog(pid_t pid, void *data);
/* need both kernel and user-land acceptor */
#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512
#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023
/*
* libcfs pseudo device operations
*
* struct psdev_t and
* misc_register() and
* misc_deregister() are declared in
* libcfs/<os>/<os>-prim.h
*
* It's just draft now.
*/
struct cfs_psdev_file {
unsigned long off;
void *private_data;
unsigned long reserved1;
unsigned long reserved2;
};
struct cfs_psdev_ops {
int (*p_open)(unsigned long, void *);
int (*p_close)(unsigned long, void *);
int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
};
/*
* Drop into debugger, if possible. Implementation is provided by platform.
*/
void cfs_enter_debugger(void);
/*
* Defined by platform
*/
int unshare_fs_struct(void);
sigset_t cfs_get_blocked_sigs(void);
sigset_t cfs_block_allsigs(void);
sigset_t cfs_block_sigs(unsigned long sigs);
sigset_t cfs_block_sigsinv(unsigned long sigs);
void cfs_restore_sigs(sigset_t);
int cfs_signal_pending(void);
void cfs_clear_sigpending(void);
int convert_server_error(__u64 ecode);
int convert_client_oflag(int cflag, int *result);
/*
* Stack-tracing filling.
*/
/*
* Platform-dependent data-type to hold stack frames.
*/
struct cfs_stack_trace;
/*
* Fill @trace with current back-trace.
*/
void cfs_stack_trace_fill(struct cfs_stack_trace *trace);
/*
* Return instruction pointer for frame @frame_no. NULL if @frame_no is
* invalid.
*/
void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no);
#ifndef O_NOACCESS
#define O_NOACCESS O_NONBLOCK
#endif
/*
* Universal open flags.
*/
#define CFS_O_NOACCESS 0003
#define CFS_O_ACCMODE CFS_O_NOACCESS
#define CFS_O_CREAT 0100
#define CFS_O_EXCL 0200
#define CFS_O_NOCTTY 0400
#define CFS_O_TRUNC 01000
#define CFS_O_APPEND 02000
#define CFS_O_NONBLOCK 04000
#define CFS_O_NDELAY CFS_O_NONBLOCK
#define CFS_O_SYNC 010000
#define CFS_O_ASYNC 020000
#define CFS_O_DIRECT 040000
#define CFS_O_LARGEFILE 0100000
#define CFS_O_DIRECTORY 0200000
#define CFS_O_NOFOLLOW 0400000
#define CFS_O_NOATIME 01000000
/* convert local open flags to universal open flags */
int cfs_oflags2univ(int flags);
/* convert universal open flags to local open flags */
int cfs_univ2oflags(int flags);
/*
* Random number handling
*/
/* returns a random 32-bit integer */
unsigned int cfs_rand(void);
/* seed the generator */
void cfs_srand(unsigned int, unsigned int);
void cfs_get_random_bytes(void *buf, int size);
#include <linux/libcfs/libcfs_debug.h>
#include <linux/libcfs/libcfs_cpu.h>
#include <linux/libcfs/libcfs_private.h>
#include <linux/libcfs/libcfs_ioctl.h>
#include <linux/libcfs/libcfs_prim.h>
#include <linux/libcfs/libcfs_time.h>
#include <linux/libcfs/libcfs_string.h>
#include <linux/libcfs/libcfs_kernelcomm.h>
#include <linux/libcfs/libcfs_workitem.h>
#include <linux/libcfs/libcfs_hash.h>
#include <linux/libcfs/libcfs_heap.h>
#include <linux/libcfs/libcfs_fail.h>
#include <linux/libcfs/params_tree.h>
#include <linux/libcfs/libcfs_crypto.h>
/* container_of depends on "likely" which is defined in libcfs_private.h */
/*
 * Like container_of(), but passes NULL and ERR_PTR-encoded error values
 * through unchanged instead of applying the member offset to them.
 * @shift is the byte offset of the member within the enclosing type.
 */
static inline void *__container_of(void *ptr, unsigned long shift)
{
	if (unlikely(IS_ERR(ptr) || ptr == NULL))
		return ptr;
	else
		return (char *)ptr - shift;
}
#define container_of0(ptr, type, member) \
((type *)__container_of((void *)(ptr), offsetof(type, member)))
#define SET_BUT_UNUSED(a) do { } while(sizeof(a) - sizeof(a))
#define _LIBCFS_H
#endif /* _LIBCFS_H */

View file

@ -0,0 +1,214 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA
*
* GPL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_cpu.h
*
* CPU partition
* . CPU partition is virtual processing unit
*
* . CPU partition can present 1-N cores, or 1-N NUMA nodes,
* in other words, CPU partition is a processors pool.
*
* CPU Partition Table (CPT)
* . a set of CPU partitions
*
* . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP
*
* . User can specify total number of CPU partitions while creating a
* CPT, ID of CPU partition is always start from 0.
*
* Example: if there are 8 cores on the system, while creating a CPT
* with cpu_npartitions=4:
* core[0, 1] = partition[0], core[2, 3] = partition[1]
* core[4, 5] = partition[2], core[6, 7] = partition[3]
*
* cpu_npartitions=1:
* core[0, 1, ... 7] = partition[0]
*
* . User can also specify CPU partitions by string pattern
*
* Examples: cpu_partitions="0[0,1], 1[2,3]"
* cpu_partitions="N 0[0-3], 1[4-8]"
*
* The first character "N" means following numbers are numa ID
*
* . NUMA allocators, CPU affinity threads are built over CPU partitions,
* instead of HW CPUs or HW nodes.
*
* . By default, Lustre modules should refer to the global cfs_cpt_table,
* instead of accessing HW CPUs directly, so concurrency of Lustre can be
* configured by cpu_npartitions of the global cfs_cpt_table
*
* . If cpu_npartitions=1(all CPUs in one pool), lustre should work the
* same way as 2.2 or earlier versions
*
* Author: liang@whamcloud.com
*/
#ifndef __LIBCFS_CPU_H__
#define __LIBCFS_CPU_H__
#ifndef HAVE_LIBCFS_CPT
typedef unsigned long cpumask_t;
typedef unsigned long nodemask_t;
struct cfs_cpt_table {
/* # of CPU partitions */
int ctb_nparts;
/* cpu mask */
cpumask_t ctb_mask;
/* node mask */
nodemask_t ctb_nodemask;
/* version */
__u64 ctb_version;
};
#endif /* !HAVE_LIBCFS_CPT */
/* any CPU partition */
#define CFS_CPT_ANY (-1)
extern struct cfs_cpt_table *cfs_cpt_table;
/**
* destroy a CPU partition table
*/
void cfs_cpt_table_free(struct cfs_cpt_table *cptab);
/**
* create a cfs_cpt_table with \a ncpt number of partitions
*/
struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt);
/**
* print string information of cpt-table
*/
int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len);
/**
* return total number of CPU partitions in \a cptab
*/
int
cfs_cpt_number(struct cfs_cpt_table *cptab);
/**
* return number of HW cores or hyper-threads in a CPU partition \a cpt
*/
int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt);
/**
* is there any online CPU in CPU partition \a cpt
*/
int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt);
/**
* return cpumask of CPU partition \a cpt
*/
cpumask_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt);
/**
* return nodemask of CPU partition \a cpt
*/
nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt);
/**
* shadow current HW processor ID to CPU-partition ID of \a cptab
*/
int cfs_cpt_current(struct cfs_cpt_table *cptab, int remap);
/**
* shadow HW processor ID \a CPU to CPU-partition ID by \a cptab
*/
int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu);
/**
* bind current thread on a CPU-partition \a cpt of \a cptab
*/
int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt);
/**
* add \a cpu to CPU partition \a cpt of \a cptab, return 1 for success,
* otherwise 0 is returned
*/
int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
/**
* remove \a cpu from CPU partition \a cpt of \a cptab
*/
void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu);
/**
* add all cpus in \a mask to CPU partition \a cpt
* return 1 if successfully set all CPUs, otherwise return 0
*/
int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab,
int cpt, cpumask_t *mask);
/**
* remove all cpus in \a mask from CPU partition \a cpt
*/
void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab,
int cpt, cpumask_t *mask);
/**
* add all cpus in NUMA node \a node to CPU partition \a cpt
* return 1 if successfully set all CPUs, otherwise return 0
*/
int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node);
/**
* remove all cpus in NUMA node \a node from CPU partition \a cpt
*/
void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node);
/**
* add all cpus in node mask \a mask to CPU partition \a cpt
* return 1 if successfully set all CPUs, otherwise return 0
*/
int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab,
int cpt, nodemask_t *mask);
/**
* remove all cpus in node mask \a mask from CPU partition \a cpt
*/
void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab,
int cpt, nodemask_t *mask);
/**
* unset all cpus for CPU partition \a cpt
*/
void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt);
/**
* convert partition id \a cpt to numa node id, if there are more than one
* nodes in this partition, it might return a different node id each time.
*/
int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
/**
* iterate over all CPU partitions in \a cptab
*/
#define cfs_cpt_for_each(i, cptab) \
for (i = 0; i < cfs_cpt_number(cptab); i++)
#ifndef __read_mostly
# define __read_mostly
#endif
#ifndef ____cacheline_aligned
#define ____cacheline_aligned
#endif
int cfs_cpu_init(void);
void cfs_cpu_fini(void);
#endif /* __LIBCFS_CPU_H__ */

View file

@ -0,0 +1,201 @@
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*/
#ifndef _LIBCFS_CRYPTO_H
#define _LIBCFS_CRYPTO_H
/** Descriptor for one supported hash algorithm. */
struct cfs_crypto_hash_type {
	char		*cht_name;	/**< hash algorithm name, equal to
					 * format name for crypto api */
	unsigned int	cht_key;	/**< init key by default (valid for
					 * 4-byte context like crc32, adler) */
	unsigned int	cht_size;	/**< hash digest size */
};

/** Identifiers of the supported hash algorithms. */
enum cfs_crypto_hash_alg {
	CFS_HASH_ALG_NULL	= 0,
	CFS_HASH_ALG_ADLER32,
	CFS_HASH_ALG_CRC32,
	CFS_HASH_ALG_MD5,
	CFS_HASH_ALG_SHA1,
	CFS_HASH_ALG_SHA256,
	CFS_HASH_ALG_SHA384,
	CFS_HASH_ALG_SHA512,
	CFS_HASH_ALG_CRC32C,
	CFS_HASH_ALG_MAX
};

/* Algorithm property table, indexed by enum cfs_crypto_hash_alg. */
static struct cfs_crypto_hash_type hash_types[] = {
	[CFS_HASH_ALG_NULL]    = { "null",    0,  0 },
	[CFS_HASH_ALG_ADLER32] = { "adler32", 1,  4 },
	[CFS_HASH_ALG_CRC32]   = { "crc32",   ~0, 4 },
	[CFS_HASH_ALG_CRC32C]  = { "crc32c",  ~0, 4 },
	[CFS_HASH_ALG_MD5]     = { "md5",     0,  16 },
	[CFS_HASH_ALG_SHA1]    = { "sha1",    0,  20 },
	[CFS_HASH_ALG_SHA256]  = { "sha256",  0,  32 },
	[CFS_HASH_ALG_SHA384]  = { "sha384",  0,  48 },
	[CFS_HASH_ALG_SHA512]  = { "sha512",  0,  64 },
};

/** Return pointer to type of hash for valid hash algorithm identifier,
 * or NULL for an out-of-range or unnamed identifier. */
static inline const struct cfs_crypto_hash_type *
cfs_crypto_hash_type(unsigned char hash_alg)
{
	struct cfs_crypto_hash_type *ht;

	if (hash_alg < CFS_HASH_ALG_MAX) {
		ht = &hash_types[hash_alg];
		if (ht->cht_name)
			return ht;
	}
	return NULL;
}

/** Return hash name for valid hash algorithm identifier or "unknown" */
static inline const char *cfs_crypto_hash_name(unsigned char hash_alg)
{
	const struct cfs_crypto_hash_type *ht;

	ht = cfs_crypto_hash_type(hash_alg);
	return ht ? ht->cht_name : "unknown";
}

/** Return digest size in bytes for valid algorithm identifier or 0 */
static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg)
{
	const struct cfs_crypto_hash_type *ht;

	ht = cfs_crypto_hash_type(hash_alg);
	return ht ? ht->cht_size : 0;
}

/** Return hash identifier for valid hash algorithm name or 0xFF */
static inline unsigned char cfs_crypto_hash_alg(const char *algname)
{
	/* full-width counter: a bare "unsigned char" (as originally used)
	 * would wrap and loop forever if CFS_HASH_ALG_MAX grew past 255 */
	unsigned int i;

	for (i = 0; i < CFS_HASH_ALG_MAX; i++)
		if (!strcmp(hash_types[i].cht_name, algname))
			break;
	return (i == CFS_HASH_ALG_MAX ? 0xFF : (unsigned char)i);
}
/** Calculate hash digest for buffer.
* @param alg id of hash algorithm
* @param buf buffer of data
* @param buf_len buffer len
* @param key initial value for algorithm, if it is NULL,
* default initial value should be used.
* @param key_len len of initial value
* @param hash [out] pointer to hash, if it is NULL, hash_len is
* set to valid digest size in bytes, retval -ENOSPC.
* @param hash_len [in,out] size of hash buffer
* @returns status of operation
* @retval -EINVAL if buf, buf_len, hash_len or alg_id is invalid
* @retval -ENODEV if this algorithm is unsupported
* @retval -ENOSPC if pointer to hash is NULL, or hash_len less than
* digest size
* @retval 0 for success
* @retval < 0 other errors from lower layers.
*/
int cfs_crypto_hash_digest(unsigned char alg,
const void *buf, unsigned int buf_len,
unsigned char *key, unsigned int key_len,
unsigned char *hash, unsigned int *hash_len);
/* cfs crypto hash descriptor */
struct cfs_crypto_hash_desc;
/** Allocate and initialize desriptor for hash algorithm.
* @param alg algorithm id
* @param key initial value for algorithm, if it is NULL,
* default initial value should be used.
* @param key_len len of initial value
* @returns pointer to descriptor of hash instance
* @retval ERR_PTR(error) when errors occurred.
*/
struct cfs_crypto_hash_desc*
cfs_crypto_hash_init(unsigned char alg,
unsigned char *key, unsigned int key_len);
/** Update digest by part of data.
* @param desc hash descriptor
* @param page data page
* @param offset data offset
* @param len data len
* @returns status of operation
* @retval 0 for success.
*/
int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc,
struct page *page, unsigned int offset,
unsigned int len);
/** Update digest by part of data.
* @param desc hash descriptor
* @param buf pointer to data buffer
* @param buf_len size of data at buffer
* @returns status of operation
* @retval 0 for success.
*/
int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf,
unsigned int buf_len);
/** Finalize hash calculation, copy hash digest to buffer, destroy hash
* descriptor.
* @param desc hash descriptor
* @param hash buffer pointer to store hash digest
* @param hash_len pointer to hash buffer size, if NULL
* destroy the hash descriptor
* @returns status of operation
* @retval -ENOSPC if hash is NULL, or *hash_len less than
* digest size
* @retval 0 for success
* @retval < 0 other errors from lower layers.
*/
int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc,
unsigned char *hash, unsigned int *hash_len);
/**
* Register crypto hash algorithms
*/
int cfs_crypto_register(void);
/**
* Unregister
*/
void cfs_crypto_unregister(void);
/** Return hash speed in Mbytes per second for valid hash algorithm
* identifier.  If the test was unsuccessful, -1 is returned.
*/
int cfs_crypto_hash_speed(unsigned char hash_alg);
#endif

View file

@ -0,0 +1,350 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_debug.h
*
* Debug messages and assertions
*
*/
#ifndef __LIBCFS_DEBUG_H__
#define __LIBCFS_DEBUG_H__
/*
* Debugging
*/
extern unsigned int libcfs_subsystem_debug;
extern unsigned int libcfs_stack;
extern unsigned int libcfs_debug;
extern unsigned int libcfs_printk;
extern unsigned int libcfs_console_ratelimit;
extern unsigned int libcfs_watchdog_ratelimit;
extern unsigned int libcfs_console_max_delay;
extern unsigned int libcfs_console_min_delay;
extern unsigned int libcfs_console_backoff;
extern unsigned int libcfs_debug_binary;
extern char libcfs_debug_file_path_arr[PATH_MAX];
int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
/* Has there been an LBUG? */
extern unsigned int libcfs_catastrophe;
extern unsigned int libcfs_panic_on_lbug;
/**
 * Format for debug message headers
 *
 * NOTE(review): this is the fixed header preceding each debug-log
 * record; it is declared packed, so any layout change alters the
 * on-record format consumed by external log tooling - verify against
 * lnet/libcfs/debug.c before modifying.
 */
struct ptldebug_header {
	__u32 ph_len;		/* record length */
	__u32 ph_flags;		/* PH_FLAG_* bits */
	__u32 ph_subsys;	/* S_* subsystem bit */
	__u32 ph_mask;		/* D_* debug mask bit */
	__u16 ph_cpu_id;
	__u16 ph_type;
	__u32 ph_sec;		/* timestamp (presumably seconds - confirm) */
	__u64 ph_usec;
	__u32 ph_stack;
	__u32 ph_pid;
	__u32 ph_extern_pid;
	__u32 ph_line_num;
} __attribute__((packed));
#define PH_FLAG_FIRST_RECORD 1
/* Debugging subsystems (32 bits, non-overlapping) */
/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
#define S_UNDEFINED 0x00000001
#define S_MDC 0x00000002
#define S_MDS 0x00000004
#define S_OSC 0x00000008
#define S_OST 0x00000010
#define S_CLASS 0x00000020
#define S_LOG 0x00000040
#define S_LLITE 0x00000080
#define S_RPC 0x00000100
#define S_MGMT 0x00000200
#define S_LNET 0x00000400
#define S_LND 0x00000800 /* ALL LNDs */
#define S_PINGER 0x00001000
#define S_FILTER 0x00002000
/* unused */
#define S_ECHO 0x00008000
#define S_LDLM 0x00010000
#define S_LOV 0x00020000
#define S_LQUOTA 0x00040000
#define S_OSD 0x00080000
/* unused */
/* unused */
/* unused */
#define S_LMV 0x00800000 /* b_new_cmd */
/* unused */
#define S_SEC 0x02000000 /* upcall cache */
#define S_GSS 0x04000000 /* b_new_cmd */
/* unused */
#define S_MGC 0x10000000
#define S_MGS 0x20000000
#define S_FID 0x40000000 /* b_new_cmd */
#define S_FLD 0x80000000 /* b_new_cmd */
/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
/* Debugging masks (32 bits, non-overlapping) */
/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
#define D_INODE 0x00000002
#define D_SUPER 0x00000004
#define D_EXT2 0x00000008 /* anything from ext2_debug */
#define D_MALLOC 0x00000010 /* print malloc, free information */
#define D_CACHE 0x00000020 /* cache-related items */
#define D_INFO 0x00000040 /* general information */
#define D_IOCTL 0x00000080 /* ioctl related information */
#define D_NETERROR 0x00000100 /* network errors */
#define D_NET 0x00000200 /* network communications */
#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
#define D_BUFFS 0x00000800
#define D_OTHER 0x00001000
#define D_DENTRY 0x00002000
#define D_NETTRACE 0x00004000
#define D_PAGE 0x00008000 /* bulk page handling */
#define D_DLMTRACE 0x00010000
#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
#define D_HA 0x00080000 /* recovery and failover */
#define D_RPCTRACE 0x00100000 /* for distributed debugging */
#define D_VFSTRACE 0x00200000
#define D_READA 0x00400000 /* read-ahead */
#define D_MMAP 0x00800000
#define D_CONFIG 0x01000000
#define D_CONSOLE 0x02000000
#define D_QUOTA 0x04000000
#define D_SEC 0x08000000
#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */
/* keep these in sync with lnet/{utils,libcfs}/debug.c */
#define D_HSM D_TRACE
#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
#ifndef DEBUG_SUBSYSTEM
# define DEBUG_SUBSYSTEM S_UNDEFINED
#endif
#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */
#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */
#define CDEBUG_DEFAULT_BACKOFF 2
/* Per-call-site console rate-limiting state; passed as the cdls
 * argument of __CDEBUG() by CDEBUG_LIMIT().  Defaults for the delay
 * fields are the CDEBUG_DEFAULT_* values above (jiffies). */
typedef struct {
cfs_time_t cdls_next; /* next time a message may be emitted — presumably jiffies; confirm */
unsigned int cdls_delay; /* current inter-message delay, bounded by CDEBUG_DEFAULT_MIN/MAX_DELAY */
int cdls_count; /* NOTE(review): looks like a suppressed-message count — verify in libcfs_debug_msg() */
} cfs_debug_limit_state_t;
/* Call-site descriptor for one debug message.  Filled in by
 * LIBCFS_DEBUG_MSG_DATA_INIT()/_DECL() below and consumed by
 * libcfs_debug_msg()/libcfs_debug_vmsg2(). */
struct libcfs_debug_msg_data {
const char *msg_file; /* __FILE__ of the call site */
const char *msg_fn; /* __FUNCTION__ of the call site */
int msg_subsys; /* DEBUG_SUBSYSTEM of the emitting module */
int msg_line; /* __LINE__ of the call site */
int msg_mask; /* D_* mask the message is logged under */
cfs_debug_limit_state_t *msg_cdls; /* rate-limit state; NULL for unlimited (cf. CDEBUG) */
};
#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls) \
do { \
(data)->msg_subsys = DEBUG_SUBSYSTEM; \
(data)->msg_file = __FILE__; \
(data)->msg_fn = __FUNCTION__; \
(data)->msg_line = __LINE__; \
(data)->msg_cdls = (cdls); \
(data)->msg_mask = (mask); \
} while (0)
#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls) \
static struct libcfs_debug_msg_data dataname = { \
.msg_subsys = DEBUG_SUBSYSTEM, \
.msg_file = __FILE__, \
.msg_fn = __FUNCTION__, \
.msg_line = __LINE__, \
.msg_cdls = (cdls) }; \
dataname.msg_mask = (mask);
/**
* Filters out logging messages based on mask and subsystem.
*/
/**
 * Filters out logging messages based on mask and subsystem.
 *
 * Messages carrying any D_CANTMASK bit are always shown; all others
 * must be enabled in both the global debug mask and the per-subsystem
 * mask to get through.
 */
static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
{
	if (mask & D_CANTMASK)
		return 1;

	return (libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem);
}
#define __CDEBUG(cdls, mask, format, ...) \
do { \
static struct libcfs_debug_msg_data msgdata; \
\
CFS_CHECK_STACK(&msgdata, mask, cdls); \
\
if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \
LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls); \
libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__); \
} \
} while (0)
#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
#define CDEBUG_LIMIT(mask, format, ...) \
do { \
static cfs_debug_limit_state_t cdls; \
\
__CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\
} while (0)
#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
#define CNETERR(format, a...) CDEBUG_LIMIT(D_NETERROR, format, ## a)
#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
"%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
void libcfs_log_goto(struct libcfs_debug_msg_data *, const char *, long_ptr_t);
#define GOTO(label, rc) \
do { \
if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
libcfs_log_goto(&msgdata, #label, (long_ptr_t)(rc)); \
} else { \
(void)(rc); \
} \
goto label; \
} while (0)
/*
* if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
* there will be a warning in osx.
*/
#if defined(__GNUC__)
long libcfs_log_return(struct libcfs_debug_msg_data *, long rc);
#if BITS_PER_LONG > 32
#define RETURN(rc) \
do { \
EXIT_NESTING; \
if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
return (typeof(rc))libcfs_log_return(&msgdata, \
(long)(rc)); \
} \
\
return (rc); \
} while (0)
#else /* BITS_PER_LONG == 32 */
/* We need an on-stack variable, because we cannot cast a 32-bit pointer
 * directly to (long long) without generating a compiler warning/error, yet
 * casting directly to (long) will truncate 64-bit return values. The log
 * values will print as 32-bit values, but they always have been. LU-1436
 */
#define RETURN(rc) \
do { \
EXIT_NESTING; \
if (cfs_cdebug_show(D_TRACE, DEBUG_SUBSYSTEM)) { \
typeof(rc) __rc = (rc); \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL); \
libcfs_log_return(&msgdata, (long_ptr_t)__rc); \
return __rc; \
} \
\
return (rc); \
} while (0)
#endif /* BITS_PER_LONG > 32 */
#elif defined(_MSC_VER)
#define RETURN(rc) \
do { \
CDEBUG(D_TRACE, "Process leaving.\n"); \
EXIT_NESTING; \
return (rc); \
} while (0)
#else
# error "Unkown compiler"
#endif /* __GNUC__ */
#define ENTRY \
ENTRY_NESTING; \
do { \
CDEBUG(D_TRACE, "Process entered\n"); \
} while (0)
#define EXIT \
do { \
CDEBUG(D_TRACE, "Process leaving\n"); \
EXIT_NESTING; \
} while(0)
#define RETURN_EXIT \
do { \
EXIT; \
return; \
} while (0)
extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
const char *format1, ...)
__attribute__ ((format (printf, 2, 3)));
extern int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
const char *format1,
va_list args, const char *format2, ...)
__attribute__ ((format (printf, 4, 5)));
/* other external symbols that tracefile provides: */
extern int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
const char *usr_buffer, int usr_buffer_nob);
extern int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
const char *knl_buffer, char *append);
#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
#endif /* __LIBCFS_DEBUG_H__ */

View file

@ -0,0 +1,170 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please contact Oracle Corporation, Inc., 500 Oracle Parkway, Redwood Shores,
* CA 94065 USA or visit www.oracle.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Oracle Corporation, Inc.
*/
#ifndef _LIBCFS_FAIL_H
#define _LIBCFS_FAIL_H
extern unsigned long cfs_fail_loc;
extern unsigned int cfs_fail_val;
extern wait_queue_head_t cfs_race_waitq;
extern int cfs_race_state;
int __cfs_fail_check_set(__u32 id, __u32 value, int set);
int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
enum {
CFS_FAIL_LOC_NOSET = 0,
CFS_FAIL_LOC_ORSET = 1,
CFS_FAIL_LOC_RESET = 2,
CFS_FAIL_LOC_VALUE = 3
};
/* Failure injection control */
#define CFS_FAIL_MASK_SYS 0x0000FF00
#define CFS_FAIL_MASK_LOC (0x000000FF | CFS_FAIL_MASK_SYS)
#define CFS_FAILED_BIT 30
/* CFS_FAILED is 0x40000000 */
#define CFS_FAILED (1 << CFS_FAILED_BIT)
#define CFS_FAIL_ONCE_BIT 31
/* CFS_FAIL_ONCE is 0x80000000 */
#define CFS_FAIL_ONCE (1 << CFS_FAIL_ONCE_BIT)
/* The following flags aren't made to be combined */
#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */
#define CFS_FAIL_SOME 0x10000000 /* only fail N times */
#define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */
#define CFS_FAIL_USR1 0x04000000 /* user flag */
#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc && \
(cfs_fail_loc & CFS_FAIL_MASK_LOC) == \
((id) & CFS_FAIL_MASK_LOC))
/* Check (and possibly update, per @set = CFS_FAIL_LOC_*) failure
 * injection point @id.  Returns the non-zero result of
 * __cfs_fail_check_set() when @id matched cfs_fail_loc, 0 otherwise.
 * @quiet selects the low-noise CDEBUG(D_INFO) report over
 * LCONSOLE_INFO(). */
static inline int cfs_fail_check_set(__u32 id, __u32 value,
int set, int quiet)
{
int ret = 0;
/* NB: assignment inside the condition is intentional — the expensive
 * __cfs_fail_check_set() only runs (and ret is only overwritten) when
 * the cheap CFS_FAIL_PRECHECK() matched first. */
if (unlikely(CFS_FAIL_PRECHECK(id) &&
(ret = __cfs_fail_check_set(id, value, set)))) {
if (quiet) {
CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n",
id, value);
} else {
LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n",
id, value);
}
}
return ret;
}
/* If id hit cfs_fail_loc, return 1, otherwise return 0 */
#define CFS_FAIL_CHECK(id) \
cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0)
#define CFS_FAIL_CHECK_QUIET(id) \
cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1)
/* If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1,
* otherwise return 0 */
#define CFS_FAIL_CHECK_VALUE(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0)
#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1)
/* If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1,
* otherwise return 0 */
#define CFS_FAIL_CHECK_ORSET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0)
#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1)
/* If id hit cfs_fail_loc, cfs_fail_loc = value and return 1,
* otherwise return 0 */
#define CFS_FAIL_CHECK_RESET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0)
#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \
cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1)
/* Sleep-style failure injection: when @id matches cfs_fail_loc,
 * delegate to __cfs_fail_timeout_set() (which sleeps for @ms);
 * otherwise do nothing and report 0. */
static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
{
	if (!unlikely(CFS_FAIL_PRECHECK(id)))
		return 0;

	return __cfs_fail_timeout_set(id, value, ms, set);
}
/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */
#define CFS_FAIL_TIMEOUT(id, secs) \
cfs_fail_timeout_set(id, 0, secs * 1000, CFS_FAIL_LOC_NOSET)
#define CFS_FAIL_TIMEOUT_MS(id, ms) \
cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET)
/* If id hit cfs_fail_loc, cfs_fail_loc |= value and
* sleep seconds or milliseconds */
#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
/* The idea here is to synchronise two threads to force a race. The
* first thread that calls this with a matching fail_loc is put to
* sleep. The next thread that calls with the same fail_loc wakes up
* the first and continues. */
static inline void cfs_race(__u32 id)
{
if (CFS_FAIL_PRECHECK(id)) {
if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) {
int rc;
/* first matching caller: park on cfs_race_waitq until
 * the partner thread arrives and flips cfs_race_state */
cfs_race_state = 0;
CERROR("cfs_race id %x sleeping\n", id);
cfs_wait_event_interruptible(cfs_race_waitq,
cfs_race_state != 0, rc);
CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc);
} else {
/* second matching caller: release the sleeper and
 * continue, forcing the two threads to race */
CERROR("cfs_fail_race id %x waking\n", id);
cfs_race_state = 1;
wake_up(&cfs_race_waitq);
}
}
}
#define CFS_RACE(id) cfs_race(id)
#endif /* _LIBCFS_FAIL_H */

View file

@ -0,0 +1,850 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_hash.h
*
* Hashing routines
*
*/
#ifndef __LIBCFS_HASH_H__
#define __LIBCFS_HASH_H__
/*
* Knuth recommends primes in approximately golden ratio to the maximum
* integer representable by a machine word for multiplicative hashing.
* Chuck Lever verified the effectiveness of this technique:
* http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
*
* These primes are chosen to be bit-sparse, that is operations on
* them can use shifts and additions instead of multiplications for
* machines where multiplications are slow.
*/
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
/*
 * Ideally we would use HAVE_HASH_LONG for this, but on linux we configure
 * the linux kernel and user space at the same time, so we need to differentiate
 * between them explicitly. If this is not needed on other architectures, then
 * we'll need to move the functions to architecture-specific headers.
 */
#include <linux/hash.h>
#define cfs_hash_long(val, bits) hash_long(val, bits)
/** disable debug */
#define CFS_HASH_DEBUG_NONE 0
/** record hash depth and output to console when it's too deep,
* computing overhead is low but consume more memory */
#define CFS_HASH_DEBUG_1 1
/** expensive, check key validation */
#define CFS_HASH_DEBUG_2 2
#define CFS_HASH_DEBUG_LEVEL CFS_HASH_DEBUG_NONE
struct cfs_hash_ops;
struct cfs_hash_lock_ops;
struct cfs_hash_hlist_ops;
typedef union {
rwlock_t rw; /**< rwlock */
spinlock_t spin; /**< spinlock */
} cfs_hash_lock_t;
/**
 * cfs_hash_bucket is a container of:
 * - lock, counter ...
 * - array of hash-head starting from hsb_head[0], hash-head can be one of
 * . cfs_hash_head_t
 * . cfs_hash_head_dep_t
 * . cfs_hash_dhead_t
 * . cfs_hash_dhead_dep_t
 * which depends on requirement of user
 * - some extra bytes (caller can require it while creating hash)
 */
typedef struct cfs_hash_bucket {
cfs_hash_lock_t hsb_lock; /**< bucket lock */
__u32 hsb_count; /**< current entries */
__u32 hsb_version; /**< change version */
unsigned int hsb_index; /**< index of bucket */
int hsb_depmax; /**< max depth on bucket */
long hsb_head[0]; /**< hash-head array */
} cfs_hash_bucket_t;
/**
* cfs_hash bucket descriptor, it's normally in stack of caller
*/
typedef struct cfs_hash_bd {
cfs_hash_bucket_t *bd_bucket; /**< address of bucket */
unsigned int bd_offset; /**< offset in bucket */
} cfs_hash_bd_t;
#define CFS_HASH_NAME_LEN 16 /**< default name length */
#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */
#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */
#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */
#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS
/**
* common hash attributes.
*/
enum cfs_hash_tag {
/**
 * don't need any lock, caller will protect operations with its
 * own lock. With this flag:
 * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
 * will be ignored.
 * . Some functions will be disabled with this flag, i.e:
 * cfs_hash_for_each_empty, cfs_hash_rehash
 */
CFS_HASH_NO_LOCK = 1 << 0,
/** no bucket lock, use one spinlock to protect the whole hash */
CFS_HASH_NO_BKTLOCK = 1 << 1,
/** rwlock to protect bucket */
CFS_HASH_RW_BKTLOCK = 1 << 2,
/** spinlock to protect bucket */
CFS_HASH_SPIN_BKTLOCK = 1 << 3,
/** always add new item to tail */
CFS_HASH_ADD_TAIL = 1 << 4,
/** hash-table doesn't have refcount on item */
CFS_HASH_NO_ITEMREF = 1 << 5,
/** big name for param-tree */
CFS_HASH_BIGNAME = 1 << 6,
/** track global count */
CFS_HASH_COUNTER = 1 << 7,
/** rehash item by new key */
CFS_HASH_REHASH_KEY = 1 << 8,
/** Enable dynamic hash resizing */
CFS_HASH_REHASH = 1 << 9,
/** can shrink hash-size */
CFS_HASH_SHRINK = 1 << 10,
/** assert hash is empty on exit */
CFS_HASH_ASSERT_EMPTY = 1 << 11,
/** record hlist depth */
CFS_HASH_DEPTH = 1 << 12,
/**
 * rehash is always scheduled in a different thread, so current
 * change on hash table is non-blocking
 */
CFS_HASH_NBLK_CHANGE = 1 << 13,
/** NB, we typed hs_flags as __u16, please change it
 * if you need to extend >=16 flags */
};
/** most used attributes */
#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \
CFS_HASH_COUNTER | CFS_HASH_REHASH)
/**
* cfs_hash is a hash-table implementation for general purpose, it can support:
* . two refcount modes
* hash-table with & without refcount
* . four lock modes
* nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
* . general operations
* lookup, add(add_tail or add_head), delete
* . rehash
* grows or shrink
* . iteration
* locked iteration and unlocked iteration
* . bigname
* support long name hash
* . debug
* trace max searching depth
*
* Rehash:
* When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
* is spawned to handle the rehash in the background, it's possible that other
* processes can concurrently perform additions, deletions, and lookups
* without being blocked on rehash completion, because rehash will release
* the global wrlock for each bucket.
*
* rehash and iteration can't run at the same time because it's too tricky
* to keep both of them safe and correct.
 * Since they are relatively rare operations:
 * . if iteration is in progress while we try to launch rehash, then
 * it just gives up; the iterator will launch rehash at the end.
* . if rehash is in progress while we try to iterate the hash table,
* then we just wait (shouldn't be very long time), anyway, nobody
* should expect iteration of whole hash-table to be non-blocking.
*
* During rehashing, a (key,object) pair may be in one of two buckets,
* depending on whether the worker task has yet to transfer the object
* to its new location in the table. Lookups and deletions need to search both
* locations; additions must take care to only insert into the new bucket.
*/
typedef struct cfs_hash {
/** serialize with rehash, or serialize all operations if
* the hash-table has CFS_HASH_NO_BKTLOCK */
cfs_hash_lock_t hs_lock;
/** hash operations */
struct cfs_hash_ops *hs_ops;
/** hash lock operations */
struct cfs_hash_lock_ops *hs_lops;
/** hash list operations */
struct cfs_hash_hlist_ops *hs_hops;
/** hash buckets-table */
cfs_hash_bucket_t **hs_buckets;
/** total number of items on this hash-table */
atomic_t hs_count;
/** hash flags, see cfs_hash_tag for detail */
__u16 hs_flags;
/** # of extra-bytes for bucket, for user saving extended attributes */
__u16 hs_extra_bytes;
/** wants to iterate */
__u8 hs_iterating;
/** hash-table is dying */
__u8 hs_exiting;
/** current hash bits */
__u8 hs_cur_bits;
/** min hash bits */
__u8 hs_min_bits;
/** max hash bits */
__u8 hs_max_bits;
/** bits for rehash */
__u8 hs_rehash_bits;
/** bits for each bucket */
__u8 hs_bkt_bits;
/** resize min threshold */
__u16 hs_min_theta;
/** resize max threshold */
__u16 hs_max_theta;
/** resize count */
__u32 hs_rehash_count;
/** # of iterators (caller of cfs_hash_for_each_*) */
__u32 hs_iterators;
/** rehash workitem */
cfs_workitem_t hs_rehash_wi;
/** refcount on this hash table */
atomic_t hs_refcount;
/** rehash buckets-table */
cfs_hash_bucket_t **hs_rehash_buckets;
#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
/** serialize debug members */
spinlock_t hs_dep_lock;
/** max depth */
unsigned int hs_dep_max;
/** id of the deepest bucket */
unsigned int hs_dep_bkt;
/** offset in the deepest bucket */
unsigned int hs_dep_off;
/** bits when we found the max depth */
unsigned int hs_dep_bits;
/** workitem to output max depth */
cfs_workitem_t hs_dep_wi;
#endif
/** name of htable */
char hs_name[0];
} cfs_hash_t;
typedef struct cfs_hash_lock_ops {
/** lock the hash table */
void (*hs_lock)(cfs_hash_lock_t *lock, int exclusive);
/** unlock the hash table */
void (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive);
/** lock the hash bucket */
void (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive);
/** unlock the hash bucket */
void (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive);
} cfs_hash_lock_ops_t;
typedef struct cfs_hash_hlist_ops {
/** return hlist_head of hash-head of @bd */
struct hlist_head *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd);
/** return hash-head size */
int (*hop_hhead_size)(cfs_hash_t *hs);
/** add @hnode to hash-head of @bd */
int (*hop_hnode_add)(cfs_hash_t *hs,
cfs_hash_bd_t *bd, struct hlist_node *hnode);
/** remove @hnode from hash-head of @bd */
int (*hop_hnode_del)(cfs_hash_t *hs,
cfs_hash_bd_t *bd, struct hlist_node *hnode);
} cfs_hash_hlist_ops_t;
typedef struct cfs_hash_ops {
/** return hashed value from @key */
unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask);
/** return key address of @hnode */
void * (*hs_key)(struct hlist_node *hnode);
/** copy key from @hnode to @key */
void (*hs_keycpy)(struct hlist_node *hnode, void *key);
/**
* compare @key with key of @hnode
* returns 1 on a match
*/
int (*hs_keycmp)(const void *key, struct hlist_node *hnode);
/** return object address of @hnode, i.e: container_of(...hnode) */
void * (*hs_object)(struct hlist_node *hnode);
/** get refcount of item, always called with holding bucket-lock */
void (*hs_get)(cfs_hash_t *hs, struct hlist_node *hnode);
/** release refcount of item */
void (*hs_put)(cfs_hash_t *hs, struct hlist_node *hnode);
/** release refcount of item, always called with holding bucket-lock */
void (*hs_put_locked)(cfs_hash_t *hs, struct hlist_node *hnode);
/** it's called before removing of @hnode */
void (*hs_exit)(cfs_hash_t *hs, struct hlist_node *hnode);
} cfs_hash_ops_t;
/** total number of buckets in @hs */
#define CFS_HASH_NBKT(hs) \
(1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))
/** total number of buckets in @hs while rehashing */
#define CFS_HASH_RH_NBKT(hs) \
(1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))
/** number of hlist for in bucket */
#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)
/** total number of hlist in @hs */
#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits)
/** total number of hlist in @hs while rehashing */
#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits)
/* Predicates over hs_flags; one per CFS_HASH_* tag (see enum above). */
static inline int
cfs_hash_with_no_lock(cfs_hash_t *hs)
{
/* caller will serialize all operations for this hash-table */
return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
}
static inline int
cfs_hash_with_no_bktlock(cfs_hash_t *hs)
{
/* no bucket lock, one single lock to protect the hash-table */
return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
}
static inline int
cfs_hash_with_rw_bktlock(cfs_hash_t *hs)
{
/* rwlock to protect hash bucket */
return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
}
static inline int
cfs_hash_with_spin_bktlock(cfs_hash_t *hs)
{
/* spinlock to protect hash bucket */
return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
}
static inline int
cfs_hash_with_add_tail(cfs_hash_t *hs)
{
/* new items are always appended at the hlist tail */
return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
}
static inline int
cfs_hash_with_no_itemref(cfs_hash_t *hs)
{
/* hash-table doesn't keep refcount on item,
 * item can't be removed from hash unless it's
 * ZERO refcount */
return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
}
static inline int
cfs_hash_with_bigname(cfs_hash_t *hs)
{
/* hs_name was sized CFS_HASH_BIGNAME_LEN for the param tree */
return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
}
static inline int
cfs_hash_with_counter(cfs_hash_t *hs)
{
/* a global item count (hs_count) is maintained */
return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
}
static inline int
cfs_hash_with_rehash(cfs_hash_t *hs)
{
/* dynamic hash resizing is enabled */
return (hs->hs_flags & CFS_HASH_REHASH) != 0;
}
static inline int
cfs_hash_with_rehash_key(cfs_hash_t *hs)
{
/* items may be rehashed under a new key */
return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
}
static inline int
cfs_hash_with_shrink(cfs_hash_t *hs)
{
/* the table may also shrink, not only grow */
return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
}
static inline int
cfs_hash_with_assert_empty(cfs_hash_t *hs)
{
/* assert the hash is empty on teardown */
return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
}
static inline int
cfs_hash_with_depth(cfs_hash_t *hs)
{
/* hlist depth is recorded (debugging aid) */
return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
}
static inline int
cfs_hash_with_nblk_change(cfs_hash_t *hs)
{
/* rehash runs in a separate thread; changes are non-blocking */
return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
}
static inline int
cfs_hash_is_exiting(cfs_hash_t *hs)
{ /* cfs_hash_destroy is called */
return hs->hs_exiting;
}
static inline int
cfs_hash_is_rehashing(cfs_hash_t *hs)
{ /* rehash is launched */
return hs->hs_rehash_bits != 0;
}
static inline int
cfs_hash_is_iterating(cfs_hash_t *hs)
{ /* someone is calling cfs_hash_for_each_* */
return hs->hs_iterating || hs->hs_iterators != 0;
}
/* Size in bytes of one bucket: the fixed cfs_hash_bucket_t header,
 * one hash-head (type chosen by hop_hhead_size) per hlist in the
 * bucket, plus the caller-requested extra bytes. */
static inline int
cfs_hash_bkt_size(cfs_hash_t *hs)
{
return offsetof(cfs_hash_bucket_t, hsb_head[0]) +
hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
hs->hs_extra_bytes;
}
/* Shorthand to reach a cfs_hash_ops callback of @hs */
#define CFS_HOP(hs, op) (hs)->hs_ops->hs_ ## op
/* hash @key to an index, bounded by @mask, via the hs_hash callback */
static inline unsigned
cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask)
{
return CFS_HOP(hs, hash)(hs, key, mask);
}
/* return the key address stored in @hnode */
static inline void *
cfs_hash_key(cfs_hash_t *hs, struct hlist_node *hnode)
{
return CFS_HOP(hs, key)(hnode);
}
/* copy the key of @hnode into @key; no-op when hs_keycpy is unset */
static inline void
cfs_hash_keycpy(cfs_hash_t *hs, struct hlist_node *hnode, void *key)
{
if (CFS_HOP(hs, keycpy) != NULL)
CFS_HOP(hs, keycpy)(hnode, key);
}
/**
 * Returns 1 on a match,
 */
static inline int
cfs_hash_keycmp(cfs_hash_t *hs, const void *key, struct hlist_node *hnode)
{
return CFS_HOP(hs, keycmp)(key, hnode);
}
/* return the object embedding @hnode (container_of-style) */
static inline void *
cfs_hash_object(cfs_hash_t *hs, struct hlist_node *hnode)
{
return CFS_HOP(hs, object)(hnode);
}
/* take a reference on @hnode's item (hs_get is called with the
 * bucket lock held, per cfs_hash_ops); NB "return" of a void call */
static inline void
cfs_hash_get(cfs_hash_t *hs, struct hlist_node *hnode)
{
return CFS_HOP(hs, get)(hs, hnode);
}
/* drop a reference while the bucket lock is held; hs_put_locked is
 * mandatory for users of this path */
static inline void
cfs_hash_put_locked(cfs_hash_t *hs, struct hlist_node *hnode)
{
LASSERT(CFS_HOP(hs, put_locked) != NULL);
return CFS_HOP(hs, put_locked)(hs, hnode);
}
/* drop a reference without the bucket lock held */
static inline void
cfs_hash_put(cfs_hash_t *hs, struct hlist_node *hnode)
{
LASSERT(CFS_HOP(hs, put) != NULL);
return CFS_HOP(hs, put)(hs, hnode);
}
/* notify the user before @hnode is removed; hs_exit is optional */
static inline void
cfs_hash_exit(cfs_hash_t *hs, struct hlist_node *hnode)
{
if (CFS_HOP(hs, exit))
CFS_HOP(hs, exit)(hs, hnode);
}
/* take the table-wide lock (@excl selects exclusive vs. shared mode,
 * interpreted by the installed cfs_hash_lock_ops) */
static inline void cfs_hash_lock(cfs_hash_t *hs, int excl)
{
hs->hs_lops->hs_lock(&hs->hs_lock, excl);
}
/* release the table-wide lock */
static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl)
{
hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
}
/* atomically decrement @condition and take hs_lock.spin when it hits
 * zero; only legal when the table uses a single spinlock (asserted) */
static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs,
atomic_t *condition)
{
LASSERT(cfs_hash_with_no_bktlock(hs));
return atomic_dec_and_lock(condition, &hs->hs_lock.spin);
}
/* lock the bucket referenced by descriptor @bd */
static inline void cfs_hash_bd_lock(cfs_hash_t *hs,
cfs_hash_bd_t *bd, int excl)
{
hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
}
/* unlock the bucket referenced by descriptor @bd */
static inline void cfs_hash_bd_unlock(cfs_hash_t *hs,
cfs_hash_bd_t *bd, int excl)
{
hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
}
/**
* operations on cfs_hash bucket (bd: bucket descriptor),
* they are normally for hash-table without rehash
*/
void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd);
/* resolve @key to its bucket descriptor and lock that bucket */
static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const void *key,
cfs_hash_bd_t *bd, int excl)
{
cfs_hash_bd_get(hs, key, bd);
cfs_hash_bd_lock(hs, bd, excl);
}
/* flatten @bd into a single global hlist index (bucket index shifted
 * by hs_bkt_bits, OR'd with the in-bucket offset) */
static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
{
return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
}
/* inverse of cfs_hash_bd_index_get(): rebuild @bd from a flat index */
static inline void cfs_hash_bd_index_set(cfs_hash_t *hs,
unsigned index, cfs_hash_bd_t *bd)
{
bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
}
/* address of the caller-owned extra bytes at the tail of the bucket
 * (cf. hs_extra_bytes and cfs_hash_bkt_size()) */
static inline void *
cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
{
return (void *)bd->bd_bucket +
cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
}
static inline __u32
cfs_hash_bd_version_get(cfs_hash_bd_t *bd)
{
/* need hold cfs_hash_bd_lock */
return bd->bd_bucket->hsb_version;
}
static inline __u32
cfs_hash_bd_count_get(cfs_hash_bd_t *bd)
{
/* need hold cfs_hash_bd_lock */
return bd->bd_bucket->hsb_count;
}
/* max hlist depth seen on this bucket (debug statistic) */
static inline int
cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd)
{
return bd->bd_bucket->hsb_depmax;
}
/* total order on bucket descriptors: by bucket index, then by offset;
 * returns <0, 0, >0 like a comparator */
static inline int
cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
{
if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
if (bd1->bd_offset != bd2->bd_offset)
return bd1->bd_offset - bd2->bd_offset;
return 0;
}
void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
struct hlist_node *hnode);
void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
struct hlist_node *hnode);
void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
cfs_hash_bd_t *bd_new, struct hlist_node *hnode);
/* atomically decrement @condition and take the bucket spinlock when it
 * reaches zero; only legal with spinlock-per-bucket tables (asserted) */
static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd,
atomic_t *condition)
{
LASSERT(cfs_hash_with_spin_bktlock(hs));
return atomic_dec_and_lock(condition,
&bd->bd_bucket->hsb_lock.spin);
}
/* hlist head that descriptor @bd refers to, via the hop_hhead callback */
static inline struct hlist_head *cfs_hash_bd_hhead(cfs_hash_t *hs,
cfs_hash_bd_t *bd)
{
return hs->hs_hops->hop_hhead(hs, bd);
}
struct hlist_node *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bd, const void *key);
struct hlist_node *cfs_hash_bd_peek_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bd, const void *key);
struct hlist_node *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bd, const void *key,
struct hlist_node *hnode,
int insist_add);
struct hlist_node *cfs_hash_bd_finddel_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bd, const void *key,
struct hlist_node *hnode);
/**
* operations on cfs_hash bucket (bd: bucket descriptor),
* they are safe for hash-table with rehash
*/
void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds);
void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
/* rehash-safe variant of cfs_hash_bd_get_and_lock(): resolves @key to
 * both candidate buckets (old and rehash table, cf. the "Rehash"
 * comment above) and locks them */
static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key,
cfs_hash_bd_t *bds, int excl)
{
cfs_hash_dual_bd_get(hs, key, bds);
cfs_hash_dual_bd_lock(hs, bds, excl);
}
struct hlist_node *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bds,
const void *key);
struct hlist_node *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bds,
const void *key,
struct hlist_node *hnode,
int insist_add);
struct hlist_node *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs,
cfs_hash_bd_t *bds,
const void *key,
struct hlist_node *hnode);
/* Hash init/cleanup functions */
cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
unsigned bkt_bits, unsigned extra_bytes,
unsigned min_theta, unsigned max_theta,
cfs_hash_ops_t *ops, unsigned flags);
cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs);
void cfs_hash_putref(cfs_hash_t *hs);
/* Hash addition functions */
void cfs_hash_add(cfs_hash_t *hs, const void *key,
struct hlist_node *hnode);
int cfs_hash_add_unique(cfs_hash_t *hs, const void *key,
struct hlist_node *hnode);
void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key,
struct hlist_node *hnode);
/* Hash deletion functions */
void *cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode);
void *cfs_hash_del_key(cfs_hash_t *hs, const void *key);
/* Hash lookup/for_each functions */
#define CFS_HASH_LOOP_HOG 1024
typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd,
struct hlist_node *node, void *data);
void *cfs_hash_lookup(cfs_hash_t *hs, const void *key);
void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
int cfs_hash_for_each_nolock(cfs_hash_t *hs,
cfs_hash_for_each_cb_t, void *data);
int cfs_hash_for_each_empty(cfs_hash_t *hs,
cfs_hash_for_each_cb_t, void *data);
void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key,
cfs_hash_for_each_cb_t, void *data);
typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data);
void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
cfs_hash_for_each_cb_t, void *data);
int cfs_hash_is_empty(cfs_hash_t *hs);
__u64 cfs_hash_size_get(cfs_hash_t *hs);
/*
* Rehash - Theta is calculated to be the average chained
 * hash depth assuming a perfectly uniform hash function.
*/
void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs);
void cfs_hash_rehash_cancel(cfs_hash_t *hs);
int cfs_hash_rehash(cfs_hash_t *hs, int do_rehash);
void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key,
void *new_key, struct hlist_node *hnode);
#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
/* Validate that @hnode really carries @key (debug builds only;
 * compiled out below when CFS_HASH_DEBUG_LEVEL is low). */
static inline void
cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
		      struct hlist_node *hnode)
{
	LASSERT(cfs_hash_keycmp(hs, key, hnode));
}
/* Validate hnode is in the correct bucket (debug builds only). */
static inline void
cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
			 struct hlist_node *hnode)
{
	cfs_hash_bd_t bds[2];
	/* with the dual-bucket (rehash-safe) API the node may legitimately
	 * live in either descriptor resolved from its key */
	cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
	LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
		bds[1].bd_bucket == bd->bd_bucket);
}
#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
static inline void
cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
struct hlist_node *hnode) {}
static inline void
cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
struct hlist_node *hnode) {}
#endif /* CFS_HASH_DEBUG_LEVEL */
#define CFS_HASH_THETA_BITS 10
#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1))
#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1))
/*
 * Integer component of a fixed-point theta value
 * (CFS_HASH_THETA_BITS fraction bits).
 */
static inline int __cfs_hash_theta_int(int theta)
{
	return theta >> CFS_HASH_THETA_BITS;
}
/* Fractional component of a fixed-point theta value, scaled to 0..999. */
static inline int __cfs_hash_theta_frac(int theta)
{
	int scaled = (theta * 1000) >> CFS_HASH_THETA_BITS;

	/* subtract the integer part (also scaled by 1000) */
	return scaled - 1000 * __cfs_hash_theta_int(theta);
}
/*
 * Current theta of @hs as a fixed-point value: element count divided by
 * the number of hash heads (i.e. average chain depth, see comment above).
 */
static inline int __cfs_hash_theta(cfs_hash_t *hs)
{
	int nelem = atomic_read(&hs->hs_count);

	return (nelem << CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
}
/*
 * Record the min/max theta bounds (fixed-point, CFS_HASH_THETA_BITS
 * fraction bits); values are truncated to 16 bits on store.
 * Presumably consulted by the rehash logic — confirm in the .c file.
 */
static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max)
{
	LASSERT(min < max);
	hs->hs_min_theta = (__u16)min;
	hs->hs_max_theta = (__u16)max;
}
/* Generic debug formatting routines mainly for proc handler */
int cfs_hash_debug_header(char *str, int size);
int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size);
/*
 * Generic djb2 hash (h = h * 33 + byte) over a character array,
 * masked into the table range.
 * NOTE(review): bytes are read through plain "char", so on signed-char
 * ABIs values > 0x7f are sign-extended before the add — presumably
 * intentional; confirm before switching to unsigned char.
 */
static inline unsigned
cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
{
	const char *bytes = key;
	unsigned int result = 5381;
	size_t off;

	LASSERT(key != NULL);

	for (off = 0; off < size; off++)
		result = (result << 5) + result + bytes[off];

	return result & mask;
}
/*
 * Generic u32 hash: multiplicative (golden-ratio) hashing, masked into
 * the table range.
 */
static inline unsigned
cfs_hash_u32_hash(const __u32 key, unsigned mask)
{
	__u32 mixed = key * CFS_GOLDEN_RATIO_PRIME_32;

	return mixed & mask;
}
/*
 * Generic u64 hash: multiplicative (golden-ratio) hashing, truncated
 * and masked into the table range.
 */
static inline unsigned
cfs_hash_u64_hash(const __u64 key, unsigned mask)
{
	__u64 mixed = key * CFS_GOLDEN_RATIO_PRIME_64;

	return (unsigned)mixed & mask;
}
/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */
#define cfs_hash_for_each_bd(bds, n, i) \
for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
/** iterate over all buckets of @hs */
#define cfs_hash_for_each_bucket(hs, bd, pos) \
for (pos = 0; \
pos < CFS_HASH_NBKT(hs) && \
((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
/** iterate over all hlist of bucket @bd */
#define cfs_hash_bd_for_each_hlist(hs, bd, hlist) \
for ((bd)->bd_offset = 0; \
(bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) && \
(hlist = cfs_hash_bd_hhead(hs, bd)) != NULL; \
(bd)->bd_offset++)
/* !__LIBCFS__HASH_H__ */
#endif

View file

@ -0,0 +1,200 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License version 2 for more details. A copy is
* included in the COPYING file that accompanied this code.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* GPL HEADER END
*/
/*
* Copyright (c) 2011 Intel Corporation
*/
/*
* libcfs/include/libcfs/heap.h
*
* Author: Eric Barton <eeb@whamcloud.com>
* Liang Zhen <liang@whamcloud.com>
*/
#ifndef __LIBCFS_HEAP_H__
#define __LIBCFS_HEAP_H__
/** \defgroup heap Binary heap
*
* The binary heap is a scalable data structure created using a binary tree. It
* is capable of maintaining large sets of elements sorted usually by one or
* more element properties, but really based on anything that can be used as a
* binary predicate in order to determine the relevant ordering of any two nodes
* that belong to the set. There is no search operation, rather the intention is
* for the element of the lowest priority which will always be at the root of
* the tree (as this is an implementation of a min-heap) to be removed by users
* for consumption.
*
* Users of the heap should embed a \e cfs_binheap_node_t object instance on
* every object of the set that they wish the binary heap instance to handle,
* and (at a minimum) provide a cfs_binheap_ops_t::hop_compare() implementation
* which is used by the heap as the binary predicate during its internal sorting
* operations.
*
* The current implementation enforces no locking scheme, and so assumes the
* user caters for locking between calls to insert, delete and lookup
* operations. Since the only consumer for the data structure at this point
* are NRS policies, and these operate on a per-CPT basis, binary heap instances
* are tied to a specific CPT.
* @{
*/
/**
* Binary heap node.
*
* Objects of this type are embedded into objects of the ordered set that is to
* be maintained by a \e cfs_binheap_t instance.
*/
typedef struct {
/** Index into the binary tree */
unsigned int chn_index;
} cfs_binheap_node_t;
#define CBH_SHIFT 9
#define CBH_SIZE (1 << CBH_SHIFT) /* # ptrs per level */
#define CBH_MASK (CBH_SIZE - 1)
#define CBH_NOB (CBH_SIZE * sizeof(cfs_binheap_node_t *))
#define CBH_POISON 0xdeadbeef
/**
* Binary heap flags.
*/
enum {
CBH_FLAG_ATOMIC_GROW = 1,
};
struct cfs_binheap;
/**
 * Binary heap operations.
 */
typedef struct {
	/**
	 * Called right before inserting a node into the binary heap.
	 *
	 * Implementing this operation is optional.
	 *
	 * \param[in] h The heap
	 * \param[in] e The node
	 *
	 * \retval 0 success
	 * \retval != 0 error
	 */
	int (*hop_enter)(struct cfs_binheap *h,
			 cfs_binheap_node_t *e);
	/**
	 * Called right after removing a node from the binary heap.
	 *
	 * Implementing this operation is optional.
	 *
	 * \param[in] h The heap
	 * \param[in] e The node
	 */
	void (*hop_exit)(struct cfs_binheap *h,
			 cfs_binheap_node_t *e);
	/**
	 * A binary predicate which is called during internal heap sorting
	 * operations, and used in order to determine the relevant ordering of
	 * two heap nodes.
	 *
	 * Implementing this operation is mandatory.
	 *
	 * \param[in] a The first heap node
	 * \param[in] b The second heap node
	 *
	 * \retval 0 Node a > node b
	 * \retval 1 Node a < node b
	 *
	 * \see cfs_binheap_bubble()
	 * \see cfs_binheap_sink()
	 */
	int (*hop_compare)(cfs_binheap_node_t *a,
			   cfs_binheap_node_t *b);
} cfs_binheap_ops_t;
/**
* Binary heap object.
*
* Sorts elements of type \e cfs_binheap_node_t
*/
typedef struct cfs_binheap {
/** Triple indirect */
cfs_binheap_node_t ****cbh_elements3;
/** double indirect */
cfs_binheap_node_t ***cbh_elements2;
/** single indirect */
cfs_binheap_node_t **cbh_elements1;
/** # elements referenced */
unsigned int cbh_nelements;
/** high water mark */
unsigned int cbh_hwm;
/** user flags */
unsigned int cbh_flags;
/** operations table */
cfs_binheap_ops_t *cbh_ops;
/** private data */
void *cbh_private;
/** associated CPT table */
struct cfs_cpt_table *cbh_cptab;
/** associated CPT id of this cfs_binheap_t::cbh_cptab */
int cbh_cptid;
} cfs_binheap_t;
void cfs_binheap_destroy(cfs_binheap_t *h);
cfs_binheap_t *cfs_binheap_create(cfs_binheap_ops_t *ops, unsigned int flags,
unsigned count, void *arg,
struct cfs_cpt_table *cptab, int cptid);
cfs_binheap_node_t *cfs_binheap_find(cfs_binheap_t *h, unsigned int idx);
int cfs_binheap_insert(cfs_binheap_t *h, cfs_binheap_node_t *e);
void cfs_binheap_remove(cfs_binheap_t *h, cfs_binheap_node_t *e);
/* Number of elements currently held in heap @h. */
static inline int
cfs_binheap_size(cfs_binheap_t *h)
{
	return (int)h->cbh_nelements;
}
/* Non-zero iff heap @h holds no elements. */
static inline int
cfs_binheap_is_empty(cfs_binheap_t *h)
{
	return !h->cbh_nelements;
}
/*
 * Peek at the root (minimum — this is a min-heap, see the header
 * comment above) without removing it; NULL when the heap is empty.
 */
static inline cfs_binheap_node_t *
cfs_binheap_root(cfs_binheap_t *h)
{
	return cfs_binheap_find(h, 0);
}
/*
 * Detach and return the root (minimum) element of heap @h, or NULL if
 * the heap is empty.
 */
static inline cfs_binheap_node_t *
cfs_binheap_remove_root(cfs_binheap_t *h)
{
	cfs_binheap_node_t *root = cfs_binheap_find(h, 0);

	if (root != NULL)
		cfs_binheap_remove(h, root);

	return root;
}
/** @} heap */
#endif /* __LIBCFS_HEAP_H__ */

View file

@ -0,0 +1,222 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_ioctl.h
*
* Low-level ioctl data structures. Kernel ioctl functions declared here,
* and user space functions are in libcfsutil_ioctl.h.
*
*/
#ifndef __LIBCFS_IOCTL_H__
#define __LIBCFS_IOCTL_H__
#define LIBCFS_IOCTL_VERSION 0x0001000a
/* Generic ioctl payload exchanged with userspace.  ioc_len covers the
 * whole structure including the inline bulk data (see
 * libcfs_ioctl_packlen()); validated by libcfs_ioctl_is_invalid(). */
struct libcfs_ioctl_data {
	__u32 ioc_len;		/* header + inline buffers, rounded */
	__u32 ioc_version;	/* LIBCFS_IOCTL_VERSION */
	__u64 ioc_nid;
	__u64 ioc_u64[1];
	__u32 ioc_flags;
	__u32 ioc_count;
	__u32 ioc_net;
	__u32 ioc_u32[7];
	__u32 ioc_inllen1;	/* length of first inline buffer in ioc_bulk */
	char *ioc_inlbuf1;
	__u32 ioc_inllen2;	/* length of second inline buffer in ioc_bulk */
	char *ioc_inlbuf2;
	__u32 ioc_plen1; /* buffers in userspace */
	char *ioc_pbuf1;
	__u32 ioc_plen2; /* buffers in userspace */
	char *ioc_pbuf2;
	/* inline payload; zero-length array (GCC extension), must be last */
	char ioc_bulk[0];
};
struct libcfs_ioctl_hdr {
__u32 ioc_len;
__u32 ioc_version;
};
/* Payload for the debug-mask ioctl: new subsystem and debug masks. */
struct libcfs_debug_ioctl_data {
	struct libcfs_ioctl_hdr hdr;
	unsigned int subs;
	unsigned int debug;
};
#define LIBCFS_IOC_INIT(data) \
do { \
memset(&data, 0, sizeof(data)); \
data.ioc_version = LIBCFS_IOCTL_VERSION; \
data.ioc_len = sizeof(data); \
} while (0)
struct libcfs_ioctl_handler {
struct list_head item;
int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data);
};
#define DECLARE_IOCTL_HANDLER(ident, func) \
struct libcfs_ioctl_handler ident = { \
/* .item = */ LIST_HEAD_INIT(ident.item), \
/* .handle_ioctl = */ func \
}
/* FIXME check conflict with lustre_lib.h */
#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long)
/* ioctls for manipulating snapshots 30- */
#define IOC_LIBCFS_TYPE 'e'
#define IOC_LIBCFS_MIN_NR 30
/* libcfs ioctls */
/*
 * NOTE(review): every _IOWR() below passes IOCTL_LIBCFS_TYPE as the size
 * argument, but only IOC_LIBCFS_TYPE is defined in this header.  It is
 * presumably supplied (e.g. as "long") by a header included before this
 * one — confirm, since otherwise expanding any IOC_LIBCFS_* macro fails
 * to compile.
 */
#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE)
/* lnet ioctls */
#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
/* lnd ioctls */
#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
#define IOC_LIBCFS_MAX_NR 80
/*
 * Total packed length of an ioctl payload: the structure itself plus
 * both inline buffers, each rounded by cfs_size_round().
 */
static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
{
	return sizeof(*data) +
	       cfs_size_round(data->ioc_inllen1) +
	       cfs_size_round(data->ioc_inllen2);
}
/*
 * Sanity-check an ioctl payload received from userspace.
 *
 * Returns 1 (logging the reason) when any length field is absurd, a
 * buffer/length pair is inconsistent, the packed length disagrees with
 * ioc_len, or an inline buffer is not NUL terminated; 0 if the data
 * looks usable.
 */
static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
{
	/* hard 1GB cap on every length field */
	if (data->ioc_len > (1<<30)) {
		CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n");
		return 1;
	}
	if (data->ioc_inllen1 > (1<<30)) {
		CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
		return 1;
	}
	if (data->ioc_inllen2 > (1<<30)) {
		CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
		return 1;
	}
	/* a buffer pointer without a length — or vice versa — is malformed */
	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
		CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
		return 1;
	}
	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
		CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
		return 1;
	}
	if (data->ioc_pbuf1 && !data->ioc_plen1) {
		CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
		return 1;
	}
	if (data->ioc_pbuf2 && !data->ioc_plen2) {
		CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
		return 1;
	}
	if (data->ioc_plen1 && !data->ioc_pbuf1) {
		CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
		return 1;
	}
	if (data->ioc_plen2 && !data->ioc_pbuf2) {
		CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
		return 1;
	}
	/* declared length must match the computed packed length exactly */
	if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) {
		CERROR ("LIBCFS ioctl: packlen != ioc_len\n");
		return 1;
	}
	/* inline buffers must be NUL terminated; buf2 starts at the
	 * rounded end of buf1 inside ioc_bulk */
	if (data->ioc_inllen1 &&
	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
		CERROR ("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
		return 1;
	}
	if (data->ioc_inllen2 &&
	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
			   data->ioc_inllen2 - 1] != '\0') {
		CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
		return 1;
	}
	return 0;
}
extern int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
extern int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg);
extern int libcfs_ioctl_popdata(void *arg, void *buf, int size);
#endif /* __LIBCFS_IOCTL_H__ */

View file

@ -0,0 +1,117 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* Author: Nathan Rutman <nathan.rutman@sun.com>
*
* libcfs/include/libcfs/libcfs_kernelcomm.h
*
* Kernel <-> userspace communication routines.
* The definitions below are used in the kernel and userspace.
*
*/
#ifndef __LIBCFS_KERNELCOMM_H__
#define __LIBCFS_KERNELCOMM_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
/* KUC message header.
* All current and future KUC messages should use this header.
* To avoid having to include Lustre headers from libcfs, define this here.
*/
/* Header carried at the start of every kernel<->user (KUC) message;
 * 8-byte aligned so payloads can follow directly. */
struct kuc_hdr {
	__u16 kuc_magic;	/* presumably always KUC_MAGIC — confirm */
	__u8 kuc_transport; /* Each new Lustre feature should use a different
				transport */
	__u8 kuc_flags;		/* KUC_FL_* */
	__u16 kuc_msgtype; /* Message type or opcode, transport-specific */
	__u16 kuc_msglen; /* Including header */
} __attribute__((aligned(sizeof(__u64))));
#define KUC_MAGIC 0x191C /*Lustre9etLinC */
#define KUC_FL_BLOCK 0x01 /* Wait for send */
/* kuc_msgtype values are defined in each transport */
enum kuc_transport_type {
KUC_TRANSPORT_GENERIC = 1,
KUC_TRANSPORT_HSM = 2,
KUC_TRANSPORT_CHANGELOG = 3,
};
enum kuc_generic_message_type {
KUC_MSG_SHUTDOWN = 1,
};
/* prototype for callback function on kuc groups */
typedef int (*libcfs_kkuc_cb_t)(__u32 data, void *cb_arg);
/* KUC Broadcast Groups. This determines which userspace process hears which
 * messages. Multiple transports may be used within a group, or multiple
* groups may use the same transport. Broadcast
* groups need not be used if e.g. a UID is specified instead;
* use group 0 to signify unicast.
*/
#define KUC_GRP_HSM 0x02
#define KUC_GRP_MAX KUC_GRP_HSM
/* Kernel methods */
extern int libcfs_kkuc_msg_put(struct file *fp, void *payload);
extern int libcfs_kkuc_group_put(int group, void *payload);
extern int libcfs_kkuc_group_add(struct file *fp, int uid, int group,
__u32 data);
extern int libcfs_kkuc_group_rem(int uid, int group);
extern int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func,
void *cb_arg);
#define LK_FLG_STOP 0x01
/* kernelcomm control structure, passed from userspace to kernel */
typedef struct lustre_kernelcomm {
	__u32 lk_wfd;	/* presumably write-side pipe fd — confirm vs libcfs_ukuc_start() */
	__u32 lk_rfd;	/* presumably read-side pipe fd — confirm */
	__u32 lk_uid;
	__u32 lk_group;	/* broadcast group; 0 = unicast (see comment above) */
	__u32 lk_data;
	__u32 lk_flags;	/* LK_FLG_* */
} __attribute__((packed)) lustre_kernelcomm;
/* Userspace methods */
extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups);
extern int libcfs_ukuc_stop(lustre_kernelcomm *l);
extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize,
int transport);
#endif /* __LIBCFS_KERNELCOMM_H__ */

View file

@ -0,0 +1,101 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_prim.h
*
* General primitives.
*
*/
#ifndef __LIBCFS_PRIM_H__
#define __LIBCFS_PRIM_H__
#ifndef EXPORT_SYMBOL
# define EXPORT_SYMBOL(s)
#endif
/*
* Schedule
*/
void cfs_pause(cfs_duration_t ticks);
/*
* Timer
*/
typedef void (cfs_timer_func_t)(ulong_ptr_t);
void schedule_timeout_and_set_state(cfs_task_state_t, int64_t);
void init_waitqueue_entry_current(wait_queue_t *link);
int64_t waitq_timedwait(wait_queue_t *, cfs_task_state_t, int64_t);
void waitq_wait(wait_queue_t *, cfs_task_state_t);
void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *);
void cfs_init_timer(timer_list_t *t);
void cfs_timer_init(timer_list_t *t, cfs_timer_func_t *func, void *arg);
void cfs_timer_done(timer_list_t *t);
void cfs_timer_arm(timer_list_t *t, cfs_time_t deadline);
void cfs_timer_disarm(timer_list_t *t);
int cfs_timer_is_armed(timer_list_t *t);
cfs_time_t cfs_timer_deadline(timer_list_t *t);
/*
* Memory
*/
#ifndef memory_pressure_get
#define memory_pressure_get() (0)
#endif
#ifndef memory_pressure_set
#define memory_pressure_set() do {} while (0)
#endif
#ifndef memory_pressure_clr
#define memory_pressure_clr() do {} while (0)
#endif
/*
 * Record whether the memory-pressure flag was already set, then ensure
 * it is set.  Returns the previous state, suitable for handing back to
 * cfs_memory_pressure_restore().
 */
static inline int cfs_memory_pressure_get_and_set(void)
{
	int was_set = memory_pressure_get();

	if (!was_set)
		memory_pressure_set();

	return was_set;
}
/*
 * Restore the memory-pressure flag to the state previously captured by
 * cfs_memory_pressure_get_and_set().
 *
 * \param old non-zero if the flag was set before
 *
 * Fix: dropped the redundant trailing "return;" at the end of this
 * void function.
 */
static inline void cfs_memory_pressure_restore(int old)
{
	if (old)
		memory_pressure_set();
	else
		memory_pressure_clr();
}
#endif

View file

@ -0,0 +1,568 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_private.h
*
* Various defines for libcfs.
*
*/
#ifndef __LIBCFS_PRIVATE_H__
#define __LIBCFS_PRIVATE_H__
/* XXX this layering violation is for nidstrings */
#include <linux/lnet/types.h>
#ifndef DEBUG_SUBSYSTEM
# define DEBUG_SUBSYSTEM S_UNDEFINED
#endif
/*
* When this is on, LASSERT macro includes check for assignment used instead
* of equality check, but doesn't have unlikely(). Turn this on from time to
* time to make test-builds. This shouldn't be on for production release.
*/
#define LASSERT_CHECKED (0)
#define LASSERTF(cond, fmt, ...) \
do { \
if (unlikely(!(cond))) { \
LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL); \
libcfs_debug_msg(&__msg_data, \
"ASSERTION( %s ) failed: " fmt, #cond, \
## __VA_ARGS__); \
lbug_with_loc(&__msg_data); \
} \
} while (0)
#define LASSERT(cond) LASSERTF(cond, "\n")
# define LINVRNT(exp) ((void)sizeof!!(exp))
#define KLASSERT(e) LASSERT(e)
void lbug_with_loc(struct libcfs_debug_msg_data *) __attribute__((noreturn));
#define LBUG() \
do { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \
lbug_with_loc(&msgdata); \
} while(0)
extern atomic_t libcfs_kmemory;
/*
* Memory
*/
# define libcfs_kmem_inc(ptr, size) \
do { \
atomic_add(size, &libcfs_kmemory); \
} while (0)
# define libcfs_kmem_dec(ptr, size) \
do { \
atomic_sub(size, &libcfs_kmemory); \
} while (0)
# define libcfs_kmem_read() \
atomic_read(&libcfs_kmemory)
#ifndef LIBCFS_VMALLOC_SIZE
#define LIBCFS_VMALLOC_SIZE (2 << PAGE_CACHE_SHIFT) /* 2 pages */
#endif
#define LIBCFS_ALLOC_PRE(size, mask) \
do { \
LASSERT(!in_interrupt() || \
((size) <= LIBCFS_VMALLOC_SIZE && \
((mask) & GFP_ATOMIC)) != 0); \
} while (0)
#define LIBCFS_ALLOC_POST(ptr, size) \
do { \
if (unlikely((ptr) == NULL)) { \
CERROR("LNET: out of memory at %s:%d (tried to alloc '" \
#ptr "' = %d)\n", __FILE__, __LINE__, (int)(size)); \
CERROR("LNET: %d total bytes allocated by lnet\n", \
libcfs_kmem_read()); \
} else { \
memset((ptr), 0, (size)); \
libcfs_kmem_inc((ptr), (size)); \
CDEBUG(D_MALLOC, "alloc '" #ptr "': %d at %p (tot %d).\n", \
(int)(size), (ptr), libcfs_kmem_read()); \
} \
} while (0)
/**
* allocate memory with GFP flags @mask
*/
#define LIBCFS_ALLOC_GFP(ptr, size, mask) \
do { \
LIBCFS_ALLOC_PRE((size), (mask)); \
(ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? \
kmalloc((size), (mask)) : vmalloc(size); \
LIBCFS_ALLOC_POST((ptr), (size)); \
} while (0)
/**
* default allocator
*/
#define LIBCFS_ALLOC(ptr, size) \
LIBCFS_ALLOC_GFP(ptr, size, __GFP_IO)
/**
* non-sleeping allocator
*/
#define LIBCFS_ALLOC_ATOMIC(ptr, size) \
LIBCFS_ALLOC_GFP(ptr, size, GFP_ATOMIC)
/**
* allocate memory for specified CPU partition
* \a cptab != NULL, \a cpt is CPU partition id of \a cptab
* \a cptab == NULL, \a cpt is HW NUMA node id
*/
#define LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, mask) \
do { \
LIBCFS_ALLOC_PRE((size), (mask)); \
(ptr) = (size) <= LIBCFS_VMALLOC_SIZE ? \
cfs_cpt_malloc((cptab), (cpt), (size), (mask)) : \
cfs_cpt_vmalloc((cptab), (cpt), (size)); \
LIBCFS_ALLOC_POST((ptr), (size)); \
} while (0)
/** default numa allocator */
#define LIBCFS_CPT_ALLOC(ptr, cptab, cpt, size) \
LIBCFS_CPT_ALLOC_GFP(ptr, cptab, cpt, size, __GFP_IO)
#define LIBCFS_FREE(ptr, size) \
do { \
int s = (size); \
if (unlikely((ptr) == NULL)) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
"%s:%d\n", s, __FILE__, __LINE__); \
break; \
} \
libcfs_kmem_dec((ptr), s); \
CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
s, (ptr), libcfs_kmem_read()); \
if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
vfree(ptr); \
else \
kfree(ptr); \
} while (0)
/******************************************************************************/
/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */
#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
#define ___htonl(x) __cpu_to_be32(x)
#define ___htons(x) __cpu_to_be16(x)
#define ___ntohl(x) __be32_to_cpu(x)
#define ___ntohs(x) __be16_to_cpu(x)
#define htonl(x) ___htonl(x)
#define ntohl(x) ___ntohl(x)
#define htons(x) ___htons(x)
#define ntohs(x) ___ntohs(x)
#endif
void libcfs_debug_dumpstack(task_t *tsk);
void libcfs_run_upcall(char **argv);
void libcfs_run_lbug_upcall(struct libcfs_debug_msg_data *);
void libcfs_debug_dumplog(void);
int libcfs_debug_init(unsigned long bufsize);
int libcfs_debug_cleanup(void);
int libcfs_debug_clear_buffer(void);
int libcfs_debug_mark_buffer(const char *text);
void libcfs_debug_set_level(unsigned int debug_level);
/*
* allocate per-cpu-partition data, returned value is an array of pointers,
* variable can be indexed by CPU ID.
* cptable != NULL: size of array is number of CPU partitions
* cptable == NULL: size of array is number of HW cores
*/
void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
/*
 * destroy a per-cpu-partition variable
*/
void cfs_percpt_free(void *vars);
int cfs_percpt_number(void *vars);
void *cfs_percpt_current(void *vars);
void *cfs_percpt_index(void *vars, int idx);
#define cfs_percpt_for_each(var, i, vars) \
for (i = 0; i < cfs_percpt_number(vars) && \
((var) = (vars)[i]) != NULL; i++)
/*
* allocate a variable array, returned value is an array of pointers.
* Caller can specify length of array by count.
*/
void *cfs_array_alloc(int count, unsigned int size);
void cfs_array_free(void *vars);
#define LASSERT_ATOMIC_ENABLED (1)
#if LASSERT_ATOMIC_ENABLED
/** assert value of @a is equal to @v */
#define LASSERT_ATOMIC_EQ(a, v) \
do { \
LASSERTF(atomic_read(a) == v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is unequal to @v */
#define LASSERT_ATOMIC_NE(a, v) \
do { \
LASSERTF(atomic_read(a) != v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is little than @v */
#define LASSERT_ATOMIC_LT(a, v) \
do { \
LASSERTF(atomic_read(a) < v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is little/equal to @v */
#define LASSERT_ATOMIC_LE(a, v) \
do { \
LASSERTF(atomic_read(a) <= v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is great than @v */
#define LASSERT_ATOMIC_GT(a, v) \
do { \
LASSERTF(atomic_read(a) > v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is great/equal to @v */
#define LASSERT_ATOMIC_GE(a, v) \
do { \
LASSERTF(atomic_read(a) >= v, \
"value: %d\n", atomic_read((a))); \
} while (0)
/** assert value of @a is great than @v1 and little than @v2 */
#define LASSERT_ATOMIC_GT_LT(a, v1, v2) \
do { \
int __v = atomic_read(a); \
LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v); \
} while (0)
/** assert value of @a is great than @v1 and little/equal to @v2 */
#define LASSERT_ATOMIC_GT_LE(a, v1, v2) \
do { \
int __v = atomic_read(a); \
LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v); \
} while (0)
/** assert value of @a is greater than or equal to @v1 and less than @v2 */
#define LASSERT_ATOMIC_GE_LT(a, v1, v2) \
do { \
int __v = atomic_read(a); \
LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v); \
} while (0)
/** assert value of @a is greater than or equal to @v1 and less than or equal to @v2 */
#define LASSERT_ATOMIC_GE_LE(a, v1, v2) \
do { \
int __v = atomic_read(a); \
LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v); \
} while (0)
#else /* !LASSERT_ATOMIC_ENABLED */
#define LASSERT_ATOMIC_EQ(a, v) do {} while (0)
#define LASSERT_ATOMIC_NE(a, v) do {} while (0)
#define LASSERT_ATOMIC_LT(a, v) do {} while (0)
#define LASSERT_ATOMIC_LE(a, v) do {} while (0)
#define LASSERT_ATOMIC_GT(a, v) do {} while (0)
#define LASSERT_ATOMIC_GE(a, v) do {} while (0)
#define LASSERT_ATOMIC_GT_LT(a, v1, v2) do {} while (0)
#define LASSERT_ATOMIC_GT_LE(a, v1, v2) do {} while (0)
#define LASSERT_ATOMIC_GE_LT(a, v1, v2) do {} while (0)
#define LASSERT_ATOMIC_GE_LE(a, v1, v2) do {} while (0)
#endif /* LASSERT_ATOMIC_ENABLED */
#define LASSERT_ATOMIC_ZERO(a) LASSERT_ATOMIC_EQ(a, 0)
#define LASSERT_ATOMIC_POS(a) LASSERT_ATOMIC_GT(a, 0)
/* Allocate/free a single object of the pointed-to type.
 * Note: the trailing semicolons inside the original macro bodies were
 * removed; callers terminate the statement themselves, and the embedded
 * ';' broke uses such as "if (x) CFS_ALLOC_PTR(p); else ...". */
#define CFS_ALLOC_PTR(ptr) LIBCFS_ALLOC(ptr, sizeof (*(ptr)))
#define CFS_FREE_PTR(ptr)  LIBCFS_FREE(ptr, sizeof (*(ptr)))
/*
* percpu partition lock
*
* There are some use-cases like this in Lustre:
* . each CPU partition has it's own private data which is frequently changed,
* and mostly by the local CPU partition.
* . all CPU partitions share some global data, these data are rarely changed.
*
 * LNet is a typical example.
* CPU partition lock is designed for this kind of use-cases:
* . each CPU partition has it's own private lock
* . change on private data just needs to take the private lock
* . read on shared data just needs to take _any_ of private locks
* . change on shared data needs to take _all_ private locks,
* which is slow and should be really rare.
*/
enum {
CFS_PERCPT_LOCK_EX = -1, /* negative */
};
struct cfs_percpt_lock {
/* cpu-partition-table for this lock */
struct cfs_cpt_table *pcl_cptab;
/* exclusively locked */
unsigned int pcl_locked;
/* private lock table */
spinlock_t **pcl_locks;
};
/* return number of private locks */
static inline int
cfs_percpt_lock_num(struct cfs_percpt_lock *pcl)
{
return cfs_cpt_number(pcl->pcl_cptab);
}
/*
* create a cpu-partition lock based on CPU partition table \a cptab,
* each private lock has extra \a psize bytes padding data
*/
struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab);
/* destroy a cpu-partition lock */
void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
/* lock private lock \a index of \a pcl */
void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
/* unlock private lock \a index of \a pcl */
void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
/* create percpt (atomic) refcount based on @cptab */
atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val);
/* destroy percpt refcount */
void cfs_percpt_atomic_free(atomic_t **refs);
/* return sum of all percpu refs */
int cfs_percpt_atomic_summary(atomic_t **refs);
/** Compile-time assertion.
* Check an invariant described by a constant expression at compile time by
* forcing a compiler error if it does not hold. \a cond must be a constant
* expression as defined by the ISO C Standard:
*
* 6.8.4.2 The switch statement
* ....
* [#3] The expression of each case label shall be an integer
* constant expression and no two of the case constant
* expressions in the same switch statement shall have the same
* value after conversion...
*
*/
#define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0)
/* support decl needed both by kernel and liblustre */
int libcfs_isknown_lnd(int type);
char *libcfs_lnd2modname(int type);
char *libcfs_lnd2str(int type);
int libcfs_str2lnd(const char *str);
char *libcfs_net2str(__u32 net);
char *libcfs_nid2str(lnet_nid_t nid);
__u32 libcfs_str2net(const char *str);
lnet_nid_t libcfs_str2nid(const char *str);
int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
char *libcfs_id2str(lnet_process_id_t id);
void cfs_free_nidlist(struct list_head *list);
int cfs_parse_nidlist(char *str, int len, struct list_head *list);
int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
/** \addtogroup lnet_addr
* @{ */
/* how an LNET NID encodes net:address */
/** extract the address part of an lnet_nid_t */
#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff))
/** extract the network part of an lnet_nid_t */
#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff)
/** make an lnet_nid_t from a network part and an address part */
#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr)))
/* how net encodes type:number */
#define LNET_NETNUM(net) ((net) & 0xffff)
#define LNET_NETTYP(net) (((net) >> 16) & 0xffff)
#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num)))
/** @} lnet_addr */
/* max value for numeric network address */
#define MAX_NUMERIC_VALUE 0xffffffff
/* implication */
#define ergo(a, b) (!(a) || (b))
/* logical equivalence */
#define equi(a, b) (!!(a) == !!(b))
#ifndef CFS_CURRENT_TIME
# define CFS_CURRENT_TIME time(0)
#endif
/* --------------------------------------------------------------------
* Light-weight trace
* Support for temporary event tracing with minimal Heisenberg effect.
* All stuff about lwt are put in arch/kp30.h
* -------------------------------------------------------------------- */
struct libcfs_device_userstate
{
int ldu_memhog_pages;
struct page *ldu_memhog_root_page;
};
/* what used to be in portals_lib.h */
#ifndef MIN
# define MIN(a,b) (((a)<(b)) ? (a): (b))
#endif
#ifndef MAX
# define MAX(a,b) (((a)>(b)) ? (a): (b))
#endif
/* Return \a ptr if it is non-NULL, otherwise the empty string.
 * The whole expansion is parenthesized: the original
 * "((ptr))? (ptr) : """ had no outer parens, so expressions like
 * MKSTR(p)[0] or "x" MKSTR(p) parsed incorrectly. */
#define MKSTR(ptr) ((ptr) ? (ptr) : "")
/* Round \a val up to the next multiple of 4 (negative values round
 * toward zero the same way the original mask form did). */
static inline int cfs_size_round4 (int val)
{
	const int align = 4;

	return (val + align - 1) & ~(align - 1);
}
#ifndef HAVE_CFS_SIZE_ROUND
/* Round \a val up to an 8-byte boundary. */
static inline int cfs_size_round (int val)
{
	const int align = 8;

	return (val + align - 1) & ~(align - 1);
}
#define HAVE_CFS_SIZE_ROUND
#endif
/* Round \a val up to a 16-byte boundary. */
static inline int cfs_size_round16(int val)
{
	const int align = 16;

	return (val + align - 1) & ~(align - 1);
}
/* Round \a val up to a 32-byte boundary. */
static inline int cfs_size_round32(int val)
{
	const int align = 32;

	return (val + align - 1) & ~(align - 1);
}
/* Like cfs_size_round() but reserves one extra byte for non-zero input;
 * zero maps to zero. */
static inline int cfs_size_round0(int val)
{
	return val == 0 ? 0 : (val + 1 + 7) & ~0x7;
}
/* Length of \a fset including its NUL terminator, rounded up to 8 bytes. */
static inline size_t cfs_round_strlen(char *fset)
{
	int nob = (int)strlen(fset) + 1;

	return (size_t)cfs_size_round(nob);
}
/* roundup \a val to power2 */
/*
 * Round \a val up to the nearest power of two; values that are already a
 * power of two (and 0) are returned unchanged.
 * NOTE(review): assumes LOWEST_BIT_SET(x) isolates the least significant
 * set bit (i.e. x & -x) -- TODO confirm against its definition.
 */
static inline unsigned int cfs_power2_roundup(unsigned int val)
{
	if (val != LOWEST_BIT_SET(val)) { /* not a power of 2 already */
		do {
			/* strip low-order bits until one bit remains */
			val &= ~LOWEST_BIT_SET(val);
		} while (val != LOWEST_BIT_SET(val));
		/* ...and round up */
		val <<= 1;
	}
	return val;
}
#define LOGL(var,len,ptr) \
do { \
if (var) \
memcpy((char *)ptr, (const char *)var, len); \
ptr += cfs_size_round(len); \
} while (0)
#define LOGU(var,len,ptr) \
do { \
if (var) \
memcpy((char *)var, (const char *)ptr, len); \
ptr += cfs_size_round(len); \
} while (0)
#define LOGL0(var,len,ptr) \
do { \
if (!len) \
break; \
memcpy((char *)ptr, (const char *)var, len); \
*((char *)(ptr) + len) = 0; \
ptr += cfs_size_round(len + 1); \
} while (0)
/**
* Lustre Network Driver types.
*/
enum {
/* Only add to these values (i.e. don't ever change or redefine them):
* network addresses depend on them... */
QSWLND = 1,
SOCKLND = 2,
GMLND = 3, /* obsolete, keep it so that libcfs_nid2str works */
PTLLND = 4,
O2IBLND = 5,
CIBLND = 6,
OPENIBLND = 7,
IIBLND = 8,
LOLND = 9,
RALND = 10,
VIBLND = 11,
MXLND = 12,
GNILND = 13,
};
#endif

View file

@ -0,0 +1,137 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_string.h
*
* Generic string manipulation functions.
*
* Author: Nathan Rutman <nathan.rutman@sun.com>
*/
#ifndef __LIBCFS_STRING_H__
#define __LIBCFS_STRING_H__
/* libcfs_string.c */
/* string comparison ignoring case */
int cfs_strncasecmp(const char *s1, const char *s2, size_t n);
/* Convert a text string to a bitmask */
int cfs_str2mask(const char *str, const char *(*bit2str)(int bit),
int *oldmask, int minmask, int allmask);
/* Allocate space for and copy an existing string.
* Must free with kfree().
*/
char *cfs_strdup(const char *str, u_int32_t flags);
/* safe vsnprintf */
int cfs_vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
/* safe snprintf */
int cfs_snprintf(char *buf, size_t size, const char *fmt, ...);
/* trim leading and trailing space characters */
char *cfs_firststr(char *str, size_t size);
/**
* Structure to represent NULL-less strings.
*/
struct cfs_lstr {
	/* start of the string; per the comment above this struct, it is not
	 * necessarily NUL-terminated */
	char		*ls_str;
	/* length of the string in bytes */
	int		ls_len;
};
/*
* Structure to represent \<range_expr\> token of the syntax.
*/
struct cfs_range_expr {
/*
* Link to cfs_expr_list::el_exprs.
*/
struct list_head re_link;
__u32 re_lo;
__u32 re_hi;
__u32 re_stride;
};
struct cfs_expr_list {
	/* link on a caller-owned list of expression lists -- presumably the
	 * list built by cfs_expr_list_parse(); confirm against callers */
	struct list_head	el_link;
	/* list of struct cfs_range_expr, chained via re_link */
	struct list_head	el_exprs;
};
/* Return 1 if \a c is a whitespace character (space, tab, LF or CR),
 * 0 otherwise. */
static inline int
cfs_iswhite(char c)
{
	return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
char *cfs_trimwhite(char *str);
int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
int cfs_str2num_check(char *str, int nob, unsigned *num,
unsigned min, unsigned max);
int cfs_range_expr_parse(struct cfs_lstr *src, unsigned min, unsigned max,
int single_tok, struct cfs_range_expr **expr);
int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list);
int cfs_expr_list_values(struct cfs_expr_list *expr_list,
int max, __u32 **values);
static inline void
cfs_expr_list_values_free(__u32 *values, int num)
{
/* This array is allocated by LIBCFS_ALLOC(), so it shouldn't be freed
* by OBD_FREE() if it's called by module other than libcfs & LNet,
* otherwise we will see fake memory leak */
LIBCFS_FREE(values, num * sizeof(values[0]));
}
void cfs_expr_list_free(struct cfs_expr_list *expr_list);
void cfs_expr_list_print(struct cfs_expr_list *expr_list);
int cfs_expr_list_parse(char *str, int len, unsigned min, unsigned max,
struct cfs_expr_list **elpp);
void cfs_expr_list_free_list(struct list_head *list);
int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
int cfs_ip_addr_match(__u32 addr, struct list_head *list);
void cfs_ip_addr_free(struct list_head *list);
#define strtoul(str, endp, base) simple_strtoul(str, endp, base)
#endif

View file

@ -0,0 +1,132 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_time.h
*
* Time functions.
*
*/
#ifndef __LIBCFS_TIME_H__
#define __LIBCFS_TIME_H__
/*
* generic time manipulation functions.
*/
/* return time stamp \a t advanced by duration \a d */
static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
{
	return (cfs_time_t)(t + d);
}
/* duration between two time stamps: \a t1 - \a t2 */
static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
{
	/* cast matches the declared return type; the original cast to
	 * cfs_time_t was a copy/paste slip from cfs_time_add() */
	return (cfs_duration_t)(t1 - t2);
}
/* non-zero iff \a t1 is after \a t2; delegates to cfs_time_before() with
 * the arguments swapped */
static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
{
	return cfs_time_before(t2, t1);
}
/* non-zero iff \a t1 is after or equal to \a t2; delegates to
 * cfs_time_beforeq() with the arguments swapped */
static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
{
	return cfs_time_beforeq(t2, t1);
}
/* absolute time stamp \a seconds seconds from now */
static inline cfs_time_t cfs_time_shift(int seconds)
{
	return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
}
/*
 * Difference (\a large - \a small) in microseconds; if \a result is
 * non-NULL it also receives the difference as a struct timeval.
 * NOTE(review): for large < small the '%'/'/' split would produce a
 * non-normalized timeval (negative tv_usec) -- callers presumably pass
 * large >= small; confirm before relying on negative results.
 */
static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
				   struct timeval *result)
{
	long r = (long) (
		(large->tv_sec - small->tv_sec) * ONE_MILLION +
		(large->tv_usec - small->tv_usec));
	if (result != NULL) {
		result->tv_usec = r % ONE_MILLION;
		result->tv_sec = r / ONE_MILLION;
	}
	return r;
}
/*
 * Emit an error if more than \a seconds seconds have elapsed since \a now.
 * The original body hard-coded a 15 second threshold and ignored the
 * \a seconds parameter entirely; it is now honoured.
 */
static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
{
	if (cfs_time_after(cfs_time_current(),
			   cfs_time_add(now, cfs_time_seconds(seconds))))
		CERROR("slow %s "CFS_TIME_T" sec\n", msg,
		       cfs_duration_sec(cfs_time_sub(cfs_time_current(), now)));
}
/*
 * Per-call-site rate limiter; evaluates to non-zero when the caller may
 * emit a message.
 * NOTE(review): __next_message is only advanced in the *suppressed*
 * branch, so after the quiet period one call is allowed and the following
 * call re-arms the timer -- this looks inverted (one would expect the
 * allowed branch to re-arm), but the code is preserved as-is; confirm the
 * intent before changing.
 */
#define CFS_RATELIMIT(seconds) \
({ \
	/* \
	 * XXX nikita: non-portable initializer \
	 */ \
	static time_t __next_message = 0; \
	int result; \
 \
	if (cfs_time_after(cfs_time_current(), __next_message)) \
		result = 1; \
	else { \
		__next_message = cfs_time_shift(seconds); \
		result = 0; \
	} \
	result; \
})
/*
* helper function similar to do_gettimeofday() of Linux kernel
*/
/* fill \a tv with the current filesystem time converted to a timeval */
static inline void cfs_fs_timeval(struct timeval *tv)
{
	cfs_fs_time_t time;
	/* fetch the current time, then convert it into *tv */
	cfs_fs_time_current(&time);
	cfs_fs_time_usec(&time, tv);
}
/*
* return valid time-out based on user supplied one. Currently we only check
 * that the time-out is not shorter than allowed.
*/
/* Clamp a user-supplied time-out so it is never shorter than one tick. */
static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
{
	return timeout < CFS_TICK ? CFS_TICK : timeout;
}
#endif

View file

@ -0,0 +1,110 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/libcfs_workitem.h
*
* Author: Isaac Huang <he.h.huang@oracle.com>
* Liang Zhen <zhen.liang@sun.com>
*
* A workitems is deferred work with these semantics:
* - a workitem always runs in thread context.
* - a workitem can be concurrent with other workitems but is strictly
* serialized with respect to itself.
* - no CPU affinity, a workitem does not necessarily run on the same CPU
* that schedules it. However, this might change in the future.
* - if a workitem is scheduled again before it has a chance to run, it
* runs only once.
* - if a workitem is scheduled while it runs, it runs again after it
* completes; this ensures that events occurring while other events are
* being processed receive due attention. This behavior also allows a
* workitem to reschedule itself.
*
* Usage notes:
* - a workitem can sleep but it should be aware of how that sleep might
* affect others.
* - a workitem runs inside a kernel thread so there's no user space to access.
* - do not use a workitem if the scheduling latency can't be tolerated.
*
* When wi_action returns non-zero, it means the workitem has either been
* freed or reused and workitem scheduler won't touch it any more.
*/
#ifndef __LIBCFS_WORKITEM_H__
#define __LIBCFS_WORKITEM_H__
struct cfs_wi_sched;
void cfs_wi_sched_destroy(struct cfs_wi_sched *);
int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
int nthrs, struct cfs_wi_sched **);
struct cfs_workitem;
typedef int (*cfs_wi_action_t) (struct cfs_workitem *);
typedef struct cfs_workitem {
/** chain on runq or rerunq */
struct list_head wi_list;
/** working function */
cfs_wi_action_t wi_action;
/** arg for working function */
void *wi_data;
/** in running */
unsigned short wi_running:1;
/** scheduled */
unsigned short wi_scheduled:1;
} cfs_workitem_t;
/* Initialize workitem \a wi so that \a action(\a wi) will run over
 * \a data; the item starts neither running nor scheduled. */
static inline void
cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action)
{
	wi->wi_data	 = data;
	wi->wi_action	 = action;
	wi->wi_running	 = 0;
	wi->wi_scheduled = 0;
	INIT_LIST_HEAD(&wi->wi_list);
}
void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
int cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
int cfs_wi_startup(void);
void cfs_wi_shutdown(void);
/** # workitem scheduler loops before reschedule */
#define CFS_WI_RESCHED 128
#endif /* __LIBCFS_WORKITEM_H__ */

View file

@ -0,0 +1,286 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LIBCFS_LINUX_KP30_H__
#define __LIBCFS_LINUX_KP30_H__
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/notifier.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/version.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <linux/rwsem.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/smp.h>
#include <linux/ctype.h>
#include <linux/compiler.h>
#ifdef HAVE_MM_INLINE
# include <linux/mm_inline.h>
#endif
#include <linux/kallsyms.h>
#include <linux/moduleparam.h>
#include <linux/scatterlist.h>
#include <linux/libcfs/linux/portals_compat25.h>
#define prepare_work(wq,cb,cbdata) \
do { \
INIT_WORK((wq), (void *)(cb)); \
} while (0)
#define cfs_get_work_data(type,field,data) container_of(data,type,field)
#define our_recalc_sigpending(current) recalc_sigpending()
#define strtok(a,b) strpbrk(a, b)
#define work_struct_t struct work_struct
#ifdef CONFIG_SMP
#else
#endif
#define SEM_COUNT(sem) ((sem)->count)
/* ------------------------------------------------------------------- */
#define PORTAL_SYMBOL_REGISTER(x)
#define PORTAL_SYMBOL_UNREGISTER(x)
/******************************************************************************/
/* Module parameter support */
#define CFS_MODULE_PARM(name, t, type, perm, desc) \
module_param(name, type, perm);\
MODULE_PARM_DESC(name, desc)
#define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */
/******************************************************************************/
#if (__GNUC__)
/* Use the special GNU C __attribute__ hack to have the compiler check the
* printf style argument string against the actual argument count and
* types.
*/
#ifdef printf
# warning printf has been defined as a macro...
# undef printf
#endif
#endif /* __GNUC__ */
# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
# define time(a) CURRENT_TIME
# define cfs_num_present_cpus() num_present_cpus()
/******************************************************************************/
/* Light-weight trace
* Support for temporary event tracing with minimal Heisenberg effect. */
#define LWT_SUPPORT 0
#define LWT_MEMORY (16<<20)
#ifndef KLWT_SUPPORT
# if !defined(BITS_PER_LONG)
# error "BITS_PER_LONG not defined"
# endif
/* kernel hasn't defined this? */
typedef struct {
long long lwte_when;
char *lwte_where;
void *lwte_task;
long lwte_p1;
long lwte_p2;
long lwte_p3;
long lwte_p4;
# if BITS_PER_LONG > 32
long lwte_pad;
# endif
} lwt_event_t;
#endif /* !KLWT_SUPPORT */
#if LWT_SUPPORT
# if !KLWT_SUPPORT
typedef struct _lwt_page {
struct list_head lwtp_list;
struct page *lwtp_page;
lwt_event_t *lwtp_events;
} lwt_page_t;
typedef struct {
int lwtc_current_index;
lwt_page_t *lwtc_current_page;
} lwt_cpu_t;
extern int lwt_enabled;
extern lwt_cpu_t lwt_cpus[];
/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
* This stuff is meant for finding specific problems; it never stays in
* production code... */
#define LWTSTR(n) #n
#define LWTWHERE(f,l) f ":" LWTSTR(l)
#define LWT_EVENTS_PER_PAGE (PAGE_CACHE_SIZE / sizeof (lwt_event_t))
#define LWT_EVENT(p1, p2, p3, p4) \
do { \
unsigned long flags; \
lwt_cpu_t *cpu; \
lwt_page_t *p; \
lwt_event_t *e; \
\
if (lwt_enabled) { \
local_irq_save (flags); \
\
cpu = &lwt_cpus[smp_processor_id()]; \
p = cpu->lwtc_current_page; \
e = &p->lwtp_events[cpu->lwtc_current_index++]; \
\
if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
cpu->lwtc_current_page = \
list_entry (p->lwtp_list.next, \
lwt_page_t, lwtp_list); \
cpu->lwtc_current_index = 0; \
} \
\
e->lwte_when = get_cycles(); \
e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
e->lwte_task = current; \
e->lwte_p1 = (long)(p1); \
e->lwte_p2 = (long)(p2); \
e->lwte_p3 = (long)(p3); \
e->lwte_p4 = (long)(p4); \
\
local_irq_restore (flags); \
} \
} while (0)
#endif /* !KLWT_SUPPORT */
extern int lwt_init (void);
extern void lwt_fini (void);
extern int lwt_lookup_string (int *size, char *knlptr,
char *usrptr, int usrsize);
extern int lwt_control (int enable, int clear);
extern int lwt_snapshot (cfs_cycles_t *now, int *ncpu, int *total_size,
void *user_ptr, int user_size);
#endif /* LWT_SUPPORT */
/* ------------------------------------------------------------------ */
#define IOCTL_LIBCFS_TYPE long
#ifdef __CYGWIN__
# ifndef BITS_PER_LONG
# define BITS_PER_LONG 64
# endif
#endif
# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
/* this is a bit chunky */
#define _LWORDSIZE BITS_PER_LONG
# define LPU64 "%llu"
# define LPD64 "%lld"
# define LPX64 "%#llx"
# define LPX64i "%llx"
# define LPO64 "%#llo"
# define LPF64 "L"
/*
* long_ptr_t & ulong_ptr_t, same to "long" for gcc
*/
# define LPLU "%lu"
# define LPLD "%ld"
# define LPLX "%#lx"
/*
* pid_t
*/
# define LPPID "%d"
#undef _LWORDSIZE
/* compat macros */
#ifndef get_cpu
# ifdef CONFIG_PREEMPT
# define get_cpu() ({ preempt_disable(); smp_processor_id(); })
# define put_cpu() preempt_enable()
# else
# define get_cpu() smp_processor_id()
# define put_cpu()
# endif
#else
#endif /* get_cpu & put_cpu */
#define INIT_CTL_NAME(a)
#define INIT_STRATEGY(a)
#endif

View file

@ -0,0 +1,131 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LIBCFS_LINUX_LIBCFS_H__
#define __LIBCFS_LINUX_LIBCFS_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <stdarg.h>
#include <linux/libcfs/linux/linux-cpu.h>
#include <linux/libcfs/linux/linux-time.h>
#include <linux/libcfs/linux/linux-mem.h>
#include <linux/libcfs/linux/linux-prim.h>
#include <linux/libcfs/linux/linux-lock.h>
#include <linux/libcfs/linux/linux-fs.h>
#include <linux/libcfs/linux/linux-tcpip.h>
#include <linux/libcfs/linux/linux-bitops.h>
#include <linux/libcfs/linux/linux-types.h>
#include <linux/libcfs/linux/kp30.h>
#include <asm/types.h>
#include <linux/types.h>
#include <asm/timex.h>
#include <linux/sched.h> /* THREAD_SIZE */
#include <linux/rbtree.h>
#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
#if !defined(__x86_64__)
# ifdef __ia64__
# define CDEBUG_STACK() (THREAD_SIZE - \
((unsigned long)__builtin_dwarf_cfa() & \
(THREAD_SIZE - 1)))
# else
# define CDEBUG_STACK() (THREAD_SIZE - \
((unsigned long)__builtin_frame_address(0) & \
(THREAD_SIZE - 1)))
# endif /* __ia64__ */
#define __CHECK_STACK(msgdata, mask, cdls) \
do { \
if (unlikely(CDEBUG_STACK() > libcfs_stack)) { \
LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL); \
libcfs_stack = CDEBUG_STACK(); \
libcfs_debug_msg(msgdata, \
"maximum lustre stack %lu\n", \
CDEBUG_STACK()); \
(msgdata)->msg_mask = mask; \
(msgdata)->msg_cdls = cdls; \
dump_stack(); \
/*panic("LBUG");*/ \
} \
} while (0)
#define CFS_CHECK_STACK(msgdata, mask, cdls) __CHECK_STACK(msgdata, mask, cdls)
#else /* __x86_64__ */
#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while(0)
#define CDEBUG_STACK() (0L)
#endif /* __x86_64__ */
/* initial pid */
#define LUSTRE_LNET_PID 12345
#define ENTRY_NESTING_SUPPORT (1)
#define ENTRY_NESTING do {;} while (0)
#define EXIT_NESTING do {;} while (0)
#define __current_nesting_level() (0)
/**
* Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
*
* Implementation is in linux-curproc.c
*/
#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
#include <linux/capability.h>
/*
* No stack-back-tracing in Linux for now.
*/
struct cfs_stack_trace {
};
/* long integer with size equal to pointer */
typedef unsigned long ulong_ptr_t;
typedef long long_ptr_t;
#ifndef WITH_WATCHDOG
#define WITH_WATCHDOG
#endif
#endif /* __LIBCFS_LINUX_LIBCFS_H__ */

View file

@ -0,0 +1,38 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-bitops.h
*/
#include <linux/bitops.h>

View file

@ -0,0 +1,175 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA
*
* GPL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-mem.h
*
* Basic library routines.
*
* Author: liang@whamcloud.com
*/
#ifndef __LIBCFS_LINUX_CPU_H__
#define __LIBCFS_LINUX_CPU_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/topology.h>
#include <linux/version.h>
#ifdef CONFIG_SMP
#define HAVE_LIBCFS_CPT
/**
 * Virtual processing unit: one CPU partition, i.e. a subset of CPUs and
 * the NUMA nodes they belong to, handled as a unit by the CPT code.
 */
struct cfs_cpu_partition {
/* CPUs mask for this partition */
cpumask_t *cpt_cpumask;
/* nodes mask for this partition */
nodemask_t *cpt_nodemask;
/* spread rotor for NUMA allocator: round-robin cursor used when
 * spreading allocations across the nodes of this partition */
unsigned cpt_spread_rotor;
};
/**
 * Descriptor for a whole CPU-partition table: the set of partitions plus
 * aggregate cpu/node masks and a reverse map from HW CPU id to partition.
 */
struct cfs_cpt_table {
/* version, reserved for hotplug */
unsigned ctb_version;
/* spread rotor for NUMA allocator (table-wide, cf. cpt_spread_rotor) */
unsigned ctb_spread_rotor;
/* # of CPU partitions */
unsigned ctb_nparts;
/* partitions tables: array of ctb_nparts entries */
struct cfs_cpu_partition *ctb_parts;
/* shadow HW CPU to CPU partition ID: ctb_cpu2cpt[cpu] -> partition */
int *ctb_cpu2cpt;
/* all cpus in this partition table (union of the partitions' masks) */
cpumask_t *ctb_cpumask;
/* all nodes in this partition table */
nodemask_t *ctb_nodemask;
};
void cfs_cpu_core_siblings(int cpu, cpumask_t *mask);
void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask);
void cfs_node_to_cpumask(int node, cpumask_t *mask);
int cfs_cpu_core_nsiblings(int cpu);
int cfs_cpu_ht_nsiblings(int cpu);
/**
* comment out definitions for compatible layer
* #define CFS_CPU_NR NR_CPUS
*
* typedef cpumask_t cfs_cpumask_t;
*
* #define cfs_cpu_current() smp_processor_id()
* #define cfs_cpu_online(i) cpu_online(i)
* #define cfs_cpu_online_num() num_online_cpus()
* #define cfs_cpu_online_for_each(i) for_each_online_cpu(i)
* #define cfs_cpu_possible_num() num_possible_cpus()
* #define cfs_cpu_possible_for_each(i) for_each_possible_cpu(i)
*
* #ifdef CONFIG_CPUMASK_SIZE
* #define cfs_cpu_mask_size() cpumask_size()
* #else
* #define cfs_cpu_mask_size() sizeof(cfs_cpumask_t)
* #endif
*
* #define cfs_cpu_mask_set(i, mask) cpu_set(i, mask)
* #define cfs_cpu_mask_unset(i, mask) cpu_clear(i, mask)
* #define cfs_cpu_mask_isset(i, mask) cpu_isset(i, mask)
* #define cfs_cpu_mask_clear(mask) cpus_clear(mask)
* #define cfs_cpu_mask_empty(mask) cpus_empty(mask)
* #define cfs_cpu_mask_weight(mask) cpus_weight(mask)
* #define cfs_cpu_mask_first(mask) first_cpu(mask)
* #define cfs_cpu_mask_any_online(mask) (any_online_cpu(mask) != NR_CPUS)
* #define cfs_cpu_mask_for_each(i, mask) for_each_cpu_mask(i, mask)
* #define cfs_cpu_mask_bind(t, mask) set_cpus_allowed(t, mask)
*
* #ifdef HAVE_CPUMASK_COPY
* #define cfs_cpu_mask_copy(dst, src) cpumask_copy(dst, src)
* #else
* #define cfs_cpu_mask_copy(dst, src) memcpy(dst, src, sizeof(*src))
* #endif
*
* static inline void
* cfs_cpu_mask_of_online(cfs_cpumask_t *mask)
* {
* cfs_cpu_mask_copy(mask, &cpu_online_map);
* }
*
* #ifdef CONFIG_NUMA
*
* #define CFS_NODE_NR MAX_NUMNODES
*
* typedef nodemask_t cfs_node_mask_t;
*
* #define cfs_node_of_cpu(cpu) cpu_to_node(cpu)
* #define cfs_node_online(i) node_online(i)
* #define cfs_node_online_num() num_online_nodes()
* #define cfs_node_online_for_each(i) for_each_online_node(i)
* #define cfs_node_possible_num() num_possible_nodes()
* #define cfs_node_possible_for_each(i) for_each_node(i)
*
* static inline void cfs_node_to_cpumask(int node, cfs_cpumask_t *mask)
* {
* #if defined(HAVE_NODE_TO_CPUMASK)
* *mask = node_to_cpumask(node);
* #elif defined(HAVE_CPUMASK_OF_NODE)
* cfs_cpu_mask_copy(mask, cpumask_of_node(node));
* #else
* # error "Needs node_to_cpumask or cpumask_of_node"
* #endif
* }
*
* #define cfs_node_mask_set(i, mask) node_set(i, mask)
* #define cfs_node_mask_unset(i, mask) node_clear(i, mask)
* #define cfs_node_mask_isset(i, mask) node_isset(i, mask)
* #define cfs_node_mask_clear(mask) nodes_reset(mask)
* #define cfs_node_mask_empty(mask) nodes_empty(mask)
* #define cfs_node_mask_weight(mask) nodes_weight(mask)
* #define cfs_node_mask_for_each(i, mask) for_each_node_mask(i, mask)
* #define cfs_node_mask_copy(dst, src) memcpy(dst, src, sizeof(*src))
*
* static inline void
* cfs_node_mask_of_online(cfs_node_mask_t *mask)
* {
* cfs_node_mask_copy(mask, &node_online_map);
* }
*
* #endif
*/
#endif /* CONFIG_SMP */
#endif /* __LIBCFS_LINUX_CPU_H__ */

View file

@ -0,0 +1,49 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*/
/**
* Linux crypto hash specific functions.
*/
/**
* Functions for start/stop shash CRC32 algorithm.
*/
int cfs_crypto_crc32_register(void);
void cfs_crypto_crc32_unregister(void);
/**
* Functions for start/stop shash adler32 algorithm.
*/
int cfs_crypto_adler32_register(void);
void cfs_crypto_adler32_unregister(void);
/**
* Functions for start/stop shash crc32 pclmulqdq
*/
int cfs_crypto_crc32_pclmul_register(void);
void cfs_crypto_crc32_pclmul_unregister(void);

View file

@ -0,0 +1,95 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-fs.h
*
* Basic library routines.
*/
#ifndef __LIBCFS_LINUX_CFS_FS_H__
#define __LIBCFS_LINUX_CFS_FS_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/mount.h>
#include <linux/backing-dev.h>
#include <linux/posix_acl_xattr.h>
#define filp_size(f) \
(i_size_read((f)->f_dentry->d_inode))
#define filp_poff(f) \
(&(f)->f_pos)
# define do_fsync(fp, flag) \
((fp)->f_op->fsync(fp, 0, LLONG_MAX, flag))
#define filp_read(fp, buf, size, pos) \
((fp)->f_op->read((fp), (buf), (size), pos))
#define filp_write(fp, buf, size, pos) \
((fp)->f_op->write((fp), (buf), (size), pos))
#define filp_fsync(fp) \
do_fsync(fp, 1)
#define flock_type(fl) ((fl)->fl_type)
#define flock_set_type(fl, type) do { (fl)->fl_type = (type); } while (0)
#define flock_pid(fl) ((fl)->fl_pid)
#define flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while (0)
#define flock_start(fl) ((fl)->fl_start)
#define flock_set_start(fl, st) do { (fl)->fl_start = (st); } while (0)
#define flock_end(fl) ((fl)->fl_end)
#define flock_set_end(fl, end) do { (fl)->fl_end = (end); } while (0)
ssize_t filp_user_write(struct file *filp, const void *buf, size_t count,
loff_t *offset);
#ifndef IFSHIFT
#define IFSHIFT 12
#endif
#ifndef IFTODT
#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT)
#endif
#ifndef DTTOIF
#define DTTOIF(dirtype) ((dirtype) << IFSHIFT)
#endif
#endif

View file

@ -0,0 +1,204 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-lock.h
*
* Basic library routines.
*/
#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
#define __LIBCFS_LINUX_CFS_LOCK_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <linux/mutex.h>
/*
* IMPORTANT !!!!!!!!
*
* Lock declarations are not guaranteed to be initialized,
* although some of them are initialized in Linux. All locks
* declared by CFS_DECL_* should be initialized explicitly.
*/
/*
* spin_lock "implementation" (use Linux kernel's primitives)
*
* - spin_lock_init(x)
* - spin_lock(x)
* - spin_lock_bh(x)
* - spin_lock_bh_init(x)
* - spin_unlock(x)
* - spin_unlock_bh(x)
* - spin_trylock(x)
* - spin_is_locked(x)
*
* - spin_lock_irq(x)
* - spin_lock_irqsave(x, f)
* - spin_unlock_irqrestore(x, f)
* - read_lock_irqsave(lock, f)
* - write_lock_irqsave(lock, f)
* - write_unlock_irqrestore(lock, f)
*/
/*
* spinlock "implementation"
*/
/*
* rw_semaphore "implementation" (use Linux kernel's primitives)
*
* - sema_init(x)
* - init_rwsem(x)
* - down_read(x)
* - up_read(x)
* - down_write(x)
* - up_write(x)
*/
#define fini_rwsem(s) do {} while (0)
/*
* rwlock_t "implementation" (use Linux kernel's primitives)
*
* - rwlock_init(x)
* - read_lock(x)
* - read_unlock(x)
* - write_lock(x)
* - write_unlock(x)
* - write_lock_bh(x)
* - write_unlock_bh(x)
*
* - RW_LOCK_UNLOCKED
*/
#ifndef DEFINE_RWLOCK
#define DEFINE_RWLOCK(lock) rwlock_t lock = __RW_LOCK_UNLOCKED(lock)
#endif
/*
* completion "implementation" (use Linux kernel's primitives)
*
* - DECLARE_COMPLETION(work)
* - INIT_COMPLETION(c)
* - COMPLETION_INITIALIZER(work)
* - init_completion(c)
* - complete(c)
* - wait_for_completion(c)
* - wait_for_completion_interruptible(c)
* - fini_completion(c)
*/
#define fini_completion(c) do { } while (0)
/*
* semaphore "implementation" (use Linux kernel's primitives)
* - DEFINE_SEMAPHORE(name)
* - sema_init(sem, val)
* - up(sem)
* - down(sem)
* - down_interruptible(sem)
* - down_trylock(sem)
*/
/*
* mutex "implementation" (use Linux kernel's primitives)
*
* - DEFINE_MUTEX(name)
* - mutex_init(x)
* - mutex_lock(x)
* - mutex_unlock(x)
* - mutex_trylock(x)
* - mutex_is_locked(x)
* - mutex_destroy(x)
*/
#ifndef lockdep_set_class
/**************************************************************************
 *
 * Lockdep "implementation". Also see liblustre.h
 *
 **************************************************************************/
/* Stub lock_class_key for kernels without lockdep: carries no state and
 * exists only so that lockdep_set_class() call sites compile unchanged. */
struct lock_class_key {
;
};
/* sizeof() references both arguments without evaluating them, so callers
 * still get a compile error for malformed expressions, but no code. */
#define lockdep_set_class(lock, key) \
do { (void)sizeof(lock); (void)sizeof(key); } while (0)
/* This has to be a macro, so that `subclass' can be undefined in kernels
 * that do not support lockdep. */
static inline void lockdep_off(void)
{
}
static inline void lockdep_on(void)
{
}
#else
#endif /* lockdep_set_class */
#ifndef CONFIG_DEBUG_LOCK_ALLOC
#ifndef mutex_lock_nested
#define mutex_lock_nested(mutex, subclass) mutex_lock(mutex)
#endif
#ifndef spin_lock_nested
#define spin_lock_nested(lock, subclass) spin_lock(lock)
#endif
#ifndef down_read_nested
#define down_read_nested(lock, subclass) down_read(lock)
#endif
#ifndef down_write_nested
#define down_write_nested(lock, subclass) down_write(lock)
#endif
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
#endif /* __LIBCFS_LINUX_CFS_LOCK_H__ */

View file

@ -0,0 +1,139 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-mem.h
*
* Basic library routines.
*/
#ifndef __LIBCFS_LINUX_CFS_MEM_H__
#define __LIBCFS_LINUX_CFS_MEM_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#define CFS_PAGE_MASK (~((__u64)PAGE_CACHE_SIZE-1))
#define page_index(p) ((p)->index)
#define memory_pressure_get() (current->flags & PF_MEMALLOC)
#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
#if BITS_PER_LONG == 32
/* limit to lowmem on 32-bit systems: cap the page-cache budget at 3/4 of
 * 1 GiB worth of pages.  NB: the shift must be parenthesized -- `<<' binds
 * looser than `*' and `/', so without the parens the expression would be
 * 1UL << (((30 - PAGE_CACHE_SHIFT) * 3) / 4), i.e. only ~32 MB of pages. */
#define NUM_CACHEPAGES \
	min(num_physpages, (1UL << (30 - PAGE_CACHE_SHIFT)) * 3 / 4)
#else
/* 64-bit: no lowmem concern, use all physical pages */
#define NUM_CACHEPAGES num_physpages
#endif
/*
* In Linux there is no way to determine whether current execution context is
* blockable.
*/
#define ALLOC_ATOMIC_TRY GFP_ATOMIC
#define DECL_MMSPACE mm_segment_t __oldfs
#define MMSPACE_OPEN \
do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
#define MMSPACE_CLOSE set_fs(__oldfs)
/*
* NUMA allocators
*
* NB: we will rename these functions in a separate patch:
* - rename kmalloc to cfs_malloc
* - rename kmalloc/free_page to cfs_page_alloc/free
* - rename kmalloc/free_large to cfs_vmalloc/vfree
*/
extern void *cfs_cpt_malloc(struct cfs_cpt_table *cptab, int cpt,
size_t nr_bytes, unsigned int flags);
extern void *cfs_cpt_vmalloc(struct cfs_cpt_table *cptab, int cpt,
size_t nr_bytes);
extern struct page *cfs_page_cpt_alloc(struct cfs_cpt_table *cptab,
int cpt, unsigned int flags);
extern void *cfs_mem_cache_cpt_alloc(struct kmem_cache *cachep,
struct cfs_cpt_table *cptab,
int cpt, unsigned int flags);
/*
* Shrinker
*/
# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \
struct shrinker *shrinker, \
struct shrink_control *sc
# define shrink_param(sc, var) ((sc)->var)
typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
static inline
struct shrinker *set_shrinker(int seek, shrinker_t func)
{
	/**
	 * Allocate and register a memory shrinker.
	 *
	 * @seek: cost of recreating a cached object (stored in ->seeks)
	 * @func: callback invoked by the VM under memory pressure
	 *
	 * Returns the registered shrinker, or NULL on allocation failure.
	 * The caller owns the result and must release it with
	 * remove_shrinker().
	 */
	struct shrinker *s;

	/* kzalloc, not kmalloc: struct shrinker has fields this helper
	 * never sets (e.g. ->batch, list linkage) which must not contain
	 * stack/heap garbage when handed to register_shrinker(). */
	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->shrink = func;
	s->seeks = seek;
	register_shrinker(s);

	return s;
}
static inline
void remove_shrinker(struct shrinker *shrinker)
{
	/* Tear down a shrinker created by set_shrinker() and free it.
	 * Passing NULL is a harmless no-op. */
	if (shrinker != NULL) {
		unregister_shrinker(shrinker);
		kfree(shrinker);
	}
}
#endif /* __LINUX_CFS_MEM_H__ */

View file

@ -0,0 +1,243 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-prim.h
*
* Basic library routines.
*/
#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
#define __LIBCFS_LINUX_CFS_PRIM_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/proc_fs.h>
#include <linux/mm.h>
#include <linux/timer.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/miscdevice.h>
#include <linux/libcfs/linux/portals_compat25.h>
#include <asm/div64.h>
#include <linux/libcfs/linux/linux-time.h>
/*
* CPU
*/
#ifdef for_each_possible_cpu
#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
#elif defined(for_each_cpu)
#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
#endif
#ifdef NR_CPUS
#else
#define NR_CPUS 1
#endif
#define cfs_set_cpus_allowed(t, mask) set_cpus_allowed(t, mask)
/*
* cache
*/
/*
* IRQs
*/
/*
* Pseudo device register
*/
typedef struct miscdevice psdev_t;
/*
* Sysctl register
*/
typedef struct ctl_table ctl_table_t;
typedef struct ctl_table_header ctl_table_header_t;
#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
#define DECLARE_PROC_HANDLER(name) \
static int \
LL_PROC_PROTO(name) \
{ \
DECLARE_LL_PROC_PPOS_DECL; \
\
return proc_call_handler(table->data, write, \
ppos, buffer, lenp, \
__##name); \
}
/*
* Symbol register
*/
#define cfs_symbol_register(s, p) do {} while(0)
#define cfs_symbol_unregister(s) do {} while(0)
#define cfs_symbol_get(s) symbol_get(s)
#define cfs_symbol_put(s) symbol_put(s)
typedef struct module module_t;
/*
* Proc file system APIs
*/
typedef struct proc_dir_entry proc_dir_entry_t;
/*
* Wait Queue
*/
typedef long cfs_task_state_t;
#define CFS_DECL_WAITQ(wq) DECLARE_WAIT_QUEUE_HEAD(wq)
/*
* Task struct
*/
typedef struct task_struct task_t;
#define DECL_JOURNAL_DATA void *journal_info
#define PUSH_JOURNAL do { \
journal_info = current->journal_info; \
current->journal_info = NULL; \
} while(0)
#define POP_JOURNAL do { \
current->journal_info = journal_info; \
} while(0)
/* Module interfaces */
#define cfs_module(name, version, init, fini) \
module_init(init); \
module_exit(fini)
/*
* Signal
*/
/*
* Timer
*/
typedef struct timer_list timer_list_t;
#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */
#define __wait_event_timeout(wq, condition, timeout, ret) \
do { \
int __ret = 0; \
if (!(condition)) { \
wait_queue_t __wait; \
unsigned long expire; \
\
init_waitqueue_entry(&__wait, current); \
expire = timeout + jiffies; \
add_wait_queue(&wq, &__wait); \
for (;;) { \
set_current_state(TASK_UNINTERRUPTIBLE); \
if (condition) \
break; \
if (jiffies > expire) { \
ret = jiffies - expire; \
break; \
} \
schedule_timeout(timeout); \
} \
current->state = TASK_RUNNING; \
remove_wait_queue(&wq, &__wait); \
} \
} while (0)
/*
retval == 0; condition met; we're good.
retval > 0; timed out.
*/
#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \
do { \
ret = 0; \
if (!(condition)) \
__wait_event_timeout(wq, condition, timeout, ret); \
} while (0)
#else
#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \
ret = wait_event_timeout(wq, condition, timeout)
#endif
#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \
ret = wait_event_interruptible_timeout(wq, c, timeout)
/*
* atomic
*/
#define cfs_atomic_add_unless(atom, a, u) atomic_add_unless(atom, a, u)
#define cfs_atomic_cmpxchg(atom, old, nv) atomic_cmpxchg(atom, old, nv)
/*
* membar
*/
/*
* interrupt
*/
/*
* might_sleep
*/
/*
* group_info
*/
typedef struct group_info group_info_t;
/*
* Random bytes
*/
#endif

View file

@ -0,0 +1,87 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-tcpip.h
*
* Basic library routines.
*/
#ifndef __LIBCFS_LINUX_CFS_TCP_H__
#define __LIBCFS_LINUX_CFS_TCP_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
#include <net/sock.h>
#ifndef HIPQUAD
// XXX Should just kill all users
#if defined(__LITTLE_ENDIAN)
#define HIPQUAD(addr) \
((unsigned char *)&addr)[3], \
((unsigned char *)&addr)[2], \
((unsigned char *)&addr)[1], \
((unsigned char *)&addr)[0]
#elif defined(__BIG_ENDIAN)
#define HIPQUAD NIPQUAD
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */
#endif
typedef struct socket socket_t;
#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf)
#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
/* Pending error code recorded on @sock's sk (sk_err); 0 means no error. */
static inline int
cfs_sock_error(struct socket *sock)
{
return sock->sk->sk_err;
}
/* Bytes currently queued for transmission on @sock (sk_wmem_queued). */
static inline int
cfs_sock_wmem_queued(struct socket *sock)
{
return sock->sk->sk_wmem_queued;
}
#define cfs_sk_sleep(sk) sk_sleep(sk)
#define DEFAULT_NET (&init_net)
#endif

View file

@ -0,0 +1,275 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/linux/linux-time.h
*
* Implementation of portable time API for Linux (kernel and user-level).
*
* Author: Nikita Danilov <nikita@clusterfs.com>
*/
#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
#define __LIBCFS_LINUX_LINUX_TIME_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
#endif
/* Portable time API */
/*
* Platform provides three opaque data-types:
*
* cfs_time_t represents point in time. This is internal kernel
* time rather than "wall clock". This time bears no
* relation to gettimeofday().
*
* cfs_duration_t represents time interval with resolution of internal
* platform clock
*
* cfs_fs_time_t represents instance in world-visible time. This is
* used in file-system time-stamps
*
* cfs_time_t cfs_time_current(void);
* cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
* cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
* int cfs_impl_time_before (cfs_time_t, cfs_time_t);
* int cfs_impl_time_before_eq(cfs_time_t, cfs_time_t);
*
* cfs_duration_t cfs_duration_build(int64_t);
*
* time_t cfs_duration_sec (cfs_duration_t);
* void cfs_duration_usec(cfs_duration_t, struct timeval *);
* void cfs_duration_nsec(cfs_duration_t, struct timespec *);
*
* void cfs_fs_time_current(cfs_fs_time_t *);
* time_t cfs_fs_time_sec (cfs_fs_time_t *);
* void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
* void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
* int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
* int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
*
* CFS_TIME_FORMAT
* CFS_DURATION_FORMAT
*
*/
#define ONE_BILLION ((u_int64_t)1000000000)
#define ONE_MILLION 1000000
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/time.h>
#include <asm/div64.h>
#include <linux/libcfs/linux/portals_compat25.h>
/*
* post 2.5 kernels.
*/
#include <linux/jiffies.h>
/* File-system visible ("wall clock") time is carried as a struct timespec. */
typedef struct timespec cfs_fs_time_t;
/* Convert @t to a struct timeval in @v (truncates to microseconds). */
static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
{
v->tv_sec = t->tv_sec;
v->tv_usec = t->tv_nsec / 1000;
}
/* Copy @t into a plain struct timespec @s (identical representation). */
static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
{
*s = *t;
}
/*
 * internal helper function used by cfs_fs_time_before*():
 * flatten sec/nsec into a single 64-bit nanosecond count so timestamps
 * can be compared with ordinary integer operators.
 */
static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
{
return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec;
}
/*
 * Generic kernel stuff
 */
/* Point in internal kernel time; no relation to gettimeofday(). */
typedef unsigned long cfs_time_t; /* jiffies */
/* Interval measured in jiffies. */
typedef long cfs_duration_t;
typedef cycles_t cfs_cycles_t;
/* Jiffies-wrap-safe "t1 strictly earlier than t2". */
static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
{
return time_before(t1, t2);
}
/* Jiffies-wrap-safe "t1 earlier than or equal to t2". */
static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
{
return time_before_eq(t1, t2);
}
/* Current internal kernel time, in jiffies. */
static inline cfs_time_t cfs_time_current(void)
{
return jiffies;
}
/* Current wall-clock time in whole seconds. */
static inline time_t cfs_time_current_sec(void)
{
return get_seconds();
}
/* Store the current wall-clock time (nanosecond resolution) in *t. */
static inline void cfs_fs_time_current(cfs_fs_time_t *t)
{
*t = CURRENT_TIME;
}
/* Seconds portion of a file-system timestamp. */
static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
{
return t->tv_sec;
}
/* True when *t1 is strictly earlier than *t2. */
static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
{
return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2);
}
/* True when *t1 is earlier than or the same instant as *t2. */
static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
{
return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2);
}
#if 0
static inline cfs_duration_t cfs_duration_build(int64_t nano)
{
#if (BITS_PER_LONG == 32)
/* We cannot use do_div(t, ONE_BILLION), do_div can only process
* 64 bits n and 32 bits base */
int64_t t = nano * HZ;
do_div(t, 1000);
do_div(t, 1000000);
return (cfs_duration_t)t;
#else
return (nano * HZ / ONE_BILLION);
#endif
}
#endif
/* Convert a duration given in whole seconds to jiffies. */
static inline cfs_duration_t cfs_time_seconds(int seconds)
{
return ((cfs_duration_t)seconds) * HZ;
}
/* Convert a jiffies duration back to whole seconds (truncating). */
static inline time_t cfs_duration_sec(cfs_duration_t d)
{
return d / HZ;
}
/* Split jiffies duration @d into seconds + microseconds in @s. */
static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
{
#if (BITS_PER_LONG == 32) && (HZ > 4096)
/* With 32-bit longs and a large HZ, remainder * ONE_MILLION can
 * overflow a long, so widen to 64 bits and divide with do_div()
 * (32-bit kernels lack a native 64/32 divide). */
__u64 t;
s->tv_sec = d / HZ;
t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION;
do_div(t, HZ);
s->tv_usec = t;
#else
s->tv_sec = d / HZ;
s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * \
ONE_MILLION) / HZ;
#endif
}
/* Split jiffies duration @d into seconds + nanoseconds in @s. */
static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
{
#if (BITS_PER_LONG == 32)
/* remainder * ONE_BILLION needs 64 bits; use do_div() because 32-bit
 * kernels have no native 64/32 divide. */
__u64 t;
s->tv_sec = d / HZ;
t = (d - s->tv_sec * HZ) * ONE_BILLION;
do_div(t, HZ);
s->tv_nsec = t;
#else
s->tv_sec = d / HZ;
s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ;
#endif
}
/* Current time as a 64-bit jiffies counter. */
#define cfs_time_current_64 get_jiffies_64
/* @t advanced by duration @d, both in 64-bit jiffies. */
static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
{
return t + d;
}
/* Absolute 64-bit jiffies time @seconds from now. */
static inline __u64 cfs_time_shift_64(int seconds)
{
return cfs_time_add_64(cfs_time_current_64(),
cfs_time_seconds(seconds));
}
/* Wrap-safe compare: subtract first, then test the signed difference. */
static inline int cfs_time_before_64(__u64 t1, __u64 t2)
{
return (__s64)t2 - (__s64)t1 > 0;
}
/* As above, but also true when t1 == t2. */
static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
{
return (__s64)t2 - (__s64)t1 >= 0;
}
/*
* One jiffy
*/
#define CFS_TICK (1)
#define CFS_TIME_T "%lu"
#define CFS_DURATION_T "%ld"
#endif /* __LIBCFS_LINUX_LINUX_TIME_H__ */
/*
* Local variables:
* c-indentation-style: "K&R"
* c-basic-offset: 8
* tab-width: 8
* fill-column: 80
* scroll-step: 1
* End:
*/

View file

@ -0,0 +1,36 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/include/libcfs/user-bitops.h
*/
#include <linux/types.h>

View file

@ -0,0 +1,116 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG
# define SIGNAL_MASK_ASSERT() \
LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
#else
# define SIGNAL_MASK_ASSERT()
#endif
// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
#define SIGNAL_MASK_LOCK(task, flags) \
spin_lock_irqsave(&task->sighand->siglock, flags)
#define SIGNAL_MASK_UNLOCK(task, flags) \
spin_unlock_irqrestore(&task->sighand->siglock, flags)
#define USERMODEHELPER(path, argv, envp) \
call_usermodehelper(path, argv, envp, 1)
#define clear_tsk_thread_flag(current, TIF_SIGPENDING) clear_tsk_thread_flag(current, \
TIF_SIGPENDING)
# define smp_num_cpus num_online_cpus()
#define cfs_wait_event_interruptible(wq, condition, ret) \
ret = wait_event_interruptible(wq, condition)
#define cfs_wait_event_interruptible_exclusive(wq, condition, ret) \
ret = wait_event_interruptible_exclusive(wq, condition)
#define THREAD_NAME(comm, len, fmt, a...) \
snprintf(comm, len, fmt, ## a)
/* 2.6 alloc_page users can use page->lru */
#define PAGE_LIST_ENTRY lru
#define PAGE_LIST(page) ((page)->lru)
#ifndef __user
#define __user
#endif
#ifndef __fls
#define __cfs_fls fls
#else
#define __cfs_fls __fls
#endif
#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \
proc_dointvec(table, write, buffer, lenp, ppos);
#define ll_proc_dolongvec(table, write, filp, buffer, lenp, ppos) \
proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \
proc_dostring(table, write, buffer, lenp, ppos);
#define LL_PROC_PROTO(name) \
name(ctl_table_t *table, int write, \
void __user *buffer, size_t *lenp, loff_t *ppos)
#define DECLARE_LL_PROC_PPOS_DECL
/* helper for sysctl handlers */
int proc_call_handler(void *data, int write,
loff_t *ppos, void *buffer, size_t *lenp,
int (*handler)(void *data, int write,
loff_t pos, void *buffer, int len));
/*
* CPU
*/
#ifdef for_each_possible_cpu
#define cfs_for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
#elif defined(for_each_cpu)
#define cfs_for_each_possible_cpu(cpu) for_each_cpu(cpu)
#endif
#ifdef NR_CPUS
#else
#define NR_CPUS 1
#endif
#define cfs_set_cpus_allowed(t, mask) set_cpus_allowed(t, mask)
#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
#endif /* _PORTALS_COMPAT_H */

View file

@ -0,0 +1,162 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef _LUCACHE_H
#define _LUCACHE_H
#include <linux/libcfs/libcfs.h>
/** \defgroup ucache ucache
*
* @{
*/
#define UC_CACHE_NEW 0x01
#define UC_CACHE_ACQUIRING 0x02
#define UC_CACHE_INVALID 0x04
#define UC_CACHE_EXPIRED 0x08
#define UC_CACHE_IS_NEW(i) ((i)->ue_flags & UC_CACHE_NEW)
#define UC_CACHE_IS_INVALID(i) ((i)->ue_flags & UC_CACHE_INVALID)
#define UC_CACHE_IS_ACQUIRING(i) ((i)->ue_flags & UC_CACHE_ACQUIRING)
#define UC_CACHE_IS_EXPIRED(i) ((i)->ue_flags & UC_CACHE_EXPIRED)
#define UC_CACHE_IS_VALID(i) ((i)->ue_flags == 0)
#define UC_CACHE_SET_NEW(i) (i)->ue_flags |= UC_CACHE_NEW
#define UC_CACHE_SET_INVALID(i) (i)->ue_flags |= UC_CACHE_INVALID
#define UC_CACHE_SET_ACQUIRING(i) (i)->ue_flags |= UC_CACHE_ACQUIRING
#define UC_CACHE_SET_EXPIRED(i) (i)->ue_flags |= UC_CACHE_EXPIRED
#define UC_CACHE_SET_VALID(i) (i)->ue_flags = 0
#define UC_CACHE_CLEAR_NEW(i) (i)->ue_flags &= ~UC_CACHE_NEW
#define UC_CACHE_CLEAR_ACQUIRING(i) (i)->ue_flags &= ~UC_CACHE_ACQUIRING
#define UC_CACHE_CLEAR_INVALID(i) (i)->ue_flags &= ~UC_CACHE_INVALID
#define UC_CACHE_CLEAR_EXPIRED(i) (i)->ue_flags &= ~UC_CACHE_EXPIRED
struct upcall_cache_entry;
/* One per-NID permission override: the permission mask mp_perm that
 * applies when the peer identified by mp_nid is involved.
 * NOTE(review): exact semantics of mp_perm bits are defined elsewhere —
 * confirm against the identity upcall code. */
struct md_perm {
	lnet_nid_t mp_nid;	/* peer network identifier */
	__u32 mp_perm;		/* permission bits for that NID */
};
/* Cached identity information for one user, stored in the u.identity
 * member of an upcall_cache_entry. */
struct md_identity {
	struct upcall_cache_entry *mi_uc_entry;	/* owning cache entry */
	uid_t mi_uid;
	gid_t mi_gid;
	group_info_t *mi_ginfo;		/* supplementary group list */
	int mi_nperms;			/* number of entries in mi_perms */
	struct md_perm *mi_perms;	/* array of per-NID overrides */
};
/* One entry in an upcall cache: keyed lookup state plus the payload
 * (currently only an md_identity) filled in by a userspace upcall. */
struct upcall_cache_entry {
	struct list_head ue_hash;	/* chain in uc_hashtable bucket */
	__u64 ue_key;			/* lookup key (hashed by
					 * UC_CACHE_HASH_INDEX) */
	atomic_t ue_refcount;
	int ue_flags;			/* UC_CACHE_* state bits */
	wait_queue_head_t ue_waitq;	/* threads waiting for the upcall
					 * result */
	cfs_time_t ue_acquire_expire;	/* deadline for the pending upcall */
	cfs_time_t ue_expire;		/* expiry time of a valid entry */
	union {
		struct md_identity identity;
	} u;
};
#define UC_CACHE_HASH_SIZE (128)
#define UC_CACHE_HASH_INDEX(id) ((id) & (UC_CACHE_HASH_SIZE - 1))
#define UC_CACHE_UPCALL_MAXPATH (1024UL)
struct upcall_cache;
/* Per-cache callback table: lets different users of the generic upcall
 * cache customize entry lifecycle, matching, and downcall parsing. */
struct upcall_cache_ops {
	/* initialize a freshly allocated entry from lookup args */
	void (*init_entry)(struct upcall_cache_entry *, void *args);
	/* release payload resources before the entry is freed */
	void (*free_entry)(struct upcall_cache *,
			   struct upcall_cache_entry *);
	/* match an entry against a lookup key/args pair */
	int (*upcall_compare)(struct upcall_cache *,
			      struct upcall_cache_entry *,
			      __u64 key, void *args);
	/* match an entry against data arriving via downcall */
	int (*downcall_compare)(struct upcall_cache *,
				struct upcall_cache_entry *,
				__u64 key, void *args);
	/* launch the userspace upcall for this entry */
	int (*do_upcall)(struct upcall_cache *,
			 struct upcall_cache_entry *);
	/* decode the downcall payload into the entry */
	int (*parse_downcall)(struct upcall_cache *,
			      struct upcall_cache_entry *, void *);
};
/* A hash-table cache whose entries are populated by invoking a
 * userspace helper (the "upcall") and parsing its reply ("downcall"). */
struct upcall_cache {
	struct list_head uc_hashtable[UC_CACHE_HASH_SIZE];
	spinlock_t uc_lock;		/* protects hash table and entries */
	rwlock_t uc_upcall_rwlock;	/* protects uc_upcall path string */
	char uc_name[40];		/* for upcall */
	char uc_upcall[UC_CACHE_UPCALL_MAXPATH];	/* helper binary path */
	int uc_acquire_expire;		/* seconds */
	int uc_entry_expire;		/* seconds */
	struct upcall_cache_ops *uc_ops;	/* behavior customization */
};
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
__u64 key, void *args);
void upcall_cache_put_entry(struct upcall_cache *cache,
struct upcall_cache_entry *entry);
int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
void *args);
void upcall_cache_flush_idle(struct upcall_cache *cache);
void upcall_cache_flush_all(struct upcall_cache *cache);
void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args);
struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
struct upcall_cache_ops *ops);
void upcall_cache_cleanup(struct upcall_cache *cache);
#if 0
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
__u64 key, __u32 primary,
__u32 ngroups, __u32 *groups);
void upcall_cache_put_entry(struct upcall_cache *hash,
struct upcall_cache_entry *entry);
int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
__u32 primary, __u32 ngroups, __u32 *groups);
void upcall_cache_flush_idle(struct upcall_cache *cache);
void upcall_cache_flush_all(struct upcall_cache *cache);
struct upcall_cache *upcall_cache_init(const char *name);
void upcall_cache_cleanup(struct upcall_cache *hash);
#endif
/** @} ucache */
#endif /* _LUCACHE_H */

View file

@ -0,0 +1,230 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* API and structure definitions for params_tree.
*
* Author: LiuYing <emoly.liu@oracle.com>
*/
#ifndef __PARAMS_TREE_H__
#define __PARAMS_TREE_H__
#include <linux/libcfs/libcfs.h>
#undef LPROCFS
#if defined(CONFIG_PROC_FS)
# define LPROCFS
#endif
#ifdef LPROCFS
typedef struct file cfs_param_file_t;
typedef struct inode cfs_inode_t;
typedef struct proc_inode cfs_proc_inode_t;
typedef struct seq_file cfs_seq_file_t;
typedef struct seq_operations cfs_seq_ops_t;
typedef struct file_operations cfs_param_file_ops_t;
typedef module_t *cfs_param_module_t;
typedef struct proc_dir_entry cfs_param_dentry_t;
typedef struct poll_table_struct cfs_poll_table_t;
#define CFS_PARAM_MODULE THIS_MODULE
#define CFS_PDE(value) PDE(value)
#define cfs_file_private(file) (file->private_data)
#define cfs_dentry_data(dentry) (dentry->data)
#define cfs_proc_inode_pde(proc_inode) (proc_inode->pde)
#define cfs_proc_inode(proc_inode) (proc_inode->vfs_inode)
#define cfs_seq_read_common seq_read
#define cfs_seq_lseek_common seq_lseek
#define cfs_seq_private(seq) (seq->private)
#define cfs_seq_printf(seq, format, ...) seq_printf(seq, format, \
## __VA_ARGS__)
#define cfs_seq_release(inode, file) seq_release(inode, file)
#define cfs_seq_puts(seq, s) seq_puts(seq, s)
#define cfs_seq_putc(seq, s) seq_putc(seq, s)
#define cfs_seq_read(file, buf, count, ppos, rc) (rc = seq_read(file, buf, \
count, ppos))
#define cfs_seq_open(file, ops, rc) (rc = seq_open(file, ops))
/* in lprocfs_stat.c, to protect the private data for proc entries */
extern struct rw_semaphore _lprocfs_lock;
/* to begin from 2.6.23, Linux defines self file_operations (proc_reg_file_ops)
* in procfs, the proc file_operation defined by Lustre (lprocfs_generic_fops)
* will be wrapped into the new defined proc_reg_file_ops, which instroduces
* user count in proc_dir_entrey(pde_users) to protect the proc entry from
* being deleted. then the protection lock (_lprocfs_lock) defined by Lustre
* isn't necessary anymore for lprocfs_generic_fops(e.g. lprocfs_fops_read).
* see bug19706 for detailed information.
*/
#define LPROCFS_ENTRY() do{ }while(0)
#define LPROCFS_EXIT() do{ }while(0)
/*
 * Check whether the proc entry @dp is still live.  Under pde_unload_lock,
 * a NULL proc_fops marks an entry that is being removed.  Returns 0 when
 * the entry is usable, -ENODEV when it has been deleted.
 * NOTE(review): relies on procfs internals (pde_unload_lock, proc_fops)
 * that were removed/changed in later kernels — confirm against the
 * target kernel version.
 */
static inline
int LPROCFS_ENTRY_AND_CHECK(struct proc_dir_entry *dp)
{
	int deleted = 0;

	spin_lock(&(dp)->pde_unload_lock);
	if (dp->proc_fops == NULL)
		deleted = 1;
	spin_unlock(&(dp)->pde_unload_lock);
	if (deleted)
		return -ENODEV;
	return 0;
}
#define LPROCFS_SRCH_ENTRY() \
do { \
down_read(&_lprocfs_lock); \
} while(0)
#define LPROCFS_SRCH_EXIT() \
do { \
up_read(&_lprocfs_lock); \
} while(0)
#define LPROCFS_WRITE_ENTRY() \
do { \
down_write(&_lprocfs_lock); \
} while(0)
#define LPROCFS_WRITE_EXIT() \
do { \
up_write(&_lprocfs_lock); \
} while(0)
#else /* !LPROCFS */
/*
 * Userspace (!LPROCFS) stand-ins for the kernel procfs/seq_file types.
 * They carry just enough state for the params_tree code to compile and
 * run when procfs support is unavailable.
 */
typedef struct cfs_params_file {
	void *param_private;		/* analogue of file->private_data */
	loff_t param_pos;
	unsigned int param_flags;
} cfs_param_file_t;

typedef struct cfs_param_inode {
	void *param_private;
} cfs_inode_t;

typedef struct cfs_param_dentry {
	void *param_data;		/* analogue of proc_dir_entry->data */
} cfs_param_dentry_t;

/* Fake proc_inode: a fake dentry pointer plus the fake inode embedded
 * in it, mirroring the kernel's struct proc_inode layout so that
 * FAKE_PROC_I() can recover it with container_of. */
typedef struct cfs_proc_inode {
	cfs_param_dentry_t *param_pde;
	cfs_inode_t param_inode;
} cfs_proc_inode_t;

struct cfs_seq_operations;

/* Minimal seq_file clone; fields mirror the kernel's struct seq_file. */
typedef struct cfs_seq_file {
	char *buf;
	size_t size;
	size_t from;
	size_t count;
	loff_t index;
	loff_t version;
	struct mutex lock;
	struct cfs_seq_operations *op;	/* iterator callbacks */
	void *private;
} cfs_seq_file_t;

/* Iterator callbacks, same contract as the kernel's seq_operations. */
typedef struct cfs_seq_operations {
	void *(*start) (cfs_seq_file_t *m, loff_t *pos);
	void (*stop) (cfs_seq_file_t *m, void *v);
	void *(*next) (cfs_seq_file_t *m, void *v, loff_t *pos);
	int (*show) (cfs_seq_file_t *m, void *v);
} cfs_seq_ops_t;

typedef void *cfs_param_module_t;
typedef void *cfs_poll_table_t;

/* Minimal file_operations clone used when procfs is unavailable. */
typedef struct cfs_param_file_ops {
	cfs_param_module_t owner;
	int (*open) (cfs_inode_t *, struct file *);
	loff_t (*llseek)(struct file *, loff_t, int);
	int (*release) (cfs_inode_t *, cfs_param_file_t *);
	unsigned int (*poll) (struct file *, cfs_poll_table_t *);
	ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
	ssize_t (*read)(struct file *, char *, size_t, loff_t *);
} cfs_param_file_ops_t;

typedef cfs_param_file_ops_t *cfs_lproc_filep_t;
/* Recover the enclosing fake proc_inode from its embedded fake inode
 * (container_of on the param_inode field). */
static inline cfs_proc_inode_t *FAKE_PROC_I(const cfs_inode_t *inode)
{
	return container_of(inode, cfs_proc_inode_t, param_inode);
}
/* Fake-dentry accessor: the fake equivalent of PDE(inode). */
static inline cfs_param_dentry_t *FAKE_PDE(cfs_inode_t *inode)
{
	return FAKE_PROC_I(inode)->param_pde;
}
#define CFS_PARAM_MODULE NULL
#define CFS_PDE(value) FAKE_PDE(value)
#define cfs_file_private(file) (file->param_private)
#define cfs_dentry_data(dentry) (dentry->param_data)
#define cfs_proc_inode(proc_inode) (proc_inode->param_inode)
#define cfs_proc_inode_pde(proc_inode) (proc_inode->param_pde)
#define cfs_seq_read_common NULL
#define cfs_seq_lseek_common NULL
#define cfs_seq_private(seq) (seq->private)
#define cfs_seq_read(file, buf, count, ppos, rc) do {} while(0)
#define cfs_seq_open(file, ops, rc) \
do { \
cfs_seq_file_t *p = cfs_file_private(file); \
if (!p) { \
LIBCFS_ALLOC(p, sizeof(*p)); \
if (!p) { \
rc = -ENOMEM; \
break; \
} \
cfs_file_private(file) = p; \
} \
memset(p, 0, sizeof(*p)); \
p->op = ops; \
rc = 0; \
} while(0)
#define LPROCFS_ENTRY() do {} while(0)
#define LPROCFS_EXIT() do {} while(0)
/* Non-procfs build: there is no proc entry that can disappear, so the
 * liveness check always succeeds. */
static inline
int LPROCFS_ENTRY_AND_CHECK(cfs_param_dentry_t *dp)
{
	LPROCFS_ENTRY();
	return 0;
}
#define LPROCFS_WRITE_ENTRY() do {} while(0)
#define LPROCFS_WRITE_EXIT() do {} while(0)
#endif /* LPROCFS */
/* XXX: params_tree APIs */
#endif /* __PARAMS_TREE_H__ */

View file

@ -0,0 +1,44 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_API_SUPPORT_H__
#define __LNET_API_SUPPORT_H__
#include <linux/lnet/linux/api-support.h>
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/types.h>
#include <linux/lnet/lnet.h>
#endif

View file

@ -0,0 +1,220 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_API_H__
#define __LNET_API_H__
/** \defgroup lnet LNet
*
* The Lustre Networking subsystem.
*
* LNet is an asynchronous message-passing API, which provides an unreliable
* connectionless service that can't guarantee any order. It supports OFA IB,
* TCP/IP, and Cray Portals, and routes between heterogeneous networks.
*
* LNet can run both in OS kernel space and in userspace as a library.
* @{
*/
#include <linux/lnet/types.h>
/** \defgroup lnet_init_fini Initialization and cleanup
* The LNet must be properly initialized before any LNet calls can be made.
* @{ */
int LNetInit(void);
void LNetFini(void);
int LNetNIInit(lnet_pid_t requested_pid);
int LNetNIFini(void);
/** @} lnet_init_fini */
/** \defgroup lnet_addr LNet addressing and basic types
*
* Addressing scheme and basic data types of LNet.
*
* The LNet API is memory-oriented, so LNet must be able to address not only
* end-points but also memory region within a process address space.
* An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process
* in a node. A portal represents an opening in the address space of a
* process. Match bits is criteria to identify a region of memory inside a
* portal, and offset specifies an offset within the memory region.
*
* LNet creates a table of portals for each process during initialization.
* This table has MAX_PORTALS entries and its size can't be dynamically
* changed. A portal stays empty until the owning process starts to add
* memory regions to it. A portal is sometimes called an index because
* it's an entry in the portals table of a process.
*
* \see LNetMEAttach
* @{ */
int LNetGetId(unsigned int index, lnet_process_id_t *id);
int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order);
void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle);
/** @} lnet_addr */
/** \defgroup lnet_me Match entries
*
* A match entry (abbreviated as ME) describes a set of criteria to accept
* incoming requests.
*
* A portal is essentially a match list plus a set of attributes. A match
* list is a chain of MEs. Each ME includes a pointer to a memory descriptor
* and a set of match criteria. The match criteria can be used to reject
* incoming requests based on process ID or the match bits provided in the
* request. MEs can be dynamically inserted into a match list by LNetMEAttach()
* and LNetMEInsert(), and removed from its list by LNetMEUnlink().
* @{ */
int LNetMEAttach(unsigned int portal,
lnet_process_id_t match_id_in,
__u64 match_bits_in,
__u64 ignore_bits_in,
lnet_unlink_t unlink_in,
lnet_ins_pos_t pos_in,
lnet_handle_me_t *handle_out);
int LNetMEInsert(lnet_handle_me_t current_in,
lnet_process_id_t match_id_in,
__u64 match_bits_in,
__u64 ignore_bits_in,
lnet_unlink_t unlink_in,
lnet_ins_pos_t position_in,
lnet_handle_me_t *handle_out);
int LNetMEUnlink(lnet_handle_me_t current_in);
/** @} lnet_me */
/** \defgroup lnet_md Memory descriptors
*
* A memory descriptor contains information about a region of a user's
* memory (either in kernel or user space) and optionally points to an
* event queue where information about the operations performed on the
* memory descriptor are recorded. Memory descriptor is abbreviated as
* MD and can be used interchangeably with the memory region it describes.
*
* The LNet API provides two operations to create MDs: LNetMDAttach()
* and LNetMDBind(); one operation to unlink and release the resources
* associated with a MD: LNetMDUnlink().
* @{ */
int LNetMDAttach(lnet_handle_me_t current_in,
lnet_md_t md_in,
lnet_unlink_t unlink_in,
lnet_handle_md_t *handle_out);
int LNetMDBind(lnet_md_t md_in,
lnet_unlink_t unlink_in,
lnet_handle_md_t *handle_out);
int LNetMDUnlink(lnet_handle_md_t md_in);
/** @} lnet_md */
/** \defgroup lnet_eq Events and event queues
*
* Event queues (abbreviated as EQ) are used to log operations performed on
* local MDs. In particular, they signal the completion of a data transmission
* into or out of a MD. They can also be used to hold acknowledgments for
* completed PUT operations and indicate when a MD has been unlinked. Multiple
* MDs can share a single EQ. An EQ may have an optional event handler
* associated with it. If an event handler exists, it will be run for each
* event that is deposited into the EQ.
*
* In addition to the lnet_handle_eq_t, the LNet API defines two types
* associated with events: The ::lnet_event_kind_t defines the kinds of events
* that can be stored in an EQ. The lnet_event_t defines a structure that
* holds the information about with an event.
*
* There are five functions for dealing with EQs: LNetEQAlloc() is used to
* create an EQ and allocate the resources needed, while LNetEQFree()
* releases these resources and free the EQ. LNetEQGet() retrieves the next
* event from an EQ, and LNetEQWait() can be used to block a process until
* an EQ has at least one event. LNetEQPoll() can be used to test or wait
* on multiple EQs.
* @{ */
int LNetEQAlloc(unsigned int count_in,
lnet_eq_handler_t handler,
lnet_handle_eq_t *handle_out);
int LNetEQFree(lnet_handle_eq_t eventq_in);
int LNetEQGet(lnet_handle_eq_t eventq_in,
lnet_event_t *event_out);
int LNetEQWait(lnet_handle_eq_t eventq_in,
lnet_event_t *event_out);
int LNetEQPoll(lnet_handle_eq_t *eventqs_in,
int neq_in,
int timeout_ms,
lnet_event_t *event_out,
int *which_eq_out);
/** @} lnet_eq */
/** \defgroup lnet_data Data movement operations
*
* The LNet API provides two data movement operations: LNetPut()
* and LNetGet().
* @{ */
int LNetPut(lnet_nid_t self,
lnet_handle_md_t md_in,
lnet_ack_req_t ack_req_in,
lnet_process_id_t target_in,
unsigned int portal_in,
__u64 match_bits_in,
unsigned int offset_in,
__u64 hdr_data_in);
int LNetGet(lnet_nid_t self,
lnet_handle_md_t md_in,
lnet_process_id_t target_in,
unsigned int portal_in,
__u64 match_bits_in,
unsigned int offset_in);
/** @} lnet_data */
/** \defgroup lnet_misc Miscellaneous operations.
* Miscellaneous operations.
* @{ */
int LNetSetLazyPortal(int portal);
int LNetClearLazyPortal(int portal);
int LNetCtl(unsigned int cmd, void *arg);
int LNetSetAsync(lnet_process_id_t id, int nasync);
/** @} lnet_misc */
/** @} lnet */
#endif

View file

@ -0,0 +1,874 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/lib-lnet.h
*
* Top level include for library side routines
*/
#ifndef __LNET_LIB_LNET_H__
#define __LNET_LIB_LNET_H__
#include <linux/lnet/linux/lib-lnet.h>
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/types.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-types.h>
extern lnet_t the_lnet; /* THE network */
#if defined(LNET_USE_LIB_FREELIST)
/* 1 CPT, simplify implementation... */
# define LNET_CPT_MAX_BITS 0
#else /* KERNEL and no freelist */
# if (BITS_PER_LONG == 32)
/* 2 CPTs, allowing more CPTs might make us under memory pressure */
# define LNET_CPT_MAX_BITS 1
# else /* 64-bit system */
/*
* 256 CPTs for thousands of CPUs, allowing more CPTs might make us
* under risk of consuming all lh_cookie.
*/
# define LNET_CPT_MAX_BITS 8
# endif /* BITS_PER_LONG == 32 */
#endif
/* max allowed CPT number */
#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS)
#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number)
#define LNET_CPT_BITS (the_lnet.ln_cpt_bits)
#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1)
/** exclusive lock */
#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX
/* Test whether @wh is the distinguished "none" wire handle, i.e. both
 * of its cookies hold LNET_WIRE_HANDLE_COOKIE_NONE. */
static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
{
	if (wh->wh_interface_cookie != LNET_WIRE_HANDLE_COOKIE_NONE)
		return 0;

	return wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE;
}
/* An MD is exhausted when its operation threshold has dropped to zero,
 * or when it enforces a max size and the next max-sized operation would
 * not fit in the remaining space. */
static inline int lnet_md_exhausted (lnet_libmd_t *md)
{
	if (md->md_threshold == 0)
		return 1;

	if ((md->md_options & LNET_MD_MAX_SIZE) != 0 &&
	    md->md_offset + md->md_max_size > md->md_length)
		return 1;

	return 0;
}
/* Should unlink md when its refcount is 0 and either:
 * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink,
 *   in the latter case md may not be exhausted).
 * - auto unlink is on and md is exhausted.
 */
static inline int lnet_md_unlinkable (lnet_libmd_t *md)
{
	/* still referenced: never unlinkable */
	if (md->md_refcount != 0)
		return 0;

	/* explicitly flagged for deletion */
	if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0)
		return 1;

	/* otherwise only auto-unlink + exhaustion qualifies */
	if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) == 0)
		return 0;

	return lnet_md_exhausted(md);
}
#define lnet_cpt_table() (the_lnet.ln_cpt_table)
#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1)
/* Extract the CPT index encoded in @cookie.  LNET_CPT_NUMBER doesn't
 * have to be a power of two, so an invalid cookie can yield an
 * out-of-range value; fold it back with a modulo in that case. */
static inline int
lnet_cpt_of_cookie(__u64 cookie)
{
	unsigned int raw = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK;

	if (raw < LNET_CPT_NUMBER)
		return raw;

	return raw % LNET_CPT_NUMBER;
}
/* Acquire the resource (EQ/MD/ME container) lock of partition @cpt. */
static inline void
lnet_res_lock(int cpt)
{
	cfs_percpt_lock(the_lnet.ln_res_lock, cpt);
}

/* Release the resource lock of partition @cpt. */
static inline void
lnet_res_unlock(int cpt)
{
	cfs_percpt_unlock(the_lnet.ln_res_lock, cpt);
}

/* Lock the resource partition of the current CPU and return its index,
 * so the caller can later unlock the same partition. */
static inline int
lnet_res_lock_current(void)
{
	int cpt = lnet_cpt_current();

	lnet_res_lock(cpt);
	return cpt;
}
/* Acquire the network-state lock of partition @cpt. */
static inline void
lnet_net_lock(int cpt)
{
	cfs_percpt_lock(the_lnet.ln_net_lock, cpt);
}

/* Release the network-state lock of partition @cpt. */
static inline void
lnet_net_unlock(int cpt)
{
	cfs_percpt_unlock(the_lnet.ln_net_lock, cpt);
}

/* Lock the network partition of the current CPU and return its index,
 * so the caller can later unlock the same partition. */
static inline int
lnet_net_lock_current(void)
{
	int cpt = lnet_cpt_current();

	lnet_net_lock(cpt);
	return cpt;
}
#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX)
#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX)
#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock)
#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock)
#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock)
#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock)
#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock)
#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock)
#define LNET_MUTEX_LOCK(m) mutex_lock(m)
#define LNET_MUTEX_UNLOCK(m) mutex_unlock(m)
#define MAX_PORTALS 64
/* these are only used by code with LNET_USE_LIB_FREELIST, but we still
* exported them to !LNET_USE_LIB_FREELIST for easy implemetation */
#define LNET_FL_MAX_MES 2048
#define LNET_FL_MAX_MDS 2048
#define LNET_FL_MAX_EQS 512
#define LNET_FL_MAX_MSGS 2048 /* Outstanding messages */
#ifdef LNET_USE_LIB_FREELIST
int lnet_freelist_init(lnet_freelist_t *fl, int n, int size);
void lnet_freelist_fini(lnet_freelist_t *fl);
/* Pop one object off freelist @fl and return a pointer to its payload
 * area, or NULL if the list is empty.  Caller must hold the lib lock
 * (per the original "ALWAYS called with liblock held" contract). */
static inline void *
lnet_freelist_alloc (lnet_freelist_t *fl)
{
	/* ALWAYS called with liblock held */
	lnet_freeobj_t *obj;

	if (list_empty(&fl->fl_list))
		return NULL;

	obj = list_entry(fl->fl_list.next, lnet_freeobj_t, fo_list);
	list_del(&obj->fo_list);

	return (void *)&obj->fo_contents;
}
/* Push the payload @obj back onto freelist @fl.  The freelist header is
 * recovered from the payload pointer via list_entry (container_of).
 * Caller must hold the lib lock. */
static inline void
lnet_freelist_free (lnet_freelist_t *fl, void *obj)
{
	/* ALWAYS called with liblock held */
	lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents);

	list_add (&o->fo_list, &fl->fl_list);
}
/* Allocate an EQ from the shared freelist (single-CPT configuration
 * only, hence the LASSERT).  Takes the resource lock itself, so it must
 * NOT be called with that lock held.  May return NULL if the freelist
 * is exhausted. */
static inline lnet_eq_t *
lnet_eq_alloc (void)
{
	/* NEVER called with resource lock held */
	struct lnet_res_container *rec = &the_lnet.ln_eq_container;
	lnet_eq_t		  *eq;

	LASSERT(LNET_CPT_NUMBER == 1);

	lnet_res_lock(0);
	eq = (lnet_eq_t *)lnet_freelist_alloc(&rec->rec_freelist);
	lnet_res_unlock(0);

	return eq;
}

/* Return @eq to the freelist; caller already holds the resource lock. */
static inline void
lnet_eq_free_locked(lnet_eq_t *eq)
{
	/* ALWAYS called with resource lock held */
	struct lnet_res_container *rec = &the_lnet.ln_eq_container;

	LASSERT(LNET_CPT_NUMBER == 1);
	lnet_freelist_free(&rec->rec_freelist, eq);
}

/* Unlocked wrapper: takes the resource lock around the locked free. */
static inline void
lnet_eq_free(lnet_eq_t *eq)
{
	lnet_res_lock(0);
	lnet_eq_free_locked(eq);
	lnet_res_unlock(0);
}
/* Allocate an MD from the shared freelist (single-CPT configuration
 * only).  Takes the resource lock itself, so it must NOT be called with
 * that lock held.  @umd is unused in the freelist variant; the list
 * head is initialized so the MD can be safely unlinked later.  May
 * return NULL if the freelist is exhausted. */
static inline lnet_libmd_t *
lnet_md_alloc (lnet_md_t *umd)
{
	/* NEVER called with resource lock held */
	struct lnet_res_container *rec = the_lnet.ln_md_containers[0];
	lnet_libmd_t		  *md;

	LASSERT(LNET_CPT_NUMBER == 1);

	lnet_res_lock(0);
	md = (lnet_libmd_t *)lnet_freelist_alloc(&rec->rec_freelist);
	lnet_res_unlock(0);

	if (md != NULL)
		INIT_LIST_HEAD(&md->md_list);

	return md;
}

/* Return @md to the freelist; caller already holds the resource lock. */
static inline void
lnet_md_free_locked(lnet_libmd_t *md)
{
	/* ALWAYS called with resource lock held */
	struct lnet_res_container *rec = the_lnet.ln_md_containers[0];

	LASSERT(LNET_CPT_NUMBER == 1);
	lnet_freelist_free(&rec->rec_freelist, md);
}

/* Unlocked wrapper: takes the resource lock around the locked free. */
static inline void
lnet_md_free(lnet_libmd_t *md)
{
	lnet_res_lock(0);
	lnet_md_free_locked(md);
	lnet_res_unlock(0);
}
/* Allocate an ME from the shared freelist (single-CPT configuration
 * only).  Takes the resource lock itself, so it must NOT be called with
 * that lock held.  May return NULL if the freelist is exhausted. */
static inline lnet_me_t *
lnet_me_alloc(void)
{
	/* NEVER called with resource lock held */
	struct lnet_res_container *rec = the_lnet.ln_me_containers[0];
	lnet_me_t		  *me;

	LASSERT(LNET_CPT_NUMBER == 1);

	lnet_res_lock(0);
	me = (lnet_me_t *)lnet_freelist_alloc(&rec->rec_freelist);
	lnet_res_unlock(0);

	return me;
}

/* Return @me to the freelist; caller already holds the resource lock. */
static inline void
lnet_me_free_locked(lnet_me_t *me)
{
	/* ALWAYS called with resource lock held */
	struct lnet_res_container *rec = the_lnet.ln_me_containers[0];

	LASSERT(LNET_CPT_NUMBER == 1);
	lnet_freelist_free(&rec->rec_freelist, me);
}

/* Unlocked wrapper: takes the resource lock around the locked free. */
static inline void
lnet_me_free(lnet_me_t *me)
{
	lnet_res_lock(0);
	lnet_me_free_locked(me);
	lnet_res_unlock(0);
}
/* Allocate a message from the shared freelist (single-CPT configuration
 * only).  Takes the network lock itself, so it must NOT be called with
 * that lock held.  The message is zeroed before being returned, since a
 * recycled freelist object may hold stale contents.  May return NULL if
 * the freelist is exhausted. */
static inline lnet_msg_t *
lnet_msg_alloc (void)
{
	/* NEVER called with network lock held */
	struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];
	lnet_msg_t		  *msg;

	LASSERT(LNET_CPT_NUMBER == 1);

	lnet_net_lock(0);
	msg = (lnet_msg_t *)lnet_freelist_alloc(&msc->msc_freelist);
	lnet_net_unlock(0);

	if (msg != NULL) {
		/* NULL pointers, clear flags etc */
		memset(msg, 0, sizeof(*msg));
	}
	return msg;
}

/* Return @msg to the freelist; caller already holds the network lock.
 * The message must already be off the active list. */
static inline void
lnet_msg_free_locked(lnet_msg_t *msg)
{
	/* ALWAYS called with network lock held */
	struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0];

	LASSERT(LNET_CPT_NUMBER == 1);
	LASSERT(!msg->msg_onactivelist);
	lnet_freelist_free(&msc->msc_freelist, msg);
}

/* Unlocked wrapper: takes the network lock around the locked free. */
static inline void
lnet_msg_free (lnet_msg_t *msg)
{
	lnet_net_lock(0);
	lnet_msg_free_locked(msg);
	lnet_net_unlock(0);
}
#else /* !LNET_USE_LIB_FREELIST */
/* Heap-allocation variant (no freelist): allocate an EQ with
 * LIBCFS_ALLOC.  May return NULL on allocation failure. */
static inline lnet_eq_t *
lnet_eq_alloc (void)
{
	/* NEVER called with liblock held */
	lnet_eq_t *eq;

	LIBCFS_ALLOC(eq, sizeof(*eq));
	return (eq);
}

/* Free an EQ allocated by lnet_eq_alloc(). */
static inline void
lnet_eq_free(lnet_eq_t *eq)
{
	/* ALWAYS called with resource lock held */
	LIBCFS_FREE(eq, sizeof(*eq));
}
static inline lnet_libmd_t *
lnet_md_alloc (lnet_md_t *umd)
{
	/* NEVER called with liblock held */
	lnet_libmd_t *md;
	unsigned int nfrags;
	unsigned int alloc_size;

	/* Size the allocation for the trailing fragment array: kiov
	 * (page) fragments, iovec fragments, or a single iovec when
	 * the MD describes one contiguous buffer. */
	if ((umd->options & LNET_MD_KIOV) != 0) {
		nfrags = umd->length;
		alloc_size = offsetof(lnet_libmd_t, md_iov.kiov[nfrags]);
	} else {
		nfrags = ((umd->options & LNET_MD_IOVEC) != 0) ?
			 umd->length : 1;
		alloc_size = offsetof(lnet_libmd_t, md_iov.iov[nfrags]);
	}

	LIBCFS_ALLOC(md, alloc_size);

	if (md == NULL)
		return NULL;

	/* Set here in case of early free */
	md->md_options = umd->options;
	md->md_niov = nfrags;
	INIT_LIST_HEAD(&md->md_list);

	return md;
}
static inline void
lnet_md_free(lnet_libmd_t *md)
{
	/* ALWAYS called with resource lock held */
	/* free size must mirror the kiov/iov sizing done in
	 * lnet_md_alloc() */
	unsigned int nob = (md->md_options & LNET_MD_KIOV) != 0 ?
			   offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]) :
			   offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]);

	LIBCFS_FREE(md, nob);
}
static inline lnet_me_t *
lnet_me_alloc (void)
{
/* NEVER called with liblock held */
lnet_me_t *me;
/* zeroed by LIBCFS_ALLOC; NULL on allocation failure */
LIBCFS_ALLOC(me, sizeof(*me));
return (me);
}
static inline void
lnet_me_free(lnet_me_t *me)
{
/* ALWAYS called with resource lock held */
LIBCFS_FREE(me, sizeof(*me));
}
static inline lnet_msg_t *
lnet_msg_alloc(void)
{
/* NEVER called with liblock held */
lnet_msg_t *msg;
LIBCFS_ALLOC(msg, sizeof(*msg));
/* no need to zero, LIBCFS_ALLOC does for us */
return (msg);
}
static inline void
lnet_msg_free(lnet_msg_t *msg)
{
/* ALWAYS called with network lock held */
/* a message still on the active list must be decommitted first */
LASSERT(!msg->msg_onactivelist);
LIBCFS_FREE(msg, sizeof(*msg));
}
/* Without a freelist, freeing is a plain LIBCFS_FREE and needs no lock,
 * so the "locked" variants are simply aliases of the unlocked ones. */
#define lnet_eq_free_locked(eq) lnet_eq_free(eq)
#define lnet_md_free_locked(md) lnet_md_free(md)
#define lnet_me_free_locked(me) lnet_me_free(me)
#define lnet_msg_free_locked(msg) lnet_msg_free(msg)
#endif /* LNET_USE_LIB_FREELIST */
lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
__u64 cookie);
void lnet_res_lh_initialize(struct lnet_res_container *rec,
lnet_libhandle_t *lh);
static inline void
lnet_res_lh_invalidate(lnet_libhandle_t *lh)
{
/* ALWAYS called with resource lock held */
/* NB: cookie is still useful, don't reset it */
list_del(&lh->lh_hash_chain);
}
static inline void
lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq)
{
	/* Encode @eq's cookie into the user-visible @handle; a NULL EQ
	 * yields the invalid handle. */
	if (eq != NULL) {
		handle->cookie = eq->eq_lh.lh_cookie;
		return;
	}

	LNetInvalidateHandle(handle);
}
static inline lnet_eq_t *
lnet_handle2eq(lnet_handle_eq_t *handle)
{
	/* ALWAYS called with resource lock held */
	/* Look the cookie up in the (global) EQ container's handle hash
	 * and map the libhandle back to its enclosing EQ. */
	lnet_libhandle_t *lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container,
						  handle->cookie);

	return lh == NULL ? NULL : lh_entry(lh, lnet_eq_t, eq_lh);
}
static inline void
lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md)
{
/* encode @md's cookie into the user-visible handle */
handle->cookie = md->md_lh.lh_cookie;
}
static inline lnet_libmd_t *
lnet_handle2md(lnet_handle_md_t *handle)
{
	/* ALWAYS called with resource lock held */
	/* the cookie encodes which CPT's MD container to search */
	int cpt = lnet_cpt_of_cookie(handle->cookie);
	lnet_libhandle_t *lh =
		lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
				   handle->cookie);

	return lh == NULL ? NULL : lh_entry(lh, lnet_libmd_t, md_lh);
}
static inline lnet_libmd_t *
lnet_wire_handle2md(lnet_handle_wire_t *wh)
{
	/* ALWAYS called with resource lock held */
	lnet_libhandle_t *lh;
	int cpt;

	/* a stale interface cookie means the peer was talking to a
	 * previous incarnation of this node */
	if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie)
		return NULL;

	cpt = lnet_cpt_of_cookie(wh->wh_object_cookie);
	lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt],
				wh->wh_object_cookie);

	return lh == NULL ? NULL : lh_entry(lh, lnet_libmd_t, md_lh);
}
static inline void
lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me)
{
/* encode @me's cookie into the user-visible handle */
handle->cookie = me->me_lh.lh_cookie;
}
static inline lnet_me_t *
lnet_handle2me(lnet_handle_me_t *handle)
{
	/* ALWAYS called with resource lock held */
	/* the cookie encodes which CPT's ME container to search */
	int cpt = lnet_cpt_of_cookie(handle->cookie);
	lnet_libhandle_t *lh =
		lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt],
				   handle->cookie);

	return lh == NULL ? NULL : lh_entry(lh, lnet_me_t, me_lh);
}
static inline void
lnet_peer_addref_locked(lnet_peer_t *lp)
{
/* take an extra ref on @lp; caller must already hold a ref
 * (and the lock protecting lp_refcount) */
LASSERT (lp->lp_refcount > 0);
lp->lp_refcount++;
}
extern void lnet_destroy_peer_locked(lnet_peer_t *lp);
static inline void
lnet_peer_decref_locked(lnet_peer_t *lp)
{
	/* Drop one reference on @lp; destroy the peer when the last
	 * reference goes away.  Caller holds the lock protecting
	 * lp_refcount. */
	LASSERT(lp->lp_refcount > 0);

	if (--lp->lp_refcount == 0)
		lnet_destroy_peer_locked(lp);
}
static inline int
lnet_isrouter(lnet_peer_t *lp)
{
	/* a peer is a router iff it is referenced from the router table */
	return !!lp->lp_rtr_refcount;
}
static inline void
lnet_ni_addref_locked(lnet_ni_t *ni, int cpt)
{
/* bump @ni's per-CPT refcount; caller holds the net lock for @cpt */
LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
LASSERT(*ni->ni_refs[cpt] >= 0);
(*ni->ni_refs[cpt])++;
}
static inline void
lnet_ni_addref(lnet_ni_t *ni)
{
/* unlocked wrapper: take the CPT-0 net lock around the locked addref */
lnet_net_lock(0);
lnet_ni_addref_locked(ni, 0);
lnet_net_unlock(0);
}
static inline void
lnet_ni_decref_locked(lnet_ni_t *ni, int cpt)
{
/* drop @ni's per-CPT refcount; caller holds the net lock for @cpt */
LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER);
LASSERT(*ni->ni_refs[cpt] > 0);
(*ni->ni_refs[cpt])--;
}
static inline void
lnet_ni_decref(lnet_ni_t *ni)
{
/* unlocked wrapper: take the CPT-0 net lock around the locked decref */
lnet_net_lock(0);
lnet_ni_decref_locked(ni, 0);
lnet_net_unlock(0);
}
void lnet_ni_free(lnet_ni_t *ni);
static inline int
lnet_nid2peerhash(lnet_nid_t nid)
{
/* hash a NID into a peer-table bucket index (LNET_PEER_HASH_BITS wide) */
return cfs_hash_long(nid, LNET_PEER_HASH_BITS);
}
static inline struct list_head *
lnet_net2rnethash(__u32 net)
{
	/* Hash a remote net number (type + number) into a bucket of
	 * ln_remote_nets_hash[]; table size is 2^ln_remote_nets_hbits. */
	unsigned int bucket = (LNET_NETNUM(net) + LNET_NETTYP(net)) &
			      ((1U << the_lnet.ln_remote_nets_hbits) - 1);

	return &the_lnet.ln_remote_nets_hash[bucket];
}
extern lnd_t the_lolnd;
extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
extern int lnet_cpt_of_nid(lnet_nid_t nid);
extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
extern lnet_ni_t *lnet_net2ni(__u32 net);
int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when);
void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when);
int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid);
int lnet_check_routes(void);
int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
void lnet_destroy_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive);
void lnet_proc_init(void);
void lnet_proc_fini(void);
int lnet_rtrpools_alloc(int im_a_router);
void lnet_rtrpools_free(void);
lnet_remotenet_t *lnet_find_net_locked (__u32 net);
int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net);
void lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
unsigned int offset, unsigned int mlen);
void lnet_msg_detach_md(lnet_msg_t *msg, int status);
void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev);
void lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type);
void lnet_msg_commit(lnet_msg_t *msg, int cpt);
void lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status);
void lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev);
void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target,
unsigned int offset, unsigned int len);
int lnet_send(lnet_nid_t nid, lnet_msg_t *msg, lnet_nid_t rtr_nid);
void lnet_return_tx_credits_locked(lnet_msg_t *msg);
void lnet_return_rx_credits_locked(lnet_msg_t *msg);
/* portals functions */
/* portals attributes */
static inline int
lnet_ptl_is_lazy(lnet_portal_t *ptl)
{
	/* true (1) iff the LNET_PTL_LAZY option is set on this portal */
	return (ptl->ptl_options & LNET_PTL_LAZY) != 0;
}
static inline int
lnet_ptl_is_unique(lnet_portal_t *ptl)
{
	/* true (1) iff this portal uses unique (RDMA) matching */
	return (ptl->ptl_options & LNET_PTL_MATCH_UNIQUE) != 0;
}
static inline int
lnet_ptl_is_wildcard(lnet_portal_t *ptl)
{
	/* true (1) iff this portal uses wildcard (request) matching */
	return (ptl->ptl_options & LNET_PTL_MATCH_WILDCARD) != 0;
}
static inline void
lnet_ptl_setopt(lnet_portal_t *ptl, int opt)
{
/* set option bit(s) @opt on this portal */
ptl->ptl_options |= opt;
}
static inline void
lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt)
{
/* clear option bit(s) @opt on this portal */
ptl->ptl_options &= ~opt;
}
/* match-table functions */
struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable,
lnet_process_id_t id, __u64 mbits);
struct lnet_match_table *lnet_mt_of_attach(unsigned int index,
lnet_process_id_t id, __u64 mbits,
__u64 ignore_bits,
lnet_ins_pos_t pos);
int lnet_mt_match_md(struct lnet_match_table *mtable,
struct lnet_match_info *info, struct lnet_msg *msg);
/* portals match/attach functions */
void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
struct list_head *matches, struct list_head *drops);
void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md);
int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg);
/* initialized and finalize portals */
int lnet_portals_create(void);
void lnet_portals_destroy(void);
/* message functions */
int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr,
lnet_nid_t fromnid, void *private, int rdma_req);
void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
unsigned int offset, unsigned int mlen, unsigned int rlen);
lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg);
void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len);
void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc);
void lnet_drop_delayed_msg_list(struct list_head *head, char *reason);
void lnet_recv_delayed_msg_list(struct list_head *head);
int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt);
void lnet_msg_container_cleanup(struct lnet_msg_container *container);
void lnet_msg_containers_destroy(void);
int lnet_msg_containers_create(void);
char *lnet_msgtyp2str (int type);
void lnet_print_hdr (lnet_hdr_t * hdr);
int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
void lnet_counters_get(lnet_counters_t *counters);
void lnet_counters_reset(void);
unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov);
int lnet_extract_iov (int dst_niov, struct iovec *dst,
int src_niov, struct iovec *src,
unsigned int offset, unsigned int len);
unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov);
int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst,
int src_niov, lnet_kiov_t *src,
unsigned int offset, unsigned int len);
void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov,
unsigned int doffset,
unsigned int nsiov, struct iovec *siov,
unsigned int soffset, unsigned int nob);
void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov,
unsigned int iovoffset,
unsigned int nkiov, lnet_kiov_t *kiov,
unsigned int kiovoffset, unsigned int nob);
void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov,
unsigned int kiovoffset,
unsigned int niov, struct iovec *iov,
unsigned int iovoffset, unsigned int nob);
void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov,
unsigned int doffset,
unsigned int nskiov, lnet_kiov_t *skiov,
unsigned int soffset, unsigned int nob);
static inline void
lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
		   unsigned int nsiov, struct iovec *siov, unsigned int soffset,
		   unsigned int nob)
{
	/* Copy @nob bytes from an iovec set into a flat buffer by
	 * describing the buffer as a single-element iovec.
	 * Use real C99 designated initializers instead of positional
	 * initializers with field names smuggled into comments. */
	struct iovec diov = { .iov_base = dest, .iov_len = dlen };

	lnet_copy_iov2iov(1, &diov, doffset, nsiov, siov, soffset, nob);
}
static inline void
lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
		    unsigned int nsiov, lnet_kiov_t *skiov,
		    unsigned int soffset, unsigned int nob)
{
	/* Copy @nob bytes from a kiov (page) set into a flat buffer by
	 * describing the buffer as a single-element iovec.
	 * C99 designated initializers replace the commented positional
	 * form. */
	struct iovec diov = { .iov_base = dest, .iov_len = dlen };

	lnet_copy_kiov2iov(1, &diov, doffset, nsiov, skiov, soffset, nob);
}
static inline void
lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset,
		   int slen, void *src, unsigned int soffset, unsigned int nob)
{
	/* Copy @nob bytes from a flat buffer into an iovec set by
	 * describing the buffer as a single-element iovec.
	 * C99 designated initializers replace the commented positional
	 * form. */
	struct iovec siov = { .iov_base = src, .iov_len = slen };

	lnet_copy_iov2iov(ndiov, diov, doffset, 1, &siov, soffset, nob);
}
static inline void
lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov,
		    unsigned int doffset, int slen, void *src,
		    unsigned int soffset, unsigned int nob)
{
	/* Copy @nob bytes from a flat buffer into a kiov (page) set by
	 * describing the buffer as a single-element iovec.
	 * C99 designated initializers replace the commented positional
	 * form. */
	struct iovec siov = { .iov_base = src, .iov_len = slen };

	lnet_copy_iov2kiov(ndiov, dkiov, doffset, 1, &siov, soffset, nob);
}
/* ME/MD teardown */
void lnet_me_unlink(lnet_me_t *me);
void lnet_md_unlink(lnet_libmd_t *md);
void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
/* LND registry */
void lnet_register_lnd(lnd_t *lnd);
void lnet_unregister_lnd(lnd_t *lnd);
int lnet_set_ip_niaddr (lnet_ni_t *ni);
/* acceptor / connection management.
 * NB: duplicate declarations of lnet_count_acceptor_nis() and
 * lnet_acceptor_port() removed. */
int lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
__u32 local_ip, __u32 peer_ip, int peer_port);
void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
__u32 peer_ip, int port);
int lnet_count_acceptor_nis(void);
int lnet_acceptor_timeout(void);
int lnet_acceptor_port(void);
int lnet_acceptor_start(void);
void lnet_acceptor_stop(void);
/* tunables, router checker and ping */
void lnet_get_tunables(void);
int lnet_peers_start_down(void);
int lnet_peer_buffer_credits(lnet_ni_t *ni);
int lnet_router_checker_start(void);
void lnet_router_checker_stop(void);
void lnet_swap_pinginfo(lnet_ping_info_t *info);
int lnet_ping_target_init(void);
void lnet_ping_target_fini(void);
int lnet_ping(lnet_process_id_t id, int timeout_ms,
lnet_process_id_t *ids, int n_ids);
/* module parameter / config string parsing */
int lnet_parse_ip2nets (char **networksp, char *ip2nets);
int lnet_parse_routes (char *route_str, int *im_a_router);
int lnet_parse_networks (struct list_head *nilist, char *networks);
/* peer tables */
int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
lnet_nid_t nid);
void lnet_peer_tables_cleanup(void);
void lnet_peer_tables_destroy(void);
int lnet_peer_tables_create(void);
void lnet_debug_peer(lnet_nid_t nid);
#endif
/*
 * ---- lnet/include/lnet/lib-types.h ----
 */
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/lib-types.h
*
* Types used by the library side routines that do not need to be
* exposed to the user application
*/
#ifndef __LNET_LIB_TYPES_H__
#define __LNET_LIB_TYPES_H__
#include <linux/lnet/linux/lib-types.h>
#include <linux/libcfs/libcfs.h>
#include <linux/list.h>
#include <linux/lnet/types.h>
#define WIRE_ATTR __attribute__((packed))
/* Packed version of lnet_process_id_t to transfer via network */
typedef struct {
lnet_nid_t nid;
lnet_pid_t pid; /* node id / process id */
} WIRE_ATTR lnet_process_id_packed_t;
/* The wire handle's interface cookie only matches one network interface in
* one epoch (i.e. new cookie when the interface restarts or the node
* reboots). The object cookie only matches one object on that interface
* during that object's lifetime (i.e. no cookie re-use). */
typedef struct {
__u64 wh_interface_cookie;
__u64 wh_object_cookie;
} WIRE_ATTR lnet_handle_wire_t;
typedef enum {
LNET_MSG_ACK = 0,
LNET_MSG_PUT,
LNET_MSG_GET,
LNET_MSG_REPLY,
LNET_MSG_HELLO,
} lnet_msg_type_t;
/* The variant fields of the portals message header are aligned on an 8
* byte boundary in the message header. Note that all types used in these
* wire structs MUST be fixed size and the smaller types are placed at the
* end. */
typedef struct lnet_ack {
lnet_handle_wire_t dst_wmd;
__u64 match_bits;
__u32 mlength;
} WIRE_ATTR lnet_ack_t;
typedef struct lnet_put {
lnet_handle_wire_t ack_wmd;
__u64 match_bits;
__u64 hdr_data;
__u32 ptl_index;
__u32 offset;
} WIRE_ATTR lnet_put_t;
typedef struct lnet_get {
lnet_handle_wire_t return_wmd;
__u64 match_bits;
__u32 ptl_index;
__u32 src_offset;
__u32 sink_length;
} WIRE_ATTR lnet_get_t;
typedef struct lnet_reply {
lnet_handle_wire_t dst_wmd;
} WIRE_ATTR lnet_reply_t;
typedef struct lnet_hello {
__u64 incarnation;
__u32 type;
} WIRE_ATTR lnet_hello_t;
typedef struct {
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
lnet_pid_t dest_pid;
lnet_pid_t src_pid;
__u32 type; /* lnet_msg_type_t */
__u32 payload_length; /* payload data to follow */
/*<------__u64 aligned------->*/
union {
lnet_ack_t ack;
lnet_put_t put;
lnet_get_t get;
lnet_reply_t reply;
lnet_hello_t hello;
} msg;
} WIRE_ATTR lnet_hdr_t;
/* A HELLO message contains a magic number and protocol version
* code in the header's dest_nid, the peer's NID in the src_nid, and
* LNET_MSG_HELLO in the type field. All other common fields are zero
* (including payload_size; i.e. no payload).
* This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
* running the same protocol and to find out its NID. These LNDs should
* exchange HELLO messages when a connection is first established. Individual
* LNDs can put whatever else they fancy in lnet_hdr_t::msg.
*/
typedef struct {
__u32 magic; /* LNET_PROTO_TCP_MAGIC */
__u16 version_major; /* increment on incompatible change */
__u16 version_minor; /* increment on compatible change */
} WIRE_ATTR lnet_magicversion_t;
/* PROTO MAGIC for LNDs */
#define LNET_PROTO_IB_MAGIC 0x0be91b91
#define LNET_PROTO_RA_MAGIC 0x0be91b92
#define LNET_PROTO_QSW_MAGIC 0x0be91b93
#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */
#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */
#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */
#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
/* Placeholder for a future "unified" protocol across all LNDs */
/* Current LNDs that receive a request with this magic will respond with a
* "stub" reply using their current protocol */
#define LNET_PROTO_MAGIC 0x45726963 /* ! */
#define LNET_PROTO_TCP_VERSION_MAJOR 1
#define LNET_PROTO_TCP_VERSION_MINOR 0
/* Acceptor connection request */
typedef struct {
__u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */
__u32 acr_version; /* protocol version */
__u64 acr_nid; /* target NID */
} WIRE_ATTR lnet_acceptor_connreq_t;
#define LNET_PROTO_ACCEPTOR_VERSION 1
/* forward refs */
struct lnet_libmd;
/* Descriptor for one message in flight: commit state, credit flags,
 * payload description and the wire header. */
typedef struct lnet_msg {
struct list_head msg_activelist;
struct list_head msg_list; /* Q for credits/MD */
lnet_process_id_t msg_target;
/* where is it from, it's only for building event */
lnet_nid_t msg_from;
__u32 msg_type;
/* committed for sending */
unsigned int msg_tx_committed:1;
/* CPT # this message committed for sending */
unsigned int msg_tx_cpt:15;
/* committed for receiving */
unsigned int msg_rx_committed:1;
/* CPT # this message committed for receiving */
unsigned int msg_rx_cpt:15;
/* queued for tx credit */
unsigned int msg_tx_delayed:1;
/* queued for RX buffer */
unsigned int msg_rx_delayed:1;
/* ready for pending on RX delay list */
unsigned int msg_rx_ready_delay:1;
unsigned int msg_vmflush:1; /* VM trying to free memory */
unsigned int msg_target_is_router:1; /* sending to a router */
unsigned int msg_routing:1; /* being forwarded */
unsigned int msg_ack:1; /* ack on finalize (PUT) */
unsigned int msg_sending:1; /* outgoing message */
unsigned int msg_receiving:1; /* being received */
unsigned int msg_txcredit:1; /* taken an NI send credit */
unsigned int msg_peertxcredit:1; /* taken a peer send credit */
unsigned int msg_rtrcredit:1; /* taken a global router credit */
unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
unsigned int msg_onactivelist:1; /* on the activelist */
struct lnet_peer *msg_txpeer; /* peer I'm sending to */
struct lnet_peer *msg_rxpeer; /* peer I received from */
void *msg_private;
struct lnet_libmd *msg_md;
unsigned int msg_len;
unsigned int msg_wanted;
unsigned int msg_offset;
unsigned int msg_niov;
struct iovec *msg_iov;
lnet_kiov_t *msg_kiov;
lnet_event_t msg_ev;
lnet_hdr_t msg_hdr;
} lnet_msg_t;
/* Generic handle: every ME/MD/EQ embeds one of these; the cookie is the
 * value exposed to users and hashed by lh_hash_chain. */
typedef struct lnet_libhandle {
	struct list_head lh_hash_chain;
	__u64 lh_cookie;
} lnet_libhandle_t;

/* Map an embedded lnet_libhandle_t back to its enclosing object.
 * offsetof() replaces the original hand-rolled null-pointer arithmetic
 * ((char *)&((type *)0)->member), which is undefined behavior in strict
 * C; offsetof is available via <linux/stddef.h> (pulled in by
 * <linux/list.h>). */
#define lh_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
typedef struct lnet_eq {
struct list_head eq_list;
lnet_libhandle_t eq_lh;
lnet_seq_t eq_enq_seq;
lnet_seq_t eq_deq_seq;
unsigned int eq_size;
lnet_eq_handler_t eq_callback;
lnet_event_t *eq_events;
int **eq_refs; /* percpt refcount for EQ */
} lnet_eq_t;
typedef struct lnet_me {
struct list_head me_list;
lnet_libhandle_t me_lh;
lnet_process_id_t me_match_id;
unsigned int me_portal;
unsigned int me_pos; /* hash offset in mt_hash */
__u64 me_match_bits;
__u64 me_ignore_bits;
lnet_unlink_t me_unlink;
struct lnet_libmd *me_md;
} lnet_me_t;
typedef struct lnet_libmd {
struct list_head md_list;
lnet_libhandle_t md_lh;
lnet_me_t *md_me;
char *md_start;
unsigned int md_offset;
unsigned int md_length;
unsigned int md_max_size;
int md_threshold;
int md_refcount;
unsigned int md_options;
unsigned int md_flags;
void *md_user_ptr;
lnet_eq_t *md_eq;
unsigned int md_niov; /* # frags */
union {
struct iovec iov[LNET_MAX_IOV];
lnet_kiov_t kiov[LNET_MAX_IOV];
} md_iov;
} lnet_libmd_t;
#define LNET_MD_FLAG_ZOMBIE (1 << 0)
#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1)
#ifdef LNET_USE_LIB_FREELIST
typedef struct
{
void *fl_objs; /* single contiguous array of objects */
int fl_nobjs; /* the number of them */
int fl_objsize; /* the size (including overhead) of each of them */
struct list_head fl_list; /* where they are enqueued */
} lnet_freelist_t;
typedef struct
{
struct list_head fo_list; /* enqueue on fl_list */
void *fo_contents; /* aligned contents */
} lnet_freeobj_t;
#endif
typedef struct {
/* info about peers we are trying to fail */
struct list_head tp_list; /* ln_test_peers */
lnet_nid_t tp_nid; /* matching nid */
unsigned int tp_threshold; /* # failures to simulate */
} lnet_test_peer_t;
#define LNET_COOKIE_TYPE_MD 1
#define LNET_COOKIE_TYPE_ME 2
#define LNET_COOKIE_TYPE_EQ 3
#define LNET_COOKIE_TYPE_BITS 2
#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
struct lnet_ni; /* forward ref */
/* LND (Lustre Network Driver) method table: one instance per network
 * driver type, registered with lnet_register_lnd(). */
typedef struct lnet_lnd
{
/* fields managed by portals */
struct list_head lnd_list; /* stash in the LND table */
int lnd_refcount; /* # active instances */
/* fields initialised by the LND */
unsigned int lnd_type;
int (*lnd_startup) (struct lnet_ni *ni);
void (*lnd_shutdown) (struct lnet_ni *ni);
int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
/* In data movement APIs below, payload buffers are described as a set
 * of 'niov' fragments which are...
 * EITHER
 * in virtual memory (struct iovec *iov != NULL)
 * OR
 * in pages (kernel only: lnet_kiov_t *kiov != NULL).
 * The LND may NOT overwrite these fragment descriptors.
 * An 'offset' may specify a byte offset within the set of
 * fragments to start from
 */
/* Start sending a preformatted message. 'private' is NULL for PUT and
 * GET messages; otherwise this is a response to an incoming message
 * and 'private' is the 'private' passed to lnet_parse(). Return
 * non-zero for immediate failure, otherwise complete later with
 * lnet_finalize() */
int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg);
/* Start receiving 'mlen' bytes of payload data, skipping the following
 * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
 * lnet_parse(). Return non-zero for immediate failure, otherwise
 * complete later with lnet_finalize(). This also gives back a receive
 * credit if the LND does flow control. */
int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
int delayed, unsigned int niov,
struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
/* lnet_parse() has had to delay processing of this message
 * (e.g. waiting for a forwarding buffer or send credits). Give the
 * LND a chance to free urgently needed resources. If called, return 0
 * for success and do NOT give back a receive credit; that has to wait
 * until lnd_recv() gets called. On failure return < 0 and
 * release resources; lnd_recv() will not be called. */
int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
void **new_privatep);
/* notification of peer health */
void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
/* query of peer aliveness */
void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, cfs_time_t *when);
/* accept a new connection */
int (*lnd_accept)(struct lnet_ni *ni, socket_t *sock);
} lnd_t;
#define LNET_NI_STATUS_UP 0x15aac0de
#define LNET_NI_STATUS_DOWN 0xdeadface
#define LNET_NI_STATUS_INVALID 0x00000000
typedef struct {
lnet_nid_t ns_nid;
__u32 ns_status;
__u32 ns_unused;
} WIRE_ATTR lnet_ni_status_t;
struct lnet_tx_queue {
int tq_credits; /* # tx credits free */
int tq_credits_min; /* lowest it's been */
int tq_credits_max; /* total # tx credits */
struct list_head tq_delayed; /* delayed TXs */
};
#define LNET_MAX_INTERFACES 16
typedef struct lnet_ni {
spinlock_t ni_lock;
struct list_head ni_list; /* chain on ln_nis */
struct list_head ni_cptlist; /* chain on ln_nis_cpt */
int ni_maxtxcredits; /* # tx credits */
/* # per-peer send credits */
int ni_peertxcredits;
/* # per-peer router buffer credits */
int ni_peerrtrcredits;
/* seconds to consider peer dead */
int ni_peertimeout;
int ni_ncpts; /* number of CPTs */
__u32 *ni_cpts; /* bond NI on some CPTs */
lnet_nid_t ni_nid; /* interface's NID */
void *ni_data; /* instance-specific data */
lnd_t *ni_lnd; /* procedural interface */
struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */
int **ni_refs; /* percpt reference count */
long ni_last_alive; /* when I was last alive */
lnet_ni_status_t *ni_status; /* my health status */
/* equivalent interfaces to use */
char *ni_interfaces[LNET_MAX_INTERFACES];
} lnet_ni_t;
#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x
* of old LNet, so there shouldn't be any compatibility issue */
#define LNET_PING_FEAT_INVAL (0) /* no feature */
#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */
#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */
#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \
LNET_PING_FEAT_NI_STATUS)
typedef struct {
__u32 pi_magic;
__u32 pi_features;
lnet_pid_t pi_pid;
__u32 pi_nnis;
lnet_ni_status_t pi_ni[0];
} WIRE_ATTR lnet_ping_info_t;
/* router checker data, per router */
#define LNET_MAX_RTR_NIS 16
#define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS])
typedef struct {
/* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
struct list_head rcd_list;
lnet_handle_md_t rcd_mdh; /* ping buffer MD */
struct lnet_peer *rcd_gateway; /* reference to gateway */
lnet_ping_info_t *rcd_pinginfo; /* ping buffer */
} lnet_rc_data_t;
typedef struct lnet_peer {
struct list_head lp_hashlist; /* chain on peer hash */
struct list_head lp_txq; /* messages blocking for tx credits */
struct list_head lp_rtrq; /* messages blocking for router credits */
struct list_head lp_rtr_list; /* chain on router list */
int lp_txcredits; /* # tx credits available */
int lp_mintxcredits; /* low water mark */
int lp_rtrcredits; /* # router credits */
int lp_minrtrcredits; /* low water mark */
unsigned int lp_alive:1; /* alive/dead? */
unsigned int lp_notify:1; /* notification outstanding? */
unsigned int lp_notifylnd:1; /* outstanding notification for LND? */
unsigned int lp_notifying:1; /* some thread is handling notification */
unsigned int lp_ping_notsent; /* SEND event outstanding from ping */
int lp_alive_count; /* # times router went dead<->alive */
long lp_txqnob; /* bytes queued for sending */
cfs_time_t lp_timestamp; /* time of last aliveness news */
cfs_time_t lp_ping_timestamp; /* time of last ping attempt */
cfs_time_t lp_ping_deadline; /* != 0 if ping reply expected */
cfs_time_t lp_last_alive; /* when I was last alive */
cfs_time_t lp_last_query; /* when lp_ni was queried last time */
lnet_ni_t *lp_ni; /* interface peer is on */
lnet_nid_t lp_nid; /* peer's NID */
int lp_refcount; /* # refs */
int lp_cpt; /* CPT this peer attached on */
/* # refs from lnet_route_t::lr_gateway */
int lp_rtr_refcount;
/* returned RC ping features */
unsigned int lp_ping_feats;
struct list_head lp_routes; /* routers on this peer */
lnet_rc_data_t *lp_rcd; /* router checker state */
} lnet_peer_t;
/* peer hash size */
#define LNET_PEER_HASH_BITS 9
#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS)
/* peer hash table */
struct lnet_peer_table {
int pt_version; /* /proc validity stamp */
int pt_number; /* # peers extant */
struct list_head pt_deathrow; /* zombie peers */
struct list_head *pt_hash; /* NID->peer hash */
};
/* peer aliveness is enabled only on routers for peers in a network where the
* lnet_ni_t::ni_peertimeout has been set to a positive value */
#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
(lp)->lp_ni->ni_peertimeout > 0)
typedef struct {
struct list_head lr_list; /* chain on net */
struct list_head lr_gwlist; /* chain on gateway */
lnet_peer_t *lr_gateway; /* router node */
__u32 lr_net; /* remote network number */
int lr_seq; /* sequence for round-robin */
unsigned int lr_downis; /* number of down NIs */
unsigned int lr_hops; /* how far I am */
} lnet_route_t;
#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7)
#define LNET_REMOTE_NETS_HASH_MAX (1U << 16)
#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits)
typedef struct {
struct list_head lrn_list; /* chain on ln_remote_nets_hash */
struct list_head lrn_routes; /* routes to me */
__u32 lrn_net; /* my net number */
} lnet_remotenet_t;
typedef struct {
struct list_head rbp_bufs; /* my free buffer pool */
struct list_head rbp_msgs; /* messages blocking for a buffer */
int rbp_npages; /* # pages in each buffer */
int rbp_nbuffers; /* # buffers */
int rbp_credits; /* # free buffers / blocked messages */
int rbp_mincredits; /* low water mark */
} lnet_rtrbufpool_t;
typedef struct {
struct list_head rb_list; /* chain on rbp_bufs */
lnet_rtrbufpool_t *rb_pool; /* owning pool */
lnet_kiov_t rb_kiov[0]; /* the buffer space */
} lnet_rtrbuf_t;
typedef struct {
__u32 msgs_alloc;
__u32 msgs_max;
__u32 errors;
__u32 send_count;
__u32 recv_count;
__u32 route_count;
__u32 drop_count;
__u64 send_length;
__u64 recv_length;
__u64 route_length;
__u64 drop_length;
} WIRE_ATTR lnet_counters_t;
#define LNET_PEER_HASHSIZE 503 /* prime! */
#define LNET_NRBPOOLS 3 /* # different router buffer pools */
enum {
/* Didn't match anything */
LNET_MATCHMD_NONE = (1 << 0),
/* Matched OK */
LNET_MATCHMD_OK = (1 << 1),
/* Must be discarded */
LNET_MATCHMD_DROP = (1 << 2),
/* match and buffer is exhausted */
LNET_MATCHMD_EXHAUSTED = (1 << 3),
/* match or drop */
LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP),
};
/* Options for lnet_portal_t::ptl_options */
#define LNET_PTL_LAZY (1 << 0)
#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */
#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */
/* parameter for matching operations (GET, PUT) */
struct lnet_match_info {
__u64 mi_mbits; /* match bits */
lnet_process_id_t mi_id; /* peer process id — presumably; confirm at callers */
unsigned int mi_opc; /* operation code (GET or PUT) */
unsigned int mi_portal; /* portal index */
unsigned int mi_rlength; /* request length — TODO confirm semantics */
unsigned int mi_roffset; /* request offset — TODO confirm semantics */
};
/* ME hash of RDMA portal */
#define LNET_MT_HASH_BITS 8
#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS)
#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1)
/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash,
 * the last entry is reserved for MEs with ignore-bits */
#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE
/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which
 * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the
 * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] */
#define LNET_MT_BITS_U64 6 /* 2^6 bits */
#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1)
/* portal match table */
struct lnet_match_table {
/* reserved for upcoming patches, CPU partition ID */
unsigned int mt_cpt;
unsigned int mt_portal; /* portal index */
/* match table is set as "enabled" if there's non-exhausted MD
 * attached on mt_mhash, it's only valid for wildcard portal */
unsigned int mt_enabled;
/* bitmap to flag whether MEs on mt_hash are exhausted or not */
__u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
struct list_head *mt_mhash; /* matching hash */
};
/* these are only useful for wildcard portal */
/* Turn off message rotor for wildcard portals */
#define LNET_PTL_ROTOR_OFF 0
/* round-robin dispatch all PUT messages for wildcard portals */
#define LNET_PTL_ROTOR_ON 1
/* round-robin dispatch routed PUT message for wildcard portals */
#define LNET_PTL_ROTOR_RR_RT 2
/* dispatch routed PUT message by hashing source NID for wildcard portals */
#define LNET_PTL_ROTOR_HASH_RT 3
/* a portal: per-index matching state plus its per-CPT match tables */
typedef struct lnet_portal {
spinlock_t ptl_lock;
unsigned int ptl_index; /* portal ID, reserved */
/* flags on this portal: lazy, unique... */
unsigned int ptl_options;
/* list of messages which are stealing buffer */
struct list_head ptl_msg_stealing;
/* messages blocking for MD */
struct list_head ptl_msg_delayed;
/* Match table for each CPT */
struct lnet_match_table **ptl_mtables;
/* spread rotor of incoming "PUT" */
int ptl_rotor;
/* # active entries for this portal */
int ptl_mt_nmaps;
/* array of active entries' cpu-partition-id */
int ptl_mt_maps[0];
} lnet_portal_t;
/* library-handle (cookie) hash parameters — "LH" presumably = lib handle */
#define LNET_LH_HASH_BITS 12
#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS)
#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1)
/* resource container (ME, MD, EQ) */
struct lnet_res_container {
unsigned int rec_type; /* container type */
__u64 rec_lh_cookie; /* cookie generator */
struct list_head rec_active; /* active resource list */
struct list_head *rec_lh_hash; /* handle hash */
#ifdef LNET_USE_LIB_FREELIST
lnet_freelist_t rec_freelist; /* freelist for resources */
#endif
};
/* message container */
struct lnet_msg_container {
int msc_init; /* initialized or not */
/* max # threads finalizing */
int msc_nfinalizers;
/* msgs waiting to complete finalizing */
struct list_head msc_finalizing;
struct list_head msc_active; /* active message list */
/* threads doing finalization */
void **msc_finalizers;
#ifdef LNET_USE_LIB_FREELIST
lnet_freelist_t msc_freelist; /* freelist for messages */
#endif
};
/* Router Checker states */
#define LNET_RC_STATE_SHUTDOWN 0 /* not started */
#define LNET_RC_STATE_RUNNING 1 /* started up OK */
#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */
/* top-level state of the LNet stack */
typedef struct
{
/* CPU partition table of LNet */
struct cfs_cpt_table *ln_cpt_table;
/* number of CPTs in ln_cpt_table */
unsigned int ln_cpt_number;
unsigned int ln_cpt_bits;
/* protect LNet resources (ME/MD/EQ) */
struct cfs_percpt_lock *ln_res_lock;
/* # portals */
int ln_nportals;
/* the vector of portals */
lnet_portal_t **ln_portals;
/* percpt ME containers */
struct lnet_res_container **ln_me_containers;
/* percpt MD container */
struct lnet_res_container **ln_md_containers;
/* Event Queue container */
struct lnet_res_container ln_eq_container;
/* waitq/lock for EQ event waiting — presumably used by LNetEQWait; confirm */
wait_queue_head_t ln_eq_waitq;
spinlock_t ln_eq_wait_lock;
/* # bits sizing ln_remote_nets_hash below */
unsigned int ln_remote_nets_hbits;
/* protect NI, peer table, credits, routers, rtrbuf... */
struct cfs_percpt_lock *ln_net_lock;
/* percpt message containers for active/finalizing/freed message */
struct lnet_msg_container **ln_msg_containers;
/* percpt counters (see lnet_counters_t) */
lnet_counters_t **ln_counters;
struct lnet_peer_table **ln_peer_tables;
/* failure simulation */
struct list_head ln_test_peers;
struct list_head ln_nis; /* LND instances */
/* NIs bond on specific CPT(s) */
struct list_head ln_nis_cpt;
/* dying LND instances */
struct list_head ln_nis_zombie;
lnet_ni_t *ln_loni; /* the loopback NI */
/* NI to wait for events in */
lnet_ni_t *ln_eq_waitni;
/* remote networks with routes to them */
struct list_head *ln_remote_nets_hash;
/* validity stamp */
__u64 ln_remote_nets_version;
/* list of all known routers */
struct list_head ln_routers;
/* validity stamp */
__u64 ln_routers_version;
/* percpt router buffer pools */
lnet_rtrbufpool_t **ln_rtrpools;
/* ping target state (MD/EQ/info) */
lnet_handle_md_t ln_ping_target_md;
lnet_handle_eq_t ln_ping_target_eq;
lnet_ping_info_t *ln_ping_info;
/* router checker startup/shutdown state */
int ln_rc_state;
/* router checker's event queue */
lnet_handle_eq_t ln_rc_eqh;
/* rcd still pending on net */
struct list_head ln_rcd_deathrow;
/* rcd ready for free */
struct list_head ln_rcd_zombie;
/* serialise startup/shutdown */
struct semaphore ln_rc_signal;
/* serialise API / LND registration — presumably; confirm at lock sites */
struct mutex ln_api_mutex;
struct mutex ln_lnd_mutex;
int ln_init; /* LNetInit() called? */
/* Have I called LNetNIInit myself? */
int ln_niinit_self;
/* LNetNIInit/LNetNIFini counter */
int ln_refcount;
/* shutdown in progress */
int ln_shutdown;
int ln_routing; /* am I a router? */
lnet_pid_t ln_pid; /* requested pid */
/* uniquely identifies this ni in this epoch */
__u64 ln_interface_cookie;
/* registered LNDs */
struct list_head ln_lnds;
/* space for network names */
char *ln_network_tokens;
int ln_network_tokens_nob;
/* test protocol compatibility flags */
int ln_testprotocompat;
} lnet_t;
#endif

View file

@ -0,0 +1,43 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LINUX_API_SUPPORT_H__
#define __LINUX_API_SUPPORT_H__
#ifndef __LNET_API_SUPPORT_H__
#error Do not #include this file directly. #include <lnet /api-support.h> instead
#endif
#endif

View file

@ -0,0 +1,72 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_LINUX_LIB_LNET_H__
#define __LNET_LINUX_LIB_LNET_H__
/* only reachable via the generic <linux/lnet/lib-lnet.h> wrapper */
#ifndef __LNET_LIB_LNET_H__
#error Do not #include this file directly. #include <linux/lnet/lib-lnet.h> instead
#endif
/* kernel headers needed by the helpers below */
# include <asm/page.h>
# include <linux/string.h>
# include <asm/io.h>
# include <linux/libcfs/libcfs.h>
/* Widen the physical address of @p to 64 bits without sign-extension,
 * regardless of the native width of page_to_phys(). */
static inline __u64
lnet_page2phys(struct page *p)
{
	/* branch on the return width of page_to_phys(); the compiler
	 * discards the branches that cannot apply on this arch */
	if (sizeof(typeof(page_to_phys(p))) == 4) {
		/* 32 bit physical address (<= 4G memory machine): cast via
		 * unsigned long so widening to 64 bits zero-extends rather
		 * than sign-extends */
		return (unsigned long)page_to_phys(p);
	}

	if (sizeof(typeof(page_to_phys(p))) == 8) {
		/* already a 64 bit physical address */
		return page_to_phys(p);
	}

	LBUG();
	return 0;
}
#define LNET_ROUTER
#endif /* __LNET_LINUX_LIB_LNET_H__ */

View file

@ -0,0 +1,45 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_LINUX_LIB_TYPES_H__
#define __LNET_LINUX_LIB_TYPES_H__
/* only reachable via the generic <linux/lnet/lib-types.h> wrapper */
#ifndef __LNET_LIB_TYPES_H__
#error Do not #include this file directly. #include <linux/lnet/lib-types.h> instead
#endif
/* kernel types required by the generic lib-types.h that includes us */
# include <linux/uio.h>
# include <linux/types.h>
#endif

View file

@ -0,0 +1,56 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_LINUX_LNET_H__
#define __LNET_LINUX_LNET_H__
/* only reachable via the generic <linux/lnet/lnet.h> wrapper */
#ifndef __LNET_H__
#error Do not #include this file directly. #include <linux/lnet/lnet.h> instead
#endif
/*
 * lnet.h
 *
 * User application interface file
 */
#include <linux/uio.h>
#include <linux/types.h>
/* thin portability wrapper over the kernel's tcp_sendpage() */
#define cfs_tcp_sendpage(sk, page, offset, size, flags) \
tcp_sendpage(sk, page, offset, size, flags)
#endif

View file

@ -0,0 +1,51 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_SYSCTL_H__
#define __LNET_SYSCTL_H__
#if defined(CONFIG_SYSCTL)
/* binary-sysctl root IDs for the LND modules — NOTE(review): meaning
 * inferred from the names; confirm against the individual LND sources */
#define CTL_KRANAL 201
#define CTL_O2IBLND 205
#define CTL_PTLLND 206
#define CTL_QSWNAL 207
#define CTL_SOCKLND 208
#define CTL_GNILND 210
#endif
#endif

View file

@ -0,0 +1,51 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_H__
#define __LNET_H__
/*
 * lnet.h
 *
 * User application interface file
 */
#include <linux/lnet/linux/lnet.h>
#include <linux/lnet/types.h>
#include <linux/lnet/api.h>
/* shared nidstring buffer pool — presumably used by libcfs_nid2str(); confirm */
#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */
#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */
#endif

View file

@ -0,0 +1,80 @@
/*
* This file is part of Portals, http://www.sf.net/projects/lustre/
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Portals is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Portals; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* header for libptlctl.a
*/
#ifndef _PTLCTL_H_
#define _PTLCTL_H_

#include <linux/libcfs/libcfs.h>
#include <linux/lnet/types.h>

/* character-device identities used by the control library */
#define LNET_DEV_ID 0
#define LNET_DEV_PATH "/dev/lnet"
#define LNET_DEV_MAJOR 10
#define LNET_DEV_MINOR 240
#define OBD_DEV_ID 1
#define OBD_DEV_NAME "obd"
#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
#define OBD_DEV_MAJOR 10
#define OBD_DEV_MINOR 241
#define SMFS_DEV_ID 2
#define SMFS_DEV_PATH "/dev/snapdev"
#define SMFS_DEV_MAJOR 10
#define SMFS_DEV_MINOR 242

/* portals/LNet control commands; "jt_" entries are command-table handlers.
 * (Spacing normalized: kernel style forbids a space between a function
 * name and its opening parenthesis.) */
int ptl_initialize(int argc, char **argv);
int jt_ptl_network(int argc, char **argv);
int jt_ptl_list_nids(int argc, char **argv);
int jt_ptl_which_nid(int argc, char **argv);
int jt_ptl_print_interfaces(int argc, char **argv);
int jt_ptl_add_interface(int argc, char **argv);
int jt_ptl_del_interface(int argc, char **argv);
int jt_ptl_print_peers(int argc, char **argv);
int jt_ptl_add_peer(int argc, char **argv);
int jt_ptl_del_peer(int argc, char **argv);
int jt_ptl_print_connections(int argc, char **argv);
int jt_ptl_disconnect(int argc, char **argv);
int jt_ptl_push_connection(int argc, char **argv);
int jt_ptl_print_active_txs(int argc, char **argv);
int jt_ptl_ping(int argc, char **argv);
int jt_ptl_mynid(int argc, char **argv);
int jt_ptl_add_uuid(int argc, char **argv);
int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
int jt_ptl_close_uuid(int argc, char **argv);
int jt_ptl_del_uuid(int argc, char **argv);
int jt_ptl_add_route(int argc, char **argv);
int jt_ptl_del_route(int argc, char **argv);
int jt_ptl_notify_router(int argc, char **argv);
int jt_ptl_print_routes(int argc, char **argv);
int jt_ptl_fail_nid(int argc, char **argv);
int jt_ptl_lwt(int argc, char **argv);
int jt_ptl_testprotocompat(int argc, char **argv);
int jt_ptl_memhog(int argc, char **argv);

/* debug-log control commands */
int dbg_initialize(int argc, char **argv);
int jt_dbg_filter(int argc, char **argv);
int jt_dbg_show(int argc, char **argv);
int jt_dbg_list(int argc, char **argv);
int jt_dbg_debug_kernel(int argc, char **argv);
int jt_dbg_debug_daemon(int argc, char **argv);
int jt_dbg_debug_file(int argc, char **argv);
int jt_dbg_clear_debug_buf(int argc, char **argv);
int jt_dbg_mark_debug_buf(int argc, char **argv);
int jt_dbg_modules(int argc, char **argv);
int jt_dbg_panic(int argc, char **argv);
#endif

View file

@ -0,0 +1,491 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/lnetst.h
*
* Author: Liang Zhen <liangzhen@clusterfs.com>
*/
#ifndef __LNET_ST_H__
#define __LNET_ST_H__
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-types.h>
/* selftest session feature bits */
#define LST_FEAT_NONE (0)
#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */
#define LST_FEATS_EMPTY (LST_FEAT_NONE)
#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN)
#define LST_NAME_SIZE 32 /* max name buffer length */
/* LNet selftest ioctl opcodes */
#define LSTIO_DEBUG 0xC00 /* debug */
#define LSTIO_SESSION_NEW 0xC01 /* create session */
#define LSTIO_SESSION_END 0xC02 /* end session */
#define LSTIO_SESSION_INFO 0xC03 /* query session */
#define LSTIO_GROUP_ADD 0xC10 /* add group */
#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */
#define LSTIO_GROUP_INFO 0xC12 /* query default information of specified group */
#define LSTIO_GROUP_DEL 0xC13 /* delete group */
#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */
#define LSTIO_GROUP_UPDATE 0xC15 /* update group */
#define LSTIO_BATCH_ADD 0xC20 /* add batch */
#define LSTIO_BATCH_START 0xC21 /* start batch */
#define LSTIO_BATCH_STOP 0xC22 /* stop batch */
#define LSTIO_BATCH_DEL 0xC23 /* delete batch */
#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */
#define LSTIO_BATCH_INFO 0xC25 /* show detail of specified batch */
#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */
#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */
#define LSTIO_STAT_QUERY 0xC30 /* get stats */
typedef struct {
lnet_nid_t ses_nid; /* nid of console node */
__u64 ses_stamp; /* time stamp */
} lst_sid_t; /*** session id */
extern lst_sid_t LST_INVALID_SID;
typedef struct {
__u64 bat_id; /* unique id in session */
} lst_bid_t; /*** batch id (group of tests) */
/* Status of test node */
#define LST_NODE_ACTIVE 0x1 /* node in this session */
#define LST_NODE_BUSY 0x2 /* node is taken by other session */
#define LST_NODE_DOWN 0x4 /* node is down */
#define LST_NODE_UNKNOWN 0x8 /* node not in session */
typedef struct {
lnet_process_id_t nde_id; /* id of node */
int nde_state; /* state of node */
} lstcon_node_ent_t; /*** node entry, for list_group command */
typedef struct {
int nle_nnode; /* # of nodes */
int nle_nactive; /* # of active nodes */
int nle_nbusy; /* # of busy nodes */
int nle_ndown; /* # of down nodes */
int nle_nunknown; /* # of unknown nodes */
} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */
typedef struct {
int tse_type; /* test type */
int tse_loop; /* loop count */
int tse_concur; /* concurrency of test */
} lstcon_test_ent_t; /*** test summary entry, for list_batch command */
typedef struct {
int bae_state; /* batch status */
int bae_timeout; /* batch timeout */
int bae_ntest; /* # of tests in the batch */
} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */
typedef struct {
lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */
lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */
union {
lstcon_test_ent_t tbe_test; /* test entry */
lstcon_batch_ent_t tbe_batch; /* batch entry */
} u;
} lstcon_test_batch_ent_t; /*** test/batch verbose information entry,
 *** for list_batch command */
typedef struct {
struct list_head rpe_link; /* link chain */
lnet_process_id_t rpe_peer; /* peer's id */
struct timeval rpe_stamp; /* time stamp of RPC */
int rpe_state; /* peer's state */
int rpe_rpc_errno; /* RPC errno */
lst_sid_t rpe_sid; /* peer's session id */
int rpe_fwk_errno; /* framework errno */
int rpe_priv[4]; /* private data */
char rpe_payload[0]; /* private reply payload */
} lstcon_rpc_ent_t;
typedef struct {
int trs_rpc_stat[4]; /* RPC stats (0: total, 1: success, 2: failure, 3: reserved) — indices per the lstcon_rpc_stat_* accessors */
int trs_rpc_errno; /* RPC errno */
int trs_fwk_stat[8]; /* framework stat */
int trs_fwk_errno; /* errno of the first remote error */
void *trs_fwk_private; /* private framework stat */
} lstcon_trans_stat_t;
/* accessors for lstcon_trans_stat_t counters: each returns the counter,
 * bumping it first when @inc is non-zero */

static inline int
lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_rpc_stat[0]++;
	return stat->trs_rpc_stat[0];
}

static inline int
lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_rpc_stat[1]++;
	return stat->trs_rpc_stat[1];
}

static inline int
lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_rpc_stat[2]++;
	return stat->trs_rpc_stat[2];
}

static inline int
lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[0]++;
	return stat->trs_fwk_stat[0];
}

static inline int
lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[1]++;
	return stat->trs_fwk_stat[1];
}

static inline int
lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[0]++;
	return stat->trs_fwk_stat[0];
}

static inline int
lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[1]++;
	return stat->trs_fwk_stat[1];
}

static inline int
lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[2]++;
	return stat->trs_fwk_stat[2];
}

static inline int
lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[0]++;
	return stat->trs_fwk_stat[0];
}

static inline int
lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[1]++;
	return stat->trs_fwk_stat[1];
}

static inline int
lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[0]++;
	return stat->trs_fwk_stat[0];
}

static inline int
lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[1]++;
	return stat->trs_fwk_stat[1];
}

static inline int
lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[2]++;
	return stat->trs_fwk_stat[2];
}

static inline int
lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[0]++;
	return stat->trs_fwk_stat[0];
}

static inline int
lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc)
{
	if (inc)
		stat->trs_fwk_stat[1]++;
	return stat->trs_fwk_stat[1];
}
/* create a session */
typedef struct {
int lstio_ses_key; /* IN: local key */
int lstio_ses_timeout; /* IN: session timeout */
int lstio_ses_force; /* IN: force create ? */
/** IN: session features */
unsigned lstio_ses_feats;
lst_sid_t *lstio_ses_idp; /* OUT: session id */
int lstio_ses_nmlen; /* IN: name length */
char *lstio_ses_namep; /* IN: session name */
} lstio_session_new_args_t;
/* query current session */
typedef struct {
lst_sid_t *lstio_ses_idp; /* OUT: session id */
int *lstio_ses_keyp; /* OUT: local key */
/** OUT: session features */
unsigned *lstio_ses_featp;
lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: node counts of the session */
int lstio_ses_nmlen; /* IN: name length */
char *lstio_ses_namep; /* OUT: session name */
} lstio_session_info_args_t;
/* delete a session */
typedef struct {
int lstio_ses_key; /* IN: session key */
} lstio_session_end_args_t;
/* debug target types — presumably for lstio_dbg_type below; confirm */
#define LST_OPC_SESSION 1
#define LST_OPC_GROUP 2
#define LST_OPC_NODES 3
#define LST_OPC_BATCHCLI 4
#define LST_OPC_BATCHSRV 5
typedef struct {
int lstio_dbg_key; /* IN: session key */
int lstio_dbg_type; /* IN: debug session|batch|group|nodes list */
int lstio_dbg_flags; /* IN: reserved debug flags */
int lstio_dbg_timeout; /* IN: timeout of debug */
int lstio_dbg_nmlen; /* IN: len of name */
char *lstio_dbg_namep; /* IN: name of group|batch */
int lstio_dbg_count; /* IN: # of test nodes to debug */
lnet_process_id_t *lstio_dbg_idsp; /* IN: id of test nodes */
struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */
} lstio_debug_args_t;
/* add a group */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_nmlen; /* IN: name length */
char *lstio_grp_namep; /* IN: group name */
} lstio_group_add_args_t;
/* delete a group */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_nmlen; /* IN: name length */
char *lstio_grp_namep; /* IN: group name */
} lstio_group_del_args_t;
/* opcodes for lstio_group_update_args_t::lstio_grp_opc */
#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */
#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */
#define LST_GROUP_RMND 3 /* delete nodes from the group */
/* update a group */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_opc; /* IN: OPC */
int lstio_grp_args; /* IN: arguments */
int lstio_grp_nmlen; /* IN: name length */
char *lstio_grp_namep; /* IN: group name */
int lstio_grp_count; /* IN: # of nodes id */
lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */
struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
} lstio_group_update_args_t;
/* add nodes to a group */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_nmlen; /* IN: name length */
char *lstio_grp_namep; /* IN: group name */
int lstio_grp_count; /* IN: # of nodes */
/** OUT: session features */
unsigned *lstio_grp_featp;
lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */
struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */
} lstio_group_nodes_args_t;
/* list groups by index */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_idx; /* IN: group idx */
int lstio_grp_nmlen; /* IN: name len */
char *lstio_grp_namep; /* OUT: name */
} lstio_group_list_args_t;
/* query a group */
typedef struct {
int lstio_grp_key; /* IN: session key */
int lstio_grp_nmlen; /* IN: name len */
char *lstio_grp_namep; /* IN: name */
lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */
int *lstio_grp_idxp; /* IN/OUT: node index */
int *lstio_grp_ndentp; /* IN/OUT: # of nodent */
lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */
} lstio_group_info_args_t;
#define LST_DEFAULT_BATCH "batch" /* default batch name */
/* add a batch */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
} lstio_batch_add_args_t;
/* delete a batch */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
} lstio_batch_del_args_t;
/* start a batch */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_timeout; /* IN: timeout for the batch */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
} lstio_batch_run_args_t;
/* stop a batch */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_force; /* IN: abort unfinished test RPC */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
} lstio_batch_stop_args_t;
/* query batch status */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_testidx; /* IN: test index */
int lstio_bat_client; /* IN: is test client? */
int lstio_bat_timeout; /* IN: timeout for waiting */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */
} lstio_batch_query_args_t;
/* list batches by index */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_idx; /* IN: index */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: batch name */
} lstio_batch_list_args_t;
/* query a batch */
typedef struct {
int lstio_bat_key; /* IN: session key */
int lstio_bat_nmlen; /* IN: name length */
char *lstio_bat_namep; /* IN: name */
int lstio_bat_server; /* IN: query server or not */
int lstio_bat_testidx; /* IN: test index */
lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */
int *lstio_bat_idxp; /* IN/OUT: index of node */
int *lstio_bat_ndentp; /* IN/OUT: # of nodent */
lstcon_node_ent_t *lstio_bat_dentsp; /* OUT: array of nodent */
} lstio_batch_info_args_t;
/* add stat in session */
typedef struct {
int lstio_sta_key; /* IN: session key */
int lstio_sta_timeout; /* IN: timeout for stat request */
int lstio_sta_nmlen; /* IN: group name length */
char *lstio_sta_namep; /* IN: group name */
int lstio_sta_count; /* IN: # of pid */
lnet_process_id_t *lstio_sta_idsp; /* IN: pid */
struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */
} lstio_stat_args_t;
typedef enum {
LST_TEST_BULK = 1,
LST_TEST_PING = 2
} lst_test_type_t;
/* IOC argument block: create a test in a batch. */
#define LST_MAX_CONCUR 1024 /* Max concurrency of test */
typedef struct {
int lstio_tes_key; /* IN: session key */
int lstio_tes_bat_nmlen; /* IN: batch name len */
char *lstio_tes_bat_name; /* IN: batch name */
int lstio_tes_type; /* IN: test type (lst_test_type_t) */
int lstio_tes_oneside; /* IN: one sided test */
int lstio_tes_loop; /* IN: loop count */
int lstio_tes_concur; /* IN: concurrency */
int lstio_tes_dist; /* IN: node distribution in destination groups */
int lstio_tes_span; /* IN: node span in destination groups */
int lstio_tes_sgrp_nmlen; /* IN: source group name length */
char *lstio_tes_sgrp_name; /* IN: group name */
int lstio_tes_dgrp_nmlen; /* IN: destination group name length */
char *lstio_tes_dgrp_name; /* IN: group name */
int lstio_tes_param_len; /* IN: param buffer len */
void *lstio_tes_param; /* IN: parameter blob, interpreted per
 lstio_tes_type:
 lst_test_bulk_param_t,
 lst_test_ping_param_t,
 ... more */
int *lstio_tes_retp; /* OUT: private returned value */
struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */
} lstio_test_args_t;
/* Direction of I/O for a bulk (brw) test. */
typedef enum {
LST_BRW_READ = 1,
LST_BRW_WRITE = 2
} lst_brw_type_t;
/* Data-verification level for bulk test buffers; NONE/SIMPLE/FULL
 * select increasing verification cost (exact semantics live in the
 * brw test implementation). */
typedef enum {
LST_BRW_CHECK_NONE = 1,
LST_BRW_CHECK_SIMPLE = 2,
LST_BRW_CHECK_FULL = 3
} lst_brw_flags_t;
/* Per-test parameters for LST_TEST_BULK, passed through
 * lstio_test_args_t::lstio_tes_param. */
typedef struct {
int blk_opc; /* bulk operation code */
int blk_size; /* size (bytes) */
int blk_time; /* time of running the test */
int blk_flags; /* reserved flags */
} lst_test_bulk_param_t;
/* Per-test parameters for LST_TEST_PING, passed through
 * lstio_test_args_t::lstio_tes_param. */
typedef struct {
int png_size; /* size of ping message */
int png_time; /* time */
int png_loop; /* loop */
int png_flags; /* reserved flags */
} lst_test_ping_param_t;
/* more tests */
/* Selftest RPC-layer counters; WIRE_ATTR marks this as a packed
 * wire-format structure, so field order and widths must not change. */
typedef struct {
__u32 errors;
__u32 rpcs_sent;
__u32 rpcs_rcvd;
__u32 rpcs_dropped;
__u32 rpcs_expired;
__u64 bulk_get; /* presumably bytes moved by bulk GET -- confirm */
__u64 bulk_put; /* presumably bytes moved by bulk PUT -- confirm */
} WIRE_ATTR srpc_counters_t;
/* Selftest framework (sfw) counters; WIRE_ATTR: packed wire format. */
typedef struct {
/** milliseconds since current session started */
__u32 running_ms;
__u32 active_batches;
__u32 zombie_sessions;
__u32 brw_errors;
__u32 ping_errors;
} WIRE_ATTR sfw_counters_t;
#endif

View file

@ -0,0 +1,94 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/ptllnd.h
*
* Author: PJ Kirner <pjkirner@clusterfs.com>
*/
/*
* The PTLLND was designed to support Portals with
* Lustre and non-lustre UNLINK semantics.
* However, for now both targets -- Cray Portals
* on the XT3 and Lustre Portals (for testing) --
* have Lustre UNLINK semantics, so this is defined
* by default.
*/
#define LUSTRE_PORTALS_UNLINK_SEMANTICS
#ifdef _USING_LUSTRE_PORTALS_
/* NIDs are 64-bits on Lustre Portals */
#define FMT_NID LPU64
#define FMT_PID "%d"
/* When using Lustre Portals, Lustre completion semantics are implicit */
#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
#else /* _USING_CRAY_PORTALS_ */
/* NIDs are integers on Cray Portals */
#define FMT_NID "%u"
#define FMT_PID "%d"
/* When using Cray Portals this is defined in the Cray Portals Header*/
/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */
/* Can compare handles directly on Cray Portals */
#define PtlHandleIsEqual(a,b) ((a) == (b))
/* Different error types on Cray Portals */
#define ptl_err_t ptl_ni_fail_t
/*
* The Cray Portals has no maximum number of IOVs. The
* maximum is limited only by memory and size of the
* int parameters (2^31-1).
* Lustre only really requires that the underlying
* implementation support at least LNET_MAX_IOV,
* so for Cray portals we can safely just use that
* value here.
*
*/
#define PTL_MD_MAX_IOV LNET_MAX_IOV
#endif
#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID
/* Align incoming small request messages to an 8 byte boundary if this is
* supported to avoid alignment issues on some architectures */
#ifndef PTL_MD_LOCAL_ALIGN8
# define PTL_MD_LOCAL_ALIGN8 0
#endif

View file

@ -0,0 +1,124 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/ptllnd_wire.h
*
* Author: PJ Kirner <pjkirner@clusterfs.com>
*/
/* Minimum buffer size that any peer will post to receive ptllnd messages */
#define PTLLND_MIN_BUFFER_SIZE 256
/************************************************************************
* Tunable defaults that {u,k}lnds/ptllnd should have in common.
*/
#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */
#define PTLLND_PID 9 /* The Portals PID */
#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */
/* Default buffer size for kernel ptllnds (guaranteed eager) */
#define PTLLND_MAX_KLND_MSG_SIZE 512
/* Default buffer size for catamount ptllnds (not guaranteed eager) - large
* enough to avoid RDMA for anything sent while control is not in liblustre */
#define PTLLND_MAX_ULND_MSG_SIZE 512
/************************************************************************
* Portals LND Wire message format.
* These are sent in sender's byte order (i.e. receiver flips).
*/
#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved
* above is for bulk data transfer */
#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */
/* Immediate (eagerly buffered) message: lnet header followed by the
 * payload in-line.  The GCC zero-length array idiom is used rather
 * than a C99 flexible array member because this struct is embedded
 * inside the ptlm_u union of kptl_msg_t, where a flexible member
 * would be ill-formed. */
typedef struct
{
lnet_hdr_t kptlim_hdr; /* portals header */
char kptlim_payload[0]; /* piggy-backed payload */
} WIRE_ATTR kptl_immediate_msg_t;
/* RDMA request message: lnet header plus the match bits identifying
 * the bulk buffer to transfer. */
typedef struct
{
lnet_hdr_t kptlrm_hdr; /* portals header */
__u64 kptlrm_matchbits; /* matchbits */
} WIRE_ATTR kptl_rdma_msg_t;
/* HELLO handshake payload: advertises the sender's match bits and the
 * largest message size it will accept. */
typedef struct
{
__u64 kptlhm_matchbits; /* matchbits */
__u32 kptlhm_max_msg_size; /* max message size */
} WIRE_ATTR kptl_hello_msg_t;
/* Portals LND wire message.  Messages travel in the sender's byte
 * order and the receiver flips (see the section comment in this
 * file); ptlm_magic and ptlm_version are fixed forever so any peer
 * can recognise and byte-swap a message. */
typedef struct
{
/* First 2 fields fixed FOR ALL TIME */
__u32 ptlm_magic; /* I'm a Portals LND message */
__u16 ptlm_version; /* this is my version number */
__u8 ptlm_type; /* the message type */
__u8 ptlm_credits; /* returned credits */
__u32 ptlm_nob; /* # bytes in whole message */
__u32 ptlm_cksum; /* checksum (0 == no checksum) */
__u64 ptlm_srcnid; /* sender's NID */
__u64 ptlm_srcstamp; /* sender's incarnation */
__u64 ptlm_dstnid; /* destination's NID */
__u64 ptlm_dststamp; /* destination's incarnation */
__u32 ptlm_srcpid; /* sender's PID */
__u32 ptlm_dstpid; /* destination's PID */
union {
kptl_immediate_msg_t immediate;
kptl_rdma_msg_t rdma;
kptl_hello_msg_t hello;
} WIRE_ATTR ptlm_u;
} kptl_msg_t;
/* kptl_msg_t::ptlm_credits is only a __u8 */
#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1)
#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC
#define PTLLND_MSG_VERSION 0x04
#define PTLLND_RDMA_OK 0x00
#define PTLLND_RDMA_FAIL 0x01
#define PTLLND_MSG_TYPE_INVALID 0x00
#define PTLLND_MSG_TYPE_PUT 0x01
#define PTLLND_MSG_TYPE_GET 0x02
#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/
#define PTLLND_MSG_TYPE_NOOP 0x04
#define PTLLND_MSG_TYPE_HELLO 0x05
#define PTLLND_MSG_TYPE_NAK 0x06

View file

@ -0,0 +1,103 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/include/lnet/socklnd.h
*
* #defines shared between socknal implementation and utilities
*/
#ifndef __LNET_LNET_SOCKLND_H__
#define __LNET_LNET_SOCKLND_H__
#include <linux/lnet/types.h>
#include <linux/lnet/lib-types.h>
#define SOCKLND_CONN_NONE (-1)
#define SOCKLND_CONN_ANY 0
#define SOCKLND_CONN_CONTROL 1
#define SOCKLND_CONN_BULK_IN 2
#define SOCKLND_CONN_BULK_OUT 3
#define SOCKLND_CONN_NTYPES 4
#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN
/* socklnd connection handshake ("hello") message.  kshm_ips is a GCC
 * zero-length array: kshm_nips IP addresses follow the fixed header
 * on the wire. */
typedef struct {
__u32 kshm_magic; /* magic number of socklnd message */
__u32 kshm_version; /* version of socklnd message */
lnet_nid_t kshm_src_nid; /* sender's nid */
lnet_nid_t kshm_dst_nid; /* destination nid */
lnet_pid_t kshm_src_pid; /* sender's pid */
lnet_pid_t kshm_dst_pid; /* destination pid */
__u64 kshm_src_incarnation; /* sender's incarnation */
__u64 kshm_dst_incarnation; /* destination's incarnation */
__u32 kshm_ctype; /* connection type */
__u32 kshm_nips; /* # IP addrs */
__u32 kshm_ips[0]; /* IP addrs */
} WIRE_ATTR ksock_hello_msg_t;
/* Wire container for an lnet message carried by the socklnd; the
 * payload follows immediately after this struct (see comment below). */
typedef struct {
lnet_hdr_t ksnm_hdr; /* lnet hdr */
/*
 * ksnm_payload is removed because of winnt compiler's limitation:
 * zero-sized array can only be placed at the tail of [nested]
 * structure definitions. lnet payload will be stored just after
 * the body of structure ksock_lnet_msg_t
 */
} WIRE_ATTR ksock_lnet_msg_t;
/* Top-level socklnd wire message; ksm_type is KSOCK_MSG_NOOP or
 * KSOCK_MSG_LNET (defined below). */
typedef struct {
__u32 ksm_type; /* type of socklnd message */
__u32 ksm_csum; /* checksum if != 0 */
__u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */
union {
ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */
} WIRE_ATTR ksm_u;
} WIRE_ATTR ksock_msg_t;
/*
 * Initialise the fixed header of a socklnd message: stamp the message
 * type and clear the checksum and both zero-copy cookies.  The payload
 * union (ksm_u) is deliberately left untouched.
 */
static inline void
socklnd_init_msg(ksock_msg_t *msg, int type)
{
	msg->ksm_type = type;
	msg->ksm_csum = 0;
	msg->ksm_zc_cookies[0] = 0;
	msg->ksm_zc_cookies[1] = 0;
}
#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */
#define KSOCK_MSG_LNET 0xc1 /* lnet msg */
/* We need to know this number to parse hello msg from ksocklnd in
* other LND (usocklnd, for example) */
#define KSOCK_PROTO_V2 2
#define KSOCK_PROTO_V3 3
#endif

View file

@ -0,0 +1,503 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __LNET_TYPES_H__
#define __LNET_TYPES_H__
/** \addtogroup lnet
* @{ */
#include <linux/libcfs/libcfs.h>
/** \addtogroup lnet_addr
* @{ */
/** Portal reserved for LNet's own use.
* \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments.
*/
#define LNET_RESERVED_PORTAL 0
/**
* Address of an end-point in an LNet network.
*
* A node can have multiple end-points and hence multiple addresses.
* An LNet network can be a simple network (e.g. tcp0) or a network of
* LNet networks connected by LNet routers. Therefore an end-point address
* has two parts: network ID, and address within a network.
*
* \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID.
*/
typedef __u64 lnet_nid_t;
/**
* ID of a process in a node. Shortened as PID to distinguish from
* lnet_process_id_t, the global process ID.
*/
typedef __u32 lnet_pid_t;
/** wildcard NID that matches any end-point address */
#define LNET_NID_ANY ((lnet_nid_t) -1)
/** wildcard PID that matches any lnet_pid_t */
#define LNET_PID_ANY ((lnet_pid_t) -1)
#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
#define LNET_TIME_FOREVER (-1)
/**
* Objects maintained by the LNet are accessed through handles. Handle types
* have names of the form lnet_handle_xx_t, where xx is one of the two letter
* object type codes ('eq' for event queue, 'md' for memory descriptor, and
* 'me' for match entry).
* Each type of object is given a unique handle type to enhance type checking.
* The type lnet_handle_any_t can be used when a generic handle is needed.
* Every handle value can be converted into a value of type lnet_handle_any_t
* without loss of information.
*/
/* Generic handle: an opaque 64-bit cookie.  The eq/md/me handle types
 * below are plain typedef aliases of it, so any handle value converts
 * to lnet_handle_any_t without loss (see the comment above). */
typedef struct {
__u64 cookie;
} lnet_handle_any_t;
typedef lnet_handle_any_t lnet_handle_eq_t; /* event queue handle */
typedef lnet_handle_any_t lnet_handle_md_t; /* memory descriptor handle */
typedef lnet_handle_any_t lnet_handle_me_t; /* match entry handle */
#define LNET_WIRE_HANDLE_COOKIE_NONE (-1)
/**
* Invalidate handle \a h.
*/
static inline void LNetInvalidateHandle(lnet_handle_any_t *h)
{
h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; /* reserved "no object" cookie */
}
/**
* Compare handles \a h1 and \a h2.
*
* \return 1 if handles are equal, 0 if otherwise.
*/
/* Two handles refer to the same object iff their cookies match. */
static inline int LNetHandleIsEqual(lnet_handle_any_t h1, lnet_handle_any_t h2)
{
	return h1.cookie == h2.cookie;
}
/**
* Check whether handle \a h is invalid.
*
* \return 1 if handle is invalid, 0 if valid.
*/
/* A handle is invalid iff it carries the reserved "none" cookie. */
static inline int LNetHandleIsInvalid(lnet_handle_any_t h)
{
	return h.cookie == LNET_WIRE_HANDLE_COOKIE_NONE;
}
/**
* Global process ID.
*/
typedef struct {
/** node id */
lnet_nid_t nid;
/** process id */
lnet_pid_t pid;
} lnet_process_id_t;
/** @} lnet_addr */
/** \addtogroup lnet_me
* @{ */
/**
* Specifies whether the match entry or memory descriptor should be unlinked
* automatically (LNET_UNLINK) or not (LNET_RETAIN).
*/
typedef enum {
LNET_RETAIN = 0, /* keep the ME/MD */
LNET_UNLINK /* unlink automatically */
} lnet_unlink_t;
/**
* Values of the type lnet_ins_pos_t are used to control where a new match
* entry is inserted. The value LNET_INS_BEFORE is used to insert the new
* entry before the current entry or before the head of the list. The value
* LNET_INS_AFTER is used to insert the new entry after the current entry
* or after the last item in the list.
*/
typedef enum {
/** insert ME before current position or head of the list */
LNET_INS_BEFORE,
/** insert ME after current position or tail of the list */
LNET_INS_AFTER,
/** attach ME at tail of local CPU partition ME list */
LNET_INS_LOCAL
} lnet_ins_pos_t;
/** @} lnet_me */
/** \addtogroup lnet_md
* @{ */
/**
* Defines the visible parts of a memory descriptor. Values of this type
* are used to initialize memory descriptors.
*/
typedef struct {
/**
* Specify the memory region associated with the memory descriptor.
* If the options field has:
* - LNET_MD_KIOV bit set: The start field points to the starting
* address of an array of lnet_kiov_t and the length field specifies
* the number of entries in the array. The length can't be bigger
* than LNET_MAX_IOV. The lnet_kiov_t is used to describe page-based
* fragments that are not necessarily mapped in virtual memory.
* - LNET_MD_IOVEC bit set: The start field points to the starting
* address of an array of struct iovec and the length field specifies
* the number of entries in the array. The length can't be bigger
* than LNET_MAX_IOV. The struct iovec is used to describe fragments
* that have virtual addresses.
* - Otherwise: The memory region is contiguous. The start field
* specifies the starting address for the memory region and the
* length field specifies its length.
*
* When the memory region is fragmented, all fragments but the first
* one must start on page boundary, and all but the last must end on
* page boundary.
*/
void *start;
unsigned int length;
/**
* Specifies the maximum number of operations that can be performed
* on the memory descriptor. An operation is any action that could
* possibly generate an event. In the usual case, the threshold value
* is decremented for each operation on the MD. When the threshold
* drops to zero, the MD becomes inactive and does not respond to
* operations. A threshold value of LNET_MD_THRESH_INF indicates that
* there is no bound on the number of operations that may be applied
* to a MD.
*/
int threshold;
/**
* Specifies the largest incoming request that the memory descriptor
* should respond to. When the unused portion of a MD (length -
* local offset) falls below this value, the MD becomes inactive and
* does not respond to further operations. This value is only used
* if the LNET_MD_MAX_SIZE option is set.
*/
int max_size;
/**
* Specifies the behavior of the memory descriptor. A bitwise OR
* of the following values can be used:
* - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD.
* - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD.
* - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory
* region is provided by the incoming request. By default, the
* offset is maintained locally. When maintained locally, the
* offset is incremented by the length of the request so that
* the next operation (PUT or GET) will access the next part of
* the memory region. Note that only one offset variable exists
* per memory descriptor. If both PUT and GET operations are
* performed on a memory descriptor, the offset is updated each time.
* - LNET_MD_TRUNCATE: The length provided in the incoming request can
* be reduced to match the memory available in the region (determined
* by subtracting the offset from the length of the memory region).
* By default, if the length in the incoming operation is greater
* than the amount of memory available, the operation is rejected.
* - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for
* incoming PUT operations, even if requested. By default,
* acknowledgments are sent for PUT operations that request an
* acknowledgment. Acknowledgments are never sent for GET operations.
* The data sent in the REPLY serves as an implicit acknowledgment.
* - LNET_MD_KIOV: The start and length fields specify an array of
* lnet_kiov_t.
* - LNET_MD_IOVEC: The start and length fields specify an array of
* struct iovec.
* - LNET_MD_MAX_SIZE: The max_size field is valid.
*
* Note:
* - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather
* capability for memory descriptors. They can't be both set.
* - When LNET_MD_MAX_SIZE is set, the total length of the memory
* region (i.e. sum of all fragment lengths) must not be less than
* \a max_size.
*/
unsigned int options;
/**
* A user-specified value that is associated with the memory
* descriptor. The value does not need to be a pointer, but must fit
* in the space used by a pointer. This value is recorded in events
* associated with operations on this MD.
*/
void *user_ptr;
/**
* A handle for the event queue used to log the operations performed on
* the memory region. If this argument is a NULL handle (i.e. nullified
* by LNetInvalidateHandle()), operations performed on this memory
* descriptor are not logged.
*/
lnet_handle_eq_t eq_handle;
} lnet_md_t;
/* Max Transfer Unit (minimum supported everywhere).
* CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
* these limits are system wide and not interface-local. */
#define LNET_MTU_BITS 20
#define LNET_MTU (1 << LNET_MTU_BITS)
/** limit on the number of fragments in discontiguous MDs */
#define LNET_MAX_IOV 256
/* Max payload size */
# define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD
# if (LNET_MAX_PAYLOAD < LNET_MTU)
# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
# else
# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
/* PAGE_SIZE is a constant: check with cpp! */
# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
# endif
# endif
/**
* Options for the MD structure. See lnet_md_t::options.
*/
#define LNET_MD_OP_PUT (1 << 0)
/** See lnet_md_t::options. */
#define LNET_MD_OP_GET (1 << 1)
/** See lnet_md_t::options. */
#define LNET_MD_MANAGE_REMOTE (1 << 2)
/* unused (1 << 3) */
/** See lnet_md_t::options. */
#define LNET_MD_TRUNCATE (1 << 4)
/** See lnet_md_t::options. */
#define LNET_MD_ACK_DISABLE (1 << 5)
/** See lnet_md_t::options. */
#define LNET_MD_IOVEC (1 << 6)
/** See lnet_md_t::options. */
#define LNET_MD_MAX_SIZE (1 << 7)
/** See lnet_md_t::options. */
#define LNET_MD_KIOV (1 << 8)
/* For compatibility with Cray Portals */
#define LNET_MD_PHYS 0
/** Infinite threshold on MD operations. See lnet_md_t::threshold */
#define LNET_MD_THRESH_INF (-1)
/* NB lustre portals uses struct iovec internally! */
typedef struct iovec lnet_md_iovec_t;
/**
* A page-based fragment of a MD.
*/
typedef struct {
/** Pointer to the page where the fragment resides */
struct page *kiov_page;
/** Length in bytes of the fragment */
unsigned int kiov_len;
/**
* Starting offset of the fragment within the page. Note that the
* end of the fragment must not pass the end of the page; i.e.,
* kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
*/
unsigned int kiov_offset;
} lnet_kiov_t;
/** @} lnet_md */
/** \addtogroup lnet_eq
* @{ */
/**
* Six types of events can be logged in an event queue.
*/
typedef enum {
/** An incoming GET operation has completed on the MD. */
LNET_EVENT_GET = 1,
/**
* An incoming PUT operation has completed on the MD. The
* underlying layers will not alter the memory (on behalf of this
* operation) once this event has been logged.
*/
LNET_EVENT_PUT,
/**
* A REPLY operation has completed. This event is logged after the
* data (if any) from the REPLY has been written into the MD.
*/
LNET_EVENT_REPLY,
/** An acknowledgment has been received. */
LNET_EVENT_ACK,
/**
* An outgoing send (PUT or GET) operation has completed. This event
* is logged after the entire buffer has been sent and it is safe for
* the caller to reuse the buffer.
*
* Note:
* - The LNET_EVENT_SEND doesn't guarantee message delivery. It can
* happen even when the message has not yet been put out on wire.
* - It's unsafe to assume that in an outgoing GET operation
* the LNET_EVENT_SEND event would happen before the
* LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and
* LNET_EVENT_ACK events in an outgoing PUT operation.
*/
LNET_EVENT_SEND,
/**
* A MD has been unlinked. Note that LNetMDUnlink() does not
* necessarily trigger an LNET_EVENT_UNLINK event.
* \see LNetMDUnlink
*/
LNET_EVENT_UNLINK,
} lnet_event_kind_t;
#define LNET_SEQ_BASETYPE long
typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
/* XXX
* cygwin need the pragma line, not clear if it's needed in other places.
* checking!!!
*/
#ifdef __CYGWIN__
#pragma pack(push, 4)
#endif
/**
* Information about an event on a MD.
*/
typedef struct {
/** The identifier (nid, pid) of the target. */
lnet_process_id_t target;
/** The identifier (nid, pid) of the initiator. */
lnet_process_id_t initiator;
/**
* The NID of the immediate sender. If the request has been forwarded
* by routers, this is the NID of the last hop; otherwise it's the
* same as the initiator.
*/
lnet_nid_t sender;
/** Indicates the type of the event. */
lnet_event_kind_t type;
/** The portal table index specified in the request */
unsigned int pt_index;
/** A copy of the match bits specified in the request. */
__u64 match_bits;
/** The length (in bytes) specified in the request. */
unsigned int rlength;
/**
* The length (in bytes) of the data that was manipulated by the
* operation. For truncated operations, the manipulated length will be
* the number of bytes specified by the MD (possibly with an offset,
* see lnet_md_t). For all other operations, the manipulated length
* will be the length of the requested operation, i.e. rlength.
*/
unsigned int mlength;
/**
* The handle to the MD associated with the event. The handle may be
* invalid if the MD has been unlinked.
*/
lnet_handle_md_t md_handle;
/**
* A snapshot of the state of the MD immediately after the event has
* been processed. In particular, the threshold field in md will
* reflect the value of the threshold after the operation occurred.
*/
lnet_md_t md;
/**
* 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT.
* \see LNetPut
*/
__u64 hdr_data;
/**
* Indicates the completion status of the operation. It's 0 for
* successful operations, otherwise it's an error code.
*/
int status;
/**
* Indicates whether the MD has been unlinked. Note that:
* - An event with unlinked set is the last event on the MD.
* - This field is also set for an explicit LNET_EVENT_UNLINK event.
* \see LNetMDUnlink
*/
int unlinked;
/**
* The displacement (in bytes) into the memory region that the
* operation used. The offset can be determined by the operation for
* a remote managed MD or by the local MD.
* \see lnet_md_t::options
*/
unsigned int offset;
/**
* The sequence number for this event. Sequence numbers are unique
* to each event.
*/
volatile lnet_seq_t sequence;
} lnet_event_t;
#ifdef __CYGWIN__
#pragma pop
#endif
/**
* Event queue handler function type.
*
* The EQ handler runs for each event that is deposited into the EQ. The
* handler is supplied with a pointer to the event that triggered the
* handler invocation.
*
* The handler must not block, must be reentrant, and must not call any LNet
* API functions. It should return as quickly as possible.
*/
typedef void (*lnet_eq_handler_t)(lnet_event_t *event);
#define LNET_EQ_HANDLER_NONE NULL
/** @} lnet_eq */
/** \addtogroup lnet_data
* @{ */
/**
* Specify whether an acknowledgment should be sent by target when the PUT
* operation completes (i.e., when the data has been written to a MD of the
* target process).
*
* \see lnet_md_t::options for the discussion on LNET_MD_ACK_DISABLE by which
* acknowledgments can be disabled for a MD.
*/
typedef enum {
/** Request an acknowledgment */
LNET_ACK_REQ,
/** Request that no acknowledgment should be generated. */
LNET_NOACK_REQ
} lnet_ack_req_t;
/** @} lnet_data */
/** @} lnet */
#endif

View file

@ -0,0 +1,40 @@
config LNET
tristate "Lustre networking subsystem"
depends on LUSTRE_FS
config LNET_MAX_PAYLOAD
int "Lustre lnet max transfer payload (default 1MB)"
depends on LUSTRE_FS
default "1048576"
help
This option defines the maximum size of payload in bytes that lnet
can put into its transport.
If unsure, use default.
config LNET_SELFTEST
tristate "Lustre networking self testing"
depends on LNET
help
Choose Y here if you want to do lnet self testing. To compile this
as a module, choose M here: the module will be called lnet_selftest.
If unsure, say N.
See also http://wiki.lustre.org/
config LNET_XPRT_IB
tristate "LNET infiniband support"
depends on LNET && INFINIBAND && INFINIBAND_ADDR_TRANS
default LNET && INFINIBAND
help
This option allows the LNET users to use infiniband as an
RDMA-enabled transport.
To compile this as a kernel module, choose M here and it will be
called ko2iblnd.
If unsure, say N.

View file

@ -0,0 +1 @@
obj-$(CONFIG_LNET) := klnds/ lnet/ selftest/

View file

@ -0,0 +1 @@
obj-$(CONFIG_LNET) += o2iblnd/ socklnd/

View file

@ -0,0 +1,5 @@
obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o
ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o
ccflags-y := -I$(src)/../../include

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,493 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/klnds/o2iblnd/o2iblnd_modparams.c
*
* Author: Eric Barton <eric@bartonsoftware.com>
*/
#include "o2iblnd.h"
/*
 * Module load-time tunables for the o2ib LND.
 *
 * Statics whose default is zero carry no explicit initialiser: they are
 * zero-filled in .bss, and kernel style (checkpatch) forbids explicit
 * "= 0" initialisation of statics.  All default values and parameter
 * descriptions are unchanged.
 */
static int service = 987;
CFS_MODULE_PARM(service, "i", int, 0444,
		"service number (within RDMA_PS_TCP)");

static int cksum;
CFS_MODULE_PARM(cksum, "i", int, 0644,
		"set non-zero to enable message (not RDMA) checksums");

static int timeout = 50;
CFS_MODULE_PARM(timeout, "i", int, 0644,
		"timeout (seconds)");

/* Number of threads in each scheduler pool which is percpt,
 * we will estimate reasonable value based on CPUs if it's set to zero. */
static int nscheds;
CFS_MODULE_PARM(nscheds, "i", int, 0444,
		"number of threads in each scheduler pool");

/* NB: this value is shared by all CPTs, it can grow at runtime */
static int ntx = 512;
CFS_MODULE_PARM(ntx, "i", int, 0444,
		"# of message descriptors allocated for each pool");

/* NB: this value is shared by all CPTs */
static int credits = 256;
CFS_MODULE_PARM(credits, "i", int, 0444,
		"# concurrent sends");

static int peer_credits = 8;
CFS_MODULE_PARM(peer_credits, "i", int, 0444,
		"# concurrent sends to 1 peer");

static int peer_credits_hiw;
CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
		"when eagerly to return credits");

static int peer_buffer_credits;
CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
		"# per-peer router buffer credits");

static int peer_timeout = 180;
CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
		"Seconds without aliveness news to declare peer dead (<=0 to disable)");

static char *ipif_name = "ib0";
CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
		"IPoIB interface name");

static int retry_count = 5;
CFS_MODULE_PARM(retry_count, "i", int, 0644,
		"Retransmissions when no ACK received");

static int rnr_retry_count = 6;
CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
		"RNR retransmissions");

static int keepalive = 100;
CFS_MODULE_PARM(keepalive, "i", int, 0644,
		"Idle time in seconds before sending a keepalive");

static int ib_mtu;
CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
		"IB MTU 256/512/1024/2048/4096");

static int concurrent_sends;
CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
		"send work-queue sizing");

static int map_on_demand;
CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
		"map on demand");

/* NB: this value is shared by all CPTs, it can grow at runtime */
static int fmr_pool_size = 512;
CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
		"size of fmr pool on each CPT (>= ntx / 4)");

/* NB: this value is shared by all CPTs, it can grow at runtime */
static int fmr_flush_trigger = 384;
CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
		"# dirty FMRs that triggers pool flush");

static int fmr_cache = 1;
CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
		"non-zero to enable FMR caching");

/* NB: this value is shared by all CPTs, it can grow at runtime */
static int pmr_pool_size = 512;
CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
		"size of MR cache pmr pool on each CPT");

/*
 * 0: disable failover
 * 1: enable failover if necessary
 * 2: force to failover (for debug)
 */
static int dev_failover;
CFS_MODULE_PARM(dev_failover, "i", int, 0444,
		"HCA failover for bonding (0 off, 1 on, other values reserved)");

static int require_privileged_port;
CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
		"require privileged port when accepting connection");

static int use_privileged_port = 1;
CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
		"use privileged port when initiating connection");
/*
 * Aggregate view of all the module parameters above.  Consumers go
 * through these pointers, so runtime changes to 0644 parameters are
 * observed without re-reading module state.
 */
kib_tunables_t kiblnd_tunables = {
	.kib_dev_failover = &dev_failover,
	.kib_service = &service,
	.kib_cksum = &cksum,
	.kib_timeout = &timeout,
	.kib_keepalive = &keepalive,
	.kib_ntx = &ntx,
	.kib_credits = &credits,
	.kib_peertxcredits = &peer_credits,
	.kib_peercredits_hiw = &peer_credits_hiw,
	.kib_peerrtrcredits = &peer_buffer_credits,
	.kib_peertimeout = &peer_timeout,
	.kib_default_ipif = &ipif_name,
	.kib_retry_count = &retry_count,
	.kib_rnr_retry_count = &rnr_retry_count,
	.kib_concurrent_sends = &concurrent_sends,
	.kib_ib_mtu = &ib_mtu,
	.kib_map_on_demand = &map_on_demand,
	.kib_fmr_pool_size = &fmr_pool_size,
	.kib_fmr_flush_trigger = &fmr_flush_trigger,
	.kib_fmr_cache = &fmr_cache,
	.kib_pmr_pool_size = &pmr_pool_size,
	.kib_require_priv_port = &require_privileged_port,
	.kib_use_priv_port = &use_privileged_port,
	.kib_nscheds = &nscheds
};
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM

/* Writable snapshot of the ipif_name module parameter; proc_dostring
 * needs a fixed-size buffer it may write into. */
static char ipif_basename_space[32];

/* IDs for the sysctl entries below.  NOTE(review): the .ctl_name field
 * only matters on kernels that still support binary sysctls — newer
 * sysctl APIs ignore it; confirm against the cfs compat layer. */
enum {
	O2IBLND_SERVICE = 1,
	O2IBLND_CKSUM,
	O2IBLND_TIMEOUT,
	O2IBLND_NTX,
	O2IBLND_CREDITS,
	O2IBLND_PEER_TXCREDITS,
	O2IBLND_PEER_CREDITS_HIW,
	O2IBLND_PEER_RTRCREDITS,
	O2IBLND_PEER_TIMEOUT,
	O2IBLND_IPIF_BASENAME,
	O2IBLND_RETRY_COUNT,
	O2IBLND_RNR_RETRY_COUNT,
	O2IBLND_KEEPALIVE,
	O2IBLND_CONCURRENT_SENDS,
	O2IBLND_IB_MTU,
	O2IBLND_MAP_ON_DEMAND,
	O2IBLND_FMR_POOL_SIZE,
	O2IBLND_FMR_FLUSH_TRIGGER,
	O2IBLND_FMR_CACHE,
	O2IBLND_PMR_POOL_SIZE,
	O2IBLND_DEV_FAILOVER
};
/*
 * /proc tunables for o2iblnd.  Each entry exposes the module parameter
 * of the same name; .mode mirrors the parameter's permission bits
 * (0444 read-only, 0644 runtime-writable).
 */
static ctl_table_t kiblnd_ctl_table[] = {
	{
		.ctl_name = O2IBLND_SERVICE,
		.procname = "service",
		.data = &service,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_CKSUM,
		.procname = "cksum",
		.data = &cksum,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_TIMEOUT,
		.procname = "timeout",
		.data = &timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_NTX,
		.procname = "ntx",
		.data = &ntx,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_CREDITS,
		.procname = "credits",
		.data = &credits,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_PEER_TXCREDITS,
		.procname = "peer_credits",
		.data = &peer_credits,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_PEER_CREDITS_HIW,
		.procname = "peer_credits_hiw",
		.data = &peer_credits_hiw,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_PEER_RTRCREDITS,
		.procname = "peer_buffer_credits",
		.data = &peer_buffer_credits,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_PEER_TIMEOUT,
		.procname = "peer_timeout",
		.data = &peer_timeout,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		/* string entry: backed by the snapshot buffer, not the
		 * charp parameter itself */
		.ctl_name = O2IBLND_IPIF_BASENAME,
		.procname = "ipif_name",
		.data = ipif_basename_space,
		.maxlen = sizeof(ipif_basename_space),
		.mode = 0444,
		.proc_handler = &proc_dostring
	},
	{
		.ctl_name = O2IBLND_RETRY_COUNT,
		.procname = "retry_count",
		.data = &retry_count,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_RNR_RETRY_COUNT,
		.procname = "rnr_retry_count",
		.data = &rnr_retry_count,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_KEEPALIVE,
		.procname = "keepalive",
		.data = &keepalive,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_CONCURRENT_SENDS,
		.procname = "concurrent_sends",
		.data = &concurrent_sends,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_IB_MTU,
		.procname = "ib_mtu",
		.data = &ib_mtu,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_MAP_ON_DEMAND,
		.procname = "map_on_demand",
		.data = &map_on_demand,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_FMR_POOL_SIZE,
		.procname = "fmr_pool_size",
		.data = &fmr_pool_size,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
		.procname = "fmr_flush_trigger",
		.data = &fmr_flush_trigger,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_FMR_CACHE,
		.procname = "fmr_cache",
		.data = &fmr_cache,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_PMR_POOL_SIZE,
		.procname = "pmr_pool_size",
		.data = &pmr_pool_size,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{
		.ctl_name = O2IBLND_DEV_FAILOVER,
		.procname = "dev_failover",
		.data = &dev_failover,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = &proc_dointvec
	},
	{0}	/* sentinel */
};
/* Root of the tree: a single "o2iblnd" directory holding the entries
 * above. */
static ctl_table_t kiblnd_top_ctl_table[] = {
	{
		.ctl_name = CTL_O2IBLND,
		.procname = "o2iblnd",
		.data = NULL,
		.maxlen = 0,
		.mode = 0555,	/* directory: r-x */
		.child = kiblnd_ctl_table
	},
	{0}	/* sentinel */
};
/*
 * Copy the string tunable @str into the fixed-size sysctl buffer
 * @space (of @size bytes), guaranteeing NUL termination.
 *
 * strncpy() alone does not terminate the destination when
 * strlen(str) >= size, so the last byte is forced to '\0'.  A
 * non-positive @size is now a no-op; previously space[size-1] would
 * have written before the start of the buffer.
 */
void
kiblnd_initstrtunable(char *space, char *str, int size)
{
	if (size <= 0)
		return;
	strncpy(space, str, size);
	space[size - 1] = 0;
}
/*
 * Register the o2iblnd sysctl tree.  A snapshot of ipif_name is taken
 * first because proc_dostring needs a writable fixed-size buffer.
 * Registration failure is non-fatal: the module still works, only the
 * /proc tunables are missing.
 */
void
kiblnd_sysctl_init (void)
{
	kiblnd_initstrtunable(ipif_basename_space, ipif_name,
			      sizeof(ipif_basename_space));

	kiblnd_tunables.kib_sysctl =
		cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);

	if (kiblnd_tunables.kib_sysctl == NULL)
		CWARN("Can't setup /proc tunables\n");
}
/* Unregister the sysctl tree set up by kiblnd_sysctl_init(), if the
 * registration succeeded. */
void
kiblnd_sysctl_fini (void)
{
	if (kiblnd_tunables.kib_sysctl != NULL)
		unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
}
#else
/* Sysctl support compiled out: provide no-op stubs so callers need no
 * #ifdefs. */
void
kiblnd_sysctl_init (void)
{
}

void
kiblnd_sysctl_fini (void)
{
}
#endif
int
kiblnd_tunables_init (void)
{
if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
*kiblnd_tunables.kib_ib_mtu);
return -EINVAL;
}
if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
*kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
if (*kiblnd_tunables.kib_map_on_demand < 0 ||
*kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
*kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
if (*kiblnd_tunables.kib_map_on_demand == 1)
*kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
if (*kiblnd_tunables.kib_concurrent_sends == 0) {
if (*kiblnd_tunables.kib_map_on_demand > 0 &&
*kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
else
*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
}
if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
CWARN("Concurrent sends %d is lower than message queue size: %d, "
"performance may drop slightly.\n",
*kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
}
kiblnd_sysctl_init();
return 0;
}
/* Teardown counterpart of kiblnd_tunables_init(): drop the sysctl
 * registration. */
void
kiblnd_tunables_fini (void)
{
	kiblnd_sysctl_fini();
}

View file

@ -0,0 +1,7 @@
obj-$(CONFIG_LNET) += ksocklnd.o
ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib-linux.o
ccflags-y := -I$(src)/../../include

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,602 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*
* Author: Zach Brown <zab@zabbo.net>
* Author: Peter J. Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
* Author: Eric Barton <eric@bartonsoftware.com>
*
* This file is part of Lustre, http://www.lustre.org
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Portals is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Portals; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#define DEBUG_PORTAL_ALLOC
#define DEBUG_SUBSYSTEM S_LND
#include "socklnd_lib-linux.h"
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-lnet.h>
#include <linux/lnet/socklnd.h>
#include <linux/lnet/lnet-sysctl.h>
/* Compile-time tuning constants for the socket LND. */
#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */
#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */

#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */

#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */

/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
 * no risk if we're not running on a CONFIG_HIGHMEM platform. */
#ifdef CONFIG_HIGHMEM
# define SOCKNAL_RISK_KMAP_DEADLOCK 0
#else
# define SOCKNAL_RISK_KMAP_DEADLOCK 1
#endif
struct ksock_sched_info;	/* forward decl: the pool that owns a scheduler */

/* One scheduler: a worker that drains the rx/tx conn queues below.
 * kss_lock guards the three lists and kss_nconns. */
typedef struct /* per scheduler state */
{
	spinlock_t kss_lock; /* serialise */
	struct list_head kss_rx_conns; /* conn waiting to be read */
	/* conn waiting to be written */
	struct list_head kss_tx_conns;
	/* zombie noop tx list */
	struct list_head kss_zombie_noop_txs;
	wait_queue_head_t kss_waitq; /* where scheduler sleeps */
	/* # connections assigned to this scheduler */
	int kss_nconns;
	struct ksock_sched_info *kss_info; /* owner of it */
	/* scratch space for receive-side page/iov handling */
	struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
	struct iovec kss_scratch_iov[LNET_MAX_IOV];
} ksock_sched_t;
/* A pool of schedulers bound to one CPT. */
struct ksock_sched_info {
	int ksi_nthreads_max; /* max allowed threads */
	int ksi_nthreads; /* number of threads */
	int ksi_cpt; /* CPT id */
	ksock_sched_t *ksi_scheds; /* array of schedulers */
};

/* A scheduler thread id packs the CPT into the high bits and the
 * scheduler index within its pool into the low KSOCK_CPT_SHIFT bits. */
#define KSOCK_CPT_SHIFT 16
#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
/* A local network interface in use by the socket LND. */
typedef struct /* in-use interface */
{
	__u32 ksni_ipaddr; /* interface's IP address */
	__u32 ksni_netmask; /* interface's network mask */
	int ksni_nroutes; /* # routes using (active) */
	int ksni_npeers; /* # peers using (passive) */
	char ksni_name[IFNAMSIZ]; /* interface name */
} ksock_interface_t;
/* All socklnd tunables, held as pointers to the backing module
 * parameters so runtime changes to writable parameters are observed. */
typedef struct
{
	/* "stuck" socket timeout (seconds) */
	int *ksnd_timeout;
	/* # scheduler threads in each pool while starting */
	int *ksnd_nscheds;
	int *ksnd_nconnds; /* # connection daemons */
	int *ksnd_nconnds_max; /* max # connection daemons */
	int *ksnd_min_reconnectms; /* first connection retry after (ms)... */
	int *ksnd_max_reconnectms; /* ...exponentially increasing to this */
	int *ksnd_eager_ack; /* make TCP ack eagerly? */
	int *ksnd_typed_conns; /* drive sockets by type? */
	int *ksnd_min_bulk; /* smallest "large" message */
	int *ksnd_tx_buffer_size; /* socket tx buffer size */
	int *ksnd_rx_buffer_size; /* socket rx buffer size */
	int *ksnd_nagle; /* enable NAGLE? */
	int *ksnd_round_robin; /* round robin for multiple interfaces */
	int *ksnd_keepalive; /* # secs for sending keepalive NOOP */
	int *ksnd_keepalive_idle; /* # idle secs before 1st probe */
	int *ksnd_keepalive_count; /* # probes */
	int *ksnd_keepalive_intvl; /* time between probes */
	int *ksnd_credits; /* # concurrent sends */
	int *ksnd_peertxcredits; /* # concurrent sends to 1 peer */
	int *ksnd_peerrtrcredits; /* # per-peer router buffer credits */
	int *ksnd_peertimeout; /* seconds to consider peer dead */
	int *ksnd_enable_csum; /* enable check sum */
	int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
	int *ksnd_nonblk_zcack; /* always send zc-ack on non-blocking connection */
	unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload size */
	int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */
	int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
	ctl_table_header_t *ksnd_sysctl; /* sysctl interface */
#endif
} ksock_tunables_t;
/* Per-network (per-NI) state of the socket LND. */
typedef struct
{
	__u64 ksnn_incarnation; /* my epoch */
	spinlock_t ksnn_lock; /* serialise */
	struct list_head ksnn_list; /* chain on global list */
	int ksnn_npeers; /* # peers */
	int ksnn_shutdown; /* shutting down? */
	int ksnn_ninterfaces; /* IP interfaces */
	ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
} ksock_net_t;
/** connd timeout */
#define SOCKNAL_CONND_TIMEOUT 120
/** reserved thread for accepting & creating new connd */
#define SOCKNAL_CONND_RESV 1

/* Global state of the socket LND (single instance: ksocknal_data). */
typedef struct
{
	int ksnd_init; /* initialisation state */
	int ksnd_nnets; /* # networks set up */
	struct list_head ksnd_nets; /* list of nets */
	/* stabilize peer/conn ops */
	rwlock_t ksnd_global_lock;
	/* hash table of all my known peers */
	struct list_head *ksnd_peers;
	int ksnd_peer_hash_size; /* size of ksnd_peers */

	int ksnd_nthreads; /* # live threads */
	int ksnd_shuttingdown; /* tell threads to exit */
	/* schedulers information */
	struct ksock_sched_info **ksnd_sched_info;

	atomic_t ksnd_nactive_txs; /* #active txs */

	struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/
	struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */
	struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/
	wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
	cfs_time_t ksnd_reaper_waketime;/* when reaper will wake */
	spinlock_t ksnd_reaper_lock; /* serialise */

	int ksnd_enomem_tx; /* test ENOMEM sender */
	int ksnd_stall_tx; /* test sluggish sender */
	int ksnd_stall_rx; /* test sluggish receiver */

	struct list_head ksnd_connd_connreqs; /* incoming connection requests */
	struct list_head ksnd_connd_routes; /* routes waiting to be connected */
	wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */
	int ksnd_connd_connecting;/* # connds connecting */
	/** time stamp of the last failed connecting attempt */
	long ksnd_connd_failed_stamp;
	/** # starting connd */
	unsigned ksnd_connd_starting;
	/** time stamp of the last starting connd */
	long ksnd_connd_starting_stamp;
	/** # running connd */
	unsigned ksnd_connd_running;
	spinlock_t ksnd_connd_lock; /* serialise */

	struct list_head ksnd_idle_noop_txs; /* list head for freed noop tx */
	spinlock_t ksnd_tx_lock; /* serialise, g_lock unsafe */
} ksock_nal_data_t;

/* initialisation states for ksnd_init */
#define SOCKNAL_INIT_NOTHING 0
#define SOCKNAL_INIT_DATA 1
#define SOCKNAL_INIT_ALL 2
/* A packet just assembled for transmission is represented by 1 or more
* struct iovec fragments (the first frag contains the portals header),
* followed by 0 or more lnet_kiov_t fragments.
*
* On the receive side, initially 1 struct iovec fragment is posted for
* receive (the header). Once the header has been received, the payload is
* received into either struct iovec or lnet_kiov_t fragments, depending on
* what the header matched or whether the message needs forwarding. */
struct ksock_conn; /* forward ref */
struct ksock_peer; /* forward ref */
struct ksock_route; /* forward ref */
struct ksock_proto; /* forward ref */
/* A transmit descriptor: one message being (or waiting to be) sent on
 * a connection.  Reference-counted via tx_refcount (see
 * ksocknal_tx_addref/decref). */
typedef struct /* transmit packet */
{
	struct list_head tx_list; /* queue on conn for transmission etc */
	struct list_head tx_zc_list; /* queue on peer for ZC request */
	atomic_t tx_refcount; /* tx reference count */
	int tx_nob; /* # packet bytes */
	int tx_resid; /* residual bytes */
	int tx_niov; /* # packet iovec frags */
	struct iovec *tx_iov; /* packet iovec frags */
	int tx_nkiov; /* # packet page frags */
	unsigned short tx_zc_aborted; /* aborted ZC request */
	unsigned short tx_zc_capable:1; /* payload is large enough for ZC */
	unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */
	unsigned short tx_nonblk:1; /* it's a non-blocking ACK */
	lnet_kiov_t *tx_kiov; /* packet page frags */
	struct ksock_conn *tx_conn; /* owning conn */
	lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
	cfs_time_t tx_deadline; /* when (in jiffies) tx times out */
	ksock_msg_t tx_msg; /* socklnd message buffer */
	int tx_desc_size; /* size of this descriptor */
	union {
		struct {
			struct iovec iov; /* virt hdr */
			lnet_kiov_t kiov[0]; /* paged payload */
		} paged;
		struct {
			struct iovec iov[1]; /* virt hdr + payload */
		} virt;
	} tx_frags;
} ksock_tx_t;

/* size of a tx that carries no payload pages (header only) */
#define KSOCK_NOOP_TX_SIZE ((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0]))
/* network zero copy callback descriptor embedded in ksock_tx_t */

/* space for the rx frag descriptors; we either read a single contiguous
 * header, or up to LNET_MAX_IOV frags of payload of either type. */
typedef union {
	struct iovec iov[LNET_MAX_IOV];
	lnet_kiov_t kiov[LNET_MAX_IOV];
} ksock_rxiovspace_t;

/* receive state machine values for ksnc_rx_state */
#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */
#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */
#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */
#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */
#define SOCKNAL_RX_SLOP 6 /* skipping body */
/* An established connection.  Two refcounts: ksnc_conn_refcount for
 * the structure itself (final put queues it on the reaper's zombie
 * list) and ksnc_sock_refcount for the underlying socket (final put
 * releases the socket) — see the addref/decref helpers below. */
typedef struct ksock_conn
{
	struct ksock_peer *ksnc_peer; /* owning peer */
	struct ksock_route *ksnc_route; /* owning route */
	struct list_head ksnc_list; /* stash on peer's conn list */
	socket_t *ksnc_sock; /* actual socket */
	void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
	void *ksnc_saved_write_space; /* socket's original write_space() callback */
	atomic_t ksnc_conn_refcount; /* conn refcount */
	atomic_t ksnc_sock_refcount; /* sock refcount */
	ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
	__u32 ksnc_myipaddr; /* my IP */
	__u32 ksnc_ipaddr; /* peer's IP */
	int ksnc_port; /* peer's port */
	signed int ksnc_type:3; /* type of connection,
				 * should be signed value */
	unsigned int ksnc_closing:1; /* being shut down */
	unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
	unsigned int ksnc_zc_capable:1; /* enable to ZC */
	struct ksock_proto *ksnc_proto; /* protocol for the connection */

	/* reader */
	struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
	cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */
	__u8 ksnc_rx_started; /* started receiving a message */
	__u8 ksnc_rx_ready; /* data ready to read */
	__u8 ksnc_rx_scheduled;/* being progressed */
	__u8 ksnc_rx_state; /* what is being read */
	int ksnc_rx_nob_left; /* # bytes to next hdr/body */
	int ksnc_rx_nob_wanted; /* bytes actually wanted */
	int ksnc_rx_niov; /* # iovec frags */
	struct iovec *ksnc_rx_iov; /* the iovec frags */
	int ksnc_rx_nkiov; /* # page frags */
	lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
	ksock_rxiovspace_t ksnc_rx_iov_space;/* space for frag descriptors */
	__u32 ksnc_rx_csum; /* partial checksum for incoming data */
	void *ksnc_cookie; /* rx lnet_finalize passthru arg */
	ksock_msg_t ksnc_msg; /* incoming message buffer:
			       * V2.x message takes the
			       * whole struct
			       * V1.x message is a bare
			       * lnet_hdr_t, it's stored in
			       * ksnc_msg.ksm_u.lnetmsg */

	/* WRITER */
	struct list_head ksnc_tx_list; /* where I enq waiting for output space */
	struct list_head ksnc_tx_queue; /* packets waiting to be sent */
	ksock_tx_t *ksnc_tx_carrier; /* next TX that can carry a LNet message or ZC-ACK */
	cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */
	int ksnc_tx_bufnob; /* send buffer marker */
	atomic_t ksnc_tx_nob; /* # bytes queued */
	int ksnc_tx_ready; /* write space */
	int ksnc_tx_scheduled; /* being progressed */
	cfs_time_t ksnc_tx_last_post; /* time stamp of the last posted TX */
} ksock_conn_t;
/* A route: how to (re)connect to a peer via a particular address.
 * Reference-counted; the final put destroys the route. */
typedef struct ksock_route
{
	struct list_head ksnr_list; /* chain on peer route list */
	struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
	struct ksock_peer *ksnr_peer; /* owning peer */
	atomic_t ksnr_refcount; /* # users */
	cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
	cfs_duration_t ksnr_retry_interval; /* how long between retries */
	__u32 ksnr_myipaddr; /* my IP */
	__u32 ksnr_ipaddr; /* IP address to connect to */
	int ksnr_port; /* port to connect to */
	unsigned int ksnr_scheduled:1; /* scheduled for attention */
	unsigned int ksnr_connecting:1;/* connection establishment in progress */
	unsigned int ksnr_connected:4; /* connections established by type */
	unsigned int ksnr_deleted:1; /* been removed from peer? */
	unsigned int ksnr_share_count; /* created explicitly? */
	int ksnr_conn_count; /* # conns established by this route */
} ksock_route_t;
#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */

/* A remote peer: its connections, routes and queued packets.
 * Reference-counted; the final put destroys the peer. */
typedef struct ksock_peer
{
	struct list_head ksnp_list; /* stash on global peer list */
	cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
	lnet_process_id_t ksnp_id; /* who's on the other end(s) */
	atomic_t ksnp_refcount; /* # users */
	int ksnp_sharecount; /* lconf usage counter */
	int ksnp_closing; /* being closed */
	int ksnp_accepting;/* # passive connections pending */
	int ksnp_error; /* errno on closing last conn */
	__u64 ksnp_zc_next_cookie;/* ZC completion cookie */
	__u64 ksnp_incarnation; /* latest known peer incarnation */
	struct ksock_proto *ksnp_proto; /* latest known peer protocol */
	struct list_head ksnp_conns; /* all active connections */
	struct list_head ksnp_routes; /* routes */
	struct list_head ksnp_tx_queue; /* waiting packets */
	spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
	struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */
	cfs_time_t ksnp_send_keepalive; /* time to send keepalive */
	lnet_ni_t *ksnp_ni; /* which network */
	int ksnp_n_passive_ips; /* # of... */
	__u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
} ksock_peer_t;
/* An accepted-but-not-yet-set-up connection request, queued for the
 * connection daemons on ksnd_connd_connreqs. */
typedef struct ksock_connreq
{
	struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
	lnet_ni_t *ksncr_ni; /* chosen NI */
	socket_t *ksncr_sock; /* accepted socket */
} ksock_connreq_t;
extern ksock_nal_data_t ksocknal_data;
extern ksock_tunables_t ksocknal_tunables;
/* return values of pro_match_tx() */
#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */
#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */
#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not preferred */

/* Per-protocol-version operation table (V1.x/V2.x/V3.x below). */
typedef struct ksock_proto
{
	int pro_version; /* version number of protocol */
	int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */
	int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */
	void (*pro_pack)(ksock_tx_t *); /* message pack */
	void (*pro_unpack)(ksock_msg_t *); /* message unpack */
	ksock_tx_t *(*pro_queue_tx_msg)(ksock_conn_t *, ksock_tx_t *); /* queue tx on the connection */
	int (*pro_queue_tx_zcack)(ksock_conn_t *, ksock_tx_t *, __u64); /* queue ZC ack on the connection */
	int (*pro_handle_zcreq)(ksock_conn_t *, __u64, int); /* handle ZC request */
	int (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64); /* handle ZC ACK */
	int (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int); /* msg type matches the connection type:
								 * return value:
								 * return MATCH_NO : no
								 * return MATCH_YES : matching type
								 * return MATCH_MAY : can be backup */
} ksock_proto_t;
extern ksock_proto_t ksocknal_protocol_v1x;
extern ksock_proto_t ksocknal_protocol_v2x;
extern ksock_proto_t ksocknal_protocol_v3x;
#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR
#ifndef CPU_MASK_NONE
#define CPU_MASK_NONE 0UL
#endif
/* Bitmask of connection types a route may establish: all three typed
 * connections when typed_conns is enabled, otherwise just CONN_ANY. */
static inline int
ksocknal_route_mask(void)
{
	int mask;

	if (*ksocknal_tunables.ksnd_typed_conns)
		mask = (1 << SOCKLND_CONN_CONTROL) |
		       (1 << SOCKLND_CONN_BULK_IN) |
		       (1 << SOCKLND_CONN_BULK_OUT);
	else
		mask = 1 << SOCKLND_CONN_ANY;

	return mask;
}
/* Hash a NID to its bucket in the global peer table (simple modulo
 * over the NID's low 32 bits). */
static inline struct list_head *
ksocknal_nid2peerlist (lnet_nid_t nid)
{
	unsigned int bucket;

	bucket = (unsigned int)nid % ksocknal_data.ksnd_peer_hash_size;
	return &ksocknal_data.ksnd_peers[bucket];
}
/* Take an additional reference on @conn; the caller must already hold
 * one (asserted). */
static inline void
ksocknal_conn_addref (ksock_conn_t *conn)
{
	LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
	atomic_inc(&conn->ksnc_conn_refcount);
}
extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
/* Drop a reference on @conn; the final put queues it as a zombie for
 * the reaper to destroy (no destruction in the caller's context). */
static inline void
ksocknal_conn_decref (ksock_conn_t *conn)
{
	LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
	if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
		ksocknal_queue_zombie_conn(conn);
}
/* Take a reference on @conn's socket, unless the connection is being
 * closed.  Returns 0 on success or -ESHUTDOWN if the conn is closing.
 * The global read lock stabilises ksnc_closing for the check. */
static inline int
ksocknal_connsock_addref (ksock_conn_t *conn)
{
	int rc;

	read_lock(&ksocknal_data.ksnd_global_lock);
	if (conn->ksnc_closing) {
		rc = -ESHUTDOWN;
	} else {
		LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0);
		atomic_inc(&conn->ksnc_sock_refcount);
		rc = 0;
	}
	read_unlock(&ksocknal_data.ksnd_global_lock);

	return rc;
}
/* Drop a socket reference.  The final put releases the socket and
 * finalizes any outstanding ZC requests; at that point the connection
 * must already be closing (asserted). */
static inline void
ksocknal_connsock_decref (ksock_conn_t *conn)
{
	LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
	if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
		LASSERT (conn->ksnc_closing);
		libcfs_sock_release(conn->ksnc_sock);
		conn->ksnc_sock = NULL;
		ksocknal_finalize_zcreq(conn);
	}
}
/* Take an additional reference on @tx; the caller must already hold
 * one (asserted). */
static inline void
ksocknal_tx_addref (ksock_tx_t *tx)
{
	LASSERT (atomic_read(&tx->tx_refcount) > 0);
	atomic_inc(&tx->tx_refcount);
}
extern void ksocknal_tx_prep (ksock_conn_t *, ksock_tx_t *tx);
extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx);
/* Drop a reference on @tx; the final put completes it via
 * ksocknal_tx_done() (NULL ni — see that function's contract). */
static inline void
ksocknal_tx_decref (ksock_tx_t *tx)
{
	LASSERT (atomic_read(&tx->tx_refcount) > 0);
	if (atomic_dec_and_test(&tx->tx_refcount))
		ksocknal_tx_done(NULL, tx);
}
/* Take an additional reference on @route; the caller must already hold
 * one (asserted). */
static inline void
ksocknal_route_addref (ksock_route_t *route)
{
	LASSERT (atomic_read(&route->ksnr_refcount) > 0);
	atomic_inc(&route->ksnr_refcount);
}
extern void ksocknal_destroy_route (ksock_route_t *route);
/* Drop a reference on @route; the final put destroys it. */
static inline void
ksocknal_route_decref (ksock_route_t *route)
{
	LASSERT (atomic_read (&route->ksnr_refcount) > 0);
	if (atomic_dec_and_test(&route->ksnr_refcount))
		ksocknal_destroy_route (route);
}
/* Take an additional reference on @peer; the caller must already hold
 * one (asserted). */
static inline void
ksocknal_peer_addref (ksock_peer_t *peer)
{
	LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
	atomic_inc(&peer->ksnp_refcount);
}
extern void ksocknal_destroy_peer (ksock_peer_t *peer);
/* Drop a reference on @peer; the final put destroys it. */
static inline void
ksocknal_peer_decref (ksock_peer_t *peer)
{
	LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
	if (atomic_dec_and_test(&peer->ksnp_refcount))
		ksocknal_destroy_peer (peer);
}
int ksocknal_startup (lnet_ni_t *ni);
void ksocknal_shutdown (lnet_ni_t *ni);
int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
int delayed, unsigned int niov,
struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
int ksocknal_accept(lnet_ni_t *ni, socket_t *sock);
extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
extern void ksocknal_peer_failed (ksock_peer_t *peer);
extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
socket_t *sock, int type);
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
extern void ksocknal_terminate_conn (ksock_conn_t *conn);
extern void ksocknal_destroy_conn (ksock_conn_t *conn);
extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
__u32 ipaddr, int why);
extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer,
ksock_tx_t *tx, int nonblk);
extern int ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx,
lnet_process_id_t id);
extern ksock_tx_t *ksocknal_alloc_tx(int type, int size);
extern void ksocknal_free_tx (ksock_tx_t *tx);
extern ksock_tx_t *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk);
extern void ksocknal_next_tx_carrier(ksock_conn_t *conn);
/* Prototypes of socklnd routines shared between its .c files.
 * Definitions live in socklnd_cb.c / socklnd.c / socklnd_proto.c and the
 * platform-specific socklnd_lib-*.c. */

/* TX queueing and completion */
extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
				  int error);

/* peer aliveness notification / query (LNet NI hooks) */
extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
extern void ksocknal_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);

/* thread lifecycle */
extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
extern void ksocknal_thread_fini (void);

/* connection establishment and routing */
extern void ksocknal_launch_all_connections_locked (ksock_peer_t *peer);
extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_t *peer);
extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);

/* daemon entry points (run as kernel threads) */
extern int ksocknal_scheduler (void *arg);
extern int ksocknal_connd (void *arg);
extern int ksocknal_reaper (void *arg);

/* HELLO handshake */
extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
				lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
				ksock_hello_msg_t *hello, lnet_process_id_t *id,
				__u64 *incarnation);

/* socket readiness callbacks */
extern void ksocknal_read_callback(ksock_conn_t *conn);
extern void ksocknal_write_callback(ksock_conn_t *conn);

/* platform-specific helpers (socklnd_lib-*.c) */
extern int ksocknal_lib_zc_capable(ksock_conn_t *conn);
extern void ksocknal_lib_save_callback(socket_t *sock, ksock_conn_t *conn);
extern void ksocknal_lib_set_callback(socket_t *sock, ksock_conn_t *conn);
extern void ksocknal_lib_reset_callback(socket_t *sock, ksock_conn_t *conn);
extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
extern int ksocknal_lib_setup_sock (socket_t *so);
extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
					   int *rxmem, int *nagle);

/* tunables init/fini (generic + platform-specific halves) */
extern int ksocknal_tunables_init(void);
extern void ksocknal_tunables_fini(void);
extern int ksocknal_lib_tunables_init(void);
extern void ksocknal_lib_tunables_fini(void);

extern void ksocknal_lib_csum_tx(ksock_tx_t *tx);
extern int ksocknal_lib_memory_pressure(ksock_conn_t *conn);
extern int ksocknal_lib_bind_thread_to_cpu(int id);

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_PORTAL_ALLOC
#ifndef __LINUX_SOCKNAL_LIB_H__
#define __LINUX_SOCKNAL_LIB_H__

/* Kernel headers needed by the Linux socklnd implementation.
 * Duplicate includes of <linux/stat.h> and <asm/uaccess.h> removed. */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/uio.h>
#include <linux/if.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/kmod.h>
#include <linux/sysctl.h>
#include <asm/div64.h>
#include <linux/syscalls.h>

#include <linux/libcfs/libcfs.h>
#include <linux/libcfs/linux/portals_compat25.h>

#include <linux/crc32.h>
/*
 * Fold @len bytes at @p into the running checksum @crc.
 *
 * Thin wrapper around the kernel's crc32_le(); the byte-at-a-time
 * fallback that used to sit behind a disabled "#if 1/#else" block was
 * dead code and has been removed.
 */
static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len)
{
	return crc32_le(crc, p, len);
}
/* Wrappers over the kernel's per-socket stream write-space accounting */
#define SOCKNAL_WSPACE(sk) sk_stream_wspace(sk)
#define SOCKNAL_MIN_WSPACE(sk) sk_stream_min_wspace(sk)

/* assume one thread for each connection type */
#define SOCKNAL_NSCHEDS 3
#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)

#endif /* __LINUX_SOCKNAL_LIB_H__ */

View file

@ -0,0 +1,198 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*
* Author: Eric Barton <eric@bartonsoftware.com>
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Portals is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Portals; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "socklnd.h"
/* ---- socket liveness ---- */
static int sock_timeout = 50;
CFS_MODULE_PARM(sock_timeout, "i", int, 0644,
		"dead socket timeout (seconds)");

/* ---- credits / flow control ---- */
static int credits = 256;
CFS_MODULE_PARM(credits, "i", int, 0444,
		"# concurrent sends");

static int peer_credits = 8;
CFS_MODULE_PARM(peer_credits, "i", int, 0444,
		"# concurrent sends to 1 peer");

static int peer_buffer_credits = 0;
CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
		"# per-peer router buffer credits");

static int peer_timeout = 180;
CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
		"Seconds without aliveness news to declare peer dead (<=0 to disable)");

/* ---- daemon counts ---- */
/* Number of daemons in each thread pool which is percpt,
 * we will estimate reasonable value based on CPUs if it's not set. */
/* NOTE(review): nscheds is unsigned int but registered via "i"/int —
 * confirm CFS_MODULE_PARM tolerates the signedness mismatch. */
static unsigned int nscheds;
CFS_MODULE_PARM(nscheds, "i", int, 0444,
		"# scheduler daemons in each pool while starting");

static int nconnds = 4;
CFS_MODULE_PARM(nconnds, "i", int, 0444,
		"# connection daemons while starting");

static int nconnds_max = 64;
CFS_MODULE_PARM(nconnds_max, "i", int, 0444,
		"max # connection daemons");

/* ---- reconnect backoff ---- */
static int min_reconnectms = 1000;
CFS_MODULE_PARM(min_reconnectms, "i", int, 0644,
		"min connection retry interval (mS)");

static int max_reconnectms = 60000;
CFS_MODULE_PARM(max_reconnectms, "i", int, 0644,
		"max connection retry interval (mS)");

/* ---- TCP behaviour ---- */
# define DEFAULT_EAGER_ACK 0
static int eager_ack = DEFAULT_EAGER_ACK;
CFS_MODULE_PARM(eager_ack, "i", int, 0644,
		"send tcp ack packets eagerly");

static int typed_conns = 1;
CFS_MODULE_PARM(typed_conns, "i", int, 0444,
		"use different sockets for bulk");

static int min_bulk = (1<<10);
CFS_MODULE_PARM(min_bulk, "i", int, 0644,
		"smallest 'large' message");

# define DEFAULT_BUFFER_SIZE 0
static int tx_buffer_size = DEFAULT_BUFFER_SIZE;
CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644,
		"socket tx buffer size (0 for system default)");

static int rx_buffer_size = DEFAULT_BUFFER_SIZE;
CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644,
		"socket rx buffer size (0 for system default)");

static int nagle = 0;
CFS_MODULE_PARM(nagle, "i", int, 0644,
		"enable NAGLE?");

static int round_robin = 1;
CFS_MODULE_PARM(round_robin, "i", int, 0644,
		"Round robin for multiple interfaces");

/* ---- TCP keepalive ---- */
static int keepalive = 30;
CFS_MODULE_PARM(keepalive, "i", int, 0644,
		"# seconds before send keepalive");

static int keepalive_idle = 30;
CFS_MODULE_PARM(keepalive_idle, "i", int, 0644,
		"# idle seconds before probe");

#define DEFAULT_KEEPALIVE_COUNT 5
static int keepalive_count = DEFAULT_KEEPALIVE_COUNT;
CFS_MODULE_PARM(keepalive_count, "i", int, 0644,
		"# missed probes == dead");

static int keepalive_intvl = 5;
CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
		"seconds between probes");

/* ---- checksumming ---- */
static int enable_csum = 0;
CFS_MODULE_PARM(enable_csum, "i", int, 0644,
		"enable check sum");

static int inject_csum_error = 0;
CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
		"set non-zero to inject a checksum error");

/* ---- zero-copy ---- */
static int nonblk_zcack = 1;
CFS_MODULE_PARM(nonblk_zcack, "i", int, 0644,
		"always send ZC-ACK on non-blocking connection");

/* NOTE(review): the three unsigned params below are also registered via
 * "i"/int — see nscheds above. */
static unsigned int zc_min_payload = (16 << 10);
CFS_MODULE_PARM(zc_min_payload, "i", int, 0644,
		"minimum payload size to zero copy");

static unsigned int zc_recv = 0;
CFS_MODULE_PARM(zc_recv, "i", int, 0644,
		"enable ZC recv for Chelsio driver");

static unsigned int zc_recv_min_nfrags = 16;
CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644,
		"minimum # of fragments to enable ZC recv");

#if SOCKNAL_VERSION_DEBUG
static int protocol = 3;
CFS_MODULE_PARM(protocol, "i", int, 0644,
		"protocol version");
#endif

/* Aggregates pointers to all of the above; filled in by
 * ksocknal_tunables_init(). */
ksock_tunables_t ksocknal_tunables;
/*
 * Point every ksocknal_tunables field at its backing module parameter,
 * clamp zc_min_payload to a sane floor, then run the platform-specific
 * half of the initialization.
 *
 * Returns the result of ksocknal_lib_tunables_init() (0 on success).
 *
 * Fixes: stray ';' after the function's closing brace removed.
 */
int ksocknal_tunables_init(void)
{
	/* initialize ksocknal_tunables structure */
	ksocknal_tunables.ksnd_timeout = &sock_timeout;
	ksocknal_tunables.ksnd_nscheds = &nscheds;
	ksocknal_tunables.ksnd_nconnds = &nconnds;
	ksocknal_tunables.ksnd_nconnds_max = &nconnds_max;
	ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
	ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
	ksocknal_tunables.ksnd_eager_ack = &eager_ack;
	ksocknal_tunables.ksnd_typed_conns = &typed_conns;
	ksocknal_tunables.ksnd_min_bulk = &min_bulk;
	ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
	ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
	ksocknal_tunables.ksnd_nagle = &nagle;
	ksocknal_tunables.ksnd_round_robin = &round_robin;
	ksocknal_tunables.ksnd_keepalive = &keepalive;
	ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
	ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
	ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
	ksocknal_tunables.ksnd_credits = &credits;
	ksocknal_tunables.ksnd_peertxcredits = &peer_credits;
	ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits;
	ksocknal_tunables.ksnd_peertimeout = &peer_timeout;
	ksocknal_tunables.ksnd_enable_csum = &enable_csum;
	ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
	ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack;
	ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload;
	ksocknal_tunables.ksnd_zc_recv = &zc_recv;
	ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
#if SOCKNAL_VERSION_DEBUG
	ksocknal_tunables.ksnd_protocol = &protocol;
#endif
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
	ksocknal_tunables.ksnd_sysctl = NULL;
#endif

	/* zero-copy doesn't pay off below a couple of pages */
	if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10))
		*ksocknal_tunables.ksnd_zc_min_payload = (2 << 10);

	/* initialize platform-specific tunables */
	return ksocknal_lib_tunables_init();
}
/* Tear down whatever ksocknal_lib_tunables_init() set up (the generic
 * half of ksocknal_tunables needs no cleanup). */
void ksocknal_tunables_fini(void)
{
	ksocknal_lib_tunables_fini();
}

View file

@ -0,0 +1,797 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2012, Intel Corporation.
*
* Author: Zach Brown <zab@zabbo.net>
* Author: Peter J. Braam <braam@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
* Author: Eric Barton <eric@bartonsoftware.com>
*
* This file is part of Portals, http://www.sf.net/projects/sandiaportals/
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Portals is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Portals; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "socklnd.h"
/*
* Protocol entries :
* pro_send_hello : send hello message
* pro_recv_hello : receive hello message
* pro_pack : pack message header
* pro_unpack : unpack message header
* pro_queue_tx_zcack() : Called holding BH lock: kss_lock
* return 1 if ACK is piggybacked, otherwise return 0
* pro_queue_tx_msg() : Called holding BH lock: kss_lock
* return the ACK that piggybacked by my message, or NULL
* pro_handle_zcreq() : handler of incoming ZC-REQ
* pro_handle_zcack() : handler of incoming ZC-ACK
* pro_match_tx() : Called holding glock
*/
/* V1.x pro_queue_tx_msg: the V1 protocol has no NOOP/ZC-ACK messages to
 * piggyback, so simply enqueue @tx_msg.  Always returns NULL (no NOOP
 * was displaced).  Called holding BH lock: kss_lock. */
static ksock_tx_t *
ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
{
	/* V1.x, just enqueue it */
	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
	return NULL;
}
/*
 * Advance conn->ksnc_tx_carrier to the next queued TX that could carry a
 * ZC-ACK or LNet message, or to NULL when the current carrier is the last
 * entry on the queue.
 *
 * Called holding BH lock: conn->ksnc_scheduler->kss_lock.
 */
void
ksocknal_next_tx_carrier(ksock_conn_t *conn)
{
	ksock_tx_t *cur = conn->ksnc_tx_carrier;

	LASSERT (cur != NULL);
	LASSERT (!list_empty(&conn->ksnc_tx_queue));

	if (cur->tx_list.next != &conn->ksnc_tx_queue) {
		conn->ksnc_tx_carrier = list_entry(cur->tx_list.next,
						   ksock_tx_t, tx_list);
		/* the carrier chain never mixes message types */
		LASSERT (conn->ksnc_tx_carrier->tx_msg.ksm_type ==
			 cur->tx_msg.ksm_type);
	} else {
		/* no more packets queued */
		conn->ksnc_tx_carrier = NULL;
	}
}
/* V2.x pro_queue_tx_zcack: enqueue or piggyback a ZC-ACK.
 * Called holding BH lock: kss_lock. */
static int
ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
			   ksock_tx_t *tx_ack, __u64 cookie)
{
	ksock_tx_t *tx = conn->ksnc_tx_carrier;

	LASSERT (tx_ack == NULL ||
		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	/*
	 * Enqueue or piggyback tx_ack / cookie
	 * . if no queued tx can piggyback the cookie of tx_ack (or the bare
	 *   cookie), just enqueue tx_ack (if tx_ack != NULL) and return 0.
	 * . if some queued tx can piggyback the cookie of tx_ack (or the
	 *   bare cookie), piggyback it and return 1.
	 */
	if (tx == NULL) {
		if (tx_ack != NULL) {
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
		if (tx_ack != NULL)
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
		return 0;
	}

	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
	LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);

	if (tx_ack != NULL)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	/* piggyback the zc-ack cookie */
	tx->tx_msg.ksm_zc_cookies[1] = cookie;
	/* move on to the next TX which can carry cookie */
	ksocknal_next_tx_carrier(conn);

	return 1;
}
/* V2.x pro_queue_tx_msg: enqueue @tx_msg, absorbing a pending NOOP
 * ZC-ACK if one is queued.  Returns the displaced NOOP tx (caller must
 * dispose of it) or NULL.  Called holding BH lock: kss_lock. */
static ksock_tx_t *
ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
{
	ksock_tx_t *tx = conn->ksnc_tx_carrier;

	/*
	 * Enqueue tx_msg:
	 * . If there is no NOOP on the connection, just enqueue
	 *   tx_msg and return NULL
	 * . If there is NOOP on the connection, piggyback the cookie
	 *   and replace the NOOP tx, and return the NOOP tx.
	 */
	if (tx == NULL) { /* nothing on queue */
		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
		conn->ksnc_tx_carrier = tx_msg;
		return NULL;
	}

	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
		return NULL;
	}

	LASSERT (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	/* There is a noop zc-ack can be piggybacked */
	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
	ksocknal_next_tx_carrier(conn);

	/* use new_tx to replace the noop zc-ack packet */
	/* NOTE: add-before-del keeps tx_msg at tx's exact queue position */
	list_add(&tx_msg->tx_list, &tx->tx_list);
	list_del(&tx->tx_list);

	return tx;
}
/* V3.x pro_queue_tx_zcack: like the V2 version, but on ACK-type (router)
 * connections it additionally coalesces cookies into a compact encoding
 * in ksm_zc_cookies[]: one cookie ([0]==0), two separated cookies
 * ([0] > [1], at most 2 apart), or an inclusive range ([0] < [1]).
 * Returns 1 if the ACK was piggybacked/absorbed, 0 if it was enqueued.
 * Called holding BH lock: kss_lock. */
static int
ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
			   ksock_tx_t *tx_ack, __u64 cookie)
{
	ksock_tx_t *tx;

	if (conn->ksnc_type != SOCKLND_CONN_ACK)
		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);

	/* non-blocking ZC-ACK (to router) */
	LASSERT (tx_ack == NULL ||
		 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	if ((tx = conn->ksnc_tx_carrier) == NULL) {
		if (tx_ack != NULL) {
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	/* conn->ksnc_tx_carrier != NULL */

	if (tx_ack != NULL)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
		return 1;

	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
		/* replace the keepalive PING with a real ACK */
		LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
		tx->tx_msg.ksm_zc_cookies[1] = cookie;
		return 1;
	}

	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
		CWARN("%s: duplicated ZC cookie: "LPU64"\n",
		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
		return 1; /* XXX return error in the future */
	}

	if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
		/* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
		} else {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
		}

		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
			/* not likely to carry more ACKs, skip it to simplify logic */
			ksocknal_next_tx_carrier(conn);
		}

		return 1;
	}

	/* takes two or more cookies already */

	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
		__u64 tmp = 0;

		/* two separated cookies: (a+2, a) or (a+1, a) */
		LASSERT (tx->tx_msg.ksm_zc_cookies[0] -
			 tx->tx_msg.ksm_zc_cookies[1] <= 2);

		/* can the new cookie bridge the pair into a range? */
		if (tx->tx_msg.ksm_zc_cookies[0] -
		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
				tmp = cookie;
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[1];
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[0];
		}

		if (tmp != 0) {
			/* range of cookies */
			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
			return 1;
		}

	} else {
		/* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
			CWARN("%s: duplicated ZC cookie: "LPU64"\n",
			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
			return 1; /* XXX: return error in the future */
		}

		/* extend the range by one at either end */
		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
			return 1;
		}

		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
			return 1;
		}
	}

	/* failed to piggyback ZC-ACK */
	if (tx_ack != NULL) {
		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
		/* the next tx can piggyback at least 1 ACK */
		ksocknal_next_tx_carrier(conn);
	}

	return 0;
}
/* V1/V2 pro_match_tx: decide whether @tx may be sent on @conn based on
 * connection type and message size.  Returns SOCKNAL_MATCH_YES /
 * SOCKNAL_MATCH_MAY (usable but not preferred) / never NO for these
 * protocols.  Called holding glock. */
static int
ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
{
	int nob;

#if SOCKNAL_VERSION_DEBUG
	if (!*ksocknal_tunables.ksnd_typed_conns)
		return SOCKNAL_MATCH_YES;
#endif

	if (tx == NULL || tx->tx_lnetmsg == NULL) {
		/* noop packet */
		nob = offsetof(ksock_msg_t, ksm_u);
	} else {
		/* V1.x frames carry an lnet_hdr_t, V2.x a full ksock_msg_t */
		nob = tx->tx_lnetmsg->msg_len +
		      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
		       sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
	}

	/* default checking for typed connection */
	switch (conn->ksnc_type) {
	default:
		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
		LBUG();
	case SOCKLND_CONN_ANY:
		return SOCKNAL_MATCH_YES;

	case SOCKLND_CONN_BULK_IN:
		return SOCKNAL_MATCH_MAY;

	case SOCKLND_CONN_BULK_OUT:
		if (nob < *ksocknal_tunables.ksnd_min_bulk)
			return SOCKNAL_MATCH_MAY;
		else
			return SOCKNAL_MATCH_YES;

	case SOCKLND_CONN_CONTROL:
		if (nob >= *ksocknal_tunables.ksnd_min_bulk)
			return SOCKNAL_MATCH_MAY;
		else
			return SOCKNAL_MATCH_YES;
	}
}
/* V3 pro_match_tx: as ksocknal_match_tx() but with the ACK connection
 * type — non-blocking sends prefer the ACK socket, everything else is
 * routed by size as before.  Can return SOCKNAL_MATCH_NO.
 * Called holding glock. */
static int
ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
{
	int nob;

	if (tx == NULL || tx->tx_lnetmsg == NULL)
		nob = offsetof(ksock_msg_t, ksm_u);
	else
		nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);

	switch (conn->ksnc_type) {
	default:
		CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
		LBUG();
	case SOCKLND_CONN_ANY:
		return SOCKNAL_MATCH_NO;

	case SOCKLND_CONN_ACK:
		if (nonblk)
			return SOCKNAL_MATCH_YES;
		else if (tx == NULL || tx->tx_lnetmsg == NULL)
			return SOCKNAL_MATCH_MAY;
		else
			return SOCKNAL_MATCH_NO;

	case SOCKLND_CONN_BULK_OUT:
		if (nonblk)
			return SOCKNAL_MATCH_NO;
		else if (nob < *ksocknal_tunables.ksnd_min_bulk)
			return SOCKNAL_MATCH_MAY;
		else
			return SOCKNAL_MATCH_YES;

	case SOCKLND_CONN_CONTROL:
		if (nonblk)
			return SOCKNAL_MATCH_NO;
		else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
			return SOCKNAL_MATCH_MAY;
		else
			return SOCKNAL_MATCH_YES;
	}
}
/* (Sink) handle incoming ZC request from sender */
static int
ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
{
ksock_peer_t *peer = c->ksnc_peer;
ksock_conn_t *conn;
ksock_tx_t *tx;
int rc;
read_lock(&ksocknal_data.ksnd_global_lock);
conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
if (conn != NULL) {
ksock_sched_t *sched = conn->ksnc_scheduler;
LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);
spin_lock_bh(&sched->kss_lock);
rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
spin_unlock_bh(&sched->kss_lock);
if (rc) { /* piggybacked */
read_unlock(&ksocknal_data.ksnd_global_lock);
return 0;
}
}
read_unlock(&ksocknal_data.ksnd_global_lock);
/* ACK connection is not ready, or can't piggyback the ACK */
tx = ksocknal_alloc_tx_noop(cookie, !!remote);
if (tx == NULL)
return -ENOMEM;
if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0)
return 0;
ksocknal_free_tx(tx);
return rc;
}
/* (Sender) handle ZC_ACK from sink */
static int
ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
{
ksock_peer_t *peer = conn->ksnc_peer;
ksock_tx_t *tx;
ksock_tx_t *tmp;
LIST_HEAD (zlist);
int count;
if (cookie1 == 0)
cookie1 = cookie2;
count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
conn->ksnc_proto == &ksocknal_protocol_v3x) {
/* keepalive PING for V3.x, just ignore it */
return count == 1 ? 0 : -EPROTO;
}
spin_lock(&peer->ksnp_lock);
list_for_each_entry_safe(tx, tmp,
&peer->ksnp_zc_req_list, tx_zc_list) {
__u64 c = tx->tx_msg.ksm_zc_cookies[0];
if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
tx->tx_msg.ksm_zc_cookies[0] = 0;
list_del(&tx->tx_zc_list);
list_add(&tx->tx_zc_list, &zlist);
if (--count == 0)
break;
}
}
spin_unlock(&peer->ksnp_lock);
while (!list_empty(&zlist)) {
tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
list_del(&tx->tx_zc_list);
ksocknal_tx_decref(tx);
}
return count == 0 ? 0 : -EPROTO;
}
/* V1.x pro_send_hello: repack @hello as a V1 lnet_hdr_t-framed HELLO and
 * write it (plus the IP list) to the socket.  Returns 0 or the error
 * from libcfs_sock_write(). */
static int
ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
{
	socket_t *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	lnet_magicversion_t *hmv;
	int rc;
	int i;

	/* the magic/version struct must overlay the start of lnet_hdr_t */
	CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	hmv = (lnet_magicversion_t *)&hdr->dest_nid;

	/* Re-organize V2.x message header to V1.x (lnet_hdr_t)
	 * header and send out */
	hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
	hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
	hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);

	if (the_lnet.ln_testprotocompat != 0) {
		/* single-shot proto check: deliberately corrupt the header
		 * once to exercise the peer's version handling */
		LNET_LOCK();
		if ((the_lnet.ln_testprotocompat & 1) != 0) {
			hmv->version_major++; /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		if ((the_lnet.ln_testprotocompat & 2) != 0) {
			hmv->magic = LNET_PROTO_MAGIC;
			the_lnet.ln_testprotocompat &= ~2;
		}
		LNET_UNLOCK();
	}

	hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid);
	hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid);
	hdr->type = cpu_to_le32 (LNET_MSG_HELLO);
	hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
	hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
	hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);

	rc = libcfs_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
			rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	/* NB: converts kshm_ips in place to little-endian wire order */
	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
	}

	rc = libcfs_sock_write(sock, hello->kshm_ips,
			       hello->kshm_nips * sizeof(__u32),
			       lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO payload (%d)"
			" to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
			HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
/* V2.x+ pro_send_hello: send @hello in its native ksock_hello_msg_t
 * wire format (header fields up to kshm_ips, then the IP list).
 * Returns 0 or the error from libcfs_sock_write(). */
static int
ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
{
	socket_t *sock = conn->ksnc_sock;
	int rc;

	hello->kshm_magic = LNET_PROTO_MAGIC;
	hello->kshm_version = conn->ksnc_proto->pro_version;

	if (the_lnet.ln_testprotocompat != 0) {
		/* single-shot proto check: corrupt the version once to
		 * exercise the peer's version handling */
		LNET_LOCK();
		if ((the_lnet.ln_testprotocompat & 1) != 0) {
			hello->kshm_version++; /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		LNET_UNLOCK();
	}

	rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
			       lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
			rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
		return rc;
	}

	if (hello->kshm_nips == 0)
		return 0;

	rc = libcfs_sock_write(sock, hello->kshm_ips,
			       hello->kshm_nips * sizeof(__u32),
			       lnet_acceptor_timeout());
	if (rc != 0) {
		CNETERR("Error %d sending HELLO payload (%d)"
			" to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips,
			HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
	}

	return rc;
}
/* V1.x pro_recv_hello: read the rest of a V1 lnet_hdr_t-framed HELLO
 * (the magic/version prefix was already consumed by the caller) and
 * translate it into @hello.  Returns 0 or a negative errno. */
static int
ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
{
	socket_t *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	int rc;
	int i;

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (hdr == NULL) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	rc = libcfs_sock_read(sock, &hdr->src_nid,
			      sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid),
			      timeout);
	if (rc != 0) {
		CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
			rc, HIPQUAD(conn->ksnc_ipaddr));
		LASSERT (rc < 0 && rc != -EALREADY);
		goto out;
	}

	/* ...and check we got what we expected */
	if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
		CERROR ("Expecting a HELLO hdr,"
			" but got type %d from %u.%u.%u.%u\n",
			le32_to_cpu (hdr->type),
			HIPQUAD(conn->ksnc_ipaddr));
		rc = -EPROTO;
		goto out;
	}

	/* V1 wire format is little-endian */
	hello->kshm_src_nid = le64_to_cpu (hdr->src_nid);
	hello->kshm_src_pid = le32_to_cpu (hdr->src_pid);
	hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation);
	hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type);
	hello->kshm_nips = le32_to_cpu (hdr->payload_length) /
			   sizeof (__u32);

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
		       hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
		rc = -EPROTO;
		goto out;
	}

	if (hello->kshm_nips == 0)
		goto out;

	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
			rc, HIPQUAD(conn->ksnc_ipaddr));
		LASSERT (rc < 0 && rc != -EALREADY);
		goto out;
	}

	for (i = 0; i < (int) hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);

		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
			       i, HIPQUAD(conn->ksnc_ipaddr));
			rc = -EPROTO;
			break;
		}
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
/* V2.x+ pro_recv_hello: read the rest of a native-format HELLO into
 * @hello, byte-swapping if the peer's magic arrived swapped (the magic
 * itself was already consumed by the caller).  Returns 0 or a negative
 * errno. */
static int
ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
{
	socket_t *sock = conn->ksnc_sock;
	int rc;
	int i;

	/* peer endianness is detected from how the magic arrived */
	if (hello->kshm_magic == LNET_PROTO_MAGIC)
		conn->ksnc_flip = 0;
	else
		conn->ksnc_flip = 1;

	rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
			      offsetof(ksock_hello_msg_t, kshm_ips) -
			      offsetof(ksock_hello_msg_t, kshm_src_nid),
			      timeout);
	if (rc != 0) {
		CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
			rc, HIPQUAD(conn->ksnc_ipaddr));
		LASSERT (rc < 0 && rc != -EALREADY);
		return rc;
	}

	if (conn->ksnc_flip) {
		__swab32s(&hello->kshm_src_pid);
		__swab64s(&hello->kshm_src_nid);
		__swab32s(&hello->kshm_dst_pid);
		__swab64s(&hello->kshm_dst_nid);
		__swab64s(&hello->kshm_src_incarnation);
		__swab64s(&hello->kshm_dst_incarnation);
		__swab32s(&hello->kshm_ctype);
		__swab32s(&hello->kshm_nips);
	}

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %u.%u.%u.%u\n",
		       hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr));
		return -EPROTO;
	}

	if (hello->kshm_nips == 0)
		return 0;

	rc = libcfs_sock_read(sock, hello->kshm_ips,
			      hello->kshm_nips * sizeof(__u32), timeout);
	if (rc != 0) {
		CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n",
			rc, HIPQUAD(conn->ksnc_ipaddr));
		LASSERT (rc < 0 && rc != -EALREADY);
		return rc;
	}

	for (i = 0; i < (int) hello->kshm_nips; i++) {
		if (conn->ksnc_flip)
			__swab32s(&hello->kshm_ips[i]);

		if (hello->kshm_ips[i] == 0) {
			CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n",
			       i, HIPQUAD(conn->ksnc_ipaddr));
			return -EPROTO;
		}
	}

	return 0;
}
/* V1.x pro_pack: point the first tx iov at the raw lnet_hdr_t (V1 sends
 * no ksock_msg_t wrapper) and set the total byte counts. */
static void
ksocknal_pack_msg_v1(ksock_tx_t *tx)
{
	/* V1.x has no KSOCK_MSG_NOOP */
	LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
	LASSERT(tx->tx_lnetmsg != NULL);

	tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr;
	tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t);

	tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
}
/* V2.x+ pro_pack: point the first tx iov at the ksock_msg_t wrapper —
 * the full message for LNet payloads, or just the fields before the
 * embedded header for a NOOP — and set the total byte counts. */
static void
ksocknal_pack_msg_v2(ksock_tx_t *tx)
{
	tx->tx_iov[0].iov_base = (void *)&tx->tx_msg;

	if (tx->tx_lnetmsg != NULL) {
		LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);

		tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
		tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
		tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
	} else {
		LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);

		tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
		tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
	}
	/* Don't checksum before start sending, because packet can be piggybacked with ACK */
}
/* V1.x pro_unpack: a V1 frame carries no ksock_msg_t header on the wire,
 * so synthesize the fields a V2-style reader expects for a plain LNet
 * message with no checksum and no ZC cookies. */
static void
ksocknal_unpack_msg_v1(ksock_msg_t *msg)
{
	msg->ksm_type = KSOCK_MSG_LNET;
	msg->ksm_csum = 0;
	msg->ksm_zc_cookies[0] = 0;
	msg->ksm_zc_cookies[1] = 0;
}
/* V2.x+ pro_unpack: the header already arrives in its in-memory layout,
 * so there is nothing to fix up.  (Redundant bare "return;" dropped.) */
static void
ksocknal_unpack_msg_v2(ksock_msg_t *msg)
{
	/* Do nothing */
}
/* Protocol V1.x: raw lnet_hdr_t framing; no NOOP messages and no
 * zero-copy support, so the ZC hooks are NULL. */
ksock_proto_t ksocknal_protocol_v1x =
{
	.pro_version = KSOCK_PROTO_V1,
	.pro_send_hello = ksocknal_send_hello_v1,
	.pro_recv_hello = ksocknal_recv_hello_v1,
	.pro_pack = ksocknal_pack_msg_v1,
	.pro_unpack = ksocknal_unpack_msg_v1,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
	.pro_handle_zcreq = NULL,
	.pro_handle_zcack = NULL,
	.pro_queue_tx_zcack = NULL,
	.pro_match_tx = ksocknal_match_tx
};
/* Protocol V2.x: ksock_msg_t framing with NOOP/ZC-ACK piggybacking. */
ksock_proto_t ksocknal_protocol_v2x =
{
	.pro_version = KSOCK_PROTO_V2,
	.pro_send_hello = ksocknal_send_hello_v2,
	.pro_recv_hello = ksocknal_recv_hello_v2,
	.pro_pack = ksocknal_pack_msg_v2,
	.pro_unpack = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
	.pro_handle_zcreq = ksocknal_handle_zcreq,
	.pro_handle_zcack = ksocknal_handle_zcack,
	.pro_match_tx = ksocknal_match_tx
};
/* Protocol V3.x: V2 wire format plus the dedicated ACK connection type —
 * hence the v3 ZC-ACK queueing and tx matching. */
ksock_proto_t ksocknal_protocol_v3x =
{
	.pro_version = KSOCK_PROTO_V3,
	.pro_send_hello = ksocknal_send_hello_v2,
	.pro_recv_hello = ksocknal_recv_hello_v2,
	.pro_pack = ksocknal_pack_msg_v2,
	.pro_unpack = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
	.pro_handle_zcreq = ksocknal_handle_zcreq,
	.pro_handle_zcack = ksocknal_handle_zcack,
	.pro_match_tx = ksocknal_match_tx_v3
};

View file

@ -0,0 +1,8 @@
# Build the LNet core module from its constituent objects.
obj-$(CONFIG_LNET) += lnet.o

lnet-y := api-errno.o api-ni.o config.o lib-me.o lib-msg.o lib-eq.o \
	  lib-md.o lib-ptl.o lib-move.o module.o lo.o router.o \
	  router_proc.o acceptor.o peer.o

# Pick up the LNet/libcfs headers relative to this directory.
ccflags-y := -I$(src)/../include

View file

@ -0,0 +1,527 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_SUBSYSTEM S_LNET

#include <linux/lnet/lib-lnet.h>

/* Acceptor tunables (registered as module parameters below) */
static int accept_port = 988;	/* well-known LNet acceptor TCP port */
static int accept_backlog = 127;
static int accept_timeout = 5;

/* State shared between the acceptor thread and its starter/stopper.
 * NOTE(review): the instance is not static — confirm no other file
 * references it before narrowing the linkage. */
struct {
	int pta_shutdown;	/* set to ask the acceptor thread to exit */
	socket_t *pta_sock;	/* listening socket */
	struct completion pta_signal;	/* start/stop handshake */
} lnet_acceptor_state;
/* Return the TCP port the acceptor listens on. */
int
lnet_acceptor_port(void)
{
	return accept_port;
}
/* True iff @magic equals @constant in either host or byte-swapped order
 * (the connecting peer may have the opposite endianness). */
static inline int
lnet_accept_magic(__u32 magic, __u32 constant)
{
	return (magic == constant ||
		magic == __swab32(constant));
}

/* NOTE(review): exports lnet_acceptor_port() defined above — it sits away
 * from its function; consider moving it next to the definition. */
EXPORT_SYMBOL(lnet_acceptor_port);
/* "accept" deliberately reuses the accept(2) name for module-parameter
 * compatibility; see lnet_acceptor_get_tunables() below. */
static char *accept = "secure";
CFS_MODULE_PARM(accept, "s", charp, 0444,
		"Accept connections (secure|all|none)");
CFS_MODULE_PARM(accept_port, "i", int, 0444,
		"Acceptor's port (same on all nodes)");
CFS_MODULE_PARM(accept_backlog, "i", int, 0444,
		"Acceptor's listen backlog");
CFS_MODULE_PARM(accept_timeout, "i", int, 0644,
		"Acceptor's timeout (seconds)");

/* alias for 'accept'; set by lnet_acceptor_get_tunables() */
static char *accept_type = NULL;
/*
 * Resolve tunable aliasing: point accept_type at the 'accept' module
 * parameter.  Always returns 0.
 */
int
lnet_acceptor_get_tunables(void)
{
	/* Userland acceptor uses 'accept_type' instead of 'accept', due to
	 * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
	 * for compatibility. Hence the trick. */
	accept_type = accept;
	return 0;
}
/* Return the acceptor socket I/O timeout in seconds ("accept_timeout"). */
int
lnet_acceptor_timeout(void)
{
	return accept_timeout;
}
EXPORT_SYMBOL(lnet_acceptor_timeout);
/*
 * Log a console-level explanation of a failed outgoing connection to
 * @peer_nid at @peer_ip:@peer_port.  @rc is the negative errno from the
 * connect path; "normal" network errors get a terse CNETERR while
 * protocol-level surprises get numbered LCONSOLE errors.
 */
void
lnet_connect_console_error (int rc, lnet_nid_t peer_nid,
			    __u32 peer_ip, int peer_port)
{
	switch (rc) {
	/* "normal" errors */
	case -ECONNREFUSED:
		CNETERR("Connection to %s at host %u.%u.%u.%u on port %d was "
			"refused: check that Lustre is running on that node.\n",
			libcfs_nid2str(peer_nid),
			HIPQUAD(peer_ip), peer_port);
		break;
	case -EHOSTUNREACH:
	case -ENETUNREACH:
		CNETERR("Connection to %s at host %u.%u.%u.%u "
			"was unreachable: the network or that node may "
			"be down, or Lustre may be misconfigured.\n",
			libcfs_nid2str(peer_nid), HIPQUAD(peer_ip));
		break;
	case -ETIMEDOUT:
		CNETERR("Connection to %s at host %u.%u.%u.%u on "
			"port %d took too long: that node may be hung "
			"or experiencing high load.\n",
			libcfs_nid2str(peer_nid),
			HIPQUAD(peer_ip), peer_port);
		break;
	/* protocol-level problems: the peer is up but not speaking
	 * a compatible acceptor protocol */
	case -ECONNRESET:
		LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %u.%u.%u.%u"
				   " on port %d was reset: "
				   "is it running a compatible version of "
				   "Lustre and is %s one of its NIDs?\n",
				   libcfs_nid2str(peer_nid),
				   HIPQUAD(peer_ip), peer_port,
				   libcfs_nid2str(peer_nid));
		break;
	case -EPROTO:
		LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at "
				   "host %u.%u.%u.%u on port %d: is it running "
				   "a compatible version of Lustre?\n",
				   libcfs_nid2str(peer_nid),
				   HIPQUAD(peer_ip), peer_port);
		break;
	case -EADDRINUSE:
		/* all local reserved ports were busy; see lnet_connect() */
		LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to "
				   "connect to %s at host %u.%u.%u.%u on port "
				   "%d\n", libcfs_nid2str(peer_nid),
				   HIPQUAD(peer_ip), peer_port);
		break;
	default:
		LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s"
				   " at host %u.%u.%u.%u on port %d\n", rc,
				   libcfs_nid2str(peer_nid),
				   HIPQUAD(peer_ip), peer_port);
		break;
	}
}
EXPORT_SYMBOL(lnet_connect_console_error);
/*
 * Open a connection from a privileged (reserved) local port to the
 * acceptor at @peer_ip:@peer_port and send the connection request
 * identifying @peer_nid.  On success *@sockp owns the connected socket;
 * on failure a console error is logged and a negative errno returned.
 */
int
lnet_connect(socket_t **sockp, lnet_nid_t peer_nid,
	     __u32 local_ip, __u32 peer_ip, int peer_port)
{
	lnet_acceptor_connreq_t cr;
	socket_t *sock;
	int rc;
	int port;
	int fatal;

	CLASSERT (sizeof(cr) <= 16);		/* not too big to be on the stack */

	for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
	     port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
	     --port) {
		/* Iterate through reserved ports. */

		rc = libcfs_sock_connect(&sock, &fatal,
					 local_ip, port,
					 peer_ip, peer_port);
		if (rc != 0) {
			if (fatal)
				goto failed;
			/* local port busy: try the next one down */
			continue;
		}

		CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1);

		cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
		cr.acr_nid = peer_nid;

		if (the_lnet.ln_testprotocompat != 0) {
			/* single-shot proto check: deliberately corrupt the
			 * request once to exercise peer version handling */
			lnet_net_lock(LNET_LOCK_EX);
			if ((the_lnet.ln_testprotocompat & 4) != 0) {
				cr.acr_version++;
				the_lnet.ln_testprotocompat &= ~4;
			}
			if ((the_lnet.ln_testprotocompat & 8) != 0) {
				cr.acr_magic = LNET_PROTO_MAGIC;
				the_lnet.ln_testprotocompat &= ~8;
			}
			lnet_net_unlock(LNET_LOCK_EX);
		}

		rc = libcfs_sock_write(sock, &cr, sizeof(cr),
				       accept_timeout);
		if (rc != 0)
			goto failed_sock;

		*sockp = sock;
		return 0;
	}

	/* ran out of reserved ports */
	rc = -EADDRINUSE;
	goto failed;

failed_sock:
	libcfs_sock_release(sock);
failed:
	lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
	return rc;
}
EXPORT_SYMBOL(lnet_connect);
/* Below is the code common for both kernel and MT user-space */

/*
 * Handle a new incoming connection on @sock whose leading 4 bytes
 * (already read by the caller) are @magic.  Validates the acceptor
 * handshake, reads the remainder of the connection request, looks up
 * the target NI and hands the socket to that NI's LND.
 *
 * Returns 0 on success (socket ownership passed to the LND via
 * lnd_accept) or a negative errno; the caller releases the socket on
 * failure.
 *
 * Fixes vs. original: two adjacent string literals concatenated to
 * "responseto" (missing space), and "doesn not" typo in the 0x121
 * console message.
 */
int
lnet_accept(socket_t *sock, __u32 magic)
{
	lnet_acceptor_connreq_t cr;
	__u32 peer_ip;
	int peer_port;
	int rc;
	int flip;	/* peer has opposite endianness? */
	lnet_ni_t *ni;
	char *str;

	LASSERT (sizeof(cr) <= 16);		/* not too big for the stack */

	rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
	LASSERT (rc == 0);			/* we succeeded before */

	if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {

		if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) {
			/* future version compatibility!
			 * When LNET unifies protocols over all LNDs, the first
			 * thing sent will be a version query. I send back
			 * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */
			memset (&cr, 0, sizeof(cr));
			cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
			cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
			rc = libcfs_sock_write(sock, &cr, sizeof(cr),
					       accept_timeout);

			if (rc != 0)
				CERROR("Error sending magic+version in response "
				       "to LNET magic from %u.%u.%u.%u: %d\n",
				       HIPQUAD(peer_ip), rc);
			return -EPROTO;
		}

		/* legacy / unknown protocols: refuse with a description */
		if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC))
			str = "'old' socknal/tcpnal";
		else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC))
			str = "'old' ranal";
		else
			str = "unrecognised";

		LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %u.%u.%u.%u"
				   " magic %08x: %s acceptor protocol\n",
				   HIPQUAD(peer_ip), magic, str);
		return -EPROTO;
	}

	flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC);

	rc = libcfs_sock_read(sock, &cr.acr_version,
			      sizeof(cr.acr_version),
			      accept_timeout);
	if (rc != 0) {
		CERROR("Error %d reading connection request version from "
		       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
		return -EIO;
	}

	if (flip)
		__swab32s(&cr.acr_version);

	if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) {
		/* future version compatibility!
		 * An acceptor-specific protocol rev will first send a version
		 * query. I send back my current version to tell her I'm
		 * "old". */
		int peer_version = cr.acr_version;

		memset (&cr, 0, sizeof(cr));
		cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC;
		cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;

		rc = libcfs_sock_write(sock, &cr, sizeof(cr),
				       accept_timeout);

		if (rc != 0)
			CERROR("Error sending magic+version in response "
			       "to version %d from %u.%u.%u.%u: %d\n",
			       peer_version, HIPQUAD(peer_ip), rc);
		return -EPROTO;
	}

	/* read the rest of the request (everything after acr_nid's start) */
	rc = libcfs_sock_read(sock, &cr.acr_nid,
			      sizeof(cr) -
			      offsetof(lnet_acceptor_connreq_t, acr_nid),
			      accept_timeout);
	if (rc != 0) {
		CERROR("Error %d reading connection request from "
		       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
		return -EIO;
	}

	if (flip)
		__swab64s(&cr.acr_nid);

	ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
	if (ni == NULL ||			/* no matching net */
	    ni->ni_nid != cr.acr_nid) {		/* right NET, wrong NID! */
		if (ni != NULL)
			lnet_ni_decref(ni);
		LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %u.%u.%u.%u"
				   " for %s: No matching NI\n",
				   HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
		return -EPERM;
	}

	if (ni->ni_lnd->lnd_accept == NULL) {
		/* This catches a request for the loopback LND */
		lnet_ni_decref(ni);
		LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %u.%u.%u.%u"
				   " for %s: NI does not accept IP connections\n",
				   HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid));
		return -EPERM;
	}

	CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u\n",
	       libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip));

	rc = ni->ni_lnd->lnd_accept(ni, sock);

	lnet_ni_decref(ni);
	return rc;
}
/*
 * Acceptor thread main loop.  @arg is a boolean (cast through long):
 * when set, connections from unprivileged peer ports are refused
 * ("secure" mode).  Listens on accept_port, reads each new connection's
 * leading magic word and hands off to lnet_accept().  Signals
 * pta_signal once at start-up (with pta_shutdown carrying the start-up
 * status for the parent) and again on exit.
 */
int
lnet_acceptor(void *arg)
{
	socket_t *newsock;
	int rc;
	__u32 magic;
	__u32 peer_ip;
	int peer_port;
	int secure = (int)((long_ptr_t)arg);

	LASSERT (lnet_acceptor_state.pta_sock == NULL);

	cfs_block_allsigs();

	rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
				0, accept_port, accept_backlog);
	if (rc != 0) {
		if (rc == -EADDRINUSE)
			LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
					   " %d: port already in use\n",
					   accept_port);
		else
			LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port "
					   "%d: unexpected error %d\n",
					   accept_port, rc);

		lnet_acceptor_state.pta_sock = NULL;
	} else {
		LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
	}

	/* set init status and unblock parent */
	lnet_acceptor_state.pta_shutdown = rc;
	complete(&lnet_acceptor_state.pta_signal);

	if (rc != 0)
		return rc;

	while (!lnet_acceptor_state.pta_shutdown) {

		rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
		if (rc != 0) {
			if (rc != -EAGAIN) {
				CWARN("Accept error %d: pausing...\n", rc);
				cfs_pause(cfs_time_seconds(1));
			}
			continue;
		}

		/* maybe we're waken up with libcfs_sock_abort_accept() */
		if (lnet_acceptor_state.pta_shutdown) {
			libcfs_sock_release(newsock);
			break;
		}

		rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port);
		if (rc != 0) {
			CERROR("Can't determine new connection's address\n");
			goto failed;
		}

		/* secure mode: peer must have bound a reserved port */
		if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
			CERROR("Refusing connection from %u.%u.%u.%u: "
			       "insecure port %d\n",
			       HIPQUAD(peer_ip), peer_port);
			goto failed;
		}

		rc = libcfs_sock_read(newsock, &magic, sizeof(magic),
				      accept_timeout);
		if (rc != 0) {
			CERROR("Error %d reading connection request from "
			       "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip));
			goto failed;
		}

		/* on success, lnet_accept() passes the socket to the LND */
		rc = lnet_accept(newsock, magic);
		if (rc != 0)
			goto failed;

		continue;

failed:
		libcfs_sock_release(newsock);
	}

	libcfs_sock_release(lnet_acceptor_state.pta_sock);
	lnet_acceptor_state.pta_sock = NULL;

	CDEBUG(D_NET, "Acceptor stopping\n");

	/* unblock lnet_acceptor_stop() */
	complete(&lnet_acceptor_state.pta_signal);
	return 0;
}
/*
 * Parse the "accept" policy string @acc.  On "secure" or "all", set
 * *@sec (1 = privileged peer ports only, 0 = any port) and return 1;
 * on "none" return 0 (acceptor not wanted); otherwise log a console
 * error and return -EINVAL.
 */
static inline int
accept2secure(const char *acc, long *sec)
{
	if (strcmp(acc, "secure") == 0) {
		*sec = 1;
		return 1;
	}

	if (strcmp(acc, "all") == 0) {
		*sec = 0;
		return 1;
	}

	if (strcmp(acc, "none") == 0)
		return 0;

	LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n",
			   acc);
	return -EINVAL;
}
/*
 * Start the acceptor thread if any NI requires one.  Returns 0 on
 * success or when no acceptor is needed, a negative errno otherwise.
 *
 * Fix vs. original: the "not required" early return leaked the
 * completion initialized just above it; every other early-return path
 * calls fini_completion(), so this one now does too.
 */
int
lnet_acceptor_start(void)
{
	int rc;
	long rc2;
	long secure;

	LASSERT (lnet_acceptor_state.pta_sock == NULL);

	rc = lnet_acceptor_get_tunables();
	if (rc != 0)
		return rc;

	init_completion(&lnet_acceptor_state.pta_signal);
	rc = accept2secure(accept_type, &secure);
	if (rc <= 0) {
		fini_completion(&lnet_acceptor_state.pta_signal);
		return rc;
	}

	if (lnet_count_acceptor_nis() == 0) {
		/* not required */
		fini_completion(&lnet_acceptor_state.pta_signal);
		return 0;
	}

	rc2 = PTR_ERR(kthread_run(lnet_acceptor,
				  (void *)(ulong_ptr_t)secure,
				  "acceptor_%03ld", secure));
	if (IS_ERR_VALUE(rc2)) {
		CERROR("Can't start acceptor thread: %ld\n", rc2);
		fini_completion(&lnet_acceptor_state.pta_signal);
		return -ESRCH;
	}

	/* wait for acceptor to startup */
	wait_for_completion(&lnet_acceptor_state.pta_signal);

	if (!lnet_acceptor_state.pta_shutdown) {
		/* started OK */
		LASSERT(lnet_acceptor_state.pta_sock != NULL);
		return 0;
	}

	/* the thread ran but failed to listen; it has already exited */
	LASSERT(lnet_acceptor_state.pta_sock == NULL);
	fini_completion(&lnet_acceptor_state.pta_signal);
	return -ENETDOWN;
}
/*
 * Stop the acceptor thread; no-op if it was never started.  Sets the
 * shutdown flag, aborts the blocking accept, then waits for the thread
 * to signal its exit through pta_signal.
 */
void
lnet_acceptor_stop(void)
{
	if (lnet_acceptor_state.pta_sock == NULL) /* not running */
		return;

	lnet_acceptor_state.pta_shutdown = 1;
	libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock);

	/* block until acceptor signals exit */
	wait_for_completion(&lnet_acceptor_state.pta_signal);
	fini_completion(&lnet_acceptor_state.pta_signal);
}

View file

@ -0,0 +1,39 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/api-errno.c
*
* Instantiate the string table of errors
*/
/* If you change these, you must update the number table in portals/errno.h */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,447 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-eq.c
*
* Library level Event queue management routines
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/**
 * Create an event queue that has room for \a count number of events.
 *
 * The event queue is circular and older events will be overwritten by new
 * ones if they are not removed in time by the user using the functions
 * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to
 * determine the appropriate size of the event queue to prevent this loss
 * of events. Note that when EQ handler is specified in \a callback, no
 * event loss can happen, since the handler is run for each event deposited
 * into the EQ.
 *
 * \param count The number of events to be stored in the event queue. It
 * will be rounded up to the next power of two.
 * \param callback A handler function that runs when an event is deposited
 * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to
 * indicate that no event handler is desired.
 * \param handle On successful return, this location will hold a handle for
 * the newly created EQ.
 *
 * \retval 0 On success.
 * \retval -EINVAL If a parameter is not valid.
 * \retval -ENOMEM If memory for the EQ can't be allocated.
 *
 * \see lnet_eq_handler_t for the discussion on EQ handler semantics.
 */
int
LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback,
	    lnet_handle_eq_t *handle)
{
	lnet_eq_t *eq;

	LASSERT (the_lnet.ln_init);
	LASSERT (the_lnet.ln_refcount > 0);

	/* We need count to be a power of 2 so that when eq_{enq,deq}_seq
	 * overflow, they don't skip entries, so the queue has the same
	 * apparent capacity at all times */

	count = cfs_power2_roundup(count);

	if (callback != LNET_EQ_HANDLER_NONE && count != 0) {
		CWARN("EQ callback is guaranteed to get every event, "
		      "do you still want to set eqcount %d for polling "
		      "event which will have locking overhead? "
		      "Please contact with developer to confirm\n", count);
	}

	/* count can be 0 if only need callback, we can eliminate
	 * overhead of enqueue event */
	if (count == 0 && callback == LNET_EQ_HANDLER_NONE)
		return -EINVAL;

	eq = lnet_eq_alloc();
	if (eq == NULL)
		return -ENOMEM;

	if (count != 0) {
		LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t));
		if (eq->eq_events == NULL)
			goto failed;
		/* NB allocator has set all event sequence numbers to 0,
		 * so all them should be earlier than eq_deq_seq */
	}

	eq->eq_deq_seq = 1;
	eq->eq_enq_seq = 1;
	eq->eq_size = count;
	eq->eq_callback = callback;

	/* per-CPT reference counts track MDs attached to this EQ */
	eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(),
				       sizeof(*eq->eq_refs[0]));
	if (eq->eq_refs == NULL)
		goto failed;

	/* MUST hold both exclusive lnet_res_lock */
	lnet_res_lock(LNET_LOCK_EX);
	/* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
	 * both EQ lookup and poll event with only lnet_eq_wait_lock */
	lnet_eq_wait_lock();

	lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh);
	list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active);

	lnet_eq_wait_unlock();
	lnet_res_unlock(LNET_LOCK_EX);

	lnet_eq2handle(handle, eq);
	return 0;

failed:
	if (eq->eq_events != NULL)
		LIBCFS_FREE(eq->eq_events, count * sizeof(lnet_event_t));

	if (eq->eq_refs != NULL)
		cfs_percpt_free(eq->eq_refs);

	lnet_eq_free(eq);
	return -ENOMEM;
}
EXPORT_SYMBOL(LNetEQAlloc);
/**
 * Release the resources associated with an event queue if it's idle;
 * otherwise do nothing and it's up to the user to try again.
 *
 * \param eqh A handle for the event queue to be released.
 *
 * \retval 0 If the EQ is not in use and freed.
 * \retval -ENOENT If \a eqh does not point to a valid EQ.
 * \retval -EBUSY If the EQ is still in use by some MDs.
 */
int
LNetEQFree(lnet_handle_eq_t eqh)
{
	struct lnet_eq *eq;
	lnet_event_t *events = NULL;
	int **refs = NULL;
	int *ref;
	int rc = 0;
	int size = 0;
	int i;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	lnet_res_lock(LNET_LOCK_EX);
	/* NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do
	 * both EQ lookup and poll event with only lnet_eq_wait_lock */
	lnet_eq_wait_lock();

	eq = lnet_handle2eq(&eqh);
	if (eq == NULL) {
		rc = -ENOENT;
		goto out;
	}

	/* refuse to free while any CPT still holds a reference (MDs) */
	cfs_percpt_for_each(ref, i, eq->eq_refs) {
		LASSERT(*ref >= 0);
		if (*ref == 0)
			continue;

		/* fixed typo: "equeue" -> "queue" */
		CDEBUG(D_NET, "Event queue (%d: %d) busy on destroy.\n",
		       i, *ref);
		rc = -EBUSY;
		goto out;
	}

	/* stash for free after lock dropped */
	events = eq->eq_events;
	size = eq->eq_size;
	refs = eq->eq_refs;

	lnet_res_lh_invalidate(&eq->eq_lh);
	list_del(&eq->eq_list);
	lnet_eq_free_locked(eq);
out:
	lnet_eq_wait_unlock();
	lnet_res_unlock(LNET_LOCK_EX);

	if (events != NULL)
		LIBCFS_FREE(events, size * sizeof(lnet_event_t));
	if (refs != NULL)
		cfs_percpt_free(refs);

	return rc;
}
EXPORT_SYMBOL(LNetEQFree);
/*
 * Deposit event @ev into @eq.  For a callback-only EQ (eq_size == 0)
 * just run the handler; otherwise store the event in the circular
 * buffer (possibly overwriting the oldest unconsumed entry), run the
 * handler if any, and wake pollers.
 *
 * MUST be called with the resource lock held but WITHOUT
 * lnet_eq_wait_lock (it is taken here).
 */
void
lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev)
{
	int index;

	if (eq->eq_size == 0) {
		/* callback-only EQ: no buffer, handler is mandatory */
		LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE);
		eq->eq_callback(ev);
		return;
	}

	lnet_eq_wait_lock();
	ev->sequence = eq->eq_enq_seq++;

	/* eq_size is a power of 2, so the mask below is valid */
	LASSERT(eq->eq_size == LOWEST_BIT_SET(eq->eq_size));
	index = ev->sequence & (eq->eq_size - 1);

	eq->eq_events[index] = *ev;

	if (eq->eq_callback != LNET_EQ_HANDLER_NONE)
		eq->eq_callback(ev);

	/* Wake anyone waiting in LNetEQPoll() */
	if (waitqueue_active(&the_lnet.ln_eq_waitq))
		wake_up_all(&the_lnet.ln_eq_waitq);
	lnet_eq_wait_unlock();
}
/*
 * Take the next unconsumed event from @eq into *@ev.
 *
 * \retval 0          no new event available
 * \retval 1          an event was returned
 * \retval -EOVERFLOW an event was returned, but at least one earlier
 *                    event was lost to ring-buffer overwrite
 *
 * Must be called with lnet_eq_wait_lock held.
 */
int
lnet_eq_dequeue_event(lnet_eq_t *eq, lnet_event_t *ev)
{
	int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
	lnet_event_t *new_event = &eq->eq_events[new_index];
	int rc;
	ENTRY;

	/* must called with lnet_eq_wait_lock hold */
	if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence))
		RETURN(0);

	/* We've got a new event... */
	*ev = *new_event;

	CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
	       new_event, eq->eq_deq_seq, eq->eq_size);

	/* ...but did it overwrite an event we've not seen yet? */
	if (eq->eq_deq_seq == new_event->sequence) {
		rc = 1;
	} else {
		/* don't complain with CERROR: some EQs are sized small
		 * anyway; if it's important, the caller should complain */
		CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n",
		       eq->eq_deq_seq, new_event->sequence);
		rc = -EOVERFLOW;
	}

	eq->eq_deq_seq = new_event->sequence + 1;
	RETURN(rc);
}
/**
 * A nonblocking function that can be used to get the next event in an EQ.
 * If an event handler is associated with the EQ, the handler will run before
 * this function returns successfully. The event is removed from the queue.
 *
 * \param eventq A handle for the event queue.
 * \param event On successful return (1 or -EOVERFLOW), this location will
 * hold the next event in the EQ.
 *
 * \retval 0 No pending event in the EQ.
 * \retval 1 Indicates success.
 * \retval -ENOENT If \a eventq does not point to a valid EQ.
 * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
 * at least one event between this event and the last event obtained from the
 * EQ has been dropped due to limited space in the EQ.
 */
int
LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event)
{
	int which;

	/* zero timeout == poll once and return immediately */
	return LNetEQPoll(&eventq, 1, 0,
			  event, &which);
}
EXPORT_SYMBOL(LNetEQGet);
/**
 * Block the calling process until there is an event in the EQ.
 * If an event handler is associated with the EQ, the handler will run before
 * this function returns successfully. This function returns the next event
 * in the EQ and removes it from the EQ.
 *
 * \param eventq A handle for the event queue.
 * \param event On successful return (1 or -EOVERFLOW), this location will
 * hold the next event in the EQ.
 *
 * \retval 1 Indicates success.
 * \retval -ENOENT If \a eventq does not point to a valid EQ.
 * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
 * at least one event between this event and the last event obtained from the
 * EQ has been dropped due to limited space in the EQ.
 */
int
LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event)
{
	int which;

	/* LNET_TIME_FOREVER == block until an event arrives */
	return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER,
			  event, &which);
}
EXPORT_SYMBOL(LNetEQWait);
/*
 * Sleep on the EQ waitqueue for up to *@timeout_ms milliseconds
 * (negative == forever), updating *@timeout_ms with the time remaining.
 *
 * Called with lnet_eq_wait_lock held; the lock is dropped while
 * sleeping and re-taken before return.
 *
 * \retval -1 did nothing (zero timeout), definitely no new event
 * \retval  1 slept; caller should re-check and may wait again
 * \retval  0 timeout exhausted, but a new event may have arrived
 */
static int
lnet_eq_wait_locked(int *timeout_ms)
{
	int tms = *timeout_ms;
	int wait;
	wait_queue_t wl;
	cfs_time_t now;

	if (tms == 0)
		return -1; /* don't want to wait and no new event */

	init_waitqueue_entry_current(&wl);
	set_current_state(TASK_INTERRUPTIBLE);
	add_wait_queue(&the_lnet.ln_eq_waitq, &wl);

	lnet_eq_wait_unlock();

	if (tms < 0) {
		/* infinite wait: sleep until woken */
		waitq_wait(&wl, TASK_INTERRUPTIBLE);
	} else {
		struct timeval tv;

		now = cfs_time_current();
		waitq_timedwait(&wl, TASK_INTERRUPTIBLE,
				cfs_time_seconds(tms) / 1000);
		/* subtract the time actually slept from the budget */
		cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv);
		tms -= (int)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
		if (tms < 0) /* no more wait but may have new event */
			tms = 0;
	}

	wait = tms != 0; /* might need to call here again */
	*timeout_ms = tms;

	lnet_eq_wait_lock();
	remove_wait_queue(&the_lnet.ln_eq_waitq, &wl);

	return wait;
}
/**
 * Block the calling process until there's an event from a set of EQs or
 * timeout happens.
 *
 * If an event handler is associated with the EQ, the handler will run before
 * this function returns successfully, in which case the corresponding event
 * is consumed.
 *
 * LNetEQPoll() provides a timeout to allow applications to poll, block for a
 * fixed period, or block indefinitely.
 *
 * \param eventqs,neq An array of EQ handles, and size of the array.
 * \param timeout_ms Time in milliseconds to wait for an event to occur on
 * one of the EQs. The constant LNET_TIME_FOREVER can be used to indicate an
 * infinite timeout.
 * \param event,which On successful return (1 or -EOVERFLOW), \a event will
 * hold the next event in the EQs, and \a which will contain the index of the
 * EQ from which the event was taken.
 *
 * \retval 0 No pending event in the EQs after timeout.
 * \retval 1 Indicates success.
 * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that
 * at least one event between this event and the last event obtained from the
 * EQ indicated by \a which has been dropped due to limited space in the EQ.
 * \retval -ENOENT If there's an invalid handle in \a eventqs.
 */
int
LNetEQPoll(lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
	   lnet_event_t *event, int *which)
{
	int wait = 1;
	int rc;
	int i;
	ENTRY;

	LASSERT (the_lnet.ln_init);
	LASSERT (the_lnet.ln_refcount > 0);

	if (neq < 1)
		RETURN(-ENOENT);

	lnet_eq_wait_lock();

	for (;;) {
		/* scan every EQ for a pending event */
		for (i = 0; i < neq; i++) {
			lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]);

			if (eq == NULL) {
				lnet_eq_wait_unlock();
				RETURN(-ENOENT);
			}

			rc = lnet_eq_dequeue_event(eq, event);
			if (rc != 0) {
				lnet_eq_wait_unlock();
				*which = i;
				RETURN(rc);
			}
		}

		if (wait == 0)
			break;

		/*
		 * return value of lnet_eq_wait_locked:
		 * -1 : did nothing and it's sure no new event
		 *  1 : sleep inside and wait until new event
		 *  0 : don't want to wait anymore, but might have new event
		 *      so need to call dequeue again
		 */
		wait = lnet_eq_wait_locked(&timeout_ms);
		if (wait < 0) /* no new event */
			break;
	}

	lnet_eq_wait_unlock();
	RETURN(0);
}

View file

@ -0,0 +1,451 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-md.c
*
* Memory Descriptor management routines
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/*
 * Unlink @md: detach it from its ME (unlinking the ME too if it was
 * created with LNET_UNLINK), invalidate its handle, and free it once
 * its refcount drops to zero.  If operations are still in flight
 * (md_refcount != 0) the free is deferred to the last decref.
 *
 * must be called with lnet_res_lock held
 */
void
lnet_md_unlink(lnet_libmd_t *md)
{
	if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) {
		/* first unlink attempt... */
		lnet_me_t *me = md->md_me;

		md->md_flags |= LNET_MD_FLAG_ZOMBIE;

		/* Disassociate from ME (if any), and unlink it if it was created
		 * with LNET_UNLINK */
		if (me != NULL) {
			/* detach MD from portal */
			lnet_ptl_detach_md(me, md);
			if (me->me_unlink == LNET_UNLINK)
				lnet_me_unlink(me);
		}

		/* ensure all future handle lookups fail */
		lnet_res_lh_invalidate(&md->md_lh);
	}

	if (md->md_refcount != 0) {
		/* still in use: last decref will finish the unlink */
		CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
		return;
	}

	CDEBUG(D_NET, "Unlinking md %p\n", md);

	if (md->md_eq != NULL) {
		/* drop this MD's reference on its EQ (per-CPT counter) */
		int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);

		LASSERT(*md->md_eq->eq_refs[cpt] > 0);
		(*md->md_eq->eq_refs[cpt])--;
	}

	LASSERT(!list_empty(&md->md_list));
	list_del_init(&md->md_list);
	lnet_md_free_locked(md);
}
/*
 * Initialize lib MD @lmd from the user-visible descriptor @umd.
 * Handles the three buffer layouts: struct iovec array (LNET_MD_IOVEC),
 * kernel page vector (LNET_MD_KIOV), or a single contiguous buffer.
 * Returns 0 on success, -EINVAL on a malformed descriptor.
 */
static int
lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
{
	int i;
	unsigned int niov;
	int total_length = 0;

	lmd->md_me = NULL;
	lmd->md_start = umd->start;
	lmd->md_offset = 0;
	lmd->md_max_size = umd->max_size;
	lmd->md_options = umd->options;
	lmd->md_user_ptr = umd->user_ptr;
	lmd->md_eq = NULL;
	lmd->md_threshold = umd->threshold;
	lmd->md_refcount = 0;
	lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;

	if ((umd->options & LNET_MD_IOVEC) != 0) {

		if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */
			return -EINVAL;

		/* umd->start points at an iovec array of umd->length entries */
		lmd->md_niov = niov = umd->length;
		memcpy(lmd->md_iov.iov, umd->start,
		       niov * sizeof (lmd->md_iov.iov[0]));

		for (i = 0; i < (int)niov; i++) {
			/* We take the base address on trust */
			if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
				return -EINVAL;

			total_length += lmd->md_iov.iov[i].iov_len;
		}

		lmd->md_length = total_length;

		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
		    (umd->max_size < 0 ||
		     umd->max_size > total_length)) /* illegal max_size */
			return -EINVAL;

	} else if ((umd->options & LNET_MD_KIOV) != 0) {
		/* umd->start points at a kiov array of umd->length entries */
		lmd->md_niov = niov = umd->length;
		memcpy(lmd->md_iov.kiov, umd->start,
		       niov * sizeof (lmd->md_iov.kiov[0]));

		for (i = 0; i < (int)niov; i++) {
			/* We take the page pointer on trust */
			if (lmd->md_iov.kiov[i].kiov_offset +
			    lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE )
				return -EINVAL; /* invalid length */

			total_length += lmd->md_iov.kiov[i].kiov_len;
		}

		lmd->md_length = total_length;

		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
		    (umd->max_size < 0 ||
		     umd->max_size > total_length)) /* illegal max_size */
			return -EINVAL;
	} else {   /* contiguous */
		lmd->md_length = umd->length;
		lmd->md_niov = niov = 1;
		lmd->md_iov.iov[0].iov_base = umd->start;
		lmd->md_iov.iov[0].iov_len = umd->length;

		if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
		    (umd->max_size < 0 ||
		     umd->max_size > (int)umd->length)) /* illegal max_size */
			return -EINVAL;
	}

	return 0;
}
/*
 * Activate @md in CPT @cpt: attach it to the EQ named by @eq_handle
 * (if any), initialize its handle, and add it to the active MD list.
 * Returns 0 on success, -ENOENT if @eq_handle names no valid EQ.
 *
 * must be called with resource lock held
 */
static int
lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt)
{
	struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];

	/* NB we are passed an allocated, but inactive md.
	 * if we return success, caller may lnet_md_unlink() it.
	 * otherwise caller may only lnet_md_free() it.
	 */
	/* This implementation doesn't know how to create START events or
	 * disable END events. Best to LASSERT our caller is compliant so
	 * we find out quickly... */
	/* TODO - reevaluate what should be here in light of
	 * the removal of the start and end events
	 * maybe there we shouldn't even allow LNET_EQ_NONE!)
	 * LASSERT (eq == NULL);
	 */
	if (!LNetHandleIsInvalid(eq_handle)) {
		md->md_eq = lnet_handle2eq(&eq_handle);

		if (md->md_eq == NULL)
			return -ENOENT;

		/* EQ's per-CPT refcount keeps it pinned while MDs use it */
		(*md->md_eq->eq_refs[cpt])++;
	}

	lnet_res_lh_initialize(container, &md->md_lh);

	LASSERT(list_empty(&md->md_list));
	list_add(&md->md_list, &container->rec_active);

	return 0;
}
/*
 * Fill the user-visible descriptor @umd from lib MD @lmd (the reverse
 * of lnet_md_build(), minus the iov contents — see note below).
 *
 * must be called with lnet_res_lock held
 */
void
lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd)
{
	/* NB this doesn't copy out all the iov entries so when a
	 * discontiguous MD is copied out, the target gets to know the
	 * original iov pointer (in start) and the number of entries it had
	 * and that's all.
	 */
	umd->start = lmd->md_start;
	/* for vectored MDs, report the entry count rather than bytes */
	umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ?
		      lmd->md_length : lmd->md_niov;
	umd->threshold = lmd->md_threshold;
	umd->max_size = lmd->md_max_size;
	umd->options = lmd->md_options;
	umd->user_ptr = lmd->md_user_ptr;
	lnet_eq2handle(&umd->eq_handle, lmd->md_eq);
}
/*
 * Sanity-check the user-visible fields of @umd before it is turned
 * into a lib MD.  Returns 0 if acceptable, -EINVAL otherwise.
 */
int
lnet_md_validate(lnet_md_t *umd)
{
	int vectored = (umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0;

	if (umd->start == NULL && umd->length != 0) {
		CERROR("MD start pointer can not be NULL with length %u\n",
		       umd->length);
		return -EINVAL;
	}

	if (vectored && umd->length > LNET_MAX_IOV) {
		CERROR("Invalid option: too many fragments %u, %d max\n",
		       umd->length, LNET_MAX_IOV);
		return -EINVAL;
	}

	return 0;
}
/**
 * Create a memory descriptor and attach it to a ME
 *
 * \param meh A handle for a ME to associate the new MD with.
 * \param umd Provides initial values for the user-visible parts of a MD.
 * Other than its use for initialization, there is no linkage between this
 * structure and the MD maintained by the LNet.
 * \param unlink A flag to indicate whether the MD is automatically unlinked
 * when it becomes inactive, either because the operation threshold drops to
 * zero or because the available memory becomes less than \a umd.max_size.
 * (Note that the check for unlinking a MD only occurs after the completion
 * of a successful operation on the MD.) The value LNET_UNLINK enables auto
 * unlinking; the value LNET_RETAIN disables it.
 * \param handle On successful returns, a handle to the newly created MD is
 * saved here. This handle can be used later in LNetMDUnlink().
 *
 * \retval 0 On success.
 * \retval -EINVAL If \a umd is not valid.
 * \retval -ENOMEM If new MD cannot be allocated.
 * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
 * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
 * calling LNetInvalidateHandle() on it.
 * \retval -EBUSY If the ME pointed to by \a meh is already associated with
 * a MD.
 */
int
LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
	     lnet_unlink_t unlink, lnet_handle_md_t *handle)
{
	LIST_HEAD (matches);
	LIST_HEAD (drops);
	struct lnet_me *me;
	struct lnet_libmd *md;
	int cpt;
	int rc;

	LASSERT (the_lnet.ln_init);
	LASSERT (the_lnet.ln_refcount > 0);

	if (lnet_md_validate(&umd) != 0)
		return -EINVAL;

	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
		CERROR("Invalid option: no MD_OP set\n");
		return -EINVAL;
	}

	md = lnet_md_alloc(&umd);
	if (md == NULL)
		return -ENOMEM;

	rc = lnet_md_build(md, &umd, unlink);
	/* lock the ME's CPT before inspecting rc so the failure path can
	 * free the MD under the same lock */
	cpt = lnet_cpt_of_cookie(meh.cookie);

	lnet_res_lock(cpt);
	if (rc != 0)
		goto failed;

	me = lnet_handle2me(&meh);
	if (me == NULL)
		rc = -ENOENT;
	else if (me->me_md != NULL)
		rc = -EBUSY;
	else
		rc = lnet_md_link(md, umd.eq_handle, cpt);

	if (rc != 0)
		goto failed;

	/* attach this MD to portal of ME and check if it matches any
	 * blocked msgs on this portal */
	lnet_ptl_attach_md(me, md, &matches, &drops);

	lnet_md2handle(handle, md);

	lnet_res_unlock(cpt);

	/* deliver/drop previously-blocked messages outside the lock */
	lnet_drop_delayed_msg_list(&drops, "Bad match");
	lnet_recv_delayed_msg_list(&matches);

	return 0;

failed:
	lnet_md_free_locked(md);

	lnet_res_unlock(cpt);
	return rc;
}
EXPORT_SYMBOL(LNetMDAttach);
/**
 * Create a "free floating" memory descriptor - a MD that is not associated
 * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations.
 *
 * \param umd,unlink See the discussion for LNetMDAttach().
 * \param handle On successful returns, a handle to the newly created MD is
 * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(),
 * and LNetGet() operations.
 *
 * \retval 0 On success.
 * \retval -EINVAL If \a umd is not valid.
 * \retval -ENOMEM If new MD cannot be allocated.
 * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that
 * it's OK to supply a NULL \a umd.eq_handle by calling
 * LNetInvalidateHandle() on it.
 */
int
LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
{
	lnet_libmd_t *md;
	int cpt;
	int rc;

	LASSERT (the_lnet.ln_init);
	LASSERT (the_lnet.ln_refcount > 0);

	if (lnet_md_validate(&umd) != 0)
		return -EINVAL;

	/* active MDs are the source/sink of LNetPut/LNetGet; they must
	 * not carry the passive GET|PUT match options */
	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0) {
		CERROR("Invalid option: GET|PUT illegal on active MDs\n");
		return -EINVAL;
	}

	md = lnet_md_alloc(&umd);
	if (md == NULL)
		return -ENOMEM;

	rc = lnet_md_build(md, &umd, unlink);

	/* no ME cookie here, so lock whatever CPT is current */
	cpt = lnet_res_lock_current();
	if (rc != 0)
		goto failed;

	rc = lnet_md_link(md, umd.eq_handle, cpt);
	if (rc != 0)
		goto failed;

	lnet_md2handle(handle, md);

	lnet_res_unlock(cpt);
	return 0;

 failed:
	lnet_md_free_locked(md);

	lnet_res_unlock(cpt);
	return rc;
}
EXPORT_SYMBOL(LNetMDBind);
/**
 * Unlink the memory descriptor from any ME it may be linked to and release
 * the internal resources associated with it.
 *
 * This function does not free the memory region associated with the MD;
 * i.e., the memory the user allocated for this MD. If the ME associated with
 * this MD is not NULL and was created with auto unlink enabled, the ME is
 * unlinked as well (see LNetMEAttach()).
 *
 * Explicitly unlinking a MD via this function call has the same behavior as
 * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK
 * is generated in the latter case.
 *
 * An unlinked event can be reported in two ways:
 * - If there's no pending operations on the MD, it's unlinked immediately
 *   and an LNET_EVENT_UNLINK event is logged before this function returns.
 * - Otherwise, the MD is only marked for deletion when this function
 *   returns, and the unlinked event will be piggybacked on the event of
 *   the completion of the last operation by setting the unlinked field of
 *   the event. No dedicated LNET_EVENT_UNLINK event is generated.
 *
 * Note that in both cases the unlinked field of the event is always set; no
 * more event will happen on the MD after such an event is logged.
 *
 * \param mdh A handle for the MD to be unlinked.
 *
 * \retval 0 On success.
 * \retval -ENOENT If \a mdh does not point to a valid MD object.
 */
int
LNetMDUnlink (lnet_handle_md_t mdh)
{
	lnet_event_t ev;
	lnet_libmd_t *md;
	int cpt;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	cpt = lnet_cpt_of_cookie(mdh.cookie);
	lnet_res_lock(cpt);

	md = lnet_handle2md(&mdh);
	if (md == NULL) {
		lnet_res_unlock(cpt);
		return -ENOENT;
	}

	/* If the MD is busy, lnet_md_unlink just marks it for deletion, and
	 * when the NAL is done, the completion event flags that the MD was
	 * unlinked. Otherwise, we enqueue an event now... */
	if (md->md_eq != NULL &&	/* only if the MD has an EQ */
	    md->md_refcount == 0) {	/* ...and no operation in flight */
		lnet_build_unlink_event(md, &ev);
		lnet_eq_enqueue_event(md->md_eq, &ev);
	}

	lnet_md_unlink(md);

	lnet_res_unlock(cpt);
	return 0;
}
EXPORT_SYMBOL(LNetMDUnlink);

View file

@ -0,0 +1,297 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-me.c
*
* Match Entry management routines
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/**
 * Create and attach a match entry to the match list of \a portal. The new
 * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach()
 * can be used to attach a MD to an empty ME.
 *
 * \param portal The portal table index where the ME should be attached.
 * \param match_id Specifies the match criteria for the process ID of
 * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be
 * used to wildcard either of the identifiers in the lnet_process_id_t
 * structure.
 * \param match_bits,ignore_bits Specify the match criteria to apply
 * to the match bits in the incoming request. The ignore bits are used
 * to mask out insignificant bits in the incoming match bits. The resulting
 * bits are then compared to the ME's match bits to determine if the
 * incoming request meets the match criteria.
 * \param unlink Indicates whether the ME should be unlinked when the memory
 * descriptor associated with it is unlinked (Note that the check for
 * unlinking a ME only occurs when the memory descriptor is unlinked.).
 * Valid values are LNET_RETAIN and LNET_UNLINK.
 * \param pos Indicates whether the new ME should be prepended or
 * appended to the match list. Allowed constants: LNET_INS_BEFORE,
 * LNET_INS_AFTER.
 * \param handle On successful returns, a handle to the newly created ME
 * object is saved here. This handle can be used later in LNetMEInsert(),
 * LNetMEUnlink(), or LNetMDAttach() functions.
 *
 * \retval 0 On success.
 * \retval -EINVAL If \a portal is invalid.
 * \retval -ENOMEM If new ME object cannot be allocated.
 * \retval -EPERM If the portal type cannot accept this match criteria
 * (unique vs. wildcard mismatch).
 */
int
LNetMEAttach(unsigned int portal,
	     lnet_process_id_t match_id,
	     __u64 match_bits, __u64 ignore_bits,
	     lnet_unlink_t unlink, lnet_ins_pos_t pos,
	     lnet_handle_me_t *handle)
{
	struct lnet_match_table *mtable;
	struct lnet_me *me;
	struct list_head *head;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	if ((int)portal >= the_lnet.ln_nportals)
		return -EINVAL;

	/* pick (and possibly type-set) the match table for this portal */
	mtable = lnet_mt_of_attach(portal, match_id,
				   match_bits, ignore_bits, pos);
	if (mtable == NULL) /* can't match portal type */
		return -EPERM;

	me = lnet_me_alloc();
	if (me == NULL)
		return -ENOMEM;

	lnet_res_lock(mtable->mt_cpt);

	me->me_portal = portal;
	me->me_match_id = match_id;
	me->me_match_bits = match_bits;
	me->me_ignore_bits = ignore_bits;
	me->me_unlink = unlink;
	me->me_md = NULL;

	lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt],
			       &me->me_lh);
	/* MEs with ignore bits can't be hashed by match bits; they all
	 * live on the dedicated LNET_MT_HASH_IGNORE list */
	if (ignore_bits != 0)
		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
	else
		head = lnet_mt_match_head(mtable, match_id, match_bits);

	me->me_pos = head - &mtable->mt_mhash[0];
	if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL)
		list_add_tail(&me->me_list, head);
	else
		list_add(&me->me_list, head);

	lnet_me2handle(handle, me);

	lnet_res_unlock(mtable->mt_cpt);
	return 0;
}
EXPORT_SYMBOL(LNetMEAttach);
/**
 * Create and a match entry and insert it before or after the ME pointed to by
 * \a current_meh. The new ME is empty, i.e. not associated with a memory
 * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME.
 *
 * This function is identical to LNetMEAttach() except for the position
 * where the new ME is inserted.
 *
 * \param current_meh A handle for a ME. The new ME will be inserted
 * immediately before or immediately after this ME.
 * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion
 * for LNetMEAttach().
 *
 * \retval 0 On success.
 * \retval -ENOMEM If new ME object cannot be allocated.
 * \retval -ENOENT If \a current_meh does not point to a valid match entry.
 * \retval -EPERM If \a pos is LNET_INS_LOCAL, or the portal is a unique
 * (non-wildcard) portal, where relative insertion makes no sense.
 */
int
LNetMEInsert(lnet_handle_me_t current_meh,
	     lnet_process_id_t match_id,
	     __u64 match_bits, __u64 ignore_bits,
	     lnet_unlink_t unlink, lnet_ins_pos_t pos,
	     lnet_handle_me_t *handle)
{
	struct lnet_me *current_me;
	struct lnet_me *new_me;
	struct lnet_portal *ptl;
	int cpt;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	if (pos == LNET_INS_LOCAL)
		return -EPERM;

	/* allocate before taking the lock so the failure path is simple */
	new_me = lnet_me_alloc();
	if (new_me == NULL)
		return -ENOMEM;

	cpt = lnet_cpt_of_cookie(current_meh.cookie);

	lnet_res_lock(cpt);

	current_me = lnet_handle2me(&current_meh);
	if (current_me == NULL) {
		lnet_me_free_locked(new_me);

		lnet_res_unlock(cpt);
		return -ENOENT;
	}

	LASSERT(current_me->me_portal < the_lnet.ln_nportals);

	ptl = the_lnet.ln_portals[current_me->me_portal];
	if (lnet_ptl_is_unique(ptl)) {
		/* no sense in insertion on a unique portal */
		lnet_me_free_locked(new_me);
		lnet_res_unlock(cpt);
		return -EPERM;
	}

	/* the new ME inherits hash position and portal from its neighbour */
	new_me->me_pos = current_me->me_pos;
	new_me->me_portal = current_me->me_portal;
	new_me->me_match_id = match_id;
	new_me->me_match_bits = match_bits;
	new_me->me_ignore_bits = ignore_bits;
	new_me->me_unlink = unlink;
	new_me->me_md = NULL;

	lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh);

	if (pos == LNET_INS_AFTER)
		list_add(&new_me->me_list, &current_me->me_list);
	else
		list_add_tail(&new_me->me_list, &current_me->me_list);

	lnet_me2handle(handle, new_me);

	lnet_res_unlock(cpt);

	return 0;
}
EXPORT_SYMBOL(LNetMEInsert);
/**
 * Unlink a match entry from its match list.
 *
 * This operation also releases any resources associated with the ME. If a
 * memory descriptor is attached to the ME, then it will be unlinked as well
 * and an unlink event will be generated. It is an error to use the ME handle
 * after calling LNetMEUnlink().
 *
 * \param meh A handle for the ME to be unlinked.
 *
 * \retval 0 On success.
 * \retval -ENOENT If \a meh does not point to a valid ME.
 * \see LNetMDUnlink() for the discussion on delivering unlink event.
 */
int
LNetMEUnlink(lnet_handle_me_t meh)
{
	lnet_me_t *me;
	lnet_libmd_t *md;
	lnet_event_t ev;
	int cpt;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	cpt = lnet_cpt_of_cookie(meh.cookie);
	lnet_res_lock(cpt);

	me = lnet_handle2me(&meh);
	if (me == NULL) {
		lnet_res_unlock(cpt);
		return -ENOENT;
	}

	/* enqueue an unlink event now only if the attached MD is idle
	 * and has an EQ; a busy MD reports unlink on its last completion
	 * (see LNetMDUnlink()) */
	md = me->me_md;
	if (md != NULL &&
	    md->md_eq != NULL &&
	    md->md_refcount == 0) {
		lnet_build_unlink_event(md, &ev);
		lnet_eq_enqueue_event(md->md_eq, &ev);
	}

	lnet_me_unlink(me);

	lnet_res_unlock(cpt);
	return 0;
}
EXPORT_SYMBOL(LNetMEUnlink);
/*
 * Remove \a me from its match list, unlink any attached MD, and free the ME.
 * Call with lnet_res_lock please.
 */
void
lnet_me_unlink(lnet_me_t *me)
{
	list_del(&me->me_list);

	if (me->me_md != NULL) {
		lnet_libmd_t *md = me->me_md;

		/* detach MD from portal of this ME */
		lnet_ptl_detach_md(me, md);
		lnet_md_unlink(md);
	}

	/* invalidate the handle before freeing so stale handles fail
	 * lookup rather than hitting freed memory */
	lnet_res_lh_invalidate(&me->me_lh);
	lnet_me_free_locked(me);
}
#if 0
/* Debug helper (dead code): dump the state of one match entry. */
static void
lib_me_dump(lnet_me_t *me)
{
	CWARN("Match Entry %p ("LPX64")\n", me,
	      me->me_lh.lh_cookie);

	/* me_match_bits/me_ignore_bits are __u64; use LPX64 rather than
	 * %016lx, which is wrong on 32-bit architectures */
	CWARN("\tMatch/Ignore\t= "LPX64" / "LPX64"\n",
	      me->me_match_bits, me->me_ignore_bits);

	/* the MD field of lnet_me_t is me_md, not md */
	CWARN("\tMD\t= %p\n", me->me_md);
	CWARN("\tprev\t= %p\n",
	      list_entry(me->me_list.prev, lnet_me_t, me_list));
	CWARN("\tnext\t= %p\n",
	      list_entry(me->me_list.next, lnet_me_t, me_list));
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,650 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-msg.c
*
* Message decoding, parsing and finalizing routines
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/*
 * Fill in *ev as an LNET_EVENT_UNLINK event for \a md: zero the event,
 * mark it unlinked, and snapshot the MD's user-visible state and handle.
 */
void
lnet_build_unlink_event (lnet_libmd_t *md, lnet_event_t *ev)
{
	ENTRY;

	memset(ev, 0, sizeof(*ev));

	ev->status = 0;
	ev->unlinked = 1;
	ev->type = LNET_EVENT_UNLINK;
	lnet_md_deconstruct(md, &ev->md);
	lnet_md2handle(&ev->md_handle, md);
	EXIT;
}
/*
 * Build the event for \a msg in msg->msg_ev according to \a ev_type.
 * Don't need any lock, must be called after lnet_commit_md.
 *
 * NB: for an active message (LNET_EVENT_SEND) the header fields are still
 * in wire (little-endian) order, so they are le*_to_cpu converted here;
 * passive-side events read already-converted header fields directly.
 */
void
lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type)
{
	lnet_hdr_t *hdr = &msg->msg_hdr;
	lnet_event_t *ev = &msg->msg_ev;

	LASSERT(!msg->msg_routing);

	ev->type = ev_type;

	if (ev_type == LNET_EVENT_SEND) {
		/* event for active message */
		ev->target.nid = le64_to_cpu(hdr->dest_nid);
		ev->target.pid = le32_to_cpu(hdr->dest_pid);
		ev->initiator.nid = LNET_NID_ANY;
		ev->initiator.pid = the_lnet.ln_pid;
		ev->sender = LNET_NID_ANY;

	} else {
		/* event for passive message */
		ev->target.pid = hdr->dest_pid;
		ev->target.nid = hdr->dest_nid;
		ev->initiator.pid = hdr->src_pid;
		ev->initiator.nid = hdr->src_nid;
		ev->rlength = hdr->payload_length;
		ev->sender = msg->msg_from;
		ev->mlength = msg->msg_wanted;
		ev->offset = msg->msg_offset;
	}

	switch (ev_type) {
	default:
		LBUG();

	case LNET_EVENT_PUT: /* passive PUT */
		ev->pt_index = hdr->msg.put.ptl_index;
		ev->match_bits = hdr->msg.put.match_bits;
		ev->hdr_data = hdr->msg.put.hdr_data;
		return;

	case LNET_EVENT_GET: /* passive GET */
		ev->pt_index = hdr->msg.get.ptl_index;
		ev->match_bits = hdr->msg.get.match_bits;
		ev->hdr_data = 0;
		return;

	case LNET_EVENT_ACK: /* ACK */
		ev->match_bits = hdr->msg.ack.match_bits;
		ev->mlength = hdr->msg.ack.mlength;
		return;

	case LNET_EVENT_REPLY: /* REPLY */
		return;

	case LNET_EVENT_SEND: /* active message */
		if (msg->msg_type == LNET_MSG_PUT) {
			ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index);
			ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits);
			ev->offset = le32_to_cpu(hdr->msg.put.offset);
			ev->mlength =
			ev->rlength = le32_to_cpu(hdr->payload_length);
			ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data);

		} else {
			LASSERT(msg->msg_type == LNET_MSG_GET);
			ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index);
			ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits);
			ev->mlength =
			ev->rlength = le32_to_cpu(hdr->msg.get.sink_length);
			ev->offset = le32_to_cpu(hdr->msg.get.src_offset);
			ev->hdr_data = 0;
		}
		return;
	}
}
/*
 * Commit \a msg to CPT \a cpt for sending or receiving: record the CPT,
 * put the message on the container's active list and bump the per-CPT
 * message counters.  NOTE(review): callers appear to hold the lnet_net_lock
 * for \a cpt (counters are updated without further locking) — confirm.
 */
void
lnet_msg_commit(lnet_msg_t *msg, int cpt)
{
	struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
	lnet_counters_t *counters = the_lnet.ln_counters[cpt];

	/* routed message can be committed for both receiving and sending */
	LASSERT(!msg->msg_tx_committed);

	if (msg->msg_sending) {
		LASSERT(!msg->msg_receiving);

		msg->msg_tx_cpt = cpt;
		msg->msg_tx_committed = 1;
		if (msg->msg_rx_committed) { /* routed message REPLY */
			LASSERT(msg->msg_onactivelist);
			return;	/* already on active list from rx commit */
		}

	} else {
		LASSERT(!msg->msg_sending);
		msg->msg_rx_cpt = cpt;
		msg->msg_rx_committed = 1;
	}

	LASSERT(!msg->msg_onactivelist);
	msg->msg_onactivelist = 1;
	list_add(&msg->msg_activelist, &container->msc_active);

	counters->msgs_alloc++;
	if (counters->msgs_alloc > counters->msgs_max)
		counters->msgs_max = counters->msgs_alloc;
}
/*
 * Decommit \a msg for sending: update send-side counters (skipped when
 * \a status != 0), return tx credits and clear msg_tx_committed.
 * The *_locked credit call implies the caller holds the appropriate
 * lnet_net_lock.
 */
static void
lnet_msg_decommit_tx(lnet_msg_t *msg, int status)
{
	lnet_counters_t	*counters;
	lnet_event_t *ev = &msg->msg_ev;

	LASSERT(msg->msg_tx_committed);
	if (status != 0)
		goto out;	/* failed sends don't count */

	counters = the_lnet.ln_counters[msg->msg_tx_cpt];
	switch (ev->type) {
	default: /* routed message */
		LASSERT(msg->msg_routing);
		LASSERT(msg->msg_rx_committed);
		/* routed messages never build an event, so ev->type is
		 * still zero */
		LASSERT(ev->type == 0);

		counters->route_length += msg->msg_len;
		counters->route_count++;
		goto out;

	case LNET_EVENT_PUT:
		/* should have been decommitted */
		LASSERT(!msg->msg_rx_committed);
		/* overwritten while sending ACK */
		LASSERT(msg->msg_type == LNET_MSG_ACK);
		msg->msg_type = LNET_MSG_PUT; /* fix type */
		break;

	case LNET_EVENT_SEND:
		LASSERT(!msg->msg_rx_committed);
		if (msg->msg_type == LNET_MSG_PUT)
			counters->send_length += msg->msg_len;
		break;

	case LNET_EVENT_GET:
		LASSERT(msg->msg_rx_committed);
		/* overwritten while sending reply, we should never be
		 * here for optimized GET */
		LASSERT(msg->msg_type == LNET_MSG_REPLY);
		msg->msg_type = LNET_MSG_GET; /* fix type */
		break;
	}

	counters->send_count++;
 out:
	lnet_return_tx_credits_locked(msg);
	msg->msg_tx_committed = 0;
}
/*
 * Decommit \a msg for receiving: update receive-side counters (skipped
 * when \a status != 0), return rx credits and clear msg_rx_committed.
 * Must run after any tx decommit (see the first assertion).
 */
static void
lnet_msg_decommit_rx(lnet_msg_t *msg, int status)
{
	lnet_counters_t	*counters;
	lnet_event_t *ev = &msg->msg_ev;

	LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
	LASSERT(msg->msg_rx_committed);

	if (status != 0)
		goto out;	/* failed receives don't count */

	counters = the_lnet.ln_counters[msg->msg_rx_cpt];
	switch (ev->type) {
	default:
		/* routed messages never build an event: ev->type == 0 */
		LASSERT(ev->type == 0);
		LASSERT(msg->msg_routing);
		goto out;

	case LNET_EVENT_ACK:
		LASSERT(msg->msg_type == LNET_MSG_ACK);
		break;

	case LNET_EVENT_GET:
		/* type is "REPLY" if it's an optimized GET on passive side,
		 * because optimized GET will never be committed for sending,
		 * so message type wouldn't be changed back to "GET" by
		 * lnet_msg_decommit_tx(), see details in lnet_parse_get() */
		LASSERT(msg->msg_type == LNET_MSG_REPLY ||
			msg->msg_type == LNET_MSG_GET);
		counters->send_length += msg->msg_wanted;
		break;

	case LNET_EVENT_PUT:
		LASSERT(msg->msg_type == LNET_MSG_PUT);
		break;

	case LNET_EVENT_REPLY:
		/* type is "GET" if it's an optimized GET on active side,
		 * see details in lnet_create_reply_msg() */
		LASSERT(msg->msg_type == LNET_MSG_GET ||
			msg->msg_type == LNET_MSG_REPLY);
		break;
	}

	counters->recv_count++;
	/* only data-bearing events contribute to recv_length */
	if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
		counters->recv_length += msg->msg_wanted;

 out:
	lnet_return_rx_credits_locked(msg);
	msg->msg_rx_committed = 0;
}
/*
 * Fully decommit \a msg (tx first, then rx), remove it from the active
 * list and decrement msgs_alloc on the CPT it was last committed to.
 * Caller holds lnet_net_lock(cpt); the lock is temporarily switched to
 * the rx CPT when a routed message was committed on two partitions, and
 * restored before returning.
 */
void
lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status)
{
	int	cpt2 = cpt;

	LASSERT(msg->msg_tx_committed || msg->msg_rx_committed);
	LASSERT(msg->msg_onactivelist);

	if (msg->msg_tx_committed) { /* always decommit for sending first */
		LASSERT(cpt == msg->msg_tx_cpt);
		lnet_msg_decommit_tx(msg, status);
	}

	if (msg->msg_rx_committed) {
		/* forwarding msg committed for both receiving and sending */
		if (cpt != msg->msg_rx_cpt) {
			/* swap to the rx partition's lock */
			lnet_net_unlock(cpt);
			cpt2 = msg->msg_rx_cpt;
			lnet_net_lock(cpt2);
		}
		lnet_msg_decommit_rx(msg, status);
	}

	list_del(&msg->msg_activelist);
	msg->msg_onactivelist = 0;

	/* msgs_alloc was bumped on the CPT we currently hold locked */
	the_lnet.ln_counters[cpt2]->msgs_alloc--;

	if (cpt2 != cpt) {
		/* restore the caller's lock */
		lnet_net_unlock(cpt2);
		lnet_net_lock(cpt);
	}
}
/*
 * Attach \a md to \a msg: take a reference on the MD, consume one unit of
 * its operation threshold, and snapshot its user-visible state into the
 * message's event.
 */
void
lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md,
		   unsigned int offset, unsigned int mlen)
{
	/* NB: @offset and @len are only useful for receiving */
	/* Here, we attach the MD on lnet_msg and mark it busy and
	 * decrementing its threshold. Come what may, the lnet_msg "owns"
	 * the MD until a call to lnet_msg_detach_md or lnet_finalize()
	 * signals completion. */
	LASSERT(!msg->msg_routing);

	msg->msg_md = md;
	if (msg->msg_receiving) { /* committed for receiving */
		msg->msg_offset = offset;
		msg->msg_wanted = mlen;
	}

	md->md_refcount++;
	/* LNET_MD_THRESH_INF means an unlimited operation threshold */
	if (md->md_threshold != LNET_MD_THRESH_INF) {
		LASSERT(md->md_threshold > 0);
		md->md_threshold--;
	}

	/* build umd in event */
	lnet_md2handle(&msg->msg_ev.md_handle, md);
	lnet_md_deconstruct(md, &msg->msg_ev.md);
}
/*
 * Drop \a msg's reference on its MD, deliver the completion event (with
 * the unlinked flag set if this was the MD's last use), and unlink the
 * MD if it has become unlinkable.
 */
void
lnet_msg_detach_md(lnet_msg_t *msg, int status)
{
	lnet_libmd_t *md = msg->msg_md;
	int unlink;

	/* Now it's safe to drop my caller's ref */
	md->md_refcount--;
	LASSERT(md->md_refcount >= 0);

	/* decide unlinkability before enqueueing the event so the event's
	 * unlinked flag is accurate */
	unlink = lnet_md_unlinkable(md);
	if (md->md_eq != NULL) {
		msg->msg_ev.status   = status;
		msg->msg_ev.unlinked = unlink;
		lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev);
	}

	if (unlink)
		lnet_md_unlink(md);

	msg->msg_md = NULL;
}
/*
 * Finish off a committed message: send an ACK for a successful PUT that
 * requested one, forward a routed message that hasn't been sent yet, or
 * decommit and free the message.  Called with lnet_net_lock(cpt) held;
 * drops and retakes it around lnet_send().
 *
 * \retval 0   message fully completed (decommitted and freed, or handed
 *             to the LND which will finalize it later).
 * \retval !=0 lnet_send() failed; caller (lnet_finalize()) must retry on
 *             the correct partition.
 */
static int
lnet_complete_msg_locked(lnet_msg_t *msg, int cpt)
{
	lnet_handle_wire_t ack_wmd;
	int rc;
	int status = msg->msg_ev.status;

	LASSERT (msg->msg_onactivelist);

	if (status == 0 && msg->msg_ack) {
		/* Only send an ACK if the PUT completed successfully */

		lnet_msg_decommit(msg, cpt, 0);

		msg->msg_ack = 0;
		lnet_net_unlock(cpt);

		LASSERT(msg->msg_ev.type == LNET_EVENT_PUT);
		LASSERT(!msg->msg_routing);

		ack_wmd = msg->msg_hdr.msg.put.ack_wmd;

		/* reuse this message object to carry the ACK */
		lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0);

		msg->msg_hdr.msg.ack.dst_wmd = ack_wmd;
		msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits;
		msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength);

		/* NB: we probably want to use NID of msg::msg_from as 3rd
		 * parameter (router NID) if it's routed message */
		rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY);

		lnet_net_lock(cpt);
		/*
		 * NB: message is committed for sending, we should return
		 * on success because LND will finalize this message later.
		 *
		 * Also, there is possibility that message is committed for
		 * sending and also failed before delivering to LND,
		 * i.e: ENOMEM, in that case we can't fall through either
		 * because CPT for sending can be different with CPT for
		 * receiving, so we should return back to lnet_finalize()
		 * to make sure we are locking the correct partition.
		 */
		return rc;

	} else if (status == 0 &&	/* OK so far */
		   (msg->msg_routing && !msg->msg_sending)) {
		/* not forwarded */
		LASSERT(!msg->msg_receiving);	/* called back recv already */
		lnet_net_unlock(cpt);

		rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY);

		lnet_net_lock(cpt);
		/*
		 * NB: message is committed for sending, we should return
		 * on success because LND will finalize this message later.
		 *
		 * Also, there is possibility that message is committed for
		 * sending and also failed before delivering to LND,
		 * i.e: ENOMEM, in that case we can't fall through either:
		 * - The rule is message must decommit for sending first if
		 *   the it's committed for both sending and receiving
		 * - CPT for sending can be different with CPT for receiving,
		 *   so we should return back to lnet_finalize() to make
		 *   sure we are locking the correct partition.
		 */
		return rc;
	}

	lnet_msg_decommit(msg, cpt, status);
	lnet_msg_free_locked(msg);
	return 0;
}
/*
 * Finalize \a msg with completion \a status: detach its MD (delivering
 * the completion event), then complete the message on the CPT it was
 * committed to.  A per-container finalizer-slot scheme bounds recursion:
 * messages are queued on msc_finalizing and drained by at most
 * msc_nfinalizers concurrent threads; a thread already finalizing (or
 * finding no free slot) just queues and returns.
 *
 * NB: \a ni is unused here; \a msg may be NULL (no-op).
 */
void
lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status)
{
	struct lnet_msg_container *container;
	int my_slot;
	int cpt;
	int rc;
	int i;

	LASSERT (!in_interrupt ());

	if (msg == NULL)
		return;
#if 0
	CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
	       lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
	       msg->msg_target_is_router ? "t" : "",
	       msg->msg_routing ? "X" : "",
	       msg->msg_ack ? "A" : "",
	       msg->msg_sending ? "S" : "",
	       msg->msg_receiving ? "R" : "",
	       msg->msg_delayed ? "d" : "",
	       msg->msg_txcredit ? "C" : "",
	       msg->msg_peertxcredit ? "c" : "",
	       msg->msg_rtrcredit ? "F" : "",
	       msg->msg_peerrtrcredit ? "f" : "",
	       msg->msg_onactivelist ? "!" : "",
	       msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
	       msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
#endif
	msg->msg_ev.status = status;

	if (msg->msg_md != NULL) {
		/* MD state lives under the resource lock of its own CPT */
		cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie);

		lnet_res_lock(cpt);
		lnet_msg_detach_md(msg, status);
		lnet_res_unlock(cpt);
	}

 again:
	rc = 0;
	if (!msg->msg_tx_committed && !msg->msg_rx_committed) {
		/* not committed to network yet */
		LASSERT(!msg->msg_onactivelist);
		lnet_msg_free(msg);
		return;
	}

	/*
	 * NB: routed message can be committed for both receiving and sending,
	 * we should finalize in LIFO order and keep counters correct.
	 * (finalize sending first then finalize receiving)
	 */
	cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt;
	lnet_net_lock(cpt);

	container = the_lnet.ln_msg_containers[cpt];
	list_add_tail(&msg->msg_list, &container->msc_finalizing);

	/* Recursion breaker. Don't complete the message here if I am (or
	 * enough other threads are) already completing messages */

	my_slot = -1;
	for (i = 0; i < container->msc_nfinalizers; i++) {
		if (container->msc_finalizers[i] == current)
			break;	/* I'm already finalizing on this CPT */

		if (my_slot < 0 && container->msc_finalizers[i] == NULL)
			my_slot = i;	/* remember first free slot */
	}

	if (i < container->msc_nfinalizers || my_slot < 0) {
		/* queued; another finalizer will drain the list */
		lnet_net_unlock(cpt);
		return;
	}

	container->msc_finalizers[my_slot] = current;

	while (!list_empty(&container->msc_finalizing)) {
		msg = list_entry(container->msc_finalizing.next,
				 lnet_msg_t, msg_list);

		list_del(&msg->msg_list);

		/* NB drops and regains the lnet lock if it actually does
		 * anything, so my finalizing friends can chomp along too */
		rc = lnet_complete_msg_locked(msg, cpt);
		if (rc != 0)
			break;
	}

	container->msc_finalizers[my_slot] = NULL;
	lnet_net_unlock(cpt);

	/* a failed completion must be retried on its correct partition */
	if (rc != 0)
		goto again;
}
EXPORT_SYMBOL(lnet_finalize);
/*
 * Tear down a message container: drain (and free) any still-active
 * messages, release the finalizer array and the optional freelist.
 * No-op if the container was never set up.
 */
void
lnet_msg_container_cleanup(struct lnet_msg_container *container)
{
	int     count = 0;

	if (container->msc_init == 0)
		return;

	while (!list_empty(&container->msc_active)) {
		lnet_msg_t *msg = list_entry(container->msc_active.next,
					     lnet_msg_t, msg_activelist);

		LASSERT(msg->msg_onactivelist);
		msg->msg_onactivelist = 0;
		list_del(&msg->msg_activelist);
		lnet_msg_free(msg);
		count++;
	}

	/* active messages at shutdown indicate a leak upstream */
	if (count > 0)
		CERROR("%d active msg on exit\n", count);

	if (container->msc_finalizers != NULL) {
		LIBCFS_FREE(container->msc_finalizers,
			    container->msc_nfinalizers *
			    sizeof(*container->msc_finalizers));
		container->msc_finalizers = NULL;
	}
#ifdef LNET_USE_LIB_FREELIST
	lnet_freelist_fini(&container->msc_freelist);
#endif
	container->msc_init = 0;
}
/*
 * Initialize the message container for CPT \a cpt: active/finalizing
 * lists, the optional message freelist, and one finalizer slot per CPU
 * in the partition.  Cleans itself up on failure.
 *
 * \retval 0        success.
 * \retval -ENOMEM  finalizer array allocation failed.
 * \retval other    freelist initialization failed (LNET_USE_LIB_FREELIST).
 */
int
lnet_msg_container_setup(struct lnet_msg_container *container, int cpt)
{
	int	rc;

	container->msc_init = 1;

	INIT_LIST_HEAD(&container->msc_active);
	INIT_LIST_HEAD(&container->msc_finalizing);

#ifdef LNET_USE_LIB_FREELIST
	memset(&container->msc_freelist, 0, sizeof(lnet_freelist_t));

	rc = lnet_freelist_init(&container->msc_freelist,
				LNET_FL_MAX_MSGS, sizeof(lnet_msg_t));
	if (rc != 0) {
		CERROR("Failed to init freelist for message container\n");
		lnet_msg_container_cleanup(container);
		return rc;
	}
#else
	rc = 0;
#endif
	/* number of CPUs */
	container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt);

	LIBCFS_CPT_ALLOC(container->msc_finalizers, lnet_cpt_table(), cpt,
			 container->msc_nfinalizers *
			 sizeof(*container->msc_finalizers));

	if (container->msc_finalizers == NULL) {
		CERROR("Failed to allocate message finalizers\n");
		lnet_msg_container_cleanup(container);
		return -ENOMEM;
	}

	return rc;
}
/*
 * Destroy all per-CPT message containers and free the percpt array.
 * Safe to call when the containers were never created.
 */
void
lnet_msg_containers_destroy(void)
{
	struct lnet_msg_container *container;
	int     i;

	if (the_lnet.ln_msg_containers == NULL)
		return;

	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers)
		lnet_msg_container_cleanup(container);

	cfs_percpt_free(the_lnet.ln_msg_containers);
	the_lnet.ln_msg_containers = NULL;
}
/*
 * Allocate and set up one message container per CPT.  On any failure all
 * containers created so far are destroyed before returning.
 *
 * \retval 0        success.
 * \retval -ENOMEM  percpt allocation failed.
 * \retval other    propagated from lnet_msg_container_setup().
 */
int
lnet_msg_containers_create(void)
{
	struct lnet_msg_container *container;
	int	rc;
	int	i;

	the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(),
						      sizeof(*container));

	if (the_lnet.ln_msg_containers == NULL) {
		CERROR("Failed to allocate cpu-partition data for network\n");
		return -ENOMEM;
	}

	cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) {
		rc = lnet_msg_container_setup(container, i);
		if (rc != 0) {
			/* unwind everything created so far */
			lnet_msg_containers_destroy();
			return rc;
		}
	}

	return 0;
}

View file

@ -0,0 +1,938 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA
*
* GPL HEADER END
*/
/*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/lib-ptl.c
*
* portal & match routines
*
* Author: liang@whamcloud.com
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/* NB: add /proc interfaces in upcoming patches */
/* Module parameter selecting the PUT-redirection (rotor) policy;
 * defaults to hashing routed PUTs across cpu-partitions. */
int	portal_rotor	= LNET_PTL_ROTOR_HASH_RT;
CFS_MODULE_PARM(portal_rotor, "i", int, 0644,
		"redirect PUTs to different cpu-partitions");
/*
 * Check whether the match criteria (unique vs. wildcard) are compatible
 * with portal \a index, lazily setting the portal's type on first use.
 * A request is "unique" when it has no ignore bits and fully specifies
 * nid and pid.
 *
 * \retval 1 criteria accepted (portal type matches, or was just set).
 * \retval 0 criteria conflict with the portal's established type.
 */
static int
lnet_ptl_match_type(unsigned int index, lnet_process_id_t match_id,
		    __u64 mbits, __u64 ignore_bits)
{
	struct lnet_portal	*ptl = the_lnet.ln_portals[index];
	int			unique;

	unique = ignore_bits == 0 &&
		 match_id.nid != LNET_NID_ANY &&
		 match_id.pid != LNET_PID_ANY;

	LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl));

	/* prefer to check w/o any lock */
	if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl)))
		goto match;

	/* unset, new portal */
	lnet_ptl_lock(ptl);
	/* check again with lock */
	if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) {
		lnet_ptl_unlock(ptl);
		goto match;
	}

	/* still not set: this request determines the portal's type */
	if (unique)
		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE);
	else
		lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD);

	lnet_ptl_unlock(ptl);

	return 1;

 match:
	if ((lnet_ptl_is_unique(ptl) && !unique) ||
	    (lnet_ptl_is_wildcard(ptl) && unique))
		return 0;
	return 1;
}
/*
 * Enable the match table of CPT \a cpt on wildcard portal \a ptl and
 * insertion-sort \a cpt into the portal's ordered ptl_mt_maps[] array.
 */
static void
lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt)
{
	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
	int			i;

	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
	LASSERT(lnet_ptl_is_wildcard(ptl));

	mtable->mt_enabled = 1;

	/* place cpt at the end, then bubble it down to keep the map
	 * sorted in ascending order */
	ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt;
	for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) {
		LASSERT(ptl->ptl_mt_maps[i] != cpt);
		if (ptl->ptl_mt_maps[i] < cpt)
			break;

		/* swap to order */
		ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i];
		ptl->ptl_mt_maps[i] = cpt;
	}

	ptl->ptl_mt_nmaps++;
}
/*
 * Disable the match table of CPT \a cpt on wildcard portal \a ptl and
 * remove \a cpt from the ordered ptl_mt_maps[] array by shifting the
 * entries after it down one slot.  The last match table is never
 * disabled.
 */
static void
lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt)
{
	struct lnet_match_table	*mtable = ptl->ptl_mtables[cpt];
	int			i;

	/* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */
	LASSERT(lnet_ptl_is_wildcard(ptl));

	if (LNET_CPT_NUMBER == 1)
		return; /* never disable the only match-table */

	mtable->mt_enabled = 0;

	LASSERT(ptl->ptl_mt_nmaps > 0 &&
		ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER);

	/* remove it from mt_maps */
	ptl->ptl_mt_nmaps--;
	for (i = 0; i < ptl->ptl_mt_nmaps; i++) {
		if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */
			ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1];
	}
}
/* Try to match message @msg (described by @info) against memory
 * descriptor @md.
 *
 * Returns a bitmask of LNET_MATCHMD_* flags:
 *   LNET_MATCHMD_NONE      - @md cannot take this message
 *   LNET_MATCHMD_OK        - matched, message committed to @md
 *   LNET_MATCHMD_DROP      - matched but too big and truncation disallowed
 *   LNET_MATCHMD_EXHAUSTED - (OR-ed in) @md has no space left
 */
static int
lnet_try_match_md(lnet_libmd_t *md,
		  struct lnet_match_info *info, struct lnet_msg *msg)
{
	/* ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock;
	 * lnet_match_blocked_msg() relies on this to avoid races */
	unsigned int offset;
	unsigned int mlength;
	lnet_me_t *me = md->md_me;

	/* MD exhausted */
	if (lnet_md_exhausted(md))
		return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED;

	/* mismatched MD op */
	if ((md->md_options & info->mi_opc) == 0)
		return LNET_MATCHMD_NONE;

	/* mismatched ME nid/pid? */
	if (me->me_match_id.nid != LNET_NID_ANY &&
	    me->me_match_id.nid != info->mi_id.nid)
		return LNET_MATCHMD_NONE;

	if (me->me_match_id.pid != LNET_PID_ANY &&
	    me->me_match_id.pid != info->mi_id.pid)
		return LNET_MATCHMD_NONE;

	/* mismatched ME matchbits? every bit not masked by the ignore-bits
	 * must agree */
	if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
		return LNET_MATCHMD_NONE;

	/* Hurrah! This _is_ a match; check it out... */

	/* offset is local unless the remote side manages it */
	if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
		offset = md->md_offset;
	else
		offset = info->mi_roffset;

	if ((md->md_options & LNET_MD_MAX_SIZE) != 0) {
		mlength = md->md_max_size;
		LASSERT(md->md_offset + mlength <= md->md_length);
	} else {
		mlength = md->md_length - offset;
	}

	if (info->mi_rlength <= mlength) { /* fits in allowed space */
		mlength = info->mi_rlength;
	} else if ((md->md_options & LNET_MD_TRUNCATE) == 0) {
		/* this packet _really_ is too big */
		CERROR("Matching packet from %s, match "LPU64
		       " length %d too big: %d left, %d allowed\n",
		       libcfs_id2str(info->mi_id), info->mi_mbits,
		       info->mi_rlength, md->md_length - offset, mlength);

		return LNET_MATCHMD_DROP;
	}

	/* Commit to this ME/MD */
	CDEBUG(D_NET, "Incoming %s index %x from %s of "
	       "length %d/%d into md "LPX64" [%d] + %d\n",
	       (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get",
	       info->mi_portal, libcfs_id2str(info->mi_id), mlength,
	       info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset);

	lnet_msg_attach_md(msg, md, offset, mlength);
	md->md_offset = offset + mlength;

	if (!lnet_md_exhausted(md))
		return LNET_MATCHMD_OK;

	/* Auto-unlink NOW, so the ME gets unlinked if required.
	 * We bumped md->md_refcount above so the MD just gets flagged
	 * for unlink when it is finalized. */
	if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0)
		lnet_md_unlink(md);

	return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED;
}
/* Map (portal, peer id, match bits) to a specific match-table when the
 * choice is unambiguous; return NULL for a multi-CPT wildcard portal
 * (the caller must then pick a table by some other policy). */
static struct lnet_match_table *
lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
{
	/* single CPT: there is only one table to use */
	if (LNET_CPT_NUMBER == 1)
		return ptl->ptl_mtables[0];

	/* wildcard portal with several CPTs: no direct mapping */
	if (!lnet_ptl_is_unique(ptl))
		return NULL;

	/* unique portal: the table is selected by hashing the peer NID */
	return ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)];
}
/* Choose the match-table a new ME should be attached to, based on the
 * portal type and the requested insertion position @pos.  Returns NULL
 * when the attach parameters conflict with the portal's match type. */
struct lnet_match_table *
lnet_mt_of_attach(unsigned int index, lnet_process_id_t id,
		  __u64 mbits, __u64 ignore_bits, lnet_ins_pos_t pos)
{
	struct lnet_portal *ptl;
	struct lnet_match_table *mtable;

	/* NB: called w/o lock */
	LASSERT(index < the_lnet.ln_nportals);

	if (!lnet_ptl_match_type(index, id, mbits, ignore_bits))
		return NULL;

	ptl = the_lnet.ln_portals[index];

	mtable = lnet_match2mt(ptl, id, mbits);
	if (mtable != NULL) /* unique portal or only one match-table */
		return mtable;

	/* it's a wildcard portal */
	if (pos == LNET_INS_LOCAL) {
		/* posted by cpu-affinity thread */
		return ptl->ptl_mtables[lnet_cpt_current()];
	}

	if (pos == LNET_INS_BEFORE || pos == LNET_INS_AFTER) {
		/* posted by no affinity thread, always hash to specific
		 * match-table to avoid buffer stealing which is heavy */
		return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER];
	}

	/* unknown insertion position */
	return NULL;
}
/* Select the match-table an incoming message should be matched against.
 * A unique portal (or single-CPT setup) maps directly via
 * lnet_match2mt(); a wildcard portal chooses by the portal_rotor policy,
 * preferring an enabled (non-exhausted) table. */
static struct lnet_match_table *
lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
{
	struct lnet_match_table *mtable;
	struct lnet_portal *ptl;
	int nmaps;
	int rotor;
	int routed;
	int cpt;

	/* NB: called w/o lock */
	LASSERT(info->mi_portal < the_lnet.ln_nportals);

	ptl = the_lnet.ln_portals[info->mi_portal];
	LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl));

	mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits);
	if (mtable != NULL)
		return mtable;

	/* it's a wildcard portal */
	/* the message is "routed" when src and dest are on different nets */
	routed = LNET_NIDNET(msg->msg_hdr.src_nid) !=
		 LNET_NIDNET(msg->msg_hdr.dest_nid);

	if (portal_rotor == LNET_PTL_ROTOR_OFF ||
	    (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) {
		/* rotor not applicable: prefer the current CPT's table if
		 * it is enabled */
		cpt = lnet_cpt_current();
		if (ptl->ptl_mtables[cpt]->mt_enabled)
			return ptl->ptl_mtables[cpt];
	}

	rotor = ptl->ptl_rotor++; /* get round-robin factor */
	if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
		cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
	else
		cpt = rotor % LNET_CPT_NUMBER;

	if (!ptl->ptl_mtables[cpt]->mt_enabled) {
		/* is there any active entry for this portal? */
		nmaps = ptl->ptl_mt_nmaps;
		/* map to an active mtable to avoid heavy "stealing" */
		if (nmaps != 0) {
			/* NB: there is possibility that ptl_mt_maps is being
			 * changed because we are not under protection of
			 * lnet_ptl_lock, but it shouldn't hurt anything */
			cpt = ptl->ptl_mt_maps[rotor % nmaps];
		}
	}

	return ptl->ptl_mtables[cpt];
}
/* Query the exhausted bitmap of a wildcard match-table.
 * @pos < 0: return 1 only if every bucket is marked exhausted;
 * otherwise return the exhausted bit for hash bucket @pos.
 * Non-wildcard portals never report exhaustion (always 0). */
static int
lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos)
{
	__u64 *bmap;
	int i;

	if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
		return 0;

	if (pos < 0) { /* check all bits */
		for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) {
			if (mtable->mt_exhausted[i] != (__u64)(-1))
				return 0;
		}
		return 1;
	}

	LASSERT(pos <= LNET_MT_HASH_IGNORE);
	/* mtable::mt_mhash[pos] is marked as exhausted or not */
	bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
	/* bit index within the selected 64-bit word */
	pos &= (1 << LNET_MT_BITS_U64) - 1;

	return ((*bmap) & (1ULL << pos)) != 0;
}
static void
lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted)
{
__u64 *bmap;
LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]));
LASSERT(pos <= LNET_MT_HASH_IGNORE);
/* set mtable::mt_mhash[pos] as exhausted/non-exhausted */
bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64];
pos &= (1 << LNET_MT_BITS_U64) - 1;
if (!exhausted)
*bmap &= ~(1ULL << pos);
else
*bmap |= 1ULL << pos;
}
/* Return the hash chain of @mtable that MEs for (@id, @mbits) live on:
 * wildcard portals hash only the match bits, unique portals fold the
 * peer nid/pid into the hash as well. */
struct list_head *
lnet_mt_match_head(struct lnet_match_table *mtable,
		   lnet_process_id_t id, __u64 mbits)
{
	struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal];
	unsigned long hash;

	if (lnet_ptl_is_wildcard(ptl))
		return &mtable->mt_mhash[mbits & LNET_MT_HASH_MASK];

	LASSERT(lnet_ptl_is_unique(ptl));

	hash = mbits + id.nid + id.pid;
	hash = cfs_hash_long(hash, LNET_MT_HASH_BITS);
	return &mtable->mt_mhash[hash];
}
/* Walk the MEs of @mtable looking for an MD that can take @msg: the
 * ignore-bits chain is scanned first (when non-empty), then the bucket
 * hashed from the match bits.  Maintains the table's exhausted bitmap
 * as buckets drain.  Returns LNET_MATCHMD_* flags, possibly including
 * EXHAUSTED (wildcard portals only). */
int
lnet_mt_match_md(struct lnet_match_table *mtable,
		 struct lnet_match_info *info, struct lnet_msg *msg)
{
	struct list_head *head;
	lnet_me_t *me;
	lnet_me_t *tmp;
	int exhausted = 0;
	int rc;

	/* any ME with ignore bits? */
	if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE]))
		head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE];
	else
		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
 again:
	/* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */
	if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal]))
		exhausted = LNET_MATCHMD_EXHAUSTED;

	list_for_each_entry_safe(me, tmp, head, me_list) {
		/* ME attached but MD not attached yet */
		if (me->me_md == NULL)
			continue;

		LASSERT(me == me->me_md->md_me);

		rc = lnet_try_match_md(me->me_md, info, msg);
		if ((rc & LNET_MATCHMD_EXHAUSTED) == 0)
			exhausted = 0; /* mlist is not empty */

		if ((rc & LNET_MATCHMD_FINISH) != 0) {
			/* don't return EXHAUSTED bit because we don't know
			 * whether the mlist is empty or not */
			return rc & ~LNET_MATCHMD_EXHAUSTED;
		}
	}

	if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */
		lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1);
		/* whole table only counts as exhausted if all buckets are */
		if (!lnet_mt_test_exhausted(mtable, -1))
			exhausted = 0;
	}

	if (exhausted == 0 && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) {
		head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits);
		goto again; /* re-check MEs w/o ignore-bits */
	}

	/* no match: GETs are dropped; PUTs on a lazy portal may be delayed */
	if (info->mi_opc == LNET_MD_OP_GET ||
	    !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal]))
		return LNET_MATCHMD_DROP | exhausted;

	return LNET_MATCHMD_NONE | exhausted;
}
/* Handle a message arriving before the portal's match type (unique vs
 * wildcard) has been set, i.e. before any ME was ever attached: queue
 * it on a lazy portal (when the sender permits rx delay), drop it
 * otherwise.  Returns 0 when the portal is already typed so the caller
 * proceeds with a normal match; otherwise LNET_MATCHMD_NONE/DROP. */
static int
lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg)
{
	int rc;

	/* message arrived before any buffer posting on this portal,
	 * simply delay or drop this message */
	if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)))
		return 0;

	lnet_ptl_lock(ptl);
	/* check it again with hold of lock */
	if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) {
		lnet_ptl_unlock(ptl);
		return 0;
	}

	if (lnet_ptl_is_lazy(ptl)) {
		/* only queue when the receive path allows delaying */
		if (msg->msg_rx_ready_delay) {
			msg->msg_rx_delayed = 1;
			list_add_tail(&msg->msg_list,
				      &ptl->ptl_msg_delayed);
		}
		rc = LNET_MATCHMD_NONE;
	} else {
		rc = LNET_MATCHMD_DROP;
	}

	lnet_ptl_unlock(ptl);
	return rc;
}
/* Match @msg on wildcard portal @ptl by "stealing" buffers from the
 * match-tables of other CPTs; when nothing can be stolen, delay the
 * message (lazy portal) or drop it.  Returns an LNET_MATCHMD_* code. */
static int
lnet_ptl_match_delay(struct lnet_portal *ptl,
		     struct lnet_match_info *info, struct lnet_msg *msg)
{
	int first = ptl->ptl_mt_maps[0]; /* read w/o lock */
	int rc = 0;
	int i;

	/* steal buffer from other CPTs, and delay it if nothing to steal,
	 * this function is more expensive than a regular match, but we
	 * don't expect it can happen a lot */
	LASSERT(lnet_ptl_is_wildcard(ptl));

	for (i = 0; i < LNET_CPT_NUMBER; i++) {
		struct lnet_match_table *mtable;
		int cpt;

		cpt = (first + i) % LNET_CPT_NUMBER;
		mtable = ptl->ptl_mtables[cpt];
		/* skip disabled tables except on the first and last pass */
		if (i != 0 && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled)
			continue;

		lnet_res_lock(cpt);
		lnet_ptl_lock(ptl);

		if (i == 0) { /* the first try, attach on stealing list */
			list_add_tail(&msg->msg_list,
				      &ptl->ptl_msg_stealing);
		}

		if (!list_empty(&msg->msg_list)) { /* on stealing list */
			rc = lnet_mt_match_md(mtable, info, msg);

			if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 &&
			    mtable->mt_enabled)
				lnet_ptl_disable_mt(ptl, cpt);

			if ((rc & LNET_MATCHMD_FINISH) != 0)
				list_del_init(&msg->msg_list);

		} else {
			/* could be matched by lnet_ptl_attach_md()
			 * which is called by another thread */
			rc = msg->msg_md == NULL ?
			     LNET_MATCHMD_DROP : LNET_MATCHMD_OK;
		}

		if (!list_empty(&msg->msg_list) && /* not matched yet */
		    (i == LNET_CPT_NUMBER - 1 || /* the last CPT */
		     ptl->ptl_mt_nmaps == 0 ||   /* no active CPT */
		     (ptl->ptl_mt_nmaps == 1 &&  /* the only active CPT */
		      ptl->ptl_mt_maps[0] == cpt))) {
			/* nothing to steal, delay or drop */
			list_del_init(&msg->msg_list);

			if (lnet_ptl_is_lazy(ptl)) {
				msg->msg_rx_delayed = 1;
				list_add_tail(&msg->msg_list,
					      &ptl->ptl_msg_delayed);
				rc = LNET_MATCHMD_NONE;
			} else {
				rc = LNET_MATCHMD_DROP;
			}
		}

		lnet_ptl_unlock(ptl);
		lnet_res_unlock(cpt);

		/* done as soon as the message is matched/dropped/delayed */
		if ((rc & LNET_MATCHMD_FINISH) != 0 || msg->msg_rx_delayed)
			break;
	}

	return rc;
}
/* Top-level match of an incoming message against its portal: handle
 * early arrivals, pick a match-table, try the match, and delay (lazy
 * portal) or drop the message when nothing matched.  Returns
 * LNET_MATCHMD_OK / LNET_MATCHMD_DROP / LNET_MATCHMD_NONE; the internal
 * EXHAUSTED bit is stripped before returning. */
int
lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg)
{
	struct lnet_match_table *mtable;
	struct lnet_portal *ptl;
	int rc;

	CDEBUG(D_NET, "Request from %s of length %d into portal %d "
	       "MB="LPX64"\n", libcfs_id2str(info->mi_id),
	       info->mi_rlength, info->mi_portal, info->mi_mbits);

	if (info->mi_portal >= the_lnet.ln_nportals) {
		CERROR("Invalid portal %d not in [0-%d]\n",
		       info->mi_portal, the_lnet.ln_nportals);
		return LNET_MATCHMD_DROP;
	}

	ptl = the_lnet.ln_portals[info->mi_portal];

	rc = lnet_ptl_match_early(ptl, msg);
	if (rc != 0) /* matched or delayed early message */
		return rc;

	mtable = lnet_mt_of_match(info, msg);
	lnet_res_lock(mtable->mt_cpt);

	if (the_lnet.ln_shutdown) {
		rc = LNET_MATCHMD_DROP;
		goto out1;
	}

	rc = lnet_mt_match_md(mtable, info, msg);
	if ((rc & LNET_MATCHMD_EXHAUSTED) != 0 && mtable->mt_enabled) {
		/* this table is drained: stop steering messages to it */
		lnet_ptl_lock(ptl);
		lnet_ptl_disable_mt(ptl, mtable->mt_cpt);
		lnet_ptl_unlock(ptl);
	}

	if ((rc & LNET_MATCHMD_FINISH) != 0) /* matched or dropping */
		goto out1;

	if (!msg->msg_rx_ready_delay)
		goto out1;

	LASSERT(lnet_ptl_is_lazy(ptl));
	LASSERT(!msg->msg_rx_delayed);

	/* NB: we don't expect "delay" can happen a lot */
	if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) {
		/* no other CPT to steal from: queue on the delayed list */
		lnet_ptl_lock(ptl);

		msg->msg_rx_delayed = 1;
		list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed);

		lnet_ptl_unlock(ptl);
		lnet_res_unlock(mtable->mt_cpt);

	} else {
		lnet_res_unlock(mtable->mt_cpt);
		rc = lnet_ptl_match_delay(ptl, info, msg);
	}

	if (msg->msg_rx_delayed) {
		CDEBUG(D_NET,
		       "Delaying %s from %s ptl %d MB "LPX64" off %d len %d\n",
		       info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET",
		       libcfs_id2str(info->mi_id), info->mi_portal,
		       info->mi_mbits, info->mi_roffset, info->mi_rlength);
	}

	goto out0;
 out1:
	lnet_res_unlock(mtable->mt_cpt);
 out0:
	/* EXHAUSTED bit is only meaningful for internal functions */
	return rc & ~LNET_MATCHMD_EXHAUSTED;
}
/* Break the mutual ME <-> MD linkage; the two must currently point at
 * each other. */
void
lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md)
{
	LASSERT(me->me_md == md && md->md_me == me);

	md->md_me = NULL;
	me->me_md = NULL;
}
/* called with lnet_res_lock held */
/* Bind brand-new MD @md to ME @me, then try it against queued messages:
 * first the portal's stealing list (messages currently being matched on
 * other CPTs), then the delayed list of a lazy portal.  Resumed PUTs go
 * to @matches, over-sized ones to @drops; the caller processes both
 * lists.  Finally clears the exhausted bit for the ME's bucket and
 * re-enables the match-table when @md still has room. */
void
lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md,
		   struct list_head *matches, struct list_head *drops)
{
	struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal];
	struct lnet_match_table *mtable;
	struct list_head *head;
	lnet_msg_t *tmp;
	lnet_msg_t *msg;
	int exhausted = 0;
	int cpt;

	LASSERT(md->md_refcount == 0); /* a brand new MD */

	me->me_md = md;
	md->md_me = me;

	cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie);
	mtable = ptl->ptl_mtables[cpt];

	/* fast path: nothing queued and this bucket is not exhausted */
	if (list_empty(&ptl->ptl_msg_stealing) &&
	    list_empty(&ptl->ptl_msg_delayed) &&
	    !lnet_mt_test_exhausted(mtable, me->me_pos))
		return;

	lnet_ptl_lock(ptl);

	head = &ptl->ptl_msg_stealing;
 again:
	list_for_each_entry_safe(msg, tmp, head, msg_list) {
		struct lnet_match_info info;
		lnet_hdr_t *hdr;
		int rc;

		LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing);

		/* queued messages are PUTs: rebuild the match info from
		 * the message header's put fields */
		hdr = &msg->msg_hdr;
		info.mi_id.nid = hdr->src_nid;
		info.mi_id.pid = hdr->src_pid;
		info.mi_opc = LNET_MD_OP_PUT;
		info.mi_portal = hdr->msg.put.ptl_index;
		info.mi_rlength = hdr->payload_length;
		info.mi_roffset = hdr->msg.put.offset;
		info.mi_mbits = hdr->msg.put.match_bits;

		rc = lnet_try_match_md(md, &info, msg);

		exhausted = (rc & LNET_MATCHMD_EXHAUSTED) != 0;
		if ((rc & LNET_MATCHMD_NONE) != 0) {
			if (exhausted)
				break;
			continue;
		}

		/* Hurrah! This _is_ a match */
		LASSERT((rc & LNET_MATCHMD_FINISH) != 0);
		list_del_init(&msg->msg_list);

		if (head == &ptl->ptl_msg_stealing) {
			if (exhausted)
				break;
			/* stealing thread will handle the message */
			continue;
		}

		if ((rc & LNET_MATCHMD_OK) != 0) {
			list_add_tail(&msg->msg_list, matches);

			CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
			       "match "LPU64" offset %d length %d.\n",
			       libcfs_id2str(info.mi_id),
			       info.mi_portal, info.mi_mbits,
			       info.mi_roffset, info.mi_rlength);
		} else {
			list_add_tail(&msg->msg_list, drops);
		}

		if (exhausted)
			break;
	}

	/* stealing list done and MD not exhausted: try the delayed list */
	if (!exhausted && head == &ptl->ptl_msg_stealing) {
		head = &ptl->ptl_msg_delayed;
		goto again;
	}

	if (lnet_ptl_is_wildcard(ptl) && !exhausted) {
		lnet_mt_set_exhausted(mtable, me->me_pos, 0);
		if (!mtable->mt_enabled)
			lnet_ptl_enable_mt(ptl, cpt);
	}

	lnet_ptl_unlock(ptl);
}
/* Free all match-table state of @ptl: any MEs still attached at this
 * point are reported via CERROR and freed, then the hash chains and the
 * per-CPT table array are released.  No-op for an uninitialized portal. */
void
lnet_ptl_cleanup(struct lnet_portal *ptl)
{
	struct lnet_match_table *mtable;
	int i;

	if (ptl->ptl_mtables == NULL) /* uninitialized portal */
		return;

	LASSERT(list_empty(&ptl->ptl_msg_delayed));
	LASSERT(list_empty(&ptl->ptl_msg_stealing));

	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
		struct list_head *mhash;
		lnet_me_t *me;
		int j;

		if (mtable->mt_mhash == NULL) /* uninitialized match-table */
			continue;

		mhash = mtable->mt_mhash;
		/* cleanup ME */
		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) {
			while (!list_empty(&mhash[j])) {
				me = list_entry(mhash[j].next,
						lnet_me_t, me_list);
				CERROR("Active ME %p on exit\n", me);
				list_del(&me->me_list);
				lnet_me_free(me);
			}
		}
		/* the extra entry is for MEs with ignore bits */
		LIBCFS_FREE(mhash, sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
	}

	cfs_percpt_free(ptl->ptl_mtables);
	ptl->ptl_mtables = NULL;
}
/* Initialize portal @index: allocate the per-CPT match-tables and their
 * hash chains (LNET_MT_HASH_SIZE buckets plus one extra for ignore-bits
 * MEs).  Exhausted bitmaps start all-ones.  Returns 0 or -ENOMEM;
 * partial state is undone via lnet_ptl_cleanup() on failure. */
int
lnet_ptl_setup(struct lnet_portal *ptl, int index)
{
	struct lnet_match_table *mtable;
	struct list_head *mhash;
	int i;
	int j;

	ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(),
					    sizeof(struct lnet_match_table));
	if (ptl->ptl_mtables == NULL) {
		CERROR("Failed to create match table for portal %d\n", index);
		return -ENOMEM;
	}

	ptl->ptl_index = index;
	INIT_LIST_HEAD(&ptl->ptl_msg_delayed);
	INIT_LIST_HEAD(&ptl->ptl_msg_stealing);
	spin_lock_init(&ptl->ptl_lock);
	cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) {
		/* the extra entry is for MEs with ignore bits */
		LIBCFS_CPT_ALLOC(mhash, lnet_cpt_table(), i,
				 sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1));
		if (mhash == NULL) {
			CERROR("Failed to create match hash for portal %d\n",
			       index);
			goto failed;
		}

		/* all buckets start out marked exhausted (nothing attached) */
		memset(&mtable->mt_exhausted[0], -1,
		       sizeof(mtable->mt_exhausted[0]) *
		       LNET_MT_EXHAUSTED_BMAP);
		mtable->mt_mhash = mhash;
		for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++)
			INIT_LIST_HEAD(&mhash[j]);

		mtable->mt_portal = index;
		mtable->mt_cpt = i;
	}

	return 0;
 failed:
	lnet_ptl_cleanup(ptl);
	return -ENOMEM;
}
/* Tear down every portal's match-tables and free the global portals
 * array.  Safe to call when lnet_portals_create() never ran or failed. */
void
lnet_portals_destroy(void)
{
	int i;

	if (the_lnet.ln_portals == NULL)
		return;

	for (i = 0; i < the_lnet.ln_nportals; i++)
		lnet_ptl_cleanup(the_lnet.ln_portals[i]);

	cfs_array_free(the_lnet.ln_portals);
	the_lnet.ln_portals = NULL;
}
/* Allocate the global portals array (MAX_PORTALS entries) and set up
 * each portal's match-tables.  Returns 0 or -ENOMEM; all allocations
 * are rolled back on failure. */
int
lnet_portals_create(void)
{
	int size;
	int i;

	/* ptl_mt_maps is a trailing variable-size array: size each portal
	 * for one slot per CPT */
	size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]);

	the_lnet.ln_nportals = MAX_PORTALS;
	the_lnet.ln_portals = cfs_array_alloc(the_lnet.ln_nportals, size);
	if (the_lnet.ln_portals == NULL) {
		CERROR("Failed to allocate portals table\n");
		return -ENOMEM;
	}

	for (i = 0; i < the_lnet.ln_nportals; i++) {
		if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) {
			lnet_portals_destroy();
			return -ENOMEM;
		}
	}

	return 0;
}
/**
* Turn on the lazy portal attribute. Use with caution!
*
* This portal attribute only affects incoming PUT requests to the portal,
* and is off by default. By default, if there's no matching MD for an
* incoming PUT request, it is simply dropped. With the lazy attribute on,
* such requests are queued indefinitely until either a matching MD is
* posted to the portal or the lazy attribute is turned off.
*
* It would prevent dropped requests, however it should be regarded as the
* last line of defense - i.e. users must keep a close watch on active
* buffers on a lazy portal and once it becomes too low post more buffers as
* soon as possible. This is because delayed requests usually have detrimental
* effects on underlying network connections. A few delayed requests often
* suffice to bring an underlying connection to a complete halt, due to flow
* control mechanisms.
*
* There's also a DOS attack risk. If users don't post match-all MDs on a
* lazy portal, a malicious peer can easily stop a service by sending some
* PUT requests with match bits that won't match any MD. A routed server is
* especially vulnerable since the connections to its neighbor routers are
* shared among all clients.
*
* \param portal Index of the portal to enable the lazy attribute on.
*
* \retval 0 On success.
* \retval -EINVAL If \a portal is not a valid index.
*/
int
LNetSetLazyPortal(int portal)
{
	struct lnet_portal *ptl;

	/* reject out-of-range portal indices */
	if (portal < 0 || portal >= the_lnet.ln_nportals)
		return -EINVAL;

	CDEBUG(D_NET, "Setting portal %d lazy\n", portal);
	ptl = the_lnet.ln_portals[portal];

	/* exclusive resource lock plus portal lock while flipping the
	 * portal option */
	lnet_res_lock(LNET_LOCK_EX);
	lnet_ptl_lock(ptl);

	lnet_ptl_setopt(ptl, LNET_PTL_LAZY);

	lnet_ptl_unlock(ptl);
	lnet_res_unlock(LNET_LOCK_EX);

	return 0;
}
EXPORT_SYMBOL(LNetSetLazyPortal);
/**
* Turn off the lazy portal attribute. Delayed requests on the portal,
* if any, will be all dropped when this function returns.
*
* \param portal Index of the portal to disable the lazy attribute on.
*
* \retval 0 On success.
* \retval -EINVAL If \a portal is not a valid index.
*/
int
LNetClearLazyPortal(int portal)
{
	struct lnet_portal *ptl;
	LIST_HEAD (zombies);

	if (portal < 0 || portal >= the_lnet.ln_nportals)
		return -EINVAL;

	ptl = the_lnet.ln_portals[portal];

	lnet_res_lock(LNET_LOCK_EX);
	lnet_ptl_lock(ptl);

	/* nothing to do if the portal was never lazy */
	if (!lnet_ptl_is_lazy(ptl)) {
		lnet_ptl_unlock(ptl);
		lnet_res_unlock(LNET_LOCK_EX);
		return 0;
	}

	if (the_lnet.ln_shutdown)
		CWARN("Active lazy portal %d on exit\n", portal);
	else
		CDEBUG(D_NET, "clearing portal %d lazy\n", portal);

	/* grab all the blocked messages atomically */
	list_splice_init(&ptl->ptl_msg_delayed, &zombies);

	lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY);

	lnet_ptl_unlock(ptl);
	lnet_res_unlock(LNET_LOCK_EX);

	/* drop the formerly-delayed messages outside the locks */
	lnet_drop_delayed_msg_list(&zombies, "Clearing lazy portal attr");

	return 0;
}
EXPORT_SYMBOL(LNetClearLazyPortal);

View file

@ -0,0 +1,120 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/* "Send" over the loopback LND: feed the message header straight back
 * into lnet_parse() on the same NI, passing @lntmsg as the private data
 * that lolnd_recv() will use as the copy source. */
int
lolnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
{
	LASSERT (!lntmsg->msg_routing);
	LASSERT (!lntmsg->msg_target_is_router);

	return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0);
}
/* "Receive" a loopback message: @private is the send-side lnet_msg_t
 * queued by lolnd_send().  Copy its payload into the receiver's buffers
 * (four combinations: sender iov/kiov x receiver iov/kiov), then
 * finalize both messages.  @lntmsg == NULL means the receiver is
 * discarding the payload; only the send side is finalized then. */
int
lolnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
	    int delayed, unsigned int niov,
	    struct iovec *iov, lnet_kiov_t *kiov,
	    unsigned int offset, unsigned int mlen, unsigned int rlen)
{
	lnet_msg_t *sendmsg = private;

	if (lntmsg != NULL) {			/* not discarding */
		if (sendmsg->msg_iov != NULL) {
			/* source payload is in virtual-memory iovecs */
			if (iov != NULL)
				lnet_copy_iov2iov(niov, iov, offset,
						  sendmsg->msg_niov,
						  sendmsg->msg_iov,
						  sendmsg->msg_offset, mlen);
			else
				lnet_copy_iov2kiov(niov, kiov, offset,
						   sendmsg->msg_niov,
						   sendmsg->msg_iov,
						   sendmsg->msg_offset, mlen);
		} else {
			/* source payload is in page-based kiovs */
			if (iov != NULL)
				lnet_copy_kiov2iov(niov, iov, offset,
						   sendmsg->msg_niov,
						   sendmsg->msg_kiov,
						   sendmsg->msg_offset, mlen);
			else
				lnet_copy_kiov2kiov(niov, kiov, offset,
						    sendmsg->msg_niov,
						    sendmsg->msg_kiov,
						    sendmsg->msg_offset, mlen);
		}

		lnet_finalize(ni, lntmsg, 0);
	}

	/* the send side always completes */
	lnet_finalize(ni, sendmsg, 0);
	return 0;
}
static int lolnd_instanced;
/* Shut the loopback instance down; only clears the singleton flag. */
void
lolnd_shutdown(lnet_ni_t *ni)
{
	CDEBUG (D_NET, "shutdown\n");
	LASSERT (lolnd_instanced);

	lolnd_instanced = 0;
}
/* Start the loopback LND instance; it is a singleton, so it may only be
 * instanced once at a time. */
int
lolnd_startup (lnet_ni_t *ni)
{
	LASSERT(ni->ni_lnd == &the_lolnd);
	LASSERT(!lolnd_instanced);

	lolnd_instanced = 1;
	return 0;
}
/* The loopback LND descriptor; positional initializer, one entry per
 * lnd_t field (named in the comments). */
lnd_t the_lolnd = {
	/* .lnd_list       = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
	/* .lnd_refcount   = */ 0,
	/* .lnd_type       = */ LOLND,
	/* .lnd_startup    = */ lolnd_startup,
	/* .lnd_shutdown   = */ lolnd_shutdown,
	/* .lnd_ctl        = */ NULL,
	/* .lnd_send       = */ lolnd_send,
	/* .lnd_recv       = */ lolnd_recv,
	/* .lnd_eager_recv = */ NULL,
	/* .lnd_notify     = */ NULL,
	/* .lnd_accept     = */ NULL
};

View file

@ -0,0 +1,154 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/* when non-zero, configure the LNet network from a kthread at module
 * load time (exposed as a read-only module parameter) */
static int config_on_load;
CFS_MODULE_PARM(config_on_load, "i", int, 0444,
		"configure network at module load");

/* serializes lnet_configure() / lnet_unconfigure() */
static struct mutex lnet_config_mutex;
/* Bring the LNet network up once on behalf of this module; guarded by
 * ln_niinit_self so repeat calls are no-ops, serialized by
 * lnet_config_mutex.  Returns 0 on success or the LNetNIInit() error. */
int
lnet_configure (void *arg)
{
	/* 'arg' only there so I can be passed to cfs_create_thread() */
	int rc = 0;

	LNET_MUTEX_LOCK(&lnet_config_mutex);

	if (!the_lnet.ln_niinit_self) {
		rc = LNetNIInit(LUSTRE_SRV_LNET_PID);
		if (rc >= 0) {
			the_lnet.ln_niinit_self = 1;
			rc = 0;
		}
	}

	LNET_MUTEX_UNLOCK(&lnet_config_mutex);
	return rc;
}
/* Undo lnet_configure(): drop the module's own NI reference, then
 * report whether LNet is fully down (0) or still held by other users
 * (-EBUSY). */
int
lnet_unconfigure (void)
{
	int refcount;

	LNET_MUTEX_LOCK(&lnet_config_mutex);

	if (the_lnet.ln_niinit_self) {
		the_lnet.ln_niinit_self = 0;
		LNetNIFini();
	}

	/* sample the remaining refcount under the API mutex */
	LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
	refcount = the_lnet.ln_refcount;
	LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);

	LNET_MUTEX_UNLOCK(&lnet_config_mutex);
	return (refcount == 0) ? 0 : -EBUSY;
}
/* libcfs ioctl entry point: (un)configure is handled directly, anything
 * else is forwarded to LNetCtl() while holding a temporary NI reference
 * so the network can't go down mid-call. */
int
lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data)
{
	int rc;

	switch (cmd) {
	case IOC_LIBCFS_CONFIGURE:
		return lnet_configure(NULL);

	case IOC_LIBCFS_UNCONFIGURE:
		return lnet_unconfigure();

	default:
		/* Passing LNET_PID_ANY only gives me a ref if the net is up
		 * already; I'll need it to ensure the net can't go down while
		 * I'm called into it */
		rc = LNetNIInit(LNET_PID_ANY);
		if (rc >= 0) {
			rc = LNetCtl(cmd, data);
			LNetNIFini();
		}
		return rc;
	}
}
DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl);
/* Module init: set up the config mutex, initialize LNet, register the
 * ioctl handler and optionally spawn a thread to configure the network
 * immediately (config_on_load). */
int
init_lnet(void)
{
	int rc;
	ENTRY;

	mutex_init(&lnet_config_mutex);

	rc = LNetInit();
	if (rc != 0) {
		CERROR("LNetInit: error %d\n", rc);
		RETURN(rc);
	}

	rc = libcfs_register_ioctl(&lnet_ioctl_handler);
	LASSERT (rc == 0);

	if (config_on_load) {
		/* Have to schedule a separate thread to avoid deadlocking
		 * in modload */
		/* NOTE(review): kthread_run() may return ERR_PTR on failure;
		 * the result is deliberately discarded -- confirm that a
		 * failed spawn (network left unconfigured) is acceptable */
		(void) kthread_run(lnet_configure, NULL, "lnet_initd");
	}

	RETURN(0);
}
/* Module exit: unregister the ioctl handler and finalize LNet. */
void
fini_lnet(void)
{
	int rc;

	rc = libcfs_deregister_ioctl(&lnet_ioctl_handler);
	LASSERT (rc == 0);

	LNetFini();
}
MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
MODULE_DESCRIPTION("Portals v3.1");
MODULE_LICENSE("GPL");
cfs_module(lnet, "1.0.0", init_lnet, fini_lnet);

View file

@ -0,0 +1,337 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/peer.c
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/lnet/lib-lnet.h>
/* Allocate the per-CPT peer tables and their hash chains
 * (LNET_PEER_HASH_SIZE buckets each).  Returns 0 or -ENOMEM; partial
 * state is torn down via lnet_peer_tables_destroy() on failure. */
int
lnet_peer_tables_create(void)
{
	struct lnet_peer_table *ptable;
	struct list_head *hash;
	int i;
	int j;

	the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
						   sizeof(*ptable));
	if (the_lnet.ln_peer_tables == NULL) {
		CERROR("Failed to allocate cpu-partition peer tables\n");
		return -ENOMEM;
	}

	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
		INIT_LIST_HEAD(&ptable->pt_deathrow);

		LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
				 LNET_PEER_HASH_SIZE * sizeof(*hash));
		if (hash == NULL) {
			CERROR("Failed to create peer hash table\n");
			lnet_peer_tables_destroy();
			return -ENOMEM;
		}

		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
			INIT_LIST_HEAD(&hash[j]);

		ptable->pt_hash = hash; /* sign of initialization */
	}

	return 0;
}
/* Free the per-CPT peer tables; every hash bucket and deathrow list is
 * expected to be empty.  Safe on partially-created state (stops at the
 * first uninitialized table). */
void
lnet_peer_tables_destroy(void)
{
	struct lnet_peer_table *ptable;
	struct list_head *hash;
	int i;
	int j;

	if (the_lnet.ln_peer_tables == NULL)
		return;

	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
		hash = ptable->pt_hash;
		if (hash == NULL) /* not initialized */
			break;

		LASSERT(list_empty(&ptable->pt_deathrow));

		ptable->pt_hash = NULL;
		for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
			LASSERT(list_empty(&hash[j]));

		LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
	}

	cfs_percpt_free(the_lnet.ln_peer_tables);
	the_lnet.ln_peer_tables = NULL;
}
/* Shutdown-time cleanup: unhash every peer (dropping the hash table's
 * reference), wait until each table's pt_number drains to zero, then
 * free the recycled peer structs parked on the deathrow lists. */
void
lnet_peer_tables_cleanup(void)
{
	struct lnet_peer_table *ptable;
	int i;
	int j;

	LASSERT(the_lnet.ln_shutdown); /* i.e. no new peers */

	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
		lnet_net_lock(i);

		for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
			struct list_head *peers = &ptable->pt_hash[j];

			while (!list_empty(peers)) {
				lnet_peer_t *lp = list_entry(peers->next,
							     lnet_peer_t,
							     lp_hashlist);
				list_del_init(&lp->lp_hashlist);
				/* lose hash table's ref */
				lnet_peer_decref_locked(lp);
			}
		}

		lnet_net_unlock(i);
	}

	cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
		LIST_HEAD (deathrow);
		lnet_peer_t *lp;

		lnet_net_lock(i);

		/* poll twice a second until all references are gone;
		 * the warning fires when j is a power of two */
		for (j = 3; ptable->pt_number != 0; j++) {
			lnet_net_unlock(i);

			if ((j & (j - 1)) == 0) {
				CDEBUG(D_WARNING,
				       "Waiting for %d peers on peer table\n",
				       ptable->pt_number);
			}
			cfs_pause(cfs_time_seconds(1) / 2);
			lnet_net_lock(i);
		}
		list_splice_init(&ptable->pt_deathrow, &deathrow);

		lnet_net_unlock(i);

		/* free the recycled peer structs */
		while (!list_empty(&deathrow)) {
			lp = list_entry(deathrow.next,
					lnet_peer_t, lp_hashlist);
			list_del(&lp->lp_hashlist);
			LIBCFS_FREE(lp, sizeof(*lp));
		}
	}
}
/* Retire a peer whose last reference is gone (net lock held): drop its
 * NI reference and park the struct on the table's deathrow for reuse --
 * it is not freed here. */
void
lnet_destroy_peer_locked(lnet_peer_t *lp)
{
	struct lnet_peer_table *ptable;

	LASSERT(lp->lp_refcount == 0);
	LASSERT(lp->lp_rtr_refcount == 0);
	LASSERT(list_empty(&lp->lp_txq));
	LASSERT(list_empty(&lp->lp_hashlist));
	LASSERT(lp->lp_txqnob == 0);

	ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
	LASSERT(ptable->pt_number > 0);
	ptable->pt_number--;

	lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
	lp->lp_ni = NULL;

	list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
}
/* Look up the peer with NID @nid in @ptable's hash; on a hit, take a
 * reference for the caller and return the peer, otherwise NULL.
 * The relevant net lock must be held. */
lnet_peer_t *
lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
{
	struct list_head *bucket;
	lnet_peer_t *peer;

	LASSERT(!the_lnet.ln_shutdown);

	bucket = &ptable->pt_hash[lnet_nid2peerhash(nid)];
	list_for_each_entry(peer, bucket, lp_hashlist) {
		if (peer->lp_nid != nid)
			continue;

		lnet_peer_addref_locked(peer);
		return peer;
	}

	return NULL;
}
/* Find the peer for @nid, creating it if necessary; on success *lpp
 * holds a reference for the caller.  @cpt is the net lock the caller
 * holds (may be LNET_LOCK_EX).  The lock is dropped around the
 * allocation, so shutdown and a racing creation are re-checked after it
 * is retaken.  Returns 0, -ESHUTDOWN, -ENOMEM, or -EHOSTUNREACH when no
 * local NI is on @nid's network. */
int
lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
{
	struct lnet_peer_table *ptable;
	lnet_peer_t *lp = NULL;
	lnet_peer_t *lp2;
	int cpt2;
	int rc = 0;

	*lpp = NULL;
	if (the_lnet.ln_shutdown) /* it's shutting down */
		return -ESHUTDOWN;

	/* cpt can be LNET_LOCK_EX if it's called from router functions */
	cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);

	ptable = the_lnet.ln_peer_tables[cpt2];
	lp = lnet_find_peer_locked(ptable, nid);
	if (lp != NULL) {
		*lpp = lp;
		return 0;
	}

	/* reuse a recycled peer struct from deathrow when available */
	if (!list_empty(&ptable->pt_deathrow)) {
		lp = list_entry(ptable->pt_deathrow.next,
				lnet_peer_t, lp_hashlist);
		list_del(&lp->lp_hashlist);
	}

	/*
	 * take extra refcount in case another thread has shutdown LNet
	 * and destroyed locks and peer-table before I finish the allocation
	 */
	ptable->pt_number++;
	lnet_net_unlock(cpt);

	if (lp != NULL)
		memset(lp, 0, sizeof(*lp));
	else
		LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));

	if (lp == NULL) {
		rc = -ENOMEM;
		lnet_net_lock(cpt);
		goto out;
	}

	INIT_LIST_HEAD(&lp->lp_txq);
	INIT_LIST_HEAD(&lp->lp_rtrq);
	INIT_LIST_HEAD(&lp->lp_routes);

	lp->lp_notify = 0;
	lp->lp_notifylnd = 0;
	lp->lp_notifying = 0;
	lp->lp_alive_count = 0;
	lp->lp_timestamp = 0;
	lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
	lp->lp_last_alive = cfs_time_current(); /* assumes alive */
	lp->lp_last_query = 0; /* haven't asked NI yet */
	lp->lp_ping_timestamp = 0;
	lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
	lp->lp_nid = nid;
	lp->lp_cpt = cpt2;
	lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
	lp->lp_rtr_refcount = 0;

	lnet_net_lock(cpt);

	/* re-check shutdown now that the lock is held again */
	if (the_lnet.ln_shutdown) {
		rc = -ESHUTDOWN;
		goto out;
	}

	/* another thread may have created the peer while we were unlocked */
	lp2 = lnet_find_peer_locked(ptable, nid);
	if (lp2 != NULL) {
		*lpp = lp2;
		goto out;
	}

	lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
	if (lp->lp_ni == NULL) {
		rc = -EHOSTUNREACH;
		goto out;
	}

	lp->lp_txcredits =
	lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
	lp->lp_rtrcredits =
	lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);

	list_add_tail(&lp->lp_hashlist,
		      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
	ptable->pt_version++;
	*lpp = lp;

	return 0;
 out:
	/* failure or lost the race: park @lp back on deathrow */
	if (lp != NULL)
		list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
	ptable->pt_number--;
	return rc;
}
/* Log (at D_WARNING) the aliveness and credit counters of peer @nid,
 * creating its peer entry on demand. */
void
lnet_debug_peer(lnet_nid_t nid)
{
	char *aliveness = "NA";
	lnet_peer_t *lp;
	int rc;
	int cpt;

	cpt = lnet_cpt_of_nid(nid);
	lnet_net_lock(cpt);

	rc = lnet_nid2peer_locked(&lp, nid, cpt);
	if (rc != 0) {
		lnet_net_unlock(cpt);
		CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
		return;
	}

	/* aliveness is only reported for routers or when peer aliveness
	 * checking is enabled */
	if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
		aliveness = lp->lp_alive ? "up" : "down";

	CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
	       libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
	       aliveness, lp->lp_ni->ni_peertxcredits,
	       lp->lp_rtrcredits, lp->lp_minrtrcredits,
	       lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);

	lnet_peer_decref_locked(lp);
	lnet_net_unlock(cpt);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,950 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*
* This file is part of Portals
* http://sourceforge.net/projects/sandiaportals/
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* Portals is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Portals; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lib-lnet.h>
#if defined(LNET_ROUTER)
/* This is really lnet_proc.c. You might need to update sanity test 215
* if any file format is changed. */
static ctl_table_header_t *lnet_table_header = NULL;
#define CTL_LNET (0x100)
enum {
PSDEV_LNET_STATS = 100,
PSDEV_LNET_ROUTES,
PSDEV_LNET_ROUTERS,
PSDEV_LNET_PEERS,
PSDEV_LNET_BUFFERS,
PSDEV_LNET_NIS,
PSDEV_LNET_PTL_ROTOR,
};
#define LNET_LOFFT_BITS (sizeof(loff_t) * 8)
/*
* NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system
*/
#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1)
/* change version, 16 bits or 8 bits */
#define LNET_PROC_VER_BITS MAX(((MIN(LNET_LOFFT_BITS, 64)) / 4), 8)
#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS
/*
* bits for peer hash offset
* NB: we don't use the highest bit of *ppos because it's signed
*/
#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \
LNET_PROC_CPT_BITS - \
LNET_PROC_VER_BITS - \
LNET_PROC_HASH_BITS - 1)
/* bits for hash index + position */
#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS)
/* bits for peer hash table + hash version */
#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS)
#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1)
#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1)
#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1)
#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1)
#define LNET_PROC_CPT_GET(pos) \
(int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK)
#define LNET_PROC_VER_GET(pos) \
(int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK)
#define LNET_PROC_HASH_GET(pos) \
(int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK)
#define LNET_PROC_HOFF_GET(pos) \
(int)((pos) & LNET_PROC_HOFF_MASK)
#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \
(((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \
((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \
((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \
((off) & LNET_PROC_HOFF_MASK))
#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK))
/*
 * /proc/sys/lnet/stats handler body.
 * A write resets the global LNet counters; a read formats a snapshot of
 * them as a single space-separated line.  @pos is a byte offset into the
 * formatted string so userspace can resume a partial read.
 */
static int __proc_lnet_stats(void *data, int write,
			     loff_t pos, void *buffer, int nob)
{
	int		 rc;
	lnet_counters_t	*ctrs;
	int		 len;
	char		*tmpstr;
	const int	 tmpsiz = 256; /* 7 %u and 4 LPU64 */

	if (write) {
		/* any write, regardless of content, zeroes the counters */
		lnet_counters_reset();
		return 0;
	}

	/* read */

	LIBCFS_ALLOC(ctrs, sizeof(*ctrs));
	if (ctrs == NULL)
		return -ENOMEM;

	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL) {
		LIBCFS_FREE(ctrs, sizeof(*ctrs));
		return -ENOMEM;
	}

	/* consistent snapshot of all counters */
	lnet_counters_get(ctrs);

	len = snprintf(tmpstr, tmpsiz,
		       "%u %u %u %u %u %u %u "LPU64" "LPU64" "
		       LPU64" "LPU64,
		       ctrs->msgs_alloc, ctrs->msgs_max,
		       ctrs->errors,
		       ctrs->send_count, ctrs->recv_count,
		       ctrs->route_count, ctrs->drop_count,
		       ctrs->send_length, ctrs->recv_length,
		       ctrs->route_length, ctrs->drop_length);

	if (pos >= min_t(int, len, strlen(tmpstr)))
		rc = 0;		/* whole string already consumed: EOF */
	else
		rc = cfs_trace_copyout_string(buffer, nob,
					      tmpstr + pos, "\n");

	LIBCFS_FREE(tmpstr, tmpsiz);
	LIBCFS_FREE(ctrs, sizeof(*ctrs));
	return rc;
}
DECLARE_PROC_HANDLER(proc_lnet_stats);
/*
 * /proc/sys/lnet/routes read handler.
 * The first read prints the banner + column headers and latches the
 * current remote-nets version into the *ppos cookie; each later read
 * walks the remote-net hash to print exactly one route.  If the route
 * table version changes between reads, -ESTALE tells userspace to
 * restart from the beginning.
 */
int LL_PROC_PROTO(proc_lnet_routes)
{
	const int	tmpsiz = 256;
	char		*tmpstr;
	char		*s;
	int		rc = 0;
	int		len;
	int		ver;
	int		off;
	DECLARE_LL_PROC_PPOS_DECL;

	CLASSERT(sizeof(loff_t) >= 4);

	/* unpack iteration state from the position cookie */
	off = LNET_PROC_HOFF_GET(*ppos);
	ver = LNET_PROC_VER_GET(*ppos);

	LASSERT (!write);

	if (*lenp == 0)
		return 0;

	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL)
		return -ENOMEM;

	s = tmpstr; /* points to current position in tmpstr[] */

	if (*ppos == 0) {
		s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n",
			      the_lnet.ln_routing ? "enabled" : "disabled");
		LASSERT (tmpstr + tmpsiz - s > 0);

		s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %7s %s\n",
			      "net", "hops", "state", "router");
		LASSERT (tmpstr + tmpsiz - s > 0);

		/* latch the table version for staleness detection */
		lnet_net_lock(0);
		ver = (unsigned int)the_lnet.ln_remote_nets_version;
		lnet_net_unlock(0);
		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
	} else {
		struct list_head	*n;
		struct list_head	*r;
		lnet_route_t		*route = NULL;
		lnet_remotenet_t	*rnet = NULL;
		int			skip = off - 1; /* routes already printed */
		struct list_head	*rn_list;
		int			i;

		lnet_net_lock(0);

		if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
			/* routes changed under us; force a restart */
			lnet_net_unlock(0);
			LIBCFS_FREE(tmpstr, tmpsiz);
			return -ESTALE;
		}

		/* skip over routes already emitted to find the next one */
		for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && route == NULL;
		     i++) {
			rn_list = &the_lnet.ln_remote_nets_hash[i];

			n = rn_list->next;

			while (n != rn_list && route == NULL) {
				rnet = list_entry(n, lnet_remotenet_t,
						  lrn_list);

				r = rnet->lrn_routes.next;

				while (r != &rnet->lrn_routes) {
					lnet_route_t *re =
						list_entry(r, lnet_route_t,
							   lr_list);
					if (skip == 0) {
						route = re;
						break;
					}

					skip--;
					r = r->next;
				}

				n = n->next;
			}
		}

		if (route != NULL) {
			__u32        net   = rnet->lrn_net;
			unsigned int hops  = route->lr_hops;
			lnet_nid_t   nid   = route->lr_gateway->lp_nid;
			int          alive = route->lr_gateway->lp_alive;

			s += snprintf(s, tmpstr + tmpsiz - s,
				      "%-8s %4u %7s %s\n",
				      libcfs_net2str(net), hops,
				      alive ? "up" : "down",
				      libcfs_nid2str(nid));
			LASSERT(tmpstr + tmpsiz - s > 0);
		}

		lnet_net_unlock(0);
	}

	len = s - tmpstr;     /* how many bytes was written */

	if (len > *lenp) {    /* linux-supplied buffer is too small */
		rc = -EINVAL;
	} else if (len > 0) { /* wrote something */
		if (copy_to_user(buffer, tmpstr, len))
			rc = -EFAULT;
		else {
			/* advance to the next route on the next read */
			off += 1;
			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
		}
	}

	LIBCFS_FREE(tmpstr, tmpsiz);

	if (rc == 0)
		*lenp = len;

	return rc;
}
/*
 * /proc/sys/lnet/routers read handler.
 * Same iteration scheme as proc_lnet_routes: the first read prints the
 * column headers and latches ln_routers_version into *ppos; each later
 * read walks the router list to print one router.  -ESTALE forces a
 * restart if the router list changed between reads.
 */
int LL_PROC_PROTO(proc_lnet_routers)
{
	int	   rc = 0;
	char	  *tmpstr;
	char	  *s;
	const int  tmpsiz = 256;
	int	   len;
	int	   ver;
	int	   off;
	DECLARE_LL_PROC_PPOS_DECL;

	off = LNET_PROC_HOFF_GET(*ppos);
	ver = LNET_PROC_VER_GET(*ppos);

	LASSERT (!write);

	if (*lenp == 0)
		return 0;

	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL)
		return -ENOMEM;

	s = tmpstr; /* points to current position in tmpstr[] */

	if (*ppos == 0) {
		s += snprintf(s, tmpstr + tmpsiz - s,
			      "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n",
			      "ref", "rtr_ref", "alive_cnt", "state",
			      "last_ping", "ping_sent", "deadline",
			      "down_ni", "router");
		LASSERT(tmpstr + tmpsiz - s > 0);

		/* latch version for staleness detection */
		lnet_net_lock(0);
		ver = (unsigned int)the_lnet.ln_routers_version;
		lnet_net_unlock(0);
		*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
	} else {
		struct list_head  *r;
		struct lnet_peer  *peer = NULL;
		int		   skip = off - 1; /* routers already printed */

		lnet_net_lock(0);

		if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) {
			lnet_net_unlock(0);

			LIBCFS_FREE(tmpstr, tmpsiz);
			return -ESTALE;
		}

		/* skip to the next router to report */
		r = the_lnet.ln_routers.next;

		while (r != &the_lnet.ln_routers) {
			lnet_peer_t *lp = list_entry(r, lnet_peer_t,
						     lp_rtr_list);

			if (skip == 0) {
				peer = lp;
				break;
			}

			skip--;
			r = r->next;
		}

		if (peer != NULL) {
			lnet_nid_t nid = peer->lp_nid;
			cfs_time_t now = cfs_time_current();
			cfs_time_t deadline = peer->lp_ping_deadline;
			int nrefs     = peer->lp_refcount;
			int nrtrrefs  = peer->lp_rtr_refcount;
			int alive_cnt = peer->lp_alive_count;
			int alive     = peer->lp_alive;
			int pingsent  = !peer->lp_ping_notsent;
			int last_ping = cfs_duration_sec(cfs_time_sub(now,
						     peer->lp_ping_timestamp));
			int down_ni   = 0;
			lnet_route_t *rtr;

			if ((peer->lp_ping_feats &
			     LNET_PING_FEAT_NI_STATUS) != 0) {
				list_for_each_entry(rtr, &peer->lp_routes,
						    lr_gwlist) {
					/* downis on any route should be the
					 * number of downis on the gateway */
					if (rtr->lr_downis != 0) {
						down_ni = rtr->lr_downis;
						break;
					}
				}
			}

			/* "NA" deadline means no ping is outstanding */
			if (deadline == 0)
				s += snprintf(s, tmpstr + tmpsiz - s,
					      "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n",
					      nrefs, nrtrrefs, alive_cnt,
					      alive ? "up" : "down", last_ping,
					      pingsent, "NA", down_ni,
					      libcfs_nid2str(nid));
			else
				s += snprintf(s, tmpstr + tmpsiz - s,
					      "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n",
					      nrefs, nrtrrefs, alive_cnt,
					      alive ? "up" : "down", last_ping,
					      pingsent,
					      cfs_duration_sec(cfs_time_sub(deadline, now)),
					      down_ni, libcfs_nid2str(nid));
			LASSERT (tmpstr + tmpsiz - s > 0);
		}

		lnet_net_unlock(0);
	}

	len = s - tmpstr;     /* how many bytes was written */

	if (len > *lenp) {    /* linux-supplied buffer is too small */
		rc = -EINVAL;
	} else if (len > 0) { /* wrote something */
		if (copy_to_user(buffer, tmpstr, len))
			rc = -EFAULT;
		else {
			off += 1;
			*ppos = LNET_PROC_POS_MAKE(0, ver, 0, off);
		}
	}

	LIBCFS_FREE(tmpstr, tmpsiz);

	if (rc == 0)
		*lenp = len;

	return rc;
}
/*
 * /proc/sys/lnet/peers read handler.
 * Iteration state is packed into *ppos: CPT index, peer-table version,
 * hash bucket, and offset within the bucket.  The first read prints the
 * column headers; each later read prints one peer, advancing the packed
 * cursor.  A version mismatch on a partially-walked table yields -ESTALE.
 */
int LL_PROC_PROTO(proc_lnet_peers)
{
	const int		tmpsiz  = 256;
	struct lnet_peer_table	*ptable;
	char			*tmpstr;
	char			*s;
	int			cpt  = LNET_PROC_CPT_GET(*ppos);
	int			ver  = LNET_PROC_VER_GET(*ppos);
	int			hash = LNET_PROC_HASH_GET(*ppos);
	int			hoff = LNET_PROC_HOFF_GET(*ppos);
	int			rc = 0;
	int			len;

	CLASSERT(LNET_PROC_HASH_BITS >= LNET_PEER_HASH_BITS);
	LASSERT(!write);

	if (*lenp == 0)
		return 0;

	if (cpt >= LNET_CPT_NUMBER) {
		/* walked past the last CPT: EOF */
		*lenp = 0;
		return 0;
	}

	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL)
		return -ENOMEM;

	s = tmpstr; /* points to current position in tmpstr[] */

	if (*ppos == 0) {
		s += snprintf(s, tmpstr + tmpsiz - s,
			      "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n",
			      "nid", "refs", "state", "last", "max",
			      "rtr", "min", "tx", "min", "queue");
		LASSERT (tmpstr + tmpsiz - s > 0);

		hoff++;
	} else {
		struct lnet_peer	*peer;
		struct list_head	*p;
		int			skip;
 again:
		p = NULL;
		peer = NULL;
		skip = hoff - 1; /* entries already printed in this bucket */

		lnet_net_lock(cpt);
		ptable = the_lnet.ln_peer_tables[cpt];
		if (hoff == 1)
			/* starting a new CPT: latch its table version */
			ver = LNET_PROC_VERSION(ptable->pt_version);

		if (ver != LNET_PROC_VERSION(ptable->pt_version)) {
			lnet_net_unlock(cpt);
			LIBCFS_FREE(tmpstr, tmpsiz);
			return -ESTALE;
		}

		/* scan buckets from 'hash' until a peer at offset 'skip'
		 * is found */
		while (hash < LNET_PEER_HASH_SIZE) {
			if (p == NULL)
				p = ptable->pt_hash[hash].next;

			while (p != &ptable->pt_hash[hash]) {
				lnet_peer_t *lp = list_entry(p, lnet_peer_t,
							     lp_hashlist);
				if (skip == 0) {
					peer = lp;

					/* minor optimization: start from idx+1
					 * on next iteration if we've just
					 * drained lp_hashlist */
					if (lp->lp_hashlist.next ==
					    &ptable->pt_hash[hash]) {
						hoff = 1;
						hash++;
					} else {
						hoff++;
					}

					break;
				}

				skip--;
				p = lp->lp_hashlist.next;
			}

			if (peer != NULL)
				break;

			p = NULL;
			hoff = 1;
			hash++;
		}

		if (peer != NULL) {
			lnet_nid_t nid       = peer->lp_nid;
			int	   nrefs     = peer->lp_refcount;
			int	   lastalive = -1;
			char	  *aliveness = "NA";
			int	   maxcr     = peer->lp_ni->ni_peertxcredits;
			int	   txcr      = peer->lp_txcredits;
			int	   mintxcr   = peer->lp_mintxcredits;
			int	   rtrcr     = peer->lp_rtrcredits;
			int	   minrtrcr  = peer->lp_minrtrcredits;
			int	   txqnob    = peer->lp_txqnob;

			if (lnet_isrouter(peer) ||
			    lnet_peer_aliveness_enabled(peer))
				aliveness = peer->lp_alive ? "up" : "down";

			if (lnet_peer_aliveness_enabled(peer)) {
				cfs_time_t     now = cfs_time_current();
				cfs_duration_t delta;

				delta = cfs_time_sub(now, peer->lp_last_alive);
				lastalive = cfs_duration_sec(delta);

				/* No need to mess up peers contents with
				 * arbitrarily long integers - it suffices to
				 * know that lastalive is more than 10000s old
				 */
				if (lastalive >= 10000)
					lastalive = 9999;
			}

			lnet_net_unlock(cpt);

			s += snprintf(s, tmpstr + tmpsiz - s,
				      "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n",
				      libcfs_nid2str(nid), nrefs, aliveness,
				      lastalive, maxcr, rtrcr, minrtrcr, txcr,
				      mintxcr, txqnob);
			LASSERT (tmpstr + tmpsiz - s > 0);

		} else { /* peer is NULL */
			lnet_net_unlock(cpt);
		}

		if (hash == LNET_PEER_HASH_SIZE) {
			/* drained this CPT; move on to the next one,
			 * retrying immediately if nothing was printed */
			cpt++;
			hash = 0;
			hoff = 1;
			if (peer == NULL && cpt < LNET_CPT_NUMBER)
				goto again;
		}
	}

	len = s - tmpstr;     /* how many bytes was written */

	if (len > *lenp) {    /* linux-supplied buffer is too small */
		rc = -EINVAL;
	} else if (len > 0) { /* wrote something */
		if (copy_to_user(buffer, tmpstr, len))
			rc = -EFAULT;
		else
			*ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff);
	}

	LIBCFS_FREE(tmpstr, tmpsiz);

	if (rc == 0)
		*lenp = len;

	return rc;
}
/*
 * /proc/sys/lnet/buffers handler body (read-only).
 * Formats the router buffer pool statistics (pages/count/credits/min)
 * for every pool on every CPT into one buffer; @pos is a byte offset
 * into that buffer for resumed reads.  Prints only the header if this
 * node is not a router.
 */
static int __proc_lnet_buffers(void *data, int write,
			       loff_t pos, void *buffer, int nob)
{
	char		*s;
	char		*tmpstr;
	int		tmpsiz;
	int		idx;
	int		len;
	int		rc;
	int		i;

	LASSERT(!write);

	/* (4 %d) * 4 * LNET_CPT_NUMBER */
	tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER;
	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL)
		return -ENOMEM;

	s = tmpstr; /* points to current position in tmpstr[] */

	s += snprintf(s, tmpstr + tmpsiz - s,
		      "%5s %5s %7s %7s\n",
		      "pages", "count", "credits", "min");
	LASSERT (tmpstr + tmpsiz - s > 0);

	if (the_lnet.ln_rtrpools == NULL)
		goto out; /* I'm not a router */

	for (idx = 0; idx < LNET_NRBPOOLS; idx++) {
		lnet_rtrbufpool_t *rbp;

		/* exclusive lock gives a stable view across all CPTs */
		lnet_net_lock(LNET_LOCK_EX);
		cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) {
			s += snprintf(s, tmpstr + tmpsiz - s,
				      "%5d %5d %7d %7d\n",
				      rbp[idx].rbp_npages,
				      rbp[idx].rbp_nbuffers,
				      rbp[idx].rbp_credits,
				      rbp[idx].rbp_mincredits);
			LASSERT(tmpstr + tmpsiz - s > 0);
		}
		lnet_net_unlock(LNET_LOCK_EX);
	}

 out:
	len = s - tmpstr;

	if (pos >= min_t(int, len, strlen(tmpstr)))
		rc = 0;		/* EOF: everything already consumed */
	else
		rc = cfs_trace_copyout_string(buffer, nob,
					      tmpstr + pos, NULL);

	LIBCFS_FREE(tmpstr, tmpsiz);
	return rc;
}
DECLARE_PROC_HANDLER(proc_lnet_buffers);
/*
 * /proc/sys/lnet/nis read handler.
 * The first read prints the column headers; each later read prints one
 * network interface, with one credits line per TX-queue partition.
 * *ppos is a simple 1-based index into the NI list here (no version
 * cookie).
 */
int LL_PROC_PROTO(proc_lnet_nis)
{
	int	tmpsiz = 128 * LNET_CPT_NUMBER;
	int	rc = 0;
	char	*tmpstr;
	char	*s;
	int	len;
	DECLARE_LL_PROC_PPOS_DECL;

	LASSERT (!write);

	if (*lenp == 0)
		return 0;

	LIBCFS_ALLOC(tmpstr, tmpsiz);
	if (tmpstr == NULL)
		return -ENOMEM;

	s = tmpstr; /* points to current position in tmpstr[] */

	if (*ppos == 0) {
		s += snprintf(s, tmpstr + tmpsiz - s,
			      "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
			      "nid", "status", "alive", "refs", "peer",
			      "rtr", "max", "tx", "min");
		LASSERT (tmpstr + tmpsiz - s > 0);
	} else {
		struct list_head	*n;
		lnet_ni_t		*ni   = NULL;
		int			skip = *ppos - 1; /* NIs already printed */

		lnet_net_lock(0);

		n = the_lnet.ln_nis.next;

		while (n != &the_lnet.ln_nis) {
			lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);

			if (skip == 0) {
				ni = a_ni;
				break;
			}

			skip--;
			n = n->next;
		}

		if (ni != NULL) {
			struct lnet_tx_queue	*tq;
			char	*stat;
			long	now = cfs_time_current_sec();
			int	last_alive = -1;
			int	i;
			int	j;

			if (the_lnet.ln_routing)
				last_alive = now - ni->ni_last_alive;

			/* @lo forever alive */
			if (ni->ni_lnd->lnd_type == LOLND)
				last_alive = 0;

			/* ni_status is read under the NI lock */
			lnet_ni_lock(ni);
			LASSERT(ni->ni_status != NULL);
			stat = (ni->ni_status->ns_status ==
				LNET_NI_STATUS_UP) ? "up" : "down";
			lnet_ni_unlock(ni);

			/* we actually output credits information for
			 * TX queue of each partition */
			cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
				/* skip partitions this NI is not bound to */
				for (j = 0; ni->ni_cpts != NULL &&
				     j < ni->ni_ncpts; j++) {
					if (i == ni->ni_cpts[j])
						break;
				}

				if (j == ni->ni_ncpts)
					continue;

				/* partition 0 is already locked above */
				if (i != 0)
					lnet_net_lock(i);

				s += snprintf(s, tmpstr + tmpsiz - s,
				      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
				      libcfs_nid2str(ni->ni_nid), stat,
				      last_alive, *ni->ni_refs[i],
				      ni->ni_peertxcredits,
				      ni->ni_peerrtrcredits,
				      tq->tq_credits_max,
				      tq->tq_credits, tq->tq_credits_min);
				if (i != 0)
					lnet_net_unlock(i);
			}
			LASSERT(tmpstr + tmpsiz - s > 0);
		}

		lnet_net_unlock(0);
	}

	len = s - tmpstr;     /* how many bytes was written */

	if (len > *lenp) {    /* linux-supplied buffer is too small */
		rc = -EINVAL;
	} else if (len > 0) { /* wrote something */
		if (copy_to_user(buffer, tmpstr, len))
			rc = -EFAULT;
		else
			*ppos += 1;
	}

	LIBCFS_FREE(tmpstr, tmpsiz);

	if (rc == 0)
		*lenp = len;

	return rc;
}
/* One selectable portal-rotor policy: numeric value, short name accepted
 * on write, and human-readable description shown on read. */
struct lnet_portal_rotors {
	int	     pr_value;	/* LNET_PTL_ROTOR_* constant, -1 terminates */
	const char  *pr_name;	/* token matched against user input */
	const char  *pr_desc;	/* description for the read path */
};

/* Table of rotor policies; terminated by the { -1, NULL, NULL } entry. */
static struct lnet_portal_rotors	portal_rotors[] = {
	{
		.pr_value = LNET_PTL_ROTOR_OFF,
		.pr_name  = "OFF",
		.pr_desc  = "Turn off message rotor for wildcard portals"
	},
	{
		.pr_value = LNET_PTL_ROTOR_ON,
		.pr_name  = "ON",
		.pr_desc  = "round-robin dispatch all PUT messages for "
			    "wildcard portals"
	},
	{
		.pr_value = LNET_PTL_ROTOR_RR_RT,
		.pr_name  = "RR_RT",
		.pr_desc  = "round-robin dispatch routed PUT message for "
			    "wildcard portals"
	},
	{
		.pr_value = LNET_PTL_ROTOR_HASH_RT,
		.pr_name  = "HASH_RT",
		.pr_desc  = "dispatch routed PUT message by hashing source "
			    "NID for wildcard portals"
	},
	{
		.pr_value = -1,
		.pr_name  = NULL,
		.pr_desc  = NULL
	},
};
extern int portal_rotor;
/*
 * /proc/sys/lnet/portal_rotor handler body.
 * Read: report the currently selected rotor policy with its description.
 * Write: parse a policy name (case-insensitive prefix match against
 * portal_rotors[]) and install its value; -EINVAL on unknown input.
 */
static int __proc_lnet_portal_rotor(void *data, int write,
				    loff_t pos, void *buffer, int nob)
{
	const int	buf_len	= 128;
	char		*buf;
	char		*tmp;
	int		rc;
	int		i;

	LIBCFS_ALLOC(buf, buf_len);
	if (buf == NULL)
		return -ENOMEM;

	if (!write) {
		lnet_res_lock(0);

		/* find the table entry matching the current setting */
		for (i = 0; portal_rotors[i].pr_value >= 0; i++) {
			if (portal_rotors[i].pr_value == portal_rotor)
				break;
		}

		LASSERT(portal_rotors[i].pr_value == portal_rotor);
		lnet_res_unlock(0);

		rc = snprintf(buf, buf_len,
			      "{\n\tportals: all\n"
			      "\trotor: %s\n\tdescription: %s\n}",
			      portal_rotors[i].pr_name,
			      portal_rotors[i].pr_desc);

		if (pos >= min_t(int, rc, buf_len)) {
			rc = 0;		/* EOF */
		} else {
			rc = cfs_trace_copyout_string(buffer, nob,
					buf + pos, "\n");
		}
		goto out;
	}

	rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob);
	if (rc < 0)
		goto out;

	/* strip surrounding whitespace before matching */
	tmp = cfs_trimwhite(buf);

	rc = -EINVAL;
	lnet_res_lock(0);
	for (i = 0; portal_rotors[i].pr_name != NULL; i++) {
		if (cfs_strncasecmp(portal_rotors[i].pr_name, tmp,
				    strlen(portal_rotors[i].pr_name)) == 0) {
			portal_rotor = portal_rotors[i].pr_value;
			rc = 0;
			break;
		}
	}
	lnet_res_unlock(0);
out:
	LIBCFS_FREE(buf, buf_len);
	return rc;
}
DECLARE_PROC_HANDLER(proc_lnet_portal_rotor);
static ctl_table_t lnet_table[] = {
/*
* NB No .strategy entries have been provided since sysctl(8) prefers
* to go via /proc for portability.
*/
{
INIT_CTL_NAME(PSDEV_LNET_STATS)
.procname = "stats",
.mode = 0644,
.proc_handler = &proc_lnet_stats,
},
{
INIT_CTL_NAME(PSDEV_LNET_ROUTES)
.procname = "routes",
.mode = 0444,
.proc_handler = &proc_lnet_routes,
},
{
INIT_CTL_NAME(PSDEV_LNET_ROUTERS)
.procname = "routers",
.mode = 0444,
.proc_handler = &proc_lnet_routers,
},
{
INIT_CTL_NAME(PSDEV_LNET_PEERS)
.procname = "peers",
.mode = 0444,
.proc_handler = &proc_lnet_peers,
},
{
INIT_CTL_NAME(PSDEV_LNET_PEERS)
.procname = "buffers",
.mode = 0444,
.proc_handler = &proc_lnet_buffers,
},
{
INIT_CTL_NAME(PSDEV_LNET_NIS)
.procname = "nis",
.mode = 0444,
.proc_handler = &proc_lnet_nis,
},
{
INIT_CTL_NAME(PSDEV_LNET_PTL_ROTOR)
.procname = "portal_rotor",
.mode = 0644,
.proc_handler = &proc_lnet_portal_rotor,
},
{
INIT_CTL_NAME(0)
}
};
/* Top-level sysctl directory: exposes lnet_table under /proc/sys/lnet. */
static ctl_table_t top_table[] = {
	{
		INIT_CTL_NAME(CTL_LNET)
		.procname = "lnet",
		.mode     = 0555,
		.data     = NULL,
		.maxlen   = 0,
		.child    = lnet_table,
	},
	{
		INIT_CTL_NAME(0)	/* terminator */
	}
};
/* Register the /proc/sys/lnet tree; idempotent across repeated calls. */
void
lnet_proc_init(void)
{
#ifdef CONFIG_SYSCTL
	if (lnet_table_header != NULL)
		return;		/* already registered */

	lnet_table_header = cfs_register_sysctl_table(top_table, 0);
#endif
}
/* Unregister the /proc/sys/lnet tree if it was registered. */
void
lnet_proc_fini(void)
{
#ifdef CONFIG_SYSCTL
	if (lnet_table_header != NULL) {
		unregister_sysctl_table(lnet_table_header);
		lnet_table_header = NULL;
	}
#endif
}
#else
void
lnet_proc_init(void)
{
}
void
lnet_proc_fini(void)
{
}
#endif

View file

@ -0,0 +1,6 @@
obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o
lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \
module.o ping_test.o brw_test.o
ccflags-y := -I$(src)/../include

View file

@ -0,0 +1,499 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/brw_test.c
*
* Author: Isaac Huang <isaac@clusterfs.com>
*/
#include "selftest.h"
static int brw_srv_workitems = SFW_TEST_WI_MAX;
CFS_MODULE_PARM(brw_srv_workitems, "i", int, 0644, "# BRW server workitems");
static int brw_inject_errors;
CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
"# data errors to inject randomly, zero by default");
/* Release the per-test-unit bulk descriptors allocated by
 * brw_client_init(). */
static void
brw_client_fini (sfw_test_instance_t *tsi)
{
	sfw_test_unit_t *unit;

	LASSERT (tsi->tsi_is_client);

	list_for_each_entry (unit, &tsi->tsi_units, tsu_list) {
		srpc_bulk_t *bulk = unit->tsu_private;

		if (bulk != NULL) {
			srpc_free_bulk(bulk);
			unit->tsu_private = NULL;
		}
	}
}
/*
 * Per-test-instance client setup: validate the BRW parameters (opcode,
 * check pattern, page count/length) for either the fixed-page v0 request
 * or the byte-length v1 request, then allocate one bulk descriptor per
 * test unit.  Returns 0 or a negative errno; frees everything on failure.
 */
int
brw_client_init (sfw_test_instance_t *tsi)
{
	sfw_session_t	 *sn = tsi->tsi_batch->bat_session;
	int		  flags;
	int		  npg;
	int		  len;
	int		  opc;
	srpc_bulk_t	 *bulk;
	sfw_test_unit_t	 *tsu;

	LASSERT(sn != NULL);
	LASSERT(tsi->tsi_is_client);

	if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
		/* v0: request expressed in whole pages */
		test_bulk_req_t  *breq = &tsi->tsi_u.bulk_v0;

		opc   = breq->blk_opc;
		flags = breq->blk_flags;
		npg   = breq->blk_npg;
		/* NB: this is not going to work for variable page size,
		 * but we have to keep it for compatibility */
		len   = npg * PAGE_CACHE_SIZE;

	} else {
		/* v1: request expressed in bytes */
		test_bulk_req_v1_t  *breq = &tsi->tsi_u.bulk_v1;

		/* I should never get this step if it's unknown feature
		 * because make_session will reject unknown feature */
		LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);

		opc   = breq->blk_opc;
		flags = breq->blk_flags;
		len   = breq->blk_len;
		npg   = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	}

	if (npg > LNET_MAX_IOV || npg <= 0)
		return -EINVAL;

	if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
		return -EINVAL;

	if (flags != LST_BRW_CHECK_NONE &&
	    flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE)
		return -EINVAL;

	list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
		/* allocate the bulk near the destination's CPT */
		bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
				       npg, len, opc == LST_BRW_READ);
		if (bulk == NULL) {
			/* unwind units initialized so far */
			brw_client_fini(tsi);
			return -ENOMEM;
		}

		tsu->tsu_private = bulk;
	}

	return 0;
}
#define BRW_POISON 0xbeefbeefbeefbeefULL
#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL
#define BRW_MSIZE sizeof(__u64)
int
brw_inject_one_error (void)
{
struct timeval tv;
if (brw_inject_errors <= 0) return 0;
do_gettimeofday(&tv);
if ((tv.tv_usec & 1) == 0) return 0;
return brw_inject_errors--;
}
/* Stamp @pg with the requested check pattern: SIMPLE writes @magic at
 * the head and tail of the page, FULL fills every 64-bit word. */
void
brw_fill_page (struct page *pg, int pattern, __u64 magic)
{
	char	*base = page_address(pg);
	int	 idx;

	LASSERT (base != NULL);

	if (pattern == LST_BRW_CHECK_NONE)
		return;

	/* NB: error injection decrements module state, so it is only done
	 * once a pattern will actually be written */
	if (magic == BRW_MAGIC)
		magic += brw_inject_one_error();

	switch (pattern) {
	case LST_BRW_CHECK_SIMPLE:
		memcpy(base, &magic, BRW_MSIZE);
		memcpy(base + PAGE_CACHE_SIZE - BRW_MSIZE, &magic, BRW_MSIZE);
		return;

	case LST_BRW_CHECK_FULL:
		for (idx = 0; idx < PAGE_CACHE_SIZE / BRW_MSIZE; idx++)
			memcpy(base + idx * BRW_MSIZE, &magic, BRW_MSIZE);
		return;
	}

	LBUG ();
}
/* Verify the pattern written by brw_fill_page(); returns 0 on a match,
 * 1 (after logging the mismatching word) on corruption. */
int
brw_check_page (struct page *pg, int pattern, __u64 magic)
{
	char	*base = page_address(pg);
	__u64	 got = 0;	/* make compiler happy */
	int	 idx;

	LASSERT (base != NULL);

	switch (pattern) {
	case LST_BRW_CHECK_NONE:
		return 0;

	case LST_BRW_CHECK_SIMPLE:
		/* magic stamped at the head and the tail of the page */
		got = *(__u64 *)base;
		if (got != magic)
			goto bad_data;

		got = *(__u64 *)(base + PAGE_CACHE_SIZE - BRW_MSIZE);
		if (got != magic)
			goto bad_data;

		return 0;

	case LST_BRW_CHECK_FULL:
		/* every 64-bit word must carry the magic */
		for (idx = 0; idx < PAGE_CACHE_SIZE / BRW_MSIZE; idx++) {
			got = *((__u64 *)base + idx);
			if (got != magic)
				goto bad_data;
		}

		return 0;
	}

	LBUG ();

bad_data:
	CERROR ("Bad data in page %p: "LPX64", "LPX64" expected\n",
		pg, got, magic);
	return 1;
}
/* Apply brw_fill_page() to every page of the bulk descriptor. */
void
brw_fill_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
{
	int idx;

	for (idx = 0; idx < bk->bk_niov; idx++)
		brw_fill_page(bk->bk_iovs[idx].kiov_page, pattern, magic);
}
/* Check every page of the bulk descriptor; returns 1 (after logging the
 * offending page) on the first corruption found, else 0. */
int
brw_check_bulk (srpc_bulk_t *bk, int pattern, __u64 magic)
{
	int idx;

	for (idx = 0; idx < bk->bk_niov; idx++) {
		struct page *pg = bk->bk_iovs[idx].kiov_page;

		if (brw_check_page(pg, pattern, magic) == 0)
			continue;

		CERROR ("Bulk page %p (%d/%d) is corrupted!\n",
			pg, idx, bk->bk_niov);
		return 1;
	}

	return 0;
}
/*
 * Build one BRW client RPC for @dest: create the RPC, attach a copy of
 * the unit's preallocated bulk (filled with BRW_MAGIC for writes, or
 * poisoned for reads so corruption is detectable), and fill in the BRW
 * request body.  Returns 0 with *rpcpp set, or a negative errno.
 */
static int
brw_client_prep_rpc (sfw_test_unit_t *tsu,
		     lnet_process_id_t dest, srpc_client_rpc_t **rpcpp)
{
	srpc_bulk_t		*bulk = tsu->tsu_private;
	sfw_test_instance_t	*tsi = tsu->tsu_instance;
	sfw_session_t		*sn = tsi->tsi_batch->bat_session;
	srpc_client_rpc_t	*rpc;
	srpc_brw_reqst_t	*req;
	int			 flags;
	int			 npg;
	int			 len;
	int			 opc;
	int			 rc;

	LASSERT(sn != NULL);
	LASSERT(bulk != NULL);

	if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
		/* v0: request expressed in whole pages */
		test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;

		opc   = breq->blk_opc;
		flags = breq->blk_flags;
		npg   = breq->blk_npg;
		len   = npg * PAGE_CACHE_SIZE;

	} else {
		/* v1: request expressed in bytes */
		test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;

		/* I should never get this step if it's unknown feature
		 * because make_session will reject unknown feature */
		LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);

		opc   = breq->blk_opc;
		flags = breq->blk_flags;
		len   = breq->blk_len;
		npg   = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	}

	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
	if (rc != 0)
		return rc;

	/* copy the iov descriptors (not the data) into the RPC's bulk */
	memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg]));
	if (opc == LST_BRW_WRITE)
		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
	else
		/* poison the sink so stale data can't pass the check */
		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON);

	req = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
	req->brw_flags = flags;
	req->brw_rw    = opc;
	req->brw_len   = len;

	*rpcpp = rpc;
	return 0;
}
/*
 * BRW client RPC completion: account transport/server errors into the
 * session, byte-swap the reply (and the expected magic) if the server
 * has opposite endianness, and for READs verify the returned bulk data.
 */
static void
brw_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
{
	__u64			magic = BRW_MAGIC;
	sfw_test_instance_t	*tsi = tsu->tsu_instance;
	sfw_session_t		*sn = tsi->tsi_batch->bat_session;
	srpc_msg_t		*msg = &rpc->crpc_replymsg;
	srpc_brw_reply_t	*reply = &msg->msg_body.brw_reply;
	srpc_brw_reqst_t	*reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;

	LASSERT (sn != NULL);

	if (rpc->crpc_status != 0) {
		/* transport-level failure: no reply to interpret */
		CERROR ("BRW RPC to %s failed with %d\n",
			libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
		if (!tsi->tsi_stopping) /* rpc could have been aborted */
			atomic_inc(&sn->sn_brw_errors);
		goto out;
	}

	if (msg->msg_magic != SRPC_MSG_MAGIC) {
		/* peer has opposite endianness: swab reply fields and
		 * the magic we will compare bulk data against */
		__swab64s(&magic);
		__swab32s(&reply->brw_status);
	}

	CDEBUG (reply->brw_status ? D_WARNING : D_NET,
		"BRW RPC to %s finished with brw_status: %d\n",
		libcfs_id2str(rpc->crpc_dest), reply->brw_status);

	if (reply->brw_status != 0) {
		atomic_inc(&sn->sn_brw_errors);
		rpc->crpc_status = -(int)reply->brw_status;
		goto out;
	}

	/* nothing to verify for WRITE: data flowed client -> server */
	if (reqst->brw_rw == LST_BRW_WRITE) goto out;

	if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic) != 0) {
		CERROR ("Bulk data from %s is corrupted!\n",
			libcfs_id2str(rpc->crpc_dest));
		atomic_inc(&sn->sn_brw_errors);
		rpc->crpc_status = -EBADMSG;
	}

out:
	return;
}
/* Server-side BRW RPC completion: log the bulk transfer outcome and
 * release the pages attached to the RPC. */
void
brw_server_rpc_done (srpc_server_rpc_t *rpc)
{
	srpc_bulk_t *bulk = rpc->srpc_bulk;

	if (bulk == NULL)
		return;

	if (rpc->srpc_status != 0)
		CERROR ("Bulk transfer %s %s has failed: %d\n",
			bulk->bk_sink ? "from" : "to",
			libcfs_id2str(rpc->srpc_peer), rpc->srpc_status);
	else
		CDEBUG (D_NET, "Transfered %d pages bulk data %s %s\n",
			bulk->bk_niov, bulk->bk_sink ? "from" : "to",
			libcfs_id2str(rpc->srpc_peer));

	sfw_free_pages(rpc);
}
/*
 * Called when the server-side bulk transfer completes.  On a transfer
 * error return -EIO; for a completed WRITE (client -> server) verify the
 * received data against the (possibly byte-swapped) magic and record
 * EBADMSG in the reply on corruption.  READs need no check here.
 */
int
brw_bulk_ready (srpc_server_rpc_t *rpc, int status)
{
	__u64		  magic = BRW_MAGIC;
	srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
	srpc_brw_reqst_t *reqst;
	srpc_msg_t	 *reqstmsg;

	LASSERT (rpc->srpc_bulk != NULL);
	LASSERT (rpc->srpc_reqstbuf != NULL);

	reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
	reqst = &reqstmsg->msg_body.brw_reqst;

	if (status != 0) {
		CERROR ("BRW bulk %s failed for RPC from %s: %d\n",
			reqst->brw_rw == LST_BRW_READ ? "READ" : "WRITE",
			libcfs_id2str(rpc->srpc_peer), status);
		return -EIO;
	}

	if (reqst->brw_rw == LST_BRW_READ)
		return 0;

	/* client has opposite endianness: compare against swabbed magic */
	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC)
		__swab64s(&magic);

	if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic) != 0) {
		CERROR ("Bulk data from %s is corrupted!\n",
			libcfs_id2str(rpc->srpc_peer));
		/* NB: positive status travels in the reply body */
		reply->brw_status = EBADMSG;
	}

	return 0;
}
/*
 * Server-side BRW request handler: byte-swap the request if needed,
 * validate opcode/flags/length and session features (rejections are
 * reported via a positive brw_status in the reply, with rc 0), allocate
 * the bulk pages, and pre-fill them (magic for READ, poison for WRITE).
 */
int
brw_server_handle(struct srpc_server_rpc *rpc)
{
	struct srpc_service	*sv = rpc->srpc_scd->scd_svc;
	srpc_msg_t		*replymsg = &rpc->srpc_replymsg;
	srpc_msg_t		*reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
	srpc_brw_reply_t	*reply = &replymsg->msg_body.brw_reply;
	srpc_brw_reqst_t	*reqst = &reqstmsg->msg_body.brw_reqst;
	int			 npg;
	int			 rc;

	LASSERT (sv->sv_id == SRPC_SERVICE_BRW);

	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
		/* client has opposite endianness: swab request in place */
		LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));

		__swab32s(&reqst->brw_rw);
		__swab32s(&reqst->brw_len);
		__swab32s(&reqst->brw_flags);
		__swab64s(&reqst->brw_rpyid);
		__swab64s(&reqst->brw_bulkid);
	}
	LASSERT (reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id));

	reply->brw_status = 0;
	rpc->srpc_done = brw_server_rpc_done;

	if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) ||
	    (reqst->brw_flags != LST_BRW_CHECK_NONE &&
	     reqst->brw_flags != LST_BRW_CHECK_FULL &&
	     reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) {
		reply->brw_status = EINVAL;
		return 0;
	}

	if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
		/* unknown feature bits: tell the client what we support */
		replymsg->msg_ses_feats = LST_FEATS_MASK;
		reply->brw_status = EPROTO;
		return 0;
	}

	if ((reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN) == 0) {
		/* compat with old version */
		if ((reqst->brw_len & ~CFS_PAGE_MASK) != 0) {
			reply->brw_status = EINVAL;
			return 0;
		}
		npg = reqst->brw_len >> PAGE_CACHE_SHIFT;

	} else {
		npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	}

	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;

	if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) {
		reply->brw_status = EINVAL;
		return 0;
	}

	rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
			     reqst->brw_len,
			     reqst->brw_rw == LST_BRW_WRITE);
	if (rc != 0)
		return rc;

	if (reqst->brw_rw == LST_BRW_READ)
		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
	else
		/* poison the sink so stale data can't pass the check */
		brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);

	return 0;
}
sfw_test_client_ops_t brw_test_client;
/* Wire up the BRW test client operation table.
 * Fix: dropped the stray ';' that followed the closing brace — an
 * extraneous file-scope semicolon is not valid ISO C and triggers
 * -Wpedantic warnings. */
void brw_init_test_client(void)
{
	brw_test_client.tso_init = brw_client_init;
	brw_test_client.tso_fini = brw_client_fini;
	brw_test_client.tso_prep_rpc = brw_client_prep_rpc;
	brw_test_client.tso_done_rpc = brw_client_done_rpc;
}
srpc_service_t brw_test_service;
void brw_init_test_service(void)
{
brw_test_service.sv_id = SRPC_SERVICE_BRW;
brw_test_service.sv_name = "brw_test";
brw_test_service.sv_handler = brw_server_handle;
brw_test_service.sv_bulk_ready = brw_bulk_ready;
brw_test_service.sv_wi_total = brw_srv_workitems;
}

View file

@ -0,0 +1,931 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/conctl.c
*
* IOC handle in kernel
*
* Author: Liang Zhen <liangzhen@clusterfs.com>
*/
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lib-lnet.h>
#include <linux/lnet/lnetst.h>
#include "console.h"
/*
 * lst_session_new_ioctl - create a new self-test console session.
 *
 * Validates the userspace arguments, copies the session name into a
 * kernel buffer, and hands the request to lstcon_session_new(), which
 * writes the new session id back through args->lstio_ses_idp.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_session_new_ioctl(lstio_session_new_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_ses_idp == NULL || /* address for output sid */
	    args->lstio_ses_key == 0 || /* no key is specified */
	    args->lstio_ses_namep == NULL || /* session name */
	    args->lstio_ses_nmlen <= 0 ||
	    args->lstio_ses_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_ses_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name,
			   args->lstio_ses_namep,
			   args->lstio_ses_nmlen)) {
		LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);
		return -EFAULT;
	}

	/* user buffer is not necessarily NUL-terminated */
	name[args->lstio_ses_nmlen] = 0;

	/*
	 * NOTE(review): the argument order passed here (feats, force,
	 * timeout) does not match the lstcon_session_new() prototype in
	 * console.h, which reads (version, timeout, flags) -- verify
	 * against the definition in console.c.
	 */
	rc = lstcon_session_new(name,
				args->lstio_ses_key,
				args->lstio_ses_feats,
				args->lstio_ses_force,
				args->lstio_ses_timeout,
				args->lstio_ses_idp);

	LIBCFS_FREE(name, args->lstio_ses_nmlen + 1);

	return rc;
}
/*
 * lst_session_end_ioctl - tear down the current console session.
 * The caller must present the session key; anything else is refused.
 */
int
lst_session_end_ioctl(lstio_session_end_args_t *args)
{
	if (console_session.ses_key == args->lstio_ses_key)
		return lstcon_session_end();

	return -EACCES;
}
/*
 * lst_session_info_ioctl - report id, key, features, node summary and
 * name of the current session into the caller-supplied user buffers.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_session_info_ioctl(lstio_session_info_args_t *args)
{
	/* no checking of key */

	if (args->lstio_ses_idp == NULL || /* address for output sid */
	    args->lstio_ses_keyp == NULL || /* address for output key */
	    args->lstio_ses_featp == NULL || /* address for output features */
	    args->lstio_ses_ndinfo == NULL || /* address for output ndinfo */
	    args->lstio_ses_namep == NULL || /* address for output name */
	    args->lstio_ses_nmlen <= 0 ||
	    args->lstio_ses_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	return lstcon_session_info(args->lstio_ses_idp,
				   args->lstio_ses_keyp,
				   args->lstio_ses_featp,
				   args->lstio_ses_ndinfo,
				   args->lstio_ses_namep,
				   args->lstio_ses_nmlen);
}
/*
 * lst_debug_ioctl - query debug state of the session, a batch, a group,
 * or an explicit list of nodes, selected by lstio_dbg_type.
 *
 * The batch/group name is optional at validation time but required for
 * the batch and group opcodes; those fall out with -EINVAL if it was
 * not supplied.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_debug_ioctl(lstio_debug_args_t *args)
{
	char *name = NULL;
	int client = 1;
	int rc;

	if (args->lstio_dbg_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_dbg_resultp == NULL)
		return -EINVAL;

	if (args->lstio_dbg_namep != NULL && /* name of batch/group */
	    (args->lstio_dbg_nmlen <= 0 ||
	     args->lstio_dbg_nmlen > LST_NAME_SIZE))
		return -EINVAL;

	if (args->lstio_dbg_namep != NULL) {
		LIBCFS_ALLOC(name, args->lstio_dbg_nmlen + 1);
		if (name == NULL)
			return -ENOMEM;

		if (copy_from_user(name, args->lstio_dbg_namep,
				   args->lstio_dbg_nmlen)) {
			LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);
			return -EFAULT;
		}

		name[args->lstio_dbg_nmlen] = 0;
	}

	rc = -EINVAL;

	switch (args->lstio_dbg_type) {
	case LST_OPC_SESSION:
		rc = lstcon_session_debug(args->lstio_dbg_timeout,
					  args->lstio_dbg_resultp);
		break;

	case LST_OPC_BATCHSRV:
		client = 0;
		/* fallthrough - server side shares the batch-debug path */
	case LST_OPC_BATCHCLI:
		if (name == NULL)
			goto out;

		rc = lstcon_batch_debug(args->lstio_dbg_timeout,
					name, client, args->lstio_dbg_resultp);
		break;

	case LST_OPC_GROUP:
		if (name == NULL)
			goto out;

		rc = lstcon_group_debug(args->lstio_dbg_timeout,
					name, args->lstio_dbg_resultp);
		break;

	case LST_OPC_NODES:
		if (args->lstio_dbg_count <= 0 ||
		    args->lstio_dbg_idsp == NULL)
			goto out;

		rc = lstcon_nodes_debug(args->lstio_dbg_timeout,
					args->lstio_dbg_count,
					args->lstio_dbg_idsp,
					args->lstio_dbg_resultp);
		break;

	default:
		break;
	}

out:
	if (name != NULL)
		LIBCFS_FREE(name, args->lstio_dbg_nmlen + 1);

	return rc;
}
/*
 * lst_group_add_ioctl - create a new (empty) test group.
 *
 * Copies the group name from userspace and registers it via
 * lstcon_group_add().
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_group_add_ioctl(lstio_group_add_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_grp_namep == NULL ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name,
			   args->lstio_grp_namep,
			   args->lstio_grp_nmlen)) {
		/* fix: free with the allocated size (nmlen + 1); the
		 * original freed only nmlen bytes on this path */
		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
		return -EFAULT;
	}

	name[args->lstio_grp_nmlen] = 0;

	rc = lstcon_group_add(name);

	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);

	return rc;
}
/*
 * lst_group_del_ioctl - delete a test group by name.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_group_del_ioctl(lstio_group_del_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_grp_namep == NULL ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	rc = -EFAULT;
	if (!copy_from_user(name, args->lstio_grp_namep,
			    args->lstio_grp_nmlen)) {
		name[args->lstio_grp_nmlen] = 0;
		rc = lstcon_group_del(name);
	}

	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
	return rc;
}
/*
 * lst_group_update_ioctl - modify an existing group: clean nodes out of
 * it, refresh its members, or remove an explicit list of nodes,
 * selected by lstio_grp_opc.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_group_update_ioctl(lstio_group_update_args_t *args)
{
	int rc;
	char *name;

	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_grp_resultp == NULL ||
	    args->lstio_grp_namep == NULL ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name,
			   args->lstio_grp_namep,
			   args->lstio_grp_nmlen)) {
		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
		return -EFAULT;
	}

	name[args->lstio_grp_nmlen] = 0;

	switch (args->lstio_grp_opc) {
	case LST_GROUP_CLEAN:
		rc = lstcon_group_clean(name, args->lstio_grp_args);
		break;

	case LST_GROUP_REFRESH:
		rc = lstcon_group_refresh(name, args->lstio_grp_resultp);
		break;

	case LST_GROUP_RMND:
		/* removing nodes needs a non-empty id list */
		if (args->lstio_grp_count <= 0 ||
		    args->lstio_grp_idsp == NULL) {
			rc = -EINVAL;
			break;
		}
		rc = lstcon_nodes_remove(name, args->lstio_grp_count,
					 args->lstio_grp_idsp,
					 args->lstio_grp_resultp);
		break;

	default:
		rc = -EINVAL;
		break;
	}

	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);

	return rc;
}
/*
 * lst_nodes_add_ioctl - add a list of node ids to a group, then report
 * the negotiated feature mask back through lstio_grp_featp.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
{
	unsigned feats;
	int rc;
	char *name;

	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_grp_idsp == NULL || /* array of ids */
	    args->lstio_grp_count <= 0 ||
	    args->lstio_grp_resultp == NULL ||
	    args->lstio_grp_featp == NULL ||
	    args->lstio_grp_namep == NULL ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name, args->lstio_grp_namep,
			   args->lstio_grp_nmlen)) {
		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
		return -EFAULT;
	}

	name[args->lstio_grp_nmlen] = 0;

	rc = lstcon_nodes_add(name, args->lstio_grp_count,
			      args->lstio_grp_idsp, &feats,
			      args->lstio_grp_resultp);

	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);

	/* fix: a faulting copy_to_user is -EFAULT by kernel convention;
	 * the original returned -EINVAL here */
	if (rc == 0 &&
	    copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats)))
		return -EFAULT;

	return rc;
}
/*
 * lst_group_list_ioctl - copy the name of the idx-th group to userspace.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_group_list_ioctl(lstio_group_list_args_t *args)
{
	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	/* need a valid index and a usable output name buffer */
	if (args->lstio_grp_namep == NULL ||
	    args->lstio_grp_idx < 0 ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	return lstcon_group_list(args->lstio_grp_idx,
				 args->lstio_grp_nmlen,
				 args->lstio_grp_namep);
}
/*
 * lst_group_info_ioctl - fetch a group summary and/or a window of its
 * node entries; the consumed index and entry count are written back to
 * the caller.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_group_info_ioctl(lstio_group_info_args_t *args)
{
	char *name;
	int ndent;
	int index;
	int rc;

	if (args->lstio_grp_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_grp_namep == NULL ||
	    args->lstio_grp_nmlen <= 0 ||
	    args->lstio_grp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (args->lstio_grp_entp == NULL && /* output: group entry */
	    args->lstio_grp_dentsp == NULL) /* output: node entry */
		return -EINVAL;

	if (args->lstio_grp_dentsp != NULL) { /* have node entry */
		if (args->lstio_grp_idxp == NULL || /* node index */
		    args->lstio_grp_ndentp == NULL) /* # of node entry */
			return -EINVAL;

		if (copy_from_user(&ndent, args->lstio_grp_ndentp,
				   sizeof(ndent)) ||
		    copy_from_user(&index, args->lstio_grp_idxp,
				   sizeof(index)))
			return -EFAULT;

		if (ndent <= 0 || index < 0)
			return -EINVAL;
	}

	LIBCFS_ALLOC(name, args->lstio_grp_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name,
			   args->lstio_grp_namep,
			   args->lstio_grp_nmlen)) {
		LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);
		return -EFAULT;
	}

	name[args->lstio_grp_nmlen] = 0;

	rc = lstcon_group_info(name, args->lstio_grp_entp,
			       &index, &ndent, args->lstio_grp_dentsp);

	LIBCFS_FREE(name, args->lstio_grp_nmlen + 1);

	if (rc != 0)
		return rc;

	/* write back the consumed index / entry count */
	if (args->lstio_grp_dentsp != NULL &&
	    (copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) ||
	     copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent))))
		rc = -EFAULT;

	/* fix: the original ended with "return 0", silently discarding
	 * the -EFAULT assigned just above */
	return rc;
}
/*
 * lst_batch_add_ioctl - create a new test batch by name.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_add_ioctl(lstio_batch_add_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_bat_namep == NULL ||
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	rc = -EFAULT;
	if (!copy_from_user(name, args->lstio_bat_namep,
			    args->lstio_bat_nmlen)) {
		name[args->lstio_bat_nmlen] = 0;
		rc = lstcon_batch_add(name);
	}

	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
	return rc;
}
/*
 * lst_batch_run_ioctl - start a named batch with the given timeout;
 * per-node results are returned through lstio_bat_resultp.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_run_ioctl(lstio_batch_run_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_bat_namep == NULL ||
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	rc = -EFAULT;
	if (!copy_from_user(name, args->lstio_bat_namep,
			    args->lstio_bat_nmlen)) {
		name[args->lstio_bat_nmlen] = 0;
		rc = lstcon_batch_run(name, args->lstio_bat_timeout,
				      args->lstio_bat_resultp);
	}

	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
	return rc;
}
/*
 * lst_batch_stop_ioctl - stop a running batch, optionally forcing it;
 * per-node results are returned through lstio_bat_resultp.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_stop_ioctl(lstio_batch_stop_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_bat_resultp == NULL ||
	    args->lstio_bat_namep == NULL ||
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	rc = -EFAULT;
	if (!copy_from_user(name, args->lstio_bat_namep,
			    args->lstio_bat_nmlen)) {
		name[args->lstio_bat_nmlen] = 0;
		rc = lstcon_batch_stop(name, args->lstio_bat_force,
				       args->lstio_bat_resultp);
	}

	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
	return rc;
}
/*
 * lst_batch_query_ioctl - query the state of one test (by index) inside
 * a named batch, on either the client or the server side.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_query_ioctl(lstio_batch_query_args_t *args)
{
	char *name;
	int rc;

	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_bat_resultp == NULL ||
	    args->lstio_bat_namep == NULL ||
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (args->lstio_bat_testidx < 0)
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	rc = -EFAULT;
	if (!copy_from_user(name, args->lstio_bat_namep,
			    args->lstio_bat_nmlen)) {
		name[args->lstio_bat_nmlen] = 0;
		rc = lstcon_test_batch_query(name,
					     args->lstio_bat_testidx,
					     args->lstio_bat_client,
					     args->lstio_bat_timeout,
					     args->lstio_bat_resultp);
	}

	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
	return rc;
}
/*
 * lst_batch_list_ioctl - copy the name of the idx-th batch to userspace.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_list_ioctl(lstio_batch_list_args_t *args)
{
	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	/* need a valid index and a usable output name buffer */
	if (args->lstio_bat_namep == NULL ||
	    args->lstio_bat_idx < 0 ||
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	return lstcon_batch_list(args->lstio_bat_idx,
				 args->lstio_bat_nmlen,
				 args->lstio_bat_namep);
}
/*
 * lst_batch_info_ioctl - fetch a batch summary and/or a window of its
 * node entries; the consumed index and entry count are written back to
 * the caller.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_batch_info_ioctl(lstio_batch_info_args_t *args)
{
	char *name;
	int rc;
	int index;
	int ndent;

	if (args->lstio_bat_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_bat_namep == NULL || /* batch name */
	    args->lstio_bat_nmlen <= 0 ||
	    args->lstio_bat_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (args->lstio_bat_entp == NULL && /* output: batch entry */
	    args->lstio_bat_dentsp == NULL) /* output: node entry */
		return -EINVAL;

	if (args->lstio_bat_dentsp != NULL) { /* have node entry */
		if (args->lstio_bat_idxp == NULL || /* node index */
		    args->lstio_bat_ndentp == NULL) /* # of node entry */
			return -EINVAL;

		if (copy_from_user(&index, args->lstio_bat_idxp,
				   sizeof(index)) ||
		    copy_from_user(&ndent, args->lstio_bat_ndentp,
				   sizeof(ndent)))
			return -EFAULT;

		if (ndent <= 0 || index < 0)
			return -EINVAL;
	}

	LIBCFS_ALLOC(name, args->lstio_bat_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name,
			   args->lstio_bat_namep, args->lstio_bat_nmlen)) {
		LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);
		return -EFAULT;
	}

	name[args->lstio_bat_nmlen] = 0;

	rc = lstcon_batch_info(name,
			       args->lstio_bat_entp, args->lstio_bat_server,
			       args->lstio_bat_testidx, &index, &ndent,
			       args->lstio_bat_dentsp);

	LIBCFS_FREE(name, args->lstio_bat_nmlen + 1);

	if (rc != 0)
		return rc;

	/* write back the consumed index / entry count */
	if (args->lstio_bat_dentsp != NULL &&
	    (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) ||
	     copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent))))
		rc = -EFAULT;

	return rc;
}
/*
 * lst_stat_query_ioctl - gather statistics either for a named group or
 * for an explicit list of node ids.
 *
 * The validation allows lstio_sta_namep to be NULL when lstio_sta_idsp
 * is supplied, so the name must only be touched on the group path.
 *
 * Returns 0 on success or a negative errno.
 */
int
lst_stat_query_ioctl(lstio_stat_args_t *args)
{
	int rc;
	char *name;

	/* TODO: not finished */
	if (args->lstio_sta_key != console_session.ses_key)
		return -EACCES;

	if (args->lstio_sta_resultp == NULL ||
	    (args->lstio_sta_namep == NULL &&
	     args->lstio_sta_idsp == NULL) ||
	    args->lstio_sta_nmlen <= 0 ||
	    args->lstio_sta_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (args->lstio_sta_idsp != NULL &&
	    args->lstio_sta_count <= 0)
		return -EINVAL;

	/* stat by node ids: fix - the original unconditionally did
	 * copy_from_user() from lstio_sta_namep, which may legally be
	 * NULL on this path and made a valid ids-based query fail with
	 * -EFAULT */
	if (args->lstio_sta_idsp != NULL)
		return lstcon_nodes_stat(args->lstio_sta_count,
					 args->lstio_sta_idsp,
					 args->lstio_sta_timeout,
					 args->lstio_sta_resultp);

	/* stat by group name */
	LIBCFS_ALLOC(name, args->lstio_sta_nmlen + 1);
	if (name == NULL)
		return -ENOMEM;

	if (copy_from_user(name, args->lstio_sta_namep,
			   args->lstio_sta_nmlen)) {
		LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
		return -EFAULT;
	}

	/* fix: NUL-terminate like every other ioctl handler here;
	 * the original never terminated the copied name */
	name[args->lstio_sta_nmlen] = 0;

	rc = lstcon_group_stat(name, args->lstio_sta_timeout,
			       args->lstio_sta_resultp);

	LIBCFS_FREE(name, args->lstio_sta_nmlen + 1);
	return rc;
}
/*
 * lst_test_add_ioctl - add a test (e.g. ping or bulk) to a batch,
 * running from a source group against a target group, with an optional
 * test-specific parameter blob.
 *
 * Returns 0 on success or a negative errno; the per-test result code is
 * written back through lstio_tes_retp.
 */
int lst_test_add_ioctl(lstio_test_args_t *args)
{
	char *name;
	char *srcgrp = NULL;
	char *dstgrp = NULL;
	void *param = NULL;
	int ret = 0;
	int rc = -ENOMEM;

	if (args->lstio_tes_resultp == NULL ||
	    args->lstio_tes_retp == NULL ||
	    args->lstio_tes_bat_name == NULL || /* no specified batch */
	    args->lstio_tes_bat_nmlen <= 0 ||
	    args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
	    args->lstio_tes_sgrp_name == NULL || /* no source group */
	    args->lstio_tes_sgrp_nmlen <= 0 ||
	    args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
	    args->lstio_tes_dgrp_name == NULL || /* no target group */
	    args->lstio_tes_dgrp_nmlen <= 0 ||
	    args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
		return -EINVAL;

	if (args->lstio_tes_loop == 0 || /* negative is infinite */
	    args->lstio_tes_concur <= 0 ||
	    args->lstio_tes_dist <= 0 ||
	    args->lstio_tes_span <= 0)
		return -EINVAL;

	/* have parameter, check if parameter length is valid */
	if (args->lstio_tes_param != NULL &&
	    (args->lstio_tes_param_len <= 0 ||
	     args->lstio_tes_param_len > PAGE_CACHE_SIZE - sizeof(lstcon_test_t)))
		return -EINVAL;

	LIBCFS_ALLOC(name, args->lstio_tes_bat_nmlen + 1);
	if (name == NULL)
		return rc;

	LIBCFS_ALLOC(srcgrp, args->lstio_tes_sgrp_nmlen + 1);
	if (srcgrp == NULL)
		goto out;

	LIBCFS_ALLOC(dstgrp, args->lstio_tes_dgrp_nmlen + 1);
	if (dstgrp == NULL)
		goto out;

	if (args->lstio_tes_param != NULL) {
		LIBCFS_ALLOC(param, args->lstio_tes_param_len);
		if (param == NULL)
			goto out;
	}

	rc = -EFAULT;
	/* fix: only copy the parameter blob when one was supplied; the
	 * original called copy_from_user(param, ...) unconditionally,
	 * with param == NULL and an unvalidated length when
	 * lstio_tes_param was NULL, failing a valid request with
	 * -EFAULT */
	if (copy_from_user(name,
			   args->lstio_tes_bat_name,
			   args->lstio_tes_bat_nmlen) ||
	    copy_from_user(srcgrp,
			   args->lstio_tes_sgrp_name,
			   args->lstio_tes_sgrp_nmlen) ||
	    copy_from_user(dstgrp,
			   args->lstio_tes_dgrp_name,
			   args->lstio_tes_dgrp_nmlen) ||
	    (param != NULL &&
	     copy_from_user(param, args->lstio_tes_param,
			    args->lstio_tes_param_len)))
		goto out;

	rc = lstcon_test_add(name,
			     args->lstio_tes_type,
			     args->lstio_tes_loop,
			     args->lstio_tes_concur,
			     args->lstio_tes_dist, args->lstio_tes_span,
			     srcgrp, dstgrp, param, args->lstio_tes_param_len,
			     &ret, args->lstio_tes_resultp);

	/* report the per-test result code back to userspace */
	if (ret != 0)
		rc = (copy_to_user(args->lstio_tes_retp, &ret,
				   sizeof(ret))) ? -EFAULT : 0;
out:
	if (name != NULL)
		LIBCFS_FREE(name, args->lstio_tes_bat_nmlen + 1);

	if (srcgrp != NULL)
		LIBCFS_FREE(srcgrp, args->lstio_tes_sgrp_nmlen + 1);

	if (dstgrp != NULL)
		LIBCFS_FREE(dstgrp, args->lstio_tes_dgrp_nmlen + 1);

	if (param != NULL)
		LIBCFS_FREE(param, args->lstio_tes_param_len);

	return rc;
}
/*
 * lstcon_ioctl_entry - top-level dispatcher for all LST console ioctls.
 *
 * Copies the opcode-specific argument struct from userspace into a
 * kernel buffer, dispatches on the opcode under the session mutex, and
 * copies the accumulated transaction statistics back to the caller.
 *
 * Returns 0 on success or a negative errno.
 */
int
lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_data *data)
{
	char *buf;
	int opc = data->ioc_u32[0];
	int rc;

	if (cmd != IOC_LIBCFS_LNETST)
		return -EINVAL;

	/* NOTE(review): only an upper bound is checked here; ioc_plen1 == 0
	 * would reach LIBCFS_ALLOC with a zero size -- confirm the libcfs
	 * ioctl layer rejects that earlier */
	if (data->ioc_plen1 > PAGE_CACHE_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(buf, data->ioc_plen1);
	if (buf == NULL)
		return -ENOMEM;

	/* copy in parameter */
	if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
		LIBCFS_FREE(buf, data->ioc_plen1);
		return -EFAULT;
	}

	/* one console command at a time */
	mutex_lock(&console_session.ses_mutex);

	console_session.ses_laststamp = cfs_time_current_sec();

	if (console_session.ses_shutdown) {
		rc = -ESHUTDOWN;
		goto out;
	}

	/* reap an expired session before handling the command */
	if (console_session.ses_expired)
		lstcon_session_end();

	/* every opcode except session-create requires an active session */
	if (opc != LSTIO_SESSION_NEW &&
	    console_session.ses_state == LST_SESSION_NONE) {
		CDEBUG(D_NET, "LST no active session\n");
		rc = -ESRCH;
		goto out;
	}

	memset(&console_session.ses_trans_stat, 0, sizeof(lstcon_trans_stat_t));

	switch (opc) {
	case LSTIO_SESSION_NEW:
		rc = lst_session_new_ioctl((lstio_session_new_args_t *)buf);
		break;
	case LSTIO_SESSION_END:
		rc = lst_session_end_ioctl((lstio_session_end_args_t *)buf);
		break;
	case LSTIO_SESSION_INFO:
		rc = lst_session_info_ioctl((lstio_session_info_args_t *)buf);
		break;
	case LSTIO_DEBUG:
		rc = lst_debug_ioctl((lstio_debug_args_t *)buf);
		break;
	case LSTIO_GROUP_ADD:
		rc = lst_group_add_ioctl((lstio_group_add_args_t *)buf);
		break;
	case LSTIO_GROUP_DEL:
		rc = lst_group_del_ioctl((lstio_group_del_args_t *)buf);
		break;
	case LSTIO_GROUP_UPDATE:
		rc = lst_group_update_ioctl((lstio_group_update_args_t *)buf);
		break;
	case LSTIO_NODES_ADD:
		rc = lst_nodes_add_ioctl((lstio_group_nodes_args_t *)buf);
		break;
	case LSTIO_GROUP_LIST:
		rc = lst_group_list_ioctl((lstio_group_list_args_t *)buf);
		break;
	case LSTIO_GROUP_INFO:
		rc = lst_group_info_ioctl((lstio_group_info_args_t *)buf);
		break;
	case LSTIO_BATCH_ADD:
		rc = lst_batch_add_ioctl((lstio_batch_add_args_t *)buf);
		break;
	case LSTIO_BATCH_START:
		rc = lst_batch_run_ioctl((lstio_batch_run_args_t *)buf);
		break;
	case LSTIO_BATCH_STOP:
		rc = lst_batch_stop_ioctl((lstio_batch_stop_args_t *)buf);
		break;
	case LSTIO_BATCH_QUERY:
		rc = lst_batch_query_ioctl((lstio_batch_query_args_t *)buf);
		break;
	case LSTIO_BATCH_LIST:
		rc = lst_batch_list_ioctl((lstio_batch_list_args_t *)buf);
		break;
	case LSTIO_BATCH_INFO:
		rc = lst_batch_info_ioctl((lstio_batch_info_args_t *)buf);
		break;
	case LSTIO_TEST_ADD:
		rc = lst_test_add_ioctl((lstio_test_args_t *)buf);
		break;
	case LSTIO_STAT_QUERY:
		rc = lst_stat_query_ioctl((lstio_stat_args_t *)buf);
		break;
	default:
		rc = -EINVAL;
	}

	/* NOTE(review): stats are copied out unconditionally; confirm
	 * ioc_pbuf2 is validated by the libcfs ioctl layer */
	if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
			 sizeof(lstcon_trans_stat_t)))
		rc = -EFAULT;
out:
	mutex_unlock(&console_session.ses_mutex);

	LIBCFS_FREE(buf, data->ioc_plen1);

	return rc;
}

EXPORT_SYMBOL(lstcon_ioctl_entry);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,146 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* /lnet/selftest/conrpc.h
*
* Console rpc
*
* Author: Liang Zhen <liang@whamcloud.com>
*/
#ifndef __LST_CONRPC_H__
#define __LST_CONRPC_H__
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-types.h>
#include <linux/lnet/lnetst.h>
#include "rpc.h"
#include "selftest.h"
/* Console rpc and rpc transaction */
#define LST_TRANS_TIMEOUT 30
#define LST_TRANS_MIN_TIMEOUT 3
#define LST_VALIDATE_TIMEOUT(t) MIN(MAX(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT)
#define LST_PING_INTERVAL 8
struct lstcon_rpc_trans;
struct lstcon_tsb_hdr;
struct lstcon_test;
struct lstcon_node;
/* A single console-side RPC, always owned by exactly one transaction. */
typedef struct lstcon_rpc {
	struct list_head crp_link; /* chain on rpc transaction */
	srpc_client_rpc_t *crp_rpc; /* client rpc */
	struct lstcon_node *crp_node; /* destination node */
	struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */

	unsigned int crp_posted:1; /* rpc is posted */
	unsigned int crp_finished:1; /* rpc is finished */
	unsigned int crp_unpacked:1; /* reply is unpacked */
	/** RPC is embedded in other structure and can't free it */
	unsigned int crp_embedded:1;
	int crp_status; /* console rpc errors */
	cfs_time_t crp_stamp; /* replied time stamp */
} lstcon_rpc_t;

/* A transaction: a set of lstcon_rpc_t sent to a list of nodes, waited
 * on as a unit via tas_waitq / tas_remaining. */
typedef struct lstcon_rpc_trans {
	struct list_head tas_olink; /* link chain on owner list */
	struct list_head tas_link; /* link chain on global list */
	int tas_opc; /* operation code of transaction */
	/* features mask is uptodate */
	unsigned tas_feats_updated;
	/* test features mask */
	unsigned tas_features;
	wait_queue_head_t tas_waitq; /* wait queue head */
	atomic_t tas_remaining; /* # of un-scheduled rpcs */
	struct list_head tas_rpcs_list; /* queued requests */
} lstcon_rpc_trans_t;
#define LST_TRANS_PRIVATE 0x1000
#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01)
#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02)
#define LST_TRANS_SESQRY 0x03
#define LST_TRANS_SESPING 0x04
#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11)
#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12)
#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13)
#define LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14)
#define LST_TRANS_TSBCLIQRY 0x15
#define LST_TRANS_TSBSRVQRY 0x16
#define LST_TRANS_STATQRY 0x21
typedef int (* lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
typedef int (* lstcon_rpc_readent_func_t)(int, srpc_msg_t *, lstcon_rpc_ent_t *);
int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
unsigned version, lstcon_rpc_t **crpc);
int lstcon_dbgrpc_prep(struct lstcon_node *nd,
unsigned version, lstcon_rpc_t **crpc);
int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc);
int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
struct lstcon_test *test, lstcon_rpc_t **crpc);
int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version,
lstcon_rpc_t **crpc);
void lstcon_rpc_put(lstcon_rpc_t *crpc);
int lstcon_rpc_trans_prep(struct list_head *translist,
int transop, lstcon_rpc_trans_t **transpp);
int lstcon_rpc_trans_ndlist(struct list_head *ndlist,
struct list_head *translist, int transop,
void *arg, lstcon_rpc_cond_func_t condition,
lstcon_rpc_trans_t **transpp);
void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans,
lstcon_trans_stat_t *stat);
int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
struct list_head *head_up,
lstcon_rpc_readent_func_t readent);
void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error);
void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans);
void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req);
int lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout);
int lstcon_rpc_pinger_start(void);
void lstcon_rpc_pinger_stop(void);
void lstcon_rpc_cleanup_wait(void);
int lstcon_rpc_module_init(void);
void lstcon_rpc_module_fini(void);
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,232 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/console.h
*
* kernel structure for LST console
*
* Author: Liang Zhen <liangzhen@clusterfs.com>
*/
#ifndef __LST_CONSOLE_H__
#define __LST_CONSOLE_H__
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-types.h>
#include <linux/lnet/lnetst.h>
#include "selftest.h"
#include "conrpc.h"
/* A remote test node known to the console; reference-counted. */
typedef struct lstcon_node {
	lnet_process_id_t nd_id; /* id of the node */
	int nd_ref; /* reference count */
	int nd_state; /* state of the node */
	int nd_timeout; /* session timeout */
	cfs_time_t nd_stamp; /* timestamp of last replied RPC */
	struct lstcon_rpc nd_ping; /* ping rpc */
} lstcon_node_t; /*** node descriptor */

/* Membership link: lets one node sit on a group list and a hash chain. */
typedef struct {
	struct list_head ndl_link; /* chain on list */
	struct list_head ndl_hlink; /* chain on hash */
	lstcon_node_t *ndl_node; /* pointer to node */
} lstcon_ndlink_t; /*** node link descriptor */

/* A named collection of nodes; grp_ndl_hash is a flexible-array hash
 * table allocated together with the struct. */
typedef struct {
	struct list_head grp_link; /* chain on global group list */
	int grp_ref; /* reference count */
	int grp_userland; /* has userland nodes */
	int grp_nnode; /* # of nodes */
	char grp_name[LST_NAME_SIZE]; /* group name */

	struct list_head grp_trans_list; /* transaction list */
	struct list_head grp_ndl_list; /* nodes list */
	struct list_head grp_ndl_hash[0];/* hash table for nodes */
} lstcon_group_t; /*** (alias of nodes) group descriptor */

#define LST_BATCH_IDLE 0xB0 /* idle batch */
#define LST_BATCH_RUNNING 0xB1 /* running batch */

/* Common header shared by batches and tests so both can be addressed
 * by batch id + test index. */
typedef struct lstcon_tsb_hdr {
	lst_bid_t tsb_id; /* batch ID */
	int tsb_index; /* test index */
} lstcon_tsb_hdr_t;

/* A batch: an ordered set of tests plus the client/server node lists
 * they run on. */
typedef struct {
	lstcon_tsb_hdr_t bat_hdr; /* test_batch header */
	struct list_head bat_link; /* chain on session's batches list */
	int bat_ntest; /* # of test */
	int bat_state; /* state of the batch */
	int bat_arg; /* parameter for run|stop, timeout for run, force for stop */
	char bat_name[LST_NAME_SIZE]; /* name of batch */

	struct list_head bat_test_list; /* list head of tests (lstcon_test_t) */
	struct list_head bat_trans_list; /* list head of transaction */
	struct list_head bat_cli_list; /* list head of client nodes (lstcon_node_t) */
	struct list_head *bat_cli_hash; /* hash table of client nodes */
	struct list_head bat_srv_list; /* list head of server nodes */
	struct list_head *bat_srv_hash; /* hash table of server nodes */
} lstcon_batch_t; /*** (tests ) batch descritptor */

/* One test inside a batch; tes_param is a flexible-array parameter blob
 * of tes_paramlen bytes. */
typedef struct lstcon_test {
	lstcon_tsb_hdr_t tes_hdr; /* test batch header */
	struct list_head tes_link; /* chain on batch's tests list */
	lstcon_batch_t *tes_batch; /* pointer to batch */

	int tes_type; /* type of the test, i.e: bulk, ping */
	int tes_stop_onerr; /* stop on error */
	int tes_oneside; /* one-sided test */
	int tes_concur; /* concurrency */
	int tes_loop; /* loop count */
	int tes_dist; /* nodes distribution of target group */
	int tes_span; /* nodes span of target group */
	int tes_cliidx; /* client index, used for RPC creating */

	struct list_head tes_trans_list; /* transaction list */
	lstcon_group_t *tes_src_grp; /* group run the test */
	lstcon_group_t *tes_dst_grp; /* target group */

	int tes_paramlen; /* test parameter length */
	char tes_param[0]; /* test parameter */
} lstcon_test_t; /*** a single test descriptor */
#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */
#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */
#define LST_SESSION_NONE 0x0 /* no session */
#define LST_SESSION_ACTIVE 0x1 /* working session */
#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */
/* Global state of the (single) console session; serialized by
 * ses_mutex. */
typedef struct {
	struct mutex ses_mutex; /* only 1 thread in session */
	lst_sid_t ses_id; /* global session id */
	int ses_key; /* local session key */
	int ses_state; /* state of session */
	int ses_timeout; /* timeout in seconds */
	time_t ses_laststamp; /* last operation stamp (seconds) */
	/** tests features of the session */
	unsigned ses_features;
	/** features are synced with remote test nodes */
	unsigned ses_feats_updated:1;
	/** force creating */
	unsigned ses_force:1;
	/** session is shutting down */
	unsigned ses_shutdown:1;
	/** console is timedout */
	unsigned ses_expired:1;
	__u64 ses_id_cookie; /* batch id cookie */
	char ses_name[LST_NAME_SIZE]; /* session name */
	lstcon_rpc_trans_t *ses_ping; /* session pinger */
	stt_timer_t ses_ping_timer; /* timer for pinger */
	lstcon_trans_stat_t ses_trans_stat; /* transaction stats */

	struct list_head ses_trans_list; /* global list of transaction */
	struct list_head ses_grp_list; /* global list of groups */
	struct list_head ses_bat_list; /* global list of batches */
	struct list_head ses_ndl_list; /* global list of nodes */
	struct list_head *ses_ndl_hash; /* hash table of nodes */

	spinlock_t ses_rpc_lock; /* serialize */
	atomic_t ses_rpc_counter;/* # of initialized RPCs */
	struct list_head ses_rpc_freelist; /* idle console rpc */
} lstcon_session_t; /*** session descriptor */

extern lstcon_session_t console_session;

/* Shorthand accessor for the session-wide transaction statistics. */
static inline lstcon_trans_stat_t *
lstcon_trans_stat(void)
{
	return &console_session.ses_trans_stat;
}
/* Map a node id onto its bucket of an LST_NODE_HASHSIZE-wide table. */
static inline struct list_head *
lstcon_id2hash(lnet_process_id_t id, struct list_head *hash)
{
	return &hash[LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE];
}
extern int lstcon_session_match(lst_sid_t sid);
extern int lstcon_session_new(char *name, int key, unsigned version,
int timeout, int flags, lst_sid_t *sid_up);
extern int lstcon_session_info(lst_sid_t *sid_up, int *key, unsigned *verp,
lstcon_ndlist_ent_t *entp, char *name_up, int len);
extern int lstcon_session_end(void);
extern int lstcon_session_debug(int timeout, struct list_head *result_up);
extern int lstcon_session_feats_check(unsigned feats);
extern int lstcon_batch_debug(int timeout, char *name,
int client, struct list_head *result_up);
extern int lstcon_group_debug(int timeout, char *name,
struct list_head *result_up);
extern int lstcon_nodes_debug(int timeout, int nnd, lnet_process_id_t *nds_up,
struct list_head *result_up);
extern int lstcon_group_add(char *name);
extern int lstcon_group_del(char *name);
extern int lstcon_group_clean(char *name, int args);
extern int lstcon_group_refresh(char *name, struct list_head *result_up);
extern int lstcon_nodes_add(char *name, int nnd, lnet_process_id_t *nds_up,
unsigned *featp, struct list_head *result_up);
extern int lstcon_nodes_remove(char *name, int nnd, lnet_process_id_t *nds_up,
struct list_head *result_up);
extern int lstcon_group_info(char *name, lstcon_ndlist_ent_t *gent_up,
int *index_p, int *ndent_p, lstcon_node_ent_t *ndents_up);
extern int lstcon_group_list(int idx, int len, char *name_up);
extern int lstcon_batch_add(char *name);
extern int lstcon_batch_run(char *name, int timeout,
struct list_head *result_up);
extern int lstcon_batch_stop(char *name, int force,
struct list_head *result_up);
extern int lstcon_test_batch_query(char *name, int testidx,
int client, int timeout,
struct list_head *result_up);
extern int lstcon_batch_del(char *name);
extern int lstcon_batch_list(int idx, int namelen, char *name_up);
extern int lstcon_batch_info(char *name, lstcon_test_batch_ent_t *ent_up,
int server, int testidx, int *index_p,
int *ndent_p, lstcon_node_ent_t *dents_up);
extern int lstcon_group_stat(char *grp_name, int timeout,
struct list_head *result_up);
extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
int timeout, struct list_head *result_up);
extern int lstcon_test_add(char *name, int type, int loop, int concur,
int dist, int span, char *src_name, char * dst_name,
void *param, int paramlen, int *retp,
struct list_head *result_up);
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,169 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define DEBUG_SUBSYSTEM S_LNET
#include "selftest.h"
/*
 * Progress markers for module initialization; lnet_selftest_fini()
 * switches on the last step reached to unwind in reverse order.
 */
enum {
	LST_INIT_NONE = 0,	/* nothing initialized yet */
	LST_INIT_WI_SERIAL,	/* serial workitem scheduler created */
	LST_INIT_WI_TEST,	/* per-CPT test scheduler array allocated */
	LST_INIT_RPC,		/* srpc_startup() completed */
	LST_INIT_FW,		/* sfw_startup() completed */
	LST_INIT_CONSOLE	/* lstcon_console_init() completed */
};
extern int lstcon_console_init(void);
extern int lstcon_console_fini(void);
static int lst_init_step = LST_INIT_NONE;
struct cfs_wi_sched *lst_sched_serial;
struct cfs_wi_sched **lst_sched_test;
/*
 * Tear down the selftest module in the reverse order of initialization.
 *
 * lst_init_step records how far lnet_selftest_init() got; each switch
 * case deliberately falls through so all earlier steps are undone too.
 */
void
lnet_selftest_fini(void)
{
	int i;

	switch (lst_init_step) {
	case LST_INIT_CONSOLE:
		lstcon_console_fini();
		/* fallthrough */
	case LST_INIT_FW:
		sfw_shutdown();
		/* fallthrough */
	case LST_INIT_RPC:
		srpc_shutdown();
		/* fallthrough */
	case LST_INIT_WI_TEST:
		for (i = 0;
		     i < cfs_cpt_number(lnet_cpt_table()); i++) {
			if (lst_sched_test[i] == NULL)
				continue;
			cfs_wi_sched_destroy(lst_sched_test[i]);
		}
		LIBCFS_FREE(lst_sched_test,
			    sizeof(lst_sched_test[0]) *
			    cfs_cpt_number(lnet_cpt_table()));
		lst_sched_test = NULL;
		/* fallthrough */
	case LST_INIT_WI_SERIAL:
		cfs_wi_sched_destroy(lst_sched_serial);
		lst_sched_serial = NULL;
		/* fallthrough */
	case LST_INIT_NONE:
		break;
	default:
		LBUG();
	}
	return;
}
/*
 * Compile-time checks that the wire structures keep the exact sizes and
 * field offsets the LST protocol requires.  CLASSERT fails the build on
 * a mismatch, so this function never needs to run.
 */
void
lnet_selftest_structure_assertion(void)
{
	CLASSERT(sizeof(srpc_msg_t) == 160);
	CLASSERT(sizeof(srpc_test_reqst_t) == 70);
	CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_concur) == 72);
	CLASSERT(offsetof(srpc_msg_t, msg_body.tes_reqst.tsr_ndest) == 78);
	CLASSERT(sizeof(srpc_stat_reply_t) == 136);
	CLASSERT(sizeof(srpc_stat_reqst_t) == 28);
}
int
lnet_selftest_init(void)
{
int nscheds;
int rc;
int i;
rc = cfs_wi_sched_create("lst_s", lnet_cpt_table(), CFS_CPT_ANY,
1, &lst_sched_serial);
if (rc != 0) {
CERROR("Failed to create serial WI scheduler for LST\n");
return rc;
}
lst_init_step = LST_INIT_WI_SERIAL;
nscheds = cfs_cpt_number(lnet_cpt_table());
LIBCFS_ALLOC(lst_sched_test, sizeof(lst_sched_test[0]) * nscheds);
if (lst_sched_test == NULL)
goto error;
lst_init_step = LST_INIT_WI_TEST;
for (i = 0; i < nscheds; i++) {
int nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
/* reserve at least one CPU for LND */
nthrs = max(nthrs - 1, 1);
rc = cfs_wi_sched_create("lst_t", lnet_cpt_table(), i,
nthrs, &lst_sched_test[i]);
if (rc != 0) {
CERROR("Failed to create CPT affinity WI scheduler "
"%d for LST\n", i);
goto error;
}
}
rc = srpc_startup();
if (rc != 0) {
CERROR("LST can't startup rpc\n");
goto error;
}
lst_init_step = LST_INIT_RPC;
rc = sfw_startup();
if (rc != 0) {
CERROR("LST can't startup framework\n");
goto error;
}
lst_init_step = LST_INIT_FW;
rc = lstcon_console_init();
if (rc != 0) {
CERROR("LST can't startup console\n");
goto error;
}
lst_init_step = LST_INIT_CONSOLE;
return 0;
error:
lnet_selftest_fini();
return rc;
}
MODULE_DESCRIPTION("LNet Selftest");
MODULE_LICENSE("GPL");
cfs_module(lnet, "0.9.0", lnet_selftest_init, lnet_selftest_fini);

View file

@ -0,0 +1,229 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/conctl.c
*
* Test client & Server
*
* Author: Liang Zhen <liangzhen@clusterfs.com>
*/
#include "selftest.h"
#define LST_PING_TEST_MAGIC 0xbabeface
int ping_srv_workitems = SFW_TEST_WI_MAX;
CFS_MODULE_PARM(ping_srv_workitems, "i", int, 0644, "# PING server workitems");
/* Module-wide ping sequence state, shared by all ping client test
 * units so that replies can be matched against requests by sequence
 * number. */
typedef struct {
	spinlock_t pnd_lock; /* serialize */
	int pnd_counter; /* sequence counter */
} lst_ping_data_t;

static lst_ping_data_t lst_ping_data;
/*
 * Initialize the ping test client for a test instance: reset the
 * global sequence counter and its lock.  Always returns 0.
 *
 * NOTE(review): lst_ping_data is module-global but re-initialized on
 * every client init -- presumably only one ping client instance
 * initializes at a time; confirm against the framework callers.
 */
static int
ping_client_init(sfw_test_instance_t *tsi)
{
	sfw_session_t *sn = tsi->tsi_batch->bat_session;

	/* only usable on the client side, within an active session whose
	 * features fit in the known feature mask */
	LASSERT(tsi->tsi_is_client);
	LASSERT(sn != NULL && (sn->sn_features & ~LST_FEATS_MASK) == 0);
	spin_lock_init(&lst_ping_data.pnd_lock);
	lst_ping_data.pnd_counter = 0;
	return 0;
}
/* Finalize the ping test client: report the number of pings that
 * failed over the lifetime of the session. */
static void
ping_client_fini(sfw_test_instance_t *tsi)
{
	sfw_session_t *sn = tsi->tsi_batch->bat_session;
	int nerrors;

	LASSERT(sn != NULL);
	LASSERT(tsi->tsi_is_client);

	nerrors = atomic_read(&sn->sn_ping_errors);
	if (nerrors != 0)
		CWARN("%d pings have failed.\n", nerrors);
	else
		CDEBUG(D_NET, "Ping test finished OK.\n");
}
/*
 * Build one outgoing ping RPC for a test unit: allocate the test RPC,
 * then stamp the request with the magic, a globally increasing
 * sequence number, and the current send time (used by the done
 * callback to compute round-trip latency).
 *
 * Returns 0 with *rpc set, or the error from sfw_create_test_rpc().
 */
static int
ping_client_prep_rpc(sfw_test_unit_t *tsu,
		     lnet_process_id_t dest, srpc_client_rpc_t **rpc)
{
	srpc_ping_reqst_t *req;
	sfw_test_instance_t *tsi = tsu->tsu_instance;
	sfw_session_t *sn = tsi->tsi_batch->bat_session;
	struct timeval tv;
	int rc;

	LASSERT(sn != NULL);
	LASSERT((sn->sn_features & ~LST_FEATS_MASK) == 0);

	/* ping carries no bulk data: 0 bulk iovs, 0 bulk length */
	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc);
	if (rc != 0)
		return rc;

	req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst;
	req->pnr_magic = LST_PING_TEST_MAGIC;

	/* sequence counter is shared module-wide; lock for the increment */
	spin_lock(&lst_ping_data.pnd_lock);
	req->pnr_seq = lst_ping_data.pnd_counter++;
	spin_unlock(&lst_ping_data.pnd_lock);

	cfs_fs_timeval(&tv);
	req->pnr_time_sec = tv.tv_sec;
	req->pnr_time_usec = tv.tv_usec;

	return rc;
}
/*
 * Completion callback for a ping RPC: validate the reply and log the
 * round-trip time.  Any failure (transport error, bad magic, sequence
 * mismatch) bumps the session's sn_ping_errors counter, which
 * ping_client_fini() reports.
 */
static void
ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
{
	sfw_test_instance_t *tsi = tsu->tsu_instance;
	sfw_session_t *sn = tsi->tsi_batch->bat_session;
	srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
	srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
	struct timeval tv;

	LASSERT (sn != NULL);

	if (rpc->crpc_status != 0) {
		if (!tsi->tsi_stopping) /* rpc could have been aborted */
			atomic_inc(&sn->sn_ping_errors);
		CERROR ("Unable to ping %s (%d): %d\n",
			libcfs_id2str(rpc->crpc_dest),
			reqst->pnr_seq, rpc->crpc_status);
		return;
	}

	/* peer of opposite endianness: byte-swap the reply fields
	 * (the magic itself is left unswapped as the flip indicator) */
	if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
		__swab32s(&reply->pnr_seq);
		__swab32s(&reply->pnr_magic);
		__swab32s(&reply->pnr_status);
	}

	if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
		rpc->crpc_status = -EBADMSG;
		atomic_inc(&sn->sn_ping_errors);
		CERROR ("Bad magic %u from %s, %u expected.\n",
			reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
			LST_PING_TEST_MAGIC);
		return;
	}

	if (reply->pnr_seq != reqst->pnr_seq) {
		rpc->crpc_status = -EBADMSG;
		atomic_inc(&sn->sn_ping_errors);
		CERROR ("Bad seq %u from %s, %u expected.\n",
			reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
			reqst->pnr_seq);
		return;
	}

	/* round-trip time = now - send timestamp recorded in the request */
	cfs_fs_timeval(&tv);
	CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq,
		(unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000
			   + (tv.tv_usec - reqst->pnr_time_usec)));
	return;
}
/*
 * Server-side handler for an incoming ping request: byte-swap the
 * request if the peer has opposite endianness, validate the magic,
 * and echo the sequence number back in the reply.
 *
 * Returns 0 when a reply should be sent (including the feature-
 * mismatch case, where the reply carries a positive EPROTO in its
 * wire status field), or -EINVAL to drop a request with a bad magic.
 */
static int
ping_server_handle(struct srpc_server_rpc *rpc)
{
	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
	srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
	srpc_msg_t *replymsg = &rpc->srpc_replymsg;
	srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst;
	srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply;

	LASSERT (sv->sv_id == SRPC_SERVICE_PING);

	/* peer of opposite endianness: swap the request body in place */
	if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
		LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));

		__swab32s(&req->pnr_seq);
		__swab32s(&req->pnr_magic);
		__swab64s(&req->pnr_time_sec);
		__swab64s(&req->pnr_time_usec);
	}
	LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));

	if (req->pnr_magic != LST_PING_TEST_MAGIC) {
		CERROR ("Unexpect magic %08x from %s\n",
			req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
		return -EINVAL;
	}

	rep->pnr_seq = req->pnr_seq;
	rep->pnr_magic = LST_PING_TEST_MAGIC;

	/* feature negotiation: reject unknown session features but still
	 * reply, advertising the mask we do understand */
	if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
		replymsg->msg_ses_feats = LST_FEATS_MASK;
		rep->pnr_status = EPROTO;
		return 0;
	}

	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;

	CDEBUG(D_NET, "Get ping %d from %s\n",
	       req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
	return 0;
}
sfw_test_client_ops_t ping_test_client;

/* Populate the ping test client operation table with the handlers
 * defined above. */
void ping_init_test_client(void)
{
	sfw_test_client_ops_t *ops = &ping_test_client;

	ops->tso_init = ping_client_init;
	ops->tso_fini = ping_client_fini;
	ops->tso_prep_rpc = ping_client_prep_rpc;
	ops->tso_done_rpc = ping_client_done_rpc;
}
srpc_service_t ping_test_service;

/* Populate the ping test service descriptor: id, name, request
 * handler and total number of server workitems. */
void ping_init_test_service(void)
{
	srpc_service_t *svc = &ping_test_service;

	svc->sv_id = SRPC_SERVICE_PING;
	svc->sv_name = "ping_test";
	svc->sv_handler = ping_server_handle;
	svc->sv_wi_total = ping_srv_workitems;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,302 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#ifndef __SELFTEST_RPC_H__
#define __SELFTEST_RPC_H__
#include <linux/lnet/lnetst.h>
/*
* LST wired structures
*
* XXX: *REPLY == *REQST + 1
*/
/* Wire message types.  Values are part of the on-wire protocol: each
 * *_REPLY is its *_REQST plus one (srpc_service2reply() relies on
 * this), so new pairs must be appended, never renumbered. */
typedef enum {
	SRPC_MSG_MKSN_REQST = 0,
	SRPC_MSG_MKSN_REPLY = 1,
	SRPC_MSG_RMSN_REQST = 2,
	SRPC_MSG_RMSN_REPLY = 3,
	SRPC_MSG_BATCH_REQST = 4,
	SRPC_MSG_BATCH_REPLY = 5,
	SRPC_MSG_STAT_REQST = 6,
	SRPC_MSG_STAT_REPLY = 7,
	SRPC_MSG_TEST_REQST = 8,
	SRPC_MSG_TEST_REPLY = 9,
	SRPC_MSG_DEBUG_REQST = 10,
	SRPC_MSG_DEBUG_REPLY = 11,
	SRPC_MSG_BRW_REQST = 12,
	SRPC_MSG_BRW_REPLY = 13,
	SRPC_MSG_PING_REQST = 14,
	SRPC_MSG_PING_REPLY = 15,
	SRPC_MSG_JOIN_REQST = 16,
	SRPC_MSG_JOIN_REPLY = 17,
} srpc_msg_type_t;
/* CAVEAT EMPTOR:
* All srpc_*_reqst_t's 1st field must be matchbits of reply buffer,
* and 2nd field matchbits of bulk buffer if any.
*
* All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field
* session id if needed.
*/
typedef struct {
__u64 rpyid; /* reply buffer matchbits */
__u64 bulkid; /* bulk buffer matchbits */
} WIRE_ATTR srpc_generic_reqst_t;
typedef struct {
__u32 status;
lst_sid_t sid;
} WIRE_ATTR srpc_generic_reply_t;
/* FRAMEWORK RPCs */
typedef struct {
__u64 mksn_rpyid; /* reply buffer matchbits */
lst_sid_t mksn_sid; /* session id */
__u32 mksn_force; /* use brute force */
char mksn_name[LST_NAME_SIZE];
} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */
typedef struct {
__u32 mksn_status; /* session status */
lst_sid_t mksn_sid; /* session id */
__u32 mksn_timeout; /* session timeout */
char mksn_name[LST_NAME_SIZE];
} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */
typedef struct {
__u64 rmsn_rpyid; /* reply buffer matchbits */
lst_sid_t rmsn_sid; /* session id */
} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */
typedef struct {
__u32 rmsn_status;
lst_sid_t rmsn_sid; /* session id */
} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */
typedef struct {
__u64 join_rpyid; /* reply buffer matchbits */
lst_sid_t join_sid; /* session id to join */
char join_group[LST_NAME_SIZE]; /* group name */
} WIRE_ATTR srpc_join_reqst_t;
typedef struct {
__u32 join_status; /* returned status */
lst_sid_t join_sid; /* session id */
__u32 join_timeout; /* # seconds' inactivity to expire */
char join_session[LST_NAME_SIZE]; /* session name */
} WIRE_ATTR srpc_join_reply_t;
typedef struct {
__u64 dbg_rpyid; /* reply buffer matchbits */
lst_sid_t dbg_sid; /* session id */
__u32 dbg_flags; /* bitmap of debug */
} WIRE_ATTR srpc_debug_reqst_t;
typedef struct {
__u32 dbg_status; /* returned code */
lst_sid_t dbg_sid; /* session id */
__u32 dbg_timeout; /* session timeout */
__u32 dbg_nbatch; /* # of batches in the node */
char dbg_name[LST_NAME_SIZE]; /* session name */
} WIRE_ATTR srpc_debug_reply_t;
#define SRPC_BATCH_OPC_RUN 1
#define SRPC_BATCH_OPC_STOP 2
#define SRPC_BATCH_OPC_QUERY 3
typedef struct {
__u64 bar_rpyid; /* reply buffer matchbits */
lst_sid_t bar_sid; /* session id */
lst_bid_t bar_bid; /* batch id */
__u32 bar_opc; /* create/start/stop batch */
__u32 bar_testidx; /* index of test */
__u32 bar_arg; /* parameters */
} WIRE_ATTR srpc_batch_reqst_t;
typedef struct {
__u32 bar_status; /* status of request */
lst_sid_t bar_sid; /* session id */
__u32 bar_active; /* # of active tests in batch/test */
__u32 bar_time; /* remained time */
} WIRE_ATTR srpc_batch_reply_t;
typedef struct {
__u64 str_rpyid; /* reply buffer matchbits */
lst_sid_t str_sid; /* session id */
__u32 str_type; /* type of stat */
} WIRE_ATTR srpc_stat_reqst_t;
typedef struct {
__u32 str_status;
lst_sid_t str_sid;
sfw_counters_t str_fw;
srpc_counters_t str_rpc;
lnet_counters_t str_lnet;
} WIRE_ATTR srpc_stat_reply_t;
typedef struct {
__u32 blk_opc; /* bulk operation code */
__u32 blk_npg; /* # of pages */
__u32 blk_flags; /* reserved flags */
} WIRE_ATTR test_bulk_req_t;
typedef struct {
/** bulk operation code */
__u16 blk_opc;
/** data check flags */
__u16 blk_flags;
/** data length */
__u32 blk_len;
/** reserved: offset */
__u32 blk_offset;
} WIRE_ATTR test_bulk_req_v1_t;
typedef struct {
__u32 png_size; /* size of ping message */
__u32 png_flags; /* reserved flags */
} WIRE_ATTR test_ping_req_t;
typedef struct {
__u64 tsr_rpyid; /* reply buffer matchbits */
__u64 tsr_bulkid; /* bulk buffer matchbits */
lst_sid_t tsr_sid; /* session id */
lst_bid_t tsr_bid; /* batch id */
__u32 tsr_service; /* test type: bulk|ping|... */
/* test client loop count or # server buffers needed */
__u32 tsr_loop;
__u32 tsr_concur; /* concurrency of test */
__u8 tsr_is_client; /* is test client or not */
__u8 tsr_stop_onerr; /* stop on error */
__u32 tsr_ndest; /* # of dest nodes */
union {
test_ping_req_t ping;
test_bulk_req_t bulk_v0;
test_bulk_req_v1_t bulk_v1;
} tsr_u;
} WIRE_ATTR srpc_test_reqst_t;
typedef struct {
__u32 tsr_status; /* returned code */
lst_sid_t tsr_sid;
} WIRE_ATTR srpc_test_reply_t;
/* TEST RPCs */
typedef struct {
	__u64 pnr_rpyid;	/* reply buffer matchbits */
	__u32 pnr_magic;	/* LST_PING_TEST_MAGIC */
	__u32 pnr_seq;		/* sequence number, echoed in reply */
	__u64 pnr_time_sec;	/* client send time (seconds) */
	__u64 pnr_time_usec;	/* client send time (microseconds) */
} WIRE_ATTR srpc_ping_reqst_t;

typedef struct {
	__u32 pnr_status;	/* 0 or positive errno (e.g. EPROTO) */
	__u32 pnr_magic;	/* LST_PING_TEST_MAGIC echoed back */
	__u32 pnr_seq;		/* request sequence echoed back */
} WIRE_ATTR srpc_ping_reply_t;
typedef struct {
__u64 brw_rpyid; /* reply buffer matchbits */
__u64 brw_bulkid; /* bulk buffer matchbits */
__u32 brw_rw; /* read or write */
__u32 brw_len; /* bulk data len */
__u32 brw_flags; /* bulk data patterns */
} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */
typedef struct {
__u32 brw_status;
} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */
#define SRPC_MSG_MAGIC 0xeeb0f00d
#define SRPC_MSG_VERSION 1
/* On-wire RPC envelope: fixed header followed by a union of all
 * request/reply bodies.  sizeof(srpc_msg_t) is pinned to 160 bytes by
 * lnet_selftest_structure_assertion(). */
typedef struct srpc_msg {
	/** magic number */
	__u32 msg_magic;
	/** message version number */
	__u32 msg_version;
	/** type of message body: srpc_msg_type_t */
	__u32 msg_type;
	__u32 msg_reserved0;
	__u32 msg_reserved1;
	/** test session features */
	__u32 msg_ses_feats;
	/* body selected by msg_type; only one member is meaningful */
	union {
		srpc_generic_reqst_t reqst;
		srpc_generic_reply_t reply;

		srpc_mksn_reqst_t mksn_reqst;
		srpc_mksn_reply_t mksn_reply;
		srpc_rmsn_reqst_t rmsn_reqst;
		srpc_rmsn_reply_t rmsn_reply;
		srpc_debug_reqst_t dbg_reqst;
		srpc_debug_reply_t dbg_reply;
		srpc_batch_reqst_t bat_reqst;
		srpc_batch_reply_t bat_reply;
		srpc_stat_reqst_t stat_reqst;
		srpc_stat_reply_t stat_reply;
		srpc_test_reqst_t tes_reqst;
		srpc_test_reply_t tes_reply;
		srpc_join_reqst_t join_reqst;
		srpc_join_reply_t join_reply;

		srpc_ping_reqst_t ping_reqst;
		srpc_ping_reply_t ping_reply;
		srpc_brw_reqst_t brw_reqst;
		srpc_brw_reply_t brw_reply;
	} msg_body;
} WIRE_ATTR srpc_msg_t;
/*
 * Byte-swap the header fields of a message received from a peer of
 * opposite endianness; a no-op when the magic already matches ours.
 *
 * The magic itself is deliberately never swapped: later code compares
 * it against SRPC_MSG_MAGIC to decide whether the message BODY still
 * needs flipping.
 */
static inline void
srpc_unpack_msg_hdr(srpc_msg_t *msg)
{
	if (msg->msg_magic == SRPC_MSG_MAGIC)
		return; /* no flipping needed */

	/* __swab32s(&msg->msg_magic); -- intentionally skipped, see above */
	__swab32s(&msg->msg_version);
	__swab32s(&msg->msg_type);
	__swab32s(&msg->msg_reserved0);
	__swab32s(&msg->msg_reserved1);
	__swab32s(&msg->msg_ses_feats);
}
#endif /* __SELFTEST_RPC_H__ */

View file

@ -0,0 +1,611 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
* copy of GPLv2].
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/selftest.h
*
* Author: Isaac Huang <isaac@clusterfs.com>
*/
#ifndef __SELFTEST_SELFTEST_H__
#define __SELFTEST_SELFTEST_H__
#define LNET_ONLY
#include <linux/libcfs/libcfs.h>
#include <linux/lnet/lnet.h>
#include <linux/lnet/lib-lnet.h>
#include <linux/lnet/lib-types.h>
#include <linux/lnet/lnetst.h>
#include "rpc.h"
#include "timer.h"
#ifndef MADE_WITHOUT_COMPROMISE
#define MADE_WITHOUT_COMPROMISE
#endif
#define SWI_STATE_NEWBORN 0
#define SWI_STATE_REPLY_SUBMITTED 1
#define SWI_STATE_REPLY_SENT 2
#define SWI_STATE_REQUEST_SUBMITTED 3
#define SWI_STATE_REQUEST_SENT 4
#define SWI_STATE_REPLY_RECEIVED 5
#define SWI_STATE_BULK_STARTED 6
#define SWI_STATE_DONE 10
/* forward refs */
struct srpc_service;
struct srpc_service_cd;
struct sfw_test_unit;
struct sfw_test_instance;
/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
* services, e.g. create/modify session.
*/
#define SRPC_SERVICE_DEBUG 0
#define SRPC_SERVICE_MAKE_SESSION 1
#define SRPC_SERVICE_REMOVE_SESSION 2
#define SRPC_SERVICE_BATCH 3
#define SRPC_SERVICE_TEST 4
#define SRPC_SERVICE_QUERY_STAT 5
#define SRPC_SERVICE_JOIN 6
#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
#define SRPC_SERVICE_BRW 11
#define SRPC_SERVICE_PING 12
#define SRPC_SERVICE_MAX_ID 12
#define SRPC_REQUEST_PORTAL 50
/* a lazy portal for framework RPC requests */
#define SRPC_FRAMEWORK_REQUEST_PORTAL 51
/* all reply/bulk RDMAs go to this portal */
#define SRPC_RDMA_PORTAL 52
/*
 * Map a service id to its request message type.
 *
 * The default (unknown service) case is listed first: LBUG() is a
 * kernel bug assertion, and listing it before the first real case also
 * avoids a missing-return warning if control ever continued past it.
 */
static inline srpc_msg_type_t
srpc_service2request (int service)
{
	switch (service) {
	default:
		LBUG ();
	case SRPC_SERVICE_DEBUG:
		return SRPC_MSG_DEBUG_REQST;

	case SRPC_SERVICE_MAKE_SESSION:
		return SRPC_MSG_MKSN_REQST;

	case SRPC_SERVICE_REMOVE_SESSION:
		return SRPC_MSG_RMSN_REQST;

	case SRPC_SERVICE_BATCH:
		return SRPC_MSG_BATCH_REQST;

	case SRPC_SERVICE_TEST:
		return SRPC_MSG_TEST_REQST;

	case SRPC_SERVICE_QUERY_STAT:
		return SRPC_MSG_STAT_REQST;

	case SRPC_SERVICE_BRW:
		return SRPC_MSG_BRW_REQST;

	case SRPC_SERVICE_PING:
		return SRPC_MSG_PING_REQST;

	case SRPC_SERVICE_JOIN:
		return SRPC_MSG_JOIN_REQST;
	}
}
/* Reply message type for a service: by wire-protocol convention every
 * *_REPLY value is the matching *_REQST value plus one. */
static inline srpc_msg_type_t
srpc_service2reply(int service)
{
	srpc_msg_type_t reqst = srpc_service2request(service);

	return reqst + 1;
}
typedef enum {
SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) received */
SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */
SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */
SRPC_REPLY_RCVD = 4, /* incoming reply received */
SRPC_REPLY_SENT = 5, /* outgoing reply sent */
SRPC_REQUEST_RCVD = 6, /* incoming request received */
SRPC_REQUEST_SENT = 7, /* outgoing request sent */
} srpc_event_type_t;
/* RPC event */
typedef struct {
srpc_event_type_t ev_type; /* what's up */
lnet_event_kind_t ev_lnet; /* LNet event type */
int ev_fired; /* LNet event fired? */
int ev_status; /* LNet event status */
void *ev_data; /* owning server/client RPC */
} srpc_event_t;
typedef struct {
int bk_len; /* len of bulk data */
lnet_handle_md_t bk_mdh;
int bk_sink; /* sink/source */
int bk_niov; /* # iov in bk_iovs */
lnet_kiov_t bk_iovs[0];
} srpc_bulk_t; /* bulk descriptor */
/* message buffer descriptor */
typedef struct srpc_buffer {
struct list_head buf_list; /* chain on srpc_service::*_msgq */
srpc_msg_t buf_msg;
lnet_handle_md_t buf_mdh;
lnet_nid_t buf_self;
lnet_process_id_t buf_peer;
} srpc_buffer_t;
struct swi_workitem;
typedef int (*swi_action_t) (struct swi_workitem *);
typedef struct swi_workitem {
struct cfs_wi_sched *swi_sched;
cfs_workitem_t swi_workitem;
swi_action_t swi_action;
int swi_state;
} swi_workitem_t;
/* server-side state of a RPC */
typedef struct srpc_server_rpc {
/* chain on srpc_service::*_rpcq */
struct list_head srpc_list;
struct srpc_service_cd *srpc_scd;
swi_workitem_t srpc_wi;
srpc_event_t srpc_ev; /* bulk/reply event */
lnet_nid_t srpc_self;
lnet_process_id_t srpc_peer;
srpc_msg_t srpc_replymsg;
lnet_handle_md_t srpc_replymdh;
srpc_buffer_t *srpc_reqstbuf;
srpc_bulk_t *srpc_bulk;
unsigned int srpc_aborted; /* being given up */
int srpc_status;
void (*srpc_done)(struct srpc_server_rpc *);
} srpc_server_rpc_t;
/* client-side state of a RPC */
typedef struct srpc_client_rpc {
struct list_head crpc_list; /* chain on user's lists */
spinlock_t crpc_lock; /* serialize */
int crpc_service;
atomic_t crpc_refcount;
int crpc_timeout; /* # seconds to wait for reply */
stt_timer_t crpc_timer;
swi_workitem_t crpc_wi;
lnet_process_id_t crpc_dest;
void (*crpc_done)(struct srpc_client_rpc *);
void (*crpc_fini)(struct srpc_client_rpc *);
int crpc_status; /* completion status */
void *crpc_priv; /* caller data */
/* state flags */
unsigned int crpc_aborted:1; /* being given up */
unsigned int crpc_closed:1; /* completed */
/* RPC events */
srpc_event_t crpc_bulkev; /* bulk event */
srpc_event_t crpc_reqstev; /* request event */
srpc_event_t crpc_replyev; /* reply event */
/* bulk, request(reqst), and reply exchanged on wire */
srpc_msg_t crpc_reqstmsg;
srpc_msg_t crpc_replymsg;
lnet_handle_md_t crpc_reqstmdh;
lnet_handle_md_t crpc_replymdh;
srpc_bulk_t crpc_bulk;
} srpc_client_rpc_t;
#define srpc_client_rpc_size(rpc) \
offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
#define srpc_client_rpc_addref(rpc) \
do { \
CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
(rpc), libcfs_id2str((rpc)->crpc_dest), \
atomic_read(&(rpc)->crpc_refcount)); \
LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
atomic_inc(&(rpc)->crpc_refcount); \
} while (0)
#define srpc_client_rpc_decref(rpc) \
do { \
CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
(rpc), libcfs_id2str((rpc)->crpc_dest), \
atomic_read(&(rpc)->crpc_refcount)); \
LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
srpc_destroy_client_rpc(rpc); \
} while (0)
#define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \
(rpc)->crpc_reqstev.ev_fired == 0 || \
(rpc)->crpc_replyev.ev_fired == 0)
/* CPU partition data of srpc service */
struct srpc_service_cd {
/** serialize */
spinlock_t scd_lock;
/** backref to service */
struct srpc_service *scd_svc;
/** event buffer */
srpc_event_t scd_ev;
/** free RPC descriptors */
struct list_head scd_rpc_free;
/** in-flight RPCs */
struct list_head scd_rpc_active;
/** workitem for posting buffer */
swi_workitem_t scd_buf_wi;
/** CPT id */
int scd_cpt;
/** error code for scd_buf_wi */
int scd_buf_err;
/** timestamp for scd_buf_err */
unsigned long scd_buf_err_stamp;
/** total # request buffers */
int scd_buf_total;
/** # posted request buffers */
int scd_buf_nposted;
/** in progress of buffer posting */
int scd_buf_posting;
/** allocate more buffers if scd_buf_nposted < scd_buf_low */
int scd_buf_low;
/** increase/decrease some buffers */
int scd_buf_adjust;
/** posted message buffers */
struct list_head scd_buf_posted;
/** blocked for RPC descriptor */
struct list_head scd_buf_blocked;
};
/* number of server workitems (mini-thread) for testing service */
#define SFW_TEST_WI_MIN 256
#define SFW_TEST_WI_MAX 2048
/* extra buffers for tolerating buggy peers, or unbalanced number
* of peers between partitions */
#define SFW_TEST_WI_EXTRA 64
/* number of server workitems (mini-thread) for framework service */
#define SFW_FRWK_WI_MIN 16
#define SFW_FRWK_WI_MAX 256
typedef struct srpc_service {
int sv_id; /* service id */
const char *sv_name; /* human readable name */
int sv_wi_total; /* total server workitems */
int sv_shuttingdown;
int sv_ncpts;
/* percpt data for srpc_service */
struct srpc_service_cd **sv_cpt_data;
/* Service callbacks:
* - sv_handler: process incoming RPC request
* - sv_bulk_ready: notify bulk data
*/
int (*sv_handler) (srpc_server_rpc_t *);
int (*sv_bulk_ready) (srpc_server_rpc_t *, int);
} srpc_service_t;
typedef struct {
struct list_head sn_list; /* chain on fw_zombie_sessions */
lst_sid_t sn_id; /* unique identifier */
unsigned int sn_timeout; /* # seconds' inactivity to expire */
int sn_timer_active;
unsigned int sn_features;
stt_timer_t sn_timer;
struct list_head sn_batches; /* list of batches */
char sn_name[LST_NAME_SIZE];
atomic_t sn_refcount;
atomic_t sn_brw_errors;
atomic_t sn_ping_errors;
cfs_time_t sn_started;
} sfw_session_t;
#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
(sid0).ses_stamp == (sid1).ses_stamp)
typedef struct {
struct list_head bat_list; /* chain on sn_batches */
lst_bid_t bat_id; /* batch id */
int bat_error; /* error code of batch */
sfw_session_t *bat_session; /* batch's session */
atomic_t bat_nactive; /* # of active tests */
struct list_head bat_tests; /* test instances */
} sfw_batch_t;
typedef struct {
int (*tso_init)(struct sfw_test_instance *tsi); /* intialize test client */
void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
lnet_process_id_t dest,
srpc_client_rpc_t **rpc); /* prep a tests rpc */
void (*tso_done_rpc)(struct sfw_test_unit *tsu,
srpc_client_rpc_t *rpc); /* done a test rpc */
} sfw_test_client_ops_t;
typedef struct sfw_test_instance {
struct list_head tsi_list; /* chain on batch */
int tsi_service; /* test type */
sfw_batch_t *tsi_batch; /* batch */
sfw_test_client_ops_t *tsi_ops; /* test client operations */
/* public parameter for all test units */
unsigned int tsi_is_client:1; /* is test client */
unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */
int tsi_concur; /* concurrency */
int tsi_loop; /* loop count */
/* status of test instance */
spinlock_t tsi_lock; /* serialize */
unsigned int tsi_stopping:1; /* test is stopping */
atomic_t tsi_nactive; /* # of active test unit */
struct list_head tsi_units; /* test units */
struct list_head tsi_free_rpcs; /* free rpcs */
struct list_head tsi_active_rpcs; /* active rpcs */
union {
test_ping_req_t ping; /* ping parameter */
test_bulk_req_t bulk_v0; /* bulk parameter */
test_bulk_req_v1_t bulk_v1; /* bulk v1 parameter */
} tsi_u;
} sfw_test_instance_t;
/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at
 * the end of pages are not used */
#define SFW_MAX_CONCUR LST_MAX_CONCUR
/* number of packed process ids that fit in one page */
#define SFW_ID_PER_PAGE (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t))
/* max # of destination ids addressable through a full iov of pages */
#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
/* pages needed to hold n destination ids, rounded up */
#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
/* One unit of work within a test instance: a single destination node
 * plus the workitem that drives RPCs to it. */
typedef struct sfw_test_unit {
struct list_head tsu_list; /* chain on lst_test_instance */
lnet_process_id_t tsu_dest; /* id of dest node */
int tsu_loop; /* loop count of the test */
sfw_test_instance_t *tsu_instance; /* pointer to test instance */
void *tsu_private; /* private data */
swi_workitem_t tsu_worker; /* workitem of the test unit */
} sfw_test_unit_t;
/* Registration record binding a test service to its client ops; chained
 * on the global list of known test types. */
typedef struct sfw_test_case {
struct list_head tsc_list; /* chain on fw_tests */
srpc_service_t *tsc_srv_service; /* test service */
sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */
} sfw_test_case_t;
/* --- framework (sfw_) layer: client RPC lifecycle --- */
srpc_client_rpc_t *
sfw_create_rpc(lnet_process_id_t peer, int service,
unsigned features, int nbulkiov, int bulklen,
void (*done) (srpc_client_rpc_t *), void *priv);
int sfw_create_test_rpc(sfw_test_unit_t *tsu,
lnet_process_id_t peer, unsigned features,
int nblk, int blklen, srpc_client_rpc_t **rpc);
void sfw_abort_rpc(srpc_client_rpc_t *rpc);
void sfw_post_rpc(srpc_client_rpc_t *rpc);
void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
void sfw_unpack_message(srpc_msg_t *msg);
/* --- framework (sfw_) layer: server-side bulk page management --- */
void sfw_free_pages(srpc_server_rpc_t *rpc);
void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i);
int sfw_alloc_pages(srpc_server_rpc_t *rpc, int cpt, int npages, int len,
int sink);
int sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply);
/* --- raw srpc layer: client RPCs, bulk buffers and services --- */
srpc_client_rpc_t *
srpc_create_client_rpc(lnet_process_id_t peer, int service,
int nbulkiov, int bulklen,
void (*rpc_done)(srpc_client_rpc_t *),
void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
void srpc_post_rpc(srpc_client_rpc_t *rpc);
void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
void srpc_free_bulk(srpc_bulk_t *bk);
srpc_bulk_t *srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len,
int sink);
int srpc_send_rpc(swi_workitem_t *wi);
int srpc_send_reply(srpc_server_rpc_t *rpc);
int srpc_add_service(srpc_service_t *sv);
int srpc_remove_service(srpc_service_t *sv);
void srpc_shutdown_service(srpc_service_t *sv);
void srpc_abort_service(srpc_service_t *sv);
int srpc_finish_service(srpc_service_t *sv);
int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
void srpc_get_counters(srpc_counters_t *cnt);
void srpc_set_counters(const srpc_counters_t *cnt);
/* workitem schedulers: one serial scheduler for framework services and a
 * per-CPT array for test traffic (defined in the selftest module) */
extern struct cfs_wi_sched *lst_sched_serial;
extern struct cfs_wi_sched **lst_sched_test;
/* A service is a "framework" service when its id lies below the
 * framework service id ceiling. */
static inline int
srpc_serv_is_framework(struct srpc_service *svc)
{
	int is_framework = (svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID);

	return is_framework;
}
/* Trampoline from the generic workitem callback to the selftest action:
 * recover the enclosing swi_workitem_t and invoke its handler. */
static inline int
swi_wi_action(cfs_workitem_t *wi)
{
	swi_workitem_t *swi_item;

	swi_item = container_of(wi, swi_workitem_t, swi_workitem);
	return swi_item->swi_action(swi_item);
}
/* Prepare a selftest workitem: record the action and scheduler, reset the
 * state machine to NEWBORN, and register swi_wi_action as the generic
 * workitem trampoline. */
static inline void
swi_init_workitem(swi_workitem_t *swi, void *data,
		  swi_action_t action, struct cfs_wi_sched *sched)
{
	swi->swi_action = action;
	swi->swi_sched = sched;
	swi->swi_state = SWI_STATE_NEWBORN;
	cfs_wi_init(&swi->swi_workitem, data, swi_wi_action);
}
/* Queue the workitem on the scheduler it was initialized with. */
static inline void
swi_schedule_workitem(swi_workitem_t *wi)
{
	struct cfs_wi_sched *sched = wi->swi_sched;

	cfs_wi_schedule(sched, &wi->swi_workitem);
}
/* Tell the scheduler this workitem is done and will not be requeued. */
static inline void
swi_exit_workitem(swi_workitem_t *swi)
{
	struct cfs_wi_sched *sched = swi->swi_sched;

	cfs_wi_exit(sched, &swi->swi_workitem);
}
/* Try to pull the workitem off its scheduler's queue; returns the
 * underlying cfs_wi_deschedule() result. */
static inline int
swi_deschedule_workitem(swi_workitem_t *swi)
{
	struct cfs_wi_sched *sched = swi->swi_sched;

	return cfs_wi_deschedule(sched, &swi->swi_workitem);
}
/* module init/fini entry points for the framework and rpc layers */
int sfw_startup(void);
int srpc_startup(void);
void sfw_shutdown(void);
void srpc_shutdown(void);
/*
 * Release a client RPC whose last reference has been dropped: hand the
 * object to its registered finalizer when one exists, otherwise free the
 * (variably sized, bulk-iov-trailing) allocation directly.
 *
 * Caller must guarantee the refcount is zero and no LNet event is still
 * pending for this RPC.
 */
static inline void
srpc_destroy_client_rpc(srpc_client_rpc_t *rpc)
{
	LASSERT(rpc != NULL);
	LASSERT(!srpc_event_pending(rpc));
	LASSERT(atomic_read(&rpc->crpc_refcount) == 0);

	if (rpc->crpc_fini == NULL)
		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
	else
		(*rpc->crpc_fini)(rpc);
}
/*
 * Initialize a freshly allocated client RPC: zero the fixed part of the
 * structure (up to and including nbulkiov bulk iov slots), take the
 * caller's reference, bind the workitem to the test scheduler for the
 * peer's CPT, and stamp the request message header.
 *
 * @peer      destination process id
 * @service   srpc service number; also selects the request message type
 * @nbulkiov  number of bulk iov entries (<= LNET_MAX_IOV)
 * @bulklen   total bulk length in bytes
 * @rpc_done  completion callback
 * @rpc_fini  finalizer invoked by srpc_destroy_client_rpc(), may be NULL
 * @priv      opaque caller cookie stored in crpc_priv
 */
static inline void
srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer,
		     int service, int nbulkiov, int bulklen,
		     void (*rpc_done)(srpc_client_rpc_t *),
		     void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
{
	LASSERT(nbulkiov <= LNET_MAX_IOV);

	/* zero everything up to the last used bulk iov slot */
	memset(rpc, 0, offsetof(srpc_client_rpc_t,
				crpc_bulk.bk_iovs[nbulkiov]));

	INIT_LIST_HEAD(&rpc->crpc_list);
	swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
			  lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
	spin_lock_init(&rpc->crpc_lock);
	atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */

	rpc->crpc_dest = peer;
	rpc->crpc_priv = priv;
	rpc->crpc_service = service;
	rpc->crpc_bulk.bk_len = bulklen;
	rpc->crpc_bulk.bk_niov = nbulkiov;
	rpc->crpc_done = rpc_done;
	rpc->crpc_fini = rpc_fini;
	LNetInvalidateHandle(&rpc->crpc_reqstmdh);
	LNetInvalidateHandle(&rpc->crpc_replymdh);
	LNetInvalidateHandle(&rpc->crpc_bulk.bk_mdh);

	/* no event is expected at this point */
	rpc->crpc_bulkev.ev_fired =
	rpc->crpc_reqstev.ev_fired =
	rpc->crpc_replyev.ev_fired = 1;

	rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
	rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
	rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
}
/*
 * Map a SWI_STATE_* value to its symbolic name for debug output.
 *
 * NOTE: "default" is deliberately listed first and falls through into the
 * first STATE2STR case; LBUG() is expected not to return, so an unknown
 * state crashes here instead of producing a bogus name.  Every reachable
 * path out of the switch returns, so no return is needed after it.
 */
static inline const char *
swi_state2str (int state)
{
#define STATE2STR(x) case x: return #x
switch(state) {
default:
LBUG();
STATE2STR(SWI_STATE_NEWBORN);
STATE2STR(SWI_STATE_REPLY_SUBMITTED);
STATE2STR(SWI_STATE_REPLY_SENT);
STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
STATE2STR(SWI_STATE_REQUEST_SENT);
STATE2STR(SWI_STATE_REPLY_RECEIVED);
STATE2STR(SWI_STATE_BULK_STARTED);
STATE2STR(SWI_STATE_DONE);
}
#undef STATE2STR
}
#define UNUSED(x) ( (void)(x) )
#define selftest_wait_events() cfs_pause(cfs_time_seconds(1) / 10)
#define lst_wait_until(cond, lock, fmt, ...) \
do { \
int __I = 2; \
while (!(cond)) { \
CDEBUG(IS_PO2(++__I) ? D_WARNING : D_NET, \
fmt, ## __VA_ARGS__); \
spin_unlock(&(lock)); \
\
selftest_wait_events(); \
\
spin_lock(&(lock)); \
} \
} while (0)
/*
 * Busy-wait (with short sleeps) until all outstanding RPCs of @sv have
 * drained, i.e. srpc_finish_service() reports completion.  The service
 * must already be marked as shutting down.  Progress is logged at
 * D_WARNING only on power-of-two iterations to avoid log flooding.
 */
static inline void
srpc_wait_service_shutdown(srpc_service_t *sv)
{
	int i = 2;

	LASSERT(sv->sv_shuttingdown);

	while (srpc_finish_service(sv) == 0) {
		i++;
		/* (i & -i) == i  <=>  i is a power of two */
		CDEBUG(((i & -i) == i) ? D_WARNING : D_NET,
		       "Waiting for %s service to shutdown...\n",
		       sv->sv_name);
		selftest_wait_events();
	}
}
#endif /* __SELFTEST_SELFTEST_H__ */

View file

@ -0,0 +1,253 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/timer.c
*
* Author: Isaac Huang <isaac@clusterfs.com>
*/
#define DEBUG_SUBSYSTEM S_LNET
#include "selftest.h"
/*
* Timers are implemented as a sorted queue of expiry times. The queue
* is slotted, with each slot holding timers which expire in a
* 2**STTIMER_MINPOLL (8) second period. The timers in each slot are
* sorted by increasing expiry time. The number of slots is 2**7 (128),
* to cover a time period of 1024 seconds into the future before wrapping.
*/
#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */
/* width of one hash slot in seconds (2**STTIMER_MINPOLL) */
#define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL)
/* mask that rounds a time down to the start of its slot */
#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1))
#define STTIMER_NSLOTS (1 << 7)
/* hash slot for an expiry time: slot index cycles every
 * STTIMER_NSLOTS * STTIMER_SLOTTIME (= 1024) seconds */
#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \
(STTIMER_NSLOTS - 1))])
/* global timer-module state; all fields guarded by stt_lock unless
 * noted otherwise */
struct st_timer_data {
spinlock_t stt_lock;
/* start time of the slot processed previously */
cfs_time_t stt_prev_slot;
struct list_head stt_hash[STTIMER_NSLOTS];
int stt_shuttingdown;
wait_queue_head_t stt_waitq;
int stt_nthreads;
} stt_data;
/*
 * Arm @timer: insert it into the hash slot for its expiry time, keeping
 * the slot list sorted by increasing expiry.  The insertion scans from
 * the tail because new timers usually expire latest.  The timer must not
 * already be queued, must have a callback, and must expire in the future.
 */
void
stt_add_timer(stt_timer_t *timer)
{
	struct list_head *pos;

	spin_lock(&stt_data.stt_lock);

	LASSERT(stt_data.stt_nthreads > 0);
	LASSERT(!stt_data.stt_shuttingdown);
	LASSERT(timer->stt_func != NULL);
	LASSERT(list_empty(&timer->stt_list));
	LASSERT(cfs_time_after(timer->stt_expires, cfs_time_current_sec()));

	/* a simple insertion sort */
	list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) {
		stt_timer_t *old = list_entry(pos, stt_timer_t, stt_list);

		if (cfs_time_aftereq(timer->stt_expires, old->stt_expires))
			break;
	}
	list_add(&timer->stt_list, pos);

	spin_unlock(&stt_data.stt_lock);
}
/*
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 *
 * CAVEAT EMPTOR:
 * When 0 is returned, it is possible that timer->stt_func _is_ running on
 * another CPU.
 */
int
stt_del_timer(stt_timer_t *timer)
{
	int ret = 0;

	spin_lock(&stt_data.stt_lock);

	LASSERT(stt_data.stt_nthreads > 0);
	LASSERT(!stt_data.stt_shuttingdown);

	/* still queued => not yet fired; unlink it so it never fires */
	if (!list_empty(&timer->stt_list)) {
		ret = 1;
		list_del_init(&timer->stt_list);
	}

	spin_unlock(&stt_data.stt_lock);
	return ret;
}
/*
 * Fire every timer on @slot whose expiry is not after @now; returns the
 * number of timers fired.  Called with stt_data.stt_lock held; the lock
 * is dropped around each callback (which may block or re-add timers) and
 * reacquired afterwards, so the list is re-read from the head each
 * iteration.
 */
int
stt_expire_list(struct list_head *slot, cfs_time_t now)
{
	int expired = 0;
	stt_timer_t *timer;

	while (!list_empty(slot)) {
		timer = list_entry(slot->next, stt_timer_t, stt_list);

		/* slot is sorted by expiry: first future timer ends scan */
		if (cfs_time_after(timer->stt_expires, now))
			break;

		list_del_init(&timer->stt_list);
		spin_unlock(&stt_data.stt_lock);

		expired++;
		(*timer->stt_func)(timer->stt_data);

		spin_lock(&stt_data.stt_lock);
	}

	return expired;
}
/*
 * Expire all timers in every slot from the current one back to *@last
 * (the slot processed on the previous call), then advance *@last to the
 * current slot.  Walking backwards covers slots skipped if the poller
 * was delayed.  Returns the total number of timers fired.
 */
int
stt_check_timers(cfs_time_t *last)
{
	int expired = 0;
	cfs_time_t now;
	cfs_time_t this_slot;

	now = cfs_time_current_sec();
	this_slot = now & STTIMER_SLOTTIMEMASK;

	spin_lock(&stt_data.stt_lock);
	while (cfs_time_aftereq(this_slot, *last)) {
		expired += stt_expire_list(STTIMER_SLOT(this_slot), now);
		this_slot = cfs_time_sub(this_slot, STTIMER_SLOTTIME);
	}
	*last = now & STTIMER_SLOTTIMEMASK;
	spin_unlock(&stt_data.stt_lock);
	return expired;
}
/*
 * Timer thread body: poll the timer wheel once per slot interval (or
 * immediately on shutdown), then decrement the thread count and exit.
 * The wait_event_timeout() result is deliberately ignored - the loop
 * condition re-checks the shutdown flag either way.
 */
int
stt_timer_main(void *arg)
{
	UNUSED(arg);

	cfs_block_allsigs();

	while (!stt_data.stt_shuttingdown) {
		stt_check_timers(&stt_data.stt_prev_slot);

		wait_event_timeout(stt_data.stt_waitq,
				   stt_data.stt_shuttingdown,
				   cfs_time_seconds(STTIMER_SLOTTIME));
	}

	spin_lock(&stt_data.stt_lock);
	stt_data.stt_nthreads--;
	spin_unlock(&stt_data.stt_lock);
	return 0;
}
/*
 * Spawn the "st_timer" kernel thread and bump the thread count under the
 * lock.  Returns 0 on success or the PTR_ERR of the failed kthread_run().
 */
int
stt_start_timer_thread(void)
{
	task_t *task;

	LASSERT(!stt_data.stt_shuttingdown);

	task = kthread_run(stt_timer_main, NULL, "st_timer");
	if (IS_ERR(task))
		return PTR_ERR(task);

	spin_lock(&stt_data.stt_lock);
	stt_data.stt_nthreads++;
	spin_unlock(&stt_data.stt_lock);
	return 0;
}
/*
 * Module init for the timer subsystem: reset global state, initialize
 * every hash slot and the waitqueue, and start the timer thread.
 * Returns 0 on success or the thread-start error.
 */
int
stt_startup(void)
{
	int rc = 0;
	int i;

	stt_data.stt_shuttingdown = 0;
	/* start polling from the current slot */
	stt_data.stt_prev_slot = cfs_time_current_sec() & STTIMER_SLOTTIMEMASK;

	spin_lock_init(&stt_data.stt_lock);
	for (i = 0; i < STTIMER_NSLOTS; i++)
		INIT_LIST_HEAD(&stt_data.stt_hash[i]);

	stt_data.stt_nthreads = 0;
	init_waitqueue_head(&stt_data.stt_waitq);

	rc = stt_start_timer_thread();
	if (rc != 0)
		CERROR("Can't spawn timer thread: %d\n", rc);

	return rc;
}
/*
 * Module fini for the timer subsystem: assert all timers have been
 * cancelled, raise the shutdown flag, wake the timer thread, and wait
 * (dropping/retaking the lock via lst_wait_until) until every thread
 * has exited.
 */
void
stt_shutdown(void)
{
	int i;

	spin_lock(&stt_data.stt_lock);

	for (i = 0; i < STTIMER_NSLOTS; i++)
		LASSERT(list_empty(&stt_data.stt_hash[i]));

	stt_data.stt_shuttingdown = 1;

	wake_up(&stt_data.stt_waitq);
	lst_wait_until(stt_data.stt_nthreads == 0, stt_data.stt_lock,
		       "waiting for %d threads to terminate\n",
		       stt_data.stt_nthreads);

	spin_unlock(&stt_data.stt_lock);
}

View file

@ -0,0 +1,53 @@
/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
* GPL HEADER END
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/selftest/timer.h
*
* Author: Isaac Huang <isaac@clusterfs.com>
*/
#ifndef __SELFTEST_TIMER_H__
#define __SELFTEST_TIMER_H__
/* One self-test timer: queued on a timer-wheel slot until stt_expires
 * (seconds), at which point stt_func(stt_data) is invoked. */
typedef struct {
struct list_head stt_list;
cfs_time_t stt_expires;
void (*stt_func) (void *);
void *stt_data;
} stt_timer_t;
/* arm a timer (must not already be queued) */
void stt_add_timer (stt_timer_t *timer);
/* cancel a timer; returns 1 if it was still pending, 0 otherwise */
int stt_del_timer (stt_timer_t *timer);
/* timer subsystem init/fini */
int stt_startup (void);
void stt_shutdown (void);
#endif /* __SELFTEST_TIMER_H__ */

View file

@ -0,0 +1,33 @@
config LUSTRE_FS
tristate "Lustre file system client support"
depends on STAGING && INET && BROKEN
select LNET
help
This option enables Lustre file system client support. Choose Y
here if you want to access a Lustre file system cluster. To compile
this file system support as a module, choose M here: the module will
be called lustre.
To mount Lustre file systems, you also need to install the user space
mount.lustre and other user space commands which can be found in the
lustre-client package, available from
http://downloads.whamcloud.com/public/lustre/
Lustre file system is the most popular cluster file system in high
performance computing. Source code of both kernel space and user space
Lustre components can also be found at
http://git.whamcloud.com/?p=fs/lustre-release.git;a=summary
If unsure, say N.
See also http://wiki.lustre.org/
config LUSTRE_OBD_MAX_IOCTL_BUFFER
int "Lustre obd max ioctl buffer bytes (default 8KB)"
depends on LUSTRE_FS
default 8192
help
This option defines the maximum size of buffer in bytes that user space
applications can pass to Lustre kernel module through ioctl interface.
If unsure, use default.

Some files were not shown because too many files have changed in this diff Show more