afs: Make use of the YFS service upgrade to fully support IPv6

YFS VL servers offer an upgraded Volume Location service that can return
IPv6 addresses for fileservers and volume servers, in addition to IPv4
addresses, by means of the YFSVL.GetEndpoints operation, which we should
use if it's available.

To this end:

 (1) Make rxrpc_kernel_recv_data() return the call's current service ID so
     that the caller can detect service upgrade and see what the service
     was upgraded to.

 (2) When we see a VL server address we haven't seen before, send a
     VL.GetCapabilities operation to it with the service upgrade bit set.

     If we get an upgrade to the YFS VL service, change the service ID in
     the address list for that address to use the upgraded service and set
     a flag to note that this appears to be a YFS-compatible server.

 (3) If, when a server's addresses are being looked up, we note that we
     previously detected a YFS-compatible server, then send the
     YFSVL.GetEndpoints operation rather than VL.GetAddrsU.

 (4) Build a fileserver address list from the reply of YFSVL.GetEndpoints,
     including both IPv4 and IPv6 addresses.  Volume server addresses are
     discarded.

 (5) The address list is sorted by address and port now, instead of just
     address.  This allows multiple servers on the same host sitting on
     different ports.

Signed-off-by: David Howells <dhowells@redhat.com>
This commit is contained in:
David Howells 2017-11-02 15:27:51 +00:00
parent d2ddc776a4
commit bf99a53ce2
6 changed files with 428 additions and 10 deletions

View File

@ -17,9 +17,10 @@
#include "internal.h"
#include "afs_fs.h"
#define AFS_MAX_ADDRESSES \
((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \
sizeof(struct sockaddr_rxrpc)))
//#define AFS_MAX_ADDRESSES
// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
// sizeof(struct sockaddr_rxrpc)))
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
/*
* Release an address list.
@ -230,15 +231,20 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
/*
* Merge an IPv4 entry into a fileserver address list.
*/
void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
{
struct sockaddr_in6 *a;
__be16 xport = htons(port);
int i;
for (i = 0; i < alist->nr_ipv4; i++) {
a = &alist->addrs[i].transport.sin6;
if (xdr == a->sin6_addr.s6_addr32[3])
if (xdr == a->sin6_addr.s6_addr32[3] &&
xport == a->sin6_port)
return;
if (xdr == a->sin6_addr.s6_addr32[3] &&
xport < a->sin6_port)
break;
if (xdr < a->sin6_addr.s6_addr32[3])
break;
}
@ -249,7 +255,7 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
a = &alist->addrs[i].transport.sin6;
a->sin6_port = htons(AFS_FS_PORT);
a->sin6_port = xport;
a->sin6_addr.s6_addr32[0] = 0;
a->sin6_addr.s6_addr32[1] = 0;
a->sin6_addr.s6_addr32[2] = htonl(0xffff);
@ -258,6 +264,42 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr)
alist->nr_addrs++;
}
/*
 * Insert an IPv6 endpoint into a fileserver address list.
 *
 * The IPv6 entries live in addrs[nr_ipv4 .. nr_addrs - 1] and are kept
 * ordered by address (bytewise comparison) and then by port (compared in its
 * stored big-endian form).  An endpoint whose address and port both match an
 * existing entry is not added again.
 *
 * Only the port and address of the chosen slot are written here, and no check
 * is made that the list has room for another entry; the caller is expected to
 * have sized the list appropriately.
 */
void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
{
	struct sockaddr_in6 *slot;
	__be16 be_port = htons(port);
	int pos, cmp, w;

	/* Locate the insertion point, bailing out on an exact duplicate. */
	for (pos = alist->nr_ipv4; pos < alist->nr_addrs; pos++) {
		slot = &alist->addrs[pos].transport.sin6;
		cmp = memcmp(xdr, &slot->sin6_addr, 16);
		if (cmp < 0)
			break;
		if (cmp > 0)
			continue;

		/* Same address: fall back to ordering by port. */
		if (be_port == slot->sin6_port)
			return;
		if (be_port < slot->sin6_port)
			break;
	}

	/* Open a gap by shuffling the tail of the list up by one slot. */
	if (pos < alist->nr_addrs)
		memmove(&alist->addrs[pos + 1], &alist->addrs[pos],
			sizeof(alist->addrs[0]) * (alist->nr_addrs - pos));

	slot = &alist->addrs[pos].transport.sin6;
	slot->sin6_port = be_port;
	for (w = 0; w < 4; w++)
		slot->sin6_addr.s6_addr32[w] = xdr[w];
	alist->nr_addrs++;
}
/*
* Get an address to try.
*/

View File

@ -16,6 +16,7 @@
#define AFS_VL_PORT 7003 /* volume location service port */
#define VL_SERVICE 52 /* RxRPC service ID for the Volume Location service */
#define YFS_VL_SERVICE 2503 /* Service ID for AuriStor upgraded VL service */
enum AFSVL_Operations {
VLGETENTRYBYID = 503, /* AFS Get VLDB entry by ID */
@ -24,6 +25,8 @@ enum AFSVL_Operations {
VLGETENTRYBYIDU = 526, /* AFS Get VLDB entry by ID (UUID-variant) */
VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */
VLGETADDRSU = 533, /* AFS Get addrs for fileserver */
YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */
VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */
};
enum AFSVL_Errors {
@ -57,6 +60,19 @@ enum AFSVL_Errors {
AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
};
/*
 * Selector values naming how a server is identified in a YFSVL request.
 * afs_yfsvl_get_endpoints() marshals YFS_SERVER_UUID immediately ahead of the
 * server's UUID; presumably these are the discriminator values of the
 * yfsServerAttributes argument - TODO confirm against the protocol spec.
 */
enum {
YFS_SERVER_INDEX = 0,
YFS_SERVER_UUID = 1,
YFS_SERVER_ENDPOINT = 2,
};
/*
 * Endpoint type tags as found in a YFSVL.GetEndpoints reply.  The reply
 * parser uses the tag to decide how many words the endpoint record occupies
 * (one address word for IPv4, four for IPv6, each plus a length word and a
 * port word) and rejects any other value.
 */
enum {
YFS_ENDPOINT_IPV4 = 0,
YFS_ENDPOINT_IPV6 = 1,
};
#define YFS_MAXENDPOINTS 16
/*
* maps to "struct vldbentry" in vvl-spec.pdf
*/

View File

@ -70,6 +70,8 @@ struct afs_addr_list {
unsigned short nr_addrs;
unsigned short index; /* Address currently in use */
unsigned short nr_ipv4; /* Number of IPv4 addresses */
unsigned long probed; /* Mask of servers that have been probed */
unsigned long yfs; /* Mask of servers that are YFS */
struct sockaddr_rxrpc addrs[];
};
@ -113,7 +115,7 @@ struct afs_call {
bool async; /* T if asynchronous */
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
u16 service_id; /* RxRPC service ID to call */
u16 service_id; /* Actual service ID (after upgrade) */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
@ -564,7 +566,8 @@ extern bool afs_iterate_addresses(struct afs_addr_cursor *);
extern int afs_end_cursor(struct afs_addr_cursor *);
extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32);
extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
/*
* cache.c
@ -846,6 +849,9 @@ extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
struct key *, const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
struct key *, const uuid_t *);
extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
struct key *, const uuid_t *);
/*
* volume.c

View File

@ -266,7 +266,10 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
return ERR_PTR(ret);
while (afs_iterate_addresses(&ac)) {
alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
if (test_bit(ac.index, &ac.alist->yfs))
alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
else
alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
switch (ac.error) {
case 0:
afs_end_cursor(&ac);

View File

@ -225,7 +225,7 @@ again:
bp = call->buffer;
for (i = 0; i < count; i++)
if (alist->nr_addrs < call->count2)
afs_merge_fs_addr4(alist, *bp++);
afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
call->count -= count;
if (call->count > 0)
@ -300,3 +300,338 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
}
/*
 * Deliver reply data to a VL.GetCapabilities operation.
 *
 * The reply is a word count followed by that many capability words.  The
 * words are pulled through call->buffer in chunks of at most 16 and are
 * currently discarded unexamined.
 *
 * Returns 0 once the whole reply has been consumed, or the negative value
 * handed back by afs_extract_data() if extraction cannot proceed.  On
 * completion, reply[0] is set to the call's service ID so that the caller can
 * see whether the service got upgraded.
 */
static int afs_deliver_vl_get_capabilities(struct afs_call *call)
{
	u32 count;
	int ret;

	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);

again:
	switch (call->unmarshall) {
	case 0:
		call->offset = 0;
		call->unmarshall++;

		/* Extract the capabilities word count */
		/* Fall through */
	case 1:
		ret = afs_extract_data(call, &call->tmp,
				       1 * sizeof(__be32),
				       true);
		if (ret < 0)
			return ret;

		count = ntohl(call->tmp);

		call->count = count;
		call->count2 = count;
		call->offset = 0;
		call->unmarshall++;

		/* Extract capabilities words */
		/* Fall through */
	case 2:
		count = min(call->count, 16U);
		ret = afs_extract_data(call, call->buffer,
				       count * sizeof(__be32),
				       call->count > 16);
		if (ret < 0)
			return ret;

		/* TODO: Examine capabilities */

		call->count -= count;

		/* The chunk is complete, so the extraction offset must be
		 * rewound before anything further is pulled in - including
		 * when we go round again for the next chunk, otherwise the
		 * next extraction would start from a stale offset.
		 */
		call->offset = 0;
		if (call->count > 0)
			goto again;
		call->unmarshall++;
		break;
	}

	call->reply[0] = (void *)(unsigned long)call->service_id;

	_leave(" = 0 [done]");
	return 0;
}
/*
 * Call type descriptor for the VL.GetCapabilities operation: names the
 * operation and supplies its reply parser and destructor.
 */
static const struct afs_call_type afs_RXVLGetCapabilities = {
	.name		= "VL.GetCapabilities",
	.deliver	= afs_deliver_vl_get_capabilities,
	.destructor	= afs_flat_call_destructor,
};
/*
* Probe a fileserver for the capabilities that it supports. This can
* return up to 196 words.
*
* We use this to probe for service upgrade to determine what the server at the
* other end supports.
*/
int afs_vl_get_capabilities(struct afs_net *net,
struct afs_addr_cursor *ac,
struct key *key)
{
struct afs_call *call;
__be32 *bp;
_enter("");
call = afs_alloc_flat_call(net, &afs_RXVLGetCapabilities, 1 * 4, 16 * 4);
if (!call)
return -ENOMEM;
call->key = key;
call->upgrade = true; /* Let's see if this is a YFS server */
call->reply[0] = (void *)VLGETCAPABILITIES;
call->ret_reply0 = true;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(VLGETCAPABILITIES);
/* Can't take a ref on server */
return afs_make_call(ac, call, GFP_KERNEL, false);
}
/*
 * Deliver reply data to a YFSVL.GetEndpoints call.
 *
 *	GetEndpoints(IN yfsServerAttributes *attr,
 *		     OUT opr_uuid *uuid,
 *		     OUT afs_int32 *uniquifier,
 *		     OUT endpoints *fsEndpoints,
 *		     OUT endpoints *volEndpoints)
 *
 * The reply is parsed incrementally, with call->unmarshall recording the
 * phase so that parsing can resume when more data arrives:
 *
 *	0-1: uuid, uniquifier, fsEndpoints count and one look-ahead word
 *	2:   one fsEndpoint record plus the following look-ahead word
 *	3:   type of the first volEndpoint
 *	4:   one volEndpoint record (plus the next type if there is one)
 *	5-6: end of reply
 *
 * call->count holds the number of records still to be read in the current
 * array and call->count2 holds the look-ahead word (the type of the next
 * record or the count of the next array).
 *
 * fsEndpoints are merged into a new address list that is attached to
 * reply[0]; volEndpoints are validated and discarded.
 *
 * Returns 0 when the reply is complete, -EBADMSG if it is malformed, -ENOMEM
 * if the address list cannot be allocated, or the negative value handed back
 * by afs_extract_data().
 */
static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
{
	struct afs_addr_list *alist;
	__be32 *bp;
	u32 uniquifier, size;
	int ret;

	_enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);

again:
	switch (call->unmarshall) {
	case 0:
		call->offset = 0;
		call->unmarshall = 1;

		/* Extract the returned uuid, uniquifier, fsEndpoints count and
		 * either the first fsEndpoint type or the volEndpoints
		 * count if there are no fsEndpoints. */
		/* Fall through */
	case 1:
		ret = afs_extract_data(call, call->buffer,
				       sizeof(uuid_t) +
				       3 * sizeof(__be32),
				       true);
		if (ret < 0)
			return ret;

		/* The returned uuid is skipped over, not examined. */
		bp = call->buffer + sizeof(uuid_t);
		uniquifier	= ntohl(*bp++);
		call->count	= ntohl(*bp++);
		call->count2	= ntohl(*bp); /* Type or next count */

		if (call->count > YFS_MAXENDPOINTS)
			return -EBADMSG;

		alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
		if (!alist)
			return -ENOMEM;
		alist->version = uniquifier;
		call->reply[0] = alist;
		call->offset = 0;

		if (call->count == 0)
			goto extract_volendpoints;

		call->unmarshall = 2;

		/* Extract fsEndpoints[] entries */
		/* Fall through */
	case 2:
		/* Each record is a length word, the address and a port word. */
		switch (call->count2) {
		case YFS_ENDPOINT_IPV4:
			size = sizeof(__be32) * (1 + 1 + 1);
			break;
		case YFS_ENDPOINT_IPV6:
			size = sizeof(__be32) * (1 + 4 + 1);
			break;
		default:
			return -EBADMSG;
		}

		/* There is always a word after an fsEndpoint: either the type
		 * of the next one or the volEndpoints count.
		 */
		size += sizeof(__be32);
		ret = afs_extract_data(call, call->buffer, size, true);
		if (ret < 0)
			return ret;

		alist = call->reply[0];
		bp = call->buffer;
		switch (call->count2) {
		case YFS_ENDPOINT_IPV4:
			if (ntohl(bp[0]) != sizeof(__be32) * 2)
				return -EBADMSG;
			afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
			bp += 3;
			break;
		case YFS_ENDPOINT_IPV6:
			if (ntohl(bp[0]) != sizeof(__be32) * 5)
				return -EBADMSG;
			afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
			bp += 6;
			break;
		default:
			return -EBADMSG;
		}

		/* Got either the type of the next entry or the count of
		 * volEndpoints if no more fsEndpoints.
		 */
		call->count2 = ntohl(*bp++);

		call->offset = 0;
		call->count--;
		if (call->count > 0)
			goto again;

	extract_volendpoints:
		/* Extract the list of volEndpoints. */
		call->count = call->count2;
		if (!call->count)
			goto end;
		if (call->count > YFS_MAXENDPOINTS)
			return -EBADMSG;

		call->unmarshall = 3;

		/* Extract the type of volEndpoints[0].  Normally we would
		 * extract the type of the next endpoint when we extract the
		 * data of the current one, but this is the first...
		 */
		/* Fall through */
	case 3:
		ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
		if (ret < 0)
			return ret;

		bp = call->buffer;
		call->count2 = ntohl(*bp++);
		call->offset = 0;
		call->unmarshall = 4;

		/* Extract volEndpoints[] entries */
		/* Fall through */
	case 4:
		switch (call->count2) {
		case YFS_ENDPOINT_IPV4:
			size = sizeof(__be32) * (1 + 1 + 1);
			break;
		case YFS_ENDPOINT_IPV6:
			size = sizeof(__be32) * (1 + 4 + 1);
			break;
		default:
			return -EBADMSG;
		}

		/* Nothing follows the last volEndpoint, so only look ahead to
		 * the next type if there is another record to come.
		 */
		if (call->count > 1)
			size += sizeof(__be32);
		ret = afs_extract_data(call, call->buffer, size, true);
		if (ret < 0)
			return ret;

		/* The volEndpoints are checked for shape but not recorded. */
		bp = call->buffer;
		switch (call->count2) {
		case YFS_ENDPOINT_IPV4:
			if (ntohl(bp[0]) != sizeof(__be32) * 2)
				return -EBADMSG;
			bp += 3;
			break;
		case YFS_ENDPOINT_IPV6:
			if (ntohl(bp[0]) != sizeof(__be32) * 5)
				return -EBADMSG;
			bp += 6;
			break;
		default:
			return -EBADMSG;
		}

		/* If there are more volEndpoints to come, we also got the
		 * type of the next one.
		 */
		call->offset = 0;
		call->count--;
		if (call->count > 0) {
			call->count2 = ntohl(*bp++);
			goto again;
		}

	end:
		call->unmarshall = 5;

		/* Done */
		/* Fall through */
	case 5:
		ret = afs_extract_data(call, call->buffer, 0, false);
		if (ret < 0)
			return ret;
		call->unmarshall = 6;

		/* Fall through */
	case 6:
		break;
	}

	alist = call->reply[0];

	/* Start with IPv6 if available. */
	if (alist->nr_ipv4 < alist->nr_addrs)
		alist->index = alist->nr_ipv4;

	_leave(" = 0 [done]");
	return 0;
}
/*
 * YFSVL.GetEndpoints operation type.
 *
 * The name carries the YFSVL prefix so that the operation is not mistaken for
 * one belonging to the plain VL service wherever the name is reported.
 */
static const struct afs_call_type afs_YFSVLGetEndpoints = {
	.name		= "YFSVL.GetEndpoints",
	.deliver	= afs_deliver_yfsvl_get_endpoints,
	.destructor	= afs_vl_get_addrs_u_destructor,
};
/*
 * Dispatch a YFSVL.GetEndpoints operation to look up the endpoints of a
 * server, where the server is nominated by UUID.
 *
 * The request consists of the operation ID, the YFS_SERVER_UUID selector and
 * the UUID itself.  The reply buffer is sized for three words plus one IPv6
 * address.
 *
 * Returns ERR_PTR(-ENOMEM) if the call cannot be allocated, otherwise the
 * result of afs_make_call() cast to an address list pointer.
 */
struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
					      struct afs_addr_cursor *ac,
					      struct key *key,
					      const uuid_t *uuid)
{
	struct afs_call *call;
	__be32 *req;

	_enter("");

	call = afs_alloc_flat_call(net, &afs_YFSVLGetEndpoints,
				   2 * sizeof(__be32) + sizeof(*uuid),
				   3 * sizeof(__be32) + sizeof(struct in6_addr));
	if (!call)
		return ERR_PTR(-ENOMEM);

	call->key = key;
	call->reply[0] = NULL;
	call->ret_reply0 = true;

	/* Marshall the parameters */
	req = call->request;
	req[0] = htonl(YVLGETENDPOINTS);
	req[1] = htonl(YFS_SERVER_UUID);
	memcpy(&req[2], uuid, sizeof(*uuid)); /* Type opr_uuid */

	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
}

View File

@ -124,6 +124,22 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
return ERR_PTR(ret);
while (afs_iterate_addresses(&ac)) {
if (!test_bit(ac.index, &ac.alist->probed)) {
ret = afs_vl_get_capabilities(cell->net, &ac, key);
switch (ret) {
case VL_SERVICE:
clear_bit(ac.index, &ac.alist->yfs);
set_bit(ac.index, &ac.alist->probed);
ac.addr->srx_service = ret;
break;
case YFS_VL_SERVICE:
set_bit(ac.index, &ac.alist->yfs);
set_bit(ac.index, &ac.alist->probed);
ac.addr->srx_service = ret;
break;
}
}
vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
volname, volnamesz);
switch (ac.error) {