2019-05-31 08:09:56 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2006-01-16 16:50:04 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
2008-01-31 16:31:39 +00:00
|
|
|
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
|
2006-01-16 16:50:04 +00:00
|
|
|
*/
|
|
|
|
|
2014-03-06 20:10:45 +00:00
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/completion.h>
|
|
|
|
#include <linux/buffer_head.h>
|
|
|
|
#include <linux/blkdev.h>
|
|
|
|
#include <linux/kthread.h>
|
2011-05-26 20:00:52 +00:00
|
|
|
#include <linux/export.h>
|
2006-08-25 16:13:37 +00:00
|
|
|
#include <linux/namei.h>
|
|
|
|
#include <linux/mount.h>
|
2006-02-27 22:23:27 +00:00
|
|
|
#include <linux/gfs2_ondisk.h>
|
2009-09-15 08:59:02 +00:00
|
|
|
#include <linux/quotaops.h>
|
2012-09-05 20:55:11 +00:00
|
|
|
#include <linux/lockdep.h>
|
2013-03-03 03:39:14 +00:00
|
|
|
#include <linux/module.h>
|
2017-04-12 10:24:42 +00:00
|
|
|
#include <linux/backing-dev.h>
|
2019-03-27 14:46:00 +00:00
|
|
|
#include <linux/fs_parser.h>
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
#include "gfs2.h"
|
2006-02-27 22:23:27 +00:00
|
|
|
#include "incore.h"
|
2007-12-12 00:49:21 +00:00
|
|
|
#include "bmap.h"
|
2006-01-16 16:50:04 +00:00
|
|
|
#include "glock.h"
|
|
|
|
#include "glops.h"
|
|
|
|
#include "inode.h"
|
|
|
|
#include "recovery.h"
|
|
|
|
#include "rgrp.h"
|
|
|
|
#include "super.h"
|
|
|
|
#include "sys.h"
|
2006-02-27 22:23:27 +00:00
|
|
|
#include "util.h"
|
2007-08-16 15:03:57 +00:00
|
|
|
#include "log.h"
|
2008-11-19 10:08:22 +00:00
|
|
|
#include "quota.h"
|
2008-11-26 12:49:26 +00:00
|
|
|
#include "dir.h"
|
2013-12-06 16:19:54 +00:00
|
|
|
#include "meta_io.h"
|
2009-06-12 07:49:20 +00:00
|
|
|
#include "trace_gfs2.h"
|
2019-05-02 19:17:40 +00:00
|
|
|
#include "lops.h"
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
#define DO 0
|
|
|
|
#define UNDO 1
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
/**
|
|
|
|
* gfs2_tune_init - Fill a gfs2_tune structure with default values
|
|
|
|
* @gt: tune
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void gfs2_tune_init(struct gfs2_tune *gt)
|
|
|
|
{
|
|
|
|
spin_lock_init(>->gt_spin);
|
|
|
|
|
|
|
|
gt->gt_quota_warn_period = 10;
|
|
|
|
gt->gt_quota_scale_num = 1;
|
|
|
|
gt->gt_quota_scale_den = 1;
|
|
|
|
gt->gt_new_files_jdata = 0;
|
2016-08-02 17:05:27 +00:00
|
|
|
gt->gt_max_readahead = BIT(18);
|
2008-08-08 12:45:13 +00:00
|
|
|
gt->gt_complain_secs = 10;
|
|
|
|
}
|
|
|
|
|
2019-05-16 21:46:30 +00:00
|
|
|
void free_sbd(struct gfs2_sbd *sdp)
|
|
|
|
{
|
|
|
|
if (sdp->sd_lkstats)
|
|
|
|
free_percpu(sdp->sd_lkstats);
|
|
|
|
kfree(sdp);
|
|
|
|
}
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
/**
 * init_sbd - Allocate and initialise an in-core GFS2 superblock structure
 * @sb: the VFS super_block this gfs2_sbd belongs to
 *
 * Allocates a zeroed gfs2_sbd, allocates its per-cpu lock statistics,
 * links it to @sb via sb->s_fs_info, and initialises every lock,
 * list head, waitqueue, completion and the rgrp metadata address space
 * to a clean initial state.
 *
 * Returns: the new gfs2_sbd on success, or NULL on allocation failure.
 */
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
	struct gfs2_sbd *sdp;
	struct address_space *mapping;

	sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
	if (!sdp)
		return NULL;

	sdp->sd_vfs = sb;
	/* Per-cpu DLM/glock timing statistics; summed when reported. */
	sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
	if (!sdp->sd_lkstats)
		goto fail;
	sb->s_fs_info = sdp;

	/* No journal id has been assigned yet; cleared once one arrives. */
	set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	gfs2_tune_init(&sdp->sd_tune);

	/* Glock subsystem waitqueues/counters. */
	init_waitqueue_head(&sdp->sd_glock_wait);
	init_waitqueue_head(&sdp->sd_async_glock_wait);
	atomic_set(&sdp->sd_glock_disposal, 0);
	init_completion(&sdp->sd_locking_init);
	init_completion(&sdp->sd_wdack);
	spin_lock_init(&sdp->sd_statfs_spin);

	/* Resource group index (rbtree of rgrps). */
	spin_lock_init(&sdp->sd_rindex_spin);
	sdp->sd_rindex_tree.rb_node = NULL;

	/* Journal index state. */
	INIT_LIST_HEAD(&sdp->sd_jindex_list);
	spin_lock_init(&sdp->sd_jindex_spin);
	mutex_init(&sdp->sd_jindex_mutex);
	init_completion(&sdp->sd_journal_ready);

	/* Quota state. */
	INIT_LIST_HEAD(&sdp->sd_quota_list);
	mutex_init(&sdp->sd_quota_mutex);
	mutex_init(&sdp->sd_quota_sync_mutex);
	init_waitqueue_head(&sdp->sd_quota_wait);
	INIT_LIST_HEAD(&sdp->sd_trunc_list);
	spin_lock_init(&sdp->sd_trunc_lock);
	spin_lock_init(&sdp->sd_bitmap_lock);

	/* Private address space for rgrp metadata, backed by the bdev. */
	mapping = &sdp->sd_aspace;

	address_space_init_once(mapping);
	mapping->a_ops = &gfs2_rgrp_aops;
	mapping->host = sb->s_bdev->bd_inode;
	mapping->flags = 0;
	/* GFP_NOFS: page allocation here must not recurse into the fs. */
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	mapping->private_data = NULL;
	mapping->writeback_index = 0;

	/* Log (journal) state: revoke/ordered lists and AIL lists. */
	spin_lock_init(&sdp->sd_log_lock);
	atomic_set(&sdp->sd_log_pinned, 0);
	INIT_LIST_HEAD(&sdp->sd_log_revokes);
	INIT_LIST_HEAD(&sdp->sd_log_ordered);
	spin_lock_init(&sdp->sd_ordered_lock);

	init_waitqueue_head(&sdp->sd_log_waitq);
	init_waitqueue_head(&sdp->sd_logd_waitq);
	spin_lock_init(&sdp->sd_ail_lock);
	INIT_LIST_HEAD(&sdp->sd_ail1_list);
	INIT_LIST_HEAD(&sdp->sd_ail2_list);

	init_rwsem(&sdp->sd_log_flush_lock);
	atomic_set(&sdp->sd_log_in_flight, 0);
	atomic_set(&sdp->sd_reserving_log, 0);
	init_waitqueue_head(&sdp->sd_reserving_log_wait);
	init_waitqueue_head(&sdp->sd_log_flush_wait);
	/* Freeze state machine starts unfrozen. */
	atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
	mutex_init(&sdp->sd_freeze_mutex);

	return sdp;

fail:
	free_sbd(sdp);
	return NULL;
}
|
2006-01-16 16:50:04 +00:00
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
/**
|
|
|
|
* gfs2_check_sb - Check superblock
|
|
|
|
* @sdp: the filesystem
|
|
|
|
* @sb: The superblock
|
|
|
|
* @silent: Don't print a message if the check fails
|
|
|
|
*
|
|
|
|
* Checks the version code of the FS is one that we understand how to
|
|
|
|
* read and that the sizes of the various on-disk structures have not
|
|
|
|
* changed.
|
|
|
|
*/
|
|
|
|
|
2011-05-10 14:01:59 +00:00
|
|
|
static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
|
2008-08-08 12:45:13 +00:00
|
|
|
{
|
2011-05-10 14:01:59 +00:00
|
|
|
struct gfs2_sb_host *sb = &sdp->sd_sb;
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
if (sb->sb_magic != GFS2_MAGIC ||
|
|
|
|
sb->sb_type != GFS2_METATYPE_SB) {
|
|
|
|
if (!silent)
|
2014-03-06 20:10:45 +00:00
|
|
|
pr_warn("not a GFS2 filesystem\n");
|
2008-08-08 12:45:13 +00:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If format numbers match exactly, we're done. */
|
|
|
|
|
|
|
|
if (sb->sb_fs_format == GFS2_FORMAT_FS &&
|
|
|
|
sb->sb_multihost_format == GFS2_FORMAT_MULTI)
|
|
|
|
return 0;
|
|
|
|
|
2010-09-24 08:55:07 +00:00
|
|
|
fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
|
2008-08-08 12:45:13 +00:00
|
|
|
|
2010-09-24 08:55:07 +00:00
|
|
|
return -EINVAL;
|
2008-08-08 12:45:13 +00:00
|
|
|
}
|
|
|
|
|
2015-07-20 13:29:37 +00:00
|
|
|
static void end_bio_io_page(struct bio *bio)
|
2008-08-08 12:45:13 +00:00
|
|
|
{
|
|
|
|
struct page *page = bio->bi_private;
|
|
|
|
|
2017-06-03 07:38:06 +00:00
|
|
|
if (!bio->bi_status)
|
2008-08-08 12:45:13 +00:00
|
|
|
SetPageUptodate(page);
|
|
|
|
else
|
2017-06-03 07:38:06 +00:00
|
|
|
pr_warn("error %d reading superblock\n", bio->bi_status);
|
2008-08-08 12:45:13 +00:00
|
|
|
unlock_page(page);
|
|
|
|
}
|
|
|
|
|
2011-05-10 14:01:59 +00:00
|
|
|
static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
|
2008-08-08 12:45:13 +00:00
|
|
|
{
|
2011-05-10 14:01:59 +00:00
|
|
|
struct gfs2_sb_host *sb = &sdp->sd_sb;
|
|
|
|
struct super_block *s = sdp->sd_vfs;
|
2008-08-08 12:45:13 +00:00
|
|
|
const struct gfs2_sb *str = buf;
|
|
|
|
|
|
|
|
sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
|
|
|
|
sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
|
|
|
|
sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
|
|
|
|
sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
|
|
|
|
sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
|
|
|
|
sb->sb_bsize = be32_to_cpu(str->sb_bsize);
|
|
|
|
sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
|
|
|
|
sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
|
|
|
|
sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
|
|
|
|
sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
|
|
|
|
sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
|
|
|
|
|
|
|
|
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
|
|
|
|
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
|
2017-05-10 13:06:33 +00:00
|
|
|
memcpy(&s->s_uuid, str->sb_uuid, 16);
|
2008-08-08 12:45:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* gfs2_read_super - Read the gfs2 super block from disk
|
|
|
|
* @sdp: The GFS2 super block
|
|
|
|
* @sector: The location of the super block
|
|
|
|
* @error: The error code to return
|
|
|
|
*
|
|
|
|
* This uses the bio functions to read the super block from disk
|
|
|
|
* because we want to be 100% sure that we never read cached data.
|
|
|
|
* A super block is read twice only during each GFS2 mount and is
|
|
|
|
* never written to by the filesystem. The first time its read no
|
|
|
|
* locks are held, and the only details which are looked at are those
|
|
|
|
* relating to the locking protocol. Once locking is up and working,
|
|
|
|
* the sb is read again under the lock to establish the location of
|
|
|
|
* the master directory (contains pointers to journals etc) and the
|
|
|
|
* root directory.
|
|
|
|
*
|
|
|
|
* Returns: 0 on success or error
|
|
|
|
*/
|
|
|
|
|
2011-05-10 14:01:59 +00:00
|
|
|
/**
 * gfs2_read_super - Read the gfs2 super block from disk
 * @sdp: The GFS2 super block
 * @sector: The location of the super block
 * @silent: Don't print a message if the check fails
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
 * A super block is read twice only during each GFS2 mount and is
 * never written to by the filesystem. The first time its read no
 * locks are held, and the only details which are looked at are those
 * relating to the locking protocol. Once locking is up and working,
 * the sb is read again under the lock to establish the location of
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
 * Returns: 0 on success or error
 */
static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_sb *p;
	struct page *page;
	struct bio *bio;

	/* GFP_NOFS: don't recurse back into the filesystem while mounting. */
	page = alloc_page(GFP_NOFS);
	if (unlikely(!page))
		return -ENOMEM;

	/* Start from a clean, locked page; end_bio_io_page() unlocks it
	   and sets Uptodate only if the read succeeded. */
	ClearPageUptodate(page);
	ClearPageDirty(page);
	lock_page(page);

	bio = bio_alloc(GFP_NOFS, 1);
	/* @sector is in fs blocks; convert to 512-byte device sectors. */
	bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
	bio_set_dev(bio, sb->s_bdev);
	bio_add_page(bio, page, PAGE_SIZE, 0);

	bio->bi_end_io = end_bio_io_page;
	bio->bi_private = page;
	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);
	submit_bio(bio);
	/* The completion handler unlocks the page; wait for it here. */
	wait_on_page_locked(page);
	bio_put(bio);
	if (!PageUptodate(page)) {
		__free_page(page);
		return -EIO;
	}
	/* Decode the on-disk superblock into sdp->sd_sb, then validate. */
	p = kmap(page);
	gfs2_sb_in(sdp, p);
	kunmap(page);
	__free_page(page);
	return gfs2_check_sb(sdp, silent);
}
|
2009-03-09 09:03:51 +00:00
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
/**
|
|
|
|
* gfs2_read_sb - Read super block
|
|
|
|
* @sdp: The GFS2 superblock
|
|
|
|
* @silent: Don't print message if mount fails
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2009-03-09 09:03:51 +00:00
|
|
|
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @silent: Don't print message if mount fails
 *
 * Reads the superblock from disk and then derives all the per-mount
 * geometry values (pointers per block, hash sizes, metadata heights,
 * directory reservation sizes) from the on-disk block size.
 *
 * Returns: 0 on success or a negative error code
 */
static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
{
	u32 hash_blocks, ind_blocks, leaf_blocks;
	u32 tmp_blocks;
	unsigned int x;
	int error;

	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
	if (error) {
		if (!silent)
			fs_err(sdp, "can't read superblock\n");
		return error;
	}

	/* Derived geometry: fs-block to basic(512B)-block conversion, and
	   how many pointers/entries of each kind fit in one fs block. */
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_dinode)) / sizeof(u64);
	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / sizeof(u64);
	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			        sizeof(struct gfs2_quota_change);
	sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize -
				     sizeof(struct gfs2_meta_header))
		* GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */

	/* Compute maximum reservation required to add a entry to a directory */

	hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
			     sdp->sd_jbsize);

	/* Indirect blocks needed to address the hash blocks at full depth. */
	ind_blocks = 0;
	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
		ind_blocks += tmp_blocks;
	}

	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;

	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;

	/* sd_heightsize[h]: max bytes addressable by a metadata tree of
	   height h; grow until the u64 multiply would overflow (checked by
	   dividing back and comparing). */
	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		u64 space, d;
		u32 m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		/* Overflow: the product no longer divides back cleanly. */
		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	sdp->sd_heightsize[x] = ~0;
	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);

	/* Upper bound on directory entries in one leaf block, used for the
	   location-based readdir cookies. */
	sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
				      sizeof(struct gfs2_leaf)) /
				     GFS2_MIN_DIRENT_SIZE;
	return 0;
}
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
static int init_names(struct gfs2_sbd *sdp, int silent)
|
|
|
|
{
|
|
|
|
char *proto, *table;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
proto = sdp->sd_args.ar_lockproto;
|
|
|
|
table = sdp->sd_args.ar_locktable;
|
|
|
|
|
|
|
|
/* Try to autodetect */
|
|
|
|
|
|
|
|
if (!proto[0] || !table[0]) {
|
2011-05-10 14:01:59 +00:00
|
|
|
error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
|
2007-06-01 13:11:58 +00:00
|
|
|
if (error)
|
|
|
|
return error;
|
2006-10-02 15:49:41 +00:00
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
if (!proto[0])
|
2006-10-02 15:49:41 +00:00
|
|
|
proto = sdp->sd_sb.sb_lockproto;
|
2006-01-16 16:50:04 +00:00
|
|
|
if (!table[0])
|
2006-10-02 15:49:41 +00:00
|
|
|
table = sdp->sd_sb.sb_locktable;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!table[0])
|
|
|
|
table = sdp->sd_vfs->s_id;
|
|
|
|
|
2008-05-09 15:59:51 +00:00
|
|
|
strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN);
|
|
|
|
strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN);
|
2006-01-16 16:50:04 +00:00
|
|
|
|
2007-08-13 03:01:58 +00:00
|
|
|
table = sdp->sd_table_name;
|
|
|
|
while ((table = strchr(table, '/')))
|
2007-06-07 14:10:01 +00:00
|
|
|
*table = '_';
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
|
|
|
|
int undo)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (undo)
|
|
|
|
goto fail_trans;
|
|
|
|
|
|
|
|
error = gfs2_glock_nq_num(sdp,
|
|
|
|
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
|
|
|
|
LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
|
|
|
|
mount_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't acquire mount glock: %d\n", error);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = gfs2_glock_nq_num(sdp,
|
|
|
|
GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
|
|
|
|
LM_ST_SHARED,
|
2006-04-26 18:58:26 +00:00
|
|
|
LM_FLAG_NOEXP | GL_EXACT,
|
2006-01-16 16:50:04 +00:00
|
|
|
&sdp->sd_live_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't acquire live glock: %d\n", error);
|
|
|
|
goto fail_mount;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
|
|
|
|
CREATE, &sdp->sd_rename_gl);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't create rename glock: %d\n", error);
|
|
|
|
goto fail_live;
|
|
|
|
}
|
|
|
|
|
GFS2: remove transaction glock
GFS2 has a transaction glock, which must be grabbed for every
transaction, whose purpose is to deal with freezing the filesystem.
Aside from this involving a large amount of locking, it is very easy to
make the current fsfreeze code hang on unfreezing.
This patch rewrites how gfs2 handles freezing the filesystem. The
transaction glock is removed. In its place is a freeze glock, which is
cached (but not held) in a shared state by every node in the cluster
when the filesystem is mounted. This lock only needs to be grabbed on
freezing, and actions which need to be safe from freezing, like
recovery.
When a node wants to freeze the filesystem, it grabs this glock
exclusively. When the freeze glock state changes on the nodes (either
from shared to unlocked, or shared to exclusive), the filesystem does a
special log flush. gfs2_log_flush() does all the work for flushing out
and shutting down the incore log, and then it tries to grab the
freeze glock in a shared state again. Since the filesystem is stuck in
gfs2_log_flush, no new transaction can start, and nothing can be written
to disk. Unfreezing the filesystem simply involves dropping the freeze
glock, allowing gfs2_log_flush() to grab and then release the shared
lock, so it is cached for next time.
However, in order for the unfreezing ioctl to occur, gfs2 needs to get a
shared lock on the filesystem root directory inode to check permissions.
If that glock has already been grabbed exclusively, fsfreeze will be
unable to get the shared lock and unfreeze the filesystem.
In order to allow the unfreeze, this patch makes gfs2 grab a shared lock
on the filesystem root directory during the freeze, and hold it until it
unfreezes the filesystem. The functions which need to grab a shared
lock in order to allow the unfreeze ioctl to be issued now use the lock
grabbed by the freeze code instead.
The freeze and unfreeze code take care to make sure that this shared
lock will not be dropped while another process is using it.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2014-05-02 03:26:55 +00:00
|
|
|
error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops,
|
|
|
|
CREATE, &sdp->sd_freeze_gl);
|
2006-01-16 16:50:04 +00:00
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't create transaction glock: %d\n", error);
|
|
|
|
goto fail_rename;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2006-06-14 19:32:57 +00:00
|
|
|
fail_trans:
|
GFS2: remove transaction glock
GFS2 has a transaction glock, which must be grabbed for every
transaction, whose purpose is to deal with freezing the filesystem.
Aside from this involving a large amount of locking, it is very easy to
make the current fsfreeze code hang on unfreezing.
This patch rewrites how gfs2 handles freezing the filesystem. The
transaction glock is removed. In its place is a freeze glock, which is
cached (but not held) in a shared state by every node in the cluster
when the filesystem is mounted. This lock only needs to be grabbed on
freezing, and actions which need to be safe from freezing, like
recovery.
When a node wants to freeze the filesystem, it grabs this glock
exclusively. When the freeze glock state changes on the nodes (either
from shared to unlocked, or shared to exclusive), the filesystem does a
special log flush. gfs2_log_flush() does all the work for flushing out
and shutting down the incore log, and then it tries to grab the
freeze glock in a shared state again. Since the filesystem is stuck in
gfs2_log_flush, no new transaction can start, and nothing can be written
to disk. Unfreezing the filesystem simply involves dropping the freeze
glock, allowing gfs2_log_flush() to grab and then release the shared
lock, so it is cached for next time.
However, in order for the unfreezing ioctl to occur, gfs2 needs to get a
shared lock on the filesystem root directory inode to check permissions.
If that glock has already been grabbed exclusively, fsfreeze will be
unable to get the shared lock and unfreeze the filesystem.
In order to allow the unfreeze, this patch makes gfs2 grab a shared lock
on the filesystem root directory during the freeze, and hold it until it
unfreezes the filesystem. The functions which need to grab a shared
lock in order to allow the unfreeze ioctl to be issued now use the lock
grabbed by the freeze code instead.
The freeze and unfreeze code take care to make sure that this shared
lock will not be dropped while another process is using it.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2014-05-02 03:26:55 +00:00
|
|
|
gfs2_glock_put(sdp->sd_freeze_gl);
|
2006-06-14 19:32:57 +00:00
|
|
|
fail_rename:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_put(sdp->sd_rename_gl);
|
2006-06-14 19:32:57 +00:00
|
|
|
fail_live:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_dq_uninit(&sdp->sd_live_gh);
|
2006-06-14 19:32:57 +00:00
|
|
|
fail_mount:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_dq_uninit(mount_gh);
|
2006-06-14 19:32:57 +00:00
|
|
|
fail:
|
2006-01-16 16:50:04 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
|
|
|
|
u64 no_addr, const char *name)
|
2006-01-30 18:34:10 +00:00
|
|
|
{
|
2008-08-08 12:45:13 +00:00
|
|
|
struct gfs2_sbd *sdp = sb->s_fs_info;
|
|
|
|
struct dentry *dentry;
|
|
|
|
struct inode *inode;
|
|
|
|
|
gfs2: Fix gfs2_lookup_by_inum lock inversion
The current gfs2_lookup_by_inum takes the glock of a presumed inode
identified by block number, verifies that the block is indeed an inode,
and then instantiates and reads the new inode via gfs2_inode_lookup.
However, instantiating a new inode may block on freeing a previous
instance of that inode (__wait_on_freeing_inode), and freeing an inode
requires to take the glock already held, leading to lock inversion and
deadlock.
Fix this by first instantiating the new inode, then verifying that the
block is an inode (if required), and then reading in the new inode, all
in gfs2_inode_lookup.
If the block we are looking for is not an inode, we discard the new
inode via iget_failed, which marks inodes as bad and unhashes them.
Other tasks waiting on that inode will get a bad inode back from
ilookup or iget_locked; in that case, retry the lookup.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
2016-06-14 17:22:27 +00:00
|
|
|
inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0,
|
|
|
|
GFS2_BLKST_FREE /* ignore */);
|
2008-08-08 12:45:13 +00:00
|
|
|
if (IS_ERR(inode)) {
|
|
|
|
fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
|
|
|
|
return PTR_ERR(inode);
|
|
|
|
}
|
2012-01-09 03:15:13 +00:00
|
|
|
dentry = d_make_root(inode);
|
2008-08-08 12:45:13 +00:00
|
|
|
if (!dentry) {
|
|
|
|
fs_err(sdp, "can't alloc %s dentry\n", name);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
*dptr = dentry;
|
|
|
|
return 0;
|
2006-01-30 18:34:10 +00:00
|
|
|
}
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
static int init_sb(struct gfs2_sbd *sdp, int silent)
|
2006-01-16 16:50:04 +00:00
|
|
|
{
|
|
|
|
struct super_block *sb = sdp->sd_vfs;
|
|
|
|
struct gfs2_holder sb_gh;
|
2007-05-15 14:37:50 +00:00
|
|
|
u64 no_addr;
|
2008-08-08 12:45:13 +00:00
|
|
|
int ret;
|
2006-01-16 16:50:04 +00:00
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
|
|
|
|
LM_ST_SHARED, 0, &sb_gh);
|
|
|
|
if (ret) {
|
|
|
|
fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
|
|
|
|
return ret;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
2006-09-25 13:26:04 +00:00
|
|
|
|
2009-03-09 09:03:51 +00:00
|
|
|
ret = gfs2_read_sb(sdp, silent);
|
2008-08-08 12:45:13 +00:00
|
|
|
if (ret) {
|
|
|
|
fs_err(sdp, "can't read superblock: %d\n", ret);
|
2006-01-16 16:50:04 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set up the buffer cache and SB for real */
|
2009-05-22 21:17:49 +00:00
|
|
|
if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
|
2008-08-08 12:45:13 +00:00
|
|
|
ret = -EINVAL;
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "FS block size (%u) is too small for device "
|
|
|
|
"block size (%u)\n",
|
2009-05-22 21:17:49 +00:00
|
|
|
sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev));
|
2006-01-16 16:50:04 +00:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
|
2008-08-08 12:45:13 +00:00
|
|
|
ret = -EINVAL;
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "FS block size (%u) is too big for machine "
|
|
|
|
"page size (%u)\n",
|
|
|
|
sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
|
|
|
|
|
2006-01-30 18:34:10 +00:00
|
|
|
/* Get the root inode */
|
2007-05-15 14:37:50 +00:00
|
|
|
no_addr = sdp->sd_sb.sb_root_dir.no_addr;
|
2008-08-08 12:45:13 +00:00
|
|
|
ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
|
|
|
|
if (ret)
|
2006-01-30 18:34:10 +00:00
|
|
|
goto out;
|
2006-01-16 16:50:04 +00:00
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
/* Get the master inode */
|
|
|
|
no_addr = sdp->sd_sb.sb_master_dir.no_addr;
|
|
|
|
ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
|
|
|
|
if (ret) {
|
|
|
|
dput(sdp->sd_root_dir);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
|
2006-01-30 18:34:10 +00:00
|
|
|
out:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_dq_uninit(&sb_gh);
|
2008-08-08 12:45:13 +00:00
|
|
|
return ret;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
|
2008-01-30 15:34:04 +00:00
|
|
|
{
|
2009-01-12 10:43:39 +00:00
|
|
|
char *message = "FIRSTMOUNT=Done";
|
|
|
|
char *envp[] = { message, NULL };
|
2012-01-09 22:18:05 +00:00
|
|
|
|
|
|
|
fs_info(sdp, "first mount done, others may mount\n");
|
|
|
|
|
|
|
|
if (sdp->sd_lockstruct.ls_ops->lm_first_done)
|
|
|
|
sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
|
2008-01-30 15:34:04 +00:00
|
|
|
}
|
|
|
|
|
2008-11-26 12:49:26 +00:00
|
|
|
/**
|
|
|
|
* gfs2_jindex_hold - Grab a lock on the jindex
|
|
|
|
* @sdp: The GFS2 superblock
|
|
|
|
* @ji_gh: the holder for the jindex glock
|
|
|
|
*
|
|
|
|
* Returns: errno
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
|
|
|
|
{
|
|
|
|
struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
|
|
|
|
struct qstr name;
|
|
|
|
char buf[20];
|
|
|
|
struct gfs2_jdesc *jd;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
name.name = buf;
|
|
|
|
|
|
|
|
mutex_lock(&sdp->sd_jindex_mutex);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
|
|
|
|
name.len = sprintf(buf, "journal%u", sdp->sd_journals);
|
|
|
|
name.hash = gfs2_disk_hash(name.name, name.len);
|
|
|
|
|
|
|
|
error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
|
|
|
|
if (error == -ENOENT) {
|
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
gfs2_glock_dq_uninit(ji_gh);
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
|
|
|
|
error = -ENOMEM;
|
|
|
|
jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
|
|
|
|
if (!jd)
|
|
|
|
break;
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&jd->extent_list);
|
2014-03-06 22:19:15 +00:00
|
|
|
INIT_LIST_HEAD(&jd->jd_revoke_list);
|
|
|
|
|
2010-07-20 20:09:02 +00:00
|
|
|
INIT_WORK(&jd->jd_work, gfs2_recover_func);
|
2008-11-26 12:49:26 +00:00
|
|
|
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
|
2019-06-05 14:24:24 +00:00
|
|
|
if (IS_ERR_OR_NULL(jd->jd_inode)) {
|
2008-11-26 12:49:26 +00:00
|
|
|
if (!jd->jd_inode)
|
|
|
|
error = -ENOENT;
|
|
|
|
else
|
|
|
|
error = PTR_ERR(jd->jd_inode);
|
|
|
|
kfree(jd);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock(&sdp->sd_jindex_spin);
|
|
|
|
jd->jd_jid = sdp->sd_journals++;
|
|
|
|
list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
|
|
|
|
spin_unlock(&sdp->sd_jindex_spin);
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_unlock(&sdp->sd_jindex_mutex);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2013-09-04 16:08:02 +00:00
|
|
|
/**
|
|
|
|
* check_journal_clean - Make sure a journal is clean for a spectator mount
|
|
|
|
* @sdp: The GFS2 superblock
|
|
|
|
* @jd: The journal descriptor
|
|
|
|
*
|
|
|
|
* Returns: 0 if the journal is clean or locked, else an error
|
|
|
|
*/
|
|
|
|
static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct gfs2_holder j_gh;
|
|
|
|
struct gfs2_log_header_host head;
|
|
|
|
struct gfs2_inode *ip;
|
|
|
|
|
|
|
|
ip = GFS2_I(jd->jd_inode);
|
|
|
|
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
|
|
|
|
GL_EXACT | GL_NOCACHE, &j_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "Error locking journal for spectator mount.\n");
|
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
error = gfs2_jdesc_check(jd);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "Error checking journal for spectator mount.\n");
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
2019-05-02 19:17:40 +00:00
|
|
|
error = gfs2_find_jhead(jd, &head, false);
|
2013-09-04 16:08:02 +00:00
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "Error parsing journal for spectator mount.\n");
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
|
|
|
|
error = -EPERM;
|
|
|
|
fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
|
|
|
|
"must not be a spectator.\n", jd->jd_jid);
|
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
gfs2_glock_dq_uninit(&j_gh);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
static int init_journal(struct gfs2_sbd *sdp, int undo)
|
|
|
|
{
|
2015-03-17 22:25:59 +00:00
|
|
|
struct inode *master = d_inode(sdp->sd_master_dir);
|
2006-01-16 16:50:04 +00:00
|
|
|
struct gfs2_holder ji_gh;
|
2006-02-27 22:23:27 +00:00
|
|
|
struct gfs2_inode *ip;
|
2006-01-16 16:50:04 +00:00
|
|
|
int jindex = 1;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (undo) {
|
|
|
|
jindex = 0;
|
2009-05-19 09:01:18 +00:00
|
|
|
goto fail_jinode_gh;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
|
2006-03-20 17:30:04 +00:00
|
|
|
if (IS_ERR(sdp->sd_jindex)) {
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't lookup journal index: %d\n", error);
|
2006-03-20 17:30:04 +00:00
|
|
|
return PTR_ERR(sdp->sd_jindex);
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Load in the journal index special file */
|
|
|
|
|
|
|
|
error = gfs2_jindex_hold(sdp, &ji_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't read journal index: %d\n", error);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2010-02-05 23:25:41 +00:00
|
|
|
error = -EUSERS;
|
2006-01-16 16:50:04 +00:00
|
|
|
if (!gfs2_jindex_size(sdp)) {
|
|
|
|
fs_err(sdp, "no journals!\n");
|
2006-09-25 13:26:04 +00:00
|
|
|
goto fail_jindex;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
2017-01-05 21:01:45 +00:00
|
|
|
atomic_set(&sdp->sd_log_blks_needed, 0);
|
2006-01-16 16:50:04 +00:00
|
|
|
if (sdp->sd_args.ar_spectator) {
|
|
|
|
sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
|
2007-11-08 14:55:03 +00:00
|
|
|
atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
|
GFS2: Various gfs2_logd improvements
This patch contains various tweaks to how log flushes and active item writeback
work. gfs2_logd is now managed by a waitqueue, and gfs2_log_reserve now waits
for gfs2_logd to do the log flushing. Multiple functions were rewritten to
remove the need to call gfs2_log_lock(). Instead of using one test to see if
gfs2_logd had work to do, there are now separate tests to check if there
are too many buffers in the incore log or if there are too many items on the
active items list.
This patch is a port of a patch Steve Whitehouse wrote about a year ago, with
some minor changes. Since gfs2_ail1_start always submits all the active items,
it no longer needs to keep track of the first ai submitted, so this has been
removed. In gfs2_log_reserve(), the order of the calls to
prepare_to_wait_exclusive() and wake_up() when firing off the logd thread has
been switched. If it called wake_up first there was a small window for a race,
where logd could run and return before gfs2_log_reserve was ready to get woken
up. If gfs2_logd ran, but did not free up enough blocks, gfs2_log_reserve()
would be left waiting for gfs2_logd to eventually run because it timed out.
Finally, gt_logd_secs, which controls how long to wait before gfs2_logd times
out, and flushes the log, can now be set on mount with ar_commit.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2010-05-04 19:29:16 +00:00
|
|
|
atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
|
|
|
|
atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
|
2006-01-16 16:50:04 +00:00
|
|
|
} else {
|
|
|
|
if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
|
|
|
|
fs_err(sdp, "can't mount journal #%u\n",
|
|
|
|
sdp->sd_lockstruct.ls_jid);
|
|
|
|
fs_err(sdp, "there are only %u journals (0 - %u)\n",
|
|
|
|
gfs2_jindex_size(sdp),
|
|
|
|
gfs2_jindex_size(sdp) - 1);
|
|
|
|
goto fail_jindex;
|
|
|
|
}
|
|
|
|
sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
|
|
|
|
|
2006-06-14 19:32:57 +00:00
|
|
|
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
|
2006-01-16 16:50:04 +00:00
|
|
|
&gfs2_journal_glops,
|
|
|
|
LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
|
|
|
|
&sdp->sd_journal_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't acquire journal glock: %d\n", error);
|
|
|
|
goto fail_jindex;
|
|
|
|
}
|
|
|
|
|
2006-06-14 19:32:57 +00:00
|
|
|
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
|
|
|
|
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
|
2007-08-08 22:08:14 +00:00
|
|
|
LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
|
2006-01-16 16:50:04 +00:00
|
|
|
&sdp->sd_jinode_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't acquire journal inode glock: %d\n",
|
|
|
|
error);
|
|
|
|
goto fail_journal_gh;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = gfs2_jdesc_check(sdp->sd_jdesc);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "my journal (%u) is bad: %d\n",
|
|
|
|
sdp->sd_jdesc->jd_jid, error);
|
|
|
|
goto fail_jinode_gh;
|
|
|
|
}
|
2007-11-08 14:55:03 +00:00
|
|
|
atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
|
GFS2: Various gfs2_logd improvements
This patch contains various tweaks to how log flushes and active item writeback
work. gfs2_logd is now managed by a waitqueue, and gfs2_log_reserve now waits
for gfs2_logd to do the log flushing. Multiple functions were rewritten to
remove the need to call gfs2_log_lock(). Instead of using one test to see if
gfs2_logd had work to do, there are now separate tests to check if there
are too many buffers in the incore log or if there are too many items on the
active items list.
This patch is a port of a patch Steve Whitehouse wrote about a year ago, with
some minor changes. Since gfs2_ail1_start always submits all the active items,
it no longer needs to keep track of the first ai submitted, so this has been
removed. In gfs2_log_reserve(), the order of the calls to
prepare_to_wait_exclusive() and wake_up() when firing off the logd thread has
been switched. If it called wake_up first there was a small window for a race,
where logd could run and return before gfs2_log_reserve was ready to get woken
up. If gfs2_logd ran, but did not free up enough blocks, gfs2_log_reserve()
would be left waiting for gfs2_logd to eventually run because it timed out.
Finally, gt_logd_secs, which controls how long to wait before gfs2_logd times
out, and flushes the log, can now be set on mount with ar_commit.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
2010-05-04 19:29:16 +00:00
|
|
|
atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
|
|
|
|
atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
|
2007-12-12 00:49:21 +00:00
|
|
|
|
|
|
|
/* Map the extents for this journal's blocks */
|
2014-03-03 13:35:57 +00:00
|
|
|
gfs2_map_journal_extents(sdp, sdp->sd_jdesc);
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
2009-06-12 07:49:20 +00:00
|
|
|
trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
if (sdp->sd_lockstruct.ls_first) {
|
|
|
|
unsigned int x;
|
|
|
|
for (x = 0; x < sdp->sd_journals; x++) {
|
2013-09-04 16:08:02 +00:00
|
|
|
struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
|
|
|
|
|
|
|
|
if (sdp->sd_args.ar_spectator) {
|
|
|
|
error = check_journal_clean(sdp, jd);
|
|
|
|
if (error)
|
|
|
|
goto fail_jinode_gh;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
error = gfs2_recover_journal(jd, true);
|
2006-01-16 16:50:04 +00:00
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "error recovering journal %u: %d\n",
|
|
|
|
x, error);
|
|
|
|
goto fail_jinode_gh;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
gfs2_others_may_mount(sdp);
|
2006-01-16 16:50:04 +00:00
|
|
|
} else if (!sdp->sd_args.ar_spectator) {
|
2010-07-20 20:09:02 +00:00
|
|
|
error = gfs2_recover_journal(sdp->sd_jdesc, true);
|
2006-01-16 16:50:04 +00:00
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "error recovering my journal: %d\n", error);
|
|
|
|
goto fail_jinode_gh;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-01 14:36:00 +00:00
|
|
|
sdp->sd_log_idle = 1;
|
2006-01-16 16:50:04 +00:00
|
|
|
set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
|
|
|
|
gfs2_glock_dq_uninit(&ji_gh);
|
|
|
|
jindex = 0;
|
2014-11-14 02:42:04 +00:00
|
|
|
INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
|
2006-01-16 16:50:04 +00:00
|
|
|
return 0;
|
|
|
|
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_jinode_gh:
|
2006-01-16 16:50:04 +00:00
|
|
|
if (!sdp->sd_args.ar_spectator)
|
|
|
|
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_journal_gh:
|
2006-01-16 16:50:04 +00:00
|
|
|
if (!sdp->sd_args.ar_spectator)
|
|
|
|
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_jindex:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_jindex_free(sdp);
|
|
|
|
if (jindex)
|
|
|
|
gfs2_glock_dq_uninit(&ji_gh);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail:
|
2006-01-30 18:34:10 +00:00
|
|
|
iput(sdp->sd_jindex);
|
2006-01-16 16:50:04 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2012-09-05 20:55:11 +00:00
|
|
|
static struct lock_class_key gfs2_quota_imutex_key;
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
static int init_inodes(struct gfs2_sbd *sdp, int undo)
|
|
|
|
{
|
|
|
|
int error = 0;
|
2015-03-17 22:25:59 +00:00
|
|
|
struct inode *master = d_inode(sdp->sd_master_dir);
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
if (undo)
|
2006-01-30 18:34:10 +00:00
|
|
|
goto fail_qinode;
|
|
|
|
|
|
|
|
error = init_journal(sdp, undo);
|
2014-06-02 13:40:25 +00:00
|
|
|
complete_all(&sdp->sd_journal_ready);
|
2006-01-30 18:34:10 +00:00
|
|
|
if (error)
|
2008-08-08 12:45:13 +00:00
|
|
|
goto fail;
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
/* Read in the master statfs inode */
|
2008-08-08 12:45:13 +00:00
|
|
|
sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
|
2006-03-20 17:30:04 +00:00
|
|
|
if (IS_ERR(sdp->sd_statfs_inode)) {
|
|
|
|
error = PTR_ERR(sdp->sd_statfs_inode);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't read in statfs inode: %d\n", error);
|
2009-08-27 14:51:07 +00:00
|
|
|
goto fail_journal;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Read in the resource index inode */
|
2008-08-08 12:45:13 +00:00
|
|
|
sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
|
2006-03-20 17:30:04 +00:00
|
|
|
if (IS_ERR(sdp->sd_rindex)) {
|
|
|
|
error = PTR_ERR(sdp->sd_rindex);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't get resource index inode: %d\n", error);
|
|
|
|
goto fail_statfs;
|
|
|
|
}
|
2008-01-31 16:31:39 +00:00
|
|
|
sdp->sd_rindex_uptodate = 0;
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
/* Read in the quota inode */
|
2008-08-08 12:45:13 +00:00
|
|
|
sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
|
2006-03-20 17:30:04 +00:00
|
|
|
if (IS_ERR(sdp->sd_quota_inode)) {
|
|
|
|
error = PTR_ERR(sdp->sd_quota_inode);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't get quota file inode: %d\n", error);
|
|
|
|
goto fail_rindex;
|
|
|
|
}
|
2012-09-05 20:55:11 +00:00
|
|
|
/*
|
2018-04-06 20:07:45 +00:00
|
|
|
* i_rwsem on quota files is special. Since this inode is hidden system
|
2012-09-05 20:55:11 +00:00
|
|
|
* file, we are safe to define locking ourselves.
|
|
|
|
*/
|
2016-04-15 19:08:36 +00:00
|
|
|
lockdep_set_class(&sdp->sd_quota_inode->i_rwsem,
|
2012-09-05 20:55:11 +00:00
|
|
|
&gfs2_quota_imutex_key);
|
2012-02-24 15:09:14 +00:00
|
|
|
|
|
|
|
error = gfs2_rindex_update(sdp);
|
|
|
|
if (error)
|
|
|
|
goto fail_qinode;
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
return 0;
|
|
|
|
|
2006-01-30 18:34:10 +00:00
|
|
|
fail_qinode:
|
|
|
|
iput(sdp->sd_quota_inode);
|
|
|
|
fail_rindex:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_clear_rgrpd(sdp);
|
2006-01-30 18:34:10 +00:00
|
|
|
iput(sdp->sd_rindex);
|
|
|
|
fail_statfs:
|
|
|
|
iput(sdp->sd_statfs_inode);
|
|
|
|
fail_journal:
|
|
|
|
init_journal(sdp, UNDO);
|
|
|
|
fail:
|
2006-01-16 16:50:04 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int init_per_node(struct gfs2_sbd *sdp, int undo)
|
|
|
|
{
|
2006-01-30 18:34:10 +00:00
|
|
|
struct inode *pn = NULL;
|
2006-01-16 16:50:04 +00:00
|
|
|
char buf[30];
|
|
|
|
int error = 0;
|
2006-02-27 22:23:27 +00:00
|
|
|
struct gfs2_inode *ip;
|
2015-03-17 22:25:59 +00:00
|
|
|
struct inode *master = d_inode(sdp->sd_master_dir);
|
2006-01-16 16:50:04 +00:00
|
|
|
|
|
|
|
if (sdp->sd_args.ar_spectator)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (undo)
|
|
|
|
goto fail_qc_gh;
|
|
|
|
|
2008-08-08 12:45:13 +00:00
|
|
|
pn = gfs2_lookup_simple(master, "per_node");
|
2006-03-20 17:30:04 +00:00
|
|
|
if (IS_ERR(pn)) {
|
|
|
|
error = PTR_ERR(pn);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't find per_node directory: %d\n", error);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
|
2006-03-20 17:30:04 +00:00
|
|
|
sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
|
|
|
|
if (IS_ERR(sdp->sd_sc_inode)) {
|
|
|
|
error = PTR_ERR(sdp->sd_sc_inode);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
|
2009-08-27 14:51:07 +00:00
|
|
|
goto fail;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
|
2006-03-20 17:30:04 +00:00
|
|
|
sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
|
|
|
|
if (IS_ERR(sdp->sd_qc_inode)) {
|
|
|
|
error = PTR_ERR(sdp->sd_qc_inode);
|
2006-01-16 16:50:04 +00:00
|
|
|
fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
|
|
|
|
goto fail_ut_i;
|
|
|
|
}
|
|
|
|
|
2006-01-30 18:34:10 +00:00
|
|
|
iput(pn);
|
2006-01-16 16:50:04 +00:00
|
|
|
pn = NULL;
|
|
|
|
|
2006-06-14 19:32:57 +00:00
|
|
|
ip = GFS2_I(sdp->sd_sc_inode);
|
2009-08-27 14:51:07 +00:00
|
|
|
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
|
2006-01-16 16:50:04 +00:00
|
|
|
&sdp->sd_sc_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
|
2009-08-27 14:51:07 +00:00
|
|
|
goto fail_qc_i;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
2006-06-14 19:32:57 +00:00
|
|
|
ip = GFS2_I(sdp->sd_qc_inode);
|
2009-08-27 14:51:07 +00:00
|
|
|
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
|
2006-01-16 16:50:04 +00:00
|
|
|
&sdp->sd_qc_gh);
|
|
|
|
if (error) {
|
|
|
|
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
|
|
|
|
goto fail_ut_gh;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_qc_gh:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_ut_gh:
|
2006-01-16 16:50:04 +00:00
|
|
|
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_qc_i:
|
2006-01-30 18:34:10 +00:00
|
|
|
iput(sdp->sd_qc_inode);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail_ut_i:
|
2006-01-30 18:34:10 +00:00
|
|
|
iput(sdp->sd_sc_inode);
|
2006-09-04 16:04:26 +00:00
|
|
|
fail:
|
2015-11-13 13:55:59 +00:00
|
|
|
iput(pn);
|
2006-01-16 16:50:04 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
static const match_table_t nolock_tokens = {
|
|
|
|
{ Opt_jid, "jid=%d\n", },
|
|
|
|
{ Opt_err, NULL },
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct lm_lockops nolock_ops = {
|
|
|
|
.lm_proto_name = "lock_nolock",
|
2011-03-09 10:58:04 +00:00
|
|
|
.lm_put_lock = gfs2_glock_free,
|
2009-01-12 10:43:39 +00:00
|
|
|
.lm_tokens = &nolock_tokens,
|
|
|
|
};
|
|
|
|
|
2008-01-30 15:34:04 +00:00
|
|
|
/**
|
|
|
|
* gfs2_lm_mount - mount a locking protocol
|
|
|
|
* @sdp: the filesystem
|
tree-wide: Assorted spelling fixes
In particular, several occurrences of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2010-02-03 00:01:28 +00:00
|
|
|
* @args: mount arguments
|
2008-01-30 15:34:04 +00:00
|
|
|
* @silent: if 1, don't complain if the FS isn't a GFS2 fs
|
|
|
|
*
|
|
|
|
* Returns: errno
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
|
|
|
|
{
|
2009-01-12 10:43:39 +00:00
|
|
|
const struct lm_lockops *lm;
|
|
|
|
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
|
|
|
|
struct gfs2_args *args = &sdp->sd_args;
|
|
|
|
const char *proto = sdp->sd_proto_name;
|
|
|
|
const char *table = sdp->sd_table_name;
|
|
|
|
char *o, *options;
|
|
|
|
int ret;
|
2008-01-30 15:34:04 +00:00
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
if (!strcmp("lock_nolock", proto)) {
|
|
|
|
lm = &nolock_ops;
|
|
|
|
sdp->sd_args.ar_localflocks = 1;
|
|
|
|
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
|
|
|
|
} else if (!strcmp("lock_dlm", proto)) {
|
|
|
|
lm = &gfs2_dlm_ops;
|
|
|
|
#endif
|
|
|
|
} else {
|
2014-03-06 20:10:45 +00:00
|
|
|
pr_info("can't find protocol %s\n", proto);
|
2009-01-12 10:43:39 +00:00
|
|
|
return -ENOENT;
|
|
|
|
}
|
2008-01-30 15:34:04 +00:00
|
|
|
|
|
|
|
fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
ls->ls_ops = lm;
|
|
|
|
ls->ls_first = 1;
|
2008-01-30 15:34:04 +00:00
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
|
|
|
|
substring_t tmp[MAX_OPT_ARGS];
|
|
|
|
int token, option;
|
|
|
|
|
|
|
|
if (!o || !*o)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
token = match_token(o, *lm->lm_tokens, tmp);
|
|
|
|
switch (token) {
|
|
|
|
case Opt_jid:
|
|
|
|
ret = match_int(&tmp[0], &option);
|
|
|
|
if (ret || option < 0)
|
|
|
|
goto hostdata_error;
|
2010-06-14 09:01:30 +00:00
|
|
|
if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags))
|
|
|
|
ls->ls_jid = option;
|
2009-01-12 10:43:39 +00:00
|
|
|
break;
|
|
|
|
case Opt_id:
|
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progess locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need grant a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
2012-04-26 20:54:29 +00:00
|
|
|
case Opt_nodir:
|
2009-09-09 14:59:35 +00:00
|
|
|
/* Obsolete, but left for backward compat purposes */
|
2009-01-12 10:43:39 +00:00
|
|
|
break;
|
|
|
|
case Opt_first:
|
|
|
|
ret = match_int(&tmp[0], &option);
|
|
|
|
if (ret || (option != 0 && option != 1))
|
|
|
|
goto hostdata_error;
|
|
|
|
ls->ls_first = option;
|
|
|
|
break;
|
|
|
|
case Opt_err:
|
|
|
|
default:
|
|
|
|
hostdata_error:
|
|
|
|
fs_info(sdp, "unknown hostdata (%s)\n", o);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2008-01-30 15:34:04 +00:00
|
|
|
}
|
|
|
|
|
2009-01-12 10:43:39 +00:00
|
|
|
if (lm->lm_mount == NULL) {
|
|
|
|
fs_info(sdp, "Now mounting FS...\n");
|
2011-07-26 08:15:45 +00:00
|
|
|
complete_all(&sdp->sd_locking_init);
|
2009-01-12 10:43:39 +00:00
|
|
|
return 0;
|
2008-01-30 15:34:04 +00:00
|
|
|
}
|
2012-01-09 22:18:05 +00:00
|
|
|
ret = lm->lm_mount(sdp, table);
|
2009-01-12 10:43:39 +00:00
|
|
|
if (ret == 0)
|
|
|
|
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
|
2011-07-26 08:15:45 +00:00
|
|
|
complete_all(&sdp->sd_locking_init);
|
2009-01-12 10:43:39 +00:00
|
|
|
return ret;
|
2008-01-30 15:34:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
|
|
|
|
{
|
2009-01-12 10:43:39 +00:00
|
|
|
const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
|
2019-11-14 14:52:15 +00:00
|
|
|
if (likely(!gfs2_withdrawn(sdp)) && lm->lm_unmount)
|
2009-01-12 10:43:39 +00:00
|
|
|
lm->lm_unmount(sdp);
|
2008-01-30 15:34:04 +00:00
|
|
|
}
|
2006-01-16 16:50:04 +00:00
|
|
|
|
2010-06-14 09:01:30 +00:00
|
|
|
static int wait_on_journal(struct gfs2_sbd *sdp)
|
|
|
|
{
|
|
|
|
if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
|
|
|
|
return 0;
|
|
|
|
|
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gds2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since first version of this patch (against 3.15) two new action
functions appeared, on in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 05:16:04 +00:00
|
|
|
return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
|
|
|
|
? -EINTR : 0;
|
2010-06-14 09:01:30 +00:00
|
|
|
}
|
|
|
|
|
2009-07-31 10:07:29 +00:00
|
|
|
void gfs2_online_uevent(struct gfs2_sbd *sdp)
|
|
|
|
{
|
|
|
|
struct super_block *sb = sdp->sd_vfs;
|
|
|
|
char ro[20];
|
|
|
|
char spectator[20];
|
|
|
|
char *envp[] = { ro, spectator, NULL };
|
2017-07-17 07:45:34 +00:00
|
|
|
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
|
2009-07-31 10:07:29 +00:00
|
|
|
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
|
|
|
|
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
|
|
|
|
}
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
/**
 * gfs2_fill_super - Read in superblock
 * @sb: The VFS superblock
 * @fc: The filesystem context, carrying the parsed mount arguments
 *      in fc->fs_private and SB_SILENT in fc->sb_flags
 *
 * Returns: -errno
 */
static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct gfs2_args *args = fc->fs_private;
	int silent = fc->sb_flags & SB_SILENT;
	struct gfs2_sbd *sdp;
	struct gfs2_holder mount_gh;
	int error;

	sdp = init_sbd(sb);
	if (!sdp) {
		pr_warn("can't alloc struct gfs2_sbd\n");
		return -ENOMEM;
	}
	sdp->sd_args = *args;

	/* Spectator mounts are read-only and recover journals read-only */
	if (sdp->sd_args.ar_spectator) {
		sb->s_flags |= SB_RDONLY;
		set_bit(SDF_RORECOVERY, &sdp->sd_flags);
	}
	if (sdp->sd_args.ar_posix_acl)
		sb->s_flags |= SB_POSIXACL;
	if (sdp->sd_args.ar_nobarrier)
		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);

	sb->s_flags |= SB_NOSEC;
	sb->s_magic = GFS2_MAGIC;
	sb->s_op = &gfs2_super_ops;
	sb->s_d_op = &gfs2_dops;
	sb->s_export_op = &gfs2_export_ops;
	sb->s_xattr = gfs2_xattr_handlers;
	sb->s_qcop = &gfs2_quotactl_ops;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
	sb->s_time_gran = 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;

	/* Set up the buffer cache and fill in some fake block size values
	   to allow us to read-in the on-disk superblock. */
	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);

	/* Seed the tunables from the mount arguments */
	sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
	sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
	if (sdp->sd_args.ar_statfs_quantum) {
		sdp->sd_tune.gt_statfs_slow = 0;
		sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
	} else {
		sdp->sd_tune.gt_statfs_slow = 1;
		sdp->sd_tune.gt_statfs_quantum = 30;
	}

	error = init_names(sdp, silent);
	if (error) {
		/* In this case, we haven't initialized sysfs, so we have to
		   manually free the sdp. */
		free_sbd(sdp);
		sb->s_fs_info = NULL;
		return error;
	}

	snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);

	error = gfs2_sys_fs_add(sdp);
	/*
	 * If we hit an error here, gfs2_sys_fs_add will have called function
	 * kobject_put which causes the sysfs usage count to go to zero, which
	 * causes sysfs to call function gfs2_sbd_release, which frees sdp.
	 * Subsequent error paths here will call gfs2_sys_fs_del, which also
	 * kobject_put to free sdp.
	 */
	if (error)
		return error;

	gfs2_create_debugfs_file(sdp);

	error = gfs2_lm_mount(sdp, silent);
	if (error)
		goto fail_debug;

	error = init_locking(sdp, &mount_gh, DO);
	if (error)
		goto fail_lm;

	error = init_sb(sdp, silent);
	if (error)
		goto fail_locking;

	/* Block until user space has assigned this node a journal id */
	error = wait_on_journal(sdp);
	if (error)
		goto fail_sb;

	/*
	 * If user space has failed to join the cluster or some similar
	 * failure has occurred, then the journal id will contain a
	 * negative (error) number. This will then be returned to the
	 * caller (of the mount syscall). We do this even for spectator
	 * mounts (which just write a jid of 0 to indicate "ok" even though
	 * the jid is unused in the spectator case)
	 */
	if (sdp->sd_lockstruct.ls_jid < 0) {
		error = sdp->sd_lockstruct.ls_jid;
		sdp->sd_lockstruct.ls_jid = 0;
		goto fail_sb;
	}

	/* Now that the jid is known, extend the fsname with it */
	if (sdp->sd_args.ar_spectator)
		snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.s",
			 sdp->sd_table_name);
	else
		snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.%u",
			 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);

	error = init_inodes(sdp, DO);
	if (error)
		goto fail_sb;

	error = init_per_node(sdp, DO);
	if (error)
		goto fail_inodes;

	error = gfs2_statfs_init(sdp);
	if (error) {
		fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
		goto fail_per_node;
	}

	if (!sb_rdonly(sb)) {
		error = gfs2_make_fs_rw(sdp);
		if (error) {
			fs_err(sdp, "can't make FS RW: %d\n", error);
			goto fail_per_node;
		}
	}

	gfs2_glock_dq_uninit(&mount_gh);
	gfs2_online_uevent(sdp);
	return 0;

	/* Teardown in reverse order of construction */
fail_per_node:
	init_per_node(sdp, UNDO);
fail_inodes:
	init_inodes(sdp, UNDO);
fail_sb:
	if (sdp->sd_root_dir)
		dput(sdp->sd_root_dir);
	if (sdp->sd_master_dir)
		dput(sdp->sd_master_dir);
	if (sb->s_root)
		dput(sb->s_root);
	sb->s_root = NULL;
fail_locking:
	init_locking(sdp, &mount_gh, UNDO);
fail_lm:
	complete_all(&sdp->sd_journal_ready);
	gfs2_gl_hash_clear(sdp);
	gfs2_lm_unmount(sdp);
fail_debug:
	gfs2_delete_debugfs_file(sdp);
	/* gfs2_sys_fs_del must be the last thing we do, since it causes
	 * sysfs to call function gfs2_sbd_release, which frees sdp. */
	gfs2_sys_fs_del(sdp);
	sb->s_fs_info = NULL;
	return error;
}
|
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/**
|
|
|
|
* gfs2_get_tree - Get the GFS2 superblock and root directory
|
|
|
|
* @fc: The filesystem context
|
|
|
|
*
|
|
|
|
* Returns: 0 or -errno on error
|
|
|
|
*/
|
|
|
|
static int gfs2_get_tree(struct fs_context *fc)
|
2006-01-16 16:50:04 +00:00
|
|
|
{
|
2019-03-27 14:46:00 +00:00
|
|
|
struct gfs2_args *args = fc->fs_private;
|
|
|
|
struct gfs2_sbd *sdp;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = get_tree_bdev(fc, gfs2_fill_super);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
sdp = fc->root->d_sb->s_fs_info;
|
|
|
|
dput(fc->root);
|
|
|
|
if (args->ar_meta)
|
|
|
|
fc->root = dget(sdp->sd_master_dir);
|
|
|
|
else
|
|
|
|
fc->root = dget(sdp->sd_root_dir);
|
2009-09-28 09:30:49 +00:00
|
|
|
return 0;
|
2006-08-25 16:13:37 +00:00
|
|
|
}
|
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
static void gfs2_fc_free(struct fs_context *fc)
|
2009-06-05 06:18:57 +00:00
|
|
|
{
|
2019-03-27 14:46:00 +00:00
|
|
|
struct gfs2_args *args = fc->fs_private;
|
|
|
|
|
|
|
|
kfree(args);
|
2009-06-05 06:18:57 +00:00
|
|
|
}
|
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* Indices of the mount parameters declared in gfs2_param_specs[]. */
enum gfs2_param {
	Opt_lockproto,
	Opt_locktable,
	Opt_hostdata,
	Opt_spectator,
	Opt_ignore_local_fs,
	Opt_localflocks,
	Opt_localcaching,
	Opt_debug,
	Opt_upgrade,
	Opt_acl,
	Opt_quota,
	Opt_suiddir,
	Opt_data,
	Opt_meta,
	Opt_discard,
	Opt_commit,
	Opt_errors,
	Opt_statfs_quantum,
	Opt_statfs_percent,
	Opt_quota_quantum,
	Opt_barrier,
	Opt_rgrplvb,
	Opt_loccookie,
};
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/*
 * Enum values for the "quota=" mount option.  Index 0 is reserved for
 * "unset" so that a zero fs_parse result distinguishes the bare
 * quota/noquota flag forms from an explicit enum value (see
 * gfs2_parse_param()).
 */
enum opt_quota {
	Opt_quota_unset = 0,
	Opt_quota_off,
	Opt_quota_account,
	Opt_quota_on,
};
|
|
|
|
|
|
|
|
/* Map each opt_quota enum index to the corresponding GFS2_QUOTA_* mode. */
static const unsigned int opt_quota_values[] = {
	[Opt_quota_off] = GFS2_QUOTA_OFF,
	[Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
	[Opt_quota_on] = GFS2_QUOTA_ON,
};
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* "data=" option values; equal to the GFS2_DATA_* constants so the
 * fs_parse result can be stored in ar_data directly. */
enum opt_data {
	Opt_data_writeback = GFS2_DATA_WRITEBACK,
	Opt_data_ordered = GFS2_DATA_ORDERED,
};
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* "errors=" option values; equal to the GFS2_ERRORS_* constants so the
 * fs_parse result can be stored in ar_errors directly. */
enum opt_errors {
	Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
	Opt_errors_panic = GFS2_ERRORS_PANIC,
};
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/*
 * Mount parameter specifications for the new (fs_context) mount API.
 * "norecovery" is an alias for "spectator"; options marked _no also
 * accept a "no"-prefixed negation.
 */
static const struct fs_parameter_spec gfs2_param_specs[] = {
	fsparam_string ("lockproto",          Opt_lockproto),
	fsparam_string ("locktable",          Opt_locktable),
	fsparam_string ("hostdata",           Opt_hostdata),
	fsparam_flag   ("spectator",          Opt_spectator),
	fsparam_flag   ("norecovery",         Opt_spectator),
	fsparam_flag   ("ignore_local_fs",    Opt_ignore_local_fs),
	fsparam_flag   ("localflocks",        Opt_localflocks),
	fsparam_flag   ("localcaching",       Opt_localcaching),
	fsparam_flag_no("debug",              Opt_debug),
	fsparam_flag   ("upgrade",            Opt_upgrade),
	fsparam_flag_no("acl",                Opt_acl),
	fsparam_flag_no("suiddir",            Opt_suiddir),
	fsparam_enum   ("data",               Opt_data),
	fsparam_flag   ("meta",               Opt_meta),
	fsparam_flag_no("discard",            Opt_discard),
	fsparam_s32    ("commit",             Opt_commit),
	fsparam_enum   ("errors",             Opt_errors),
	fsparam_s32    ("statfs_quantum",     Opt_statfs_quantum),
	fsparam_s32    ("statfs_percent",     Opt_statfs_percent),
	fsparam_s32    ("quota_quantum",      Opt_quota_quantum),
	fsparam_flag_no("barrier",            Opt_barrier),
	fsparam_flag_no("rgrplvb",            Opt_rgrplvb),
	fsparam_flag_no("loccookie",          Opt_loccookie),
	/* quota can be a flag or an enum so it gets special treatment */
	__fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
	{}
};
|
2010-07-26 12:03:58 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* Accepted string values for the enum-typed parameters above. */
static const struct fs_parameter_enum gfs2_param_enums[] = {
	{ Opt_quota,    "off",        Opt_quota_off },
	{ Opt_quota,    "account",    Opt_quota_account },
	{ Opt_quota,    "on",         Opt_quota_on },
	{ Opt_data,     "writeback",  Opt_data_writeback },
	{ Opt_data,     "ordered",    Opt_data_ordered },
	{ Opt_errors,   "withdraw",   Opt_errors_withdraw },
	{ Opt_errors,   "panic",      Opt_errors_panic },
	{}
};
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-10-17 11:02:25 +00:00
|
|
|
/* Parameter description handed to fs_parse() for every mount option. */
static const struct fs_parameter_description gfs2_fs_parameters = {
	.name = "gfs2",
	.specs = gfs2_param_specs,
	.enums = gfs2_param_enums,
};
|
|
|
|
|
|
|
|
/*
 * Parse a single mount parameter.
 *
 * Validates the option via fs_parse() and stores the result into the
 * gfs2_args attached to the context.  Contradictory combinations
 * (debug with errors=panic) and out-of-range numeric values are
 * rejected with a descriptive invalf() error.
 *
 * Returns 0 on success or a negative errno.
 */
static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct gfs2_args *args = fc->fs_private;
	struct fs_parse_result result;
	int o;

	o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
	if (o < 0)
		return o;

	switch (o) {
	case Opt_lockproto:
		strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
		break;
	case Opt_locktable:
		strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
		break;
	case Opt_hostdata:
		strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
		break;
	case Opt_spectator:
		args->ar_spectator = 1;
		break;
	case Opt_ignore_local_fs:
		/* Retained for backwards compat only */
		break;
	case Opt_localflocks:
		args->ar_localflocks = 1;
		break;
	case Opt_localcaching:
		/* Retained for backwards compat only */
		break;
	case Opt_debug:
		if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
			return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
		args->ar_debug = result.boolean;
		break;
	case Opt_upgrade:
		/* Retained for backwards compat only */
		break;
	case Opt_acl:
		args->ar_posix_acl = result.boolean;
		break;
	case Opt_quota:
		/* The quota option can be a flag or an enum. A non-zero int_32
		   result means that we have an enum index. Otherwise we have
		   to rely on the 'negated' flag to tell us whether 'quota' or
		   'noquota' was specified. */
		if (result.negated)
			args->ar_quota = GFS2_QUOTA_OFF;
		else if (result.int_32 > 0)
			args->ar_quota = opt_quota_values[result.int_32];
		else
			args->ar_quota = GFS2_QUOTA_ON;
		break;
	case Opt_suiddir:
		args->ar_suiddir = result.boolean;
		break;
	case Opt_data:
		/* The uint_32 result maps directly to GFS2_DATA_* */
		args->ar_data = result.uint_32;
		break;
	case Opt_meta:
		args->ar_meta = 1;
		break;
	case Opt_discard:
		args->ar_discard = result.boolean;
		break;
	case Opt_commit:
		if (result.int_32 <= 0)
			return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
		args->ar_commit = result.int_32;
		break;
	case Opt_statfs_quantum:
		if (result.int_32 < 0)
			return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
		args->ar_statfs_quantum = result.int_32;
		break;
	case Opt_quota_quantum:
		if (result.int_32 <= 0)
			return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
		args->ar_quota_quantum = result.int_32;
		break;
	case Opt_statfs_percent:
		if (result.int_32 < 0 || result.int_32 > 100)
			return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
		args->ar_statfs_percent = result.int_32;
		break;
	case Opt_errors:
		if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
			return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
		args->ar_errors = result.uint_32;
		break;
	case Opt_barrier:
		args->ar_nobarrier = result.boolean;
		break;
	case Opt_rgrplvb:
		args->ar_rgrplvb = result.boolean;
		break;
	case Opt_loccookie:
		args->ar_loccookie = result.boolean;
		break;
	default:
		return invalf(fc, "gfs2: invalid mount option: %s", param->key);
	}
	return 0;
}
|
2009-09-28 09:30:49 +00:00
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/*
 * Handle remount (reconfigure) requests.
 *
 * First snapshots the live tunables back into the old args so that
 * unchanged options keep their current values, then rejects any change
 * to options that cannot be altered on a live mount (lock protocol,
 * lock table, hostdata, spectator, localflocks, meta).  Finally
 * performs the ro/rw transition if requested and installs the new
 * arguments and tunables.
 *
 * Returns 0 on success or a negative errno.
 */
static int gfs2_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_args *oldargs = &sdp->sd_args;
	struct gfs2_args *newargs = fc->fs_private;
	struct gfs2_tune *gt = &sdp->sd_tune;
	int error = 0;

	sync_filesystem(sb);

	/* Reflect the current tunables into oldargs before comparing */
	spin_lock(&gt->gt_spin);
	oldargs->ar_commit = gt->gt_logd_secs;
	oldargs->ar_quota_quantum = gt->gt_quota_quantum;
	if (gt->gt_statfs_slow)
		oldargs->ar_statfs_quantum = 0;
	else
		oldargs->ar_statfs_quantum = gt->gt_statfs_quantum;
	spin_unlock(&gt->gt_spin);

	if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
		errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
		return -EINVAL;
	}
	if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
		errorf(fc, "gfs2: reconfiguration of lock table not allowed");
		return -EINVAL;
	}
	if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
		errorf(fc, "gfs2: reconfiguration of host data not allowed");
		return -EINVAL;
	}
	if (newargs->ar_spectator != oldargs->ar_spectator) {
		errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
		return -EINVAL;
	}
	if (newargs->ar_localflocks != oldargs->ar_localflocks) {
		errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
		return -EINVAL;
	}
	if (newargs->ar_meta != oldargs->ar_meta) {
		errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
		return -EINVAL;
	}
	/* Spectator mounts can never become writable */
	if (oldargs->ar_spectator)
		fc->sb_flags |= SB_RDONLY;

	if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
		if (fc->sb_flags & SB_RDONLY) {
			error = gfs2_make_fs_ro(sdp);
			if (error)
				errorf(fc, "gfs2: unable to remount read-only");
		} else {
			error = gfs2_make_fs_rw(sdp);
			if (error)
				errorf(fc, "gfs2: unable to remount read-write");
		}
	}
	sdp->sd_args = *newargs;

	if (sdp->sd_args.ar_posix_acl)
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;
	if (sdp->sd_args.ar_nobarrier)
		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
	else
		clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
	/* Push the (possibly changed) tunables back under the lock */
	spin_lock(&gt->gt_spin);
	gt->gt_logd_secs = newargs->ar_commit;
	gt->gt_quota_quantum = newargs->ar_quota_quantum;
	if (newargs->ar_statfs_quantum) {
		gt->gt_statfs_slow = 0;
		gt->gt_statfs_quantum = newargs->ar_statfs_quantum;
	}
	else {
		gt->gt_statfs_slow = 1;
		gt->gt_statfs_quantum = 30;
	}
	spin_unlock(&gt->gt_spin);

	gfs2_online_uevent(sdp);
	return error;
}
|
|
|
|
|
|
|
|
/* fs_context operations for regular gfs2 mounts. */
static const struct fs_context_operations gfs2_context_ops = {
	.free        = gfs2_fc_free,
	.parse_param = gfs2_parse_param,
	.get_tree    = gfs2_get_tree,
	.reconfigure = gfs2_reconfigure,
};
|
|
|
|
|
|
|
|
/* Set up the filesystem mount context */
|
|
|
|
static int gfs2_init_fs_context(struct fs_context *fc)
|
|
|
|
{
|
|
|
|
struct gfs2_args *args;
|
|
|
|
|
2019-10-30 08:16:43 +00:00
|
|
|
args = kmalloc(sizeof(*args), GFP_KERNEL);
|
2019-03-27 14:46:00 +00:00
|
|
|
if (args == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2019-10-30 08:16:43 +00:00
|
|
|
if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
|
|
|
|
struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info;
|
2019-03-27 14:46:00 +00:00
|
|
|
|
2019-10-30 08:16:43 +00:00
|
|
|
*args = sdp->sd_args;
|
|
|
|
} else {
|
|
|
|
memset(args, 0, sizeof(*args));
|
|
|
|
args->ar_quota = GFS2_QUOTA_DEFAULT;
|
|
|
|
args->ar_data = GFS2_DATA_DEFAULT;
|
|
|
|
args->ar_commit = 30;
|
|
|
|
args->ar_statfs_quantum = 30;
|
|
|
|
args->ar_quota_quantum = 60;
|
|
|
|
args->ar_errors = GFS2_ERRORS_DEFAULT;
|
|
|
|
}
|
2019-03-27 14:46:00 +00:00
|
|
|
fc->fs_private = args;
|
|
|
|
fc->ops = &gfs2_context_ops;
|
|
|
|
return 0;
|
2009-09-28 09:30:49 +00:00
|
|
|
}
|
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* sget_fc() "set" callback: a gfs2meta mount may only attach to an
 * already-mounted gfs2 superblock, never create a new one. */
static int set_meta_super(struct super_block *s, struct fs_context *fc)
{
	return -EINVAL;
}
|
|
|
|
|
2019-03-27 14:46:00 +00:00
|
|
|
/* sget_fc() "test" callback: match an existing superblock by the block
 * device stored in fc->sget_key by gfs2_meta_get_tree(). */
static int test_meta_super(struct super_block *s, struct fs_context *fc)
{
	return (fc->sget_key == s->s_bdev);
}
|
|
|
|
|
|
|
|
/*
 * get_tree for the "gfs2meta" filesystem type.
 *
 * Looks up the mount source path, finds the already-mounted gfs2
 * superblock backing that path's block device, and exposes that
 * mount's hidden metafs root (sd_master_dir) as the tree root.
 * Fails with -EINVAL if no source is given or no matching gfs2 mount
 * exists, and with -EBUSY on a read-only/read-write mismatch.
 */
static int gfs2_meta_get_tree(struct fs_context *fc)
{
	struct super_block *s;
	struct gfs2_sbd *sdp;
	struct path path;
	int error;

	if (!fc->source || !*fc->source)
		return -EINVAL;

	error = kern_path(fc->source, LOOKUP_FOLLOW, &path);
	if (error) {
		pr_warn("path_lookup on %s returned error %d\n",
			fc->source, error);
		return error;
	}
	/* Find the existing gfs2 superblock on the same block device */
	fc->fs_type = &gfs2_fs_type;
	fc->sget_key = path.dentry->d_sb->s_bdev;
	s = sget_fc(fc, test_meta_super, set_meta_super);
	path_put(&path);
	if (IS_ERR(s)) {
		pr_warn("gfs2 mount does not exist\n");
		return PTR_ERR(s);
	}
	if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
		deactivate_locked_super(s);
		return -EBUSY;
	}
	sdp = s->s_fs_info;
	fc->root = dget(sdp->sd_master_dir);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * fs_context operations for the gfs2meta filesystem.  No .parse_param
 * or .reconfigure callbacks are provided here.
 */
static const struct fs_context_operations gfs2_meta_context_ops = {
	.free     = gfs2_fc_free,
	.get_tree = gfs2_meta_get_tree,
};
|
|
|
|
|
|
|
|
static int gfs2_meta_init_fs_context(struct fs_context *fc)
|
|
|
|
{
|
|
|
|
int ret = gfs2_init_fs_context(fc);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
fc->ops = &gfs2_meta_context_ops;
|
|
|
|
return 0;
|
2006-01-16 16:50:04 +00:00
|
|
|
}
|
|
|
|
|
2006-03-02 21:33:41 +00:00
|
|
|
/*
 * gfs2_kill_sb - tear down a gfs2 super block
 * @sb: the super block being unmounted
 *
 * If fill_super never attached an sdp, the generic block teardown is
 * all that is needed.  Otherwise flush the log one last time, drop the
 * extra references held on the root and master directory dentries, and
 * prune the dcache before handing off to kill_block_super().
 */
static void gfs2_kill_sb(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	if (sdp == NULL) {
		kill_block_super(sb);
		return;
	}

	/* Final synchronous log flush, tagged as coming from kill_sb. */
	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SYNC | GFS2_LFC_KILL_SB);
	dput(sdp->sd_root_dir);
	dput(sdp->sd_master_dir);
	sdp->sd_root_dir = NULL;
	sdp->sd_master_dir = NULL;
	shrink_dcache_sb(sb);
	kill_block_super(sb);
}
|
|
|
|
|
2006-01-16 16:50:04 +00:00
|
|
|
/*
 * The main gfs2 filesystem type.  Mount option parsing and super block
 * setup run through the fs_context operations installed by
 * gfs2_init_fs_context(); teardown goes through gfs2_kill_sb().
 */
struct file_system_type gfs2_fs_type = {
	.name = "gfs2",
	.fs_flags = FS_REQUIRES_DEV,
	.init_fs_context = gfs2_init_fs_context,
	.parameters = &gfs2_fs_parameters,
	.kill_sb = gfs2_kill_sb,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2");
|
2006-03-02 21:33:41 +00:00
|
|
|
|
|
|
|
/*
 * The gfs2meta filesystem type: a view onto the metadata of an
 * already mounted gfs2 filesystem.  It has no .kill_sb of its own;
 * get_tree attaches to the existing gfs2 super block.
 */
struct file_system_type gfs2meta_fs_type = {
	.name = "gfs2meta",
	.fs_flags = FS_REQUIRES_DEV,
	.init_fs_context = gfs2_meta_init_fs_context,
	.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2meta");
|