Skip to content

Commit

Permalink
Merge tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/ker…
Browse files Browse the repository at this point in the history
…nel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Fix a rare deadlock in virtiofs

 - Fix st_blocks in writeback cache mode

 - Fix wrong checks in splice move causing spurious warnings

 - Fix a race between a GETATTR request and a FUSE_NOTIFY_INVAL_INODE
   notification

 - Use rb-tree instead of linear search for pages currently under
   writeout by userspace

 - Fix copy_file_range() inconsistencies

* tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: copy_file_range should truncate cache
  fuse: fix copy_file_range cache issues
  fuse: optimize writepages search
  fuse: update attr_version counter on fuse_notify_inval_inode()
  fuse: don't check refcount after stealing page
  fuse: fix weird page warning
  fuse: use dump_page
  virtiofs: do not use fuse_fill_super_common() for device installation
  fuse: always allow query of st_dev
  fuse: always flush dirty data on close(2)
  fuse: invalidate inode attr in writeback cache mode
  fuse: Update stale comment in queue_interrupt()
  fuse: BUG_ON correction in fuse_dev_splice_write()
  virtiofs: Add mount option and atime behavior to the doc
  virtiofs: schedule blocking async replies in separate worker
  • Loading branch information
torvalds committed Jun 9, 2020
2 parents 52435c8 + 9b46418 commit 5b14671
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 85 deletions.
14 changes: 14 additions & 0 deletions Documentation/filesystems/virtiofs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ Mount file system with tag ``myfs`` on ``/mnt``:
Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU
and the virtiofsd daemon.

Mount options
-------------

virtiofs supports general VFS mount options, for example, remount,
ro, rw, context, etc. It also supports FUSE mount options.

atime behavior
^^^^^^^^^^^^^^

The atime-related mount options, for example, noatime, strictatime,
are ignored. The atime behavior for virtiofs is the same as the
underlying filesystem of the directory that has been exported
on the host.

Internals
=========
Since the virtio-fs device uses the FUSE protocol for file system requests, the
Expand Down
14 changes: 7 additions & 7 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
list_add_tail(&req->intr_entry, &fiq->interrupts);
/*
* Pairs with smp_mb() implied by test_and_set_bit()
* from request_end().
* from fuse_request_end().
*/
smp_mb();
if (test_bit(FR_FINISHED, &req->flags)) {
Expand Down Expand Up @@ -764,16 +764,15 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
1 << PG_reclaim))) {
pr_warn("trying to steal weird page\n");
pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
1 << PG_reclaim |
1 << PG_waiters))) {
dump_page(page, "fuse: trying to steal weird page");
return 1;
}
return 0;
Expand Down Expand Up @@ -1977,8 +1976,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;

BUG_ON(nbuf >= pipe->ring_size);
BUG_ON(tail == head);
if (WARN_ON(nbuf >= count || tail == head))
goto out_free;

ibuf = &pipe->bufs[tail & mask];
obuf = &bufs[nbuf];

Expand Down
12 changes: 11 additions & 1 deletion fs/fuse/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1689,8 +1689,18 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
struct inode *inode = d_inode(path->dentry);
struct fuse_conn *fc = get_fuse_conn(inode);

if (!fuse_allow_current_process(fc))
if (!fuse_allow_current_process(fc)) {
if (!request_mask) {
/*
* If user explicitly requested *nothing* then don't
* error out, but return st_dev only.
*/
stat->result_mask = 0;
stat->dev = inode->i_sb->s_dev;
return 0;
}
return -EACCES;
}

return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}
Expand Down
120 changes: 92 additions & 28 deletions fs/fuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)

struct fuse_writepage_args {
struct fuse_io_args ia;
struct list_head writepages_entry;
struct rb_node writepages_entry;
struct list_head queue_entry;
struct fuse_writepage_args *next;
struct inode *inode;
Expand All @@ -366,17 +366,23 @@ struct fuse_writepage_args {
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
pgoff_t idx_from, pgoff_t idx_to)
{
struct fuse_writepage_args *wpa;
struct rb_node *n;

n = fi->writepages.rb_node;

list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
while (n) {
struct fuse_writepage_args *wpa;
pgoff_t curr_index;

wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
WARN_ON(get_fuse_inode(wpa->inode) != fi);
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
if (idx_from < curr_index + wpa->ia.ap.num_pages &&
curr_index <= idx_to) {
if (idx_from >= curr_index + wpa->ia.ap.num_pages)
n = n->rb_right;
else if (idx_to < curr_index)
n = n->rb_left;
else
return wpa;
}
}
return NULL;
}
Expand Down Expand Up @@ -445,9 +451,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (is_bad_inode(inode))
return -EIO;

if (fc->no_flush)
return 0;

err = write_inode_now(inode, 1);
if (err)
return err;
Expand All @@ -460,6 +463,10 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;

err = 0;
if (fc->no_flush)
goto inval_attr_out;

memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.lock_owner = fuse_lock_owner_id(fc, id);
Expand All @@ -475,6 +482,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
fc->no_flush = 1;
err = 0;
}

inval_attr_out:
/*
* In memory i_blocks is not maintained by fuse, if writeback cache is
* enabled, i_blocks from cached attr may not be accurate.
*/
if (!err && fc->writeback_cache)
fuse_invalidate_attr(inode);
return err;
}

Expand Down Expand Up @@ -712,6 +727,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
spin_unlock(&io->lock);

ia->ap.args.end = fuse_aio_complete_req;
ia->ap.args.may_block = io->should_dirty;
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
if (err)
fuse_aio_complete_req(fc, &ia->ap.args, err);
Expand Down Expand Up @@ -1570,7 +1586,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;

list_del(&wpa->writepages_entry);
rb_erase(&wpa->writepages_entry, &fi->writepages);
for (i = 0; i < ap->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
Expand Down Expand Up @@ -1658,6 +1674,36 @@ __acquires(fi->lock)
}
}

static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
{
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;

WARN_ON(!wpa->ia.ap.num_pages);
while (*p) {
struct fuse_writepage_args *curr;
pgoff_t curr_index;

parent = *p;
curr = rb_entry(parent, struct fuse_writepage_args,
writepages_entry);
WARN_ON(curr->inode != wpa->inode);
curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;

if (idx_from >= curr_index + curr->ia.ap.num_pages)
p = &(*p)->rb_right;
else if (idx_to < curr_index)
p = &(*p)->rb_left;
else
return (void) WARN_ON(true);
}

rb_link_node(&wpa->writepages_entry, parent, p);
rb_insert_color(&wpa->writepages_entry, root);
}

static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
int error)
{
Expand All @@ -1676,7 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
wpa->next = next->next;
next->next = NULL;
next->ia.ff = fuse_file_get(wpa->ia.ff);
list_add(&next->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, next);

/*
* Skip fuse_flush_writepages() to make it easy to crop requests
Expand Down Expand Up @@ -1811,7 +1857,7 @@ static int fuse_writepage_locked(struct page *page)
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);

spin_lock(&fi->lock);
list_add(&wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, wpa);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
Expand Down Expand Up @@ -1923,10 +1969,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
WARN_ON(new_ap->num_pages != 0);

spin_lock(&fi->lock);
list_del(&new_wpa->writepages_entry);
rb_erase(&new_wpa->writepages_entry, &fi->writepages);
old_wpa = fuse_find_writeback(fi, page->index, page->index);
if (!old_wpa) {
list_add(&new_wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, new_wpa);
spin_unlock(&fi->lock);
return false;
}
Expand Down Expand Up @@ -2041,7 +2087,7 @@ static int fuse_writepages_fill(struct page *page,
wpa->inode = inode;

spin_lock(&fi->lock);
list_add(&wpa->writepages_entry, &fi->writepages);
tree_insert(&fi->writepages, wpa);
spin_unlock(&fi->lock);

data->wpa = wpa;
Expand Down Expand Up @@ -3235,25 +3281,39 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;

if (fc->writeback_cache) {
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
inode_unlock(inode_in);
if (err)
return err;
}
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
inode_unlock(inode_in);
if (err)
return err;

inode_lock(inode_out);

err = file_modified(file_out);
if (err)
goto out;

if (fc->writeback_cache) {
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
if (err)
goto out;
}
/*
* Write out dirty pages in the destination file before sending the COPY
* request to userspace. After the request is completed, truncate off
* pages (including partial ones) from the cache that have been copied,
* since these contain stale data at that point.
*
* This should be mostly correct, but if the COPY writes to partial
* pages (at the start or end) and the parts not covered by the COPY are
* written through a memory map after calling fuse_writeback_range(),
* then these partial page modifications will be lost on truncation.
*
* It is unlikely that someone would rely on such mixed style
* modifications. Yet this does give less guarantees than if the
* copying was performed with write(2).
*
* To fix this a i_mmap_sem style lock could be used to prevent new
* faults while the copy is ongoing.
*/
err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
if (err)
goto out;

if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
Expand All @@ -3274,6 +3334,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (err)
goto out;

truncate_inode_pages_range(inode_out->i_mapping,
ALIGN_DOWN(pos_out, PAGE_SIZE),
ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);

if (fc->writeback_cache) {
fuse_write_update_size(inode_out, pos_out + outarg.size);
file_update_time(file_out);
Expand Down Expand Up @@ -3351,5 +3415,5 @@ void fuse_init_file_inode(struct inode *inode)
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
INIT_LIST_HEAD(&fi->writepages);
fi->writepages = RB_ROOT;
}
3 changes: 2 additions & 1 deletion fs/fuse/fuse_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ struct fuse_inode {
wait_queue_head_t page_waitq;

/* List of writepage requestst (pending or sent) */
struct list_head writepages;
struct rb_root writepages;
};

/* readdir cache (directory only) */
Expand Down Expand Up @@ -249,6 +249,7 @@ struct fuse_args {
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;
bool may_block:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
Expand Down
Loading

0 comments on commit 5b14671

Please sign in to comment.