Skip to content

Commit

Permalink
f2fs: introduce inmem curseg
Browse files Browse the repository at this point in the history
Previous implementation of aligned pinfile allocation will:
- allocate new segment on cold data log no matter whether last used
segment is partially used or not, it makes IOs more random;
- force concurrent cold data/GCed IO going into warm data area, it
can make a bad effect on hot/cold data separation;

In this patch, we introduce a new type of log named 'inmem curseg',
the differents from normal curseg is:
- it reuses existed segment type (CURSEG_XXX_NODE/DATA);
- it only exists in memory, its segno, blkofs, summary will not b
 persisted into checkpoint area;

With this new feature, we can enhance scalability of log, special
allocators can be created for purposes:
- pure lfs allocator for aligned pinfile allocation or file
defragmentation
- pure ssr allocator for later feature

So that, let's update aligned pinfile allocation to use this new
inmem curseg fwk.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
  • Loading branch information
chaseyu authored and Jaegeuk Kim committed Sep 10, 2020
1 parent 376207a commit d0b9e42
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 52 deletions.
7 changes: 6 additions & 1 deletion fs/f2fs/checkpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -1619,11 +1619,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

f2fs_flush_sit_entries(sbi, cpc);

/* save inmem log status */
f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);

err = do_checkpoint(sbi, cpc);
if (err)
f2fs_release_discard_addrs(sbi);
else
f2fs_clear_prefree_segments(sbi, cpc);

f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
Expand Down Expand Up @@ -1654,7 +1659,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
}

sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
NR_CURSEG_TYPE - __cp_payload(sbi)) *
NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
F2FS_ORPHANS_PER_BLOCK;
}

Expand Down
6 changes: 5 additions & 1 deletion fs/f2fs/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
Expand Down Expand Up @@ -393,6 +393,10 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_seg[CURSEG_COLD_NODE],
si->full_seg[CURSEG_COLD_NODE],
si->valid_blks[CURSEG_COLD_NODE]);
seq_printf(s, " - Pinned file: %8d %8d %8d\n",
si->curseg[CURSEG_COLD_DATA_PINNED],
si->cursec[CURSEG_COLD_DATA_PINNED],
si->curzone[CURSEG_COLD_DATA_PINNED]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
Expand Down
12 changes: 9 additions & 3 deletions fs/f2fs/f2fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
#define NR_CURSEG_INMEM_TYPE (1)
#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)

enum {
CURSEG_HOT_DATA = 0, /* directory entry blocks */
Expand All @@ -982,8 +984,10 @@ enum {
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NO_CHECK_TYPE,
CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
NR_PERSISTENT_LOG, /* number of persistent log */
CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
/* pinned file that needs consecutive block address */
NO_CHECK_TYPE, /* number of persistent & inmem log */
};

struct flush_cmd {
Expand Down Expand Up @@ -3333,6 +3337,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type);
void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
Expand Down
5 changes: 3 additions & 2 deletions fs/f2fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1656,13 +1656,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
}

down_write(&sbi->pin_sem);
map.m_seg_type = CURSEG_COLD_DATA_PINNED;

f2fs_lock_op(sbi);
f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED);
f2fs_unlock_op(sbi);

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);

up_write(&sbi->pin_sem);

done += map.m_len;
Expand Down
2 changes: 1 addition & 1 deletion fs/f2fs/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);

/* Move out cursegs from the target range */
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
f2fs_allocate_segment_for_resize(sbi, type, start, end);

/* do GC to move out valid blocks in the range */
Expand Down
107 changes: 74 additions & 33 deletions fs/f2fs/segment.c
Original file line number Diff line number Diff line change
Expand Up @@ -1962,7 +1962,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)

mutex_lock(&dirty_i->seglist_lock);
for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
__set_test_and_free(sbi, segno);
__set_test_and_free(sbi, segno, false);
mutex_unlock(&dirty_i->seglist_lock);
}

Expand Down Expand Up @@ -2500,31 +2500,39 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;

curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
curseg->next_segno = NULL_SEGNO;

sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
if (IS_DATASEG(type))
if (IS_DATASEG(curseg->seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
if (IS_NODESEG(type))
if (IS_NODESEG(curseg->seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
__set_sit_entry_type(sbi, type, curseg->segno, modified);
__set_sit_entry_type(sbi, curseg->seg_type, curseg->segno, modified);
}

static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);

/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
return CURSEG_I(sbi, type)->segno;
return curseg->segno;

/* inmem log may not locate on any segment after mount */
if (!curseg->inited)
return 0;

if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;

if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
(curseg->seg_type == CURSEG_HOT_DATA ||
IS_NODESEG(curseg->seg_type)))
return 0;

if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
Expand All @@ -2534,7 +2542,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
return 0;

return CURSEG_I(sbi, type)->segno;
return curseg->segno;
}

/*
Expand All @@ -2544,12 +2552,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
int dir = ALLOC_LEFT;

write_sum_page(sbi, curseg->sum_blk,
if (curseg->inited)
write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, segno));
if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;

if (test_opt(sbi, NOHEAP))
Expand Down Expand Up @@ -2626,6 +2636,43 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
f2fs_put_page(sum_page, 1);
}

void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);

mutex_lock(&curseg->curseg_mutex);
if (!curseg->inited)
goto out;

if (get_valid_blocks(sbi, curseg->segno, false)) {
write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, curseg->segno));
} else {
mutex_lock(&DIRTY_I(sbi)->seglist_lock);
__set_test_and_free(sbi, curseg->segno, true);
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
}
out:
mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);

mutex_lock(&curseg->curseg_mutex);
if (!curseg->inited)
goto out;
if (get_valid_blocks(sbi, curseg->segno, false))
goto out;

mutex_lock(&DIRTY_I(sbi)->seglist_lock);
__set_test_and_inuse(sbi, curseg->segno);
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
out:
mutex_unlock(&curseg->curseg_mutex);
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
Expand Down Expand Up @@ -2742,11 +2789,15 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;

if (!curseg->inited)
goto alloc;

if (!curseg->next_blkoff &&
!get_valid_blocks(sbi, curseg->segno, false) &&
!get_ckpt_valid_blocks(sbi, curseg->segno))
return;

alloc:
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
Expand Down Expand Up @@ -3130,19 +3181,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
bool put_pin_sem = false;

if (type == CURSEG_COLD_DATA) {
/* GC during CURSEG_COLD_DATA_PINNED allocation */
if (down_read_trylock(&sbi->pin_sem)) {
put_pin_sem = true;
} else {
type = CURSEG_WARM_DATA;
curseg = CURSEG_I(sbi, type);
}
} else if (type == CURSEG_COLD_DATA_PINNED) {
type = CURSEG_COLD_DATA;
}

down_read(&SM_I(sbi)->curseg_lock);

Expand Down Expand Up @@ -3208,9 +3246,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);

up_read(&SM_I(sbi)->curseg_lock);

if (put_pin_sem)
up_read(&sbi->pin_sem);
}

static void update_device_state(struct f2fs_io_info *fio)
Expand Down Expand Up @@ -3578,7 +3613,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (__exist_node_summaries(sbi))
blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
} else {
Expand Down Expand Up @@ -3656,8 +3691,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
}

if (__exist_node_summaries(sbi))
f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
NR_CURSEG_TYPE - type, META_CP, true);
f2fs_ra_meta_pages(sbi,
sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
NR_CURSEG_PERSIST_TYPE - type, META_CP, true);

for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
Expand Down Expand Up @@ -4159,14 +4195,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;

array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
GFP_KERNEL);
array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
sizeof(*array)), GFP_KERNEL);
if (!array)
return -ENOMEM;

SM_I(sbi)->curseg_array = array;

for (i = 0; i < NR_CURSEG_TYPE; i++) {
for (i = 0; i < NO_CHECK_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
Expand All @@ -4176,8 +4212,13 @@ static int build_curseg(struct f2fs_sb_info *sbi)
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
if (i < NR_PERSISTENT_LOG)
array[i].seg_type = CURSEG_HOT_DATA + i;
else if (i == CURSEG_COLD_DATA_PINNED)
array[i].seg_type = CURSEG_COLD_DATA;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
array[i].inited = false;
}
return restore_curseg_summaries(sbi);
}
Expand Down Expand Up @@ -4416,7 +4457,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
* In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
* In LFS curseg, all blkaddr after .next_blkoff should be unused.
*/
for (i = 0; i < NO_CHECK_TYPE; i++) {
for (i = 0; i < NR_PERSISTENT_LOG; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int blkofs = curseg->next_blkoff;
Expand Down Expand Up @@ -4645,7 +4686,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
int i, ret;

for (i = 0; i < NO_CHECK_TYPE; i++) {
for (i = 0; i < NR_PERSISTENT_LOG; i++) {
ret = fix_curseg_write_pointer(sbi, i);
if (ret)
return ret;
Expand Down
Loading

0 comments on commit d0b9e42

Please sign in to comment.