diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b14b87463ee04e..3ec941a0ea41c5 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -896,9 +896,12 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { check_and_free_fields(htab, l); + + migrate_disable(); if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); bpf_mem_cache_free(&htab->ma, l); + migrate_enable(); } static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) @@ -948,7 +951,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) if (htab_is_prealloc(htab)) { bpf_map_dec_elem_count(&htab->map); check_and_free_fields(htab, l); - __pcpu_freelist_push(&htab->freelist, &l->fnode); + pcpu_freelist_push(&htab->freelist, &l->fnode); } else { dec_elem_count(htab); htab_elem_free(htab, l); @@ -1018,7 +1021,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, */ pl_new = this_cpu_ptr(htab->extra_elems); l_new = *pl_new; - htab_put_fd_value(htab, old_elem); *pl_new = old_elem; } else { struct pcpu_freelist_node *l; @@ -1105,6 +1107,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, struct htab_elem *l_new = NULL, *l_old; struct hlist_nulls_head *head; unsigned long flags; + void *old_map_ptr; struct bucket *b; u32 key_size, hash; int ret; @@ -1183,12 +1186,27 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { hlist_nulls_del_rcu(&l_old->hash_node); + + /* l_old has already been stashed in htab->extra_elems, free + * its special fields before it is available for reuse. Also + * save the old map pointer in htab of maps before unlock + * and release it after unlock. + */ + old_map_ptr = NULL; + if (htab_is_prealloc(htab)) { + if (map->ops->map_fd_put_ptr) + old_map_ptr = fd_htab_map_get_ptr(map, l_old); + check_and_free_fields(htab, l_old); + } + } + htab_unlock_bucket(htab, b, hash, flags); + if (l_old) { + if (old_map_ptr) + map->ops->map_fd_put_ptr(map, old_map_ptr, true); if (!htab_is_prealloc(htab)) free_htab_elem(htab, l_old); - else - check_and_free_fields(htab, l_old); } - ret = 0; + return 0; err: htab_unlock_bucket(htab, b, hash, flags); return ret; @@ -1432,15 +1450,15 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key) return ret; l = lookup_elem_raw(head, hash, key, key_size); - - if (l) { + if (l) hlist_nulls_del_rcu(&l->hash_node); - free_htab_elem(htab, l); - } else { + else ret = -ENOENT; - } htab_unlock_bucket(htab, b, hash, flags); + + if (l) + free_htab_elem(htab, l); return ret; } @@ -1853,13 +1871,14 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, * may cause deadlock. See comments in function * prealloc_lru_pop(). Let us do bpf_lru_push_free() * after releasing the bucket lock. + * + * For htab of maps, htab_put_fd_value() in + * free_htab_elem() may acquire a spinlock with bucket + * lock being held and it violates the lock rule, so + * invoke free_htab_elem() after unlock as well. */ - if (is_lru_map) { - l->batch_flink = node_to_free; - node_to_free = l; - } else { - free_htab_elem(htab, l); - } + l->batch_flink = node_to_free; + node_to_free = l; } dst_key += key_size; dst_val += value_size; @@ -1871,7 +1890,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, while (node_to_free) { l = node_to_free; node_to_free = node_to_free->batch_flink; - htab_lru_push_free(htab, l); + if (is_lru_map) + htab_lru_push_free(htab, l); + else + free_htab_elem(htab, l); } next_batch: