diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 5ce00bc02583..7a2ba8ea0a05 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -336,14 +336,6 @@ typedef struct dmu_buf_impl { /* The buffer was partially read. More reads may follow. */ uint8_t db_partial_read; - - /* - * This block is being held under a writer rangelock of a Direct I/O - * write that is waiting for previous buffered writes to synced out - * due to mixed buffered and O_DIRECT operations. This is needed to - * check whether to grab the rangelock in zfs_get_data(). - */ - uint8_t db_mixed_io_dio_wait; } dmu_buf_impl_t; #define DBUF_HASH_MUTEX(h, idx) \ @@ -392,7 +384,7 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, uint64_t blkid, uint64_t *hash_out); int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); -void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail); boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed); @@ -401,9 +393,6 @@ dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx); boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); -void dmu_buf_direct_mixed_io_wait(dmu_buf_impl_t *db, uint64_t txg, - boolean_t read); -void dmu_buf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); blkptr_t *dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db); int dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa); arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 216d7d28853d..38ce279808f5 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -1088,7 +1088,6 @@ typedef struct zgd { struct blkptr *zgd_bp; dmu_buf_t *zgd_db; struct zfs_locked_range *zgd_lr; - boolean_t zgd_grabbed_rangelock; void *zgd_private; } zgd_t; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index cf66c38f0ce2..77f7664fb22f 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -82,13 +82,6 @@ typedef struct dbuf_stats { */ kstat_named_t cache_levels[DN_MAX_LEVELS]; kstat_named_t cache_levels_bytes[DN_MAX_LEVELS]; - /* - * Statistics for Direct I/O. - */ - kstat_named_t direct_mixed_io_read_wait; - kstat_named_t direct_mixed_io_write_wait; - kstat_named_t direct_sync_wait; - kstat_named_t direct_undirty; /* * Statistics about the dbuf hash table. 
*/ @@ -137,10 +130,6 @@ dbuf_stats_t dbuf_stats = { { "cache_total_evicts", KSTAT_DATA_UINT64 }, { { "cache_levels_N", KSTAT_DATA_UINT64 } }, { { "cache_levels_bytes_N", KSTAT_DATA_UINT64 } }, - { "direct_mixed_io_read_wait", KSTAT_DATA_UINT64 }, - { "direct_mixed_io_write_wait", KSTAT_DATA_UINT64 }, - { "direct_sync_wait", KSTAT_DATA_UINT64 }, - { "direct_undirty", KSTAT_DATA_UINT64 }, { "hash_hits", KSTAT_DATA_UINT64 }, { "hash_misses", KSTAT_DATA_UINT64 }, { "hash_collisions", KSTAT_DATA_UINT64 }, @@ -162,10 +151,6 @@ struct { wmsum_t cache_total_evicts; wmsum_t cache_levels[DN_MAX_LEVELS]; wmsum_t cache_levels_bytes[DN_MAX_LEVELS]; - wmsum_t direct_mixed_io_read_wait; - wmsum_t direct_mixed_io_write_wait; - wmsum_t direct_sync_wait; - wmsum_t direct_undirty; wmsum_t hash_hits; wmsum_t hash_misses; wmsum_t hash_collisions; @@ -911,14 +896,6 @@ dbuf_kstat_update(kstat_t *ksp, int rw) ds->cache_levels_bytes[i].value.ui64 = wmsum_value(&dbuf_sums.cache_levels_bytes[i]); } - ds->direct_mixed_io_read_wait.value.ui64 = - wmsum_value(&dbuf_sums.direct_mixed_io_read_wait); - ds->direct_mixed_io_write_wait.value.ui64 = - wmsum_value(&dbuf_sums.direct_mixed_io_write_wait); - ds->direct_sync_wait.value.ui64 = - wmsum_value(&dbuf_sums.direct_sync_wait); - ds->direct_undirty.value.ui64 = - wmsum_value(&dbuf_sums.direct_undirty); ds->hash_hits.value.ui64 = wmsum_value(&dbuf_sums.hash_hits); ds->hash_misses.value.ui64 = @@ -1021,10 +998,6 @@ dbuf_init(void) wmsum_init(&dbuf_sums.cache_levels[i], 0); wmsum_init(&dbuf_sums.cache_levels_bytes[i], 0); } - wmsum_init(&dbuf_sums.direct_mixed_io_read_wait, 0); - wmsum_init(&dbuf_sums.direct_mixed_io_write_wait, 0); - wmsum_init(&dbuf_sums.direct_sync_wait, 0); - wmsum_init(&dbuf_sums.direct_undirty, 0); wmsum_init(&dbuf_sums.hash_hits, 0); wmsum_init(&dbuf_sums.hash_misses, 0); wmsum_init(&dbuf_sums.hash_collisions, 0); @@ -1097,10 +1070,6 @@ dbuf_fini(void) wmsum_fini(&dbuf_sums.cache_levels[i]); wmsum_fini(&dbuf_sums.cache_levels_bytes[i]); } - wmsum_fini(&dbuf_sums.direct_mixed_io_read_wait); - wmsum_fini(&dbuf_sums.direct_mixed_io_write_wait); - wmsum_fini(&dbuf_sums.direct_sync_wait); - wmsum_fini(&dbuf_sums.direct_undirty); wmsum_fini(&dbuf_sums.hash_hits); wmsum_fini(&dbuf_sums.hash_misses); wmsum_fini(&dbuf_sums.hash_collisions); @@ -1271,9 +1240,8 @@ dbuf_clear_data(dmu_buf_impl_t *db) { ASSERT(MUTEX_HELD(&db->db_mtx)); dbuf_evict_user(db); - /* Direct I/O writes may have data */ - if (db->db_buf == NULL) - db->db.db_data = NULL; + ASSERT3P(db->db_buf, ==, NULL); + db->db.db_data = NULL; if (db->db_state != DB_NOFILL) { db->db_state = DB_UNCACHED; DTRACE_SET_STATE(db, "clear data"); @@ -2789,93 +2757,6 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) return (dr != NULL); } -void -dmu_buf_direct_mixed_io_wait(dmu_buf_impl_t *db, uint64_t txg, boolean_t read) -{ - ASSERT(MUTEX_HELD(&db->db_mtx)); - - if (read == B_TRUE) { - /* - * If a buffered read is in process, a Direct I/O read will - * wait for the buffered I/O to complete. - */ - ASSERT3U(txg, ==, 0); - while (db->db_state == DB_READ) { - DBUF_STAT_BUMP(direct_mixed_io_read_wait); - cv_wait(&db->db_changed, &db->db_mtx); - } - } else { - /* - * There must be an ARC buf associated with this Direct I/O - * write otherwise there is no reason to wait for previous - * dirty records to sync out. - * - * The db_state will temporarily be set to DB_CACHED so that - * that any synchronous writes issued through the ZIL will - * still be handled properly. 
In particular, the call to - * dbuf_read() in dmu_sync_late_arrival() must account for the - * data still being in the ARC. After waiting here for previous - * TXGs to sync out, dmu_write_direct_done() will update the - * db_state. - */ - ASSERT3P(db->db_buf, !=, NULL); - ASSERT3U(txg, >, 0); - db->db_mixed_io_dio_wait = TRUE; - db->db_state = DB_CACHED; - while (dbuf_find_dirty_lte(db, txg) != NULL) { - DBUF_STAT_BUMP(direct_mixed_io_write_wait); - cv_wait(&db->db_changed, &db->db_mtx); - } - db->db_mixed_io_dio_wait = FALSE; - } -} - -/* - * Direct I/O writes may need to undirty the open-context dirty record - * associated with it in the event of an I/O error. - */ -void -dmu_buf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) -{ - /* - * Direct I/O writes always happen in open-context. - */ - ASSERT(!dmu_tx_is_syncing(tx)); - ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_state == DB_NOFILL || db->db_state == DB_UNCACHED); - - - /* - * In the event of an I/O error we will handle the metaslab clean up in - * zio_done(). Also, the dirty record's dr_overridden_by BP is not - * currently set as that is done in dmu_sync_done(). Since the db_state - * is still set to DB_NOFILL, dbuf_unoverride() will not be called in - * dbuf_undirty() and the dirty record's BP will not be added the SPA's - * spa_free_bplist via zio_free(). - * - * This function can also be called in the event that a Direct I/O - * write is overwriting a previous Direct I/O to the same block for - * this TXG. It is important to go ahead and free up the space - * accounting in this case through dbuf_undirty() -> dbuf_unoverride() - * -> zio_free(). This is necessary because the space accounting for - * determining if a write can occur in zfs_write() happens through - * dmu_tx_assign(). This can cause an issue with Direct I/O writes in - * the case of overwrites, because all DVA allocations are being done - * in open-context. Constanstly allowing Direct I/O overwrites to the - * same blocks can exhaust the pools available space leading to ENOSPC - * errors at the DVA allcoation part of the ZIO pipeline, which will - * eventually suspend the pool. By cleaning up space accounting now - * the ENOSPC pool suspend can be avoided. - * - * Since we are undirtying the record for the Direct I/O in - * open-context we must have a hold on the db, so it should never be - * evicted after calling dbuf_undirty(). - */ - VERIFY3B(dbuf_undirty(db, tx), ==, B_FALSE); - - DBUF_STAT_BUMP(direct_undirty); -} - /* * Normally the db_blkptr points to the most recent on-disk content for the * dbuf (and anything newer will be cached in the dbuf). However, a recent @@ -2951,7 +2832,7 @@ dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa) } void -dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) +dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ASSERT0(db->db_level); @@ -2959,14 +2840,41 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); /* - * Block cloning: We are going to clone into this block, so undirty - * modifications done to this block so far in this txg. This includes - * writes and clones into this block. + * Block clones and Direct I/O writes always happen in open-context. 
*/ + ASSERT(!dmu_tx_is_syncing(tx)); + ASSERT0(db->db_level); + ASSERT(db->db_blkid != DMU_BONUS_BLKID); + ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); + mutex_enter(&db->db_mtx); DBUF_VERIFY(db); - VERIFY(!dbuf_undirty(db, tx)); + + /* + * We are going to clone or issue a Direct I/O write on this block, so + * undirty modifications done to this block so far in this txg. This + * includes writes and clones into this block. + * + * If there is a dirty record associated with this txg from a previous + * Direct I/O write, then space accounting cleanup takes place. It is + * important to go ahead and free up the space accounting through + * dbuf_undirty() -> dbuf_unoverride() -> zio_free(). Space accounting + * for determining if a write can occur in zfs_write() happens through + * dmu_tx_assign(). This can cause an issue with Direct I/O writes in + * the case of overwriting the same block, because all DVA allocations + * are being done in open-context. Constantly allowing Direct I/O + * overwrites to the same block can exhaust the pool's available space, + * leading to ENOSPC errors at the DVA allocation part of the ZIO + * pipeline, which will eventually suspend the pool. By cleaning up + * space accounting now, the ENOSPC error can be avoided. + * + * Since we are undirtying the record in open-context, we must have a + * hold on the db, so it should never be evicted after calling + * dbuf_undirty(). + */ + VERIFY3B(dbuf_undirty(db, tx), ==, B_FALSE); ASSERT0P(dbuf_find_dirty_eq(db, tx->tx_txg)); + if (db->db_buf != NULL) { /* * If there is an associated ARC buffer with this dbuf we can * only destroy it if the previous dirty record does not * reference it. */ if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) arc_buf_destroy(db->db_buf, db); + /* + * Setting the dbuf's data pointers to NULL will force all + * future reads down to the devices to get the most up to date + * version of the data after a Direct I/O write has completed. + */ db->db_buf = NULL; dbuf_clear_data(db); } ASSERT3P(db->db.db_data, ==, NULL); db->db_state = DB_NOFILL; - DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone"); + DTRACE_SET_STATE(db, + "allocating NOFILL buffer for clone or direct I/O write"); DBUF_VERIFY(db); mutex_exit(&db->db_mtx); @@ -3532,7 +3446,6 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_user_immediate_evict = FALSE; db->db_freed_in_flight = FALSE; db->db_pending_evict = FALSE; - db->db_mixed_io_dio_wait = FALSE; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); @@ -4788,25 +4701,6 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); mutex_enter(&db->db_mtx); - - /* - * It is possible a buffered read has come in after a Direct I/O - * write and is currently transistioning the db_state from DB_READ - * in dbuf_read_impl() to another state in dbuf_read_done(). We - * have to wait in order for the dbuf state to change from DB_READ - * before syncing the dirty record of the Direct I/O write. - */ - if (db->db_state == DB_READ && !dr->dt.dl.dr_brtwrite) { - ASSERT3P(*datap, ==, NULL); - ASSERT3P(db->db_buf, ==, NULL); - ASSERT3P(db->db.db_data, ==, NULL); - ASSERT3U(dr->dt.dl.dr_override_state, ==, DR_OVERRIDDEN); - while (db->db_state == DB_READ) { - DBUF_STAT_BUMP(direct_sync_wait); - cv_wait(&db->db_changed, &db->db_mtx); - } - } - /* * To be synced, we must be dirtied. But we might have been freed * after the dirty.
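A minimal user-space sketch of the sequence the comment above describes for dmu_buf_will_clone_or_dio(): first undirty the open-txg dirty record (which, via dbuf_undirty() -> dbuf_unoverride() -> zio_free(), releases DVAs that an earlier Direct I/O overwrite of the same block already allocated), then drop both the ARC buffer and db.db_data so later reads must go to the vdevs. Every model_* name below is a hypothetical stand-in, not a real ZFS symbol.

/*
 * Sketch only: models the "undirty, then drop cached data" sequence of
 * dmu_buf_will_clone_or_dio() with hypothetical stand-in types.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef enum { MODEL_DB_UNCACHED, MODEL_DB_CACHED, MODEL_DB_NOFILL } model_db_state_t;

typedef struct {
        void *db_buf;           /* stand-in for the ARC buffer */
        void *db_data;          /* stand-in for db->db.db_data */
        bool dirty;             /* stand-in for an open-txg dirty record */
        model_db_state_t state;
} model_dbuf_t;

/*
 * Stand-in for dbuf_undirty(); in ZFS this path also frees DVAs that a
 * previous Direct I/O overwrite of the same block already allocated, which
 * is what avoids the ENOSPC scenario described in the comment above.
 */
static void
model_undirty(model_dbuf_t *db)
{
        db->dirty = false;
}

static void
model_will_clone_or_dio(model_dbuf_t *db)
{
        model_undirty(db);
        /* Clearing both data pointers forces later reads down to the vdevs. */
        db->db_buf = NULL;
        db->db_data = NULL;
        db->state = MODEL_DB_NOFILL;
}

int
main(void)
{
        model_dbuf_t db = { .db_buf = &db, .db_data = &db, .dirty = true,
            .state = MODEL_DB_CACHED };
        model_will_clone_or_dio(&db);
        assert(!db.dirty && db.db_buf == NULL && db.db_data == NULL);
        printf("final state = %d (NOFILL)\n", (int)db.state);
        return (0);
}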
@@ -4819,13 +4713,21 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(db->db.db_data != dr->dt.dl.dr_data); } else if (db->db_state == DB_READ) { /* - * This buffer has a clone we need to write, and an in-flight - * read on the BP we're about to clone. Its safe to issue the - * write here because the read has already been issued and the - * contents won't change. + * This buffer was either cloned or had a Direct I/O write + * occur and has an in-flight read on the BP. It is safe to + * issue the write here, because the read has already been + * issued and the contents won't change. + * + * We can verify the case of both the clone and Direct I/O + * write by making sure the first dirty record for the dbuf + * has no ARC buffer associated with it. */ - ASSERT(dr->dt.dl.dr_brtwrite && - dr->dt.dl.dr_override_state == DR_OVERRIDDEN); + dbuf_dirty_record_t *dr_head = + list_head(&db->db_dirty_records); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL); + ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN); } else { ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); } @@ -5522,7 +5424,7 @@ EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dmu_buf_set_crypt_params); EXPORT_SYMBOL(dmu_buf_will_dirty); EXPORT_SYMBOL(dmu_buf_is_dirty); -EXPORT_SYMBOL(dmu_buf_will_clone); +EXPORT_SYMBOL(dmu_buf_will_clone_or_dio); EXPORT_SYMBOL(dmu_buf_will_not_fill); EXPORT_SYMBOL(dmu_buf_will_fill); EXPORT_SYMBOL(dmu_buf_fill_done); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index ba47be9c9e15..db08b1843163 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2676,7 +2676,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, ASSERT(db->db_blkid != DMU_SPILL_BLKID); ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - dmu_buf_will_clone(dbuf, tx); + dmu_buf_will_clone_or_dio(dbuf, tx); mutex_enter(&db->db_mtx); diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c index 0ff3e0e55e40..bf4733530204 100644 --- a/module/zfs/dmu_direct.c +++ b/module/zfs/dmu_direct.c @@ -90,78 +90,35 @@ dmu_write_direct_done(zio_t *zio) dmu_sync_arg_t *dsa = zio->io_private; dbuf_dirty_record_t *dr = dsa->dsa_dr; dmu_buf_impl_t *db = dr->dr_dbuf; - uint64_t txg = dsa->dsa_tx->tx_txg; abd_free(zio->io_abd); + mutex_enter(&db->db_mtx); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(dr->dt.dl.dr_data, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + db->db_state = DB_UNCACHED; + mutex_exit(&db->db_mtx); - if (zio->io_error == 0) { - /* - * After a successful Direct I/O write any stale contents in - * the ARC must be cleaned up in order to force all future - * reads down to the VDEVs. - * - * If a previous write operation to this dbuf was buffered - * (in the ARC) we have to wait for the previous dirty records - * associated with this dbuf to be synced out if they are in - * the quiesce or sync phase for their TXG. This is done to - * guarantee we are not racing to destroy the ARC buf that - * is associated with the dbuf between this done callback and - * spa_sync(). Outside of using a heavy handed approach of - * locking down the spa_syncing_txg while it is being updated, - * there is no way to synchronize when a dirty record's TXG - * has moved over to the sync phase. - * - * In order to make sure all TXG's are consistent we must - * do this stall if there is an associated ARC buf with this - * dbuf.
It is because of this that a user should not really - * be mixing buffered and Direct I/O writes. If they choose to - * do so, there is an associated performance penalty for that - * as we will not give up consistency with a TXG over - * performance. - */ - if (db->db_buf) { - dmu_buf_direct_mixed_io_wait(db, txg - 1, B_FALSE); - ASSERT3P(db->db_buf, ==, dr->dt.dl.dr_data); - arc_buf_destroy(db->db_buf, db); - db->db_buf = NULL; - dr->dt.dl.dr_data = NULL; - db->db.db_data = NULL; - ASSERT3U(db->db_dirtycnt, ==, 1); - } + dmu_sync_done(zio, NULL, zio->io_private); - /* - * The current contents of the dbuf are now stale. - */ - ASSERT3P(dr->dt.dl.dr_data, ==, NULL); - ASSERT3P(db->db.db_data, ==, NULL); - db->db_state = DB_UNCACHED; - } else { + if (zio->io_error != 0) { if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) ASSERT3U(zio->io_error, ==, EAGAIN); /* - * If there is a valid ARC buffer assocatied with this dirty - * record we will stall just like on a successful Direct I/O - * write to make sure all TXG's are consistent. See comment - * above. + * In the event of an I/O error the metaslab cleanup is taken + * care of in zio_done(). + * + * Since we are undirtying the record in open-context, we must + * have a hold on the db, so it should never be evicted after + * calling dbuf_undirty(). */ - if (db->db_buf) { - ASSERT3P(db->db_buf, ==, dr->dt.dl.dr_data); - dmu_buf_direct_mixed_io_wait(db, txg - 1, B_FALSE); - dmu_buf_undirty(db, dsa->dsa_tx); - db->db_state = DB_CACHED; - } else { - ASSERT3P(dr->dt.dl.dr_data, ==, NULL); - dmu_buf_undirty(db, dsa->dsa_tx); - db->db_state = DB_UNCACHED; - } - - ASSERT0(db->db_dirtycnt); + mutex_enter(&db->db_mtx); + VERIFY3B(dbuf_undirty(db, dsa->dsa_tx), ==, B_FALSE); + mutex_exit(&db->db_mtx); } - mutex_exit(&db->db_mtx); - dmu_sync_done(zio, NULL, zio->io_private); kmem_free(zio->io_bp, sizeof (blkptr_t)); zio->io_bp = NULL; } @@ -183,26 +140,11 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx) WP_DMU_SYNC | WP_DIRECT_WR, &zp); DB_DNODE_EXIT(db); - /* - * If we going to overwrite a previous Direct I/O write that is part of - * the current TXG, then we can can go ahead and undirty it now. Part - * of it being undirtied will be allowing for previously allocated - * space in the dr_overridden_bp BP's DVAs to be freed. This avoids - * ENOSPC errors from possibly occuring when trying to allocate new - * metaslabs in open-context for Direct I/O writes. - */ - mutex_enter(&db->db_mtx); - dr_head = dbuf_find_dirty_eq(db, dmu_tx_get_txg(tx)); - if (dbuf_dirty_is_direct_write(db, dr_head)) { - dmu_buf_undirty(db, tx); - } - mutex_exit(&db->db_mtx); - /* * Dirty this dbuf with DB_NOFILL since we will not have any data * associated with the dbuf. */ - dmu_buf_will_not_fill(&db->db, tx); + dmu_buf_will_clone_or_dio(&db->db, tx); mutex_enter(&db->db_mtx); @@ -289,7 +231,7 @@ dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size, /* * The dbuf must be held until the Direct I/O write has completed in - * the event there was any errors and dmu_buf_undirty() was called. + * the event there was any errors and dbuf_undirty() was called. */ dmu_buf_rele_array(dbp, numbufs, FTAG); @@ -325,10 +267,11 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size, db->db.db_object, db->db_level, db->db_blkid); /* - * If there is another buffered read for this dbuf, we will - * wait for that to complete first. + * If there is another read for this dbuf, we will wait for + * that to complete first before checking the db_state below. 
*/ - dmu_buf_direct_mixed_io_wait(db, 0, B_TRUE); + while (db->db_state == DB_READ) + cv_wait(&db->db_changed, &db->db_mtx); blkptr_t *bp = dmu_buf_get_bp_from_dbuf(db); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index fb6f9475d350..2460805582f2 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1096,32 +1096,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, zgd->zgd_lwb = lwb; zgd->zgd_private = zp; - dmu_buf_t *dbp; - error = dmu_buf_hold_noread(os, object, offset, zgd, &dbp); - zgd->zgd_db = dbp; - dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp; - - if (error) { - zfs_get_done(zgd, error); - return (error); - } - - /* - * If a Direct I/O write is waiting for previous dirty records to sync - * out in dmu_buf_direct_mixed_io_wait(), then the ranglock is already - * held across the entire block by the O_DIRECT write. - * - * The dirty record for this TXG will also be used to identify if this - * log record is associated with a Direct I/O write. - */ - mutex_enter(&db->db_mtx); - boolean_t rangelock_held = db->db_mixed_io_dio_wait; - zgd->zgd_grabbed_rangelock = !(rangelock_held); - dbuf_dirty_record_t *dr = - dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg); - boolean_t direct_write = dbuf_dirty_is_direct_write(db, dr); - mutex_exit(&db->db_mtx); - /* * Write records come in two flavors: immediate and indirect. * For small writes it's cheaper to store the data with the @@ -1130,10 +1104,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - if (zgd->zgd_grabbed_rangelock) { - zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, - offset, size, RL_READER); - } + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, + size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); @@ -1150,29 +1122,19 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, * that no one can change the data. We need to re-check * blocksize after we get the lock in case it's changed! */ - if (zgd->zgd_grabbed_rangelock) { - for (;;) { - uint64_t blkoff; - size = zp->z_blksz; - blkoff = ISP2(size) ? P2PHASE(offset, size) : - offset; - offset -= blkoff; - zgd->zgd_lr = zfs_rangelock_enter( - &zp->z_rangelock, offset, size, RL_READER); - if (zp->z_blksz == size) - break; - offset += blkoff; - zfs_rangelock_exit(zgd->zgd_lr); - } - ASSERT3U(dbp->db_size, ==, size); - ASSERT3U(dbp->db_offset, ==, offset); - } else { - /* - * A Direct I/O write always covers an entire block. - */ - ASSERT3U(dbp->db_size, ==, zp->z_blksz); + for (;;) { + uint64_t blkoff; + size = zp->z_blksz; + blkoff = ISP2(size) ? P2PHASE(offset, size) : + offset; + offset -= blkoff; + zgd->zgd_lr = zfs_rangelock_enter( + &zp->z_rangelock, offset, size, RL_READER); + if (zp->z_blksz == size) + break; + offset += blkoff; + zfs_rangelock_exit(zgd->zgd_lr); } - /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) error = SET_ERROR(ENOENT); @@ -1182,48 +1144,69 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, zil_fault_io = 0; } #endif - if (error) { - zfs_get_done(zgd, error); - return (error); - } - /* - * All Direct I/O writes will have already completed and the - * block pointer can be immediately stored in the log record. 
- */ - if (direct_write) { - lr->lr_blkptr = dr->dt.dl.dr_overridden_by; - zfs_get_done(zgd, 0); - return (0); - } + dmu_buf_t *dbp; + if (error == 0) + error = dmu_buf_hold_noread(os, object, offset, zgd, + &dbp); + + if (error == 0) { + zgd->zgd_db = dbp; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp; + mutex_enter(&db->db_mtx); + dbuf_dirty_record_t *dr = + dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg); + boolean_t direct_write = + dbuf_dirty_is_direct_write(db, dr); + mutex_exit(&db->db_mtx); - blkptr_t *bp = &lr->lr_blkptr; - zgd->zgd_bp = bp; + /* + * All Direct I/O writes will have already completed and + * the block pointer can be immediately stored in the + * log record. + */ + if (direct_write) { + /* + * A Direct I/O write always covers an entire + * block. + */ + ASSERT3U(dbp->db_size, ==, zp->z_blksz); + lr->lr_blkptr = dr->dt.dl.dr_overridden_by; + zfs_get_done(zgd, 0); + return (0); + } - error = dmu_sync(zio, lr->lr_common.lrc_txg, - zfs_get_done, zgd); - ASSERT(error || lr->lr_length <= size); + blkptr_t *bp = &lr->lr_blkptr; + zgd->zgd_bp = bp; - /* - * On success, we need to wait for the write I/O - * initiated by dmu_sync() to complete before we can - * release this dbuf. We will finish everything up - * in the zfs_get_done() callback. - */ - if (error == 0) - return (0); + ASSERT3U(dbp->db_offset, ==, offset); + ASSERT3U(dbp->db_size, ==, size); + + error = dmu_sync(zio, lr->lr_common.lrc_txg, + zfs_get_done, zgd); + ASSERT(error || lr->lr_length <= size); - if (error == EALREADY) { - lr->lr_common.lrc_txtype = TX_WRITE2; /* - * TX_WRITE2 relies on the data previously - * written by the TX_WRITE that caused - * EALREADY. We zero out the BP because - * it is the old, currently-on-disk BP. + * On success, we need to wait for the write I/O + * initiated by dmu_sync() to complete before we can + * release this dbuf. We will finish everything up + * in the zfs_get_done() callback. */ - zgd->zgd_bp = NULL; - BP_ZERO(bp); - error = 0; + if (error == 0) + return (0); + + if (error == EALREADY) { + lr->lr_common.lrc_txtype = TX_WRITE2; + /* + * TX_WRITE2 relies on the data previously + * written by the TX_WRITE that caused + * EALREADY. We zero out the BP because + * it is the old, currently-on-disk BP. + */ + zgd->zgd_bp = NULL; + BP_ZERO(bp); + error = 0; + } } } @@ -1232,18 +1215,16 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, return (error); } - static void zfs_get_done(zgd_t *zgd, int error) { (void) error; znode_t *zp = zgd->zgd_private; - ASSERT3P(zgd->zgd_db, !=, NULL); - dmu_buf_rele(zgd->zgd_db, zgd); + if (zgd->zgd_db) + dmu_buf_rele(zgd->zgd_db, zgd); - if (zgd->zgd_grabbed_rangelock) - zfs_rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(zgd->zgd_lr); /* * Release the vnode asynchronously as we currently have the
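A minimal user-space sketch of the reworked zfs_get_data() indirect-write path shown above: the range lock is now always taken by zfs_get_data() itself (zgd_grabbed_rangelock is gone), the dbuf is held only after the lock, and a block that a Direct I/O write has already written short-circuits by copying its block pointer into the log record instead of calling dmu_sync(). All model_* names are hypothetical stand-ins for the real ZFS types.

/*
 * Sketch only: models the ordering of the new indirect-write path in
 * zfs_get_data() with hypothetical stand-in types.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct { int lr_blkptr; } model_lr_t;
typedef struct {
        bool direct_write_done; /* dbuf_dirty_is_direct_write() in ZFS */
        int overridden_bp;      /* dr->dt.dl.dr_overridden_by in ZFS */
} model_dbuf_t;

/* Returns 0 when the log record is complete, 1 when dmu_sync() is needed. */
static int
model_get_data(model_dbuf_t *db, model_lr_t *lr)
{
        /* 1. zfs_rangelock_enter() would happen here, before the dbuf hold. */
        /* 2. dmu_buf_hold_noread() would happen here. */
        if (db->direct_write_done) {
                /* 3a. Direct I/O already wrote the block; reuse its BP. */
                lr->lr_blkptr = db->overridden_bp;
                return (0);
        }
        /* 3b. Otherwise fall through to dmu_sync() and wait for its zio. */
        return (1);
}

int
main(void)
{
        model_dbuf_t db = { .direct_write_done = true, .overridden_bp = 42 };
        model_lr_t lr = { 0 };
        int need_sync = model_get_data(&db, &lr);
        printf("dmu_sync needed: %d, lr_blkptr: %d\n", need_sync, lr.lr_blkptr);
        return (0);
}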