diff --git a/src/index.rs b/src/index.rs
index 0bf5f9c1..510c6c1c 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -15,7 +15,7 @@ use crate::{
 use std::arch::x86::*;
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
-use std::convert::TryInto;
+use std::{cmp::max, convert::TryInto};
 
 // Index chunk consists of 8 64-bit entries.
 const CHUNK_LEN: usize = CHUNK_ENTRIES * ENTRY_BYTES; // 512 bytes
@@ -238,25 +238,27 @@ impl IndexTable {
 	#[cfg(target_feature = "sse2")]
 	fn find_entry(&self, key_prefix: u64, sub_index: usize, chunk: &[u8]) -> (Entry, usize) {
 		assert!(chunk.len() >= CHUNK_ENTRIES * 8); // Bound checking (not done by SIMD instructions)
-		debug_assert!(
-			Entry::address_bits(self.id.index_bits()) <= 32,
-			"To be sure we can use all high 32 bits as key prefix"
-		);
 		const _: () = assert!(
 			CHUNK_ENTRIES % 4 == 0,
 			"We assume here we got buffer with a number of elements that is a multiple of 4"
 		);
 
+		let shift = max(32, Entry::address_bits(self.id.index_bits()));
 		unsafe {
-			let target = _mm_set1_epi32(((key_prefix << self.id.index_bits()) >> 32) as i32);
+			let target = _mm_set1_epi32(((key_prefix << self.id.index_bits()) >> shift) as i32);
+			let shift_mask = _mm_set_epi64x(0, shift.into());
 			let mut i = (sub_index >> 2) << 2; // We keep an alignment of 4
 			while i + 4 <= CHUNK_ENTRIES {
-				// We load the value 2 by 2 and move the high bits into the low part of the register
-				let first_two = _mm_shuffle_epi32::<0b10001101>(_mm_loadu_si128(
-					chunk[i * 8..].as_ptr() as *const __m128i,
+				// We load the value 2 by 2
+				// Then we remove the address by shifting such that the partial key is in the low
+				// part
+				let first_two = _mm_shuffle_epi32::<0b11011000>(_mm_srl_epi64(
+					_mm_loadu_si128(chunk[i * 8..].as_ptr() as *const __m128i),
+					shift_mask,
 				));
-				let last_two = _mm_shuffle_epi32::<0b10001101>(_mm_loadu_si128(
-					chunk[(i + 2) * 8..].as_ptr() as *const __m128i,
+				let last_two = _mm_shuffle_epi32::<0b11011000>(_mm_srl_epi64(
+					_mm_loadu_si128(chunk[(i + 2) * 8..].as_ptr() as *const __m128i),
+					shift_mask,
 				));
 				// We set into current the input low parts
 				let current = _mm_unpacklo_epi64(first_two, last_two);
@@ -575,6 +577,7 @@ impl IndexTable {
 #[cfg(test)]
 mod test {
 	use super::*;
+	use std::path::PathBuf;
 
 	#[test]
 	fn test_entries() {
@@ -595,4 +598,42 @@ mod test {
 
 		assert!(IndexTable::transmute_chunk(chunk2) == chunk);
 	}
+
+	/// Checks that `find_entry` returns the entry whose partial key matches the searched
+	/// key prefix, for several `index_bits` values (presumably picked so that the address
+	/// width straddles the 32-bit boundary — TODO confirm against `Entry::address_bits`).
+	#[test]
+	fn test_find_entries() {
+		for index_bits in [16, 18, 20, 22] {
+			let index_table = IndexTable {
+				id: TableId(index_bits.into()),
+				map: RwLock::new(None),
+				path: PathBuf::new(),
+			};
+			let data_address = Address::from_u64((1 << index_bits) - 1);
+
+			// Fill the first three slots with entries that differ only by partial key;
+			// the remaining slots stay zeroed.
+			let mut chunk = [0u8; CHUNK_ENTRIES * 8];
+			for (slot, partial_key) in chunk.chunks_exact_mut(8).zip([1u64, 1 << 10, 1 << 20]) {
+				let entry = Entry::new(data_address, partial_key, index_bits);
+				slot.copy_from_slice(&entry.as_u64().to_le_bytes());
+			}
+
+			// Look up the second and third entries, passing sub-index 0 and 1 respectively.
+			for (partial_key, sub_index) in [(1u64 << 10, 0usize), (1 << 20, 1)] {
+				// `+` binds tighter than `<<`; the parentheses just make explicit that the
+				// partial key is shifted by the sum of both bit widths.
+				let key_prefix = partial_key << (CHUNK_ENTRIES_BITS + SIZE_TIERS_BITS);
+				let (found, _) = index_table.find_entry(key_prefix, sub_index, &chunk);
+				assert_eq!(
+					found.partial_key(index_bits),
+					partial_key,
+					"wrong entry for index_bits = {}, sub_index = {}",
+					index_bits,
+					sub_index
+				);
+			}
+		}
+	}
 }