diff --git a/src/libcore/iter/mod.rs b/src/libcore/iter/mod.rs
index 3999db0d63c99..d9b8c5ea589fd 100644
--- a/src/libcore/iter/mod.rs
+++ b/src/libcore/iter/mod.rs
@@ -1086,7 +1086,7 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
 
     #[inline]
     fn next(&mut self) -> Option<I::Item> {
-        for x in self.iter.by_ref() {
+        for x in &mut self.iter {
             if (self.predicate)(&x) {
                 return Some(x);
             }
@@ -1099,6 +1099,26 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
         let (_, upper) = self.iter.size_hint();
         (0, upper) // can't know a lower bound, due to the predicate
     }
+
+    // this special case allows the compiler to make `.filter(_).count()`
+    // branchless. Barring perfect branch prediction (which is unattainable in
+    // the general case), this will be much faster in >90% of cases (containing
+    // virtually all real workloads) and only a tiny bit slower in the rest.
+    //
+    // Having this specialization thus allows us to write `.filter(p).count()`
+    // where we would otherwise write `.map(|x| p(x) as usize).sum()`, which is
+    // less readable and also less backwards-compatible to Rust before 1.10.
+    //
+    // Using the branchless version will also simplify the LLVM byte code, thus
+    // leaving more budget for LLVM optimizations.
+    #[inline]
+    fn count(mut self) -> usize {
+        let mut count = 0;
+        for x in &mut self.iter {
+            count += (self.predicate)(&x) as usize;
+        }
+        count
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/src/libcoretest/iter.rs b/src/libcoretest/iter.rs
index 99d312930533f..e6d2494f5fda8 100644
--- a/src/libcoretest/iter.rs
+++ b/src/libcoretest/iter.rs
@@ -191,6 +191,12 @@ fn test_iterator_enumerate_count() {
     assert_eq!(xs.iter().count(), 6);
 }
 
+#[test]
+fn test_iterator_filter_count() {
+    let xs = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    assert_eq!(xs.iter().filter(|&&x| x % 2 == 0).count(), 5);
+}
+
 #[test]
 fn test_iterator_peekable() {
     let xs = vec![0, 1, 2, 3, 4, 5];