From e8ee00a6493b8cde09f5497f430d46a978ae865f Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Wed, 13 Feb 2019 12:19:58 +0100 Subject: [PATCH 1/5] Add provided methods `Seek::{stream_len, stream_position}` These two methods are defined in terms of `Seek::seek` and are added for convenience. Tests are included. --- src/libstd/io/mod.rs | 121 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index e3e2754a7aa09..99bb24f54dd41 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1329,6 +1329,78 @@ pub trait Seek { /// [`SeekFrom::Start`]: enum.SeekFrom.html#variant.Start #[stable(feature = "rust1", since = "1.0.0")] fn seek(&mut self, pos: SeekFrom) -> Result; + + /// Returns the length of this stream (in bytes). + /// + /// This method is implemented using three seek operations. If this method + /// returns successfully, the seek position is unchanged (i.e. the position + /// before calling this method is the same as afterwards). However, if this + /// method returns an error, the seek position is undefined. + /// + /// If you need to obtain the length of *many* streams and you don't care + /// about the seek position afterwards, you can reduce the number of seek + /// operations by simply calling `seek(SeekFrom::End(0))` and use its + /// return value (it is also the stream length). + /// + /// Note that length of a stream can change over time (for example, when + /// data is appended to a file). So calling this method multiply times does + /// not necessarily return the same length each time. + /// + /// + /// # Example + /// + /// ```no_run + /// #![feature(seek_convenience)] + /// use std::{ + /// io::{self, Seek}, + /// fs::File, + /// }; + /// + /// fn main() -> io::Result<()> { + /// let mut f = File::open("foo.txt")?; + /// + /// let len = f.stream_len()?; + /// println!("The file is currently {} bytes long", len); + /// Ok(()) + /// } + /// ``` + #[unstable(feature = "seek_convenience", issue = "0")] + fn stream_len(&mut self) -> Result { + let old_pos = self.stream_position()?; + let len = self.seek(SeekFrom::End(0))?; + self.seek(SeekFrom::Start(old_pos))?; + Ok(len) + } + + /// Returns the current seek position from the start of the stream. + /// + /// This is equivalent to `self.seek(SeekFrom::Current(0))`. + /// + /// + /// # Example + /// + /// ```no_run + /// #![feature(seek_convenience)] + /// use std::{ + /// io::{self, BufRead, BufReader, Seek}, + /// fs::File, + /// }; + /// + /// fn main() -> io::Result<()> { + /// let mut f = BufReader::new(File::open("foo.txt")?); + /// + /// let before = f.stream_position()?; + /// f.read_line(&mut String::new())?; + /// let after = f.stream_position()?; + /// + /// println!("The first line was {} bytes long", after - before); + /// Ok(()) + /// } + /// ``` + #[unstable(feature = "seek_convenience", issue = "0")] + fn stream_position(&mut self) -> Result { + self.seek(SeekFrom::Current(0)) + } } /// Enumeration of possible methods to seek within an I/O object. @@ -2157,8 +2229,7 @@ impl Iterator for Lines { mod tests { use crate::io::prelude::*; use crate::io; - use super::Cursor; - use super::repeat; + use super::{Cursor, SeekFrom, repeat}; #[test] #[cfg_attr(target_os = "emscripten", ignore)] @@ -2380,4 +2451,50 @@ mod tests { super::read_to_end(&mut lr, &mut vec) }); } + + #[test] + fn seek_len() -> io::Result<()> { + let mut c = Cursor::new(vec![0; 15]); + assert_eq!(c.stream_len()?, 15); + + c.seek(SeekFrom::End(0))?; + let old_pos = c.stream_position()?; + assert_eq!(c.stream_len()?, 15); + assert_eq!(c.stream_position()?, old_pos); + + c.seek(SeekFrom::Start(7))?; + c.seek(SeekFrom::Current(2))?; + let old_pos = c.stream_position()?; + assert_eq!(c.stream_len()?, 15); + assert_eq!(c.stream_position()?, old_pos); + + Ok(()) + } + + #[test] + fn seek_position() -> io::Result<()> { + // All `asserts` are duplicated here to make sure the method does not + // change anything about the seek state. + let mut c = Cursor::new(vec![0; 15]); + assert_eq!(c.stream_position()?, 0); + assert_eq!(c.stream_position()?, 0); + + c.seek(SeekFrom::End(0))?; + assert_eq!(c.stream_position()?, 15); + assert_eq!(c.stream_position()?, 15); + + + c.seek(SeekFrom::Start(7))?; + c.seek(SeekFrom::Current(2))?; + assert_eq!(c.stream_position()?, 9); + assert_eq!(c.stream_position()?, 9); + + c.seek(SeekFrom::End(-3))?; + c.seek(SeekFrom::Current(1))?; + c.seek(SeekFrom::Current(-5))?; + assert_eq!(c.stream_position()?, 8); + assert_eq!(c.stream_position()?, 8); + + Ok(()) + } } From 598a1b4dd1a6cc1bfe689a6931af9e6aa47134e1 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 14 Mar 2019 13:43:17 +0100 Subject: [PATCH 2/5] Avoid third seek operation in `Seek::stream_len` when possible --- src/libstd/io/mod.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 99bb24f54dd41..9edda304bb9e8 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1332,10 +1332,11 @@ pub trait Seek { /// Returns the length of this stream (in bytes). /// - /// This method is implemented using three seek operations. If this method - /// returns successfully, the seek position is unchanged (i.e. the position - /// before calling this method is the same as afterwards). However, if this - /// method returns an error, the seek position is undefined. + /// This method is implemented using up to three seek operations. If this + /// method returns successfully, the seek position is unchanged (i.e. the + /// position before calling this method is the same as afterwards). + /// However, if this method returns an error, the seek position is + /// undefined. /// /// If you need to obtain the length of *many* streams and you don't care /// about the seek position afterwards, you can reduce the number of seek @@ -1368,7 +1369,13 @@ pub trait Seek { fn stream_len(&mut self) -> Result { let old_pos = self.stream_position()?; let len = self.seek(SeekFrom::End(0))?; - self.seek(SeekFrom::Start(old_pos))?; + + // Avoid seeking a third time when we were already at the end of the + // stream. The branch is usually way cheaper than a seek operation. + if old_pos != len { + self.seek(SeekFrom::Start(old_pos))?; + } + Ok(len) } From c518f2dd7040eca7591d3cacffe3646d8f54ac7c Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 14 Mar 2019 13:27:49 +0100 Subject: [PATCH 3/5] Overwrite Cursor's `Seek::stream_{len, position}` for performance --- src/libstd/io/cursor.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/libstd/io/cursor.rs b/src/libstd/io/cursor.rs index 873da0898c7fe..247d45c3ec91f 100644 --- a/src/libstd/io/cursor.rs +++ b/src/libstd/io/cursor.rs @@ -212,6 +212,14 @@ impl io::Seek for Cursor where T: AsRef<[u8]> { "invalid seek to a negative or overflowing position")) } } + + fn stream_len(&mut self) -> io::Result { + Ok(self.inner.as_ref().len() as u64) + } + + fn stream_position(&mut self) -> io::Result { + Ok(self.pos) + } } #[stable(feature = "rust1", since = "1.0.0")] From ea40aa46e7ddc3a363495951b88e7a4dc1dc17d7 Mon Sep 17 00:00:00 2001 From: Lukas Kalbertodt Date: Thu, 14 Mar 2019 17:51:11 +0100 Subject: [PATCH 4/5] Change "undefined" to "unspecified" in `Seek::stream_len` docs --- src/libstd/io/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 9edda304bb9e8..2e521b32903d2 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1336,7 +1336,7 @@ pub trait Seek { /// method returns successfully, the seek position is unchanged (i.e. the /// position before calling this method is the same as afterwards). /// However, if this method returns an error, the seek position is - /// undefined. + /// unspecified. /// /// If you need to obtain the length of *many* streams and you don't care /// about the seek position afterwards, you can reduce the number of seek From f95219fa580a514b5ca6c1425335afadbe394b57 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Sun, 17 Mar 2019 09:39:47 +0100 Subject: [PATCH 5/5] Apply suggestions from code review Fix typos in the documentation Co-Authored-By: LukasKalbertodt --- src/libstd/io/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 2e521b32903d2..f2ed1979966a4 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1340,11 +1340,11 @@ pub trait Seek { /// /// If you need to obtain the length of *many* streams and you don't care /// about the seek position afterwards, you can reduce the number of seek - /// operations by simply calling `seek(SeekFrom::End(0))` and use its + /// operations by simply calling `seek(SeekFrom::End(0))` and using its /// return value (it is also the stream length). /// /// Note that length of a stream can change over time (for example, when - /// data is appended to a file). So calling this method multiply times does + /// data is appended to a file). So calling this method multiple times does /// not necessarily return the same length each time. /// ///