Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize mp4 parse times by not copying video data #12

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 47 additions & 36 deletions src/reader.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::BTreeMap;
use std::io::SeekFrom;
use std::io::{Read, Seek};

use crate::{
Expand All @@ -18,15 +17,19 @@ pub struct Mp4 {

impl Mp4 {
/// Parses an in-memory MP4 file.
///
/// The slice is wrapped in a [`std::io::Cursor`] and handed to [`Mp4::read`] together with
/// its length. Sample offsets are byte offsets from the start of the MP4 data, so sample
/// ranges obtained from the returned [`Mp4`] should be used with this same `bytes` buffer.
pub fn read_bytes(bytes: &[u8]) -> Result<Self> {
    let total_len = bytes.len() as u64;
    Self::read(std::io::Cursor::new(bytes), total_len)
}

/// Reads the contents of a file as MP4 data.
pub fn read_file(file_path: impl AsRef<std::path::Path>) -> Result<Self> {
/// Reads the contents of a file as MP4 data, and returns both the parsed MP4 and its raw data.
///
/// Sample ranges returned by the resulting [`Mp4`] should be used with the same input buffer.
pub fn read_file(file_path: impl AsRef<std::path::Path>) -> Result<(Self, Vec<u8>)> {
let bytes = std::fs::read(file_path)?;
Self::read_bytes(&bytes)
Ok((Self::read_bytes(&bytes)?, bytes))
}

pub fn read<R: Read + Seek>(mut reader: R, size: u64) -> Result<Self> {
Expand Down Expand Up @@ -104,7 +107,7 @@ impl Mp4 {
let mut tracks = this.build_tracks();
this.update_sample_list(&mut tracks)?;
this.tracks = tracks;
this.load_track_data(&mut reader)?;
this.update_tracks();

Ok(this)
}
Expand Down Expand Up @@ -262,7 +265,6 @@ impl Mp4 {
duration: trak.mdia.mdhd.duration,
kind: trak.mdia.minf.stbl.stsd.kind(),
samples,
data: Vec::new(),
},
);
}
Expand Down Expand Up @@ -407,29 +409,9 @@ impl Mp4 {
Ok(())
}

/// For every track, combine its samples into a single contiguous buffer.
///
/// This also updates sample offsets and the track duration if needed.
///
/// After this function is called, each track's [`Track::data`] may only be indexed by one of its samples' [`Sample::offset`]s.
fn load_track_data<R: Read + Seek>(&mut self, reader: &mut R) -> Result<()> {
/// Update track metadata after all samples have been read
fn update_tracks(&mut self) {
for track in self.tracks.values_mut() {
for sample in &mut track.samples {
let data_offset = track.data.len();

track
.data
.resize(track.data.len() + sample.size as usize, 0);

// at this point, `sample.offset` is the offset of the first byte of the sample in the file
reader.seek(SeekFrom::Start(sample.offset))?;
reader
.read_exact(&mut track.data[data_offset..data_offset + sample.size as usize])?;

// we want it to be the offset of the sample in the combined track data
sample.offset = data_offset as u64;
}

if track.duration == 0 {
track.duration = track
.samples
Expand All @@ -438,23 +420,30 @@ impl Mp4 {
.unwrap_or_default();
}
}

Ok(())
}
}

/// A single track of a parsed MP4 file, together with the list of its samples.
pub struct Track {
/// Internal field used when decoding a fragmented MP4 file.
first_traf_merged: bool,

/// Width of the track.
// NOTE(review): presumably in pixels, and 0 for non-visual tracks — confirm.
pub width: u16,

/// Height of the track.
// NOTE(review): presumably in pixels, and 0 for non-visual tracks — confirm.
pub height: u16,

/// Identifier of this track.
pub track_id: u32,

/// Timescale of the track.
///
/// One time unit is equal to `1.0 / timescale` seconds.
pub timescale: u64,

/// Duration of the track in time units.
pub duration: u64,

/// Kind of the track, if one could be determined from its sample description.
pub kind: Option<TrackKind>,

/// List of samples in the track.
pub samples: Vec<Sample>,

// NOTE(review): this listing is a rendered diff without +/- markers; `data` looks like
// the field the change removes in favour of `Sample::byte_range` — confirm against the
// merged file before relying on it.
pub data: Vec<u8>,
}

impl Track {
Expand All @@ -474,11 +463,6 @@ impl Track {
trak
}

pub fn read_sample(&self, sample_id: u32) -> &[u8] {
let sample = &self.samples[sample_id as usize];
&self.data[sample.offset as usize..(sample.offset + sample.size) as usize]
}

pub fn raw_codec_config(&self, mp4: &Mp4) -> Option<Vec<u8>> {
let sample_description = &self.trak(mp4).mdia.minf.stbl.stsd;

Expand All @@ -501,16 +485,43 @@ impl Track {

/// Metadata describing one sample of a [`Track`].
///
/// A `Sample` carries no payload bytes itself; [`Sample::byte_range`] locates its bytes
/// inside the buffer the MP4 was parsed from.
#[derive(Default, Clone, Copy)]
pub struct Sample {
    /// Sample number.
    pub id: u32,

    /// Whether or not an entire frame can be decoded from this one sample,
    /// or if it needs the context of other samples.
    pub is_sync: bool,

    /// Size of the sample in bytes.
    pub size: u64,

    /// Offset of the sample in bytes from the start of the MP4 file.
    pub offset: u64,

    /// Timescale of the sample.
    ///
    /// One time unit is equal to `1.0 / timescale` seconds.
    pub timescale: u64,

    /// Timestamp of the sample at which it should be decoded,
    /// in time units.
    pub decode_timestamp: u64,

    /// Timestamp of the sample at which the sample should be displayed,
    /// in time units.
    pub composition_timestamp: u64,

    /// Duration of the sample in time units.
    pub duration: u64,
}

impl Sample {
    /// Returns the range of bytes in the input data that this sample covers.
    ///
    /// The range is `offset..offset + size`. `offset` and `size` come straight from the
    /// parsed file, so the arithmetic is defensive: the sum saturates instead of
    /// overflowing, and any bound that does not fit in `usize` is clamped to `usize::MAX`.
    /// The returned range therefore always satisfies `start <= end`, and a clamped range
    /// lies outside every real buffer, so `data.get(sample.byte_range())` yields `None`
    /// for such samples rather than a bogus slice.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        // Fully-qualified `TryFrom` so this resolves regardless of the edition's prelude.
        let to_index = |value: u64| {
            <usize as std::convert::TryFrom<u64>>::try_from(value).unwrap_or(usize::MAX)
        };

        let start = to_index(self.offset);
        let end = to_index(self.offset.saturating_add(self.size));
        start..end
    }
}

impl std::fmt::Debug for Track {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Track")
Expand Down
2 changes: 1 addition & 1 deletion tests/codec_detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ fn test_codec_parsing(
stsd_box_check: impl Fn(&StsdBox),
) {
let mp4_file = std::path::Path::new(paths::SAMPLE_BASE_PATH).join(video_path);
let video = re_mp4::Mp4::read_file(mp4_file).expect("Failed parsing mp4");
let (video, _) = re_mp4::Mp4::read_file(mp4_file).expect("Failed parsing mp4");

let track = video.tracks().get(&1);
let track = track.expect("Expected a video track with id 1");
Expand Down
7 changes: 7 additions & 0 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// Concatenates the bytes of every sample in `track`, in sample order.
///
/// `mp4_data` is indexed directly with each sample's `byte_range()`, so it must be the
/// buffer the MP4 was parsed from; a range that falls outside the buffer panics.
pub fn get_sample_data(mp4_data: &[u8], track: &re_mp4::Track) -> Vec<u8> {
    track.samples.iter().fold(Vec::new(), |mut bytes, sample| {
        bytes.extend_from_slice(&mp4_data[sample.byte_range()]);
        bytes
    })
}
7 changes: 5 additions & 2 deletions tests/mp4box_comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

mod paths;

#[path = "common/mod.rs"]
mod common;

use std::path::Path;

fn assert_snapshot(snapshot_path: &Path, contents: &[u8]) {
Expand Down Expand Up @@ -50,13 +53,13 @@ fn compare_video_snapshot_with_mp4box_output(video_path: &Path) {
"Failed to run mp4box."
);

let video = re_mp4::Mp4::read_file(base_path.join(video_path)).unwrap();
let (video, data) = re_mp4::Mp4::read_file(base_path.join(video_path)).unwrap();

for (id, track) in video.tracks() {
if track.kind == Some(re_mp4::TrackKind::Video) {
assert_snapshot(
&base_path.join(format!("{video_path_str}.track_{id}.bin")),
&track.data,
&common::get_sample_data(&data, track),
);
assert_snapshot(
&base_path.join(format!("{video_path_str}.track_{id}.segments")),
Expand Down
15 changes: 10 additions & 5 deletions tests/multitrack.rs
Original file line number Diff line number Diff line change
@@ -1,48 +1,53 @@
mod paths;

#[path = "common/mod.rs"]
mod common;

#[test]
fn multitrack_video_with_subtitles_and_sound() {
let mp4_with_subtitles_and_sound = std::path::Path::new(paths::SAMPLE_BASE_PATH)
.join("rerun404_avc_with_subtitles_and_sound.mp4");

let video = re_mp4::Mp4::read_file(mp4_with_subtitles_and_sound).unwrap();

let (video, data) = re_mp4::Mp4::read_file(mp4_with_subtitles_and_sound).unwrap();
assert_eq!(video.tracks().len(), 3);
assert_eq!(video.moov.mvhd.next_track_id, 4);

// Video track.
{
let track = video.tracks().get(&1).unwrap();
let data = common::get_sample_data(&data, track);
assert_eq!(track.kind, Some(re_mp4::TrackKind::Video));
assert_eq!(track.codec_string(&video), Some("avc1.640028".to_owned()));
assert_eq!(track.track_id, 1);
assert_eq!(track.width, 600);
assert_eq!(track.height, 600);
assert!(!track.samples.is_empty());
assert!(!track.data.is_empty());
assert!(!data.is_empty());
jprochazk marked this conversation as resolved.
Show resolved Hide resolved
}

// Audio track.
{
let track = video.tracks().get(&2).unwrap();
let data = common::get_sample_data(&data, track);
assert_eq!(track.kind, Some(re_mp4::TrackKind::Audio));
assert_eq!(track.codec_string(&video), None);
assert_eq!(track.track_id, 2);
assert_eq!(track.width, 0);
assert_eq!(track.height, 0);
assert!(!track.samples.is_empty());
assert!(!track.data.is_empty());
assert!(!data.is_empty());
}

// Subtitle track.
{
let track = video.tracks().get(&3).unwrap();
let data = common::get_sample_data(&data, track);
assert_eq!(track.kind, Some(re_mp4::TrackKind::Subtitle));
assert_eq!(track.codec_string(&video), None);
assert_eq!(track.track_id, 3);
assert_eq!(track.width, 0);
assert_eq!(track.height, 0);
assert!(!track.samples.is_empty());
assert!(!track.data.is_empty());
assert!(!data.is_empty());
}
}
Loading