Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try using new_zeroed_slice to optimize f.mvs with zeroed allocations #1360

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-and-benchmark-x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: release build main branch
run: |
git fetch --depth 1 origin main && git checkout origin/main
nice cargo +stable build --release --target-dir target.main
nice cargo build --release --target-dir target.main
- name: benchmark on chimera 8-bit test data
run: |
mkdir -p `dirname $LOCAL_FILE`
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-and-test-aarch64-android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ jobs:
key: aarch64-android-cargo-and-target-${{ hashFiles('**/Cargo.lock') }}
- name: cargo build for aarch64-linux-android
run: |
rustup target add --toolchain stable aarch64-linux-android
cargo +stable build --target aarch64-linux-android --release
rustup target add aarch64-linux-android
cargo build --target aarch64-linux-android --release
env:
AR: llvm-ar
CC: aarch64-linux-android26-clang
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test-aarch64-darwin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
target/
key: arm-darwin-cargo-and-target-${{ hashFiles('**/Cargo.lock') }}
- name: cargo ${{ matrix.build.name }} build for aarch64-apple-darwin
run: cargo +stable build ${{ matrix.build.cargo_flags }}
run: cargo build ${{ matrix.build.cargo_flags }}
- name: test ${{ matrix.build.name }} build without frame delay
run: |
.github/workflows/test.sh \
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-and-test-x86-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ jobs:
argon_coveragetool_av1_base_and_extended_profiles_v2.1.1.zip
- name: cargo build for ${{ matrix.target }} ${{ matrix.build.name }}
run: |
rustup target add --toolchain stable ${{ matrix.target }}
cargo +stable build --target ${{ matrix.target }} ${{ matrix.build.flags }}
rustup target add ${{ matrix.target }}
cargo build --target ${{ matrix.target }} ${{ matrix.build.flags }}
env:
RUSTFLAGS: "-C overflow-checks=on"
- name: download, check, and unpack argon test vectors
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-and-test-x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ jobs:
- name: cargo build for ${{ matrix.target }} ${{ matrix.build.name }}
run: |
cargo clean
rustup target add --toolchain stable ${{ matrix.target }}
cargo +stable build --target ${{ matrix.target }} ${{ matrix.build.flags }}
rustup target add ${{ matrix.target }}
cargo build --target ${{ matrix.target }} ${{ matrix.build.flags }}
- name: meson test for ${{ matrix.target }} ${{ matrix.build.name }}
run: |
.github/workflows/test.sh \
Expand Down Expand Up @@ -110,7 +110,7 @@ jobs:
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: cargo build for x86_64-apple-darwin
run: |
cargo +stable build --release
cargo build --release
- name: meson test for x86_64-apple-darwin
run: |
.github/workflows/test.sh \
Expand Down
1 change: 1 addition & 0 deletions lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
any(target_arch = "riscv32", target_arch = "riscv64"),
feature(stdarch_riscv_feature_detection)
)]
#![feature(new_uninit)]
#![deny(unsafe_op_in_unsafe_fn)]
#![allow(clippy::all)]
#![deny(clippy::undocumented_unsafe_blocks)]
Expand Down
12 changes: 6 additions & 6 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use crate::src::cdf::CdfThreadContext;
use crate::src::ctx::CaseSet;
use crate::src::dequant_tables::dav1d_dq_tbl;
use crate::src::disjoint_mut::DisjointMut;
use crate::src::disjoint_mut::DisjointMutArcSlice;
use crate::src::disjoint_mut::DisjointMutSlice;
use crate::src::enum_map::enum_map;
use crate::src::enum_map::enum_map_ty;
Expand Down Expand Up @@ -5219,12 +5220,11 @@ pub fn rav1d_submit_frame(c: &Rav1dContext, state: &mut Rav1dState) -> Rav1dResu

// ref_mvs
if frame_hdr.frame_type.is_inter_or_switch() || frame_hdr.allow_intrabc {
// TODO fallible allocation
f.mvs = Some(
(0..f.sb128h as usize * 16 * (f.b4_stride >> 1) as usize)
.map(|_| Default::default())
.collect(),
);
let n_mvs = f.sb128h as usize * 16 * (f.b4_stride as usize >> 1);
if f.mvs.as_ref().map_or(true, |mvs| mvs.inner.len() != n_mvs) {
// TODO fallible allocation
f.mvs = Some(DisjointMutArcSlice::new_zeroed_slice(n_mvs));
}
if !frame_hdr.allow_intrabc {
for i in 0..7 {
f.refpoc[i] = f.refp[i].p.frame_hdr.as_ref().unwrap().frame_offset as c_uint;
Expand Down
32 changes: 32 additions & 0 deletions src/disjoint_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use std::ptr::addr_of_mut;
use std::sync::Arc;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
use zerocopy::FromZeroes;

/// Wraps an indexable collection to allow unchecked concurrent mutable borrows.
///
Expand Down Expand Up @@ -1214,3 +1215,34 @@ impl<T> Default for DisjointMutArcSlice<T> {
[].into_iter().collect()
}
}

impl<T: FromZeroes> DisjointMutArcSlice<T> {
pub fn new_zeroed_slice(len: usize) -> Self {
#[cfg(debug_assertions)]
let inner = {
let box_slice = Box::<[T]>::new_zeroed_slice(len);
Arc::new(DisjointMut::new(box_slice))
};
#[cfg(not(debug_assertions))]
let inner = {
use std::mem;

let arc_slice = Arc::<[T]>::new_zeroed_slice(len);

// Do our best to check that `DisjointMut` is in fact `#[repr(transparent)]`.
const {
type A = Vec<u8>; // Some concrete sized type.
assert!(mem::size_of::<DisjointMut<A>>() == mem::size_of::<A>());
assert!(mem::align_of::<DisjointMut<A>>() == mem::align_of::<A>());
}

// SAFETY: When `#[cfg(not(debug_assertions))]`, `DisjointMut` is `#[repr(transparent)]`,
// containing only an `UnsafeCell`, which is also `#[repr(transparent)]`.
unsafe { Arc::from_raw(Arc::into_raw(arc_slice) as *const DisjointMut<[_]>) }
};
// SAFETY: `T: FromZeroes`, and the `MaybeUninit<T>` is all zeros,
// since it is allocated with `new_zeroed_slice`, so we can transmute away the `MaybeUninit`.
let inner = unsafe { Arc::from_raw(Arc::into_raw(inner) as *const DisjointMutSlice<T>) };
Self { inner }
}
}
2 changes: 1 addition & 1 deletion src/refmvs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use std::ptr;
use std::slice;
use zerocopy::FromZeroes;

#[derive(Clone, Copy, Default, PartialEq, Eq)]
#[derive(Clone, Copy, Default, PartialEq, Eq, FromZeroes)]
#[repr(C, packed)]
pub struct RefMvsTemporalBlock {
pub mv: Mv,
Expand Down
Loading