Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vec - add CeedVectorScale #757

Merged
merged 11 commits into from
Apr 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/julia-test-with-style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ jobs:
make -j2
export JULIA_LIBCEED_LIB=$(find $PWD/lib -name "libceed.*")
pushd julia/LibCEED.jl
julia --project -e 'import Pkg; Pkg.build(); Pkg.test("LibCEED"; coverage=true)'
julia --project -e 'import Pkg; Pkg.build(); Pkg.test("LibCEED"; coverage=true, test_args=["--run-dev-tests"])'
unset JULIA_LIBCEED_LIB && julia --project -e 'import Pkg; Pkg.build(); Pkg.test("LibCEED")'
julia --project=.style/ -e 'import Pkg; Pkg.instantiate()' && julia --project=.style/ .style/ceed_style.jl && git diff --exit-code src test examples
8 changes: 5 additions & 3 deletions .github/workflows/rust-test-with-style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ jobs:
- name: Rust setup
uses: actions-rs/toolchain@v1
with:
# Note: rustfmt not always included in nightly
toolchain: stable
# Note: rustfmt not always included in nightly, will attempt to downgrade until rustfmt found
toolchain: nightly
components: rustfmt
- name: Rust style
run: |
cargo fmt -- --check
cargo +nightly fmt --version
cargo +nightly fmt -- --check
57 changes: 53 additions & 4 deletions backends/cuda/ceed-cuda-qfunctioncontext.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,7 @@ static int CeedQFunctionContextSetDataDevice_Cuda(const CeedQFunctionContext
// freeing any previously allocated array if applicable
//------------------------------------------------------------------------------
static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
const CeedMemType mtype,
const CeedCopyMode cmode,
CeedScalar *data) {
const CeedMemType mtype, const CeedCopyMode cmode, CeedScalar *data) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
Expand All @@ -153,7 +151,56 @@ static int CeedQFunctionContextSetData_Cuda(const CeedQFunctionContext ctx,
}

//------------------------------------------------------------------------------
// Get array
// Take data
//------------------------------------------------------------------------------
static int CeedQFunctionContextTakeData_Cuda(const CeedQFunctionContext ctx,
const CeedMemType mtype, CeedScalar *data) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
CeedQFunctionContext_Cuda *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
if(impl->h_data == NULL && impl->d_data == NULL)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set");
// LCOV_EXCL_STOP

// Sync array to requested memtype and update pointer
switch (mtype) {
case CEED_MEM_HOST:
if (impl->h_data == NULL) {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated);
CeedChkBackend(ierr);
impl->h_data = impl->h_data_allocated;
}
if (impl->memState == CEED_CUDA_DEVICE_SYNC) {
ierr = CeedQFunctionContextSyncD2H_Cuda(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_CUDA_HOST_SYNC;
*(void **)data = impl->h_data;
impl->h_data = NULL;
impl->h_data_allocated = NULL;
break;
case CEED_MEM_DEVICE:
if (impl->d_data == NULL) {
ierr = cudaMalloc((void **)&impl->d_data_allocated, bytes(ctx));
CeedChk_Cu(ceed, ierr);
impl->d_data = impl->d_data_allocated;
}
if (impl->memState == CEED_CUDA_HOST_SYNC) {
ierr = CeedQFunctionContextSyncH2D_Cuda(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_CUDA_DEVICE_SYNC;
*(void **)data = impl->d_data;
impl->d_data = NULL;
impl->d_data_allocated = NULL;
break;
}
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Get data
//------------------------------------------------------------------------------
static int CeedQFunctionContextGetData_Cuda(const CeedQFunctionContext ctx,
const CeedMemType mtype, CeedScalar *data) {
Expand Down Expand Up @@ -232,6 +279,8 @@ int CeedQFunctionContextCreate_Cuda(CeedQFunctionContext ctx) {

ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData",
CeedQFunctionContextSetData_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData",
CeedQFunctionContextTakeData_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData",
CeedQFunctionContextGetData_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreData",
Expand Down
53 changes: 53 additions & 0 deletions backends/cuda/ceed-cuda-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,57 @@ static int CeedVectorReciprocal_Cuda(CeedVector vec) {
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on the host
//------------------------------------------------------------------------------
static int CeedHostScale_Cuda(CeedScalar *x_array, CeedScalar alpha,
CeedInt length) {
for (int i = 0; i < length; i++)
x_array[i] *= alpha;
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device (impl in .cu file)
//------------------------------------------------------------------------------
int CeedDeviceScale_Cuda(CeedScalar *x_array, CeedScalar alpha,
CeedInt length);

//------------------------------------------------------------------------------
// Compute x = alpha x
//------------------------------------------------------------------------------
static int CeedVectorScale_Cuda(CeedVector x, CeedScalar alpha) {
int ierr;
Ceed ceed;
ierr = CeedVectorGetCeed(x, &ceed); CeedChkBackend(ierr);
CeedVector_Cuda *x_data;
ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr);
CeedInt length;
ierr = CeedVectorGetLength(x, &length); CeedChkBackend(ierr);

// Set value for synced device/host array
switch(x_data->memState) {
case CEED_CUDA_HOST_SYNC:
ierr = CeedHostScale_Cuda(x_data->h_array, alpha, length);
CeedChkBackend(ierr);
break;
case CEED_CUDA_DEVICE_SYNC:
ierr = CeedDeviceScale_Cuda(x_data->d_array, alpha, length);
CeedChkBackend(ierr);
break;
case CEED_CUDA_BOTH_SYNC:
ierr = CeedDeviceScale_Cuda(x_data->d_array, alpha, length);
CeedChkBackend(ierr);
x_data->memState = CEED_CUDA_DEVICE_SYNC;
break;
// LCOV_EXCL_START
case CEED_CUDA_NONE_SYNC:
break; // Not possible, but included for completness
// LCOV_EXCL_STOP
}
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute y = alpha x + y on the host
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -617,6 +668,8 @@ int CeedVectorCreate_Cuda(CeedInt n, CeedVector vec) {
CeedVectorReciprocal_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "AXPY",
CeedVectorAXPY_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Scale",
CeedVectorScale_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult",
CeedVectorPointwiseMult_Cuda); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Destroy",
Expand Down
26 changes: 26 additions & 0 deletions backends/cuda/kernels/cuda-vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,32 @@ extern "C" int CeedDeviceReciprocal_Cuda(CeedScalar* d_array, CeedInt length) {
return 0;
}

//------------------------------------------------------------------------------
// Kernel for scale
//------------------------------------------------------------------------------
__global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha,
CeedInt size) {
int idx = threadIdx.x + blockDim.x * blockIdx.x;
if (idx >= size)
return;
x[idx] *= alpha;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceScale_Cuda(CeedScalar *x_array, CeedScalar alpha,
CeedInt length) {
const int bsize = 512;
const int vecsize = length;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize)
gridsize += 1;
scaleValueK<<<gridsize,bsize>>>(x_array, alpha, length);
return 0;
}

//------------------------------------------------------------------------------
// Kernel for axpy
//------------------------------------------------------------------------------
Expand Down
53 changes: 52 additions & 1 deletion backends/hip/ceed-hip-qfunctioncontext.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,56 @@ static int CeedQFunctionContextSetData_Hip(const CeedQFunctionContext ctx,
}

//------------------------------------------------------------------------------
// Get array
// Take data
//------------------------------------------------------------------------------
static int CeedQFunctionContextTakeData_Hip(const CeedQFunctionContext ctx,
const CeedMemType mtype, CeedScalar *data) {
int ierr;
Ceed ceed;
ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChkBackend(ierr);
CeedQFunctionContext_Hip *impl;
ierr = CeedQFunctionContextGetBackendData(ctx, &impl); CeedChkBackend(ierr);
if(impl->h_data == NULL && impl->d_data == NULL)
// LCOV_EXCL_START
return CeedError(ceed, CEED_ERROR_BACKEND, "No context data set");
// LCOV_EXCL_STOP

// Sync array to requested memtype and update pointer
switch (mtype) {
case CEED_MEM_HOST:
if (impl->h_data == NULL) {
ierr = CeedMalloc(bytes(ctx), &impl->h_data_allocated);
CeedChkBackend(ierr);
impl->h_data = impl->h_data_allocated;
}
if (impl->memState == CEED_HIP_DEVICE_SYNC) {
ierr = CeedQFunctionContextSyncD2H_Hip(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_HIP_HOST_SYNC;
*(void **)data = impl->h_data;
impl->h_data = NULL;
impl->h_data_allocated = NULL;
break;
case CEED_MEM_DEVICE:
if (impl->d_data == NULL) {
ierr = hipMalloc((void **)&impl->d_data_allocated, bytes(ctx));
CeedChk_Hip(ceed, ierr);
impl->d_data = impl->d_data_allocated;
}
if (impl->memState == CEED_HIP_HOST_SYNC) {
ierr = CeedQFunctionContextSyncH2D_Hip(ctx); CeedChkBackend(ierr);
}
impl->memState = CEED_HIP_DEVICE_SYNC;
*(void **)data = impl->d_data;
impl->d_data = NULL;
impl->d_data_allocated = NULL;
break;
}
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Get data
//------------------------------------------------------------------------------
static int CeedQFunctionContextGetData_Hip(const CeedQFunctionContext ctx,
const CeedMemType mtype, CeedScalar *data) {
Expand Down Expand Up @@ -227,6 +276,8 @@ int CeedQFunctionContextCreate_Hip(CeedQFunctionContext ctx) {

ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "SetData",
CeedQFunctionContextSetData_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "TakeData",
CeedQFunctionContextTakeData_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "GetData",
CeedQFunctionContextGetData_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "QFunctionContext", ctx, "RestoreData",
Expand Down
53 changes: 53 additions & 0 deletions backends/hip/ceed-hip-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,57 @@ static int CeedVectorReciprocal_Hip(CeedVector vec) {
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on the host
//------------------------------------------------------------------------------
static int CeedHostScale_Hip(CeedScalar *x_array, CeedScalar alpha,
CeedInt length) {
for (int i = 0; i < length; i++)
x_array[i] *= alpha;
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device (impl in .cu file)
//------------------------------------------------------------------------------
int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha,
CeedInt length);

//------------------------------------------------------------------------------
// Compute x = alpha x
//------------------------------------------------------------------------------
static int CeedVectorScale_Hip(CeedVector x, CeedScalar alpha) {
int ierr;
Ceed ceed;
ierr = CeedVectorGetCeed(x, &ceed); CeedChkBackend(ierr);
CeedVector_Hip *x_data;
ierr = CeedVectorGetData(x, &x_data); CeedChkBackend(ierr);
CeedInt length;
ierr = CeedVectorGetLength(x, &length); CeedChkBackend(ierr);

// Set value for synced device/host array
switch(x_data->memState) {
case CEED_HIP_HOST_SYNC:
ierr = CeedHostScale_Hip(x_data->h_array, alpha, length);
CeedChkBackend(ierr);
break;
case CEED_HIP_DEVICE_SYNC:
ierr = CeedDeviceScale_Hip(x_data->d_array, alpha, length);
CeedChkBackend(ierr);
break;
case CEED_HIP_BOTH_SYNC:
ierr = CeedDeviceScale_Hip(x_data->d_array, alpha, length);
CeedChkBackend(ierr);
x_data->memState = CEED_HIP_DEVICE_SYNC;
break;
// LCOV_EXCL_START
case CEED_HIP_NONE_SYNC:
break; // Not possible, but included for completness
// LCOV_EXCL_STOP
}
return CEED_ERROR_SUCCESS;
}

//------------------------------------------------------------------------------
// Compute y = alpha x + y on the host
//------------------------------------------------------------------------------
Expand Down Expand Up @@ -611,6 +662,8 @@ int CeedVectorCreate_Hip(CeedInt n, CeedVector vec) {
CeedVectorNorm_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Reciprocal",
CeedVectorReciprocal_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "Scale",
CeedVectorScale_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "AXPY",
CeedVectorAXPY_Hip); CeedChkBackend(ierr);
ierr = CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult",
Expand Down
25 changes: 25 additions & 0 deletions backends/hip/kernels/hip-vector.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,32 @@ extern "C" int CeedDeviceReciprocal_Hip(CeedScalar* d_array, CeedInt length) {
return 0;
}

//------------------------------------------------------------------------------
// Kernel for scale
//------------------------------------------------------------------------------
__global__ static void scaleValueK(CeedScalar * __restrict__ x, CeedScalar alpha,
CeedInt size) {
int idx = threadIdx.x + blockDim.x * blockIdx.x;
if (idx >= size)
return;
x[idx] *= alpha;
}

//------------------------------------------------------------------------------
// Compute x = alpha x on device
//------------------------------------------------------------------------------
extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha,
CeedInt length) {
const int bsize = 512;
const int vecsize = length;
int gridsize = vecsize / bsize;

if (bsize * gridsize < vecsize)
gridsize += 1;
hipLaunchKernelGGL(scaleValueK, dim3(gridsize), dim3(bsize), 0, 0, x_array, alpha,
length);
return 0;
}

//------------------------------------------------------------------------------
// Kernel for axpy
Expand Down
Loading