Skip to content

Commit

Permalink
[bsc] bump s5cmd version
Browse files Browse the repository at this point in the history
  • Loading branch information
voron committed Nov 4, 2023
1 parent c1910cc commit 9af913a
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 26 deletions.
2 changes: 1 addition & 1 deletion dysnix/bsc/templates/scripts/_init_from_gcs.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ while [ "${SYNC}" -gt 0 ] ; do
# we don't wanna sync ancient data here
time ${S5CMD} sync ${EXCLUDE_ANCIENT} s3://${STATE_SRC}/* ${CHAINDATA_DIR}/ > cplist_state.txt &
STATE_CP_PID=$!
time nice ${S5CMD} sync --part-size 200 --concurrency 2 ${EXCLUDE_STATE} s3://${ANCIENT_SRC}/* ${CHAINDATA_DIR}/ancient/ > cplist_ancient.txt &
time nice ${S5CMD} sync --delete --part-size 200 --concurrency 2 ${EXCLUDE_STATE} s3://${ANCIENT_SRC}/* ${CHAINDATA_DIR}/ancient/ > cplist_ancient.txt &
ANCIENT_CP_PID=$!

# wait for all syncs to complete
Expand Down
25 changes: 4 additions & 21 deletions dysnix/bsc/templates/scripts/_sync_to_gcs.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ ${S5CMD} cp updating "s3://${UPDATING_URL}"

# we're ready to perform actual data copy

# sync from local disk to cloud, without removing existing [missing on local disk] files
# sync from local disk to cloud, removing existing cloud objects that are missing on the local disk
# run multiple syncs in background
# cp is recursive by default, thus we need to exclude ancient data here
time ${S5CMD} cp -n -s -u ${EXCLUDE_ANCIENT} "${CHAINDATA_DIR}/" "s3://${STATE_DST}/" > cplist_state.txt &
# sync is recursive by default, thus we need to exclude ancient data here
time ${S5CMD} --stat sync --delete ${EXCLUDE_ANCIENT} "${CHAINDATA_DIR}/" "s3://${STATE_DST}/" > cplist_state.txt &
STATE_CP_PID=$!
time nice ${S5CMD} cp -n -s -u --part-size 200 --concurrency 2 ${EXCLUDE_STATE} "${CHAINDATA_DIR}/ancient/" "s3://${ANCIENT_DST}/" > cplist_ancient.txt &
time nice ${S5CMD} --stat sync --delete --part-size 200 --concurrency 2 ${EXCLUDE_STATE} "${CHAINDATA_DIR}/ancient/" "s3://${ANCIENT_DST}/" > cplist_ancient.txt &
ANCIENT_CP_PID=$!
# Wait for each specified child process and return its termination status
# errors are "handled" by "set -e"
Expand All @@ -94,20 +94,3 @@ INODES=$(df -Phi "${DATA_DIR}" | tail -n 1 | awk '{print $3}')
SIZE=$(df -P -BG "${DATA_DIR}" | tail -n 1 | awk '{print $3}')G
echo -ne "Inodes:\t${INODES} Size:\t${SIZE}" > stats
${S5CMD} cp stats "s3://${STATS_URL}"

# get number of objects copied
cat cplist_state.txt cplist_ancient.txt > "${CPLIST}"
# we use a heuristic here - lot of uploaded objects => lot of object to remove in the cloud => we need to generate removal list
CP_OBJ_NUMBER=$(wc -l < "${CPLIST}")
echo "$(date -Iseconds) Uploaded objects: ${CP_OBJ_NUMBER}" | tee -a "${CPLOG}"
set +e
FORCE_CLEANUP=$(echo "{{ .Values.bsc.syncToGCS.forceCleanup }}" | tr '[:upper:]' '[:lower:]')
if [ "${CP_OBJ_NUMBER}" -gt 1000 ] || [ "${FORCE_CLEANUP}" == "true" ] ;then
set -e
# s5cmd doesn't support GCS object removal, just generate a list of files to remove via gsutil
# removal should be done in another sidecar
time $S5CMD --dry-run cp -n ${EXCLUDE_ANCIENT} "s3://${STATE_DST}/*" "${CHAINDATA_DIR}/" | awk '{print $2}'|sed 's/^s3/gs/' > rmlist.txt
time $S5CMD --dry-run cp -n ${EXCLUDE_STATE} "s3://${ANCIENT_DST}/*" "${CHAINDATA_DIR}/ancient/" | awk '{print $2}'|sed 's/^s3/gs/' >> rmlist.txt
echo "$(date -Iseconds) Objects to remove: $(wc -l < rmlist.txt)" | tee -a "${RMLOG}"
cp rmlist.txt "${RMLIST}"
fi
7 changes: 3 additions & 4 deletions dysnix/bsc/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,15 @@ bsc:
initFromRsyncImage: instrumentisto/rsync-ssh:latest
initFromGCS:
enabled: false
image: peakcom/s5cmd:v2.0.0
image: peakcom/s5cmd:v2.2.2
endpoint: "https://storage.googleapis.com"
keyID: "AWS_ACCESS_KEY_ID"
accessKey: "AWS_SECRET_ACCESS_KEY"
indexUrl: "bucket/path/to/file"
fullResyncOnSrcUpdate: false
syncToGCS:
enabled: false
# v2.0.0 may lead to OOM issues during uploads
image: peakcom/s5cmd:v1.4.0
image: peakcom/s5cmd:v2.2.2
endpoint: "https://storage.googleapis.com"
keyID: "AWS_ACCESS_KEY_ID"
accessKey: "AWS_SECRET_ACCESS_KEY"
Expand Down Expand Up @@ -305,7 +304,7 @@ rsyncd:
service:
port: 1873
name: rsyncd
# clean up GCS using a list from sync-to-gcs
# legacy: clean up GCS using a list from sync-to-gcs
# workload identity is used
gcsCleanup:
enabled: false
Expand Down

0 comments on commit 9af913a

Please sign in to comment.