
Merge pull request #279 from dysnix/bsc-improve-gcs-init
[bsc] improve init from GCS
voron authored Jan 29, 2024
2 parents 166509e + 6fdf2e9 commit 13533ae
Showing 3 changed files with 69 additions and 25 deletions.
2 changes: 1 addition & 1 deletion dysnix/bsc/Chart.yaml
@@ -1,7 +1,7 @@
 apiVersion: v2
 name: bsc
 description: Binance Smart Chain chart for Kubernetes
-version: 0.6.38
+version: 0.6.39
 appVersion: 1.2.15
 
 keywords:
91 changes: 67 additions & 24 deletions dysnix/bsc/templates/scripts/_init_from_gcs.tpl
@@ -7,30 +7,42 @@ set -ex # -e exits on error
 # AWS_SECRET_ACCESS_KEY
 
 DATA_DIR="{{ .Values.bsc.base_path }}"
-CHAINDATA_DIR="${DATA_DIR}/geth/chaindata"
+GETH_DIR="${DATA_DIR}/geth"
+CHAINDATA_DIR="${GETH_DIR}/chaindata"
+STATE_TMP_DIR="${GETH_DIR}/state_tmp"
+ANCIENT_TMP_DIR="${GETH_DIR}/ancient_tmp"
 INITIALIZED_FILE="${DATA_DIR}/.initialized"
+OUT_OF_SPACE_FILE="${DATA_DIR}/.out_of_space"
 # without gs:// or s3://, just a bucket name and path
 INDEX_URL="{{ .Values.bsc.initFromGCS.indexUrl }}"
 GCS_BASE_URL="{{ .Values.bsc.initFromGCS.baseUrlOverride }}"
 S5CMD=/s5cmd
-EXCLUDE_ANCIENT="--exclude *.cidx --exclude *.ridx --exclude *.cdat --exclude *.rdat"
-EXCLUDE_STATE="--exclude *.ldb --exclude *.sst"
 INDEX="index"
 S_UPDATING="/updating"
 S_TIMESTAMP="/timestamp"
 S_STATE_URL="/state_url"
 S_ANCIENT_URL="/ancient_url"
+S_STATS="/stats"
+MAX_USED_SPACE_PERCENT={{ .Values.bsc.initFromGCS.maxUsedSpacePercent }}
 
 # allow container interrupt
 trap "{ exit 1; }" INT TERM
 
 {{- if .Values.bsc.forceInitFromSnapshot }}
-rm -f "${INITIALIZED_FILE}"
+rm -f "${INITIALIZED_FILE}" "${OUT_OF_SPACE_FILE}"
 {{- end }}
 
 if [ -f "${INITIALIZED_FILE}" ]; then
   echo "Blockchain already initialized. Exiting..."
   exit 0
 fi
 
+if [ -f "${OUT_OF_SPACE_FILE}" ]; then
+  echo "Looks like we're out of space. Exiting with an error ..."
+  cat "${OUT_OF_SPACE_FILE}"
+  exit 2
+fi
+
 # we need to create temp files
 cd /tmp

@@ -66,14 +78,10 @@
+STATS_URL="${GCS_BASE_URL}${S_STATS}"
 STATE_URL="${GCS_BASE_URL}${S_STATE_URL}"
 ANCIENT_URL="${GCS_BASE_URL}${S_ANCIENT_URL}"
 
-
 STATE_SRC="$(${S5CMD} cat s3://${STATE_URL})"
 ANCIENT_SRC="$(${S5CMD} cat s3://${ANCIENT_URL})"
+REMOTE_STATS="$(${S5CMD} cat s3://${STATS_URL})"
 
-# create dst dirs
-mkdir -p "${CHAINDATA_DIR}/ancient"
-
 # save sync source
 echo "${GCS_BASE_URL}" > "${DATA_DIR}/source"
@@ -97,45 +105,75 @@ TIMESTAMP_0="$(${S5CMD} cat s3://${TIMESTAMP_URL})"
 
 # we're ready to perform actual data sync
 
-# we're done when both are true
+# we're done when all are true
 # 1) start and stop timestamps did not change during data sync - no process started or finished updating the cloud
-# 2) 0 objects copied
+# 2) start timestamp is before stop timestamp - no process is in the middle of updating the cloud
+# 3) 0 objects copied
 SYNC=2
 CLEANUP=1
 while [ "${SYNC}" -gt 0 ] ; do
 
   # Cleanup
   if [ ${CLEANUP} -eq 1 ];then
-    echo "$(date -Iseconds) Cleaning up local dir ..."
-    mkdir -p ${DATA_DIR}/geth
-    mv ${DATA_DIR}/geth ${DATA_DIR}/geth.old && rm -rf ${DATA_DIR}/geth.old &
+    echo "$(date -Iseconds) Cleaning up local dir ${GETH_DIR} ..."
+    mkdir -p "${GETH_DIR}"
+    mv "${GETH_DIR}" "${GETH_DIR}.old" && rm -rf "${GETH_DIR}.old" &
     CLEANUP=0
   fi
 
-  # sync from cloud to local disk, without removing existing [missing in the cloud] files
+  # sync from cloud to local disk, removing local files that are missing in the cloud
   # run multiple syncs in background
 
-  # we don't wanna sync ancient data here
-  time ${S5CMD} sync ${EXCLUDE_ANCIENT} s3://${STATE_SRC}/* ${CHAINDATA_DIR}/ > cplist_state.txt &
+  time ${S5CMD} sync --delete s3://${STATE_SRC}/* ${STATE_TMP_DIR}/ > cplist_state.txt &
   STATE_CP_PID=$!
-  time nice ${S5CMD} sync --delete --part-size 200 --concurrency 2 ${EXCLUDE_STATE} s3://${ANCIENT_SRC}/* ${CHAINDATA_DIR}/ancient/ > cplist_ancient.txt &
+  time nice ${S5CMD} sync --delete --part-size 200 --concurrency 2 s3://${ANCIENT_SRC}/* ${ANCIENT_TMP_DIR}/ > cplist_ancient.txt &
   ANCIENT_CP_PID=$!
 
   # wait for all syncs to complete
-  # TODO any error handling here?
-  wait ${STATE_CP_PID} ${ANCIENT_CP_PID}
+  # the shell tracks all sub-processes and stores their exit codes internally,
+  # so we don't have to stay in a single wait for all background processes at once;
+  # we handle them sequentially instead
+  wait ${STATE_CP_PID}
+  STATE_CP_EXIT_CODE=$?
+  wait ${ANCIENT_CP_PID}
+  ANCIENT_CP_EXIT_CODE=$?
 
+  # handle running out of disk space specially: no retries, we just stay failed here while disk usage is high
+  VOLUME_USAGE_PERCENT=$(df "${DATA_DIR}" | tail -n 1 | awk '{print $5}' | tr -d %)
+  if [ "${VOLUME_USAGE_PERCENT}" -gt "${MAX_USED_SPACE_PERCENT}" ];then
+    set +x
+    # stop monitoring
+    if [ ${MON_PID} -ne 0 ];then kill ${MON_PID};MON_PID=0; fi
+    # an out-of-inodes error is "handled" by "set -e"
+    echo "We're out of disk space. Marking ${DATA_DIR} as out-of-space and exiting. Check the source snapshot size" | tee -a "${OUT_OF_SPACE_FILE}"
+    echo "Source snapshot size ${REMOTE_STATS}" | tee -a "${OUT_OF_SPACE_FILE}"
+    echo "Disk usage is ${VOLUME_USAGE_PERCENT}%" | tee -a "${OUT_OF_SPACE_FILE}"
+    df -P -BG "${DATA_DIR}" | tee -a "${OUT_OF_SPACE_FILE}"
+    exit 2
+  fi
+  # s5cmd exits 0 on success and 1 on any error
+  # no errors - we're good to go
+  # any error - retry the download
+  # both exit codes have to be 0
+  if [ "${STATE_CP_EXIT_CODE}" -ne "0" ] || [ "${ANCIENT_CP_EXIT_CODE}" -ne "0" ];then
+    echo "s5cmd sync returned non-zero, retrying sync after a short sleep"
+    # wait a bit so we don't spam billable requests too frequently
+    sleep 60
+    SYNC=2
+    continue
+  fi
   # get start and stop timestamps from the cloud after sync
   UPDATING_1="$(${S5CMD} cat s3://${UPDATING_URL})"
   TIMESTAMP_1="$(${S5CMD} cat s3://${TIMESTAMP_URL})"
 
   # compare timestamps before and after sync
-  if [ "${UPDATING_0}" -eq "${UPDATING_1}" ] && [ "${TIMESTAMP_0}" -eq "${TIMESTAMP_1}" ];then
-    echo "Timestamps are equal"
+  # also ensure the start timestamp is earlier than the stop timestamp
+  if [ "${UPDATING_0}" -eq "${UPDATING_1}" ] && [ "${TIMESTAMP_0}" -eq "${TIMESTAMP_1}" ] && [ "${TIMESTAMP_1}" -gt "${UPDATING_1}" ];then
+    echo "Timestamps did not change and the start timestamp is before the stop timestamp"
     echo -e "U_0=${UPDATING_0}\tU_1=${UPDATING_1},\tT_0=${TIMESTAMP_0}\tT_1=${TIMESTAMP_1}"
     let SYNC=SYNC-1
   else
-    echo "Timestamps changed, running sync again ..."
+    echo "Source timestamps changed or the start timestamp is after the stop timestamp, running sync again ..."
     echo -e "U_0=${UPDATING_0}\tU_1=${UPDATING_1},\tT_0=${TIMESTAMP_0}\tT_1=${TIMESTAMP_1}"
     # end timestamps -> begin timestamps
     UPDATING_0=${UPDATING_1}
@@ -148,7 +186,7 @@ while [ "${SYNC}" -gt 0 ] ; do
     continue
   fi
 
-  # stop monitoring
+  # stop monitoring, we don't expect massive data copying anymore
   if [ ${MON_PID} -ne 0 ];then
     kill ${MON_PID}
     MON_PID=0
@@ -168,5 +206,10 @@ while [ "${SYNC}" -gt 0 ] ; do
   fi
 done
 
+# prepare the geth datadir from the tmp dirs
+mv "${STATE_TMP_DIR}" "${CHAINDATA_DIR}"
+rm -rf "${CHAINDATA_DIR}/ancient"
+mv "${ANCIENT_TMP_DIR}" "${CHAINDATA_DIR}/ancient"
+
 # Mark data dir as initialized
 touch ${INITIALIZED_FILE}
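
The retry loop above relies on a small consistency protocol: the uploader writes a start timestamp to /updating before pushing data and a stop timestamp to /timestamp after it finishes, so a reader knows its copy is consistent only when both values are unchanged across the sync and the stop timestamp is newer than the start. A minimal standalone sketch of that check, not part of the commit; the s5cmd path and bucket path are illustrative rather than taken from the chart:

#!/bin/sh
# Sketch only. Assumes the uploader writes epoch seconds to <base>/updating
# before pushing and to <base>/timestamp after it finishes, mirroring
# UPDATING_URL/TIMESTAMP_URL in the script above.
set -e
S5CMD=/s5cmd
BASE="my-bucket/bsc"   # illustrative bucket/path, no s3:// prefix

start0="$(${S5CMD} cat "s3://${BASE}/updating")"
stop0="$(${S5CMD} cat "s3://${BASE}/timestamp")"

# ... the actual data sync would run here ...

start1="$(${S5CMD} cat "s3://${BASE}/updating")"
stop1="$(${S5CMD} cat "s3://${BASE}/timestamp")"

# consistent only if nothing changed during the sync and the last upload
# finished, i.e. the stop timestamp is newer than the start timestamp
if [ "${start0}" -eq "${start1}" ] && [ "${stop0}" -eq "${stop1}" ] \
   && [ "${stop1}" -gt "${start1}" ]; then
    echo "snapshot is consistent"
else
    echo "uploader was active, sync again" >&2
    exit 1
fi
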
1 change: 1 addition & 0 deletions dysnix/bsc/values.yaml
@@ -148,6 +148,7 @@ bsc:
     indexUrl: "bucket/path/to/file"
     baseUrlOverride: "" # "bucket/path/to/dir"
     fullResyncOnSrcUpdate: false
+    maxUsedSpacePercent: 93 # percent
   syncToGCS:
     enabled: false
     image: peakcom/s5cmd:v2.2.2
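
A usage sketch for the new knob, assuming the chart is installed from a repo alias named dysnix under the release name bsc (both illustrative). Per the template above, setting forceInitFromSnapshot removes the .initialized and .out_of_space markers, so the init re-runs with the new limit:

# illustrative release name and repo alias
helm upgrade bsc dysnix/bsc \
  --set bsc.initFromGCS.maxUsedSpacePercent=90 \
  --set bsc.forceInitFromSnapshot=true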
