Skip to content

Feature/cu 86888fmx3/crawler stability #457

Feature/cu 86888fmx3/crawler stability

Feature/cu 86888fmx3/crawler stability #457

# Workflow that runs on pull requests targeting `main`. Its single job adds
# missing copyright headers to changed files, refreshes the copyright year,
# and commits the result through the auto-commit step at the end of `steps`.
name: Check CopyRight Info on Files
on:
  pull_request:
    branches:
      - main
jobs:
  update-author:
    name: check copyright info
    runs-on: ubuntu-latest
    # `contents: write` lets the job push the commit created by the final
    # auto-commit step.
    # NOTE(review): no visible step obviously needs `pull-requests: write`;
    # confirm before trimming it.
    permissions:
      pull-requests: write
      contents: write
    steps:
# Publishes step output `check`: how many lines of the pull request author's
# login contain "dependabot" (0 for a regular author, 1 for Dependabot).
# Every later step is gated on this value being '0'.
- name: Check if PR is from Dependabot
  id: check_dependabot
  run: |
    # `grep -c` still prints the count when nothing matches, but exits 1;
    # `|| true` keeps that from failing the step under a stricter shell
    # (e.g. one started with `-o pipefail`).
    is_dependabot="$(jq -r '.pull_request.user.login' "$GITHUB_EVENT_PATH" | grep -c dependabot || true)"
    echo "check=${is_dependabot}" >> "$GITHUB_OUTPUT"
# Check out the repository (skipped for Dependabot pull requests).
# `fetch-depth: 0` fetches the full history instead of a single commit.
# NOTE(review): on `pull_request` events the default checkout is the merge
# ref, while a later step auto-commits to the PR branch; the auto-commit
# action's README suggests `ref: ${{ github.head_ref }}` here — confirm
# whether that is wanted.
- uses: actions/checkout@v4
  if: steps.check_dependabot.outputs.check == '0'
  with:
    fetch-depth: 0
# Collect the files changed by this pull request; later steps read the
# `all_changed_files` output of this step.
# NOTE(review): the action is referenced by a mutable version tag. This
# action's tags were rewritten during the March 2025 supply-chain incident
# (CVE-2025-30066); consider pinning to a full commit SHA.
- name: Get modified files
  if: steps.check_dependabot.outputs.check == '0'
  id: modified-files
  uses: tj-actions/changed-files@v44.5.7
  with:
    # Ignore the yml files and header files in the github folder.
    # This note lives outside the `|` block on purpose: inside a literal
    # block scalar a `#` line is part of the input value, not a comment.
    files_ignore: |
      .github/workflows/copyright-headers/*.txt
      .github/workflows/*.yml
      .github/*.yml
      **/assembly/**
# For every changed file that has no Norconex copyright notice yet, insert
# the header template that matches the file extension. Files with other
# extensions are only reported.
- name: Find all changed files don't contain Copyright Info and add the info to it
  if: ${{ steps.check_dependabot.outputs.check == '0' && steps.modified-files.outputs.all_changed_files != '' }}
  env:
    # File names come from the pull request and are untrusted: hand them to
    # the script through the environment instead of expanding the expression
    # inside the script text, where they could inject shell commands.
    ALL_CHANGED_FILES: ${{ steps.modified-files.outputs.all_changed_files }}
  run: |
    header_dir=.github/workflows/copyright-headers

    # Put header file $1 in front of the content of file $2. The result is
    # staged outside the working tree (so no stray file can be committed)
    # and written back in place, which keeps the target's file mode.
    prepend_header() {
      local header="$1" target="$2" staged
      staged="$(mktemp)"
      cat "$header" "$target" > "$staged"
      cat "$staged" > "$target"
      rm -f "$staged"
    }

    # The list is space separated, so paths containing whitespace are not
    # supported by this loop.
    for file in ${ALL_CHANGED_FILES}; do
      # Single-year or year-range notice already present: nothing to add.
      if grep -qE 'Copyright [0-9]{4}(-[0-9]{4})? Norconex Inc\.' "$file"; then
        echo "$file already contains copyright info, no changes is made"
        continue
      fi

      case "$file" in
        *.java)
          prepend_header "${header_dir}/java_file.txt" "$file"
          ;;
        *.bat)
          # Swap the `@echo off` line for the header template
          # (presumably the template starts with that line itself — verify).
          sed -i -e '/@echo off/ {' -e "r ${header_dir}/bat_file.txt" -e 'd' -e '}' "$file"
          ;;
        *.sh)
          # Swap the bash shebang line for the header template.
          sed -i -e '/#!\/bin\/bash/ {' -e "r ${header_dir}/sh_file.txt" -e 'd' -e '}' "$file"
          ;;
        *.xml)
          # Swap the XML declaration line for the header template.
          sed -i -e '/<?xml version="1.0" encoding="UTF-8"?>/ {' -e "r ${header_dir}/xml_file.txt" -e 'd' -e '}' "$file"
          ;;
        *.html)
          prepend_header "${header_dir}/html_file.txt" "$file"
          ;;
        *.yml | *.yaml)
          prepend_header "${header_dir}/yml_file.txt" "$file"
          ;;
        *)
          echo "$file is in other format"
          ;;
      esac
    done
# Bring the year in each changed file's Norconex copyright notice up to date:
#   - placeholder year 2999      -> current year
#   - single year YYYY (older)   -> YYYY-<current year>
#   - range YYYY-ZZZZ (older)    -> YYYY-<current year>
# .jsp files are skipped. A summary of rewritten files is printed at the end.
- name: Verify Year on files' Copyright Info is up to date
  if: ${{ steps.check_dependabot.outputs.check == '0' && steps.modified-files.outputs.all_changed_files != '' }}
  env:
    # File names come from the pull request and are untrusted: hand them to
    # the script through the environment instead of expanding the expression
    # inside the script text, where they could inject shell commands.
    ALL_CHANGED_FILES: ${{ steps.modified-files.outputs.all_changed_files }}
  run: |
    current_year="$(date +'%Y')"
    # Private scratch list of rewritten files. Created up front so the
    # summary works even when no file needed a change.
    updated_list="$(mktemp)"

    # The list is space separated, so paths containing whitespace are not
    # supported by this loop.
    for file in ${ALL_CHANGED_FILES}; do
      # exclude all jsp files
      if [[ "$file" == *.jsp ]]; then
        continue
      fi

      if grep -qE 'Copyright [0-9]{4} Norconex Inc\.' "$file"; then
        echo "$file: Single year Copyright"
        # Year of the first Norconex single-year notice. Limiting the match
        # to the Norconex notice and to one result keeps the value a single
        # token, so it is safe to embed in the sed expressions below.
        year_in_file="$(grep -oP 'Copyright \K[0-9]{4}(?= Norconex Inc\.)' "$file" | head -n 1)"
        if [ "$year_in_file" = "2999" ]; then
          # 2999 is the year placeholder: replace it with the current year.
          sed -i "s/Copyright ${year_in_file} Norconex Inc\./Copyright ${current_year} Norconex Inc./g" "$file"
          echo "$file" >> "$updated_list"
        elif [ "$year_in_file" != "$current_year" ]; then
          # Older year: turn it into a range ending at the current year.
          sed -i "s/Copyright ${year_in_file} Norconex Inc\./Copyright ${year_in_file}-${current_year} Norconex Inc./g" "$file"
          echo "$file" >> "$updated_list"
        else
          echo "Skipping replacement in ${file} because the year is already the current year."
        fi
      elif grep -qE 'Copyright [0-9]{4}-[0-9]{4} Norconex Inc\.' "$file"; then
        echo "$file: Range year Copyright"
        # Range notices whose end year is not the current year, if any.
        # `|| true`: the filtering grep exits 1 when every range is current.
        stale_ranges="$(grep -oE 'Copyright [0-9]{4}-[0-9]{4} Norconex Inc\.' "$file" | grep -vF -- "-${current_year} Norconex Inc." || true)"
        if [ -n "$stale_ranges" ]; then
          # Keep the first year of the range, replace the second one.
          sed -i "s/Copyright \([0-9]\{4\}\)-[0-9]\{4\} Norconex Inc\./Copyright \1-${current_year} Norconex Inc./" "$file"
          echo "$file" >> "$updated_list"
        else
          echo ""
          echo "Skipping replacement in ${file} because the second year is already the current year."
        fi
      fi
    done

    # Print out a summary of all modified files
    echo ""
    echo ""
    echo " Here is a list of files that has been updated with the current year:"
    cat "$updated_list"
    rm -f "$updated_list"
    echo ""
    echo ""
# Print the working-tree state to the job log so the edits made by the
# preceding steps are visible before the commit step runs.
- name: Check git status
  if: ${{ steps.check_dependabot.outputs.check == '0' && steps.modified-files.outputs.all_changed_files != '' }}
  run: git status
# Commit the files changed by the preceding steps with a fixed commit
# message, using the auto-commit action. Runs under the same conditions as
# the steps that edit the files.
- name: Upload Modified Files
  if: ${{ steps.check_dependabot.outputs.check == '0' && steps.modified-files.outputs.all_changed_files != '' }}
  uses: stefanzweifel/git-auto-commit-action@v5
  with:
    commit_message: Apply Copyright year changes