hacktricks/.github/workflows/translate_all.yml
carlospolop 0683e0376d f
2025-10-04 01:58:50 +02:00

243 lines
9.6 KiB
YAML

name: Translate All
on:
push:
branches:
- master
paths-ignore:
- 'scripts/**'
- '.gitignore'
- '.github/**'
- Dockerfile
workflow_dispatch:
permissions:
packages: write
id-token: write
contents: write
jobs:
translate:
name: Translate → ${{ matrix.name }} (${{ matrix.branch }})
runs-on: ubuntu-latest
# Run N languages in parallel (tune max-parallel if needed)
strategy:
fail-fast: false
# max-parallel: 3 #Nothing to run all in parallel
matrix:
include:
- { name: "Afrikaans", language: "Afrikaans", branch: "af" }
- { name: "German", language: "German", branch: "de" }
- { name: "Greek", language: "Greek", branch: "el" }
- { name: "Spanish", language: "Spanish", branch: "es" }
- { name: "French", language: "French", branch: "fr" }
- { name: "Hindi", language: "Hindi", branch: "hi" }
- { name: "Italian", language: "Italian", branch: "it" }
- { name: "Japanese", language: "Japanese", branch: "ja" }
- { name: "Korean", language: "Korean", branch: "ko" }
- { name: "Polish", language: "Polish", branch: "pl" }
- { name: "Portuguese", language: "Portuguese", branch: "pt" }
- { name: "Serbian", language: "Serbian", branch: "sr" }
- { name: "Swahili", language: "Swahili", branch: "sw" }
- { name: "Turkish", language: "Turkish", branch: "tr" }
- { name: "Ukrainian", language: "Ukrainian", branch: "uk" }
- { name: "Chinese", language: "Chinese", branch: "zh" }
# Ensure only one job per branch runs at a time (even across workflow runs)
concurrency:
group: translate-cloud-${{ matrix.branch }}
cancel-in-progress: false
container:
image: ghcr.io/hacktricks-wiki/hacktricks-cloud/translator-image:latest
env:
LANGUAGE: ${{ matrix.language }}
BRANCH: ${{ matrix.branch }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Update and download scripts
run: |
sudo apt-get update
# Install GitHub CLI properly
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
&& sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
&& sudo apt update \
&& sudo apt install gh -y \
&& sudo apt-get install -y wget
mkdir scripts
cd scripts
wget -O get_and_save_refs.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/get_and_save_refs.py
wget -O compare_and_fix_refs.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/compare_and_fix_refs.py
wget -O translator.py https://raw.githubusercontent.com/HackTricks-wiki/hacktricks-cloud/master/scripts/translator.py
cd ..
- name: Run get_and_save_refs.py
run: |
python scripts/get_and_save_refs.py
- name: Download language branch & update refs
run: |
git config --global --add safe.directory /__w/hacktricks/hacktricks
git config --global user.name 'Translator'
git config --global user.email 'github-actions@github.com'
git config pull.rebase false
git checkout $BRANCH
git pull
python scripts/compare_and_fix_refs.py --files-unmatched-paths /tmp/file_paths.txt
git add .
git commit -m "Fix unmatched refs" || echo "No changes to commit"
git push || echo "No changes to push"
- name: Run translation script on changed files
run: |
git checkout master
export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
git diff --name-only HEAD~1 | grep -v "SUMMARY.md" | while read -r file; do
if echo "$file" | grep -qE '\.md$'; then
echo -n ",$file" >> /tmp/file_paths.txt
fi
done
echo "Files to translate (`wc -l < /tmp/file_paths.txt`):"
cat /tmp/file_paths.txt
echo ""
echo ""
touch /tmp/file_paths.txt
if [ -s /tmp/file_paths.txt ]; then
python scripts/translator.py \
--language "$LANGUAGE" \
--branch "$BRANCH" \
--api-key "$OPENAI_API_KEY" \
-f "$(cat /tmp/file_paths.txt)" \
-t 3
else
echo "No markdown files changed, skipping translation."
fi
- name: Build mdBook
run: |
git checkout "$BRANCH"
git pull
MDBOOK_BOOK__LANGUAGE=$BRANCH mdbook build || (echo "Error logs" && cat hacktricks-preprocessor-error.log && echo "" && echo "" && echo "Debug logs" && (cat hacktricks-preprocessor.log | tail -n 20) && exit 1)
- name: Push search index to hacktricks-searchindex repo
shell: bash
env:
PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
run: |
set -euo pipefail
ASSET="book/searchindex.js"
TARGET_REPO="HackTricks-wiki/hacktricks-searchindex"
FILENAME="searchindex-${BRANCH}.js"
if [ ! -f "$ASSET" ]; then
echo "Expected $ASSET to exist after build" >&2
exit 1
fi
TOKEN="${PAT_TOKEN}"
if [ -z "$TOKEN" ]; then
echo "No PAT_TOKEN available" >&2
exit 1
fi
# Clone the searchindex repo
git clone https://x-access-token:${TOKEN}@github.com/${TARGET_REPO}.git /tmp/searchindex-repo
# Compress the searchindex file
gzip -9 -k -f "$ASSET"
# Show compression stats
ORIGINAL_SIZE=$(wc -c < "$ASSET")
COMPRESSED_SIZE=$(wc -c < "${ASSET}.gz")
RATIO=$(awk "BEGIN {printf \"%.1f\", ($COMPRESSED_SIZE / $ORIGINAL_SIZE) * 100}")
echo "Compression: ${ORIGINAL_SIZE} bytes -> ${COMPRESSED_SIZE} bytes (${RATIO}%)"
# Copy ONLY the .gz version to the searchindex repo (no uncompressed .js)
cp "${ASSET}.gz" "/tmp/searchindex-repo/${FILENAME}.gz"
# Commit and push with retry logic
cd /tmp/searchindex-repo
git config user.name "GitHub Actions"
git config user.email "github-actions@github.com"
git add "${FILENAME}.gz"
if git diff --staged --quiet; then
echo "No changes to commit"
else
git commit -m "Update ${FILENAME} from hacktricks-cloud build"
# Retry push up to 20 times with pull --rebase between attempts
MAX_RETRIES=20
RETRY_COUNT=0
while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
if git push origin master; then
echo "Successfully pushed on attempt $((RETRY_COUNT + 1))"
break
else
RETRY_COUNT=$((RETRY_COUNT + 1))
if [ $RETRY_COUNT -lt $MAX_RETRIES ]; then
echo "Push failed, attempt $RETRY_COUNT/$MAX_RETRIES. Pulling and retrying..."
# Try normal rebase first
if git pull --rebase origin master 2>&1 | tee /tmp/pull_output.txt; then
echo "Rebase successful, retrying push..."
else
# If rebase fails due to divergent histories (orphan branch reset), re-clone
if grep -q "unrelated histories\|refusing to merge\|fatal: invalid upstream\|couldn't find remote ref" /tmp/pull_output.txt; then
echo "Detected history rewrite, re-cloning repository..."
cd /tmp
rm -rf searchindex-repo
git clone https://x-access-token:${TOKEN}@github.com/${TARGET_REPO}.git searchindex-repo
cd searchindex-repo
git config user.name "GitHub Actions"
git config user.email "github-actions@github.com"
# Re-copy ONLY the .gz version (no uncompressed .js)
cp "${ASSET}.gz" "${FILENAME}.gz"
git add "${FILENAME}.gz"
git commit -m "Update ${FILENAME}.gz from hacktricks-cloud build"
echo "Re-cloned and re-committed, will retry push..."
else
echo "Rebase failed for unknown reason, retrying anyway..."
fi
fi
sleep 1
else
echo "Failed to push after $MAX_RETRIES attempts"
exit 1
fi
fi
done
fi
# Login in AWS
- name: Configure AWS credentials using OIDC
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: us-east-1
# Sync the build to S3
- name: Sync to S3
run: |
echo "Current branch:"
git rev-parse --abbrev-ref HEAD
echo "Syncing $BRANCH to S3"
aws s3 sync ./book s3://hacktricks-wiki/$BRANCH --delete
echo "Sync completed"
echo "Cat 3 files from the book"
find . -type f -name 'index.html' -print | head -n 3 | xargs -r cat