seo-postbuild.sh
113 lignes · 4377 octets
#!/usr/bin/env bash
# SEO post-processing, run after `mdbook build` / build-all-langs.sh.
#
# - Copies robots.txt and llms.txt from seo/ into book/, rewriting the base URL
# - Normalises the SEO URLs embedded in the generated .html files
# - Generates one sitemap.xml per language plus a sitemap index at the root
#
# Usage: ./scripts/seo-postbuild.sh [BASE_URL]
# BASE_URL defaults to https://demo.gitrust.eu/docs
#
# Requires GNU sed (`sed -i` without a suffix argument).

set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# URL hard-coded in seo/*.txt and in the generated HTML.
readonly DEFAULT_BASE_URL="https://demo.gitrust.eu/docs"
# Same URL written as a sed regex (dots escaped so they only match a literal dot).
readonly DEFAULT_BASE_URL_RE='https://demo\.gitrust\.eu/docs'

BASE_URL="${1:-$DEFAULT_BASE_URL}"
# A trailing slash would yield "//" in every generated URL.
BASE_URL="${BASE_URL%/}"

LANGS=(fr en de es pt it)
BOOK_DIR="book"

# Escape a string for use as the replacement part of a sed `s|…|…|` command:
# `\` and `&` are special in replacements, `|` is the delimiter used below.
sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# Stream filter: escape the five XML special characters on each input line.
# `&` must be handled first so the entities produced afterwards stay intact.
xml_escape() {
  sed -e 's/&/\&amp;/g' \
      -e 's/</\&lt;/g' \
      -e 's/>/\&gt;/g' \
      -e 's/"/\&quot;/g' \
      -e "s/'/\&apos;/g"
}

if [[ ! -d "$BOOK_DIR" ]]; then
  echo "✗ $BOOK_DIR/ introuvable — lance d'abord build-all-langs.sh" >&2
  exit 1
fi

base_url_sed="$(sed_escape_replacement "$BASE_URL")"
base_url_xml="$(printf '%s\n' "$BASE_URL" | xml_escape)"

# ---------------------------------------------------------------------------
# robots.txt + llms.txt at the site root, with the base URL rewritten
# ---------------------------------------------------------------------------
for seo_file in robots.txt llms.txt; do
  # Check first: otherwise the redirection below would leave an empty file
  # behind before sed reports the missing input.
  if [[ ! -f "seo/$seo_file" ]]; then
    echo "✗ seo/$seo_file introuvable" >&2
    exit 1
  fi
  sed "s|$DEFAULT_BASE_URL_RE|$base_url_sed|g" "seo/$seo_file" > "$BOOK_DIR/$seo_file"
done
echo "✓ robots.txt et llms.txt copiés vers $BOOK_DIR/"

# ---------------------------------------------------------------------------
# Fix the URLs injected by theme/head.hbs: mdBook's {{ path }} variable yields
# a .md path, so rewrite it to .html in the SEO tags (canonical, hreflang,
# og:url, JSON-LD url), then swap in the requested base URL.
# The .md rewrite must run first because it matches on the default host.
# ---------------------------------------------------------------------------
echo "→ Normalisation des URLs SEO dans les .html (.md → .html, base URL)"
# `-exec … {} +` runs nothing when no file matches, whereas `xargs sed -i`
# would invoke sed without operands and fail.
find "$BOOK_DIR" -type f -name '*.html' -exec sed -i \
  -e 's|\(demo\.gitrust\.eu/docs/[a-z/_0-9-]*\)\.md|\1.html|g' \
  -e "s|$DEFAULT_BASE_URL_RE|$base_url_sed|g" \
  {} +

# One timestamp for the whole run so every sitemap carries the same lastmod.
now="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# ---------------------------------------------------------------------------
# Per-language sitemap
# ---------------------------------------------------------------------------
for lang in "${LANGS[@]}"; do
  lang_dir="$BOOK_DIR/$lang"
  [[ -d "$lang_dir" ]] || continue

  sitemap="$lang_dir/sitemap.xml"
  # Counted in the loop: `grep -c` exits 1 on a zero count, which would abort
  # the script under `set -e` for a language without pages.
  pages=0

  {
    echo '<?xml version="1.0" encoding="UTF-8"?>'
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
    echo '        xmlns:xhtml="http://www.w3.org/1999/xhtml">'

    # Every HTML page of the language, except mdBook's print and 404 pages.
    while IFS= read -r page; do
      # Quoted so the directory is stripped literally, not used as a glob.
      rel="${page#"$lang_dir"/}"

      echo "  <url>"
      printf '    <loc>%s</loc>\n' "$base_url_xml/$lang/$rel"
      printf '    <lastmod>%s</lastmod>\n' "$now"

      # hreflang alternates for every configured language, including the
      # page's own; targets are not checked for existence.
      for alt in "${LANGS[@]}"; do
        printf '    <xhtml:link rel="alternate" hreflang="%s" href="%s"/>\n' \
          "$alt" "$base_url_xml/$alt/$rel"
      done
      printf '    <xhtml:link rel="alternate" hreflang="x-default" href="%s"/>\n' \
        "$base_url_xml/fr/$rel"

      # Heuristic priority: index > tutorial > how-to > reference > explanation
      case "$rel" in
        index.html)     prio="1.0" ;;
        *tutorials/*)   prio="0.8" ;;
        *how-to/*)      prio="0.7" ;;
        *reference/*)   prio="0.6" ;;
        *explanation/*) prio="0.5" ;;
        *)              prio="0.5" ;;
      esac
      printf '    <priority>%s</priority>\n' "$prio"
      echo "  </url>"

      pages=$((pages + 1))
    done < <(
      find "$lang_dir" -type f -name '*.html' \
        ! -name 'print.html' ! -name '404.html' \
        | sort \
        | xml_escape
    )

    echo '</urlset>'
  } > "$sitemap"

  echo "✓ $sitemap ($pages pages)"
done

# ---------------------------------------------------------------------------
# Sitemap index (root) referencing each per-language sitemap that exists
# ---------------------------------------------------------------------------
sitemap_count=0
{
  echo '<?xml version="1.0" encoding="UTF-8"?>'
  echo '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  for lang in "${LANGS[@]}"; do
    if [[ -f "$BOOK_DIR/$lang/sitemap.xml" ]]; then
      echo "  <sitemap>"
      printf '    <loc>%s</loc>\n' "$base_url_xml/$lang/sitemap.xml"
      printf '    <lastmod>%s</lastmod>\n' "$now"
      echo "  </sitemap>"
      sitemap_count=$((sitemap_count + 1))
    fi
  done
  echo '</sitemapindex>'
} > "$BOOK_DIR/sitemap.xml"
# Report the number of sitemaps actually indexed, not the configured languages.
echo "✓ $BOOK_DIR/sitemap.xml (index de $sitemap_count sitemaps par langue)"

echo ""
echo "Artefacts SEO produits :"
echo "  - $BOOK_DIR/robots.txt"
echo "  - $BOOK_DIR/llms.txt"
echo "  - $BOOK_DIR/sitemap.xml (index)"
echo "  - $BOOK_DIR/<lang>/sitemap.xml × $sitemap_count"
GitRust