-
Notifications
You must be signed in to change notification settings - Fork 160
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cronjobs: inject canonical URLs into older manual pages (SEO) (#1241)
The GRASS GIS manual pages of the different versions have long been published with a difficult-to-understand concept of being invisible, redirected or shown, which also strongly affects the search engine ranking. SEO: Without an indication of "canonical" URLs, the different versions wipe each other out in search engines. Canonical tags help consolidate duplicate or similar content by specifying the preferred version of a page, ensuring search engines index and rank the desired URL while avoiding duplicate content issues. This PR changes the cronjob scripts to - inject "grass-stable" as the "canonical" into older manual pages under versioned URL - inject "grass-devel" as the "canonical" into the development manual pages under versioned URL This way, no "duplicate content" should occur from an SEO perspective. Also [robots.txt](https://grass.osgeo.org/robots.txt) is updated to reactivate the manual pages of old GRASS GIS versions (which now contain "grass-stable" as the canonical). Additionally, rewrite the red box injection to avoid the globbing error `argument list too long` in old versions of the libpython manual. Fixes OSGeo/grass#4579
- Loading branch information
Showing
6 changed files
with
283 additions
and
109 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#!/bin/sh | ||
#!/bin/bash | ||
|
||
# script to build GRASS GIS new current binaries + addons + progman from the `releasebranch_8_4` branch | ||
# (c) 2002-2024, GPL 2+ Markus Neteler <[email protected]> | ||
|
@@ -14,6 +14,8 @@ | |
# - generates the pyGRASS 8 HTML manual | ||
# - generates the user 8 HTML manuals | ||
# - injects DuckDuckGo search field | ||
# - copies over generated manual pages to grass-stable/manuals/ | ||
# - injects in versioned manual the "canonical" to point to "stable" manual (as seen in the Python manual pages) | ||
|
||
# Preparations, on server (neteler@grasslxd:$): | ||
# - install dependencies: | ||
|
@@ -169,6 +171,7 @@ echo "Copy over the manual + pygrass HTML pages:" | |
mkdir -p $TARGETHTMLDIR | ||
mkdir -p $TARGETHTMLDIR/addons # indeed only relevant the very first compile time | ||
# don't destroy the addons during update | ||
rm -rf /tmp/addons | ||
\mv $TARGETHTMLDIR/addons /tmp | ||
rm -f $TARGETHTMLDIR/*.* | ||
(cd $TARGETHTMLDIR ; rm -rf barscales colortables icons northarrows) | ||
|
@@ -325,12 +328,58 @@ export VERSION_NUMBER=$DOTVERSION | |
python3 $GRASSBUILDDIR/man/build_keywords.py $TARGETMAIN/grass$GMAJOR$GMINOR/manuals/ $TARGETMAIN/grass$GMAJOR$GMINOR/manuals/addons/ | ||
unset ARCH ARCH_DISTDIR GISBASE VERSION_NUMBER | ||
|
||
############################################ | ||
# Cloning new manual pages into grass-stable/manuals/ (following the Python manual pages concept) | ||
# - the versioned manual pages get a canonical URL injected (see below) which points to these grass-stable pages (avoiding "duplicate content" SEO punishment) | ||
# see https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls | ||
|
||
TARGETHTMLDIRSTABLE=$TARGETMAIN/grass-stable/manuals/ | ||
mkdir -p $TARGETHTMLDIRSTABLE $TARGETHTMLDIRSTABLE/addons | ||
# cleanup from previous run | ||
rm -rf /tmp/addons | ||
\mv $TARGETHTMLDIRSTABLE/addons /tmp | ||
rm -f $TARGETHTMLDIRSTABLE/*.* | ||
(cd $TARGETHTMLDIRSTABLE ; rm -rf barscales colortables icons northarrows) | ||
# clone manual pages | ||
cp -rp $TARGETHTMLDIR/* $TARGETHTMLDIRSTABLE/ | ||
|
||
############################################ | ||
# SEO: inject canonical link into versioned manual pages (e.g, grass84/) | ||
# - cd back into folder of versioned HTML manual pages | ||
# - run sed to replace an existing HTML header string in the upper part of the HTML file | ||
# with itself + canonical link of stable version | ||
# --> do this for core manual pages, addons, libpython, recursively | ||
|
||
#######################################
# Inject a <link rel="canonical"> element pointing to the grass-stable
# manual into every *.html file found (recursively) below a directory.
# Files which already contain a canonical link are left untouched, so the
# function may be run repeatedly and on overlapping directories.
# Arguments:
#   $1 - directory to scan for HTML pages
#   $2 - URL path of $1 relative to the manuals root
#        ("" for the root itself, e.g. "addons/" or "libpython/")
# Note: the page path is pasted into a sed replacement using ':' as
#       delimiter; file names are assumed not to contain ':', '&' or '\'.
#######################################
process_files() {
    local dir="$1"
    local prefix="$2"
    # start directory without trailing slash, used to derive relative paths
    local root="${dir%/}"
    local myfile manpage

    find "$dir" -type f -name '*.html' -print0 | while IFS= read -r -d '' myfile; do
        if ! grep -q 'link rel="canonical"' "$myfile"; then
            # find descends into subdirectories: keep the path relative to
            # $dir (not just the basename) so that nested pages such as
            # addons/*.html or libpython/_modules/*.html get their own URL
            manpage="$prefix${myfile#"$root"/}"
            sed -i -e "s:</head>:<link rel=\"canonical\" href=\"https\://grass.osgeo.org/grass-stable/manuals/$manpage\">\n</head>:g" "$myfile"
        fi
    done
}
|
||
cd "$TARGETHTMLDIR" | ||
process_files "$TARGETHTMLDIR" "" | ||
process_files "$TARGETHTMLDIR/addons" "addons/" | ||
process_files "$TARGETHTMLDIR/libpython" "libpython/" | ||
|
||
# SEO: "stable" manual pages (grass-stable/) is canonical link | ||
|
||
############################################ | ||
# create sitemaps to expand the hugo sitemap | ||
|
||
# versioned manual: | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass$GMAJOR$GMINOR/manuals/ --url=https://grass.osgeo.org/grass$GMAJOR$GMINOR/manuals/ -o | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass$GMAJOR$GMINOR/manuals/addons/ --url=https://grass.osgeo.org/grass$GMAJOR$GMINOR/manuals/addons/ -o | ||
|
||
# grass-stable manual: | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass-stable/manuals/ --url=https://grass.osgeo.org/grass-stable/manuals/ -o | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass-stable/manuals/addons/ --url=https://grass.osgeo.org/grass-stable/manuals/addons/ -o | ||
|
||
############################################ | ||
# cleanup | ||
cd $GRASSBUILDDIR | ||
|
@@ -339,9 +388,10 @@ rm -rf lib/html/ lib/latex/ /tmp/addons | |
|
||
echo "Finished GRASS $VERSION $ARCH compilation." | ||
echo "Written to: $TARGETDIR" | ||
echo "Copied HTML ${GVERSION} manual to https://grass.osgeo.org/grass${VERSION}/manuals/" | ||
echo "Copied pygrass progman ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/libpython/" | ||
echo "Copied Addons ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/addons/" | ||
echo "Copied HTML ${GVERSION} manual to https://grass.osgeo.org/grass${VERSION}/manuals/ (with canonical in metadata)" | ||
echo "Copied pygrass progman ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/libpython/ (with canonical in metadata)" | ||
echo "Copied Addons ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/addons/ (with canonical in metadata)" | ||
## echo "Copied HTML ${GVERSION} progman to https://grass.osgeo.org/programming${GVERSION}" | ||
echo "Copied HTML stable manual to https://grass.osgeo.org/grass-stable/manuals/" | ||
|
||
exit 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#!/bin/sh | ||
#!/bin/bash | ||
|
||
# script to build GRASS GIS legacy binaries + addons from the `releasebranch_7_8` branch | ||
# (c) 2008-2024, GPL 2+ Markus Neteler <[email protected]> | ||
|
@@ -11,11 +11,10 @@ | |
# - configures source code and then compiles it | ||
# - packages the binaries | ||
# - generated the install scripts | ||
# - generates the pyGRASS 7 HTML manual | ||
# - generates the user 7 HTML manuals | ||
# - generates the user legacy 7 HTML manuals | ||
# - injects DuckDuckGo search field | ||
# - injects "G8.x is the new version" box into core and addon manual pages | ||
# - injects canonical URL | ||
# - injects in versioned manual the "canonical" to point to "stable" manual (as seen in the Python manual pages) | ||
|
||
# Preparations, on server (neteler@grasslxd:$): | ||
# - install dependencies: | ||
|
@@ -171,6 +170,7 @@ echo "Copy over the manual + pygrass HTML pages:" | |
mkdir -p $TARGETHTMLDIR | ||
mkdir -p $TARGETHTMLDIR/addons # indeed only relevant the very first compile time | ||
# don't destroy the addons during update | ||
rm -rf /tmp/addons | ||
\mv $TARGETHTMLDIR/addons /tmp | ||
rm -f $TARGETHTMLDIR/*.* | ||
(cd $TARGETHTMLDIR ; rm -rf barscales colortables icons northarrows) | ||
|
@@ -190,7 +190,7 @@ cp -p AUTHORS CITING COPYING GPL.TXT INSTALL REQUIREMENTS.html $TARGETDIR/ | |
(cd $GRASSBUILDDIR/ ; $MYMAKE cleansphinx) | ||
|
||
############ | ||
# generate doxygen programmers's G8 manual | ||
# generate doxygen programmer's G7 manual | ||
## -> no, only in GRASS GIS 8 versions | ||
|
||
##### generate i18N stats for HTML page path: | ||
|
@@ -302,32 +302,77 @@ unset ARCH ARCH_DISTDIR GISBASE VERSION_NUMBER | |
# - cd into folder of HTML manual pages | ||
# - run sed to replace an existing HTML string in the upper part of the HTML file | ||
# with itself + the red box pointing to the respective stable version manual page | ||
# --> do this for core manual pages, addons, libpython | ||
## | ||
# for core manual pages | ||
# --> do this for core manual pages, addons, libpython, recursively | ||
|
||
# red box for outdated manual pages | ||
echo "Injecting G8.x new current version hint in a red box into MAN pages..." | ||
# inject G8.x current stable version hint in a red box: | ||
(cd $TARGETHTMLDIR/ ; for myfile in `grep -L 'document is for an older version of GRASS GIS' *.html` ; do sed -i -e "s:<div id=\"container\">:<div id=\"container\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that will be discontinued soon. You should upgrade, and read the <a href=\"../../../grass${NEW_CURRENT}/manuals/$myfile\">current manual page</a>.</p>:g" $myfile ; done) | ||
# also for addons, separately for landing page and addons | ||
(cd $TARGETHTMLDIR/addons/ ; sed -i -e "s:<table><tr><td>:<hr class=\"header\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that will be discontinued soon. You should upgrade, and read the <a href=\"../../../grass${NEW_CURRENT}/manuals/addons/index.html\">current addon manual page</a>.</p> <table><tr><td>:g" index.html) | ||
(cd $TARGETHTMLDIR/addons/ ; for myfile in `grep -L 'document is for an older version of GRASS GIS' *.html` ; do sed -i -e "s:<div id=\"container\">:<div id=\"container\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that will be discontinued soon. You should upgrade, and read the <a href=\"../../../grass${NEW_CURRENT}/manuals/addons/$myfile\">current manual page</a>.</p>:g" $myfile ; done) | ||
# also for Python | ||
(cd $TARGETHTMLDIR/libpython/ ; for myfile in `grep -L 'document is for an older version of GRASS GIS' *.html` ; do sed -i -e "s:^<hr class=\"header\">:<hr class=\"header\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that will be discontinued soon. You should upgrade, and read the <a href=\"../../../../grass${NEW_CURRENT}/manuals/libpython/$myfile\">current Python manual page</a>.</p>:g" $myfile ; done) | ||
|
||
# SEO: inject canonical link into all (old) manual pages to point to latest stable (avoid "duplicate content" SEO punishment) | ||
#######################################
# Inject the red "older version" hint box into every *.html file found
# (recursively) below a directory. The box is placed right after the
# opening <div id="container"> and links to the same page in grass-stable.
# Files which already contain the hint text are skipped.
# Arguments:
#   $1 - directory to scan for HTML pages
#   $2 - URL path of $1 relative to the manuals root
#        ("" for the root itself, e.g. "addons/")
# Note: the page path is pasted into a sed replacement using ':' as
#       delimiter; file names are assumed not to contain ':', '&' or '\'.
#######################################
process_files() {
    local dir="$1"
    local prefix="$2"
    # start directory without trailing slash, used to derive relative paths
    local root="${dir%/}"
    local myfile manpage

    find "$dir" -type f -name '*.html' -print0 | while IFS= read -r -d '' myfile; do
        if ! grep -q 'document is for an older version of GRASS GIS' "$myfile"; then
            # find descends into subdirectories: keep the path relative to
            # $dir (not just the basename) so that e.g. addon pages reached
            # from the manuals root link to addons/<page> in grass-stable
            manpage="$prefix${myfile#"$root"/}"
            sed -i -e "s:<div id=\"container\">:<div id=\"container\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that has been discontinued. You should upgrade, and read the <a href=\"../../../grass-stable/manuals/$manpage\">current manual page</a>.</p>:g" "$myfile"
        fi
    done
}
|
||
cd "$TARGETHTMLDIR" | ||
process_files "$TARGETHTMLDIR" "" | ||
process_files "$TARGETHTMLDIR/addons" "addons/" | ||
|
||
# also into addons landing page, separately due to different structure | ||
(cd $TARGETHTMLDIR/addons/ ; | ||
sed -i -e "s:<table><tr><td>:<hr class=\"header\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that has been discontinued. You should upgrade, and read the <a href=\"../../../grass-stable/manuals/addons/index.html\">current addon manual page</a>.</p> <table><tr><td>:g" index.html | ||
) | ||
|
||
# also into libpython pages, separately due to different structure | ||
# red box for outdated libpython manual pages | ||
echo "Injecting G8.x new current version hint in a red box into libpython MAN pages..." | ||
#######################################
# Inject the red "older version" hint box into every libpython *.html file
# found (recursively) below a directory. These pages have a different
# layout, so the box is appended to a line starting with
# <hr class="header">; it links to the same page in grass-stable.
# Files which already contain the hint text are skipped.
# Arguments:
#   $1 - directory to scan for HTML pages
#   $2 - URL path of $1 relative to the manuals root (e.g. "libpython/")
# Note: the page path is pasted into a sed replacement using ':' as
#       delimiter; file names are assumed not to contain ':', '&' or '\'.
#######################################
process_files() {
    local dir="$1"
    local prefix="$2"
    # start directory without trailing slash, used to derive relative paths
    local root="${dir%/}"
    local myfile manpage

    find "$dir" -type f -name '*.html' -print0 | while IFS= read -r -d '' myfile; do
        if ! grep -q 'document is for an older version of GRASS GIS' "$myfile"; then
            # find descends into subdirectories: keep the path relative to
            # $dir (not just the basename) so nested pages link to their
            # own counterpart instead of a non-existing top-level page
            manpage="$prefix${myfile#"$root"/}"
            sed -i -e "s:^<hr class=\"header\">:<hr class=\"header\"><p style=\"border\:3px; border-style\:solid; border-color\:#BC1818; padding\: 1em;\">Note\: This document is for an older version of GRASS GIS that has been discontinued. You should upgrade, and read the <a href=\"../../../../grass-stable/manuals/$manpage\">current Python library manual page</a>.</p>:g" "$myfile"
        fi
    done
}
|
||
process_files "$TARGETHTMLDIR/libpython" "libpython/" | ||
|
||
############################################ | ||
# SEO: inject canonical link into all (old) versioned manual pages to point to grass-stable (avoid "duplicate content" SEO punishment) | ||
# see https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls | ||
# - cd into folder of HTML manual pages | ||
# - cd back into folder of versioned HTML manual pages | ||
# - run sed to replace an existing HTML header string in the upper part of the HTML file | ||
# with itself + canonical link of stable version | ||
# --> do this for core manual pages, addons, libpython | ||
(cd $TARGETHTMLDIR/ ; for myfile in `grep -L 'link rel="canonical"' *.html` ; do sed -i -e "s:</head>:<link rel=\"canonical\" href=\"https\://grass.osgeo.org/grass${NEW_CURRENT}/manuals/$myfile\">\n</head>:g" $myfile ; done) | ||
(cd $TARGETHTMLDIR/addons/ ; for myfile in `grep -L 'link rel="canonical"' *.html` ; do sed -i -e "s:</head>:<link rel=\"canonical\" href=\"https\://grass.osgeo.org/grass${NEW_CURRENT}/manuals/addons/$myfile\">\n</head>:g" $myfile ; done) | ||
(cd $TARGETHTMLDIR/libpython/ ; for myfile in `grep -L 'link rel="canonical"' *.html` ; do sed -i -e "s:</head>:<link rel=\"canonical\" href=\"https\://grass.osgeo.org/grass${NEW_CURRENT}/manuals/libpython/$myfile\">\n</head>:g" $myfile ; done) | ||
# --> do this for core manual pages, addons, libpython, recursively | ||
|
||
#######################################
# Inject a <link rel="canonical"> element pointing to the grass-stable
# manual into every *.html file found (recursively) below a directory.
# Files which already contain a canonical link are left untouched, so the
# function may be run repeatedly and on overlapping directories.
# Arguments:
#   $1 - directory to scan for HTML pages
#   $2 - URL path of $1 relative to the manuals root
#        ("" for the root itself, e.g. "addons/" or "libpython/")
# Note: the page path is pasted into a sed replacement using ':' as
#       delimiter; file names are assumed not to contain ':', '&' or '\'.
#######################################
process_files() {
    local dir="$1"
    local prefix="$2"
    # start directory without trailing slash, used to derive relative paths
    local root="${dir%/}"
    local myfile manpage

    find "$dir" -type f -name '*.html' -print0 | while IFS= read -r -d '' myfile; do
        if ! grep -q 'link rel="canonical"' "$myfile"; then
            # find descends into subdirectories: keep the path relative to
            # $dir (not just the basename) so that nested pages such as
            # addons/*.html or libpython/_modules/*.html get their own URL
            manpage="$prefix${myfile#"$root"/}"
            sed -i -e "s:</head>:<link rel=\"canonical\" href=\"https\://grass.osgeo.org/grass-stable/manuals/$manpage\">\n</head>:g" "$myfile"
        fi
    done
}
|
||
cd "$TARGETHTMLDIR" | ||
process_files "$TARGETHTMLDIR" "" | ||
process_files "$TARGETHTMLDIR/addons" "addons/" | ||
process_files "$TARGETHTMLDIR/libpython" "libpython/" | ||
|
||
############################################ | ||
# create sitemaps to expand the hugo sitemap | ||
# create local sitemap | ||
|
||
# versioned manual: | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass$GMAJOR$GMINOR/manuals/ --url=https://grass.osgeo.org/grass$GMAJOR$GMINOR/manuals/ -o | ||
python3 $HOME/src/grass$GMAJOR-addons/utils/create_manuals_sitemap.py --dir=/var/www/code_and_data/grass$GMAJOR$GMINOR/manuals/addons/ --url=https://grass.osgeo.org/grass$GMAJOR$GMINOR/manuals/addons/ -o | ||
|
||
|
@@ -339,8 +384,8 @@ rm -rf lib/html/ lib/latex/ /tmp/addons | |
|
||
echo "Finished GRASS $VERSION $ARCH compilation." | ||
echo "Written to: $TARGETDIR" | ||
echo "Copied HTML ${GVERSION} manual to https://grass.osgeo.org/grass${VERSION}/manuals/" | ||
echo "Copied pygrass progman ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/libpython/" | ||
echo "Copied Addons ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/addons/" | ||
echo "Copied HTML ${GVERSION} manual to https://grass.osgeo.org/grass${VERSION}/manuals/ (with canonical in metadata)" | ||
echo "Copied pygrass progman ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/libpython/ (with canonical in metadata)" | ||
echo "Copied Addons ${GVERSION} to https://grass.osgeo.org/grass${VERSION}/manuals/addons/ (with canonical in metadata)" | ||
|
||
exit 0 |
Oops, something went wrong.