-
Notifications
You must be signed in to change notification settings - Fork 924
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4603 from galaxyproject/matrixbot-galaxyhelp
Add UseGalaxy Help Forum to Matrix channel Bot
- Loading branch information
Showing
2 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Scheduled workflow: posts unanswered single-cell Galaxy Help topics to the
# Matrix Community-of-Practice room by running bin/galaxy-help-news.sh.
name: "[Cron] Send Single-cell Galaxy Help Topics to Matrix CoP Room"

on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    # We'll run this every Monday at noon (GitHub cron schedules are in UTC)
    - cron: '0 12 * * 1'
  # Also allow manual runs from the Actions tab
  workflow_dispatch:

jobs:
  runner-job:
    # Do not run the cron job on forks
    if: github.repository_owner == 'galaxyproject'
    runs-on: ubuntu-latest

    steps:
      # v2 of this action runs on a retired Node.js runtime; use a current major
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # BEGIN Dependencies
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y coreutils curl libxml2-utils gawk sed grep jq wget
      # END Dependencies

      - name: Send announcements to single-cell matrix for unanswered posts
        run: |
          wget https://raw.githubusercontent.com/galaxyproject/training-material/main/bin/galaxy-help-news.sh
          HTML_TYPE="bullets" MAX_REPLIES="0" WANTED_TAGS="scrna scrna-seq" ROOM_ID='!TJRLNvfcbWbSRoUNpl:matrix.org' bash ./galaxy-help-news.sh
        env:
          # The script prints its usage and exits 255 when this is empty
          MATRIX_ACCESS_TOKEN: ${{ secrets.matrix_access_token }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
#!/bin/bash

## Settings. Every value below can be overridden from the environment;
## the right-hand side is only the fallback used when the variable is
## unset or empty.
MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}
ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'}  ## GTN Single Cell Maintainers
WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}          ## space separated
MAX_REPLIES=${MAX_REPLIES:-1}
HTML_TYPE=${HTML_TYPE:-"bullets"}                      ## or "table"

## Result filters: query string appended to the help-forum tag URL
OPTS=${OPTS:-"?ascending=true&order=activity"}
|
||
## Without an access token nothing can be posted: print the usage text
## (showing the currently effective defaults) to stderr and exit 255.
if [ -z "${MATRIX_ACCESS_TOKEN:-}" ]; then
  cat >&2 <<EOF

This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
a Room for topics that have less than X replies. Run this maybe once a month.

Example Usage:

    MATRIX_ACCESS_TOKEN='123_123_123' \\
    MATRIX_SERVER='https://matrix.org' \\
    ROOM_ID='!123_132_123:matrix.org' \\
    WANTED_TAGS='tag1 tag2' \\
    MAX_REPLIES=1 \\
    HTML_TYPE='bullets' \\
    bash $0

Where:
    MATRIX_ACCESS_TOKEN  Can be found in your Matrix profile under
                         'All settings' -> 'Help & About' -> 'Access Token'
    MATRIX_SERVER        The name or base address of the Matrix server to
                         post to. Default is '$MATRIX_SERVER'
    ROOM_ID              The Room ID can be found in the URL of the room
                         usually following format '!123123123:matrix.org'.
                         Default is '$ROOM_ID'
                         NOTE: Single quotes are very important here.
    WANTED_TAGS          A space separated list of valid tags to find posts
                         at https://help.galaxyproject.org/
                         Default is "$WANTED_TAGS"
    MAX_REPLIES          Filter for posts that have less than or equal to
                         this many replies. Default is "$MAX_REPLIES"
    HTML_TYPE            Render either a 'table' or 'bullets'. HTML tables
                         look great in the browser but don't render well on
                         mobile. Default is "$HTML_TYPE"
    OPTS                 Extra arguments to append to help.galaxyproject.org
                         URL. Default is "$OPTS"

EOF
  exit 255
fi
|
||
function tag_to_tsv {
  ## For a given TAG, fetch its topic list from the help forum, extract
  ## the table cells and print them as TSV rows of Link, Title, Replies,
  ## Views. Every row is emitted with a leading newline (the _ROW_ marker
  ## is turned into "\n"), so the output starts with a blank line.
  ##
  ## Globals:   OPTS (read) - query string appended to the tag URL
  ## Arguments: $1 - tag name
  ## Outputs:   TSV rows on stdout
  ##
  ## TODO: Add date too?
  local tag="$1"
  ## --fail: an HTTP error page must not be handed to the parser;
  ## --show-error: keep failures visible despite --silent;
  ## --location: follow redirects of the tag URL.
  ## xmllint's stderr is discarded on purpose (only its stdout is used).
  curl --fail --silent --show-error --location \
      "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
    | xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
    | sed -r 's|<span class[^>]+>||; s|</span>||; s|\s*<a.*href=\"([^\"]*)\" [^>]+>([^<]+)<.*|_ROW_\1\t\2|' \
    | tr '\n' '\t' \
    | sed -r 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
}
|
||
function alltags_to_tsv {
  ## For all wanted tags, build one TSV table with the header
  ## Link, Title, Replies, Views followed by the de-duplicated rows of
  ## every tag, sorted by ascending reply count.
  ##
  ## Globals:   WANTED_TAGS (read) - space separated list of tags
  ## Outputs:   path of the generated temporary TSV file on stdout
  ## Returns:   non-zero if the temporary file cannot be created
  local tag tmp_tsv
  tmp_tsv=$(mktemp --suffix=".tsv") || return
  {
    printf 'Link\tTitle\tReplies\tViews\n'
    ## WANTED_TAGS is deliberately unquoted: it is split into one tag
    ## per word.
    for tag in ${WANTED_TAGS}; do
      tag_to_tsv "$tag"
    done \
      | grep -v '^\s*$' \
      | sed -r 's|\t+|\t|g' \
      | sort -u \
      | sort -t $'\t' -nk 3
    ## ^ no blanks, no repeated delimiters (runs of any length),
    ##   no duplicates, ascending reply count (3rd column)
  } > "${tmp_tsv}"
  echo "${tmp_tsv}"
}
|
||
function filter_tsv {
  ## Keep only the rows of a TSV file whose 3rd column (Replies) is an
  ## integer not larger than MAX_REPLIES, and print the path of the new,
  ## filtered table.
  ##
  ## Rows whose Replies field is not a plain integer are dropped
  ## explicitly: this covers the "Link Title Replies Views" header and
  ## abbreviated counts such as "1.2k", which awk would otherwise compare
  ## as strings (making "1.2k" <= "5" true).
  ##
  ## Globals:   MAX_REPLIES (read)
  ## Arguments: $1 - path of the TSV file to filter
  ## Outputs:   path of the filtered temporary TSV file on stdout
  ## Returns:   non-zero if the temporary file cannot be created
  local tsv="$1"
  local tmp_tsv
  tmp_tsv=$(mktemp --suffix=".tsv") || return
  awk -F$'\t' -v replies="$MAX_REPLIES" \
    '$3 ~ /^[ ]*[0-9]+[ ]*$/ && ($3 + 0) <= (replies + 0)' \
    "${tsv}" > "${tmp_tsv}"
  echo "${tmp_tsv}"
}
|
||
function tsv_to_html {
  ## Convert a TSV table (Link, Title, Replies, Views) into a single line
  ## of HTML that is ready to be placed inside a JSON string: newlines
  ## become spaces, and backslashes and double quotes are escaped.
  ##
  ## Globals:   HTML_TYPE (read)   - "table" renders a <table>, anything
  ##                                 else renders a bullet list
  ##            WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
  ## Arguments: $1 - path of the TSV file (every row is rendered)
  ## Outputs:   JSON-escaped HTML on stdout
  local tsv="$1"
  local subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies ≤ ${MAX_REPLIES}"
  local awk_prog
  if [ "$HTML_TYPE" = "table" ]; then
    awk_prog='
      BEGIN { print "<h1>Updates from Galaxy Help</h1>"subtitle"\n<table>\n<thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead>\n<tbody>" }
      { print "<tr><td><a href=\""$1"\">"$2"</a></td><td>"$3"</td><td>"$4"</td></tr>" }
      END { print "</tbody>\n</table>" }'
  else ## bullets
    awk_prog='
      BEGIN { print "<h1>Updates from Galaxy Help</h1><br/><p>"subtitle"</p><ol>\n" }
      { print "<li><a href=\""$1"\">"$2"</a><ul><li>Replies: "$3" and Views: "$4"</li></ul></li>" }
      END { print "\n</ol>" }'
  fi
  ## Backslashes coming from the data are doubled *before* rendering, so
  ## that the final quote-escaping step produces valid JSON string
  ## content even for titles containing "\".
  sed 's|\\|\\\\|g' "${tsv}" \
    | awk -F$'\t' -v subtitle="${subtitle}" "${awk_prog}" \
    | tr '\n' ' ' \
    | sed 's|"|\\"|g'
}
|
||
function tsv_to_markdown {
  ## Convert a TSV table (Link, Title, Replies, Views) into a single line
  ## of Markdown that is ready to be placed inside a JSON string.
  ##
  ## The awk template emits the two characters backslash + n wherever
  ## the message needs a line break (a JSON escape); real newlines are
  ## replaced by spaces, and backslashes and double quotes coming from
  ## the data are escaped.
  ##
  ## Globals:   WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
  ## Arguments: $1 - path of the TSV file (every row is rendered)
  ## Outputs:   JSON-escaped Markdown on stdout
  local tsv="$1"
  local subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}"
  ## Data backslashes are doubled before rendering so that they cannot
  ## form invalid JSON escapes; the template's own "\n" is added later
  ## and therefore stays untouched.
  sed 's|\\|\\\\|g' "${tsv}" \
    | awk -F$'\t' -v subtitle="${subtitle}" '
        BEGIN { print "## Updates from Galaxy Help\\n***"subtitle"***\\n" }
        { print "* ["$2"]("$1")\\n * "$3" replies and "$4" views\\n" }' \
    | tr '\n' ' ' \
    | sed 's|"|\\"|g'
}
|
||
function md_and_html_to_json {
  ## Stuff the Markdown and HTML text content into a JSON message file.
  ##
  ## Both texts are inserted verbatim between double quotes, so they
  ## must already be escaped as JSON string content by the caller.
  ## printf '%s' is used instead of echo so that backslash sequences in
  ## the texts are never interpreted by the shell.
  ##
  ## See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
  ##      "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
  ##
  ## Arguments: $1 - JSON-escaped Markdown text (message "body")
  ##            $2 - JSON-escaped HTML text (message "formatted_body")
  ## Outputs:   path of the temporary JSON file on stdout
  ## Returns:   non-zero if the temporary file cannot be created
  local md_text="$1"
  local html_text="$2"
  local tmp_json
  tmp_json=$(mktemp --suffix=".json") || return
  printf '{"msgtype":"m.notice", "format":"org.matrix.custom.html", "body": "%s", "formatted_body": "%s"}\n' \
    "${md_text}" "${html_text}" > "${tmp_json}"
  echo "${tmp_json}"
}
|
||
function post_json_to_matrix {
  ## Send a JSON message file to the Matrix room as an m.room.message
  ## event (HTTP PUT). The server response is printed by curl.
  ##
  ## The access token is sent in an "Authorization: Bearer" header
  ## rather than in the URL query string, so it does not end up in URLs
  ## that servers and proxies may log.
  ##
  ## Globals:   MATRIX_SERVER (read; a trailing slash is stripped in place)
  ##            ROOM_ID, MATRIX_ACCESS_TOKEN (read)
  ## Arguments: $1 - path of the JSON file to send
  ## Returns:   exit status of curl
  local json_file="$1"
  local txnid post_url
  txnid=$(date "+%Y%m%d%H%M${RANDOM:1:3}")  ## date-specific transaction ID
  MATRIX_SERVER=${MATRIX_SERVER%/}          ## remove trailing slash, if any
  post_url="${MATRIX_SERVER}/_matrix/client/r0/rooms/${ROOM_ID}"
  post_url="${post_url}/send/m.room.message/${txnid}"
  ## DEBUG:
  ## - curl "$post_url" -X PUT -H "Authorization: Bearer $MATRIX_ACCESS_TOKEN" \
  ##     --data '{"msgtype":"m.text","body":"hello"}'
  curl "${post_url}" -X PUT \
    -H "Authorization: Bearer ${MATRIX_ACCESS_TOKEN}" \
    -H "Content-Type: application/json" \
    --data-binary "@${json_file}"
}
|
||
function sanity_check {
  ## Assert that every program the script relies on can be found.
  ## Prints the list of missing programs to stderr and exits with
  ## status 255 if at least one is missing; returns 0 otherwise.
  local required_progs=( cat curl xmllint awk sed grep tr jq mktemp sort uniq wc date )
  local prog
  local miss=""
  for prog in "${required_progs[@]}"; do
    ## 'command -v' is the shell built-in way to look up a command;
    ## unlike 'which' it does not depend on an external program.
    if ! command -v "${prog}" > /dev/null 2>&1; then
      miss="$miss $prog"
    fi
  done
  if [ "$miss" != "" ]; then
    echo "Cannot run without:$miss" >&2
    exit 255
  fi
}
|
||
## MAIN ##
## Fetch -> filter -> render (Markdown + HTML) -> wrap in JSON -> post.
sanity_check

## Temporary files created along the way; removed when the script exits.
tmp_files=()
function cleanup_tmp_files {
  if [ "${#tmp_files[@]}" -gt 0 ]; then
    rm -f -- "${tmp_files[@]}"
  fi
}
trap cleanup_tmp_files EXIT

all_tsv=$(alltags_to_tsv)
tmp_files+=("${all_tsv}")
main_tsv=$(filter_tsv "${all_tsv}")
tmp_files+=("${main_tsv}")

## An empty filtered table means no topic is at or below MAX_REPLIES.
if [[ ! -s "${main_tsv}" ]]; then
  echo "Nothing new to post, aborting." >&2
  exit 0
fi

main_mdwn_text=$(tsv_to_markdown "${main_tsv}")
main_html_text=$(tsv_to_html "${main_tsv}")

main_json_file=$(md_and_html_to_json "${main_mdwn_text}" "${main_html_text}")
## Validate before sending. On failure the JSON file is deliberately
## NOT registered for cleanup, so that it can be inspected.
if ! jq . "${main_json_file}" > /dev/null 2>&1; then
  echo "This is not a valid JSON, aborting." >&2
  echo "See: ${main_json_file}" >&2
  exit 255
fi
tmp_files+=("${main_json_file}")

post_json_to_matrix "${main_json_file}"