Skip to content

Commit

Permalink
Merge pull request #4603 from galaxyproject/matrixbot-galaxyhelp
Browse files Browse the repository at this point in the history
Add UseGalaxy Help Forum to Matrix channel Bot
  • Loading branch information
hexylena authored Dec 20, 2023
2 parents 61611ee + 52c5fff commit 3cb7dcc
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/scrna-galaxyhelp-matrix.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Weekly job that posts unanswered single-cell Galaxy Help topics to the
# single-cell Matrix room via bin/galaxy-help-news.sh.
name: "[Cron] Send Single-cell Galaxy Help Topics to Matrix CoP Room"

on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    # Runs every Monday at 12:00 UTC (GitHub evaluates cron schedules in UTC)
    - cron: '0 12 * * 1'
  # Allow manual runs from the Actions tab
  workflow_dispatch:

jobs:
  runner-job:
    # Do not run on forks
    if: github.repository_owner == 'galaxyproject'
    runs-on: ubuntu-latest

    steps:
      # A default (shallow) checkout is enough: the script needs no git history
      - uses: actions/checkout@v4

      # BEGIN Dependencies
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y coreutils curl libxml2-utils gawk sed grep jq
      # END Dependencies

      - name: Send announcements to single-cell matrix for unanswered posts
        # Run the checked-out copy of the script rather than re-downloading it,
        # so the workflow and the script always come from the same revision.
        run: |
          HTML_TYPE="bullets" MAX_REPLIES="0" WANTED_TAGS="scrna scrna-seq" ROOM_ID='!TJRLNvfcbWbSRoUNpl:matrix.org' bash ./bin/galaxy-help-news.sh
        env:
          MATRIX_ACCESS_TOKEN: ${{ secrets.matrix_access_token }}
190 changes: 190 additions & 0 deletions bin/galaxy-help-news.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#!/bin/bash

## Runtime configuration. Every setting can be overridden from the
## environment; the values below are only used when the variable is
## unset or empty.
MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}
ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'} ## GTN Single Cell Maintainers
WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}
MAX_REPLIES=${MAX_REPLIES:-1}
HTML_TYPE=${HTML_TYPE:-"bullets"} ## "table"

## Result filters: query string appended to the tag URL on the help forum
OPTS=${OPTS:-"?ascending=true&order=activity"}

## Without an access token nothing can be posted: print the usage text
## (with the effective defaults interpolated) to stderr and exit 255.
if [ -z "$MATRIX_ACCESS_TOKEN" ]; then
echo "
This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
a Room for topics that have less than X replies. Run this maybe once a month.
Example Usage:
MATRIX_ACCESS_TOKEN='123_123_123' \\
MATRIX_SERVER='https://matrix.org' \\
ROOM_ID='!123_132_123:matrix.org' \\
WANTED_TAGS='tag1 tag2' \\
MAX_REPLIES=1 \\
HTML_TYPE='bullets' \\
bash $0
Where:
MATRIX_ACCESS_TOKEN Can be found in your Matrix profile under
'All settings' -> 'Help & About' -> 'Access Token'
MATRIX_SERVER The name or base address of the Matrix server to
post to. Default is '$MATRIX_SERVER'
ROOM_ID The Room ID can be found in the URL of the room
usually following format '!123123123:matrix.org'.
Default is '$ROOM_ID'
NOTE: Single quotes are very important here.
WANTED_TAGS A space separated list of valid tags to find posts
at https://help.galaxyproject.org/
Default is \"$WANTED_TAGS\"
MAX_REPLIES Filter for posts that have less than or equal to
this many replies. Default is \"$MAX_REPLIES\"
HTML_TYPE Render either a 'table' or 'bullets'. HTML tables
look great in the browser but don't render well on
mobile. Default is \"$HTML_TYPE\"
OPTS Extra arguments to append to help.galaxyproject.org
URL. Default is \"$OPTS\"
" >&2
exit 255
fi

function tag_to_tsv {
## For a given TAG, fetch from the help forum, extract and parse
## the table and produce a 4-column TSV output of Link, Title,
## Replies, Views.
##
## Arguments: $1 - the tag name, inserted verbatim into the URL
## Globals:   OPTS (read) - query string appended after the tag
## Outputs:   TSV rows on stdout; nothing is written to disk
##
## Pipeline stages, in order:
##  1. curl -s fetches the tag page silently.
##  2. xmllint selects every <span> inside a <td> of a
##     <tr class="topic-list-item">; its diagnostics are discarded
##     by the 2>/dev/null.
##  3. The first sed strips the opening <span class=...> tag and the
##     closing </span>, and rewrites a line holding an anchor into
##     "_ROW_<href><TAB><link text>". _ROW_ marks the start of a record.
##  4. tr joins all lines into one, separated by tabs.
##  5. The last sed collapses every run of two or more whitespace
##     characters into a single tab and turns each _ROW_ marker into a
##     newline, so that each record starts on its own line.
##
## NOTE(review): \s and \t inside the sed expressions look like GNU sed
## extensions - confirm before running this with another sed.
## NOTE(review): a failed download is not detected here (curl runs
## without --fail); presumably it just yields no rows - confirm.
##
## TODO: Add date too?
local tag="$1"
curl -s "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
| xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
| sed -r 's|<span class[^>]+>||; s|</span>||; s|\s*<a.*href=\"([^\"]*)\" [^>]+>([^<]+)<.*|_ROW_\1\t\2|' \
| tr '\n' '\t' \
| sed -r 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
}

function alltags_to_tsv {
  ## For all wanted tags, populate a 4-column TSV output of Link,
  ## Title, Replies, Views, and return the path of the table.
  ##
  ## Globals:   WANTED_TAGS (read) - space separated list of tags
  ## Outputs:   path of a new temp file on stdout. The file starts with
  ##            a "Link Title Replies Views" header row followed by the
  ##            de-duplicated rows sorted by ascending reply count.
  local fetch_tags=$WANTED_TAGS
  local tag raw_tsv out_tsv
  raw_tsv=$(mktemp --suffix=".tsv")
  out_tsv=$(mktemp --suffix=".tsv")

  ## Deliberately unquoted: the tag list is split on whitespace
  for tag in ${fetch_tags}; do
    tag_to_tsv "$tag" >> "$raw_tsv"
  done

  printf 'Link\tTitle\tReplies\tViews\n' > "$out_tsv"

  ## No blanks, no duplicate delimiters, no duplicates, and sort by
  ## ascending reply count. A tab run of ANY length is collapsed to one
  ## tab: replacing only "\t\t" would leave an empty column behind for
  ## odd-length runs and shift the Replies/Views columns.
  grep -v "^\s*$" "$raw_tsv" \
    | sed -r 's|\t+|\t|g' \
    | LC_ALL=C sort -u \
    | sort -t $'\t' -nk 3 >> "$out_tsv"

  rm -f -- "$raw_tsv"
  echo "$out_tsv"
}

function filter_tsv {
  ## Filter a TSV file for maximum replies and then return the path
  ## of the new filtered table.
  ##
  ## Arguments: $1 - path of a TSV whose 3rd column is the reply count
  ## Globals:   MAX_REPLIES (read) - inclusive upper bound on replies
  ## Outputs:   path of a new temp file on stdout, holding only the rows
  ##            whose reply count is a plain integer <= MAX_REPLIES
  local tsv="$1"
  local tmp_tsv
  tmp_tsv=$(mktemp --suffix=".tsv")
  ## Only rows with an integer reply count are compared, and always
  ## numerically. Without the guard awk falls back to string comparison
  ## for values such as "1.2k" (which then sorts below "2") and for the
  ## "Replies" header cell.
  awk -F$'\t' -v replies="$MAX_REPLIES" \
    '$3 ~ /^[ ]*[0-9]+[ ]*$/ && ($3 + 0) <= (replies + 0)' \
    "${tsv}" > "${tmp_tsv}"
  echo "${tmp_tsv}"
}

function tsv_to_html {
  ## Convert a TSV table into HTML text that can be fed into a JSON
  ##
  ## Arguments: $1 - path of a TSV with columns Link, Title, Replies, Views
  ## Globals:   HTML_TYPE (read)   - "table", anything else means bullets
  ##            WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
  ## Outputs:   a single line of HTML on stdout, already escaped for use
  ##            inside a double-quoted JSON string
  local tsv="$1"
  local subtitle="Recent posts matching: <b>${WANTED_TAGS}</b>, with replies &le; ${MAX_REPLIES}"
  local awk_prog
  if [ "$HTML_TYPE" = "table" ]; then
    awk_prog='
      BEGIN { print "<h1>Updates from Galaxy Help</h1>" subtitle "\n<table>\n<thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead>\n<tbody>" }
      { print "<tr><td><a href=\"" $1 "\">" $2 "</a></td><td>" $3 "</td><td>" $4 "</td></tr>" }
      END { print "</tbody>\n</table>" }'
  else ## bullets
    awk_prog='
      BEGIN { print "<h1>Updates from Galaxy Help</h1><br/><p>" subtitle "</p><ol>\n" }
      { print "<li><a href=\"" $1 "\">" $2 "</a><ul><li>Replies: " $3 " and Views: " $4 "</li></ul></li>" }
      END { print "\n</ol>" }'
  fi
  ## Newlines become spaces so the result fits in one JSON string.
  ## Backslashes must be escaped BEFORE the quotes: a raw backslash in a
  ## topic title would otherwise form an invalid JSON escape sequence.
  awk -F$'\t' -v subtitle="${subtitle}" "${awk_prog}" "${tsv}" \
    | tr '\n' ' ' \
    | sed 's|\\|\\\\|g; s|"|\\"|g'
}

function tsv_to_markdown {
  ## Convert a TSV table into Markdown text that can be fed into a JSON
  ##
  ## Arguments: $1 - path of a TSV with columns Link, Title, Replies, Views
  ## Globals:   WANTED_TAGS, MAX_REPLIES (read) - shown in the subtitle
  ## Outputs:   a single line of Markdown on stdout, already escaped for
  ##            use inside a double-quoted JSON string. Line breaks are
  ##            emitted as the two characters backslash + n.
  local tsv="$1"
  local subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}"
  ## Backslashes coming from the data are escaped BEFORE awk runs, so
  ## they cannot be confused with the literal "\n" sequences awk adds
  ## on purpose; the quotes are escaped at the very end.
  sed 's|\\|\\\\|g' "${tsv}" \
    | awk -F$'\t' -v subtitle="${subtitle}" '
        BEGIN { print "## Updates from Galaxy Help\\n***" subtitle "***\\n" }
        { print "* [" $2 "](" $1 ")\\n * " $3 " replies and " $4 " views\\n" }' \
    | tr '\n' ' ' \
    | sed 's|"|\\"|g'
}

function md_and_html_to_json {
  ## Wrap the Markdown and HTML renderings into a Matrix "m.notice"
  ## message and write it to a fresh temp file.
  ##
  ## Arguments: $1 - Markdown text, used as the plain "body"
  ##            $2 - HTML text, used as the "formatted_body"
  ##            Both are inserted verbatim, so they must already be
  ##            escaped for a double-quoted JSON string.
  ## Outputs:   path of the JSON file on stdout
  ##
  ## See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
  ##      "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
  local md_text="$1"
  local html_text="$2"
  local json_path
  json_path=$(mktemp --suffix=".json")
  printf '{"msgtype":"m.notice", "format":"org.matrix.custom.html", "body": "%s", "formatted_body": "%s"}\n' \
    "${md_text}" "${html_text}" > "${json_path}"
  printf '%s\n' "${json_path}"
}

function post_json_to_matrix {
  ## Send a prepared JSON message to the configured Matrix room.
  ##
  ## Arguments: $1 - path of the JSON file to send as the event content
  ## Globals:   MATRIX_SERVER (read, trailing slash stripped in place),
  ##            ROOM_ID (read), MATRIX_ACCESS_TOKEN (read)
  ## Returns:   the exit status of curl; with --fail this is non-zero
  ##            when the server answers with an HTTP error.
  local json_file="$1"
  local txnid post_url
  if [[ ! -r "${json_file}" ]]; then
    echo "Cannot read JSON file: ${json_file}" >&2
    return 1
  fi
  txnid=$(date "+%Y%m%d%H%M${RANDOM:1:3}") ## date-specific transaction ID
  MATRIX_SERVER=${MATRIX_SERVER%/} ## remove trailing slash, if any
  ## Build curl
  post_url="${MATRIX_SERVER}/_matrix/client/r0/rooms/"
  post_url="${post_url}${ROOM_ID}/send/m.room.message/${txnid}"
  ## The token travels in the Authorization header rather than in an
  ## "access_token" query parameter, so it does not end up in URLs that
  ## servers and proxies commonly log.
  ## The payload is read straight from the file (no word splitting).
  ## DEBUG:
  ## - curl "$post_url" -X PUT -H "Authorization: Bearer $MATRIX_ACCESS_TOKEN" \
  ##     --data '{"msgtype":"m.text","body":"hello"}'
  curl --fail -X PUT \
    -H "Authorization: Bearer ${MATRIX_ACCESS_TOKEN}" \
    -H "Content-Type: application/json" \
    --data-binary "@${json_file}" \
    "${post_url}"
}

function sanity_check {
  ## Assert that required binaries are in PATH
  ##
  ## Outputs:   on failure, the list of missing programs on stderr
  ## Returns:   nothing on success; exits the script with status 255
  ##            when at least one program is missing.
  local required_progs=( cat curl xmllint awk sed grep tr jq mktemp sort uniq wc date )
  local prog
  local miss=""
  for prog in "${required_progs[@]}"; do
    ## "command -v" is a shell builtin, whereas "which" is an external
    ## tool that may itself be absent (every program would then be
    ## reported as missing).
    if ! command -v "${prog}" > /dev/null 2>&1; then
      miss="$miss $prog"
    fi
  done
  if [ -n "$miss" ]; then
    echo "Cannot run without:$miss" >&2
    exit 255
  fi
}

## MAIN ##
## Scrape -> filter -> render -> validate -> post. Every intermediate
## result lives in a temp file whose path is handed from step to step.
sanity_check

all_tsv=$(alltags_to_tsv)
main_tsv=$(filter_tsv "${all_tsv}")
rm -f -- "${all_tsv}"

## filter_tsv keeps data rows only, so an empty file means no topics
if [[ $(wc -l < "${main_tsv}") == 0 ]]; then
  echo "Nothing new to post, aborting." >&2
  rm -f -- "${main_tsv}"
  exit 0
fi

main_mdwn_text=$(tsv_to_markdown "${main_tsv}")
main_html_text=$(tsv_to_html "${main_tsv}")
rm -f -- "${main_tsv}"

main_json_file=$(md_and_html_to_json "${main_mdwn_text}" "${main_html_text}")
## An explicit "." filter is passed because older jq releases refuse to
## run without a program argument. The file is kept on failure so that
## it can be inspected.
if ! jq . "${main_json_file}" > /dev/null 2>&1; then
  echo "This is not a valid JSON, aborting." >&2
  echo "See: ${main_json_file}" >&2
  exit 255
fi

post_json_to_matrix "${main_json_file}"
post_status=$?
rm -f -- "${main_json_file}"
## Propagate the result of the upload as the script's exit status
exit "${post_status}"

0 comments on commit 3cb7dcc

Please sign in to comment.