diff --git a/.github/workflows/scrna-galaxyhelp-matrix.yml b/.github/workflows/scrna-galaxyhelp-matrix.yml
new file mode 100644
index 00000000000000..27973cb87c60cc
--- /dev/null
+++ b/.github/workflows/scrna-galaxyhelp-matrix.yml
@@ -0,0 +1,31 @@
+name: "[Cron] Send Single-cell Galaxy Help Topics to Matrix CoP Room"
+
+on:
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    # Runs every Monday at 12:00 UTC
+    - cron: '0 12 * * 1'
+  workflow_dispatch:
+
+jobs:
+  runner-job:
+    if: github.repository_owner == 'galaxyproject'
+    runs-on: ubuntu-latest
+
+    steps:
+      # A shallow clone is enough: only bin/galaxy-help-news.sh is needed.
+      - uses: actions/checkout@v4
+
+      # BEGIN Dependencies
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y coreutils curl libxml2-utils gawk sed grep jq
+      # END Dependencies
+
+      - name: Send announcements to single-cell matrix for unanswered posts
+        # Run the checked-out copy so the script version matches this commit.
+        run: |
+          HTML_TYPE="bullets" MAX_REPLIES="0" WANTED_TAGS="scrna scrna-seq" ROOM_ID='!TJRLNvfcbWbSRoUNpl:matrix.org' bash ./bin/galaxy-help-news.sh
+        env:
+          MATRIX_ACCESS_TOKEN: ${{ secrets.matrix_access_token }}
diff --git a/bin/galaxy-help-news.sh b/bin/galaxy-help-news.sh
new file mode 100644
index 00000000000000..9e96a5415fff60
--- /dev/null
+++ b/bin/galaxy-help-news.sh
@@ -0,0 +1,237 @@
+#!/bin/bash
+## Scrape Galaxy Help for topics carrying the wanted tags and post the
+## ones with few replies to a Matrix room. Configuration is read from
+## environment variables; see the usage text below.
+
+MATRIX_SERVER=${MATRIX_SERVER:-"https://matrix.org"}
+ROOM_ID=${ROOM_ID:-'!TJRLNvfcbWbSRoUNpl:matrix.org'}  ## GTN Single Cell Maintainers
+WANTED_TAGS=${WANTED_TAGS:-"scrna scrna-seq"}
+MAX_REPLIES=${MAX_REPLIES:-1}
+HTML_TYPE=${HTML_TYPE:-"bullets"}  ## or "table"
+
+## Result filters
+OPTS=${OPTS:-"?ascending=true&order=activity"}
+
+if [ -z "${MATRIX_ACCESS_TOKEN:-}" ]; then
+    echo "
+This is a Matrix bot that scrapes Galaxy Help for certain tags and posts to
+a Room for topics that have less than X replies. Run this maybe once a month.
+
+Example Usage:
+
+    MATRIX_ACCESS_TOKEN='123_123_123' \\
+    MATRIX_SERVER='https://matrix.org' \\
+    ROOM_ID='!123_132_123:matrix.org' \\
+    WANTED_TAGS='tag1 tag2' \\
+    MAX_REPLIES=1 \\
+    HTML_TYPE='bullets' \\
+    bash $0
+
+Where:
+    MATRIX_ACCESS_TOKEN  Can be found in your Matrix profile under
+                         'All settings' -> 'Help & About' -> 'Access Token'
+
+    MATRIX_SERVER        The name or base address of the Matrix server to
+                         post to. Default is '$MATRIX_SERVER'
+
+    ROOM_ID              The Room ID can be found in the URL of the room
+                         usually following format '!123123123:matrix.org'.
+                         Default is '$ROOM_ID'
+                         NOTE: Single quotes are very important here.
+
+    WANTED_TAGS          A space separated list of valid tags to find posts
+                         at https://help.galaxyproject.org/
+                         Default is \"$WANTED_TAGS\"
+
+    MAX_REPLIES          Filter for posts that have less than or equal to
+                         this many replies. Default is \"$MAX_REPLIES\"
+
+    HTML_TYPE            Render either a 'table' or 'bullets'. HTML tables
+                         look great in the browser but don't render well on
+                         mobile. Default is \"$HTML_TYPE\"
+
+    OPTS                 Extra arguments to append to help.galaxyproject.org
+                         URL. Default is \"$OPTS\"
+    " >&2
+    exit 255
+fi
+
+function tag_to_tsv {
+    ## For a given TAG, fetch its topic list from the help forum and print
+    ## one tab-separated row per topic: Link, Title, Replies, Views.
+    ##
+    ## The sed step drops the <span> wrappers and turns each topic anchor
+    ## into "_ROW_<link>TAB<title>"; every remaining newline then becomes a
+    ## tab and each _ROW_ marker becomes the row-separating newline.
+    ##
+    ## NOTE(review): the patterns assume the markup of the forum's plain
+    ## HTML topic list (an <a href=...> with further attributes, directly
+    ## inside the first span) - confirm against a live page.
+    ##
+    ## TODO: Add date too?
+    local tag="$1"
+    curl --silent --show-error --fail \
+        "https://help.galaxyproject.org/tag/${tag}${OPTS}" \
+        | xmllint --noblanks --html --xpath '//tr[@class="topic-list-item"]/td/span' - 2>/dev/null \
+        | sed -r 's|<span[^>]+>||; s|</span>||; s|\s*<a href=.([^ ]+). [^>]*>([^<]+)<.*|_ROW_\1\t\2|' \
+        | tr '\n' '\t' \
+        | sed -r 's|\s\s\s*|\t|g; s|_ROW_|\n|g'
+}
+
+function alltags_to_tsv {
+    ## For all wanted tags, build a TSV with a "Link Title Replies Views"
+    ## header followed by the de-duplicated topic rows sorted by ascending
+    ## reply count, and print the path of that file.
+    local -a tags
+    local tag raw_tsv out_tsv
+    raw_tsv=$(mktemp --suffix=".tsv") || return 1
+    out_tsv=$(mktemp --suffix=".tsv") || return 1
+    ## Split the space separated list without exposing it to globbing.
+    read -r -a tags <<< "${WANTED_TAGS}"
+    for tag in "${tags[@]}"; do
+        tag_to_tsv "${tag}" >> "${raw_tsv}"
+    done
+    {
+        printf 'Link\tTitle\tReplies\tViews\n'
+        ## No blanks, no repeated delimiters, no duplicates.
+        grep -v '^\s*$' "${raw_tsv}" | sed -r 's|\t+|\t|g' \
+            | sort -u | sort -t $'\t' -k 3,3n
+    } > "${out_tsv}"
+    rm -f -- "${raw_tsv}"
+    echo "${out_tsv}"
+}
+
+function filter_tsv {
+    ## Keep only the rows of the given TSV whose reply count (column 3) is
+    ## a number not greater than MAX_REPLIES, and print the path of the
+    ## filtered file. The header row is dropped because its third column
+    ## is not a number, so the result holds topic rows only.
+    local tsv="$1"
+    local tmp_tsv
+    tmp_tsv=$(mktemp --suffix=".tsv") || return 1
+    awk -F$'\t' -v replies="${MAX_REPLIES}" \
+        '$3 ~ /^ *[0-9]+ *$/ && ($3 + 0) <= (replies + 0)' \
+        "${tsv}" > "${tmp_tsv}"
+    echo "${tmp_tsv}"
+}
+
+function tsv_to_html {
+    ## Render the rows of a header-less TSV as an HTML fragment, either a
+    ## table or a bullet list depending on HTML_TYPE, on a single line.
+    ## No JSON escaping is applied here; the caller must take care of it.
+    local tsv="$1"
+    local subtitle="Recent posts matching: ${WANTED_TAGS}, with replies ≤ ${MAX_REPLIES}"
+    if [ "$HTML_TYPE" = "table" ]; then
+        awk -F$'\t' -v subtitle="${subtitle}" '
+BEGIN { print "<h1>Updates from Galaxy Help</h1><p><em>" subtitle "</em></p>"
+        print "<table><thead><tr><th>Topic</th><th>Replies</th><th>Views</th></tr></thead><tbody>" }
+      { print "<tr><td><a href=\"" $1 "\">" $2 "</a></td><td>" $3 "</td><td>" $4 "</td></tr>" }
+END   { print "</tbody></table>" }' \
+            "${tsv}" | tr '\n' ' '
+    else ## bullets
+        awk -F$'\t' -v subtitle="${subtitle}" '
+BEGIN { print "<h1>Updates from Galaxy Help</h1><p><em>" subtitle "</em></p><ul>" }
+      { print "<li><a href=\"" $1 "\">" $2 "</a></li>" }
+END   { print "</ul>" }' \
+            "${tsv}" | tr '\n' ' '
+    fi
+}
+
+function tsv_to_markdown {
+    ## Render the rows of a header-less TSV as the Markdown fallback text
+    ## of the message. No JSON escaping is applied here.
+    local tsv="$1"
+    local subtitle="Recent posts matching: **${WANTED_TAGS}**, with replies ≤ ${MAX_REPLIES}"
+    awk -F$'\t' -v subtitle="${subtitle}" '
+BEGIN { printf "## Updates from Galaxy Help\n***%s***\n\n", subtitle }
+      { printf "* [%s](%s)\n  * %s replies and %s views\n", $2, $1, $3, $4 }' \
+        "${tsv}"
+}
+
+function md_and_html_to_json {
+    ## Wrap the Markdown and HTML renderings in an m.notice event body
+    ## and print the path of the JSON file. jq does the string escaping,
+    ## so quotes, backslashes and newlines in topic titles stay valid.
+    ## See: https://spec.matrix.org/legacy/r0.0.0/client_server.html
+    ## "put-matrix-client-r0-rooms-roomid-send-eventtype-txnid"
+    local md_text="$1"
+    local html_text="$2"
+    local tmp_json
+    tmp_json=$(mktemp --suffix=".json") || return 1
+    jq -n --arg body "${md_text}" --arg formatted_body "${html_text}" '{
+        msgtype: "m.notice",
+        format: "org.matrix.custom.html",
+        body: $body,
+        formatted_body: $formatted_body
+    }' > "${tmp_json}"
+    echo "${tmp_json}"
+}
+
+function post_json_to_matrix {
+    ## PUT the given JSON file to the room as an m.room.message event.
+    ## Returns curl's exit status, which is non-zero on HTTP errors.
+    local json_file="$1"
+    local txnid post_url
+    ## Transaction ID: timestamp plus a random suffix.
+    txnid="$(date '+%Y%m%d%H%M%S')${RANDOM}"
+    post_url="${MATRIX_SERVER%/}/_matrix/client/r0/rooms/${ROOM_ID}"
+    post_url="${post_url}/send/m.room.message/${txnid}"
+    ## The token goes in a header rather than the query string so that it
+    ## does not end up in the request URL; the payload is read from the
+    ## file as-is instead of being expanded onto the command line.
+    curl --silent --show-error --fail -X PUT \
+        -H "Authorization: Bearer ${MATRIX_ACCESS_TOKEN}" \
+        -H "Content-Type: application/json" \
+        --data-binary "@${json_file}" \
+        "${post_url}"
+}
+
+function sanity_check {
+    ## Assert that the required binaries are in PATH; exit 255 otherwise.
+    local required_progs=( awk curl date grep jq mktemp sed sort tr xmllint )
+    local prog miss=""
+    for prog in "${required_progs[@]}"; do
+        if ! command -v "${prog}" > /dev/null 2>&1; then
+            miss="${miss} ${prog}"
+        fi
+    done
+    if [ -n "${miss}" ]; then
+        echo "Cannot run without:${miss}" >&2
+        exit 255
+    fi
+}
+
+function cleanup {
+    ## Remove whichever intermediate files have been created so far.
+    local f
+    for f in "${all_tsv:-}" "${main_tsv:-}" "${main_json_file:-}"; do
+        if [ -n "${f}" ]; then rm -f -- "${f}"; fi
+    done
+}
+
+## MAIN ##
+sanity_check
+trap cleanup EXIT
+
+all_tsv=$(alltags_to_tsv)
+main_tsv=$(filter_tsv "${all_tsv}")
+if [[ ! -s "${main_tsv}" ]]; then
+    echo "Nothing new to post, aborting." >&2
+    exit 0
+fi
+
+main_mdwn_text=$(tsv_to_markdown "${main_tsv}")
+main_html_text=$(tsv_to_html "${main_tsv}")
+
+main_json_file=$(md_and_html_to_json "${main_mdwn_text}" "${main_html_text}")
+if ! jq -e . "${main_json_file}" > /dev/null 2>&1; then
+    echo "This is not a valid JSON, aborting." >&2
+    echo "See: ${main_json_file}" >&2
+    trap - EXIT  ## keep the intermediate files for inspection
+    exit 255
+fi
+
+if ! post_json_to_matrix "${main_json_file}"; then
+    echo "Posting to Matrix failed." >&2
+    exit 255
+fi