Skip to content

Commit

Permalink
Merge pull request #145 from nchiasson-dgi/feature/tagged-foxml-migra…
Browse files Browse the repository at this point in the history
…tions

DDST-552: Updating PR with latest from main
  • Loading branch information
nchiasson-dgi authored Sep 23, 2024
2 parents a94ac05 + 5a99f80 commit 459788e
Show file tree
Hide file tree
Showing 9 changed files with 895 additions and 117 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ source:
# validation of the URI in the destination may fail (presumably, naive URI
# validation inside of Drupal, expecting HTTP-like URLs)... or may just
# require a third slash to imply an empty "authority" component?
cache_counts: true
# XXX: For big sets of things, counting could take a substantial amount of
# time, so let's skip it.
skip_count: true
destination:
plugin: entity:file
validate: true
Expand Down
36 changes: 29 additions & 7 deletions modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,13 @@ source:
# to the vocab in which to do the things.
- '@_vid'
- plugin: flatten
- plugin: migration_lookup
- plugin: dgi_migrate.process.locking_migration_lookup
migration: dgis_stub_terms_generic
stub_id: dgis_stub_terms_generic
lock_context_keys:
dgis_stub_terms_generic:
- { offset: [3] }
- { offset: [1], hash: '##'}
extract: &generic_term_extract
plugin: dgi_migrate.process.single_extract
index: [actual]
Expand Down Expand Up @@ -74,6 +78,7 @@ process:
method: models
- plugin: skip_on_empty
method: row
message: 'Skipping; no model defined.'
title:
- plugin: dgi_migrate.subproperty
source: '@_node_foxml_parsed'
Expand Down Expand Up @@ -1476,6 +1481,10 @@ process:
type: record_information
process_values: true
values:
_mods_xpath:
# XXX: Copy here so it's available for further subprocessing.
- plugin: get
source: parent_row/dest/_mods_xpath
field_description_standard:
- << : *nested_mods_node
query: 'mods:descriptionStandard'
Expand All @@ -1485,13 +1494,21 @@ process:
- plugin: dgi_migrate.subproperty
property: nodeValue
field_record_cataloging_language:
- << : *nested_mods_node
query: 'mods:languageOfCataloging'
- <<: *nested_mods_node
query: 'mods:languageOfCataloging[normalize-space()]'
- plugin: callback
callable: iterator_to_array
- plugin: skip_on_empty
method: process
- plugin: multiple_values
- plugin: dgi_migrate.subproperty
property: nodeValue
- plugin: dgi_migrate.sub_process
process_values: true
values:
_vid:
- plugin: default_value
default_value: language
<<: *generic_term_after
- <<: *generic_term_extract
_field_record_change_date_single:
- << : *nested_mods_node
query: 'mods:recordChangeDate[not(@point)]'
Expand Down Expand Up @@ -2694,18 +2711,23 @@ process:
- '@_unspecified_rights_statement'
- plugin: null_coalesce
nid:
- plugin: migration_lookup
- plugin: dgi_migrate.process.locking_migration_lookup
source: '@field_pid'
migration: dgis_stub_nodes
lock_context_keys: &dgis_stub_nodes_lock_context_keys
dgis_stub_nodes:
- { offset: [ 0 ], hash: '#/##' }
field_member_of:
- plugin: flatten
source:
- '@_members'
- '@_constituents'
- plugin: multiple_values
- plugin: migration_lookup
- plugin: dgi_migrate.process.locking_migration_lookup
migration: dgis_stub_nodes
stub_id: dgis_stub_nodes
lock_context_keys:
<< : *dgis_stub_nodes_lock_context_keys
migration_dependencies:
required:
- dgis_foxml_files
Expand Down
38 changes: 38 additions & 0 deletions scripts/env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,41 @@
# PROCESSES: The number of processes to use to run the migration import.
# ---
#PROCESSES=1

# ===
# SKIP_STATUS: Suppress dumping of migration status before/after operations.
# ---
# Default:
#SKIP_STATUS=false
# To skip, uncomment (or equivalently set):
#SKIP_STATUS=true

# ===
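# MULTIPROCESS_SKIP_MIGRATIONS: Skip processing the specified migrations.
#
# Expected to be a Bash array; in this instance, just a set of strings
# (representing migration IDs) between parentheses.
#
# May be of some use when resuming larger migrations, when we do not wish to
# undertake no-op cycling through other migrations.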
# ---
#MULTIPROCESS_SKIP_MIGRATIONS=()

# ===
# MULTIPROCESS_PRE_ENQUEUE_PAUSE: Pause execution before enqueuing these.
#
# Expected to be a Bash array; in this instance, just a set of strings
# (representing migration IDs) between parentheses.
#
# NOTE: The prompt for this presently only shows in the *-import.log; _not_ in
# the main "run" process.
# ---
#MULTIPROCESS_PRE_ENQUEUE_PAUSE=()

# ===
# MULTIPROCESS_POST_PROCESS_PAUSE: Pause execution after finishing these.
#
# Expected to be a Bash array; in this instance, just a set of strings
# (representing migration IDs) between parentheses.
#
# NOTE: The prompt for this presently only shows in the *-import.log; _not_ in
# the main "run" process.
# ---
#MULTIPROCESS_POST_PROCESS_PAUSE=()
99 changes: 86 additions & 13 deletions scripts/util.in
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ function init_vars () {
declare -g TIME=${TIME:-/usr/bin/time}
declare -g LOG_DIR=${LOG_DIR:-$CONFIG_DIR}
declare -g PROCESSES=${PROCESSES:-1}
declare -g SKIP_STATUS=${SKIP_STATUS:-false}
declare -g -a MULTIPROCESS_SKIP_MIGRATIONS=(${MULTIPROCESS_SKIP_MIGRATIONS[@]})
declare -g -a MULTIPROCESS_PRE_ENQUEUE_PAUSE=(${MULTIPROCESS_PRE_ENQUEUE_PAUSE[@]})
declare -g -a MULTIPROCESS_POST_PROCESS_PAUSE=(${MULTIPROCESS_POST_PROCESS_PAUSE[@]})

# Initialize the log directory.
if ! [ -d "$LOG_DIR" ]; then
Expand Down Expand Up @@ -96,6 +100,21 @@ function do_migration_single_process() {
timedwwwdrush dgi-migrate:import "--root=$DRUPAL_ROOT" "--uri=$URI" "--user=$DRUPAL_USER" "--group=$MIGRATION_GROUP" "${@:2}"
}

# Helper; facilitate pausing for various reasons (likely snapshotting).
#
# Prompts, blocks until a line of input is read from stdin, then echoes back
# whatever was entered.
#
# Positional args:
# - 1: A descriptive string of _when_ we are pausing; e.g. "pre-enqueue",
#   "post-enqueue", etc.
# - 2: The ID of the specific migration during which we are pausing.
function do_pause() {
  local WHEN=$1
  local MIGRATION_ID=$2
  local DISCARD

  # -r: take the entered line literally. Without it, read treats backslashes
  # as escape/line-continuation characters, so a trailing backslash would make
  # the pause wait for (and swallow) a further line of input.
  read -r -e -p "Pausing $WHEN of $MIGRATION_ID as requested. Hit enter to continue." DISCARD
  echo "DISCARD is $DISCARD"
}

# Kick off a migration in multiple processes.
#
# Positional args:
Expand All @@ -106,23 +125,77 @@ function do_migration_multi_process() {
local NUM=${1}

local PROCESS_LOG_DIR="$LOG_DIR/$NUM-multiprocess-logs"
local STOP_LOCK_FILE="$LOG_DIR/$NUM-stop.lock"
local PAUSE_LOCK_FILE="$LOG_DIR/$NUM-pause.lock"

wwwdo mkdir -p $PROCESS_LOG_DIR
wwwdo touch $STOP_LOCK_FILE
wwwdo touch $PAUSE_LOCK_FILE

echo "Listing migrations..."
timedwwwdrush dgi-migrate:list-migrations "--group=$MIGRATION_GROUP" --format=string \
| sort -n --key=2 | cut -f1 | while read MIGRATION_ID; do
local -a migrations=($(wwwdrush dgi-migrate:list-migrations "--group=$MIGRATION_GROUP" --field=id --sort))
for MIGRATION_ID in ${migrations[@]}; do
if [ ! -f $PAUSE_LOCK_FILE ] ; then
echo "Pause lock file removed."
do_pause "pre-enqueue" "$MIGRATION_ID"
elif [ ! -f $STOP_LOCK_FILE ] ; then
echo "Stop lock file removed; exiting before touching $MIGRATION_ID."
return
elif [[ " ${MULTIPROCESS_SKIP_MIGRATIONS[@]} " =~ " $MIGRATION_ID " ]]; then
echo "Skipping $MIGRATION_ID as requested."
continue
elif [[ " ${MULTIPROCESS_PRE_ENQUEUE_PAUSE[@]} " =~ " $MIGRATION_ID " ]]; then
do_pause "pre-enqueue" "$MIGRATION_ID"
fi

echo "Enqueuing items for $MIGRATION_ID"
timedwwwdrush dgi-migrate:enqueue --user=$DRUPAL_USER $MIGRATION_ID "${@:2}"
timedwwwdrush dgi-migrate:enqueue "--user=$DRUPAL_USER" "--run=$NUM" "$MIGRATION_ID" "${@:2}" &
local ENQUEUEING_JOB=$!

echo "Starting $PROCESSES processes to process $MIGRATION_ID."
local -a PROCESS_JOBS=()
for i in $(seq 1 $PROCESSES); do
echo "Starting $i/$PROCESSES to process $MIGRATION_ID."
timedwwwdrush dgi-migrate:enqueued-process --user=$DRUPAL_USER $MIGRATION_ID "${@:2}" &> $PROCESS_LOG_DIR/$MIGRATION_ID.$i.log &
timedwwwdrush dgi-migrate:enqueued-process "--user=$DRUPAL_USER" "--run=$NUM" "$MIGRATION_ID" "${@:2}" &> $PROCESS_LOG_DIR/"$MIGRATION_ID.$i.log" &
PROCESS_JOBS+=($!)
done

wait $ENQUEUEING_JOB
echo "Work enqueueing finished; enqueueing terminal messages."
for i in $(seq 1 $PROCESSES); do
wwwdrush dgi-migrate:enqueue-terminal "$MIGRATION_ID" "$NUM"
done
echo "Waiting for processes to exit..."
wait
timedwwwdrush dgi-migrate:finish-enqueued-process --user=1 $MIGRATION_ID "${@:2}"

echo "Terminal messages enqueued; waiting for workers to finish..."
wait ${PROCESS_JOBS[@]}
echo "Workers exited."

if [ ! -f $PAUSE_LOCK_FILE ] ; then
do_pause "post-process, pre-finalize" "$MIGRATION_ID"
elif [ ! -f $STOP_LOCK_FILE ] ; then
echo "Lock file removed; exiting without finalizing batch."
return
fi

echo "Finalizing $MIGRATION_ID."
timedwwwdrush dgi-migrate:finish-enqueued-process "--user=$DRUPAL_USER" "--run=$NUM" "$MIGRATION_ID" "${@:2}"
echo "Finished $MIGRATION_ID."
if [[ " ${MULTIPROCESS_POST_PROCESS_PAUSE[@]} " =~ " $MIGRATION_ID " ]] ; then
do_pause "post-process" "$MIGRATION_ID"
fi

done
wwwdo rm $STOP_LOCK_FILE $PAUSE_LOCK_FILE
}

# Dump status for the given migration group.
#
# Can be skipped if SKIP_STATUS=true; in which case the call to this should be
# no-op. Any other value (including empty/unset) results in the status dump.
#
# Reads the SKIP_STATUS and MIGRATION_GROUP globals.
function dump_status() {
  # Quoted (with an empty default) so that an empty or unset SKIP_STATUS is
  # compared as a string, rather than collapsing the test into a malformed
  # "[ != true ]" that errors out and silently skips the dump.
  if [ "${SKIP_STATUS:-}" != 'true' ]; then
    wwwdrush migrate:status "--group=$MIGRATION_GROUP"
  fi
}

# Handle kicking off a migration.
Expand All @@ -146,7 +219,7 @@ function do_migration () {
# (lookin' at you, dgi_migrate_foxml_standard_mods_xslt dealio)
wwwdrush cache:rebuild
# Dump status before run.
wwwdrush migrate:status --group=$MIGRATION_GROUP
dump_status
{
# Do the import, one way or another.
if [ $PROCESSES -eq 1 ]; then
Expand All @@ -156,10 +229,10 @@ function do_migration () {
fi
} |& wwwdo tee $IMPORT_LOG > /dev/null
# Dump status after run.
wwwdrush migrate:status --group=$MIGRATION_GROUP
dump_status
# Dump messages after run, so they're not lost with a subsequent run.
wwwdo mkdir -p $MESSAGES_DIR
wwwdrush migrate:status --group=$MIGRATION_GROUP --field=id --format=string | \
wwwdrush dgi-migrate:list-migrations "--group=$MIGRATION_GROUP" --field=id | \
while read NAME ; do
wwwdrush migrate:messages --format=json $NAME | wwwdo tee "$MESSAGES_DIR/$NAME.json" > /dev/null
done
Expand Down Expand Up @@ -202,11 +275,11 @@ function do_rollback () {
# (lookin' at you, things involving Fedora dealio)
wwwdrush cache:rebuild
# Dump status before rollback.
wwwdrush migrate:status --group=$MIGRATION_GROUP
dump_status
# The base rollback.
timedwwwdrush dgi-migrate:rollback --user=$DRUPAL_USER --group=$MIGRATION_GROUP "${@:2}" |& wwwdo tee $ROLLBACK_LOG > /dev/null
timedwwwdrush dgi-migrate:rollback --user=$DRUPAL_USER --group=$MIGRATION_GROUP "--run=$NUM" "${@:2}" |& wwwdo tee $ROLLBACK_LOG > /dev/null
# Dump status after rollback.
wwwdrush migrate:status --group=$MIGRATION_GROUP
dump_status
set +x
} |& wwwdo tee $RUN_LOG

Expand Down
Loading

0 comments on commit 459788e

Please sign in to comment.