diff --git a/Gemfile.lock b/Gemfile.lock
index 8853dac..0aa6109 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -48,7 +48,7 @@ GIT
 PATH
   remote: .
   specs:
-    spotlight-oaipmh-resources (3.0.0.pre.beta.12)
+    spotlight-oaipmh-resources (3.0.0.pre.beta.13)
       mods
       oai
 
diff --git a/app/controllers/spotlight/resources/harvester_controller.rb b/app/controllers/spotlight/resources/harvester_controller.rb
new file mode 100644
index 0000000..619e56e
--- /dev/null
+++ b/app/controllers/spotlight/resources/harvester_controller.rb
@@ -0,0 +1,93 @@
+
+module Spotlight::Resources
+  # Creates a MODS (OAI-PMH) or Solr harvester for an exhibit and queues the
+  # background job that performs the harvest.
+  class HarvesterController < Spotlight::ApplicationController
+
+    load_and_authorize_resource :exhibit, class: Spotlight::Exhibit
+
+    # POST /harvester
+    #
+    # Stores the uploaded custom mapping file (when one was sent), builds the
+    # harvester for the requested type and enqueues the harvest job if it saves.
+    def create
+      upload if custom_mapping_upload
+
+      harvester = build_harvester_by_type(resource_params[:type])
+      if harvester.save
+        Spotlight::Resources::PerformHarvestsJob.perform_later(harvester: harvester, user: current_user)
+        flash[:notice] = t('spotlight.resources.harvester.performharvest.success', set: resource_params[:set])
+      else
+        flash[:error] = "Failed to create harvester for #{resource_params[:set]}. #{harvester.errors.full_messages.to_sentence}"
+      end
+      redirect_to spotlight.admin_exhibit_catalog_path(current_exhibit, sort: :timestamp)
+    end
+
+    private
+
+    # The uploaded custom mapping file, or nil when the param is missing or is
+    # not a file upload (e.g. an empty string submitted for a blank file field).
+    def custom_mapping_upload
+      file = resource_params[:custom_mapping]
+      file if file.respond_to?(:original_filename)
+    end
+
+    # Name of the uploaded file with any directory components removed, so the
+    # file can only ever be written inside the mapping upload directory.
+    def custom_mapping_filename
+      File.basename(custom_mapping_upload.original_filename)
+    end
+
+    # Writes the uploaded mapping file to public/uploads/modsmapping or
+    # public/uploads/solrmapping, depending on the requested harvest type.
+    def upload
+      Dir.mkdir("public/uploads") unless Dir.exist?("public/uploads")
+      dir = "public/uploads/modsmapping"
+      if resource_params[:type] == Spotlight::HarvestType::SOLR
+        dir = "public/uploads/solrmapping"
+      end
+      Dir.mkdir(dir) unless Dir.exist?(dir)
+
+      # Binary write: the upload is read as raw bytes, and a text-mode write
+      # may try to transcode them and raise on non-ASCII content.
+      File.binwrite(File.join(dir, custom_mapping_filename), custom_mapping_upload.read)
+    end
+
+    # Builds (without saving) the harvester model for the given harvest type.
+    # Any type other than MODS is treated as a Solr harvest.
+    def build_harvester_by_type(type)
+      if type == Spotlight::HarvestType::MODS
+        Spotlight::OaipmhHarvester.new(
+          base_url: resource_params[:url],
+          set: resource_params[:set],
+          mods_mapping_file: mapping_file(type),
+          exhibit: current_exhibit
+        )
+      else
+        Spotlight::SolrHarvester.new(
+          base_url: resource_params[:url],
+          set: resource_params[:set],
+          solr_mapping_file: mapping_file(type),
+          exhibit: current_exhibit
+        )
+      end
+    end
+
+    # Name of the mapping file to store on the harvester: the uploaded file's
+    # name when one was sent, otherwise the selection for the given type.
+    def mapping_file(type)
+      return custom_mapping_filename if custom_mapping_upload
+
+      if type == Spotlight::HarvestType::MODS
+        resource_params[:mods_mapping_file]
+      else
+        resource_params[:solr_mapping_file]
+      end
+    end
+
+    # Strong parameters for the harvester form.
+    def resource_params
+      params.require(:harvester).permit(:type, :url, :set, :mods_mapping_file, :solr_mapping_file, :custom_mapping)
+    end
+  end
+end
diff --git a/app/controllers/spotlight/resources/oaipmh_harvester_controller.rb b/app/controllers/spotlight/resources/oaipmh_harvester_controller.rb
deleted file mode 100644
index 2ee7642..0000000
--- a/app/controllers/spotlight/resources/oaipmh_harvester_controller.rb
+++ /dev/null
@@ -1,54 +0,0 @@
-
-module Spotlight::Resources
-  class OaipmhHarvesterController < Spotlight::ApplicationController
-
-    load_and_authorize_resource :exhibit, class: Spotlight::Exhibit
-
-    # POST /oaipmh_harvester
-    def create
-
-      my_params = resource_params
-
-      #upload the mapping file if it exists
-      if (my_params.has_key?(:custom_mapping))
-        upload
-        my_params.delete(:custom_mapping)
-      end
-      mapping_file = resource_params[:mapping_file]
-      if (resource_params.has_key?(:custom_mapping))
-        mapping_file = resource_params[:custom_mapping].original_filename
-      end
-      harvester = Spotlight::OaipmhHarvester.create(
-        base_url: resource_params[:url],
-        set: resource_params[:set],
-        mapping_file: mapping_file,
-        exhibit: current_exhibit
-      )
-
-      if harvester.save
-        Spotlight::Resources::PerformHarvestsJob.perform_later(harvester: harvester, user: current_user)
-        flash[:notice] = t('spotlight.resources.oaipmh_harvester.performharvest.success', set: resource_params[:set])
-      else
-        flash[:error] = "Failed to create harvester for #{resource_params[:set]}. 
#{harvester.errors.full_messages.to_sentence}"
-      end
-      redirect_to spotlight.admin_exhibit_catalog_path(current_exhibit, sort: :timestamp)
-    end
-
-    private
-
-    def upload
-      name = resource_params[:custom_mapping].original_filename
-      Dir.mkdir("public/uploads") unless Dir.exist?("public/uploads")
-      dir = "public/uploads/modsmapping"
-      Dir.mkdir(dir) unless Dir.exist?(dir)
-
-      path = File.join(dir, name)
-      File.open(path, "w") { |f| f.write(resource_params[:custom_mapping].read) }
-    end
-
-
-    def resource_params
-      params.require(:oaipmh_harvester).permit(:url, :set, :mapping_file, :custom_mapping)
-    end
-  end
-end
diff --git a/app/jobs/spotlight/resources/perform_harvests_job.rb b/app/jobs/spotlight/resources/perform_harvests_job.rb
index d4afcac..8dee122 100644
--- a/app/jobs/spotlight/resources/perform_harvests_job.rb
+++ b/app/jobs/spotlight/resources/perform_harvests_job.rb
@@ -26,7 +26,7 @@ def perform(harvester:, user: nil)
       @exhibit = harvester.exhibit
       @set = harvester.set
       @user = user
-      @sidecar_ids = harvester.harvest_oai_items(job_tracker: job_tracker, job_progress: progress)
+      @sidecar_ids = harvester.harvest_items(job_tracker: job_tracker, job_progress: progress)
       @total_errors = harvester.total_errors
       @total_warnings = 0
 
diff --git a/app/models/spotlight/harvest_type.rb b/app/models/spotlight/harvest_type.rb
new file mode 100644
index 0000000..9557bc8
--- /dev/null
+++ b/app/models/spotlight/harvest_type.rb
@@ -0,0 +1,9 @@
+module Spotlight
+  # Names of the supported harvest sources, as offered by (and submitted from)
+  # the harvester form's type select.
+  class HarvestType
+    MODS = "MODS"
+    SOLR = "Solr"
+    HARVEST_TYPES = [MODS, SOLR].freeze
+  end
+end
diff --git a/app/models/spotlight/harvester.rb b/app/models/spotlight/harvester.rb
new file mode 100644
index 0000000..6dc0ae5
--- /dev/null
+++ b/app/models/spotlight/harvester.rb
@@ -0,0 +1,62 @@
+module Spotlight
+  # Base model shared by the OAI-PMH and Solr harvesters. It relies on its
+  # subclasses for #mapping_file and #complete_list_size.
+  class Harvester < ActiveRecord::Base
+    BUILT_IN_MAPPING_OPTIONS = ['Default Mapping File', 'New Mapping File'].freeze
+
+    belongs_to :exhibit
+
+    attr_accessor :total_errors
+
+    validates :base_url, presence: true
+    validates :set, presence: true
+
+    # Choices for a mapping-file select: the two built-in options followed by
+    # the (sorted) names of the files in public/uploads/<dir_name>, if any.
+    #
+    # @param dir_name [String] sub-directory of public/uploads to list
+    # @return [Array<String>]
+    def self.mapping_files(dir_name)
+      dir = "public/uploads/#{dir_name}"
+      uploaded = Dir.exist?(dir) ? Dir.children(dir).sort : []
+
+      BUILT_IN_MAPPING_OPTIONS + uploaded
+    end
+
+    # Records a failed item in the Delayed::Worker log and, when given, the job
+    # tracker, then bumps #total_errors. Tolerates a missing item or item id so
+    # that reporting one bad record cannot itself abort the harvest.
+    #
+    # @param error [Exception] error raised while harvesting the item
+    # @param parsed_item [#id, nil] parsed record, if parsing got that far
+    # @param job_tracker [#append_log_entry, nil] optional tracker to log to
+    def handle_item_harvest_error(error, parsed_item, job_tracker = nil)
+      item_id = parsed_item&.id.presence || 'Unidentified item'
+      error_msg = "#{item_id} did not index successfully:"
+      Delayed::Worker.logger.add(Logger::ERROR, error_msg)
+      Delayed::Worker.logger.add(Logger::ERROR, error.message)
+      Delayed::Worker.logger.add(Logger::ERROR, Array(error.backtrace).join("\n"))
+      if job_tracker.present?
+        job_tracker.append_log_entry(type: :error, exhibit: exhibit, message: error_msg)
+        job_tracker.append_log_entry(type: :error, exhibit: exhibit, message: error.message)
+      end
+      self.total_errors = total_errors.to_i + 1
+    end
+
+    # Copies the current size of the set into the progress tracker; a no-op
+    # when no tracker was supplied.
+    def update_progress_total(job_progress)
+      return if job_progress.nil?
+
+      job_progress.total = complete_list_size
+    end
+
+    # @return [String, nil] the selected mapping file name, or nil when one of
+    #   the built-in options is selected (i.e. no custom file applies)
+    def get_mapping_file
+      return if BUILT_IN_MAPPING_OPTIONS.include?(mapping_file)
+
+      mapping_file
+    end
+  end
+end
diff --git a/app/models/spotlight/oaipmh_harvester.rb b/app/models/spotlight/oaipmh_harvester.rb
index 6555148..d44fa92 100644
--- a/app/models/spotlight/oaipmh_harvester.rb
+++ b/app/models/spotlight/oaipmh_harvester.rb
@@ -3,29 +3,14 @@
 require 'uri'
 
 module Spotlight
-  class OaipmhHarvester < ActiveRecord::Base
-    belongs_to :exhibit
-
-    validates :base_url, presence: true
-    validates :set, presence: true
-
-    attr_accessor :total_errors
+  class OaipmhHarvester < Harvester
+    alias_attribute :mapping_file, :mods_mapping_file
 
     def self.mapping_files
-      if (Dir.exist?('public/uploads/modsmapping'))
-        files = Dir.entries('public/uploads/modsmapping')
-        files.delete('.')
-        files.delete('..')
-      else
-        files = Array.new
-      end
-
-      files.insert(0, 'New Mapping File')
-      files.insert(0, 'Default Mapping File')
-      files
+      super('modsmapping')
     end
 
-    def harvest_oai_items(job_tracker: nil, job_progress: nil)
+    def harvest_items(job_tracker: nil, 
job_progress: nil)
       self.total_errors = 0
       @sidecar_ids = []
       harvests = oaipmh_harvests
@@ -94,15 +79,7 @@ def harvest_item(record, job_tracker, job_progress)
 
       job_progress&.increment
     rescue Exception => e
-      error_msg = parsed_oai_item.id + ' did not index successfully:'
-      Delayed::Worker.logger.add(Logger::ERROR, error_msg)
-      Delayed::Worker.logger.add(Logger::ERROR, e.message)
-      Delayed::Worker.logger.add(Logger::ERROR, e.backtrace)
-      if job_tracker.present?
-        job_tracker.append_log_entry(type: :error, exhibit: exhibit, message: error_msg)
-        job_tracker.append_log_entry(type: :error, exhibit: exhibit, message: e.message)
-      end
-      self.total_errors += 1
+      handle_item_harvest_error(e, parsed_oai_item, job_tracker)
     end
 
     def oaipmh_harvests
@@ -131,15 +108,5 @@ def client
     def oai_mods_converter
       @oai_mods_converter ||= Spotlight::Resources::OaipmhModsConverter.new(set, exhibit.slug, get_mapping_file)
     end
-
-    def update_progress_total(job_progress)
-      job_progress.total = complete_list_size
-    end
-
-    def get_mapping_file
-      return if mapping_file.eql?('Default Mapping File') || mapping_file.eql?('New Mapping File')
-
-      mapping_file
-    end
   end
 end
diff --git a/app/models/spotlight/resources/solr_converter.rb b/app/models/spotlight/resources/solr_converter.rb
new file mode 100644
index 0000000..f88bf39
--- /dev/null
+++ b/app/models/spotlight/resources/solr_converter.rb
@@ -0,0 +1,162 @@
+include Spotlight::Resources::Exceptions
+module Spotlight::Resources
+
+  # One Solr source field listed under "solr-field" in a mapping entry.
+  class SolrEntry
+    attr_accessor :solr_field
+  end
+
+  # One mapping entry: describes how a Spotlight field is assembled from one
+  # or more Solr fields.
+  class ConverterItem
+    DEFAULT_DELIMITER = ", "
+
+    attr_accessor :spotlight_field, :solr_items, :default_value, :delimiter, :multivalue_facets
+
+    def initialize
+      # Assign the attribute itself: a bare `delimiter = ...` would only
+      # create a local variable and leave the delimiter nil.
+      @delimiter = DEFAULT_DELIMITER
+    end
+
+    # Collects this entry's values from a Solr record.
+    #
+    # @param solrmd [Hash] Solr document, keyed by Solr field name
+    # @return [Array, String, nil] the unique values: as an array when
+    #   multivalue_facets is set, otherwise joined with the delimiter; nil
+    #   when the record yields no value (and no default) for this entry
+    def extract_values(solrmd)
+      values = extract_solr_values(solrmd).uniq
+      return if values.empty?
+
+      # Keep multiple values as an array so each one can be faceted on
+      multivalue_facets ? values : values.join(delimiter)
+    end
+
+    private
+
+    # Gathers the values of every configured Solr field, flattening
+    # multi-valued Solr fields and substituting default_value for blank ones.
+    def extract_solr_values(solrmd)
+      Array(solr_items).each_with_object([]) do |item, values|
+        field_value = solrmd[item.solr_field]
+
+        if field_value.present?
+          field_value.is_a?(Array) ? values.concat(field_value) : values << field_value
+        elsif default_value.present?
+          values << default_value
+        end
+      end
+    end
+  end
+
+  # Converts Solr records into Spotlight field hashes as described by a YAML
+  # mapping file.
+  class SolrConverter
+    STANDARD_SPOTLIGHT_FIELDS = [
+      'unique-id_tesim', 'full_title_tesim', 'spotlight_upload_description_tesim', 'thumbnail_url_ssm',
+      'full_image_url_ssm', 'spotlight_upload_date_tesim', 'spotlight_upload_attribution_tesim'
+    ].freeze
+
+    attr_accessor :sidecar_hash
+
+    # @param set [String] name of the set being converted
+    # @param exhibitslug [String] exhibit slug used to prefix exhibit-specific fields
+    # @param mapping_file [String, nil] custom mapping file name, or nil for the default mapping
+    def initialize(set, exhibitslug, mapping_file)
+      @set = set
+      @exhibitslug = exhibitslug
+      @mapping_file = mapping_file
+      @converter_items = []
+      @sidecar_hash = {}
+    end
+
+    # Converts one Solr record, parsing the mapping file first if that has not
+    # happened yet. Also refreshes #sidecar_hash with the same values keyed by
+    # the un-prefixed Spotlight field names.
+    #
+    # @param solrrecord [Hash] Solr document
+    # @return [Hash] values keyed by the field names from #get_spotlight_field_name
+    def convert(solrrecord)
+      parse_mapping_file(mapping_file) if @converter_items.empty?
+
+      @converter_items.each_with_object({}) do |item, solr_hash|
+        value = item.extract_values(solrrecord)
+
+        # Assign even when nil: @sidecar_hash is reused for every record, so a
+        # skipped field would keep the value of the previous record.
+        solr_hash[get_spotlight_field_name(item.spotlight_field)] = value
+        @sidecar_hash[item.spotlight_field] = value
+      end
+    end
+
+    # Some spotlight fields use the exhibit slug, others do not: everything
+    # outside STANDARD_SPOTLIGHT_FIELDS becomes exhibit_<slug>_<field>.
+    def get_spotlight_field_name(spotlight_field)
+      return spotlight_field if STANDARD_SPOTLIGHT_FIELDS.include?(spotlight_field)
+
+      "exhibit_#{@exhibitslug}_#{spotlight_field}"
+    end
+
+    # Path of the mapping file to parse: the engine's default mapping when no
+    # custom file was chosen, otherwise that file in public/uploads/solrmapping.
+    # Safe to call repeatedly: joining an absolute path returns it unchanged.
+    def mapping_file
+      @mapping_file =
+        if @mapping_file.nil?
+          File.join(Spotlight::Oaipmh::Resources::Engine.root, 'config', 'default_solr_mapping.yml')
+        else
+          Rails.root.join("public/uploads/solrmapping", @mapping_file)
+        end
+    end
+
+    # Parses the mapping file into ConverterItems (replacing any parsed
+    # earlier) and records the Solr field(s) that make up the unique id.
+    #
+    # NOTE(review): YAML.load_file runs on files users can upload; consider
+    # YAML.safe_load if mapping files never need more than plain YAML types.
+    #
+    # @param file [String, Pathname] path of the mapping file
+    # @return [Array<ConverterItem>]
+    # @raise [InvalidMappingFile] when the file is not a list of entries or an
+    #   entry lacks a required key
+    def parse_mapping_file(file)
+      mapping_config = YAML.load_file(file)
+      raise InvalidMappingFile, "mapping file must contain a list of entries" unless mapping_config.is_a?(Array)
+
+      @converter_items = mapping_config.map { |field| build_converter_item(field) }
+    end
+
+    def get_unique_id_field
+      @unique_id_field
+    end
+
+    private
+
+    # Builds the ConverterItem for one mapping entry, validating its required
+    # keys; for the unique-id_tesim entry it also sets @unique_id_field.
+    def build_converter_item(field)
+      raise InvalidMappingFile, "spotlight-field is required for each entry" if field['spotlight-field'].blank?
+      raise InvalidMappingFile, "solr-field is required for each entry" unless field['solr-field'].is_a?(Array)
+
+      item = ConverterItem.new
+      item.spotlight_field = field['spotlight-field']
+      item.delimiter = field["delimiter"] if field.key?("delimiter")
+      item.default_value = field["default-value"] if field.key?("default-value")
+      item.multivalue_facets = field["multivalue-breaks"] if field.key?("multivalue-breaks")
+      item.solr_items = field['solr-field'].map do |solr_field|
+        if solr_field['field-name'].blank?
+          raise InvalidMappingFile, "field-name is required for each solr-field entry"
+        end
+
+        SolrEntry.new.tap { |entry| entry.solr_field = solr_field['field-name'] }
+      end
+
+      if item.spotlight_field.eql?("unique-id_tesim")
+        @unique_id_field = item.solr_items.map(&:solr_field).join(field["delimiter"].presence || "")
+      end
+
+      item
+    end
+  end
+end
diff --git a/app/models/spotlight/resources/solr_harvesting_parser.rb b/app/models/spotlight/resources/solr_harvesting_parser.rb
new file mode 100644
index 0000000..99fb42f
--- /dev/null
+++ b/app/models/spotlight/resources/solr_harvesting_parser.rb
@@ -0,0 +1,103 @@
+include Spotlight::Resources::Exceptions
+module Spotlight::Resources
+  # Turns one harvested Solr record into the Solr document and sidecar data
+  # for Spotlight, using a SolrConverter for the field mapping.
+  class SolrHarvestingParser
+    attr_reader :titles, :id
+    attr_accessor :metadata, :sidecar_data
+
+    # @param exhibit [Spotlight::Exhibit] exhibit the record is harvested into
+    # @param converter [SolrConverter] converter that applies the mapping file
+    def initialize(exhibit, converter)
+      @solr_hash = {}
+      @exhibit = exhibit
+      @converter = converter
+    end
+
+    # @return [Hash] the converted record, with its :id set
+    def to_solr
+      add_document_id
+      solr_hash
+    end
+
+    # Derives #id from the record's unique id field and converts #metadata
+    # into the Solr hash and the sidecar data. A record without a unique id
+    # gets a generated one, so #id is always set afterwards.
+    #
+    # @param unique_id_field [String, nil] name of the Solr field holding the unique id
+    def parse_record(unique_id_field)
+      unique_id = metadata[unique_id_field]
+      unique_id = unique_id.first if unique_id.kind_of?(Array)
+      # Strip out all of the decimals and prefix the exhibit id
+      @id = unique_id.blank? ? generated_id : "#{@exhibit.id}-#{unique_id.to_s.delete('.')}"
+
+      @solr_hash = @converter.convert(metadata)
+      @sidecar_data = organize_sidecar_data(@converter.sidecar_hash)
+    end
+
+    # private
+
+    attr_reader :solr_hash, :exhibit
+
+    # Sets the Solr document id, generating one when #parse_record has not
+    # provided it.
+    def add_document_id
+      @id = generated_id if @id.blank?
+      solr_hash[:id] = @id.to_s
+    end
+
+    # Random ten-digit id for records that do not supply a unique id.
+    def generated_id
+      format('%010d', rand(10**10))
+    end
+
+    # Spotlight v3.3.0
+    # Spotlight expects "exhibit-specific fields" (a.k.a. Exhibit#custom_fields) to not have
+    # a Solr suffix (e.g. _tesim, _ssim, etc.). This method assumes all non-configured fields
+    # are custom and thus removes their Solr suffix when adding them to the returned hash.
+    # Configured fields are added as-is (Solr suffix included).
+    def organize_sidecar_data(hash)
+      hash.each_with_object({}) do |(field_name, value), organized_sidecar_data|
+        key = configured_field_names.include?(field_name) ? field_name : field_name.sub(/_[^_]+$/, '')
+        organized_sidecar_data[key] = value
+      end
+    end
+
+    # Spotlight v3.3.0
+    # Used to update an existing sidecar's data when harvesting (see
+    # Spotlight::SolrHarvester#harvest_item). Default "configured" fields are expected
+    # to be nested in a "configured_fields" sub-hash. This method assumes non-configured
+    # fields are "exhibit-specific fields" (a.k.a. Exhibit#custom_fields) and puts them
+    # in the "top level" of the hash (where Spotlight expects them to be).
+    #
+    # Example:
+    #   {
+    #     'configured_fields' => {
+    #       'full_title_tesim' => 'My Title'
+    #     },
+    #     'custom-field' => 'Hello world'
+    #   }
+    #
+    # @return [Hash] Sidecar data organized in the format that Spotlight expects
+    def reorganize_sidecar_data
+      reorganized_sidecar_data = { 'configured_fields' => {} }
+      custom_field_slugs = exhibit.custom_fields.map(&:slug)
+
+      # each (not map): the block is run only for its side effects
+      @sidecar_data.each do |field_name, value|
+        reorganized_sidecar_data['configured_fields'][field_name] = value if configured_field_names.include?(field_name)
+        next unless custom_field_slugs.include?(field_name)
+
+        reorganized_sidecar_data[field_name] = value
+      end
+
+      reorganized_sidecar_data
+    end
+
+    # @return [Array] List of default fields names as configured in config/initializers/spotlight_initializer.rb
+    def configured_field_names
+      # Add full_title_tesim to the list since it's a default Spotlight field
+      @configured_field_names ||= ['full_title_tesim'] + exhibit.uploaded_resource_fields.map(&:field_name).map(&:to_s)
+    end
+  end
+end
diff --git a/app/models/spotlight/resources/solr_upload.rb b/app/models/spotlight/resources/solr_upload.rb
new file mode 100644
index 0000000..386a9b7
--- /dev/null
+++ 
b/app/models/spotlight/resources/solr_upload.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+module Spotlight
+  module Resources
+    ##
+    # Exhibit-specific resources, created using uploaded and custom fields
+    class SolrUpload < Spotlight::Resources::Upload
+      # this sets the path for Solr Uploads from the Harvester to include the URN (aka unique-id) as opposed to the spotlight generated ID. external_id already contains the exhibit_id at the beginning.
+      def compound_id
+        external_id.to_s
+      end
+    end
+  end
+end
diff --git a/app/models/spotlight/solr_harvester.rb b/app/models/spotlight/solr_harvester.rb
new file mode 100644
index 0000000..1b4cbda
--- /dev/null
+++ b/app/models/spotlight/solr_harvester.rb
@@ -0,0 +1,172 @@
+require 'net/http'
+require 'uri'
+
+module Spotlight
+  # Harvests the records of the Solr index at <base_url>/<set> into the
+  # exhibit, paging through the results with Solr's cursorMark.
+  class SolrHarvester < Harvester
+    ROW_COUNT = 50
+    DEFAULT_SORT_FIELD = '_id'
+
+    alias_attribute :mapping_file, :solr_mapping_file
+
+    def self.mapping_files
+      super('solrmapping')
+    end
+
+    # Harvests every record of the set.
+    #
+    # @param job_tracker [#append_log_entry, nil] optional tracker for log entries
+    # @param job_progress [#total=, #progress, #increment, nil] optional progress tracker
+    # @return [Array] ids collected for Spotlight::Resources::LoadUrnsJob (empty
+    #   unless use_solr_document_urns is enabled)
+    def harvest_items(job_tracker: nil, job_progress: nil)
+      self.total_errors = 0
+      @sidecar_ids = []
+      solr_converter.parse_mapping_file(solr_converter.mapping_file)
+
+      cursor = nil
+      loop do
+        harvests = solr_harvests(cursor)
+        # Refresh on every page: the set size can change mid-harvest
+        @complete_list_size = harvests['response']['numFound'] || 0
+        update_progress_total(job_progress) if job_progress
+
+        docs = harvests['response']['docs']
+        break if docs.blank?
+
+        docs.each { |record| harvest_item(record, job_tracker, job_progress) }
+        log_progress(job_tracker, job_progress)
+
+        # A missing or unchanged cursor means there is nothing more to fetch
+        next_cursor = harvests['nextCursorMark']
+        break if next_cursor.blank? || next_cursor == cursor
+
+        cursor = next_cursor
+      end
+      @sidecar_ids
+    end
+
+    # Harvests one Solr record into a SolrUpload resource and (re)indexes it.
+    # Errors are logged and counted instead of raised, so one bad record does
+    # not stop the harvest.
+    def harvest_item(record, job_tracker, job_progress)
+      parsed_solr_item = Spotlight::Resources::SolrHarvestingParser.new(exhibit, solr_converter)
+
+      parsed_solr_item.metadata = record
+      parsed_solr_item.parse_record(solr_converter.get_unique_id_field)
+      solr_document = parsed_solr_item.to_solr
+      # Records without a unique id fall back to the id generated for the Solr
+      # document rather than failing on a nil id.
+      item_id = (parsed_solr_item.id.presence || solr_document[:id]).to_s
+      # At this point, we know the candidate for the sidecar's document_id. This will be used in
+      # the Spotlight::Resources::LoadUrnsJob
+      @sidecar_ids << item_id if Spotlight::Oaipmh::Resources.use_solr_document_urns
+
+      # Create clean resource for editing
+      resource = Spotlight::Resources::SolrUpload.find_or_initialize_by(exhibit: exhibit, external_id: item_id.upcase)
+      resource.data = parsed_solr_item.sidecar_data
+      # The resource's sidecar is set up correctly the first time; nothing special is required
+      if resource.solr_document_sidecars.blank?
+        resource.save_and_index
+      else
+        # As of Spotlight v3.3.0, if a resource already has a sidecar, the sidecar (and thus the data in Solr)
+        # will not update unless done explicitly. The sidecar's #data is organized differently than the
+        # resource's #data, so we can't just copy it over from the resource directly.
+        resource.save!
+        sidecar = resource.solr_document_sidecars.first
+        sidecar.data = parsed_solr_item.reorganize_sidecar_data
+        sidecar.save!
+        # Get the updated sidecar into our local variable to ensure proper indexing
+        resource.reload.reindex_later
+      end
+
+      job_progress&.increment
+    rescue StandardError => e
+      # StandardError only: signals and exit requests must still stop the worker
+      handle_item_harvest_error(e, parsed_solr_item, job_tracker)
+    end
+
+    # Fetches one page of up to ROW_COUNT records.
+    #
+    # @param cursor [String, nil] cursorMark of the page; nil or blank starts at the beginning
+    # @return [Hash] the Solr response
+    def solr_harvests(cursor = nil)
+      cursor = cursor.presence || '*'
+      sort_field = sort_field_for_set(set)
+
+      solr_connection.get(
+        'select',
+        params: {
+          q: '*:*',
+          cursorMark: cursor,
+          sort: "#{sort_field} asc",
+          rows: ROW_COUNT,
+          wt: 'json'
+        }
+      )
+    end
+
+    # This is meant to be a temporary solution to compensate for inconsistent data
+    # structures between the Solr sets.
+    #
+    # Our Solr harvest endpoint (https://fts.lib.harvard.edu/solr/) requires queries
+    # to use a "cursor" (as opposed to pagination, for example). Solr queries that
+    # use a cursor require a field that has a unique value to sort on.
+    #
+    # The Solr sets currently (2022-10-28) do not all use a consistent field that
+    # meets this requirement. Some store the unique value in a field called "_id",
+    # some store the field in a field called "id".
+    #
+    # Due to this inconsistency in the structure of the Solr data, a file has been
+    # added to explicitly declare what the unique field is for each Solr set.
+    #
+    # This method (and related logic) can be removed once the Solr data is changed
+    # to use a consistent unique identifying field.
+    def sort_field_for_set(set)
+      unique_key_mappings.dig(set, 'unique_key').presence || DEFAULT_SORT_FIELD
+    end
+
+    def complete_list_size
+      @complete_list_size ||= solr_harvests['response']['numFound'] || 0
+    end
+
+    def solr_connection
+      @solr_connection ||= begin
+        # Add trailing "/" if it's missing from base_url
+        valid_base_url = base_url.end_with?('/') ? base_url : "#{base_url}/"
+        RSolr.connect(url: valid_base_url + set)
+      end
+    end
+
+    def solr_converter
+      @solr_converter ||= Spotlight::Resources::SolrConverter.new(set, exhibit.slug, get_mapping_file)
+    end
+
+    private
+
+    # Per-set unique key declarations from unique_keys_by_set.yml, loaded once;
+    # empty when the file is missing or has no content.
+    def unique_key_mappings
+      @unique_key_mappings ||= begin
+        file = File.join(
+          Spotlight::Oaipmh::Resources::Engine.root,
+          'harvard_yaml_mapping_files',
+          'solr',
+          'unique_key_mappings',
+          'unique_keys_by_set.yml'
+        )
+        # File.exist? (not the removed File.exists?) keeps this working on Ruby 3.2+
+        (File.exist?(file) && YAML.load_file(file)) || {}
+      end
+    end
+
+    # Log an update every 100 records
+    def log_progress(job_tracker, job_progress)
+      return unless job_tracker && job_progress
+      return unless (job_progress.progress % 100).zero?
+
+      job_tracker.append_log_entry(type: :info, exhibit: exhibit, message: "#{job_progress.progress} of #{job_progress.total} (#{self.total_errors} errors)")
+    end
+  end
+end
diff --git a/app/views/spotlight/resources/harvester/_form.html.erb b/app/views/spotlight/resources/harvester/_form.html.erb
new file mode 100644
index 0000000..d8598b7
--- /dev/null
+++ b/app/views/spotlight/resources/harvester/_form.html.erb
@@ -0,0 +1,51 @@
+<%= bootstrap_form_for([current_exhibit, @resource.becomes(Spotlight::Harvester)], url: spotlight_oaipmh_resources_engine.exhibit_harvester_path(exhibit_id: current_exhibit), layout: :horizontal, label_col: 'col-md-2', control_col: 'col-sm-6 col-md-6' ) do |f| %>
+  <%= f.select :type, Spotlight::HarvestType::HARVEST_TYPES, {}, :onChange=>"toggleHarvesters(this.value);" %>
+  <%= f.text_field :url, help: t('.url-field.help'), label: t('.url'), required: true %>
+  <%= f.text_field :set, help: t('.set-field.help'), label: t('.set'), required: true %>
+
+  <%= f.select :mods_mapping_file,
+    Spotlight::OaipmhHarvester.mapping_files,
+    { help: t('.mapping-file-field.help'), label: t('.mapping-file') },
+    :onChange=>"toggleUploadArea(this.value);" %>
+
+
+
+
+    <%= cancel_link @resource, :back, class: 'btn btn-default' %>
+    <%= f.submit t('.add_item'), class: 'btn btn-primary' %>
+
+
+<% end %>
+
+<%= javascript_tag do %>
+  function toggleUploadArea(value) {
+    if(value=='New Mapping File') {
+      document.getElementById("custom_mapping_file_div").style.display = "block";
+    }
+    else {
+      document.getElementById("custom_mapping_file_div").style.display = "none";
+    }
+  }
+
+  function toggleHarvesters(value) {
+    if(value=='MODS') {
+      document.getElementById("oaiharvester").style.display = "block";
+      document.getElementById("solrharvester").style.display = "none";
+    }
+    else {
+      document.getElementById("oaiharvester").style.display = "none";
+      document.getElementById("solrharvester").style.display = "block";
+    }
+  }
+
+  $('input[type="file"]').change(function(e){
+    var file = e.target.files[0]; // undefined when the file dialog is cancelled
+    if (file) { $('.custom-file-label').text(file.name); } // text(): never parse a file name as HTML
+  });
+<% end %>
diff --git a/app/views/spotlight/resources/oaipmh_harvester/_form.html.erb b/app/views/spotlight/resources/oaipmh_harvester/_form.html.erb
deleted file mode 100644
index e625956..0000000
--- a/app/views/spotlight/resources/oaipmh_harvester/_form.html.erb
+++ /dev/null
@@ -1,31 +0,0 @@
-<%= bootstrap_form_for([current_exhibit, @resource.becomes(Spotlight::OaipmhHarvester)], url: spotlight_oaipmh_resources_engine.exhibit_oaipmh_harvester_path(exhibit_id: current_exhibit), layout: :horizontal, label_col: 'col-md-2', control_col: 'col-sm-6 col-md-6' ) do |f| %>
-  <%= f.text_field :url, help: t('.url-field.help'), label: t('.url'), required: true %>
-  <%= f.text_field :set, help: t('.set-field.help'), label: t('.set'), required: true %>
-  <%= f.select :mapping_file,
-    Spotlight::OaipmhHarvester.mapping_files,
-    { help: t('.mapping-file-field.help'), label: t('.mapping-file') },
-    :onChange=>"toggleUploadArea(this.value);" %>
-
-
-    <%= cancel_link @resource, :back, class: 'btn btn-default' %>
-    <%= f.submit t('.add_item'), class: 'btn btn-primary' %>
-
-
-<% end %>
-
-<%= javascript_tag do %>
-  function toggleUploadArea(value) {
-    if(value=='New Mapping File') {
-      document.getElementById("custom_mapping_file_div").style.display = "block";
-    }
-    else {
-      document.getElementById("custom_mapping_file_div").style.display = "none";
-    }
-  }
-
-  $('input[type="file"]').change(function(e){
-    var fileName = e.target.files[0].name;
-    $('.custom-file-label').html(fileName);
-  });
-<% end %>
diff --git a/config/default_solr_mapping.yml b/config/default_solr_mapping.yml
new file mode 100644
index 0000000..2490014
--- /dev/null
+++ b/config/default_solr_mapping.yml
@@ -0,0 +1,20 @@
+#Structure:
+#
+# - spotlight-field: xxx (field names should be separated with dashes except for the suffix: firstpart-secondpart_ssim or _tesim)
+#   multivalue-breaks: "yes" (optional) - use this for splitting out multiple values to be broken on (and faceted on) individually (ex - subjects)
+#   default-value: xxx (optional)
+#   delimiter: xxx (optional, what to separate all path values with. Defaults to a space)
+#   solr-field:
+#     - field-name: xxx (repeatable - all path fields will be concatenated)
+#
+# Make sure to set unique-id_tesim if you want to choose an identifier for your record.
+# Otherwise, reharvesting will result in the addition of more records rather than overwriting
+# existing records. 
+
+- spotlight-field: unique-id_tesim
+  solr-field:
+    - field-name: "hashKey"
+
+- spotlight-field: full_title_tesim
+  solr-field:
+    - field-name: "artist.romanizedName"
diff --git a/config/locales/en.yml b/config/locales/en.yml
index 32f2f2d..4f53e27 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -1,16 +1,19 @@
 en:
   spotlight:
     resources:
-      oaipmh_harvester:
+      harvester:
         form:
-          title: 'OAI-PMH MODS Harvest URL'
+          title: 'Metadata Harvesting'
+          type: "Harvesting Type"
+          type-field:
+            help: "Choose where to harvest from (eg - Solr, OAI)"
           url: "Base URL"
-          add_item: "Add MODS items"
+          add_item: "Harvest items"
           url-field:
-            help: "Add the base URL of the OAI MODS Set."
+            help: "Add the base URL of the data to be harvested."
           set: "Set name"
           set-field:
-            help: "Type in the MODS set name."
+            help: "Type in the set name to be harvested."
           mapping-file: "Select Mapping File"
           mapping-file-field:
             help: "Select Mapping File to Use (default is mapping.yml)."
diff --git a/config/routes.rb b/config/routes.rb
index 688794a..f585683 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -1,5 +1,5 @@
 Spotlight::Oaipmh::Resources::Engine.routes.draw do
   resources :exhibits, path: '/', only: [] do
-    resource :oaipmh_harvester, controller: :"spotlight/resources/oaipmh_harvester", only: [:create, :update]
+    resource :harvester, controller: :"spotlight/resources/harvester", only: [:create, :update]
   end
 end
diff --git a/db/migrate/20221018000740_rename_spotlight_oaipmh_harvesters_to_spotlight_harvesters.rb b/db/migrate/20221018000740_rename_spotlight_oaipmh_harvesters_to_spotlight_harvesters.rb
new file mode 100644
index 0000000..90d7fe3
--- /dev/null
+++ b/db/migrate/20221018000740_rename_spotlight_oaipmh_harvesters_to_spotlight_harvesters.rb
@@ -0,0 +1,5 @@
+class RenameSpotlightOaipmhHarvestersToSpotlightHarvesters < ActiveRecord::Migration[6.1]
+  def change
+    rename_table :spotlight_oaipmh_harvesters, :spotlight_harvesters
+  end
+end
diff --git a/db/migrate/20221018001026_add_type_to_spotlight_harvesters.rb b/db/migrate/20221018001026_add_type_to_spotlight_harvesters.rb
new file mode 100644
index 0000000..d035c2a
--- /dev/null
+++ b/db/migrate/20221018001026_add_type_to_spotlight_harvesters.rb
@@ -0,0 +1,24 @@
+class AddTypeToSpotlightHarvesters < ActiveRecord::Migration[6.1]
+  # Migration-local stand-in for Spotlight::Harvester, so the backfill does not
+  # depend on the application model (its validations, STI lookup or later
+  # schema changes).
+  class MigrationHarvester < ActiveRecord::Base
+    self.table_name = 'spotlight_harvesters'
+    self.inheritance_column = nil
+  end
+
+  # Adds the STI type column and marks every existing row as an OAI-PMH
+  # harvester (the table held only OAI-PMH harvesters before it was renamed).
+  def up
+    add_column :spotlight_harvesters, :type, :string
+
+    MigrationHarvester.reset_column_information
+    MigrationHarvester.where(type: [nil, '']).update_all(type: 'Spotlight::OaipmhHarvester')
+
+    change_column_null :spotlight_harvesters, :type, false
+  end
+
+  def down
+    remove_column :spotlight_harvesters, :type
+  end
+end
diff --git a/db/migrate/20221018232843_distinguish_mapping_files_by_type.rb b/db/migrate/20221018232843_distinguish_mapping_files_by_type.rb
new file mode 100644
index 0000000..23d6844
--- /dev/null
+++ b/db/migrate/20221018232843_distinguish_mapping_files_by_type.rb
@@ -0,0 +1,6 @@
+class DistinguishMappingFilesByType < ActiveRecord::Migration[6.1]
+  def change
+    rename_column :spotlight_harvesters, :mapping_file, :mods_mapping_file
+    add_column :spotlight_harvesters, :solr_mapping_file, :string
+  end
+end
diff --git a/harvard_yaml_mapping_files/solr/unique_key_mappings/unique_keys_by_set.yml b/harvard_yaml_mapping_files/solr/unique_key_mappings/unique_keys_by_set.yml
new file mode 100644
index 0000000..94171db
--- /dev/null
+++ b/harvard_yaml_mapping_files/solr/unique_key_mappings/unique_keys_by_set.yml
@@ -0,0 +1,16 @@
+# This file is part of what is meant to be a temporary solution to compensate
+# for inconsistent data structures in the Solr sets.
+#
+# This file (and related logic) can be removed once the Solr data is changed
+# to use a consistent unique identifying field.
+#
+# @see Spotlight::SolrHarvester#sort_field_for_set
+
+tedming:
+  unique_key: id
+acat-calendar:
+  unique_key: _id
+acat-courses:
+  unique_key: _id
+acat-faculty:
+  unique_key: _id
diff --git a/lib/spotlight/oaipmh/resources/engine.rb b/lib/spotlight/oaipmh/resources/engine.rb
index cc8860b..5cb308e 100644
--- a/lib/spotlight/oaipmh/resources/engine.rb
+++ b/lib/spotlight/oaipmh/resources/engine.rb
@@ -2,7 +2,7 @@
 
 module Spotlight::Oaipmh::Resources
   class Engine < ::Rails::Engine
-    Spotlight::Oaipmh::Resources::Engine.config.resource_partial = 'spotlight/resources/oaipmh_harvester/form'
+    Spotlight::Oaipmh::Resources::Engine.config.resource_partial = 'spotlight/resources/harvester/form'
 
     initializer :append_migrations do |app|
       if !app.root.to_s.match(root.to_s) && app.root.join('db/migrate').children.none? { |path| path.fnmatch?("*.spotlight-oaipmh-resources.rb") }
diff --git a/lib/spotlight/oaipmh/resources/version.rb b/lib/spotlight/oaipmh/resources/version.rb
index 3cd4cbe..23178d0 100644
--- a/lib/spotlight/oaipmh/resources/version.rb
+++ b/lib/spotlight/oaipmh/resources/version.rb
@@ -2,7 +2,7 @@
 module Spotlight
   module Oaipmh # :nodoc:
     module Resources
-      VERSION = "3.0.0-beta.12"
+      VERSION = "3.0.0-beta.13"
     end
   end
 end