diff --git a/config/traject/dataspace_research_data_config.rb b/config/traject/dataspace_research_data_config.rb
index f6398d8..6cc5e0b 100644
--- a/config/traject/dataspace_research_data_config.rb
+++ b/config/traject/dataspace_research_data_config.rb
@@ -14,6 +14,35 @@
   provide 'logger', Logger.new($stderr, level: Logger::ERROR)
   provide "nokogiri.each_record_xpath", "//items/item"
   provide "dataspace_communities", DataspaceCommunities.new('./spec/fixtures/files/dataspace_communities.json')
+
+  # Handler registered under the "mapping_rescue" setting; it is given the
+  # Traject context and the exception that was raised.
+  # MaxSkippedRecordsExceeded is only logged (not re-raised); every other
+  # exception is logged with record details and then re-raised.
+  provide "mapping_rescue", lambda { |context, exception|
+    if exception.is_a?(Traject::SolrJsonWriter::MaxSkippedRecordsExceeded)
+      # Guard the logger here as well, matching the branch below.
+      context.logger.error("Encountered exception: #{exception}") if context.logger
+    else
+
+      # This is the implementation from Traject::Indexer#default_mapping_rescue
+      # @see https://github.com/traject/traject/blob/main/lib/traject/indexer.rb#L483
+      msg = "Unexpected error on record #{context.record_inspect}\n"
+      msg += " while executing #{context.index_step.inspect}\n"
+
+      msg += begin
+        "\n Record: #{context.source_record}\n"
+      rescue StandardError => to_s_exception
+        "\n (Could not log record, #{to_s_exception})\n"
+      end
+
+      msg += Traject::Util.exception_to_log_message(exception)
+
+      context.logger.error(msg) if context.logger
+
+      raise exception
+    end
+  }
 end
 
 each_record do |record, context|
diff --git a/spec/lib/dspace_indexer_spec.rb b/spec/lib/dspace_indexer_spec.rb
index e09ff41..ddcafa2 100644
--- a/spec/lib/dspace_indexer_spec.rb
+++ b/spec/lib/dspace_indexer_spec.rb
@@ -38,6 +38,44 @@
       response = Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
       expect(response["response"]["numFound"]).to eq 38
     end
+
+    context "when an error is raised" do
+      before do
+        indexer.traject_indexer.configure do
+          to_field 'id' do |_, _, _|
+            raise(StandardError, "I just like raising errors")
+          end
+        end
+
+        allow(indexer.traject_indexer.logger).to receive(:error)
+      end
+
+      it "propagates StandardError instances" do
+        expect { indexer.index }.to raise_error(StandardError, "I just like raising errors")
+
+        expect(indexer.traject_indexer.logger).not_to be_falsy
+        expect(indexer.traject_indexer.logger).to have_received(:error).with(/Unexpected error on record/).at_least(:once)
+      end
+    end
+
+    context "when a max skipped records error is raised" do
+      before do
+        indexer.traject_indexer.configure do
+          to_field 'id' do |_, _, _|
+            raise(Traject::SolrJsonWriter::MaxSkippedRecordsExceeded)
+          end
+        end
+
+        allow(indexer.traject_indexer.logger).to receive(:error)
+      end
+
+      it "only logs an error message" do
+        indexer.index
+
+        expect(indexer.traject_indexer.logger).not_to be_falsy
+        expect(indexer.traject_indexer.logger).to have_received(:error).with("Encountered exception: Traject::SolrJsonWriter::MaxSkippedRecordsExceeded").at_least(:once)
+      end
+    end
   end
 
   context 'invoking from CLI' do