From b4f995b048905567dc4a7da0e22341e56484437b Mon Sep 17 00:00:00 2001 From: Naomi Dushay Date: Wed, 13 Mar 2024 11:40:08 -0700 Subject: [PATCH 1/2] indexing_identifiers_spec less noisy --- spec/features/indexing_identifiers_spec.rb | 213 ++++++++++----------- 1 file changed, 100 insertions(+), 113 deletions(-) diff --git a/spec/features/indexing_identifiers_spec.rb b/spec/features/indexing_identifiers_spec.rb index 6a2d27240..c3a0aa6cd 100644 --- a/spec/features/indexing_identifiers_spec.rb +++ b/spec/features/indexing_identifiers_spec.rb @@ -16,6 +16,7 @@ sign_in create(:user), groups: ['sdr:administrator-role'] solr_conn.commit # ensure no deletes are pending visit '/' + item.identification # ensure item is created before searching end after do @@ -23,148 +24,134 @@ solr_conn.commit end - describe 'identifier searching' do - context 'for druids' do - let(:prefixed_druid) { item.externalIdentifier } + context 'for druids' do + let(:prefixed_druid) { item.externalIdentifier } - it 'matches query with bare and prefixed druid' do - [prefixed_druid, prefixed_druid.split(':').last].each do |query| - fill_in 'q', with: query - click_button 'search' - expect(page).to have_content('1 entry found') - expect(page).to have_css('dd.blacklight-id', text: solr_id) - end + it 'matches query with bare and prefixed druid' do + [prefixed_druid, prefixed_druid.split(':').last].each do |query| + fill_in 'q', with: query + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) end end + end - context 'for sourceids' do - # SPEC: Source ID: M2549_2022-259_stertzer, where M2549 is the collection number and 2022-259 is the accession number - # sul:M0997_S1_B473_021_0001 (S is for series, B is for box, F is for folder ...) 
- let(:source_id) { "sul:M2549_2022-259_stertzer_#{SecureRandom.alphanumeric(12)}" } - let(:item) { FactoryBot.create_for_repository(:persisted_item, source_id:) } + context 'for sourceids' do + # SPEC: Source ID: M2549_2022-259_stertzer, where M2549 is the collection number and 2022-259 is the accession number + # sul:M0997_S1_B473_021_0001 (S is for series, B is for box, F is for folder ...) + let(:source_id) { "sul:M2549_2022-259_stertzer_#{SecureRandom.alphanumeric(12)}" } + let(:item) { FactoryBot.create_for_repository(:persisted_item, source_id:) } + + it 'matches whole string, including prefix before first colon' do + fill_in 'q', with: source_id + click_button 'search' + # expect a single result, but Solr may not finish commit for previous test delete in time + # expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end - before do - item.identification.sourceId # ensure item is created before searching - end + it 'matches without prefix before the first colon' do + fill_in 'q', with: source_id.split(':').last + click_button 'search' + # expect a single result, but Solr may not finish commit for previous test delete in time + # expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end - it 'matches whole string, including prefix before first colon' do - fill_in 'q', with: source_id + it 'matches source_id fragments' do + fragments = [ + 'M2549', + '2022-259', # accession number + 'M2549_2022-259', + 'M2549 2022 259', + 'stertzer', + '259_stertzer', + '259-stertzer', + '259 stertzer' + ] + fragments.each do |fragment| + fill_in 'q', with: fragment click_button 'search' # expect a single result, but Solr may not finish commit for previous test delete in time # expect(page).to have_content('1 entry found') expect(page).to have_css('dd.blacklight-id', text: solr_id) end + end - it 'matches without prefix before the first colon' do - fill_in 'q', with: 
source_id.split(':').last - click_button 'search' - # expect a single result, but Solr may not finish commit for previous test delete in time - # expect(page).to have_content('1 entry found') - expect(page).to have_css('dd.blacklight-id', text: solr_id) - end + it 'is not case sensitive' do + fill_in 'q', with: 'm2549 STERTZER' + click_button 'search' + # expect a single result, but Solr may not finish commit for previous test delete in time + # expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end - it 'matches source_id fragments' do - fragments = [ - 'M2549', - '2022-259', # accession number - 'M2549_2022-259', - 'M2549 2022 259', - 'stertzer', - '259_stertzer', - '259-stertzer', - '259 stertzer' - ] - fragments.each do |fragment| - fill_in 'q', with: fragment + punctuation_source_ids = [ + 'sulcons:8552-RB_Miscellanies_agabory,Before treatment photos', + 'Archiginnasio:Bassi_Box10_Folder2_Item3.14', + 'Revs:2012-015GHEW-CO-1980-b1_1.16_0007' + ] + punctuation_source_ids.each do |punctuation_source_id| + context "when punctuation in #{punctuation_source_id}" do + let(:source_id) { "#{punctuation_source_id}.#{SecureRandom.alphanumeric(4)}" } + + it 'matches without punctuation' do + fill_in 'q', with: source_id.gsub(/[_\-:.,]/, ' ') click_button 'search' - # expect a single result, but Solr may not finish commit for previous test delete in time - # expect(page).to have_content('1 entry found') expect(page).to have_css('dd.blacklight-id', text: solr_id) end end + end + end + + context 'for barcodes' do + let(:barcode) { '20503740296' } + let(:item) do + FactoryBot.create_for_repository(:persisted_item, identification: { + sourceId: "sul:#{SecureRandom.uuid}", + barcode: + }) + end - it 'is not case sensitive' do - fill_in 'q', with: 'm2549 STERTZER' + it 'matches query with bare and prefixed barcode' do + [barcode, "barcode:#{barcode}"].each do |query| + fill_in 'q', with: query click_button 'search' - # 
expect a single result, but Solr may not finish commit for previous test delete in time - # expect(page).to have_content('1 entry found') + expect(page).to have_content('1 entry found') expect(page).to have_css('dd.blacklight-id', text: solr_id) end - - punctuation_source_ids = [ - 'sulcons:8552-RB_Miscellanies_agabory,Before treatment photos', - 'Archiginnasio:Bassi_Box10_Folder2_Item3.14', - 'Revs:2012-015GHEW-CO-1980-b1_1.16_0007' - ] - punctuation_source_ids.each do |punctuation_source_id| - context "when punctuation in #{punctuation_source_id}" do - let(:source_id) { "#{punctuation_source_id}.#{SecureRandom.alphanumeric(4)}" } - - it 'matches without punctuation' do - fill_in 'q', with: source_id.gsub(/[_\-:.,]/, ' ') - click_button 'search' - expect(page).to have_css('dd.blacklight-id', text: solr_id) - end - end - end end + end - context 'for barcodes' do - let(:barcode) { '20503740296' } - let(:item) do - FactoryBot.create_for_repository(:persisted_item, identification: { - sourceId: "sul:#{SecureRandom.uuid}", - barcode: - }) - end - - before do - item.identification.barcode # ensure item is created before searching - end - - it 'matches query with bare and prefixed barcode' do - [barcode, "barcode:#{barcode}"].each do |query| - fill_in 'q', with: query - click_button 'search' - expect(page).to have_content('1 entry found') - expect(page).to have_css('dd.blacklight-id', text: solr_id) - end - end + context 'for ILS (folio) identifiers' do + let(:catalog_id) { 'a11403803' } + let(:item) do + FactoryBot.create_for_repository(:persisted_item, identification: { + sourceId: "sul:#{SecureRandom.uuid}", + catalogLinks: [{ + catalog: 'folio', + refresh: false, + catalogRecordId: catalog_id + }] + }) end - context 'for ILS (folio) identifiers' do - let(:catalog_id) { 'a11403803' } - let(:item) do - FactoryBot.create_for_repository(:persisted_item, identification: { - sourceId: "sul:#{SecureRandom.uuid}", - catalogLinks: [{ - catalog: 'folio', - refresh: false, - 
catalogRecordId: catalog_id - }] - }) - end - - before do - item.identification.catalogLinks # ensure item is created before searching - end - - it 'matches catalog identifier with and without folio prefix' do - [catalog_id, "folio:#{catalog_id}"].each do |query| - fill_in 'q', with: query - click_button 'search' - expect(page).to have_content('1 entry found') - expect(page).to have_css('dd.blacklight-id', text: solr_id) - end + it 'matches catalog identifier with and without folio prefix' do + [catalog_id, "folio:#{catalog_id}"].each do |query| + fill_in 'q', with: query + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) end end + end - context 'for DOIs' do - # is there a reason to tokenize DOIs? + context 'for DOIs' do + # is there a reason to tokenize DOIs? - it 'matches bare and "doi:" prefixed DOIs' do - skip('write this test') - end + it 'matches bare and "doi:" prefixed DOIs' do + skip('write this test') end end end From dc2842b3bd82a2275ff61f0d8ba3fea624015ecd Mon Sep 17 00:00:00 2001 From: Naomi Dushay Date: Wed, 13 Mar 2024 11:44:12 -0700 Subject: [PATCH 2/2] indexing_tags_spec added --- spec/features/indexing_tags_spec.rb | 120 ++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 spec/features/indexing_tags_spec.rb diff --git a/spec/features/indexing_tags_spec.rb b/spec/features/indexing_tags_spec.rb new file mode 100644 index 000000000..3dc9e4ecf --- /dev/null +++ b/spec/features/indexing_tags_spec.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +require 'rails_helper' + +# Integration tests for expected behaviors of our Solr indexing choices, through +# our whole stack: tests create cocina objects with factories, write them +# to dor-services-app, index the new objects via dor-indexing-app and then use +# the Argo UI to test Solr behavior such as search results and facet values. 
+# +# tag tests need javascript for facet testing because they are so slow to load in production +# javascript possibly also needed for the edit tags modal +# +# rubocop:disable Capybara/ClickLinkOrButtonStyle +RSpec.describe 'Indexing and search results for tags', :js do + let(:item) { FactoryBot.create_for_repository(:persisted_item) } + let(:solr_id) { item.externalIdentifier } + let(:project_tag) { 'Project : ARS 78s : broken' } + let(:non_project_tag) { 'willet : murder of crows : curlew' } + let(:project_tag2) { 'Project : jira-517' } + let(:blacklight_config) { CatalogController.blacklight_config } + let(:solr_conn) { blacklight_config.repository_class.new(blacklight_config).connection } + + # I wanted to do this as a before(:context) but after much teeth gnashing, I gave up + # That would have facilitated a single setup done once that each test could utilize. + before do + sign_in create(:user), groups: ['sdr:administrator-role'] + solr_conn.commit # ensure no deletes are pending + visit solr_document_path(item.externalIdentifier) + find("a[aria-label='Edit tags']").click + within('#edit-modal') do + click_button '+ Add another tag' + fill_in currently_with: '', with: project_tag + click_button '+ Add another tag' + fill_in currently_with: '', with: non_project_tag + click_button '+ Add another tag' + fill_in currently_with: '', with: project_tag2 + click_button 'Save' + end + click_link_or_button 'Reindex' + # wait for indexing + expect(page).to have_text('Successfully updated index') # rubocop:disable RSpec/ExpectInHook + visit '/' + item.description # ensure item is created before searching + end + + after do + solr_conn.delete_by_id(solr_id) + solr_conn.commit + end + + # one giant it block to reduce the time to run the tests; this is because I + # fussed with before(:context) for some hours and then gave up on it. 
+ it 'searches and facets behave as expected' do + # ------- search behavior -------- + + # project tags values include "Project" in searchable value + fill_in 'q', with: 'Project' + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + + # project tags are searchable, tokenized + ['ARS', '78s', 'ARS 78s', 'broken', '"78s broken"'].each do |token| + fill_in 'q', with: token + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end + + # non-project tags are searchable, tokenized + ['willet', 'murder of crows', 'murder', 'of', 'crows', 'curlew', '"crows curlew"'].each do |token| + fill_in 'q', with: token + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end + + # Some Argo accessioneers have developed systems to work around problematic + # searching existing in Argo for years. In some cases there are giant spreadsheets + # with links that depend on old things working. + # + # project tags with spaces around the colon work (searching) + # Tag of “Project : jira-517” should match search term of “jira-517” + # We do not need “jira” or “517” to match, but … it can? 
+ ['jira-517', 'jira', '517', '"jira 517"'].each do |token| + fill_in 'q', with: token + click_button 'search' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end + + # ------- facet behavior -------- + + # project tags are a hierarchical facet + fill_in 'q', with: solr_id + click_button 'search' + click_link_or_button 'Project' + # ensure facet has been expanded by javascript + expect(page).to have_css('#facet-exploded_project_tag_ssim') + # Note that "Project" is not indexed as part of facet + click_link_or_button 'ARS 78s' + click_link_or_button 'broken' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + + # non-project tags are a hierarchical facet + fill_in 'q', with: solr_id + click_button 'search' + click_link_or_button 'Tag' + # ensure facet has been expanded by javascript + expect(page).to have_css('#facet-exploded_nonproject_tag_ssim') + click_link_or_button 'willet' + skip 'FIXME: is this failing on spaces in nonproject tag values?' + click_link_or_button 'murder of crows' + click_link_or_button 'curlew' + expect(page).to have_content('1 entry found') + expect(page).to have_css('dd.blacklight-id', text: solr_id) + end +end +# rubocop:enable Capybara/ClickLinkOrButtonStyle