Skip to content

Commit

Permalink
Merge pull request #1541 from ubiquitypress/hyku-csv-importer
Browse files Browse the repository at this point in the history
CSVImporter file upload bug fix
orangewolf authored Aug 22, 2018
2 parents bc51174 + 904a369 commit 7526936
Showing 11 changed files with 173 additions and 89 deletions.
5 changes: 4 additions & 1 deletion .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -114,4 +114,7 @@ Performance/RegexpMatch:
Exclude:
- 'lib/importer/csv_parser.rb'
- 'app/services/solr_config_uploader.rb'
- 'app/models/account.rb'
- 'app/models/account.rb'

ClassLength:
Max: 120
Binary file added lib/assets/batch-upload-test/example3.tiff
Binary file not shown.
Binary file added lib/assets/batch-upload-test/sample.docx
Binary file not shown.
3 changes: 3 additions & 0 deletions lib/assets/csv_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,type,title,description,resource_type,contributor,contributor,date_created,file
11111111-2465-1111-5468-000123456789,ETD,Hammock Tacos,,image,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,example3.tiff
22222222-0971-2222-1353-000123456789,ETD,Freegan Intelligentsia,,text,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,sample.docx
1 change: 0 additions & 1 deletion lib/importer/csv_parser.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
module Importer
# rubocop:disable Metrics/ClassLength
class CSVParser
include Enumerable

2 changes: 1 addition & 1 deletion lib/importer/factory/image_factory.rb
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ module Factory
class ImageFactory < ObjectFactory
include WithAssociatedCollection

self.klass = GenericWork
self.klass = Image
# A way to identify objects that are not Hydra minted identifiers
self.system_identifier_field = :identifier

29 changes: 25 additions & 4 deletions lib/importer/factory/object_factory.rb
Original file line number Diff line number Diff line change
@@ -111,16 +111,37 @@ def transform_attributes
.merge(file_attributes)
end

# NOTE: This approach is probably broken since the actor that handled `:files` attribute was removed:
# https://github.com/samvera/hyrax/commit/3f1b58195d4381c51fde8b9149016c5b09f0c9b4
# Find existing file or upload new file. This assumes a Work will have unique file titles;
# could filter by URIs instead (slower).
# When an uploaded_file already exists we do not want to pass its id in `file_attributes`
# otherwise it gets reuploaded by `work_actor`.
def upload_ids
work_files_titles = object.file_sets.map(&:title) if object.present? && object.file_sets.present?
work_files_titles && work_files_titles.include?(attributes[:file]) ? [] : [import_file(file_paths.first)]
end

def file_attributes
files_directory.present? && files.present? ? { files: file_paths } : {}
hash = {}
hash[:uploaded_files] = upload_ids if files_directory.present? && attributes[:file].present?
hash[:remote_files] = attributes[:remote_files] if attributes[:remote_files].present?
hash
end

def file_paths
files.map { |file_name| File.join(files_directory, file_name) }
attributes[:file].map { |file_name| File.join(files_directory, file_name) } if attributes[:file]
end

def import_file(path)
u = Hyrax::UploadedFile.new
u.user_id = User.find_by_user_key(User.batch_user_key).id if User.find_by_user_key(User.batch_user_key)
u.file = CarrierWave::SanitizedFile.new(path)
u.save
u.id
end

## TO DO: handle invalid file in CSV
## currently the importer stops if no file corresponding to a given file_name is found

# Regardless of what the MODS Parser gives us, these are the properties we are prepared to accept.
def permitted_attributes
klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility]
1 change: 1 addition & 0 deletions lib/importer/factory/with_associated_collection.rb
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ def create_attributes
# Strip out the :collection key, and add the member_of_collection_ids,
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
def update_attributes
return super if attributes[:collection].nil?
super.except(:collection).merge(member_of_collection_ids: [collection.id])
end

82 changes: 41 additions & 41 deletions spec/lib/importer/factory/etd_factory_spec.rb
Original file line number Diff line number Diff line change
@@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ETDFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let(:coll) { create(:collection) }

context "for a new image" do
it 'calls the actor with the files' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { GenericWork }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'calls the actor with the files' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
@@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
82 changes: 41 additions & 41 deletions spec/lib/importer/factory/image_factory_spec.rb
Original file line number Diff line number Diff line change
@@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ImageFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let!(:coll) { create(:collection) }

context "for a new image" do
it 'creates file sets with access controls' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { Image }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let!(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'creates file sets with access controls' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
@@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
57 changes: 57 additions & 0 deletions spec/support/shared_examples_for_csv_importer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
RSpec.shared_examples "csv_importer" do
context "with a file" do
let(:attributes) do
{
id: "123",
title: ["Gluten-free umami"],
file: ["world.png"]
}
end
let(:factory) { described_class.new(attributes, 'spec/fixtures/images') }

before { factory.run }

describe "#run" do
it "uploads the content of the file" do
expect(Hyrax::UploadedFile.last[:file]).to eq("world.png")
end
end

describe "when a work with the same id already exists" do
let(:new_attr) do
{
id: "123",
title: ["Squid tofu banjo"],
file: ["nypl-hydra-of-lerna.jpg"]
}
end

it "updates metadata" do
new_factory = described_class.new(new_attr, 'spec/fixtures/images')
new_factory.run
expect(work.last.title).to eq(["Squid tofu banjo"])
end
end
end

context "without a file" do
## the csv_importer still creates a Work when no file is provided.
## TO DO: handle invalid file in CSV; current behavior:
## the importer stops if no file corresponding to a given file_name is found
let(:attributes) { { id: "345", title: ["Artisan succulents"] } }
let(:factory) { described_class.new(attributes) }

before { factory.run }

it "creates a Work with supplied metadata" do
expect(work.find("345").title).to eq(["Artisan succulents"])
end

it "updates a Work with supplied metadata" do
new_attr = { id: "345", title: ["Retro humblebrag"] }
new_factory = described_class.new(new_attr)
new_factory.run
expect(work.find("345").title).to eq(["Retro humblebrag"])
end
end
end

0 comments on commit 7526936

Please sign in to comment.