Skip to content

Commit

Permalink
Import files from CSV
Browse files Browse the repository at this point in the history
Working solution

Refactor

Tests
  • Loading branch information
cldambrosio committed Jul 6, 2018
1 parent 1e39ac2 commit 0cfbcfb
Show file tree
Hide file tree
Showing 11 changed files with 217 additions and 106 deletions.
Binary file added lib/assets/batch-upload-test/example3.tiff
Binary file not shown.
Binary file added lib/assets/batch-upload-test/sample.docx
Binary file not shown.
3 changes: 3 additions & 0 deletions lib/assets/csv_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,type,title,description,resource_type,contributor,contributor,date_created,file
11111111-2465-1111-5468-000123456789,ETD,Hammock Tacos,,image,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,example3.tiff
22222222-0971-2222-1353-000123456789,ETD,Freegan Intelligentsia,,text,Tracy S Gertler,Cynthia V Stack,2015-03-29T22:12:12.100363+00:00,sample.docx
14 changes: 14 additions & 0 deletions lib/importer/attach_files_to_work.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Called in 'object_factory'
module Importer
# Attaches one uploaded file to a work by driving a file set actor step by step.
class AttachFilesToWork < AttachFilesToWorkJob
# Overrides hyrax/app/jobs/attach_files_to_work_job because
# the original `perform` method iterates through an array of files and raises NoMethodError (`each`)
# (presumably when it is handed a single uploaded file rather than an array -- TODO confirm).
#
# @param work the work to attach to; its permissions are copied onto the actor's file set
# @param uploaded_file the upload handed to `actor.create_content`; its `file_set_uri` is updated afterwards
# @param actor object exposing `file_set`, `create_metadata`, `create_content` and `attach_to_work`
#   (presumably a Hyrax::Actors::FileSetActor -- confirm against the caller)
def perform(work, uploaded_file, actor)
# Give the file set the same permissions as the work it will belong to.
actor.file_set.permissions_attributes = work.permissions.map(&:to_hash)
actor.create_metadata
actor.create_content(uploaded_file)
actor.attach_to_work(work)
# Record on the upload which file set it ended up in.
uploaded_file.update(file_set_uri: actor.file_set.uri)
end
end
end
2 changes: 1 addition & 1 deletion lib/importer/factory/collection_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def update
run_callbacks(:save) do
object.save!
end
log_updated(object)
# log_updated(object)
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/importer/factory/image_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module Factory
class ImageFactory < ObjectFactory
include WithAssociatedCollection

self.klass = GenericWork
self.klass = Image
# A way to identify objects that are not Hydra minted identifiers
self.system_identifier_field = :identifier

Expand Down
67 changes: 45 additions & 22 deletions lib/importer/factory/object_factory.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'importer/log_subscriber'
require 'importer/attach_files_to_work'
module Importer
module Factory
class ObjectFactory
Expand All @@ -25,12 +26,13 @@ def run
object
end

## FOR CONSIDERATION: handle a row (i.e. Work) with more than one file:
## currently the file_set is replaced on update
def update
raise "Object doesn't exist" unless object
run_callbacks(:save) do
work_actor.update(environment(update_attributes))
end
log_updated(object)
work_actor.update(environment(update_attributes))
destroy_existing_file_set if object.file_sets.present?
attach_file_to_work
end

def create_attributes
Expand Down Expand Up @@ -64,23 +66,21 @@ def search_by_identifier
def create
attrs = create_attributes
@object = klass.new
run_callbacks :save do
run_callbacks :create do
klass == Collection ? create_collection(attrs) : work_actor.create(environment(attrs))
end
end
log_created(object)
klass == Collection ? create_collection(attrs) : work_actor.create(environment(attrs))
attach_file_to_work
end

# Logs, at info level, that `obj` was created. The message ends with the
# first value of the system identifier attribute in parentheses.
#
# @param obj the created object; only its `id` is read
def log_created(obj)
  model_label = klass.model_name.human
  identifier = Array(attributes[system_identifier_field]).first
  Rails.logger.info("Created #{model_label} #{obj.id} (#{identifier})")
end

# Logs, at info level, that `obj` was updated. The message ends with the
# first value of the system identifier attribute in parentheses.
#
# @param obj the updated object; only its `id` is read
def log_updated(obj)
  model_label = klass.model_name.human
  identifier = Array(attributes[system_identifier_field]).first
  Rails.logger.info("Updated #{model_label} #{obj.id} (#{identifier})")
end
## The methods below are commented out to pass RuboCop inspection ("Class has too many lines");
## the logs already mention the CREATE and UPDATE actions once they are done.
# def log_created(obj)
# msg = "Created #{klass.model_name.human} #{obj.id}"
# Rails.logger.info("#{msg} (#{Array(attributes[system_identifier_field]).first})")
# end
#
# def log_updated(obj)
# msg = "Updated #{klass.model_name.human} #{obj.id}"
# Rails.logger.info("#{msg} (#{Array(attributes[system_identifier_field]).first})")
# end

private

Expand All @@ -94,6 +94,10 @@ def work_actor
Hyrax::CurationConcern.actor
end

# Builds a file set actor around a freshly created FileSet.
#
# @param w any object responding to `user`; that user is handed to the actor
# @return [Hyrax::Actors::FileSetActor]
def file_set_actor(w)
  new_file_set = FileSet.create
  acting_user = w.user
  Hyrax::Actors::FileSetActor.new(new_file_set, acting_user)
end

def create_collection(attrs)
@object.attributes = attrs
@object.apply_depositor_metadata(User.batch_user)
Expand All @@ -107,14 +111,33 @@ def transform_attributes
.merge(file_attributes)
end

# NOTE: This approach is probably broken since the actor that handled `:files` attribute was removed:
# https://github.com/samvera/hyrax/commit/3f1b58195d4381c51fde8b9149016c5b09f0c9b4
# Attributes describing the files to attach.
#
# @return [Hash] `{ files: file_paths }` when both `files_directory` and
#   `files` are present, otherwise an empty hash
def file_attributes
  return {} unless files_directory.present? && files.present?

  { files: file_paths }
end

def file_paths
files.map { |file_name| File.join(files_directory, file_name) }
attributes[:file].map { |file_name| File.join(files_directory, file_name) } if attributes[:file]
end

# Builds and saves a Hyrax::UploadedFile wrapping the file at `path`.
#
# The upload is assigned to the batch user when that user exists; otherwise
# `user_id` is left unset.
#
# @param path [String] location of the file to wrap
# @return [Hyrax::UploadedFile] the upload; the result of `save` is not checked
def import_file(path)
  # Look the batch user up once rather than once for the guard and again for the id.
  batch_user = User.find_by_user_key(User.batch_user_key)

  Hyrax::UploadedFile.new.tap do |upload|
    upload.user_id = batch_user.id if batch_user
    upload.file = CarrierWave::SanitizedFile.new(path)
    upload.save
  end
end

## If no file name is provided in the CSV file, `attach_file_to_work` is not performed.
## TODO: handle an invalid file in the CSV;
## currently the importer stops if no file corresponding to a given file_name is found.
# Imports the first file listed for this row and attaches it to `object`.
#
# Does nothing when `file_paths` is nil or empty. An empty array used to be
# treated as "has a file", which imported a nil path and attached an upload
# with no file behind it.
#
# @return the result of `AttachFilesToWork#perform`, or nil when nothing was attached
def attach_file_to_work
  # Evaluate file_paths once; Array() turns nil into [].
  path = Array(file_paths).first
  return unless path

  imported_file = import_file(path)
  return unless imported_file

  AttachFilesToWork.new.perform(object, imported_file, file_set_actor(imported_file))
end

# Destroys the work's first file set when the row names a different file.
#
# Nothing is destroyed when the work has no file set, or when the row has no
# `:file` value: in that case no replacement would be attached, so removing
# the existing file set would only lose data.
# NOTE(review): `attributes[:file]` and `title` are compared with `!=` as-is;
# confirm both sides have the same shape (e.g. both arrays of file names).
#
# @return the result of `destroy`, or nil when nothing was destroyed
def destroy_existing_file_set
  existing = object.file_sets.first
  requested = attributes[:file]
  return if existing.nil? || requested.nil?

  existing.destroy if requested != existing.title
end

# Regardless of what the MODS Parser gives us, these are the properties we are prepared to accept.
Expand Down
1 change: 1 addition & 0 deletions lib/importer/factory/with_associated_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def create_attributes
# Strip out the :collection key, and add the member_of_collection_ids,
# which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor
# Returns the inherited update attributes unchanged when no :collection is
# given; otherwise drops :collection and adds member_of_collection_ids.
def update_attributes
  if attributes[:collection].nil?
    super
  else
    super.except(:collection).merge(member_of_collection_ids: [collection.id])
  end
end

Expand Down
82 changes: 41 additions & 41 deletions spec/lib/importer/factory/etd_factory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ETDFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let(:coll) { create(:collection) }

context "for a new image" do
it 'calls the actor with the files' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { GenericWork }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'calls the actor with the files' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
Expand All @@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
82 changes: 41 additions & 41 deletions spec/lib/importer/factory/image_factory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,50 @@

RSpec.describe Importer::Factory::ImageFactory, :clean do
let(:factory) { described_class.new(attributes) }
let(:actor) { double }
before do
allow(Hyrax::CurationConcern).to receive(:actor).and_return(actor)
end
let(:files) { [] }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end

context 'with files' do
let(:factory) { described_class.new(attributes, 'tmp/files', files) }
let(:files) { ['img.png'] }
let!(:coll) { create(:collection) }

context "for a new image" do
it 'creates file sets with access controls' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
let(:work) { Image }

context "for an existing image without files" do
let(:work) { create(:generic_work) }
let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }

it 'creates file sets' do
expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
end
factory.run
end
end
end
# context 'with files' do
# let(:factory) { described_class.new(attributes, 'tmp/files', files) }
# let(:files) { ['img.png'] }
# let!(:coll) { create(:collection) }
#
# context "for a new image" do
# it 'creates file sets with access controls' do
# expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
#
# context "for an existing image without files" do
# let(:work) { create(:generic_work) }
# let(:factory) { described_class.new(attributes.merge(id: work.id), 'tmp/files', files) }
#
# it 'creates file sets' do
# expect(actor).to receive(:update).with(Hyrax::Actors::Environment) do |k|
# expect(k.attributes).to include(member_of_collection_ids: [coll.id], files: ['tmp/files/img.png'])
# end
# factory.run
# end
# end
# end

context 'when a collection already exists' do
let!(:coll) { create(:collection) }
let(:attributes) do
{
collection: { id: coll.id },
files: files,
identifier: ['123'],
title: ['Test image'],
read_groups: ['public'],
depositor: 'bob',
edit_users: ['bob']
}
end
let(:actor) { Hyrax::CurationConcern.actor }

it 'does not create a new collection' do
expect(actor).to receive(:create).with(Hyrax::Actors::Environment) do |k|
Expand All @@ -58,4 +56,6 @@
end.to change(Collection, :count).by(0)
end
end

include_examples("csv_importer")
end
Loading

0 comments on commit 0cfbcfb

Please sign in to comment.