Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add indexing for Hyrax::FileSet through valkyrie indexing #4780

Merged
merged 2 commits into from
Mar 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion app/actors/hyrax/actors/create_with_remote_files_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,18 @@ def attach_files(env, remote_files)
true
end

# Creates a file set for a remote file, choosing the implementation from the
# kind of work held in +env.curation_concern+: Valkyrie resources go through
# the Valkyrie path, everything else through the ActiveFedora path.
#
# @param env the actor environment; only +curation_concern+ is read here
# @param uri the remote location, passed through unchanged
# @param file_name the label for the new file set, passed through unchanged
# @param auth_header passed through unchanged; defaults to an empty Hash so
#   that three-argument callers of the earlier signature keep working
# @return whatever the selected implementation returns
def create_file_from_url(env, uri, file_name, auth_header = {})
  case env.curation_concern
  when Valkyrie::Resource
    create_file_from_url_through_valkyrie(env, uri, file_name, auth_header)
  else
    create_file_from_url_through_active_fedora(env, uri, file_name, auth_header)
  end
end

# Generic utility for creating FileSet from a URL
# Used to import files using URLs from a file picker like browse_everything
def create_file_from_url(env, uri, file_name, auth_header = {})
def create_file_from_url_through_active_fedora(env, uri, file_name, auth_header)
import_url = URI.decode_www_form_component(uri.to_s)
::FileSet.new(import_url: import_url, label: file_name) do |fs|
actor = Hyrax::Actors::FileSetActor.new(fs, env.user)
Expand All @@ -80,6 +89,23 @@ def create_file_from_url(env, uri, file_name, auth_header = {})
end
end

# Generic utility for creating Hyrax::FileSet from a URL
# Used to import files using URLs from a file picker like browse_everything
# Valkyrie path for building a Hyrax::FileSet from a URL.
#
# Persists a new file set labelled +file_name+, applies the work's visibility,
# attaches it to the work, then enqueues the job that fetches the content:
# IngestLocalFileJob for +file+ URIs, ImportUrlJob for anything else.
#
# @param env the actor environment (+curation_concern+ and +user+ are read)
# @param uri the source location; must respond to +scheme+ and +path+
# @param file_name the label stored on the new file set
# @param auth_header forwarded to ImportUrlJob for non-file URIs
def create_file_from_url_through_valkyrie(env, uri, file_name, auth_header)
  decoded_url = URI.decode_www_form_component(uri.to_s)
  unsaved_file_set = Hyrax::FileSet.new(import_url: decoded_url, label: file_name)
  file_set = Hyrax.persister.save(resource: unsaved_file_set)

  file_set_actor = Hyrax::Actors::FileSetActor.new(file_set, env.user, use_valkyrie: true)
  file_set_actor.create_metadata(visibility: env.curation_concern.visibility)
  file_set_actor.attach_to_work(env.curation_concern)

  unless uri.scheme == 'file'
    return ImportUrlJob.perform_later(file_set, operation_for(user: file_set_actor.user), auth_header)
  end

  # Local files: percent-escapes such as %20 are turned back into literal characters.
  IngestLocalFileJob.perform_later(file_set, CGI.unescape(uri.path), env.user)
end

def operation_for(user:)
Hyrax::Operation.create!(user: user,
operation_type: "Attach Remote File")
Expand Down
6 changes: 4 additions & 2 deletions app/actors/hyrax/actors/file_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class FileActor
# @param [FileSet] file_set the parent FileSet
# @param [Symbol, #to_sym] relation the type/use for the file
# @param [User] user the user to record as the Agent acting upon the file
def initialize(file_set, relation, user, use_valkyrie: false)
def initialize(file_set, relation, user, use_valkyrie: Hyrax.config.query_index_from_valkyrie)
@use_valkyrie = use_valkyrie
@file_set = file_set
@relation = normalize_relation(relation)
Expand Down Expand Up @@ -75,7 +75,7 @@ def perform_ingest_file_through_active_fedora(io)
CharacterizeJob.perform_later(file_set, repository_file.id, pathhint(io))
end

def perform_ingest_file_through_valkyrie(io)
def perform_ingest_file_through_valkyrie(io) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
file =
begin
Hyrax.storage_adapter.upload(resource: file_set, file: io, original_filename: io.original_name, use: relation)
Expand All @@ -87,7 +87,9 @@ def perform_ingest_file_through_valkyrie(io)
create_version(file_metadata, user)
id = file_metadata.file_identifier
file_set.file_ids << id
file_set.original_file_id = id
Hyrax.persister.save(resource: file_set)
Hyrax.publisher.publish('object.metadata.updated', object: file_set, user: user)
no-reply marked this conversation as resolved.
Show resolved Hide resolved
CharacterizeJob.perform_later(file_set, id.to_s, pathhint(io))
file_metadata
end
Expand Down
14 changes: 8 additions & 6 deletions app/actors/hyrax/actors/file_set_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class FileSetActor # rubocop:disable Metrics/ClassLength
include Lockable
attr_reader :file_set, :user, :attributes, :use_valkyrie

def initialize(file_set, user, use_valkyrie: false)
def initialize(file_set, user, use_valkyrie: Hyrax.config.query_index_from_valkyrie)
@use_valkyrie = use_valkyrie
@file_set = file_set
@user = user
Expand Down Expand Up @@ -68,7 +68,6 @@ def create_metadata(file_set_params = {})
yield(file_set) if block_given?
end

# Adds a FileSet to the work using ore:Aggregations.
# Locks to ensure that only one process is operating on the list at a time.
def attach_to_work(work, file_set_params = {})
acquire_lock_for(work.id) do
Expand All @@ -87,15 +86,18 @@ def attach_to_work(work, file_set_params = {})
def attach_to_valkyrie_work(work, file_set_params)
work = Hyrax.query_service.find_by(id: work.id) unless work.new_record
file_set.visibility = work.visibility unless assign_visibility?(file_set_params)
Hyrax.persister.save(resource: file_set)
work.member_ids << file_set.id
work.representative_id = file_set.id if work.representative_id.blank?
work.thumbnail_id = file_set.id if work.thumbnail_id.blank?
fs = Hyrax.persister.save(resource: file_set)
Hyrax.publisher.publish('object.metadata.updated', object: fs, user: user)
work.member_ids << fs.id
work.representative_id = fs.id if work.representative_id.blank?
work.thumbnail_id = fs.id if work.thumbnail_id.blank?
# Save the work so the association between the work and the file_set is persisted (head_id)
# NOTE: the work may not be valid, in which case this save doesn't do anything.
Hyrax.persister.save(resource: work)
Hyrax.publisher.publish('object.metadata.updated', object: work, user: user)
no-reply marked this conversation as resolved.
Show resolved Hide resolved
end

# Adds a FileSet to the work using ore:Aggregations.
def attach_to_af_work(work, file_set_params)
work.reload unless work.new_record?
file_set.visibility = work.visibility unless assign_visibility?(file_set_params)
Expand Down
116 changes: 116 additions & 0 deletions app/indexers/hyrax/valkyrie_file_set_indexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# frozen_string_literal: true

module Hyrax
##
# Indexes Hyrax::FileSet objects
class ValkyrieFileSetIndexer < Hyrax::ValkyrieIndexer
include Hyrax::ResourceIndexer
include Hyrax::PermissionIndexer
include Hyrax::VisibilityIndexer
include Hyrax::Indexer(:core_metadata)
include Hyrax::Indexer(:basic_metadata)

# include Hyrax::IndexesThumbnails # TODO: Is there a Valkyrie version of a thumbnail indexer?

def to_solr # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
super.tap do |solr_doc| # rubocop:disable Metrics/BlockLength
# Metadata from the FileSet
solr_doc['file_ids_ssim'] = resource.file_ids&.map(&:to_s)
solr_doc['original_file_id_ssi'] = resource.original_file_id.to_s
solr_doc['thumbnail_id_ssi'] = resource.thumbnail_id.to_s
solr_doc['extracted_text_id_ssi'] = resource.extracted_text_id.to_s

# Add in metadata from the original file.
file_metadata = original_file
return solr_doc unless file_metadata

# Label is the actual file name. It's not editable by the user.
solr_doc['original_file_alternate_ids_tesim'] = file_metadata.alternate_ids&.map(&:to_s) if file_metadata.alternate_ids.present?

solr_doc['original_filename_tesi'] = file_metadata.original_filename if file_metadata.original_filename.present?
solr_doc['original_filename_ssi'] = file_metadata.original_filename if file_metadata.original_filename.present?
solr_doc['mime_type_tesi'] = file_metadata.mime_type if file_metadata.mime_type.present?
solr_doc['mime_type_ssi'] = file_metadata.mime_type if file_metadata.mime_type.present?

solr_doc['file_format_tesim'] = file_format(file_metadata)
solr_doc['file_format_sim'] = file_format(file_metadata)
solr_doc['file_size_lts'] = file_metadata.size[0]
solr_doc['type_tesim'] = file_metadata.type if file_metadata.type.present?

# attributes set by fits
solr_doc['format_label_tesim'] = file_metadata.format_label if file_metadata.format_label.present?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to include all file metadata in the solr_doc? Right now, that is what this does.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻 seems okay to add a lot now and try to tidy up another time.

solr_doc['size_tesim'] = file_metadata.size if file_metadata.size.present?
no-reply marked this conversation as resolved.
Show resolved Hide resolved
solr_doc['well_formed_tesim'] = file_metadata.well_formed if file_metadata.well_formed.present?
solr_doc['valid_tesim'] = file_metadata.valid if file_metadata.valid.present?
solr_doc['fits_version_tesim'] = file_metadata.fits_version if file_metadata.fits_version.present?
solr_doc['exif_version_tesim'] = file_metadata.exif_version if file_metadata.exif_version.present?
solr_doc['checksum_tesim'] = file_metadata.checksum if file_metadata.checksum.present?

# shared attributes across multiple file types
solr_doc['frame_rate_tesim'] = file_metadata.frame_rate if file_metadata.frame_rate.present? # audio, video
solr_doc['bit_rate_tesim'] = file_metadata.bit_rate if file_metadata.bit_rate.present? # audio, video
solr_doc['duration_tesim'] = file_metadata.duration if file_metadata.duration.present? # audio, video
solr_doc['sample_rate_tesim'] = file_metadata.sample_rate if file_metadata.sample_rate.present? # audio, video

solr_doc['height_tesim'] = file_metadata.height if file_metadata.height.present? # image, video
solr_doc['width_tesim'] = file_metadata.width if file_metadata.width.present? # image, video

# attributes set by fits for audio files
solr_doc['bit_depth_tesim'] = file_metadata.bit_depth if file_metadata.bit_depth.present?
solr_doc['channels_tesim'] = file_metadata.channels if file_metadata.channels.present?
solr_doc['data_format_tesim'] = file_metadata.data_format if file_metadata.data_format.present?
solr_doc['offset_tesim'] = file_metadata.offset if file_metadata.offset.present?

# attributes set by fits for documents
solr_doc['file_title_tesim'] = file_metadata.file_title if file_metadata.file_title.present?
solr_doc['page_count_tesim'] = file_metadata.page_count if file_metadata.page_count.present?
solr_doc['language_tesim'] = file_metadata.language if file_metadata.language.present?
solr_doc['word_count_tesim'] = file_metadata.word_count if file_metadata.word_count.present?
solr_doc['character_count_tesim'] = file_metadata.character_count if file_metadata.character_count.present?
solr_doc['line_count_tesim'] = file_metadata.line_count if file_metadata.line_count.present?
solr_doc['character_set_tesim'] = file_metadata.character_set if file_metadata.character_set.present?
solr_doc['markup_basis_tesim'] = file_metadata.markup_basis if file_metadata.markup_basis.present?
solr_doc['paragraph_count_tesim'] = file_metadata.paragraph_count if file_metadata.paragraph_count.present?
solr_doc['markup_language_tesim'] = file_metadata.markup_language if file_metadata.markup_language.present?
solr_doc['table_count_tesim'] = file_metadata.table_count if file_metadata.table_count.present?
solr_doc['graphics_count_tesim'] = file_metadata.graphics_count if file_metadata.graphics_count.present?

# attributes set by fits for images
solr_doc['byte_order_tesim'] = file_metadata.byte_order if file_metadata.byte_order.present?
solr_doc['compression_tesim'] = file_metadata.compression if file_metadata.compression.present?
solr_doc['color_space_tesim'] = file_metadata.color_space if file_metadata.color_space.present?
solr_doc['profile_name_tesim'] = file_metadata.profile_name if file_metadata.profile_name.present?
solr_doc['profile_version_tesim'] = file_metadata.profile_version if file_metadata.profile_version.present?
solr_doc['orientation_tesim'] = file_metadata.orientation if file_metadata.orientation.present?
solr_doc['color_map_tesim'] = file_metadata.color_map if file_metadata.color_map.present?
solr_doc['image_producer_tesim'] = file_metadata.image_producer if file_metadata.image_producer.present?
solr_doc['capture_device_tesim'] = file_metadata.capture_device if file_metadata.capture_device.present?
solr_doc['scanning_software_tesim'] = file_metadata.scanning_software if file_metadata.scanning_software.present?
solr_doc['gps_timestamp_tesim'] = file_metadata.gps_timestamp if file_metadata.gps_timestamp.present?
solr_doc['latitude_tesim'] = file_metadata.latitude if file_metadata.latitude.present?
solr_doc['longitude_tesim'] = file_metadata.longitude if file_metadata.longitude.present?

# attributes set by fits for video
solr_doc['aspect_ratio_tesim'] = file_metadata.aspect_ratio if file_metadata.aspect_ratio.present?
end
end

private

# Looks up the file metadata to index for this file set.
#
# Asks the custom queries for the original file first; when that lookup
# raises Valkyrie::Persistence::ObjectNotFoundError, falls back to the first
# entry returned by +find_files+ for the same resource.
def original_file
  queries = Hyrax.custom_queries
  queries.find_original_file(file_set: resource)
rescue Valkyrie::Persistence::ObjectNotFoundError
  fallback_files = Hyrax.custom_queries.find_files(file_set: resource)
  fallback_files.first
end

# Builds the value indexed as the file format.
#
# - mime type and format label both present: "<subtype> (<labels joined by ', '>)"
# - only the mime type present: the part of the mime type after the last '/'
# - only the format label present: the format label itself, unchanged
# - neither present: nil
#
# @param file an object responding to +mime_type+ and +format_label+
def file_format(file)
  has_mime_type = file.mime_type.present?
  has_label = file.format_label.present?

  unless has_mime_type
    return has_label ? file.format_label : nil
  end

  subtype = file.mime_type.split('/').last
  return subtype unless has_label

  "#{subtype} (#{file.format_label.join(', ')})"
end
end
end
7 changes: 6 additions & 1 deletion app/indexers/hyrax/valkyrie_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,12 @@ class << self
# @example
# ValkyrieIndexer.for(resource: Book.new) # => #<BookIndexer ...>
def for(resource:)
indexer_class_for(resource).new(resource: resource)
case resource
when Hyrax::FileSet
Hyrax::ValkyrieFileSetIndexer.new(resource: resource)
else
indexer_class_for(resource).new(resource: resource)
end
end

private
Expand Down
1 change: 1 addition & 0 deletions app/models/hyrax/file_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Hyrax
# @see https://wiki.duraspace.org/display/samvera/Hydra%3A%3AWorks+Shared+Modeling
class FileSet < Hyrax::Resource
include Hyrax::Schema(:core_metadata)
include Hyrax::Schema(:basic_metadata)
no-reply marked this conversation as resolved.
Show resolved Hide resolved

attribute :file_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID) # id for FileMetadata resources
attribute :original_file_id, Valkyrie::Types::ID # id for FileMetadata resource
Expand Down
2 changes: 2 additions & 0 deletions app/models/hyrax/work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class Work < Hyrax::Resource
attribute :on_behalf_of, Valkyrie::Types::String
attribute :proxy_depositor, Valkyrie::Types::String
attribute :state, Valkyrie::Types::URI.default(Hyrax::ResourceStatus::ACTIVE)
attribute :representative_id, Valkyrie::Types::ID
attribute :thumbnail_id, Valkyrie::Types::ID
no-reply marked this conversation as resolved.
Show resolved Hide resolved

##
# @return [Boolean] true
Expand Down
2 changes: 1 addition & 1 deletion app/models/job_io_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def size
nil # unable to determine
end

def file_set(use_valkyrie: false)
def file_set(use_valkyrie: Hyrax.config.query_index_from_valkyrie)
no-reply marked this conversation as resolved.
Show resolved Hide resolved
return FileSet.find(file_set_id) unless use_valkyrie
Hyrax.query_service.find_by(id: Valkyrie::ID.new(file_set_id))
end
Expand Down
3 changes: 2 additions & 1 deletion app/services/hyrax/work_uploads_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def initialize(work:, persister: Hyrax.persister)
# @note we immediately and silently discard uploads with an existing
# file_set_uri, in a half-considered attempt at supporting idempotency
# (for job retries). this is for legacy/AttachFilesToWorkJob
# compatibility, but could stand for a roubst reimplementation.
# compatibility, but could stand for a robust reimplementation.
#
# @param [Enumerable<Hyrax::UploadedFile>] files files to add
#
Expand Down Expand Up @@ -106,6 +106,7 @@ def make_file_set_and_ingest(file)
Hyrax::AccessControlList.copy_permissions(source: target_permissions, target: file_set)
append_to_work(file_set)
IngestJob.perform_later(wrap_file(file, file_set))
Hyrax.publisher.publish('object.metadata.updated', object: file_set, user: file.user)
{ file_set: file_set, user: file.user }
end

Expand Down
2 changes: 2 additions & 0 deletions config/initializers/listeners.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

# Registers the :after_create_fileset callback. When invoked with a file set
# and user it publishes 'file.set.attached', followed by
# 'object.metadata.updated' for the same file set.
Hyrax.config.callback.set(:after_create_fileset, warn: false) do |file_set, user|
Hyrax.publisher.publish('file.set.attached', file_set: file_set, user: user)
Hyrax.publisher.publish('object.metadata.updated', object: file_set, user: user)
end

# Registers the :after_revert_content callback. When invoked with a file set,
# user and revision it publishes 'file.set.restored', followed by
# 'object.metadata.updated' for the same file set.
Hyrax.config.callback.set(:after_revert_content, warn: false) do |file_set, user, revision|
Hyrax.publisher.publish('file.set.restored', file_set: file_set, user: user, revision: revision)
Hyrax.publisher.publish('object.metadata.updated', object: file_set, user: user)
end

Hyrax.config.callback.set(:after_update_metadata, warn: false) do |curation_concern, user|
Expand Down
Loading