Skip to content

Commit

Permalink
Merge branch 'main' into download-cloud-files-later
Browse files Browse the repository at this point in the history
* main:
  fix oai after application_parser rework (#932)
  move various requires to a central location lib/bulkrax.rb, (#931)
  bump version to 7.0.0
  • Loading branch information
jeremyf committed Mar 8, 2024
2 parents ffb6f60 + ac34c18 commit a7ab196
Show file tree
Hide file tree
Showing 18 changed files with 304 additions and 268 deletions.
4 changes: 2 additions & 2 deletions app/controllers/concerns/bulkrax/datatables_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def format_importers(importers)
{
data: result,
recordsTotal: Bulkrax::Importer.count,
recordsFiltered: importers.size
recordsFiltered: Bulkrax::Importer.count
}
end

Expand All @@ -120,7 +120,7 @@ def format_exporters(exporters)
{
data: result,
recordsTotal: Bulkrax::Exporter.count,
recordsFiltered: exporters.size
recordsFiltered: Bulkrax::Exporter.count
}
end

Expand Down
2 changes: 0 additions & 2 deletions app/matchers/bulkrax/application_matcher.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# frozen_string_literal: true

require 'language_list'

module Bulkrax
class ApplicationMatcher
attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
Expand Down
2 changes: 0 additions & 2 deletions app/models/bulkrax/csv_entry.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# frozen_string_literal: true

require 'csv'

module Bulkrax
# TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
# We do too much in these entry classes. We need to extract the common logic from the various
Expand Down
2 changes: 0 additions & 2 deletions app/models/bulkrax/importer.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# frozen_string_literal: true

require 'iso8601'

module Bulkrax
class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
include Bulkrax::ImporterExporterBehavior
Expand Down
3 changes: 0 additions & 3 deletions app/models/bulkrax/oai_entry.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# frozen_string_literal: true

require 'erb'
require 'ostruct'

module Bulkrax
class OaiEntry < Entry
serialize :raw_metadata, Bulkrax::NormalizedJson
Expand Down
139 changes: 70 additions & 69 deletions app/models/bulkrax/rdf_entry.rb
Original file line number Diff line number Diff line change
@@ -1,91 +1,92 @@
# frozen_string_literal: true

require 'rdf'
module Bulkrax
class RdfEntry < Entry
serialize :raw_metadata, Bulkrax::NormalizedJson
unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
module Bulkrax
class RdfEntry < Entry
serialize :raw_metadata, Bulkrax::NormalizedJson

def self.read_data(path)
RDF::Reader.open(path)
end
def self.read_data(path)
RDF::Reader.open(path)
end

def self.fields_from_data(data)
data.predicates.map(&:to_s)
end
def self.fields_from_data(data)
data.predicates.map(&:to_s)
end

def self.data_for_entry(data, source_id, parser)
reader = data
format = reader.class.format.to_sym
collections = []
children = []
delete = nil
data = RDF::Writer.for(format).buffer do |writer|
reader.each_statement do |statement|
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
writer << statement
def self.data_for_entry(data, source_id, parser)
reader = data
format = reader.class.format.to_sym
collections = []
children = []
delete = nil
data = RDF::Writer.for(format).buffer do |writer|
reader.each_statement do |statement|
collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
writer << statement
end
end
return {
source_id => reader.subjects.first.to_s,
delete: delete,
format: format,
data: data,
collection: collections,
children: children
}
end
return {
source_id => reader.subjects.first.to_s,
delete: delete,
format: format,
data: data,
collection: collections,
children: children
}
end

def self.related_children_parsed_mapping
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
def self.related_children_parsed_mapping
return @related_children_parsed_mapping if @related_children_parsed_mapping.present?

rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
return if rdf_related_children_field_mapping.blank?
rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
return if rdf_related_children_field_mapping.blank?

@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
end
@related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
end

def record
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
end
def record
@record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
end

def build_metadata
raise StandardError, 'Record not found' if record.nil?
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
def build_metadata
raise StandardError, 'Record not found' if record.nil?
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?

self.parsed_metadata = {}
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
self.parsed_metadata = {}
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]

record.each_statement do |statement|
# Only process the subject for our record (in case other data is in the file)
next unless statement.subject.to_s == self.raw_metadata[source_identifier]
add_metadata(statement.predicate.to_s, statement.object.to_s)
end
add_visibility
add_rights_statement
add_admin_set_id
add_collections
add_local
self.parsed_metadata['file'] = self.raw_metadata['file']
record.each_statement do |statement|
# Only process the subject for our record (in case other data is in the file)
next unless statement.subject.to_s == self.raw_metadata[source_identifier]
add_metadata(statement.predicate.to_s, statement.object.to_s)
end
add_visibility
add_rights_statement
add_admin_set_id
add_collections
add_local
self.parsed_metadata['file'] = self.raw_metadata['file']

self.parsed_metadata
end
self.parsed_metadata
end

def collections_created?
return true if self.raw_metadata['collection'].blank?
self.raw_metadata['collection'].length == self.collection_ids.length
end
def collections_created?
return true if self.raw_metadata['collection'].blank?
self.raw_metadata['collection'].length == self.collection_ids.length
end

def find_collection_ids
return self.collection_ids if collections_created?
if self.raw_metadata['collection'].present?
self.raw_metadata['collection'].each do |collection|
c = find_collection(collection)
self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
def find_collection_ids
return self.collection_ids if collections_created?
if self.raw_metadata['collection'].present?
self.raw_metadata['collection'].each do |collection|
c = find_collection(collection)
self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
end
end
return self.collection_ids
end
return self.collection_ids
end
end
end
1 change: 0 additions & 1 deletion app/models/bulkrax/xml_entry.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# frozen_string_literal: true

require 'nokogiri'
module Bulkrax
# Generic XML Entry
class XmlEntry < Entry
Expand Down
1 change: 0 additions & 1 deletion app/models/concerns/bulkrax/export_behavior.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# frozen_string_literal: true
require 'marcel'

module Bulkrax
module ExportBehavior
Expand Down
1 change: 0 additions & 1 deletion app/models/concerns/bulkrax/importer_exporter_behavior.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# frozen_string_literal: true
require 'marcel'

module Bulkrax
module ImporterExporterBehavior
Expand Down
24 changes: 18 additions & 6 deletions app/parsers/bulkrax/application_parser.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# frozen_string_literal: true
require 'zip'
require 'marcel'

module Bulkrax
# An abstract class that establishes the API for Bulkrax's import and export parsing.
Expand Down Expand Up @@ -242,16 +240,30 @@ def calculate_type_delay(type)
return 0
end

# Converts a parsed record into a Hash by calling #to_h on it.
#
# create_entry_and_job passes the result as the final argument to
# find_or_create_entry. NOTE(review): presumably exists as a hook so parser
# subclasses can customise the stored raw metadata — confirm against subclasses.
#
# @param record [#to_h] the record being imported
# @return [Hash] the record's data as returned by record.to_h
def record_raw_metadata(record)
record.to_h
end

# Reports whether the given record is flagged for deletion.
#
# A record without a :delete key is never considered deleted. When the key is
# present, its value is interpreted by ActiveModel::Type::Boolean#cast and that
# result is returned as-is.
#
# @param record [#key?, #[]] the record being imported
# @return [Boolean, Object] false when the :delete key is absent; otherwise the
#   result of casting record[:delete]
def record_deleted?(record)
  if record.key?(:delete)
    delete_flag = record[:delete]
    ActiveModel::Type::Boolean.new.cast(delete_flag)
  else
    false
  end
end

# Reports whether the given record asks to be removed and then re-imported.
#
# The :remove_and_rerun value is only consulted when the key exists on the
# record; it is then interpreted by ActiveModel::Type::Boolean#cast.
#
# @param record [#key?, #[]] the record being imported
# @return [Boolean, Object] false when the :remove_and_rerun key is absent;
#   otherwise the result of casting record[:remove_and_rerun]
def record_remove_and_rerun?(record)
  # Short-circuits to false when the key is missing, otherwise yields the cast result.
  record.key?(:remove_and_rerun) &&
    ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
end

def create_entry_and_job(current_record, type, identifier = nil)
identifier ||= current_record[source_identifier]
new_entry = find_or_create_entry(send("#{type}_entry_class"),
identifier,
'Bulkrax::Importer',
current_record.to_h)
record_raw_metadata(current_record))
new_entry.status_info('Pending', importer.current_run)
if current_record[:delete].present?
if record_deleted?(current_record)
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
elsif current_record[:remove_and_rerun].present? || remove_and_rerun
elsif record_remove_and_rerun?(current_record) || remove_and_rerun
delay = calculate_type_delay(type)
"Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
else
Expand All @@ -260,7 +272,7 @@ def create_entry_and_job(current_record, type, identifier = nil)
end

# Optional, define if using browse everything for file upload
def retrieve_cloud_files(files); end
def retrieve_cloud_files(_files, _importer); end

# @param file [#path, #original_filename] the file object with the relevant data for the
# import.
Expand Down
Loading

0 comments on commit a7ab196

Please sign in to comment.