diff --git a/app/controllers/concerns/bulkrax/datatables_behavior.rb b/app/controllers/concerns/bulkrax/datatables_behavior.rb
index d130de6df..240d0bf43 100644
--- a/app/controllers/concerns/bulkrax/datatables_behavior.rb
+++ b/app/controllers/concerns/bulkrax/datatables_behavior.rb
@@ -103,7 +103,7 @@ def format_importers(importers)
       {
         data: result,
         recordsTotal: Bulkrax::Importer.count,
-        recordsFiltered: importers.size
+        recordsFiltered: Bulkrax::Importer.count
       }
     end
 
@@ -120,7 +120,7 @@ def format_exporters(exporters)
       {
         data: result,
         recordsTotal: Bulkrax::Exporter.count,
-        recordsFiltered: exporters.size
+        recordsFiltered: Bulkrax::Exporter.count
       }
     end
 
diff --git a/app/matchers/bulkrax/application_matcher.rb b/app/matchers/bulkrax/application_matcher.rb
index da1b6ebc3..0f7c7ccab 100644
--- a/app/matchers/bulkrax/application_matcher.rb
+++ b/app/matchers/bulkrax/application_matcher.rb
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
 
-require 'language_list'
-
 module Bulkrax
   class ApplicationMatcher
     attr_accessor :to, :from, :parsed, :if, :split, :excluded, :nested_type
diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb
index f8573c586..55d28a69c 100644
--- a/app/models/bulkrax/csv_entry.rb
+++ b/app/models/bulkrax/csv_entry.rb
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
 
-require 'csv'
-
 module Bulkrax
   # TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense.
   # We do too much in these entry classes. We need to extract the common logic from the various
diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb
index e24320380..ef60c0805 100644
--- a/app/models/bulkrax/importer.rb
+++ b/app/models/bulkrax/importer.rb
@@ -1,7 +1,5 @@
 # frozen_string_literal: true
 
-require 'iso8601'
-
 module Bulkrax
   class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
     include Bulkrax::ImporterExporterBehavior
diff --git a/app/models/bulkrax/oai_entry.rb b/app/models/bulkrax/oai_entry.rb
index 5a87013fd..17a5f9c49 100644
--- a/app/models/bulkrax/oai_entry.rb
+++ b/app/models/bulkrax/oai_entry.rb
@@ -1,8 +1,5 @@
 # frozen_string_literal: true
 
-require 'erb'
-require 'ostruct'
-
 module Bulkrax
   class OaiEntry < Entry
     serialize :raw_metadata, Bulkrax::NormalizedJson
diff --git a/app/models/bulkrax/rdf_entry.rb b/app/models/bulkrax/rdf_entry.rb
index bb2426615..3af03ca19 100644
--- a/app/models/bulkrax/rdf_entry.rb
+++ b/app/models/bulkrax/rdf_entry.rb
@@ -1,91 +1,92 @@
 # frozen_string_literal: true
 
-require 'rdf'
-module Bulkrax
-  class RdfEntry < Entry
-    serialize :raw_metadata, Bulkrax::NormalizedJson
-
-    def self.read_data(path)
-      RDF::Reader.open(path)
-    end
-
-    def self.fields_from_data(data)
-      data.predicates.map(&:to_s)
-    end
-
-    def self.data_for_entry(data, source_id, parser)
-      reader = data
-      format = reader.class.format.to_sym
-      collections = []
-      children = []
-      delete = nil
-      data = RDF::Writer.for(format).buffer do |writer|
-        reader.each_statement do |statement|
-          collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
-          children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
-          delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
-          writer << statement
-        end
-      end
-      return {
-        source_id => reader.subjects.first.to_s,
-        delete: delete,
-        format: format,
-        data: data,
-        collection: collections,
-        children: children
-      }
-    end
-
-    def self.related_children_parsed_mapping
-      return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
-
-      rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
-      return if rdf_related_children_field_mapping.blank?
-
-      @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
-    end
-
-    def record
-      @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
-    end
-
-    def build_metadata
-      raise StandardError, 'Record not found' if record.nil?
-      raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
-
-      self.parsed_metadata = {}
-      self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
-
-      record.each_statement do |statement|
-        # Only process the subject for our record (in case other data is in the file)
-        next unless statement.subject.to_s == self.raw_metadata[source_identifier]
-        add_metadata(statement.predicate.to_s, statement.object.to_s)
-      end
-      add_visibility
-      add_rights_statement
-      add_admin_set_id
-      add_collections
-      add_local
-      self.parsed_metadata['file'] = self.raw_metadata['file']
-
-      self.parsed_metadata
-    end
-
-    def collections_created?
-      return true if self.raw_metadata['collection'].blank?
-      self.raw_metadata['collection'].length == self.collection_ids.length
-    end
-
-    def find_collection_ids
-      return self.collection_ids if collections_created?
-      if self.raw_metadata['collection'].present?
-        self.raw_metadata['collection'].each do |collection|
-          c = find_collection(collection)
-          self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
-        end
-      end
-      return self.collection_ids
-    end
-  end
-end
+unless ENV.fetch('BULKRAX_NO_RDF', 'false').to_s == 'true'
+  module Bulkrax
+    class RdfEntry < Entry
+      serialize :raw_metadata, Bulkrax::NormalizedJson
+
+      def self.read_data(path)
+        RDF::Reader.open(path)
+      end
+
+      def self.fields_from_data(data)
+        data.predicates.map(&:to_s)
+      end
+
+      def self.data_for_entry(data, source_id, parser)
+        reader = data
+        format = reader.class.format.to_sym
+        collections = []
+        children = []
+        delete = nil
+        data = RDF::Writer.for(format).buffer do |writer|
+          reader.each_statement do |statement|
+            collections << statement.object.to_s if parent_field(parser).present? && parent_field(parser) == statement.predicate.to_s
+            children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
+            delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
+            writer << statement
+          end
+        end
+        return {
+          source_id => reader.subjects.first.to_s,
+          delete: delete,
+          format: format,
+          data: data,
+          collection: collections,
+          children: children
+        }
+      end
+
+      def self.related_children_parsed_mapping
+        return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
+
+        rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
+        return if rdf_related_children_field_mapping.blank?
+
+        @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
+      end
+
+      def record
+        @record ||= RDF::Reader.for(self.raw_metadata['format'].to_sym).new(self.raw_metadata['data'])
+      end
+
+      def build_metadata
+        raise StandardError, 'Record not found' if record.nil?
+        raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
+
+        self.parsed_metadata = {}
+        self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
+
+        record.each_statement do |statement|
+          # Only process the subject for our record (in case other data is in the file)
+          next unless statement.subject.to_s == self.raw_metadata[source_identifier]
+          add_metadata(statement.predicate.to_s, statement.object.to_s)
+        end
+        add_visibility
+        add_rights_statement
+        add_admin_set_id
+        add_collections
+        add_local
+        self.parsed_metadata['file'] = self.raw_metadata['file']
+
+        self.parsed_metadata
+      end
+
+      def collections_created?
+        return true if self.raw_metadata['collection'].blank?
+        self.raw_metadata['collection'].length == self.collection_ids.length
+      end
+
+      def find_collection_ids
+        return self.collection_ids if collections_created?
+        if self.raw_metadata['collection'].present?
+          self.raw_metadata['collection'].each do |collection|
+            c = find_collection(collection)
+            self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
+          end
+        end
+        return self.collection_ids
+      end
+    end
+  end
+end
diff --git a/app/models/bulkrax/xml_entry.rb b/app/models/bulkrax/xml_entry.rb
index ad5f5540e..64f940d37 100644
--- a/app/models/bulkrax/xml_entry.rb
+++ b/app/models/bulkrax/xml_entry.rb
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require 'nokogiri'
 module Bulkrax
   # Generic XML Entry
   class XmlEntry < Entry
diff --git a/app/models/concerns/bulkrax/export_behavior.rb b/app/models/concerns/bulkrax/export_behavior.rb
index cfafe279d..e8a9f499d 100644
--- a/app/models/concerns/bulkrax/export_behavior.rb
+++ b/app/models/concerns/bulkrax/export_behavior.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
 
-require 'marcel'
 module Bulkrax
   module ExportBehavior
diff --git a/app/models/concerns/bulkrax/importer_exporter_behavior.rb b/app/models/concerns/bulkrax/importer_exporter_behavior.rb
index 2ad20a8d8..f14dbdd65 100644
--- a/app/models/concerns/bulkrax/importer_exporter_behavior.rb
+++ b/app/models/concerns/bulkrax/importer_exporter_behavior.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
 
-require 'marcel'
 module Bulkrax
   module ImporterExporterBehavior
diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb
index d2b603ee3..c0302008e 100644
--- a/app/parsers/bulkrax/application_parser.rb
+++ b/app/parsers/bulkrax/application_parser.rb
@@ -1,6 +1,4 @@
 # frozen_string_literal: true
 
-require 'zip'
-require 'marcel'
 module Bulkrax
   # An abstract class that establishes the API for Bulkrax's import and export parsing.
@@ -242,16 +240,30 @@ def calculate_type_delay(type)
       return 0
     end
 
+    def record_raw_metadata(record)
+      record.to_h
+    end
+
+    def record_deleted?(record)
+      return false unless record.key?(:delete)
+      ActiveModel::Type::Boolean.new.cast(record[:delete])
+    end
+
+    def record_remove_and_rerun?(record)
+      return false unless record.key?(:remove_and_rerun)
+      ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+    end
+
     def create_entry_and_job(current_record, type, identifier = nil)
       identifier ||= current_record[source_identifier]
       new_entry = find_or_create_entry(send("#{type}_entry_class"),
                                        identifier,
                                        'Bulkrax::Importer',
-                                       current_record.to_h)
+                                       record_raw_metadata(current_record))
       new_entry.status_info('Pending', importer.current_run)
-      if current_record[:delete].present?
+      if record_deleted?(current_record)
         "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
-      elsif current_record[:remove_and_rerun].present? || remove_and_rerun
+      elsif record_remove_and_rerun?(current_record) || remove_and_rerun
         delay = calculate_type_delay(type)
         "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
       else
@@ -260,7 +272,7 @@ def create_entry_and_job(current_record, type, identifier = nil)
     end
 
     # Optional, define if using browse everything for file upload
-    def retrieve_cloud_files(files); end
+    def retrieve_cloud_files(_files, _importer); end
 
     # @param file [#path, #original_filename] the file object that with the relevant data for the
     # import.
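> Annotation (reviewer note, not part of the patch): the new `record_deleted?` and `record_remove_and_rerun?` helpers in `ApplicationParser` replace bare `.present?` checks with `ActiveModel::Type::Boolean` casting. That is a behavior fix as much as a refactor: a CSV cell holding the literal string `"false"` was truthy under `.present?` and would previously have enqueued a delete job. A minimal standalone sketch of the casting semantics this relies on (example values are illustrative):

```ruby
require 'active_model'

# ActiveModel::Type::Boolean#cast maps "false"-like strings to false and
# blank input to nil; the old `record[:delete].present?` check was true for
# any non-empty string, including "false" and "0".
bool = ActiveModel::Type::Boolean.new

bool.cast('true')  # => true   (a Bulkrax::Delete*Job would be enqueued)
bool.cast('1')     # => true
bool.cast('false') # => false  (the old check would have enqueued a delete here)
bool.cast('0')     # => false
bool.cast(nil)     # => nil    (falsey; the record imports normally)
```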
diff --git a/app/parsers/bulkrax/bagit_parser.rb b/app/parsers/bulkrax/bagit_parser.rb
index a937f276b..a7f2bbd55 100644
--- a/app/parsers/bulkrax/bagit_parser.rb
+++ b/app/parsers/bulkrax/bagit_parser.rb
@@ -1,223 +1,224 @@
 # frozen_string_literal: true
 
-require 'bagit'
-module Bulkrax
-  class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
-    include ExportBehavior
-
-    def self.export_supported?
-      true
-    end
-
-    def valid_import?
-      return true if import_fields.present?
-    rescue => e
-      set_status_info(e)
-      false
-    end
-
-    def entry_class
-      rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
-      rdf_format ? RdfEntry : CsvEntry
-    end
-
-    def path_to_files(filename:)
-      @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
-    end
-
-    # Take a random sample of 10 metadata_paths and work out the import fields from that
-    def import_fields
-      raise StandardError, 'No metadata files were found' if metadata_paths.blank?
-      @import_fields ||= metadata_paths.sample(10).map do |path|
-        entry_class.fields_from_data(entry_class.read_data(path))
-      end.flatten.compact.uniq
-    end
-
-    # Create an Array of all metadata records
-    def records(_opts = {})
-      raise StandardError, 'No BagIt records were found' if bags.blank?
-      @records ||= bags.map do |bag|
-        path = metadata_path(bag)
-        raise StandardError, 'No metadata files were found' if path.blank?
-        data = entry_class.read_data(path)
-        get_data(bag, data)
-      end
-
-      @records = @records.flatten
-    end
-
-    def get_data(bag, data)
-      if entry_class == CsvEntry
-        data = data.map do |data_row|
-          record_data = entry_class.data_for_entry(data_row, source_identifier, self)
-          next record_data if importerexporter.metadata_only?
-
-          record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
-          record_data
-        end
-      else
-        data = entry_class.data_for_entry(data, source_identifier, self)
-        data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
-      end
-
-      data
-    end
-
-    # export methods
-
-    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
-    def write_files
-      require 'open-uri'
-      require 'socket'
-
-      folder_count = 1
-      records_in_folder = 0
-      work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
-      collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
-      file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
-
-      work_entries[0..limit || total].each do |entry|
-        record = ActiveFedora::Base.find(entry.identifier)
-        next unless record
-
-        bag_entries = [entry]
-
-        if record.member_of_collection_ids.present?
-          collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
-        end
-
-        if record.file_sets.present?
-          file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
-        end
-
-        records_in_folder += bag_entries.count
-        if records_in_folder > records_split_count
-          folder_count += 1
-          records_in_folder = bag_entries.count
-        end
-
-        bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
-
-        record.file_sets.each do |fs|
-          file_name = filename(fs)
-          next if file_name.blank? || fs.original_file.blank?
-
-          io = open(fs.original_file.uri)
-          file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
-          file.write(io.read)
-          file.close
-          begin
-            bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
-          rescue => e
-            entry.set_status_info(e)
-            set_status_info(e)
-          end
-        end
-
-        CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
-          bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
-        end
-
-        write_triples(folder_count, entry)
-        bag.manifest!(algo: 'sha256')
-      end
-    end
-    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
-
-    def setup_csv_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id, 'metadata.csv')
-    end
-
-    def key_allowed(key)
-      !Bulkrax.reserved_properties.include?(key) &&
-        new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
-        key != source_identifier.to_s
-    end
-
-    def setup_triple_metadata_export_file(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id, 'metadata.nt')
-    end
-
-    def setup_bagit_folder(folder_count, id)
-      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
-      FileUtils.mkdir_p(path) unless File.exist?(path)
-
-      File.join(path, id)
-    end
-
-    # @todo(bjustice) - remove hyrax reference
-    def write_triples(folder_count, e)
-      sd = SolrDocument.find(e.identifier)
-      return if sd.nil?
-
-      req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
-      rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
-      File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
-        triples.write(rdf)
-      end
-    end
-
-    # @todo - investigate getting directory structure
-    # @todo - investigate using perform_later, and having the importer check for
-    # DownloadCloudFileJob before it starts
-    def retrieve_cloud_files(files)
-      # There should only be one zip file for Bagit, take the first
-      return if files['0'].blank?
-      target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
-      # Now because we want the files in place before the importer runs
-      Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
-      return target_file
-    end
-
-    private
-
-    def bags
-      return @bags if @bags.present?
-      new_bag = bag(import_file_path)
-      @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
-      @bags.delete(nil)
-      raise StandardError, 'No valid bags found' if @bags.blank?
-      return @bags
-    end
-
-    # Gather the paths to all bags; skip any stray files
-    def bag_paths
-      bags.map(&:bag_dir)
-    end
-
-    def metadata_file_name
-      raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
-      parser_fields['metadata_file_name']
-    end
-
-    # Gather the paths to all metadata files matching the metadata_file_name
-    def metadata_paths
-      @metadata_paths ||= bag_paths.map do |b|
-        Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-      end.flatten.compact
-    end
-
-    def metadata_path(bag)
-      Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
-    end
-
-    def bag(path)
-      return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
-      bag = BagIt::Bag.new(path)
-      return nil unless bag.valid?
-      bag
-    end
-
-    # use the version of this method from the application parser instead
-    def real_import_file_path
-      return importer_unzip_path if file? && zip?
-      parser_fields['import_file_path']
-    end
-  end
-end
+unless ENV.fetch('BULKRAX_NO_BAGIT', 'false').to_s == 'true'
+  module Bulkrax
+    class BagitParser < CsvParser # rubocop:disable Metrics/ClassLength
+      include ExportBehavior
+
+      def self.export_supported?
+        true
+      end
+
+      def valid_import?
+        return true if import_fields.present?
+      rescue => e
+        set_status_info(e)
+        false
+      end
+
+      def entry_class
+        rdf_format = parser_fields&.[]('metadata_format') == "Bulkrax::RdfEntry"
+        rdf_format ? RdfEntry : CsvEntry
+      end
+
+      def path_to_files(filename:)
+        @path_to_files ||= Dir.glob(File.join(import_file_path, '**/data', filename)).first
+      end
+
+      # Take a random sample of 10 metadata_paths and work out the import fields from that
+      def import_fields
+        raise StandardError, 'No metadata files were found' if metadata_paths.blank?
+        @import_fields ||= metadata_paths.sample(10).map do |path|
+          entry_class.fields_from_data(entry_class.read_data(path))
+        end.flatten.compact.uniq
+      end
+
+      # Create an Array of all metadata records
+      def records(_opts = {})
+        raise StandardError, 'No BagIt records were found' if bags.blank?
+        @records ||= bags.map do |bag|
+          path = metadata_path(bag)
+          raise StandardError, 'No metadata files were found' if path.blank?
+          data = entry_class.read_data(path)
+          get_data(bag, data)
+        end
+
+        @records = @records.flatten
+      end
+
+      def get_data(bag, data)
+        if entry_class == CsvEntry
+          data = data.map do |data_row|
+            record_data = entry_class.data_for_entry(data_row, source_identifier, self)
+            next record_data if importerexporter.metadata_only?
+
+            record_data[:file] = bag.bag_files.join('|') if Bulkrax.curation_concerns.include? record_data[:model]&.constantize
+            record_data
+          end
+        else
+          data = entry_class.data_for_entry(data, source_identifier, self)
+          data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
+        end
+
+        data
+      end
+
+      # export methods
+
+      # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
+      def write_files
+        require 'open-uri'
+        require 'socket'
+
+        folder_count = 1
+        records_in_folder = 0
+        work_entries = importerexporter.entries.where(type: work_entry_class.to_s)
+        collection_entries = importerexporter.entries.where(type: collection_entry_class.to_s)
+        file_set_entries = importerexporter.entries.where(type: file_set_entry_class.to_s)
+
+        work_entries[0..limit || total].each do |entry|
+          record = ActiveFedora::Base.find(entry.identifier)
+          next unless record
+
+          bag_entries = [entry]
+
+          if record.member_of_collection_ids.present?
+            collection_entries.each { |ce| bag_entries << ce if ce.parsed_metadata.value?(record.id) }
+          end
+
+          if record.file_sets.present?
+            file_set_entries.each { |fse| bag_entries << fse if fse.parsed_metadata.value?(record.id) }
+          end
+
+          records_in_folder += bag_entries.count
+          if records_in_folder > records_split_count
+            folder_count += 1
+            records_in_folder = bag_entries.count
+          end
+
+          bag ||= BagIt::Bag.new setup_bagit_folder(folder_count, entry.identifier)
+
+          record.file_sets.each do |fs|
+            file_name = filename(fs)
+            next if file_name.blank? || fs.original_file.blank?
+
+            io = open(fs.original_file.uri)
+            file = Tempfile.new([file_name, File.extname(file_name)], binmode: true)
+            file.write(io.read)
+            file.close
+            begin
+              bag.add_file(file_name, file.path) if bag.bag_files.select { |b| b.include?(file_name) }.blank?
+            rescue => e
+              entry.set_status_info(e)
+              set_status_info(e)
+            end
+          end
+
+          CSV.open(setup_csv_metadata_export_file(folder_count, entry.identifier), "w", headers: export_headers, write_headers: true) do |csv|
+            bag_entries.each { |csv_entry| csv << csv_entry.parsed_metadata }
+          end
+
+          write_triples(folder_count, entry)
+          bag.manifest!(algo: 'sha256')
+        end
+      end
+      # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
+
+      def setup_csv_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id, 'metadata.csv')
+      end
+
+      def key_allowed(key)
+        !Bulkrax.reserved_properties.include?(key) &&
+          new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
+          key != source_identifier.to_s
+      end
+
+      def setup_triple_metadata_export_file(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id, 'metadata.nt')
+      end
+
+      def setup_bagit_folder(folder_count, id)
+        path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
+        FileUtils.mkdir_p(path) unless File.exist?(path)
+
+        File.join(path, id)
+      end
+
+      # @todo(bjustice) - remove hyrax reference
+      def write_triples(folder_count, e)
+        sd = SolrDocument.find(e.identifier)
+        return if sd.nil?
+
+        req = ActionDispatch::Request.new({ 'HTTP_HOST' => Socket.gethostname })
+        rdf = Hyrax::GraphExporter.new(sd, req).fetch.dump(:ntriples)
+        File.open(setup_triple_metadata_export_file(folder_count, e.identifier), "w") do |triples|
+          triples.write(rdf)
+        end
+      end
+
+      # @todo - investigate getting directory structure
+      # @todo - investigate using perform_later, and having the importer check for
+      # DownloadCloudFileJob before it starts
+      def retrieve_cloud_files(files, _importer)
+        # There should only be one zip file for Bagit, take the first
+        return if files['0'].blank?
+        target_file = File.join(path_for_import, files['0']['file_name'].tr(' ', '_'))
+        # Now because we want the files in place before the importer runs
+        Bulkrax::DownloadCloudFileJob.perform_now(files['0'], target_file)
+        return target_file
+      end
+
+      private
+
+      def bags
+        return @bags if @bags.present?
+        new_bag = bag(import_file_path)
+        @bags = new_bag ? [new_bag] : Dir.glob("#{import_file_path}/**/*").map { |d| bag(d) }
+        @bags.delete(nil)
+        raise StandardError, 'No valid bags found' if @bags.blank?
+        return @bags
+      end
+
+      # Gather the paths to all bags; skip any stray files
+      def bag_paths
+        bags.map(&:bag_dir)
+      end
+
+      def metadata_file_name
+        raise StandardError, 'The metadata file name must be specified' if parser_fields['metadata_file_name'].blank?
+        parser_fields['metadata_file_name']
+      end
+
+      # Gather the paths to all metadata files matching the metadata_file_name
+      def metadata_paths
+        @metadata_paths ||= bag_paths.map do |b|
+          Dir.glob("#{b}/**/*").select { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+        end.flatten.compact
+      end
+
+      def metadata_path(bag)
+        Dir.glob("#{bag.bag_dir}/**/*").detect { |f| File.file?(f) && f.ends_with?(metadata_file_name) }
+      end
+
+      def bag(path)
+        return nil unless path && File.exist?(File.join(path, 'bagit.txt'))
+        bag = BagIt::Bag.new(path)
+        return nil unless bag.valid?
+        bag
+      end
+
+      # use the version of this method from the application parser instead
+      def real_import_file_path
+        return importer_unzip_path if file? && zip?
+        parser_fields['import_file_path']
+      end
+ parser_fields['import_file_path'] + end end end end diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index 9aeb6dcc5..67ef41f3c 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require 'csv' module Bulkrax class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength include ErroredEntries diff --git a/app/parsers/bulkrax/oai_dc_parser.rb b/app/parsers/bulkrax/oai_dc_parser.rb index 03a3a663d..886560f1d 100644 --- a/app/parsers/bulkrax/oai_dc_parser.rb +++ b/app/parsers/bulkrax/oai_dc_parser.rb @@ -105,6 +105,24 @@ def create_works set_status_info(e) end + def create_file_sets; end + + def create_relationships + ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id) + end + + def record_raw_metadata(_record) + nil + end + + def record_deleted?(_record) + false + end + + def record_remove_and_rerun?(_record) + false + end + # oai records so not let us set the source identifier easily def record_has_source_identifier(record, index) identifier = record.send(source_identifier) diff --git a/app/parsers/bulkrax/xml_parser.rb b/app/parsers/bulkrax/xml_parser.rb index a5854e3c2..0e9a33259 100644 --- a/app/parsers/bulkrax/xml_parser.rb +++ b/app/parsers/bulkrax/xml_parser.rb @@ -1,6 +1,4 @@ # frozen_string_literal: true -require 'marcel' - module Bulkrax class XmlParser < ApplicationParser def entry_class diff --git a/bulkrax.gemspec b/bulkrax.gemspec index adf49b707..6662a86c2 100644 --- a/bulkrax.gemspec +++ b/bulkrax.gemspec @@ -22,6 +22,7 @@ Gem::Specification.new do |s| s.add_dependency 'bagit', '~> 0.4' s.add_dependency 'coderay' s.add_dependency 'denormalize_fields' + s.add_dependency 'marcel' s.add_dependency 'iso8601', '~> 0.9.0' s.add_dependency 'kaminari' s.add_dependency 'language_list', '~> 1.2', '>= 1.2.1' diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index 1dbf40b60..8c201f8ee 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -3,8 +3,27 @@ require "bulkrax/version" require "bulkrax/engine" require 'active_support/all' + require 'coderay' +require 'csv' require 'denormalize_fields' +require 'erb' +require 'iso8601' +require 'language_list' +require 'marcel' +require 'nokogiri' +require 'ostruct' +require 'zip' + +def conditional_require(gem_name) + require gem_name +rescue LoadError + ENV["BULKRAX_NO_#{gem_name.upcase}"] = 'true' +end + +conditional_require 'bagit' +conditional_require 'rdf' + # rubocop:disable Metrics/ModuleLength module Bulkrax extend self # rubocop:disable Style/ModuleFunction diff --git a/lib/bulkrax/version.rb b/lib/bulkrax/version.rb index ea78a8e7b..c1f3be2e3 100644 --- a/lib/bulkrax/version.rb +++ b/lib/bulkrax/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Bulkrax - VERSION = '6.0.1' + VERSION = '7.0.0' end diff --git a/spec/parsers/bulkrax/bagit_parser_spec.rb b/spec/parsers/bulkrax/bagit_parser_spec.rb index 4ca1ae265..beec3052b 100644 --- a/spec/parsers/bulkrax/bagit_parser_spec.rb +++ b/spec/parsers/bulkrax/bagit_parser_spec.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'rails_helper' -require 'bagit' module Bulkrax RSpec.describe BagitParser do
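> Annotation (reviewer note, not part of the patch): `conditional_require` swallows the `LoadError` when `bagit` or `rdf` is missing from the host application's bundle and records that fact in `BULKRAX_NO_BAGIT`/`BULKRAX_NO_RDF`, which the guards around `BagitParser` and `RdfEntry` read at load time. Because the guards only inspect the environment, a host application should also be able to opt out explicitly even when the gems are installed — a sketch, assuming the flag is set before the Bulkrax engine loads (for example from the shell or early in boot):

```ruby
# Hypothetical host-app opt-out: with these flags set before Bulkrax loads,
# the guarded class definitions in app/models/bulkrax/rdf_entry.rb and
# app/parsers/bulkrax/bagit_parser.rb are skipped entirely.
ENV['BULKRAX_NO_RDF']   ||= 'true'
ENV['BULKRAX_NO_BAGIT'] ||= 'true'
```

Together with the new two-argument `retrieve_cloud_files(files, importer)` hook, these are breaking changes for downstream parsers, consistent with the version bump from 6.0.1 to 7.0.0 rather than a minor release.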