Skip to content
This repository has been archived by the owner on Nov 28, 2024. It is now read-only.

Commit

Permalink
Merge pull request #602 from Data-Liberation-Front/move-mongooid-mode…
Browse files Browse the repository at this point in the history
…ls-to-legacy

Move mongooid models to legacy
  • Loading branch information
Floppy authored Apr 16, 2021
2 parents 4512989 + 9cdf871 commit dba2e04
Show file tree
Hide file tree
Showing 13 changed files with 570 additions and 559 deletions.
138 changes: 138 additions & 0 deletions app/models/legacy/package.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
require 'package_processor'
require 'lite_kitten/lite_kitten'

# Dataset wrapper for a source that is held locally rather than addressed by
# URL. It always reports itself as a datapackage.
class LocalDataset < DataKitten::Dataset
  extend DataKitten::PublishingFormats::Datapackage

  # Identifies this dataset as local; Legacy::Package#parse_package stores no
  # URL for datasets whose origin is :local.
  def origin
    :local
  end

  # A 'package' can be a DataPackage, or it can be any collection of multiple
  # CSVs, and/or a collection of CSVs and their schema metadata.
  # ?? is this used in conjunction with package_helper
  def publishing_format
    :datapackage
  end
end

# Dataset addressed by URL; its publishing format is probed as soon as the
# instance is built.
class RemoteDataset < DataKitten::Dataset
  # @param url the location recorded as this dataset's access URL
  def initialize(url)
    @access_url = url
    detect_publishing_format
  end

  # Extends this instance with the first publishing-format module that
  # reports support for it. Datapackage is tried before CKAN; when neither
  # claims support, nothing is mixed in.
  def detect_publishing_format
    candidates = [
      DataKitten::PublishingFormats::Datapackage,
      DataKitten::PublishingFormats::CKAN
    ]

    candidates.each do |candidate|
      next unless candidate.supported?(self)

      extend candidate
      break
    end
  end
end

# Legacy Mongoid document (collection "packages") grouping the validations
# created from one submission: either a set of plain sources, or a single
# datapackage/CKAN dataset whose distributions are validated individually.
class Legacy::Package
  include Mongoid::Document
  store_in collection: "packages"
  include Mongoid::Timestamps

  field :url, type: String
  field :dataset, type: String
  field :type, type: String

  has_many :validations

  # Builds the attribute hash used to update this package from a dataset.
  #
  # @param dataset a dataset responding to origin, access_url and
  #   publishing_format
  # @param validations the validations to associate with the package
  # @return [Hash] attributes; :url is nil for datasets whose origin is :local
  def parse_package(dataset, validations)
    {
      :url => dataset.origin == :local ? nil : dataset.access_url,
      # The dataset object is persisted as a Marshal dump.
      # NOTE(review): anything that reads this back with Marshal.load must
      # trust the stored data — confirm against the readers of this field.
      :dataset => Marshal.dump(dataset),
      :validations => validations,
      :type => dataset.publishing_format
    }
  end

  # Delegates processing of the package with the given id to PackageProcessor.
  def self.process(id, params)
    PackageProcessor.new(params, id).process
  end

  # Populates this package from the given sources.
  #
  # A single source that could be a package (see #possible_package?) is first
  # tried as a dataset; if it has a recognised publishing format the result
  # of #create_datapackage is returned. Otherwise the type is recorded, one
  # validation is created per source, and the package is saved.
  #
  # @param sources collection of URL strings or uploaded-file objects
  # @param schema_url passed through to Validation.create_validation
  # @param schema passed through to Validation.create_validation
  # @return [Legacy::Package, nil] self, or nil when there are no sources or
  #   the dataset yields no validations
  def create_package(sources, schema_url = nil, schema = nil)
    return nil if sources.count == 0

    if sources.count == 1 && possible_package?(sources.first)
      dataset = create_dataset(sources.first)
      return create_datapackage(dataset) unless dataset.nil?
    end

    update_attributes({ type: set_type(sources) })

    sources.each do |source|
      validations << Validation.create_validation(source, schema_url, schema)
    end

    save
    self
  end

  # Wraps a source in a dataset object.
  #
  # Sources responding to :body become a LocalDataset built from their
  # string_io; anything else is handed to RemoteDataset.
  #
  # @return the dataset, or nil unless its publishing format is :ckan or
  #   :datapackage
  def create_dataset(source)
    dataset =
      if source.respond_to?(:body)
        LocalDataset.new(access_url: source.string_io)
      else
        RemoteDataset.new(source)
      end

    return nil unless [:ckan, :datapackage].include?(dataset.publishing_format)

    dataset
  end

  # Creates validations for the dataset's distributions and stores the
  # dataset's details on this package.
  #
  # @return [Legacy::Package, nil] self, or nil when no distribution could be
  #   validated
  def create_datapackage(dataset)
    # Named differently from the `validations` association to avoid shadowing.
    package_validations = create_validations(dataset)

    return nil if package_validations.count == 0

    update_attributes(parse_package(dataset, package_validations))
    save
    self
  end

  # Creates one validation per distribution that passes #can_validate?.
  #
  # @return [Array] the created validations (empty when none qualify)
  def create_validations(dataset)
    validatable = dataset.distributions.select { |distribution| can_validate?(distribution) }

    validatable.map do |distribution|
      Validation.create_validation(distribution.access_url, nil, create_schema(distribution))
    end
  end

  # True-ish when the source is exactly a String or looks like an uploaded
  # datapackage.json (see #local_package?).
  def possible_package?(source)
    source.class == String || local_package?(source)
  end

  # True-ish when the source exposes string_io and its filename contains
  # "datapackage.json".
  def local_package?(source)
    source.respond_to?(:string_io) && source.filename =~ /datapackage\.json/
  end

  # Classifies the sources by inspecting the first one.
  #
  # @return [String, nil] "files" when it responds to :tempfile, "urls" when
  #   it is exactly a String, nil otherwise
  def set_type(sources)
    first_source = sources.first
    return "files" if first_source.respond_to?(:tempfile)

    "urls" if first_source.class == String
  end

  # Whether a distribution is a CSV reachable over HTTP(S).
  #
  # The URL check is anchored with \A rather than ^ so that only the very
  # start of the string counts; ^ would also match after an embedded newline.
  #
  # @return [Boolean]
  def can_validate?(distribution)
    return false unless distribution.format.extension == :csv

    url = distribution.access_url
    return false unless url

    !!(url =~ /\Ahttps?/)
  end

  # Builds a Csvlint schema from the distribution's JSON table schema.
  #
  # @return the schema, or nil when the distribution has none
  def create_schema(distribution)
    return nil if distribution.schema.nil?

    Csvlint::Schema.from_json_table(nil, distribution.schema)
  end
end
14 changes: 14 additions & 0 deletions app/models/legacy/schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Legacy Mongoid document for a schema, kept in the "schemas" collection and
# linked to the validations that used it.
class Legacy::Schema
  include Mongoid::Document
  include Mongoid::Timestamps

  store_in collection: "schemas"

  field :url, type: String

  has_many :validations

  # Uses the stringified document id as the parameter representation.
  def to_param
    id.to_s
  end
end
76 changes: 76 additions & 0 deletions app/models/legacy/summary.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Embedded document holding one Hash per message level (errors, warnings,
# info messages). Legacy::Summary.generate fills each Hash with counts keyed
# by message type.
class LevelSummary
  include Mongoid::Document

  embedded_in :summary

  field :errors_breakdown,        type: Hash
  field :warnings_breakdown,      type: Hash
  field :info_messages_breakdown, type: Hash
end

# Embedded document holding one Hash per message category (structure,
# schema, context). Legacy::Summary.generate fills each Hash with counts
# keyed by message type.
class CategorySummary
  include Mongoid::Document

  embedded_in :summary

  field :structure_breakdown, type: Hash
  field :schema_breakdown,    type: Hash
  field :context_breakdown,   type: Hash
end

# Legacy Mongoid document (collection "summaries") holding aggregate counts
# over all validations that have a URL: totals per state, per host, per
# message level and per message category.
class Legacy::Summary
  include Mongoid::Document
  store_in collection: "summaries"
  include Mongoid::Timestamps

  field :sources, type: Integer
  field :states, type: Hash
  field :hosts, type: Hash

  embeds_one :level_summary
  embeds_one :category_summary

  # Builds, saves and returns a new summary.
  #
  # For every validation with a non-nil url this counts its state and host,
  # and, per message level and per category, counts each distinct message
  # type once per validation.
  #
  # @return the saved summary document
  def self.generate
    # NOTE(review): `Summary` is resolved by ordinary constant lookup from
    # inside Legacy::Summary, so it may refer to a top-level ::Summary rather
    # than this class — confirm which model is intended.
    summary = Summary.create

    # Retrieve validations from the Mongo datastore, newest first.
    validations = Validation.where(:url.ne => nil).order_by(:created_at.desc)

    summary.sources = validations.length
    summary.states = {"warnings"=>0, "valid"=>0, "not_found"=>0, "invalid"=>0}
    summary.hosts = Hash.new 0
    summary.create_level_summary( errors_breakdown: Hash.new(0), warnings_breakdown: Hash.new(0), info_messages_breakdown: Hash.new(0) )
    summary.create_category_summary( structure_breakdown: Hash.new(0), schema_breakdown: Hash.new(0), context_breakdown: Hash.new(0) )

    validations.each do |validation|
      summary.states[validation.state] += 1

      host = source_host(validation.url)
      summary.hosts[host] += 1 unless host.nil?

      validator = validation.validator
      messages = []

      [:errors, :warnings, :info_messages].each do |level|
        level_messages = validator.send(level)
        next if level_messages.nil?

        messages += level_messages
        # Each message type counts at most once per validation.
        level_messages.uniq { |m| m.type }.each do |msg|
          summary.level_summary.send("#{level}_breakdown".to_sym)[ msg.type ] += 1
        end
      end

      [:structure, :schema, :context].each do |category|
        in_category = messages.select { |m| m.category == category }
        in_category.uniq { |m| m.type }.each do |msg|
          summary.category_summary.send("#{category}_breakdown".to_sym)[ msg.type ] += 1
        end
      end
    end

    summary.save
    summary
  end

  # Normalises a URL to the host key used in the `hosts` breakdown:
  # lower-cased, without a leading "www.", and with every "." replaced by the
  # fullwidth full stop U+FF0E.
  #
  # The replacement is presumably there because the host is used as a Hash
  # key stored in MongoDB, where "." in key names is problematic — confirm.
  # TODO: better option?
  #
  # A bare `private` does not apply to methods defined with `def self.`, so
  # this helper has always been publicly callable; it is left public here so
  # existing callers keep working.
  #
  # @param url anything whose to_s is a URL
  # @return [String, nil] the normalised host, or nil when the URL has no
  #   host or cannot be parsed
  def self.source_host(url)
    host = URI.parse(url.to_s).host
    return if host.nil?

    host = host.downcase
    host = host[4..-1] if host.start_with?('www.')
    host.tr(".", "\uff0e")
  rescue URI::InvalidURIError
    # One malformed stored URL must not abort the whole summary; the caller
    # already skips nil hosts.
    nil
  end
end
Loading

0 comments on commit dba2e04

Please sign in to comment.