This repository has been archived by the owner on Nov 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #602 from Data-Liberation-Front/move-mongooid-mode…
…ls-to-legacy Move mongooid models to legacy
- Loading branch information
Showing
13 changed files
with
570 additions
and
559 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
require 'package_processor' | ||
require 'lite_kitten/lite_kitten' | ||
|
||
# Dataset backed by a locally uploaded file (a StringIO) rather than a
# remote URL. Local uploads are always handled as a datapackage.
class LocalDataset < DataKitten::Dataset
  extend DataKitten::PublishingFormats::Datapackage

  # Tags this dataset as coming from a local upload (cf. RemoteDataset).
  def origin
    :local
  end

  # A 'package' can be a DataPackage, or it can be any collection of
  # multiple CSVs, and/or a collection of CSVs and their schema metadata.
  # NOTE(review): presumably consumed alongside package_helper — confirm.
  def publishing_format
    :datapackage
  end
end
|
||
# Dataset fetched from a remote URL. On construction it sniffs the
# publishing format and mixes in the first matching format module.
class RemoteDataset < DataKitten::Dataset
  # @param url [String] the remote access URL for the dataset
  def initialize(url)
    @access_url = url
    detect_publishing_format
  end

  # Extends this instance with the first publishing-format module that
  # reports support for it (Datapackage is tried before CKAN). If neither
  # matches, the instance is left unextended.
  def detect_publishing_format
    candidate = [
      DataKitten::PublishingFormats::Datapackage,
      DataKitten::PublishingFormats::CKAN
    ].find { |fmt| fmt.supported?(self) }
    extend candidate if candidate
  end
end
|
||
# Legacy Mongoid model for a validation "package": one or more data
# sources (local uploads or URLs) grouped with the Validations run
# against them. Stored in the old "packages" Mongo collection.
class Legacy::Package
  include Mongoid::Document
  store_in collection: "packages"
  include Mongoid::Timestamps

  # url is nil for packages built from local uploads (see parse_package).
  field :url, type: String
  # dataset holds a Marshal.dump of the DataKitten dataset object.
  # NOTE(review): Marshal.load of this field on read is unsafe if the
  # datastore contents are not fully trusted — flagging, not changing.
  field :dataset, type: String
  # type is the publishing format (:ckan/:datapackage) or "files"/"urls".
  field :type, type: String

  has_many :validations

  # Builds the attribute hash used to persist a datapackage-style package.
  #
  # @param dataset [LocalDataset, RemoteDataset] the parsed dataset
  # @param validations [Array<Validation>] validations for its distributions
  # @return [Hash] attributes suitable for update_attributes
  def parse_package(dataset, validations)
    attributes = {
      # Local datasets have no stable URL to record.
      :url => dataset.origin == :local ? nil : dataset.access_url,
      :dataset => Marshal.dump(dataset),
      :validations => validations,
      :type => dataset.publishing_format
    }

    return attributes
  end

  # Kicks off background processing of the package identified by id.
  def self.process(id, params)
    PackageProcessor.new(params, id).process
  end

  # Entry point: builds this package from the given sources.
  #
  # A single source that looks like a package (a URL string or an uploaded
  # datapackage.json) is treated as a datapackage/CKAN package; otherwise
  # each source gets its own Validation.
  #
  # @param sources [Array] uploaded files or URL strings
  # @param schema_url [String, nil] optional schema URL passed to validations
  # @param schema [Csvlint::Schema, nil] optional parsed schema
  # @return [Legacy::Package, nil] self, or nil when there is nothing to do
  def create_package(sources, schema_url = nil, schema = nil)
    return nil if sources.count == 0

    if sources.count == 1 && possible_package?(sources.first)
      dataset = create_dataset(sources.first)
      # Falls through to per-source handling when the dataset is not a
      # recognised package format (create_dataset returned nil).
      return create_datapackage(dataset) unless dataset.nil?
    end

    # NOTE(review): update_attributes is the era-appropriate Mongoid API
    # (renamed update in later versions) — leaving as-is.
    update_attributes({ type: set_type(sources) })

    sources.each do |source|
      validations << Validation.create_validation(source, schema_url, schema)
    end

    save
    self
  end

  # Wraps a single source in the appropriate DataKitten dataset.
  # Returns nil unless the detected format is CKAN or datapackage.
  def create_dataset(source)
    if source.respond_to?(:body)
      # Uploaded file: hand its StringIO to LocalDataset.
      dataset = LocalDataset.new(access_url: source.string_io)
    else
      # Otherwise assumed to be a URL string.
      dataset = RemoteDataset.new(source)
    end
    return nil unless [:ckan, :datapackage].include? dataset.publishing_format
    dataset
  end

  # Persists this package as a datapackage with one Validation per
  # validatable distribution. Returns nil when nothing was validatable.
  def create_datapackage(dataset)
    validations = create_validations(dataset)

    return nil if validations.count == 0

    update_attributes( parse_package(dataset, validations) )
    save
    self
  end

  # Builds a Validation for every distribution we can validate
  # (CSV format with an http(s) access URL — see can_validate?).
  def create_validations(dataset)
    validations = []
    dataset.distributions.each do |distribution|
      if can_validate?(distribution)
        validations << Validation.create_validation(distribution.access_url, nil, create_schema(distribution) )
      end
    end
    validations
  end

  # A lone source might be a package if it is a URL string or an
  # uploaded datapackage.json.
  def possible_package?(source)
    source.class == String || local_package?( source )
  end

  # True when the source is an upload whose filename contains
  # "datapackage.json".
  def local_package?(source)
    source.respond_to?(:string_io) && source.filename =~ /datapackage\.json/
  end

  # Classifies a batch of sources as uploaded "files" or "urls".
  # NOTE(review): returns nil for anything else — presumably unreachable
  # given the upstream controller; confirm.
  def set_type(sources)
    return "files" if sources.first.respond_to?(:tempfile)
    return "urls" if sources.first.class == String
  end

  # Only CSV distributions reachable over http(s) are validated.
  def can_validate?(distribution)
    return false unless distribution.format.extension == :csv
    return distribution.access_url && distribution.access_url =~ /^http(s?)/
  end

  # Parses the distribution's embedded JSON-table schema, if present.
  # Returns nil when the distribution carries no schema.
  def create_schema(distribution)
    unless distribution.schema.nil?
      schema = Csvlint::Schema.from_json_table(nil, distribution.schema)
    end
    return schema
  end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Legacy Mongoid model for a stored schema, persisted in the old
# "schemas" collection. Linked to the validations that used it.
class Legacy::Schema
  include Mongoid::Document
  include Mongoid::Timestamps

  store_in collection: "schemas"

  field :url, type: String

  has_many :validations

  # Route parameter for this record: the Mongo document id as a string.
  def to_param
    id.to_s
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Embedded document holding per-severity message counts for a Summary:
# one Hash of message-type => count per level (errors/warnings/info).
class LevelSummary
  include Mongoid::Document

  embedded_in :summary

  field :errors_breakdown, type: Hash
  field :warnings_breakdown, type: Hash
  field :info_messages_breakdown, type: Hash
end
|
||
# Embedded document holding per-category message counts for a Summary:
# one Hash of message-type => count per category (structure/schema/context).
class CategorySummary
  include Mongoid::Document

  embedded_in :summary

  field :structure_breakdown, type: Hash
  field :schema_breakdown, type: Hash
  field :context_breakdown, type: Hash
end
|
||
# Legacy Mongoid model aggregating statistics across all URL-based
# validations: counts by state, by source host, by message level and
# by message category. Stored in the old "summaries" collection.
class Legacy::Summary
  include Mongoid::Document
  store_in collection: "summaries"
  include Mongoid::Timestamps

  field :sources, type: Integer
  field :states, type: Hash
  field :hosts, type: Hash

  embeds_one :level_summary
  embeds_one :category_summary

  # Builds, saves and returns a fresh summary over every Validation that
  # has a URL, newest first.
  #
  # @return [Legacy::Summary] the persisted summary document
  def self.generate
    # FIX: was `Summary.create` — inside `class Legacy::Summary` that
    # constant resolves to the pre-move top-level Summary model, a stale
    # self-reference left over from the move-to-legacy refactor. Bare
    # `create` resolves to this class.
    summary = create

    # Retrieve validations from the Mongo datastore, ordered in reverse
    # by date created.
    validations = Validation.where(:url.ne => nil).order_by(:created_at.desc)

    summary.sources = validations.length
    summary.states = {"warnings"=>0, "valid"=>0, "not_found"=>0, "invalid"=>0}
    summary.hosts = Hash.new 0
    summary.create_level_summary( errors_breakdown: Hash.new(0), warnings_breakdown: Hash.new(0), info_messages_breakdown: Hash.new(0) )
    summary.create_category_summary( structure_breakdown: Hash.new(0), schema_breakdown: Hash.new(0), context_breakdown: Hash.new(0) )

    validations.each do |validation|
      # FIX: a plain `+= 1` raises NoMethodError (nil + 1) for any state
      # outside the four seeded keys; fetch with a default keeps the seeded
      # counts identical while tolerating unexpected states.
      summary.states[validation.state] = summary.states.fetch(validation.state, 0) + 1
      host = source_host(validation.url)
      summary.hosts[host] += 1 unless host.nil?
      validator = validation.validator
      messages = []
      # Tally each message level, counting each message type once per
      # validation (uniq by type).
      [:errors, :warnings, :info_messages].each do |level|
        unless validator.send(level).nil?
          messages = messages + validator.send(level)
          validator.send(level).uniq { |m| m.type }.each do |msg|
            summary.level_summary.send("#{level}_breakdown".to_sym)[ msg.type ] += 1
          end
        end
      end
      # Same tally again, but bucketed by message category.
      [:structure, :schema, :context].each do |category|
        messages.reject {|m| m.category != category }.uniq { |m| m.type }.each do |msg|
          summary.category_summary.send("#{category}_breakdown".to_sym)[ msg.type ] += 1
        end
      end
    end
    summary.save
    summary
  end

  private

  # Normalises a validation URL to a lowercase host with any leading
  # "www." stripped. Returns nil for unparseable/hostless URLs.
  def self.source_host(url)
    host = URI.parse(url.to_s).host
    return if host.nil?
    host.downcase!
    host = host.start_with?('www.') ? host[4..-1] : host
    # Replace '.' with fullwidth '.' (U+FF0E) — presumably because the
    # host is used as a Mongo hash key and MongoDB forbids dots in field
    # names. TODO: better option?
    host.gsub(".", "\uff0e")
  end

end
Oops, something went wrong.