Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
liamwhite committed Apr 22, 2024
2 parents 402a965 + 3f1f208 commit ec04d55
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 11 deletions.
38 changes: 29 additions & 9 deletions index/images.mk
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ all: import_es
import_es: dump_jsonl
$(ELASTICDUMP) --input=images.jsonl --output=http://localhost:9200/ --output-index=images --limit 10000 --retryAttempts=5 --type=data --transform="doc._source = Object.assign({},doc); doc._id = doc.id"

dump_jsonl: metadata true_uploaders uploaders deleters galleries tags hides upvotes downvotes faves tag_names
dump_jsonl: metadata true_uploaders uploaders deleters galleries tags sources hides upvotes downvotes faves tag_names
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'copy (select temp_images.jsonb_object_agg(object) from temp_images.image_search_json group by image_id) to stdout;' > images.jsonl
psql $(DATABASE) -v ON_ERROR_STOP=1 <<< 'drop schema temp_images cascade;'
sed -i images.jsonl -e 's/\\\\/\\/g'

metadata: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select id, jsonb_build_object(
'approved', approved,
'animated', is_animated,
'anonymous', anonymous,
'aspect_ratio', nullif(image_aspect_ratio, 'NaN'::float8),
'comment_count', comments_count,
Expand All @@ -23,6 +25,7 @@ metadata: image_search_json
'description', description,
'downvotes', downvotes_count,
'duplicate_id', duplicate_id,
'duration', (case when is_animated then image_duration else 0::float end),
'faves', faves_count,
'file_name', image_name,
'fingerprint', fingerprint,
Expand All @@ -35,10 +38,11 @@ metadata: image_search_json
'orig_sha512_hash', image_orig_sha512_hash,
'original_format', image_format,
'pixels', cast(image_width as bigint)*cast(image_height as bigint),
'processed', processed,
'score', score,
'size', image_size,
'sha512_hash', image_sha512_hash,
'source_url', lower(source_url),
'thumbnails_generated', thumbnails_generated,
'updated_at', updated_at,
'upvotes', upvotes_count,
'width', image_width,
Expand All @@ -64,33 +68,49 @@ deleters: image_search_json
galleries: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_interactions', jsonb_agg(jsonb_build_object('id', gi.gallery_id, 'position', gi.position))) from gallery_interactions gi group by image_id;
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_id', jsonb_agg(gi.gallery_id)) from gallery_interactions gi group by image_id;
insert into temp_images.image_search_json (image_id, object) select gi.image_id, jsonb_build_object('gallery_position', jsonb_object_agg(gi.gallery_id, gi.position)) from gallery_interactions gi group by image_id;
SQL

tags: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object('tag_ids', jsonb_agg(it.tag_id), 'tag_count', count(*)) from image_taggings it group by image_id;
insert into temp_images.image_search_json (image_id, object) select it.image_id, jsonb_build_object(
'tag_ids', jsonb_agg(it.tag_id),
'tag_count', count(*),
'error_tag_count', count(case when t.category = 'error' then t.category else null end),
'rating_tag_count', count(case when t.category = 'rating' then t.category else null end),
'origin_tag_count', count(case when t.category = 'origin' then t.category else null end),
'character_tag_count', count(case when t.category = 'character' then t.category else null end),
'oc_tag_count', count(case when t.category = 'oc' then t.category else null end),
'species_tag_count', count(case when t.category = 'species' then t.category else null end),
'body_type_tag_count', count(case when t.category = 'body-type' then t.category else null end),
'content_fanmade_tag_count', count(case when t.category = 'content-fanmade' then t.category else null end),
'content_official_tag_count', count(case when t.category = 'content-official' then t.category else null end),
'spoiler_tag_count', count(case when t.category = 'spoiler' then t.category else null end),
) from image_taggings it inner join tags t on t.id = it.tag_id group by image_id;
SQL

sources: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select s.image_id, jsonb_build_object('source_url', jsonb_agg(lower(s.source)), 'source_count', count(*)) from image_sources s group by image_id;
SQL

hides: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_ids', jsonb_agg(ih.user_id), 'hidden_by', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
insert into temp_images.image_search_json (image_id, object) select ih.image_id, jsonb_build_object('hidden_by_user_ids', jsonb_agg(ih.user_id), 'hidden_by_users', jsonb_agg(lower(u.name))) from image_hides ih inner join users u on u.id = ih.user_id group by image_id;
SQL

downvotes: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoted_by_ids', jsonb_agg(iv.user_id), 'downvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('downvoter_ids', jsonb_agg(iv.user_id), 'downvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = false group by image_id;
SQL

upvotes: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoted_by_ids', jsonb_agg(iv.user_id), 'upvoted_by', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
insert into temp_images.image_search_json (image_id, object) select iv.image_id, jsonb_build_object('upvoter_ids', jsonb_agg(iv.user_id), 'upvoters', jsonb_agg(lower(u.name))) from image_votes iv inner join users u on u.id = iv.user_id where iv.up = true group by image_id;
SQL

faves: image_search_json
psql $(DATABASE) -v ON_ERROR_STOP=1 <<-SQL
insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('faved_by_ids', jsonb_agg(if.user_id), 'faved_by', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
insert into temp_images.image_search_json (image_id, object) select if.image_id, jsonb_build_object('favourited_by_user_ids', jsonb_agg(if.user_id), 'favourited_by_users', jsonb_agg(lower(u.name))) from image_faves if inner join users u on u.id = if.user_id group by image_id;
SQL

tag_names: tags_with_aliases
Expand Down
26 changes: 24 additions & 2 deletions lib/philomena/images/elasticsearch_index.ex
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ defmodule Philomena.Images.ElasticsearchIndex do
size: %{type: "integer"},
sha512_hash: %{type: "keyword"},
source_url: %{type: "keyword"},
source_count: %{type: "integer"},
tag_count: %{type: "integer"},
tag_ids: %{type: "keyword"},
tags: %{type: "text", analyzer: "keyword"},
Expand Down Expand Up @@ -87,7 +88,17 @@ defmodule Philomena.Images.ElasticsearchIndex do
namespace: %{type: "keyword"}
}
},
approved: %{type: "boolean"}
approved: %{type: "boolean"},
error_tag_count: %{type: "integer"},
rating_tag_count: %{type: "integer"},
origin_tag_count: %{type: "integer"},
character_tag_count: %{type: "integer"},
oc_tag_count: %{type: "integer"},
species_tag_count: %{type: "integer"},
body_type_tag_count: %{type: "integer"},
content_fanmade_tag_count: %{type: "integer"},
content_official_tag_count: %{type: "integer"},
spoiler_tag_count: %{type: "integer"}
}
}
}
Expand Down Expand Up @@ -120,6 +131,7 @@ defmodule Philomena.Images.ElasticsearchIndex do
uploader: if(!!image.user and !image.anonymous, do: String.downcase(image.user.name)),
true_uploader: if(!!image.user, do: String.downcase(image.user.name)),
source_url: image.sources |> Enum.map(&String.downcase(&1.source)),
source_count: length(image.sources),
file_name: image.image_name,
original_format: image.image_format,
processed: image.processed,
Expand Down Expand Up @@ -151,7 +163,17 @@ defmodule Philomena.Images.ElasticsearchIndex do
upvoters: image.upvoters |> Enum.map(&String.downcase(&1.name)),
downvoters: image.downvoters |> Enum.map(&String.downcase(&1.name)),
deleted_by_user: if(!!image.deleter, do: image.deleter.name),
approved: image.approved
approved: image.approved,
error_tag_count: Enum.count(image.tags, &(&1.category == "error")),
rating_tag_count: Enum.count(image.tags, &(&1.category == "rating")),
origin_tag_count: Enum.count(image.tags, &(&1.category == "origin")),
character_tag_count: Enum.count(image.tags, &(&1.category == "character")),
oc_tag_count: Enum.count(image.tags, &(&1.category == "oc")),
species_tag_count: Enum.count(image.tags, &(&1.category == "species")),
body_type_tag_count: Enum.count(image.tags, &(&1.category == "body-type")),
content_fanmade_tag_count: Enum.count(image.tags, &(&1.category == "content-fanmade")),
content_official_tag_count: Enum.count(image.tags, &(&1.category == "content-official")),
spoiler_tag_count: Enum.count(image.tags, &(&1.category == "spoiler"))
}
end

Expand Down

0 comments on commit ec04d55

Please sign in to comment.