Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
liamwhite committed Nov 9, 2024
2 parents ece6f09 + 60f51c6 commit 54c8291
Show file tree
Hide file tree
Showing 23 changed files with 1,123 additions and 225 deletions.
23 changes: 22 additions & 1 deletion .github/workflows/elixir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ jobs:
- run: docker compose pull
- run: docker compose build

- name: mix format
run: docker compose run app mix format --check-formatted

- name: Build and test
run: docker compose run app run-test

Expand All @@ -50,6 +53,24 @@ jobs:
- uses: actions/checkout@v4
- uses: crate-ci/typos@master

cargo:
name: Rust Linting and Unit Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Setup Rust
uses: dtolnay/rust-toolchain@stable

- name: cargo fmt
run: (cd native/philomena && cargo fmt --check)

- name: cargo clippy
run: (cd native/philomena && cargo clippy -- -D warnings)

- name: cargo test
run: (cd native/philomena && cargo test)

lint-and-test:
name: 'JavaScript Linting and Unit Tests'
runs-on: ubuntu-latest
Expand Down Expand Up @@ -80,4 +101,4 @@ jobs:
working-directory: ./assets

- run: npm run build
working-directory: ./assets
working-directory: ./assets
168 changes: 84 additions & 84 deletions assets/package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion assets/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"sass": "^1.75.0",
"typescript": "^5.4",
"typescript-eslint": "8.0.0-alpha.39",
"vite": "^5.2"
"vite": "^5.4"
},
"devDependencies": {
"@testing-library/dom": "^10.1.0",
Expand Down
8 changes: 6 additions & 2 deletions lib/mix/tasks/upload_to_s3.ex
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ defmodule Mix.Tasks.UploadToS3 do
end

defp upload_typical(queryable, batch_size, file_root, new_file_root, field_name) do
Batch.record_batches(queryable, [batch_size: batch_size], fn models ->
queryable
|> Batch.record_batches(batch_size: batch_size)
|> Enum.each(fn models ->
models
|> Task.async_stream(&upload_typical_model(&1, file_root, new_file_root, field_name),
timeout: :infinity
Expand All @@ -142,7 +144,9 @@ defmodule Mix.Tasks.UploadToS3 do
end

defp upload_images(queryable, batch_size, file_root, new_file_root) do
Batch.record_batches(queryable, [batch_size: batch_size], fn models ->
queryable
|> Batch.record_batches(batch_size: batch_size)
|> Enum.each(fn models ->
models
|> Task.async_stream(&upload_image_model(&1, file_root, new_file_root), timeout: :infinity)
|> Stream.run()
Expand Down
171 changes: 171 additions & 0 deletions lib/philomena/data_exports/aggregator.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
defmodule Philomena.DataExports.Aggregator do
@moduledoc """
Data generation module for data export logic.

Collects every table holding a user's personally-identifying information
(PII) or user-generated content (UGC), and renders each table's matching
rows as a lazily-evaluated stream of JSON lines (one `{filename, stream}`
pair per table), suitable for writing into an export archive.
"""

import Ecto.Query
alias PhilomenaQuery.Batch

# Direct PII
alias Philomena.Donations.Donation
alias Philomena.UserFingerprints.UserFingerprint
alias Philomena.UserIps.UserIp
alias Philomena.UserNameChanges.UserNameChange
alias Philomena.Users.User

# UGC for export
alias Philomena.ArtistLinks.ArtistLink
alias Philomena.Badges.Award
alias Philomena.Comments.Comment
alias Philomena.Commissions.Commission
alias Philomena.DnpEntries.DnpEntry
alias Philomena.DuplicateReports.DuplicateReport
alias Philomena.Filters.Filter
alias Philomena.ImageFaves.ImageFave
alias Philomena.ImageHides.ImageHide
alias Philomena.ImageVotes.ImageVote
alias Philomena.Images.Image
alias Philomena.PollVotes.PollVote
alias Philomena.Posts.Post
alias Philomena.Reports.Report
alias Philomena.SourceChanges.SourceChange
alias Philomena.TagChanges.TagChange
alias Philomena.Topics.Topic
alias Philomena.Bans.User, as: UserBan

# Direct UGC from form submission
# Columns exported from the user's own row (selected by primary key :id).
@user_columns [
:created_at,
:name,
:email,
:description,
:current_filter_id,
:spoiler_type,
:theme,
:images_per_page,
:show_large_thumbnails,
:show_sidebar_and_watched_images,
:fancy_tag_field_on_upload,
:fancy_tag_field_on_edit,
:fancy_tag_field_in_settings,
:autorefresh_by_default,
:anonymous_by_default,
:comments_newest_first,
:comments_always_jump_to_last,
:comments_per_page,
:watch_on_reply,
:watch_on_new_topic,
:watch_on_upload,
:messages_newest_first,
:serve_webm,
:no_spoilered_in_watched,
:watched_images_query_str,
:watched_images_exclude_str,
:use_centered_layout,
:personal_title,
:hide_vote_counts,
:scale_large_images
]

# All these also have created_at and are selected by user_id
# Tuple shapes (dispatched on in select_indirect/1):
#   {schema, columns}                       -> keyed by :user_id, batched by :id
#   {schema, columns, key_column}           -> custom foreign-key column
#   {schema, columns, key_column, id_field} -> custom batch cursor column
@indirect_columns [
{Donation, [:email, :amount, :fee, :note]},
{UserFingerprint, [:fingerprint, :uses, :updated_at]},
{UserIp, [:ip, :uses, :updated_at]},
{UserNameChange, [:name]},
{ArtistLink, [:aasm_state, :uri, :public, :tag_id]},
{Award, [:label, :badge_name, :badge_id]},
{Comment,
[
:ip,
:fingerprint,
:user_agent,
:referrer,
:anonymous,
:image_id,
:edited_at,
:edit_reason,
:body
]},
{Commission,
[:open, :sheet_image_id, :categories, :information, :contact, :will_create, :will_not_create]},
{DnpEntry, [:tag_id, :aasm_state, :dnp_type, :hide_reason, :feedback, :reason, :instructions],
:requesting_user_id},
{DuplicateReport, [:reason, :image_id, :duplicate_of_image_id]},
{Filter,
[
:name,
:description,
:public,
:hidden_complex_str,
:spoilered_complex_str,
:hidden_tag_ids,
:spoilered_tag_ids
]},
{ImageFave, [:image_id], :user_id, :image_id},
{ImageHide, [:image_id], :user_id, :image_id},
{ImageVote, [:image_id, :up], :user_id, :image_id},
{Image, [:ip, :fingerprint, :user_agent, :referrer, :anonymous, :description]},
{PollVote, [:rank, :poll_option_id]},
{Post,
[:ip, :fingerprint, :user_agent, :referrer, :anonymous, :edited_at, :edit_reason, :body]},
{Report,
[:ip, :fingerprint, :user_agent, :referrer, :reason, :reportable_id, :reportable_type]},
{SourceChange, [:ip, :fingerprint, :user_agent, :referrer, :image_id, :added, :value]},
{TagChange,
[:ip, :fingerprint, :user_agent, :referrer, :image_id, :added, :tag_id, :tag_name_cache]},
{Topic, [:title, :anonymous, :forum_id]},
{UserBan, [:reason, :generated_ban_id]}
]

@doc """
Get all of the export data for the given user.

Returns a list of `{filename, stream}` tuples, one per exported table;
each stream lazily yields newline-terminated JSON objects (JSON Lines).
"""
def get_for_user(user_id) do
[select_user(user_id)] ++ select_indirect(user_id)
end

# Export the user's own row, keyed by primary key :id rather than :user_id.
defp select_user(user_id) do
select_schema_by_key(user_id, User, @user_columns, :id)
end

# Expand each @indirect_columns entry into a {filename, stream} pair,
# dispatching on tuple arity to supply the optional key/cursor columns.
defp select_indirect(user_id) do
Enum.map(@indirect_columns, fn
{schema_name, columns} ->
select_schema_by_key(user_id, schema_name, columns)

{schema_name, columns, key_column} ->
select_schema_by_key(user_id, schema_name, columns, key_column)

{schema_name, columns, key_column, id_field} ->
select_schema_by_key(user_id, schema_name, columns, key_column, id_field)
end)
end

# Build the {filename, stream} pair for one schema: rows matching
# `key_column == user_id` are fetched in batches (paged on `id_field`
# by PhilomenaQuery.Batch) and reduced to the requested columns plus
# :created_at. The filename is derived from the schema's table name.
defp select_schema_by_key(
user_id,
schema_name,
columns,
key_column \\ :user_id,
id_field \\ :id
) do
table_name = schema_name.__schema__(:source)
columns = [:created_at] ++ columns

{"#{table_name}.jsonl",
schema_name
|> where([s], field(s, ^key_column) == ^user_id)
|> select([s], map(s, ^columns))
|> Batch.records(id_field: id_field)
|> results_as_json_lines()}
end

# Render each result map as one JSON line. Values are coerced with
# `to_string/1` first, since many column types (dates, inet, etc.)
# lack JSON encoders.
# NOTE(review): `to_string/1` on list-valued columns (e.g. Filter's
# :hidden_tag_ids) would treat them as charlists or raise —
# TODO confirm these columns always stringify safely.
defp results_as_json_lines(list_of_maps) do
Stream.map(list_of_maps, fn map ->
map
|> Map.new(fn {k, v} -> {k, to_string(v)} end)
|> Jason.encode!()
|> Kernel.<>("\n")
end)
end
end
56 changes: 56 additions & 0 deletions lib/philomena/data_exports/zip_generator.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
defmodule Philomena.DataExports.ZipGenerator do
  @moduledoc """
  ZIP file generator for an export.
  """

  alias Philomena.Native

  @doc """
  Write the ZIP file for the given aggregate data.

  Expects a list of 2-tuples, with the first element being the name of the
  file to generate, and the second element being a stream which generates the
  binary contents of the file.

  Returns `:ok` on success. If the archive cannot be opened, the error from
  `Native.zip_open_writer/1` is returned unchanged; if any entry fails to
  write, `:error` is returned and the archive is not finalized.
  """
  @spec generate(Path.t(), Enumerable.t()) :: :ok | atom()
  def generate(filename, aggregate) do
    case Native.zip_open_writer(filename) do
      {:ok, zip} ->
        stream_aggregate(zip, aggregate)

      error ->
        error
    end
  end

  # Write every {name, content_stream} entry into the open archive, then
  # finalize it. Stops at the first failing entry and propagates its error
  # without calling zip_finish/1.
  #
  # Spec fixed: the original declared {:ok, reference()} | :error, but the
  # success branch returns Native.zip_finish/1, whose contract is :ok | :error,
  # and both failure paths yield :error — {:ok, reference()} is unreachable.
  @spec stream_aggregate(reference(), Enumerable.t()) :: :ok | :error
  defp stream_aggregate(zip, aggregate) do
    aggregate
    |> Enum.reduce_while(:ok, fn {name, content_stream}, _ ->
      with :ok <- Native.zip_start_file(zip, name),
           :ok <- stream_file_data(zip, content_stream) do
        {:cont, :ok}
      else
        error ->
          {:halt, error}
      end
    end)
    |> case do
      :ok ->
        Native.zip_finish(zip)

      error ->
        error
    end
  end

  # Stream one file's chunks into the currently-open archive entry,
  # flattening each iodata chunk to a binary before handing it to the NIF.
  # Halts with the NIF's error on the first failed write.
  @spec stream_file_data(reference(), Enumerable.t(iodata())) :: :ok | :error
  defp stream_file_data(zip, content_stream) do
    Enum.reduce_while(content_stream, :ok, fn iodata, _ ->
      case Native.zip_write(zip, IO.iodata_to_binary(iodata)) do
        :ok -> {:cont, :ok}
        error -> {:halt, error}
      end
    end)
  end
end
12 changes: 12 additions & 0 deletions lib/philomena/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,16 @@ defmodule Philomena.Native do

@spec camo_image_url(String.t()) :: String.t()
def camo_image_url(_uri), do: :erlang.nif_error(:nif_not_loaded)

# NIF stubs for streaming ZIP archive creation (presumably implemented by
# the Rust crate under native/philomena — confirm against the crate).
# The Elixir bodies below only raise if the NIF library failed to load;
# at runtime the loaded NIF replaces them.

# Open `path` for writing and return an opaque archive writer handle.
@spec zip_open_writer(Path.t()) :: {:ok, reference()} | {:error, atom()}
def zip_open_writer(_path), do: :erlang.nif_error(:nif_not_loaded)

# Begin a new archive entry named `name` in the open writer `zip`.
@spec zip_start_file(reference(), String.t()) :: :ok | :error
def zip_start_file(_zip, _name), do: :erlang.nif_error(:nif_not_loaded)

# Append `data` to the entry most recently started with zip_start_file/2.
@spec zip_write(reference(), binary()) :: :ok | :error
def zip_write(_zip, _data), do: :erlang.nif_error(:nif_not_loaded)

# Finalize the central directory and close the archive.
@spec zip_finish(reference()) :: :ok | :error
def zip_finish(_zip), do: :erlang.nif_error(:nif_not_loaded)
end
2 changes: 1 addition & 1 deletion lib/philomena/tags/search_index.ex
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ defmodule Philomena.Tags.SearchIndex do
category: tag.category,
aliased: !!tag.aliased_tag,
description: tag.description,
short_description: tag.description
short_description: tag.short_description
}
end
end
9 changes: 6 additions & 3 deletions lib/philomena/user_downvote_wipe.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ defmodule Philomena.UserDownvoteWipe do

ImageVote
|> where(user_id: ^user.id, up: false)
|> Batch.query_batches([id_field: :image_id], fn queryable ->
|> Batch.query_batches(id_field: :image_id)
|> Enum.each(fn queryable ->
{_, image_ids} = Repo.delete_all(select(queryable, [i_v], i_v.image_id), timeout: 120_000)

{count, nil} =
Expand All @@ -35,7 +36,8 @@ defmodule Philomena.UserDownvoteWipe do
if upvotes_and_faves_too do
ImageVote
|> where(user_id: ^user.id, up: true)
|> Batch.query_batches([id_field: :image_id], fn queryable ->
|> Batch.query_batches(id_field: :image_id)
|> Enum.each(fn queryable ->
{_, image_ids} = Repo.delete_all(select(queryable, [i_v], i_v.image_id), timeout: 120_000)

{count, nil} =
Expand All @@ -54,7 +56,8 @@ defmodule Philomena.UserDownvoteWipe do

ImageFave
|> where(user_id: ^user.id)
|> Batch.query_batches([id_field: :image_id], fn queryable ->
|> Batch.query_batches(id_field: :image_id)
|> Enum.each(fn queryable ->
{_, image_ids} = Repo.delete_all(select(queryable, [i_f], i_f.image_id), timeout: 120_000)

{count, nil} =
Expand Down
4 changes: 3 additions & 1 deletion lib/philomena/workers/tag_change_revert_worker.ex
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ defmodule Philomena.TagChangeRevertWorker do
batch_size = attributes["batch_size"] || 100
attributes = Map.delete(attributes, "batch_size")

Batch.query_batches(queryable, [batch_size: batch_size], fn queryable ->
queryable
|> Batch.query_batches(batch_size: batch_size)
|> Enum.each(fn queryable ->
ids = Repo.all(select(queryable, [tc], tc.id))
TagChanges.mass_revert(ids, cast_ip(atomify_keys(attributes)))
end)
Expand Down
1 change: 1 addition & 0 deletions lib/philomena_proxy/scrapers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ defmodule PhilomenaProxy.Scrapers do
}

@scrapers [
PhilomenaProxy.Scrapers.Bluesky,
PhilomenaProxy.Scrapers.Deviantart,
PhilomenaProxy.Scrapers.Pillowfort,
PhilomenaProxy.Scrapers.Twitter,
Expand Down
Loading

0 comments on commit 54c8291

Please sign in to comment.