Skip to content

Commit

Permalink
Add mix task to check website availability and cleanup list of compan…
Browse files Browse the repository at this point in the history
…ies (#692)

* introduce script to automatically check website availability

* Fix/remove unavailable companies

appprova: inactive on github since 2019
canvas: inactive on github since 2019
askcharlie: website down, no github
aviacommerce: inactive on github
badger: website down, no github
cinch_financial: inactive since 2020
codecorps: inactive since 2019
slickspaces: rebranded to operto
daybit: inactive since 2021
diatom: inactive since 2019
exeq: deleted github
fred: no github
gitmonitor: no github
grok-interactive: inactive since 2018
hiringsolved: no github
icicletech: inactive since 2019
ispirata: inactive since 2021
kmonline: inactive since 2021
neon_tree_solutions: no github
palar_support: no github
patternmatch: inactive since 2021
qixxit: no github (possibly bought by lastminute.de)
rexpad: no github
salam: no github
the_club: no github
voicelayer: no github
wunder: inactive since 2021
xerpa: inactive since 2021
youprop: no github

* fix tests

---------

Co-authored-by: Sean Callan <[email protected]>
  • Loading branch information
tonnenpinguin and doomspork authored Dec 22, 2023
1 parent 93e0379 commit e327054
Show file tree
Hide file tree
Showing 34 changed files with 121 additions and 426 deletions.
113 changes: 113 additions & 0 deletions lib/mix/tasks/test.websites.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
defmodule Mix.Tasks.Test.Websites do
  use Mix.Task

  require Logger

  @shortdoc "Checks company site reachability"

  @moduledoc """
  Checks if company websites are reachable.

      $ mix test.websites
      $ mix test.websites --num-workers 10 --timeout 5000 --github-token ghp_... priv/companies/abc.exs

  ## Options

    * `--num-workers` - how many companies are checked in parallel (default: `30`).
    * `--timeout` - timeout in ms, passed to the HTTP client as its `:timeout` option
      (default: `1000`).
    * `--github-token` - optional GitHub token. When given, the `updated_at` field of the
      company's GitHub organisation is added to the report line of each failing company.

  Optionally a list of files can be passed - in that case only those files are checked.
  Without files, the `:companies_directory` setting of the `:companies` application is
  expanded with `Path.wildcard/1` and every match is checked.
  """

  @switches [num_workers: :integer, timeout: :integer, github_token: :string]
  @default_opts [num_workers: 30, timeout: 1000]

  @impl true
  @doc false
  def run(args) do
    {parsed_opts, input_files} = OptionParser.parse!(args, strict: @switches)

    # Keyword.merge/2 already lets the right-hand side win, so no conflict callback is needed.
    opts = Keyword.merge(@default_opts, parsed_opts)

    Application.ensure_all_started(:companies)

    input_files
    |> files_to_check()
    |> Enum.map(&eval_company_file/1)
    |> ParallelStream.map(&check_company_website(&1, opts),
      num_workers: Keyword.fetch!(opts, :num_workers)
    )
    |> summarize_results(opts)
  end

  # Resolves the list of company files: the configured wildcard when no file was given on
  # the command line, otherwise exactly the files that were passed in.
  defp files_to_check([]) do
    # fetch_env!/2 fails with a descriptive error instead of handing nil to Path.wildcard/1.
    :companies
    |> Application.fetch_env!(:companies_directory)
    |> Path.wildcard()
  end

  defp files_to_check(input_files) do
    Logger.info("Checking #{length(input_files)} files")
    input_files
  end

  # Evaluates a company file and returns the value of its last expression.
  defp eval_company_file(filename) do
    {company, _bindings} = Code.eval_file(filename)
    company
  end

  # Returns `{:ok, company}` when the HTTP request succeeds, `{:error, company, reason}`
  # when the company has no website or the HTTP client reports an error.
  defp check_company_website(%{website: website} = company, _opts) when website in [nil, ""] do
    Logger.info("#{company.name} has no website")
    {:error, company, :no_website}
  end

  defp check_company_website(%{website: website} = company, opts) do
    # Any HTTP response counts as reachable; only client-level errors are reported.
    case HTTPoison.get(website, [], timeout: Keyword.fetch!(opts, :timeout)) do
      {:ok, _response} -> {:ok, company}
      {:error, reason} -> {:error, company, reason}
    end
  end

  # Logs either a success message or one warning line per failing company.
  defp summarize_results(results, opts) do
    errors = for {:error, company, reason} <- results, do: {company, reason}

    case errors do
      [] ->
        Logger.info("No issues detected")

      _ ->
        Logger.warning("There were #{length(errors)} unreachable websites:")

        Enum.each(errors, fn {company, reason} ->
          github = Map.get(company, :github)
          last_activity = maybe_get_last_activity(company, opts)

          Logger.warning(
            "#{company.name} (#{company.website}, #{github}#{last_activity}) is unreachable because #{inspect(reason)}"
          )
        end)
    end
  end

  # Returns " - <updated_at>" for the company's GitHub organisation, " - unknown" when the
  # lookup does not yield an `updated_at`, or "" when there is no GitHub URL or no token.
  defp maybe_get_last_activity(company, opts) do
    github = Map.get(company, :github)
    token = Keyword.get(opts, :github_token)

    if is_binary(github) and github != "" and is_binary(token) do
      fetch_last_activity(github, token)
    else
      ""
    end
  end

  # Queries the GitHub API for the organisation named by the last path segment of the URL.
  # A failed request or an unparsable body must not abort the report, so every failure
  # degrades to " - unknown".
  defp fetch_last_activity(github_url, token) do
    github_org = github_url |> String.trim_trailing("/") |> String.split("/") |> List.last()

    Logger.debug("Getting activity for #{github_org}")

    url = "https://api.github.com/orgs/#{github_org}"
    headers = [{"Authorization", "bearer #{token}"}]

    with {:ok, %{body: body}} <- HTTPoison.get(url, headers),
         {:ok, %{"updated_at" => updated_at}} <- Jason.decode(body) do
      " - #{updated_at}"
    else
      _ -> " - unknown"
    end
  end
end
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ defmodule Companies.MixProject do
{:excoveralls, "~> 0.12", only: :test},
{:floki, ">= 0.0.0", only: :test},
{:phoenix_live_reload, "~> 1.2", only: :dev},
{:dart_sass, "~> 0.5", runtime: Mix.env() == :dev}
{:dart_sass, "~> 0.5", runtime: Mix.env() == :dev},
{:parallel_stream, "~> 1.1", only: [:dev, :test]}
]
end

Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"mochiweb": {:hex, :mochiweb, "2.22.0", "f104d6747c01a330c38613561977e565b788b9170055c5241ac9dd6e4617cba5", [:rebar3], [], "hexpm", "cbbd1fd315d283c576d1c8a13e0738f6dafb63dc840611249608697502a07655"},
"nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"},
"nimble_publisher": {:hex, :nimble_publisher, "0.1.3", "ee856171ce221662009aecac709f96ed6339d7a6c1da43db7bb75a5016e4848f", [:mix], [{:earmark, "~> 1.4", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "dbcf800c4567d7bb71bb985781d70a55af134f1353c3a49b609d7ca679280e1e"},
"parallel_stream": {:hex, :parallel_stream, "1.1.0", "f52f73eb344bc22de335992377413138405796e0d0ad99d995d9977ac29f1ca9", [:mix], [], "hexpm", "684fd19191aedfaf387bbabbeb8ff3c752f0220c8112eb907d797f4592d6e871"},
"parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"},
"phoenix": {:hex, :phoenix, "1.6.15", "0a1d96bbc10747fd83525370d691953cdb6f3ccbac61aa01b4acb012474b047d", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.0", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 1.0 or ~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: false]}, {:plug, "~> 1.10", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.2", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d70ab9fbf6b394755ea88b644d34d79d8b146e490973151f248cacd122d20672"},
"phoenix_ecto": {:hex, :phoenix_ecto, "4.4.0", "0672ed4e4808b3fbed494dded89958e22fb882de47a97634c0b13e7b0b5f7720", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "09864e558ed31ee00bd48fcc1d4fc58ae9678c9e81649075431e69dbabb43cc1"},
Expand Down
2 changes: 1 addition & 1 deletion priv/companies/anarock.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%{
industry: "Real Estate",
website: "https://www.anarock.com",
website: "https://anarock.com/",
github: "https://github.com/anarock",
location: %{
city: "Mumbai",
Expand Down
15 changes: 0 additions & 15 deletions priv/companies/appprova.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/askcharlie.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/aviacommerce.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/badger.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/canvas.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/cinch_financial.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/code_corps.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/daybit.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/diatom_enterprise_softwares.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/exeq.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/fred.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/gitmonitor.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/grok_interactive.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/hiringsolved.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/icicle_technologies.exs

This file was deleted.

15 changes: 0 additions & 15 deletions priv/companies/ispirata.exs

This file was deleted.

Loading

0 comments on commit e327054

Please sign in to comment.