diff --git a/lib/plugins/link/data/quirks_rewrite_host.txt b/lib/plugins/link/data/quirks_rewrite_host.txt
new file mode 100644
index 0000000..0b80b0d
--- /dev/null
+++ b/lib/plugins/link/data/quirks_rewrite_host.txt
@@ -0,0 +1,25 @@
+# list of host rewrites, one old_host:new_host pair per line (www.old_host is added automatically)
+# fixups: https://gist.github.com/Lexedia/bbbde4dbbf628b0bfe8476a96a977a8f
+
+x.com:fxtwitter.com
+twitter.com:fxtwitter.com
+vxtwitter.com:fxtwitter.com
+
+bsky.app:xsky.app
+
+instagram.com:ddinstagram.com
+
+tumblr.com:tpmblr.com
+
+# reddit: new reddit doesn't have titles for pages we don't handle in the reddit module,
+# so fall back to old reddit, which has nice titles
+reddit.com:old.reddit.com
+
+pixiv.net:phixiv.net
+
+threads.net:fixthreads.net
+
+tiktok.com:tnktok.com
+vm.tiktok.com:vm.tnktok.com
+
+twitch.tv:fxtwitch.tv
diff --git a/lib/plugins/link/data/quirks_telegram_bot_user_agent.txt b/lib/plugins/link/data/quirks_telegram_bot_user_agent.txt
new file mode 100644
index 0000000..3008272
--- /dev/null
+++ b/lib/plugins/link/data/quirks_telegram_bot_user_agent.txt
@@ -0,0 +1,30 @@
+# list of domains which need a telegram bot user agent
+# to return proper og:* meta tags
+# www. subdomain is added automatically
+
+x.com
+vxtwitter.com
+fxtwitter.com
+fixupx.com
+
+facebook.com
+
+instagram.com
+xnstagram.com
+ddinstagram.com
+
+tpmblr.com
+
+reddit.com
+old.reddit.com
+
+xsky.app
+
+phixiv.net
+
+fixthreads.net
+
+tnktok.com
+vm.tnktok.com
+
+fxtwitch.tv
diff --git a/lib/plugins/link/quirks.ex b/lib/plugins/link/quirks.ex
index 5acfdac..6f46f6b 100644
--- a/lib/plugins/link/quirks.ex
+++ b/lib/plugins/link/quirks.ex
@@ -1,32 +1,63 @@
 defmodule Nola.Plugins.Link.Quirks do
-  # def uri(%URI{host: "x.com"} = uri) do
-  #   %URI{uri | host: "vxtwitter.com"}
-  # end
+  @moduledoc """
+  Host-specific quirks for the link plugin: host rewrites (`uri/1`) and
+  User-Agent selection (`user_agent/1`).
+
+  Both host lists are read at compile time from the text files under `data/`.
+  """
+
+  # Paths are resolved relative to this file so compilation does not depend on the
+  # working directory; @external_resource makes edits to the lists trigger a recompile.
+  @rewrite_hosts_file Path.join(__DIR__, "data/quirks_rewrite_host.txt")
+  @external_resource @rewrite_hosts_file
+  @rewrite_hosts @rewrite_hosts_file
+                 |> Util.read_file_list!()
+                 |> Enum.flat_map(fn line ->
+                   # Each entry is "old_host:new_host"; the www. variant is rewritten too.
+                   [old, new] = String.split(line, ":")
+                   [{old, new}, {"www.#{old}", new}]
+                 end)
+                 |> tap(fn list ->
+                   IO.puts("Link Quirks: rewrite_hosts: #{inspect(list)}")
+                 end)
 
-  # reddit: new reddit don't have titles for pages we don't handle in the reddit module
-  # fallback to old. which has nice titles
-  def uri(%URI{host: reddit} = uri) when reddit in ["www.reddit.com", "reddit.com"] do
-    %URI{uri | host: "old.reddit.com"}
+  @doc """
+  Rewrites the host of `uri` when it is listed in the rewrite file; any other value is
+  returned unchanged.
+  """
+  for {old, new} <- @rewrite_hosts do
+    def uri(%URI{host: unquote(old)} = uri) do
+      %URI{uri | host: unquote(new)}
+    end
   end
 
   def uri(url) do
     url
   end
 
-  def user_agent(host)
-      when host in [
-             "x.com",
-             "vxtwitter.com",
-             "fxtwitter.com",
-             "instagram.com",
-             "facebook.com",
-             "xnstagram.com",
-             "ddinstagram.com"
-           ] do
+  @telegram_bot_hosts_file Path.join(__DIR__, "data/quirks_telegram_bot_user_agent.txt")
+  @external_resource @telegram_bot_hosts_file
+  @telegram_bot_hosts @telegram_bot_hosts_file
+                      |> Util.read_file_list!()
+                      |> Enum.flat_map(fn host -> [host, "www.#{host}"] end)
+                      |> tap(fn list ->
+                        IO.puts("Link Quirks: telegram_bot_hosts: #{inspect(list)}")
+                      end)
+
+  @doc """
+  Returns the User-Agent to use for `host`: a Telegram bot one for hosts listed in the
+  telegram bot file (and their www. variants), a desktop Chrome one otherwise.
+  """
+  def user_agent(host) when host in @telegram_bot_hosts do
     "TelegramBot (like TwitterBot)"
   end
 
   def user_agent(_host) do
     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
   end
+
+  @doc "Returns both compiled-in host lists as a keyword list."
+  def list() do
+    [rewrite_hosts: @rewrite_hosts, telegram_bot_hosts: @telegram_bot_hosts]
+  end
 end
diff --git a/lib/util.ex b/lib/util.ex
index 71fddab..dea7834 100644
--- a/lib/util.ex
+++ b/lib/util.ex
@@ -1,84 +1,104 @@
 defmodule Util do
   defmodule Map do
     def put_if_not_null(map, _key, nil) do
       map
     end
 
     def put_if_not_null(map, key, value) do
       Elixir.Map.put(map, key, value)
     end
   end
 
   def to_naive_date_time(naive = %NaiveDateTime{}), do: naive
   def to_naive_date_time(datetime = %DateTime{}), do: DateTime.to_naive(datetime)
 
   def to_naive_date_time(timestamp) when is_integer(timestamp) do
     timestamp
     |> to_date_time()
     |> to_naive_date_time()
   end
 
   def to_date_time(naive_or_timestamp, timezone \\ "Europe/Paris")
 
   def to_date_time(date = %DateTime{}, timezone) do
     DateTime.shift_zone!(date, timezone, Tzdata.TimeZoneDatabase)
   end
 
   def to_date_time(naive = %NaiveDateTime{}, timezone) do
     DateTime.from_naive!(naive, timezone, Tzdata.TimeZoneDatabase)
   end
 
   # todo: this is wrong.
   def to_date_time(timestamp, timezone) when is_integer(timestamp) do
     timestamp
     |> DateTime.from_unix!(:millisecond)
     |> DateTime.shift_zone!(timezone, Tzdata.TimeZoneDatabase)
   end
 
   def plusminus(number) when number > 0, do: "+#{number}"
   def plusminus(0), do: "0"
   def plusminus(number) when number < 0, do: "#{number}"
 
   def float_paparse(float) when is_float(float), do: {float, ""}
   def float_paparse(int) when is_integer(int), do: {int + 0.0, ""}
 
   def float_paparse(string) when is_binary(string) do
     string
     |> String.replace(",", ".")
     |> Float.parse()
   end
 
   def ets_mutate_select_each(ets, table, spec \\ [{:"$1", [], [:"$1"]}], fun) do
     ets.safe_fixtable(table, true)
     first = ets.select(table, spec, 1)
     do_ets_mutate_select_each(ets, table, fun, first)
   after
     ets.safe_fixtable(table, false)
   end
 
   defp do_ets_mutate_select_each(_, _, _, :"$end_of_table") do
     :ok
   end
 
   defp do_ets_mutate_select_each(ets, table, fun, {objs, continuation}) do
     for obj <- objs, do: fun.(table, obj)
     do_ets_mutate_select_each(ets, table, fun, ets.select(continuation))
   end
 
   def ets_mutate_each(ets, table, fun) do
     ets.safe_fixtable(table, true)
     first = ets.first(table)
     do_ets_mutate_each(ets, table, fun, first)
   after
     ets.safe_fixtable(table, false)
   end
 
+  # Stop once the traversal reaches the end-of-table marker instead of looking it up as a key.
+  defp do_ets_mutate_each(_ets, _table, _fun, :"$end_of_table") do
+    :ok
+  end
+
   defp do_ets_mutate_each(ets, table, fun, key) do
     case ets.lookup(table, key) do
       [elem] -> fun.(table, elem)
       _ -> nil
     end
 
     do_ets_mutate_each(ets, table, fun, ets.next(table, key))
   end
+
+  @doc """
+  Reads the file at `path` and returns its entries, one per line.
+
+  Lines are trimmed; blank lines and lines starting with `#` (after trimming) are dropped.
+  Raises `File.Error` if the file cannot be read.
+  """
+  @spec read_file_list!(Path.t()) :: [String.t()]
+  def read_file_list!(path) do
+    path
+    |> File.read!()
+    |> String.split("\n")
+    |> Enum.map(&String.trim/1)
+    |> Enum.reject(&(&1 == "" or String.starts_with?(&1, "#")))
+  end
 end