diff --git a/lib/open_ai.ex b/lib/open_ai.ex
index cc0de27..da54e3a 100644
--- a/lib/open_ai.ex
+++ b/lib/open_ai.ex
@@ -1,20 +1,33 @@
 defmodule OpenAi do
+  require Logger
+
   def post(path, data, options \\ []) do
     config = Application.get_env(:nola, :openai, [])
-    url = "https://api.openai.com#{path}"
+    base_url = Keyword.get(config, :base_url, "https://api.openai.com")
+    url = "#{base_url}#{path}"
     headers = [{"user-agent", "internal private experiment bot, href@random.sh"},
                {"content-type", "application/json"},
                {"authorization", "Bearer " <> Keyword.get(config, :key, "unset-api-key")}]
-    options = options ++ [timeout: :timer.seconds(180), recv_timeout: :timer.seconds(180)]
+    options = options ++ [timeout: :timer.seconds(30), recv_timeout: :timer.seconds(30)]
+    Logger.debug("openai: post: #{url} #{inspect data}")
     with {:ok, json} <- Poison.encode(data),
          {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post(url, json, headers, options),
          {:ok, data} <- Poison.decode(body) do
      {:ok, data}
    else
-     {:ok, %HTTPoison.Response{status_code: code}} -> {:error, Plug.Conn.Status.reason_atom(code)}
-     {:error, %HTTPoison.Error{reason: reason}} -> {:error, reason}
+     {:ok, %HTTPoison.Response{status_code: code, body: body}} ->
+       Logger.error("OpenAI: HTTP #{code} #{inspect body}")
+       status = Plug.Conn.Status.reason_atom(code)
+       case Poison.decode(body) do
+         {:ok, %{"error" => %{"message" => message}}} ->
+           {:error, {status, message}}
+         _other ->
+           {:error, status}
+       end
+     {:error, %HTTPoison.Error{reason: reason}} ->
+       {:error, reason}
    end
  end
end
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex
index 4c4261f..84eb976 100644
--- a/lib/plugins/link.ex
+++ b/lib/plugins/link.ex
@@ -1,271 +1,303 @@
 defmodule Nola.Plugins.Link do
  @moduledoc """
  # Link Previewer

  An extensible link previewer for IRC. To add support for a new site, create a handler
  module implementing the callbacks below (see the `link/` directory for examples).

  The first handler in the list whose `match/2` callback returns true is used; if a
  handler returns `:error` or crashes, the default preview is used as a fallback.

  Unsupported websites use the default preview: the page title for HTML documents,
  otherwise the MIME type and file size.

  ## Configuration:

  ```
  config :nola, Nola.Plugins.Link,
    handlers: [
      Nola.Plugins.Link.Youtube: [
        invidious: true
      ],
      Nola.Plugins.Link.Twitter: [],
      Nola.Plugins.Link.Imgur: [],
    ]
  ```
  """
  @ircdoc """
  # Link preview

  Previews links (just post a link!).

  Announces the real URL after redirections and provides extended support for YouTube, Twitter and Imgur.
""" def short_irc_doc, do: false def irc_doc, do: @ircdoc require Logger + alias __MODULE__.Store + alias __MODULE__.Scraper def start_link() do GenServer.start_link(__MODULE__, [], name: __MODULE__) end @callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false @callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error @callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false @callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error @optional_callbacks [expand: 3, post_expand: 4] defstruct [:client] def init([]) do + Store.setup() {:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__]) #{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__]) Logger.info("Link handler started") {:ok, %__MODULE__{}} end def handle_info({:irc, :text, message = %{text: text}}, state) do String.split(text) |> Enum.map(fn(word) -> if String.starts_with?(word, "http://") || String.starts_with?(word, "https://") do uri = URI.parse(word) if uri.scheme && uri.host do spawn(fn() -> :timer.kill_after(:timer.seconds(30)) case expand_link([uri]) do {:ok, uris, text} -> text = case uris do [uri] -> text [luri | _] -> - if luri.host == uri.host && luri.path == luri.path do + if luri.host == uri.host && luri.path == uri.path do text else ["-> #{URI.to_string(luri)}", text] end end - if is_list(text) do - for line <- text, do: message.replyfun.(line) - else - message.replyfun.(text) + case text do + lines when is_list(lines) -> + for text <- lines, do: message.replyfun.(text) + text when is_binary(text) -> + message.replyfun.(text) + nil -> + nil end _ -> nil end end) end end end) {:noreply, state} end def handle_info(msg, state) do {:noreply, state} end def terminate(_reason, state) do :ok end # 1. Match the first valid handler # 2. Try to run the handler # 3. If :error or crash, default link. # If :skip, nothing # 4. ? # Over five redirections: cancel. 
  def expand_link(acc = [_, _, _, _, _ | _]) do
    {:ok, acc, "link redirects more than five times"}
  end
  def expand_link(acc = [uri | _]) do
    Logger.debug("link: expanding: #{inspect uri}")
    handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
    handler = Enum.reduce_while(handlers, nil, fn({module, opts}, acc) ->
      Logger.debug("link: attempt expanding: #{inspect module} for #{inspect uri}")
      module = Module.concat([module])
      case module.match(uri, opts) do
        {true, params} -> {:halt, {module, params, opts}}
        false -> {:cont, acc}
      end
    end)
    run_expand(acc, handler)
  end

  def run_expand(acc, nil) do
    expand_default(acc)
  end

  def run_expand(acc = [uri | _], {module, params, opts}) do
    Logger.debug("link: expanding #{inspect uri} with #{inspect module}")
    case module.expand(uri, params, opts) do
      {:ok, data} -> {:ok, acc, data}
      :error -> expand_default(acc)
      :skip -> nil
    end
  rescue
    e ->
      Logger.error("link: rescued #{inspect uri} with #{inspect module}: #{inspect e}")
      Logger.error(Exception.format(:error, e, __STACKTRACE__))
      expand_default(acc)
  catch
    e, b ->
      Logger.error("link: caught #{inspect uri} with #{inspect module}: #{inspect {e, b}}")
      expand_default(acc)
  end

  defp get(url, headers \\ [], options \\ []) do
    get_req(url, :hackney.get(url, headers, <<>>, options))
  end

  defp get_req(_, {:error, reason}) do
    {:error, reason}
  end

  defp get_req(url, {:ok, 200, headers, client}) do
    headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
      Map.put(acc, String.downcase(key), value)
    end)
    content_type = Map.get(headers, "content-type", "application/octet-stream")
    length = Map.get(headers, "content-length", "0")
    {length, _} = Integer.parse(length)

    handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
    handler = Enum.reduce_while(handlers, false, fn({module, opts}, acc) ->
      module = Module.concat([module])
      try do
        case module.post_match(url, content_type, headers, opts) do
          {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
          false -> {:cont, acc}
        end
      rescue
        e ->
          Logger.error(inspect(e))
          {:cont, false}
      catch
        e, b ->
          Logger.error(inspect({b}))
          {:cont, false}
      end
    end)

    cond do
      handler != false and length <= 30_000_000 ->
        case get_body(url, 30_000_000, client, handler, <<>>) do
          {:ok, _} = ok -> ok
          :error -> {:ok, "file: #{content_type}, size: #{human_size(length)}"}
        end
      #String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
      #  get_body(url, 30_000_000, client, <<>>)
      true ->
        :hackney.close(client)
        {:ok, "file: #{content_type}, size: #{human_size(length)}"}
    end
  end

  defp get_req(_, {:ok, redirect, headers, client}) when redirect in 300..399 do
    headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
      Map.put(acc, String.downcase(key), value)
    end)
    location = Map.get(headers, "location")
    :hackney.close(client)
    {:redirect, location}
  end

  defp get_req(_, {:ok, status, headers, client}) do
    :hackney.close(client)
    {:error, status, headers}
  end

  defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do
    case :hackney.stream_body(client) do
      {:ok, data} ->
        get_body(url, len, client, h, << acc::binary, data::binary >>)
      :done ->
        body = case mode do
          :body -> acc
          :file ->
            {:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
            File.write!(tmpfile, acc)
            tmpfile
        end
        handler.post_expand(url, body, params, opts)
      {:error, reason} ->
        {:ok, "failed to fetch body: #{inspect reason}"}
    end
  end

  defp get_body(_, len, client, h, _acc) do
    :hackney.close(client)
    IO.inspect(h)
    {:ok, "Error: file over 30 MB"}
  end

  def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
    Logger.debug("link: expanding #{uri} with default")
    headers = [{"user-agent", "DmzBot (like TwitterBot)"}]
    options = [follow_redirect: false, max_body_length: 30_000_000]
+   url = URI.to_string(uri)
    case get(URI.to_string(uri), headers, options) do
      {:ok, text} ->
        {:ok, acc, text}
      {:redirect, link} ->
        new_uri = URI.parse(link)
        #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
        expand_link([new_uri | acc])
      {:error, status, _headers} ->
-       text = Plug.Conn.Status.reason_phrase(status)
-       {:ok, acc, "Error: HTTP #{text} (#{status})"}
+       #text = Plug.Conn.Status.reason_phrase(status)
+       #{:ok, acc, "Error: HTTP #{text} (#{status})"}
+       retry_expand_with_scraper(acc, url)
      {:error, {:tls_alert, {:handshake_failure, err}}} ->
-       {:ok, acc, "TLS Error: #{to_string(err)}"}
+       {:ok, acc, nil} # "TLS Error: #{to_string(err)}"}
+     {:error, :timeout} ->
+       retry_expand_with_scraper(acc, url)
      {:error, reason} ->
-       {:ok, acc, "Error: #{to_string(reason)}"}
+       {:ok, acc, nil} #"Error: #{to_string(reason)}"}
    end
  end

  # Unsupported scheme, came from a redirect.
  def expand_default(acc = [uri | _]) do
    {:ok, [uri], "-> #{URI.to_string(uri)}"}
  end

+  # Last resort: scrape the page.
+  # This is mostly reached on HTTP 403/500 or timeouts, i.e. when the site blocks us.
+  # An external service scrapes the page for us and returns the body,
+  # and the HTML handler is then called directly on the result.
+  defp retry_expand_with_scraper(acc, url) do
+    handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
+    Logger.info("Attempting scraper #{inspect handlers}")
+    with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"),
+         {:ok, body, _meta} <- Scraper.get(url),
+         {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil)
+    do
+      {:ok, acc, text}
+    else
+      error ->
+        Logger.debug("Attempt with scraper failed: #{inspect error}")
+        # We give up here. The acc from the calling `expand_default/1` does not matter
+        # anymore, and returning an error message to the channel would be useless noise.
+        {:ok, acc, nil}
+    end
+  end

  defp human_size(bytes) do
    bytes
    |> FileSize.new(:b)
    |> FileSize.scale()
    |> FileSize.format()
  end
+
end
diff --git a/lib/plugins/link/github.ex b/lib/plugins/link/github.ex
index 0069a40..77fa81f 100644
--- a/lib/plugins/link/github.ex
+++ b/lib/plugins/link/github.ex
@@ -1,49 +1,76 @@
 defmodule Nola.Plugins.Link.Github do
  @behaviour Nola.Plugins.Link

  @impl true
  def match(uri = %URI{host: "github.com", path: path}, _) do
-   case String.split(path, "/") do
-     ["", user, repo] ->
-       {true, %{user: user, repo: repo, path: "#{user}/#{repo}"}}
-     _ ->
-       false
+   with ["", user, repo] <- String.split(path, "/") do
+     {true, %{user: user, repo: repo, path: "#{user}/#{repo}"}}
+   else
+     _ -> false
    end
  end
  def match(_, _), do: false

  @impl true
  def post_match(_, _, _, _), do: false

  @impl true
  def expand(_uri, %{user: user, repo: repo}, _opts) do
-   case HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}") do
-     {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
-       {:ok, json} = Jason.decode(body)
-       src = json["source"]["full_name"]
-       disabled = if(json["disabled"], do: " (disabled)", else: "")
-       archived = if(json["archived"], do: " (archived)", else: "")
-       fork = if src && src != json["full_name"] do
-         " (⑂ #{json["source"]["full_name"]})"
-       else
-         ""
-       end
-       start = "#{json["full_name"]}#{disabled}#{archived}#{fork} - #{json["description"]}"
-       tags = for(t <- json["topics"]||[], do: "##{t}") |> Enum.intersperse(", ") |> Enum.join("")
-       lang = if(json["language"], do: "#{json["language"]} - ", else: "")
-       issues = if(json["open_issues_count"], do: "#{json["open_issues_count"]} issues - ", else: "")
-       last_push = if at = json["pushed_at"] do
-         {:ok, date, _} = DateTime.from_iso8601(at)
-         " - last pushed #{DateTime.to_string(date)}"
-       else
-         ""
-       end
-       network = "#{lang}#{issues}#{json["stargazers_count"]} stars - #{json["subscribers_count"]} watchers - #{json["forks_count"]} forks#{last_push}"
-       {:ok, [start, tags, network]}
-     other ->
-       :error
+   with {:ok, response} <- HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}"),
+        {:ok, json} <- Jason.decode(response.body) do
+     info = %{
+       full_name: json["full_name"],
+       disabled: json["disabled"],
+       archived: json["archived"],
+       source: json["source"],
+       description: json["description"],
+       topics: json["topics"],
+       language: json["language"],
+       open_issues_count: json["open_issues_count"],
+       pushed_at: json["pushed_at"],
+       stargazers_count: json["stargazers_count"],
+       subscribers_count: json["subscribers_count"],
+       forks_count: json["forks_count"]
+     }
+
+     start = build_start(info)
+     tags = build_tags(info)
+     network = build_network(info)
+
+     {:ok, [start, tags, network]}
+   else
+     _ -> :error
    end
  end

+  defp build_start(info) do
+    parts = []
+    |> maybe_add(info.disabled, " (disabled)")
+    |> maybe_add(info.archived, " (archived)")
+    |> maybe_add(info.source && info.source["full_name"] != info.full_name, " (⑂ #{info.source["full_name"]})")
+
+    "#{info.full_name}#{parts} - #{info.description}"
+  end
+
+  defp build_tags(info) do
+    for(t <- info.topics || [], do: "##{t}") |> Enum.intersperse(", ") |> Enum.join("")
+  end
+
+  defp build_network(info) do
+    lang = info.language && "#{info.language} - " || ""
+    issues = info.open_issues_count && "#{info.open_issues_count} issues - " || ""
+    last_push =
+      if at = info.pushed_at do
+        {:ok, date, _} = DateTime.from_iso8601(at)
+        " - last pushed #{DateTime.to_string(date)}"
+      else
+        ""
+      end
+    "#{lang}#{issues}#{info.stargazers_count} stars - #{info.subscribers_count} watchers - #{info.forks_count} forks#{last_push}"
+  end
+
+  defp maybe_add(acc, condition, value) do
+    if condition, do: acc ++ [value], else: acc
+  end
 end
diff --git a/lib/plugins/link/html.ex b/lib/plugins/link/html.ex
index a941aac..5899ed5 100644
--- a/lib/plugins/link/html.ex
+++ b/lib/plugins/link/html.ex
@@ -1,106 +1,134 @@
 defmodule Nola.Plugins.Link.HTML do
  @behaviour Nola.Plugins.Link

  @impl true
  def match(_, _), do: false

  @impl true
- def post_match(_url, "text/html"<>_, _header, _opts) do
-   {:body, nil}
- end
+ def post_match(_url, "text/html" <> _, _header, _opts), do: {:body, nil}
  def post_match(_, _, _, _), do: false

  @impl true
  def post_expand(url, body, _params, _opts) do
    html = Floki.parse(body)
-   title = collect_title(html)
    opengraph = collect_open_graph(html)
-   itemprops = collect_itemprops(html)
-   text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
-     sitename = if sn = Map.get(opengraph, "site_name") do
-       "#{sn}"
-     else
-       ""
-     end
-     paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
-       ""
-     else
-       "[paywall] "
-     end
-     section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
-       ": #{section}"
-     else
-       ""
-     end
-     date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
-       {:ok, date, _} ->
-         "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
-       _ ->
-         ""
-     end
-     uri = URI.parse(url)
-
-     prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
-     prefix = unless prefix == "" do
-       "#{prefix} — "
-     else
-       ""
-     end
-     [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ Nola.Irc.Message.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
+
+   text = if has_sufficient_opengraph_data?(opengraph) do
+     generate_text_from_opengraph(url, html, opengraph)
    else
-     clean_text(title)
+     clean_text(collect_title(html))
    end
+
    {:ok, text}
  end

+  defp has_sufficient_opengraph_data?(opengraph) do
+    Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description")
+  end
+
+  defp generate_text_from_opengraph(url, html, opengraph) do
+    itemprops = collect_itemprops(html)
+    prefix = collect_prefix_and_site_name(url, opengraph, itemprops)
+    description = collect_description(opengraph, itemprops, 500)
+
+    [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ description
+  end
+
  defp collect_title(html) do
    case Floki.find(html, "title") do
-     [{"title", [], [title]} | _] ->
-       String.trim(title)
-     _ ->
-       nil
+     [{"title", [], [title]} | _] -> String.trim(title)
+     _ -> ""
    end
  end

  defp collect_open_graph(html) do
-   Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
-     case tag do
-       {"meta", values, []} ->
-         name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
-         content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
-         case name do
-           "og:" <> key ->
-             Map.put(acc, key, content)
-           "article:"<>_ ->
-             Map.put(acc, name, content)
-           _other -> acc
-         end
-       _other -> acc
-     end
-   end)
+   Floki.find(html, "head meta") |> Enum.reduce(%{}, &extract_meta_tag/2)
  end

+  defp extract_meta_tag({"meta", values, []}, acc) do
+    with {_, name} <- List.keyfind(values, "property", 0, {nil, nil}),
+         {_, content} <- List.keyfind(values, "content", 0, {nil, nil}),
+         true <- is_valid_meta_tag?(name) do
+      Map.put(acc, strip_prefix(name), content)
+    else
+      _ -> acc
+    end
+  end
+  defp extract_meta_tag(_, acc), do: acc
+
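+  # For reference, a sketch of what collect_open_graph/1 produces for a
+  # hypothetical <head> (illustrative values only):
+  #
+  #     <meta property="og:title" content="Hello"/>
+  #     <meta property="og:site_name" content="Example"/>
+  #     <meta property="article:section" content="Tech"/>
+  #
+  # collects to:
+  #
+  #     %{"title" => "Hello", "site_name" => "Example", "article:section" => "Tech"}
+  #
+  # ("og:" prefixes are stripped by strip_prefix/1; "article:" keys are kept as-is.)
+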
+  defp is_valid_meta_tag?(nil), do: false
+  defp is_valid_meta_tag?(name) do
+    String.starts_with?(name, "og:") || String.starts_with?(name, "article:")
+  end
+
+  defp strip_prefix("og:" <> key), do: key
+  defp strip_prefix(other), do: other
+
  defp collect_itemprops(html) do
-   Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
-     case tag do
-       {"meta", values, []} ->
-         name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
-         content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
-         case name do
-           "article:" <> key ->
-             Map.put(acc, name, content)
-           _other -> acc
-         end
-       _other -> acc
-     end
-   end)
+   Floki.find(html, "[itemprop]") |> Enum.reduce(%{}, &extract_itemprop/2)
  end

+  defp extract_itemprop({"meta", values, []}, acc) do
+    with {_, name} <- List.keyfind(values, "itemprop", 0, {nil, nil}),
+         {_, content} <- List.keyfind(values, "content", 0, {nil, nil}),
+         true <- String.starts_with?(name, "article:") do
+      Map.put(acc, name, content)
+    else
+      _ -> acc
+    end
+  end
+  defp extract_itemprop(_, acc), do: acc
+
+  defp collect_prefix_and_site_name(url, opengraph, itemprops) do
+    uri = URI.parse(url)
+    site_name = Map.get(opengraph, "site_name", uri.host)
+    paywall_status = get_paywall_status(opengraph, itemprops)
+    section = get_section(opengraph, itemprops)
+
+    prefix = "#{paywall_status}#{site_name}#{section}"
+    if prefix == "", do: "", else: "#{prefix} — "
+  end
+
+  defp get_paywall_status(opengraph, itemprops) do
+    content_tier = Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free"))
+    if content_tier == "free", do: "", else: "[paywall] "
+  end
+
+  defp get_section(opengraph, itemprops) do
+    section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section"))
+    if section, do: ": #{section}", else: ""
+  end
+
+  defp collect_description(opengraph, itemprops, max_length) do
+    date = get_formatted_date(opengraph, itemprops)
+    description = transform_description(Map.get(opengraph, "description"), max_length)
+
+    Nola.Irc.Message.splitlong(clean_text("#{date}#{description}"))
+  end
+
+  defp get_formatted_date(opengraph, itemprops) do
+    published_time = Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))
+    case DateTime.from_iso8601(published_time) do
+      {:ok, date, _} -> "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
+      _ -> ""
+    end
+  end
+
+  # TODO: Swap with AI description instead of truncating.
+  defp transform_description(string, length) when is_binary(string) do
+    if String.length(string) >= length, do: String.slice(string, 0, length), else: string
+  end
+  defp transform_description(nil, _), do: nil
+
  defp clean_text(text) do
    text
    |> String.replace("\n", " ")
    |> HtmlEntities.decode()
  end
-
 end
diff --git a/lib/plugins/link/reddit.ex b/lib/plugins/link/reddit.ex
index 016e025..707e284 100644
--- a/lib/plugins/link/reddit.ex
+++ b/lib/plugins/link/reddit.ex
@@ -1,119 +1,119 @@
 defmodule Nola.Plugins.Link.Reddit do
  @behaviour Nola.Plugins.Link

  @impl true
  def match(uri = %URI{host: "reddit.com", path: path}, _) do
    case String.split(path, "/") do
      ["", "r", sub, "comments", post_id, _slug] ->
        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
      ["", "r", sub, "comments", post_id, _slug, ""] ->
        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
      ["", "r", sub, ""] ->
        {true, %{mode: :sub, path: path, sub: sub}}
      ["", "r", sub] ->
        {true, %{mode: :sub, path: path, sub: sub}}
      # ["", "u", user] ->
      #   {true, %{mode: :user, path: path, user: user}}
      _ ->
        false
    end
  end
  def match(uri = %URI{host: host, path: path}, opts) do
    if String.ends_with?(host, ".reddit.com") do
      match(%URI{uri | host: "reddit.com"}, opts)
    else
      false
    end
  end

  @impl true
  def post_match(_, _, _, _), do: false

  @impl true
  def expand(_, %{mode: :sub, sub: sub}, _opts) do
    url = "https://api.reddit.com/r/#{sub}/about"
    case HTTPoison.get(url) do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        sr = Jason.decode!(body)
          |> Map.get("data")
          |> IO.inspect(limit: :infinity)
        description = Map.get(sr, "public_description")||Map.get(sr, "description", "") |> String.split("\n") |> List.first()
        name = if title = Map.get(sr, "title") do
          Map.get(sr, "display_name_prefixed") <> ": " <> title
        else
          Map.get(sr, "display_name_prefixed")
        end
        nsfw = if Map.get(sr, "over18") do
          "[NSFW] "
        else
          ""
        end
        quarantine = if Map.get(sr, "quarantine") do
          "[Quarantined] "
        else
          ""
        end
        count = "#{Map.get(sr, "subscribers")} subscribers, #{Map.get(sr, "active_user_count")} active"
        preview = "#{quarantine}#{nsfw}#{name} — #{description} (#{count})"
        {:ok, preview}
      _ ->
        :error
    end
  end

  def expand(_uri, %{mode: :post, path: path, sub: sub, post_id: post_id}, _opts) do
    case HTTPoison.get("https://api.reddit.com#{path}?sr_detail=true") do
      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
        json = Jason.decode!(body)
        op = List.first(json)
          |> Map.get("data")
          |> Map.get("children")
          |> List.first()
          |> Map.get("data")
          |> IO.inspect(limit: :infinity)
        sr = get_in(op, ["sr_detail", "display_name_prefixed"])
        {self?, url} = if Map.get(op, "selftext") == "" do
          {false, Map.get(op, "url")}
        else
          {true, nil}
        end
        self_str = if(self?, do: "text", else: url)
        up = Map.get(op, "ups")
        down = Map.get(op, "downs")
        comments = Map.get(op, "num_comments")
        nsfw = if Map.get(op, "over_18") do
          "[NSFW] "
        else
          ""
        end
        state = cond do
          Map.get(op, "hidden") -> "hidden"
          Map.get(op, "archived") -> "archived"
          Map.get(op, "locked") -> "locked"
          Map.get(op, "quarantine") -> "quarantined"
          Map.get(op, "removed_by") || Map.get(op, "removed_by_category") -> "removed"
          Map.get(op, "banned_by") -> "banned"
          Map.get(op, "pinned") -> "pinned"
          Map.get(op, "stickied") -> "stickied"
          true -> nil
        end
        flair = if flair = Map.get(op, "link_flair_text") do
          "[#{flair}] "
        else
          ""
        end
        title = "#{nsfw}#{sr}: #{flair}#{Map.get(op, "title")}"
        state_str = if(state, do: "#{state}, ")
-       content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{down} down, #{comments} comments - #{self_str}"
content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{comments} comments - #{self_str}" {:ok, [title, content]} err -> :error end end end diff --git a/lib/plugins/link/scraper.ex b/lib/plugins/link/scraper.ex new file mode 100644 index 0000000..f5487e3 --- /dev/null +++ b/lib/plugins/link/scraper.ex @@ -0,0 +1,45 @@ +defmodule Nola.Plugins.Link.Scraper do + + defmodule UseScraper do + require Logger + + def get(url, config) do + base_url = Keyword.get(config, :base_url, "https://api.usescraper.com") + api_key = Keyword.get(config, :api_key, "unset api key") + options = Keyword.get(config, :http_options, []) + headers = [{"user-agent", "nola, href@random.sh"}, + {"content-type", "application/json"}, + {"authorization", "Bearer " <> api_key}] + Logger.debug("scraper: use_scraper: get: #{url}") + with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}), + {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options), + {:ok, %{"status" => "scraped", "html" => body, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <- Poison.decode(body) do + {:ok, body, meta} + else + {:ok, %{"status" => "scraped", "text" => body, "meta" => meta = %{"fetchedUrlStatusCode" => code}}} -> + Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}") + status = Plug.Conn.Status.reason_atom(code) + {:error, status} + {:ok, %{"status" => "failed"}} -> + Logger.error("scraper: use_scraper: scraper service failed for #{url}") + {:error, :scrape_failed} + {:ok, %HTTPoison.Response{status_code: code, body: body}} -> + Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}") + status = Plug.Conn.Status.reason_atom(code) + {:error, status} + {:error, %HTTPoison.Error{reason: reason}} -> + Logger.error("scraper: use_scraper: scraper service failed (http #{inspect reason}) for #{url}") + {:error, reason} + end + end + end + + def get(url) do + config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || [] + case config[:service] do + "usescraper" -> UseScraper.get(url, config[:config] || []) + _ -> {:error, :scraping_disabled} + end + end + +end diff --git a/lib/plugins/link/store.ex b/lib/plugins/link/store.ex new file mode 100644 index 0000000..566cc9a --- /dev/null +++ b/lib/plugins/link/store.ex @@ -0,0 +1,30 @@ +defmodule Nola.Plugins.Link.Store do + require Record + import Ex2ms + + @type url() :: String.t() + + Record.defrecord(:link, link: nil, at: nil) + @type link :: record(:link, link: String.t(), at: nil) + + Record.defrecord(:link_entry, key: nil, at: nil) + @type link_entry :: record(:link_entry, key: {url(), String.t()}, at: nil) + + def setup do + :ets.new(:links, [:set, :public, :named_table, keypos: 2]) + end + + @spec insert_link(url()) :: true + def insert_link(url) do + :ets.insert(:links, link(link: url, at: NaiveDateTime.utc_now() |> NaiveDateTime.to_unix())) + end + + @spec get_link(url()) :: String.t() | nil + def get_link(url) do + case :ets.lookup(:links, url) do + [link] -> link + [] -> nil + end + end + +end