diff --git a/lib/open_ai.ex b/lib/open_ai.ex
index cc0de27..da54e3a 100644
--- a/lib/open_ai.ex
+++ b/lib/open_ai.ex
@@ -1,20 +1,33 @@
defmodule OpenAi do
+ require Logger
+
def post(path, data, options \\ []) do
config = Application.get_env(:nola, :openai, [])
- url = "https://api.openai.com#{path}"
+ base_url = Keyword.get(config, :base_url, "https://api.openai.com")
+ url = "#{base_url}#{path}"
headers = [{"user-agent", "internal private experiment bot, href@random.sh"},
{"content-type", "application/json"},
{"authorization", "Bearer " <> Keyword.get(config, :key, "unset-api-key")}]
- options = options ++ [timeout: :timer.seconds(180), recv_timeout: :timer.seconds(180)]
+ options = options ++ [timeout: :timer.seconds(30), recv_timeout: :timer.seconds(30)]
+ Logger.debug("openai: post: #{url} #{inspect data}")
with {:ok, json} <- Poison.encode(data),
{:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post(url, json, headers, options),
{:ok, data} <- Poison.decode(body) do
{:ok, data}
else
- {:ok, %HTTPoison.Response{status_code: code}} -> {:error, Plug.Conn.Status.reason_atom(code)}
- {:error, %HTTPoison.Error{reason: reason}} -> {:error, reason}
+ {:ok, %HTTPoison.Response{status_code: code, body: body}} ->
+ Logger.error("OpenAI: HTTP #{code} #{inspect body}")
+ status = Plug.Conn.Status.reason_atom(code)
+ case Poison.decode(body) do
+ {:ok, %{"error" => %{"message" => message, "code" => code}}} ->
+ {:error, {status, message}}
+ _ ->
+ {:error, status}
+ end
+ {:error, %HTTPoison.Error{reason: reason}} ->
+ {:error, reason}
end
end
end
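
Aside: with the new `base_url` lookup, the endpoint is configurable. A minimal sketch of the matching configuration and a call, assuming only the keys `OpenAi.post/3` reads above (`:key`, `:base_url`); the path and payload are illustrative:

```
# config/runtime.exs — sketch, keys from OpenAi.post/3 above
config :nola, :openai,
  key: System.get_env("OPENAI_API_KEY"),
  base_url: "https://api.openai.com"

# Illustrative call; return shapes match the `with`/`else` above.
case OpenAi.post("/v1/chat/completions", %{model: "a-model-name", messages: []}) do
  {:ok, data} -> data
  {:error, {status, message}} -> raise "OpenAI #{status}: #{message}"
  {:error, reason} -> raise "HTTP error: #{inspect reason}"
end
```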
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex
index 4c4261f..84eb976 100644
--- a/lib/plugins/link.ex
+++ b/lib/plugins/link.ex
@@ -1,271 +1,303 @@
defmodule Nola.Plugins.Link do
@moduledoc """
# Link Previewer
An extensible link previewer for IRC.
To extend the supported sites, create a new handler implementing the callbacks.
See the `link/` directory. The first handler in the list whose `match/2` callback returns true is used;
if it returns `:error` or crashes, the plugin falls back to the default preview.
Unsupported websites use the default preview: the page title for HTML documents, otherwise
the mimetype and size.
## Configuration:
```
config :nola, Nola.Plugins.Link,
handlers: [
Nola.Plugins.Link.Youtube: [
invidious: true
],
Nola.Plugins.Link.Twitter: [],
Nola.Plugins.Link.Imgur: [],
]
```
"""
@ircdoc """
# Link preview
Previews links (just post a link!).
Announces real URL after redirections and provides extended support for YouTube, Twitter and Imgur.
"""
def short_irc_doc, do: false
def irc_doc, do: @ircdoc
require Logger
+ alias __MODULE__.Store
+ alias __MODULE__.Scraper
def start_link() do
GenServer.start_link(__MODULE__, [], name: __MODULE__)
end
@callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false
@callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
@callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false
@callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
@optional_callbacks [expand: 3, post_expand: 4]
defstruct [:client]
def init([]) do
+ Store.setup()
{:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__])
#{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__])
Logger.info("Link handler started")
{:ok, %__MODULE__{}}
end
def handle_info({:irc, :text, message = %{text: text}}, state) do
String.split(text)
|> Enum.map(fn(word) ->
if String.starts_with?(word, "http://") || String.starts_with?(word, "https://") do
uri = URI.parse(word)
if uri.scheme && uri.host do
spawn(fn() ->
:timer.kill_after(:timer.seconds(30))
case expand_link([uri]) do
{:ok, uris, text} ->
text = case uris do
[uri] -> text
[luri | _] ->
- if luri.host == uri.host && luri.path == luri.path do
+ if luri.host == uri.host && luri.path == uri.path do
text
else
["-> #{URI.to_string(luri)}", text]
end
end
- if is_list(text) do
- for line <- text, do: message.replyfun.(line)
- else
- message.replyfun.(text)
+ case text do
+ lines when is_list(lines) ->
+ for text <- lines, do: message.replyfun.(text)
+ text when is_binary(text) ->
+ message.replyfun.(text)
+ nil ->
+ nil
end
_ -> nil
end
end)
end
end
end)
{:noreply, state}
end
def handle_info(msg, state) do
{:noreply, state}
end
def terminate(_reason, state) do
:ok
end
# 1. Match the first valid handler
# 2. Try to run the handler
# 3. If :error or crash, default link.
# If :skip, nothing
# 4. ?
# Over five redirections: cancel.
def expand_link(acc = [_, _, _, _, _ | _]) do
{:ok, acc, "link redirects more than five times"}
end
def expand_link(acc=[uri | _]) do
Logger.debug("link: expanding: #{inspect uri}")
handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
handler = Enum.reduce_while(handlers, nil, fn({module, opts}, acc) ->
Logger.debug("link: attempt expanding: #{inspect module} for #{inspect uri}")
module = Module.concat([module])
case module.match(uri, opts) do
{true, params} -> {:halt, {module, params, opts}}
false -> {:cont, acc}
end
end)
run_expand(acc, handler)
end
def run_expand(acc, nil) do
expand_default(acc)
end
def run_expand(acc=[uri|_], {module, params, opts}) do
Logger.debug("link: expanding #{inspect uri} with #{inspect module}")
case module.expand(uri, params, opts) do
{:ok, data} -> {:ok, acc, data}
:error -> expand_default(acc)
:skip -> nil
end
rescue
e ->
Logger.error("link: rescued #{inspect uri} with #{inspect module}: #{inspect e}")
Logger.error(Exception.format(:error, e, __STACKTRACE__))
expand_default(acc)
catch
e, b ->
Logger.error("link: catched #{inspect uri} with #{inspect module}: #{inspect {e, b}}")
expand_default(acc)
end
defp get(url, headers \\ [], options \\ []) do
get_req(url, :hackney.get(url, headers, <<>>, options))
end
defp get_req(_, {:error, reason}) do
{:error, reason}
end
defp get_req(url, {:ok, 200, headers, client}) do
headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
Map.put(acc, String.downcase(key), value)
end)
content_type = Map.get(headers, "content-type", "application/octet-stream")
length = Map.get(headers, "content-length", "0")
{length, _} = Integer.parse(length)
handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
handler = Enum.reduce_while(handlers, false, fn({module, opts}, acc) ->
module = Module.concat([module])
try do
case module.post_match(url, content_type, headers, opts) do
{mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
false -> {:cont, acc}
end
rescue
e ->
Logger.error(inspect(e))
{:cont, false}
catch
e, b ->
Logger.error(inspect({e, b}))
{:cont, false}
end
end)
cond do
handler != false and length <= 30_000_000 ->
case get_body(url, 30_000_000, client, handler, <<>>) do
{:ok, _} = ok -> ok
:error ->
{:ok, "file: #{content_type}, size: #{human_size(length)}"}
end
#String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
# get_body(url, 30_000_000, client, <<>>)
true ->
:hackney.close(client)
{:ok, "file: #{content_type}, size: #{human_size(length)}"}
end
end
defp get_req(_, {:ok, redirect, headers, client}) when redirect in 300..399 do
headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
Map.put(acc, String.downcase(key), value)
end)
location = Map.get(headers, "location")
:hackney.close(client)
{:redirect, location}
end
defp get_req(_, {:ok, status, headers, client}) do
:hackney.close(client)
{:error, status, headers}
end
defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do
case :hackney.stream_body(client) do
{:ok, data} ->
get_body(url, len, client, h, << acc::binary, data::binary >>)
:done ->
body = case mode do
:body -> acc
:file ->
{:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
File.write!(tmpfile, acc)
tmpfile
end
handler.post_expand(url, body, params, opts)
{:error, reason} ->
{:ok, "failed to fetch body: #{inspect reason}"}
end
end
defp get_body(_, len, client, h, _acc) do
:hackney.close(client)
IO.inspect(h)
{:ok, "Error: file over 30"}
end
def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
Logger.debug("link: expanding #{uri} with default")
headers = [{"user-agent", "DmzBot (like TwitterBot)"}]
options = [follow_redirect: false, max_body_length: 30_000_000]
+ url = URI.to_string(uri)
case get(URI.to_string(uri), headers, options) do
{:ok, text} ->
{:ok, acc, text}
{:redirect, link} ->
new_uri = URI.parse(link)
#new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
expand_link([new_uri | acc])
{:error, status, _headers} ->
- text = Plug.Conn.Status.reason_phrase(status)
- {:ok, acc, "Error: HTTP #{text} (#{status})"}
+ #text = Plug.Conn.Status.reason_phrase(status)
+ #{:ok, acc, "Error: HTTP #{text} (#{status})"}
+ retry_expand_with_scraper(acc, url)
{:error, {:tls_alert, {:handshake_failure, err}}} ->
- {:ok, acc, "TLS Error: #{to_string(err)}"}
+ {:ok, acc, nil} # "TLS Error: #{to_string(err)}"}
+ {:error, :timeout} ->
+ retry_expand_with_scraper(acc, url)
{:error, reason} ->
- {:ok, acc, "Error: #{to_string(reason)}"}
+ {:ok, acc, nil} #"Error: #{to_string(reason)}"}
end
end
# Unsupported scheme, came from a redirect.
def expand_default(acc = [uri | _]) do
{:ok, [uri], "-> #{URI.to_string(uri)}"}
end
+ # Last resort: scrape the page.
+ # Mostly called on 403, 500, or timeout, i.e. when the site blocks us.
+ # An external service scrapes the page for us and returns the body.
+ # We then call the HTML handler directly on the result.
+ defp retry_expand_with_scraper(acc, url) do
+ Logger.info("Attempting scraper")
+ handlers = Keyword.get(Application.get_env(:nola, __MODULE__), :handlers)
+ Logger.info("Attempting scraper #{inspect handlers}")
+ with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"),
+ {:ok, body, _meta} <- Scraper.get(url),
+ {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil)
+ do
+ {:ok, acc, text}
+ else
+ error ->
+ Logger.debug("Attempt with scraper failed: #{inspect error}")
+ # We give up here: return no text (the acc from the caller `expand_default`
+ # no longer matters), as relaying error messages to the channel is not useful.
+ {:ok, acc, nil}
+ end
+ end
defp human_size(bytes) do
bytes
|> FileSize.new(:b)
|> FileSize.scale()
|> FileSize.format()
end
+
end
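
Aside: per the moduledoc, supporting a new site is a module implementing the behaviour. A hypothetical handler (name and matching logic invented for illustration) that previews one host with a single line, registered under `handlers:` like the ones above:

```
defmodule Nola.Plugins.Link.Example do
  @behaviour Nola.Plugins.Link

  @impl true
  def match(%URI{host: "example.org", path: path}, _opts), do: {true, %{path: path}}
  def match(_uri, _opts), do: false

  @impl true
  def post_match(_url, _content_type, _headers, _opts), do: false

  @impl true
  def expand(_uri, %{path: path}, _opts) do
    # Returning :error (or crashing) falls back to the default preview.
    {:ok, "example.org — #{path}"}
  end
end
```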
diff --git a/lib/plugins/link/github.ex b/lib/plugins/link/github.ex
index 0069a40..77fa81f 100644
--- a/lib/plugins/link/github.ex
+++ b/lib/plugins/link/github.ex
@@ -1,49 +1,76 @@
defmodule Nola.Plugins.Link.Github do
@behaviour Nola.Plugins.Link
@impl true
def match(uri = %URI{host: "github.com", path: path}, _) do
- case String.split(path, "/") do
- ["", user, repo] ->
- {true, %{user: user, repo: repo, path: "#{user}/#{repo}"}}
- _ ->
- false
+ with ["", user, repo] <- String.split(path, "/") do
+ {true, %{user: user, repo: repo, path: "#{user}/#{repo}"}}
+ else
+ _ -> false
end
end
def match(_, _), do: false
@impl true
def post_match(_, _, _, _), do: false
@impl true
def expand(_uri, %{user: user, repo: repo}, _opts) do
- case HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}") do
- {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
- {:ok, json} = Jason.decode(body)
- src = json["source"]["full_name"]
- disabled = if(json["disabled"], do: " (disabled)", else: "")
- archived = if(json["archived"], do: " (archived)", else: "")
- fork = if src && src != json["full_name"] do
- " (⑂ #{json["source"]["full_name"]})"
- else
- ""
- end
- start = "#{json["full_name"]}#{disabled}#{archived}#{fork} - #{json["description"]}"
- tags = for(t <- json["topics"]||[], do: "##{t}") |> Enum.intersperse(", ") |> Enum.join("")
- lang = if(json["language"], do: "#{json["language"]} - ", else: "")
- issues = if(json["open_issues_count"], do: "#{json["open_issues_count"]} issues - ", else: "")
- last_push = if at = json["pushed_at"] do
- {:ok, date, _} = DateTime.from_iso8601(at)
- " - last pushed #{DateTime.to_string(date)}"
- else
- ""
- end
- network = "#{lang}#{issues}#{json["stargazers_count"]} stars - #{json["subscribers_count"]} watchers - #{json["forks_count"]} forks#{last_push}"
- {:ok, [start, tags, network]}
- other ->
- :error
+ with {:ok, response} <- HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}"),
+ {:ok, json} <- Jason.decode(response.body) do
+ info = %{
+ full_name: json["full_name"],
+ disabled: json["disabled"],
+ archived: json["archived"],
+ source: json["source"],
+ description: json["description"],
+ topics: json["topics"],
+ language: json["language"],
+ open_issues_count: json["open_issues_count"],
+ pushed_at: json["pushed_at"],
+ stargazers_count: json["stargazers_count"],
+ subscribers_count: json["subscribers_count"],
+ forks_count: json["forks_count"]
+ }
+
+ start = build_start(info)
+ tags = build_tags(info)
+ network = build_network(info)
+
+ {:ok, [start, tags, network]}
+ else
+ _ -> :error
end
end
+ defp build_start(info) do
+ parts = []
+ |> maybe_add(info.disabled, " (disabled)")
+ |> maybe_add(info.archived, " (archived)")
+ |> maybe_add(info.source && info.source["full_name"] != info.full_name, " (⑂ #{info.source["full_name"]})")
+
+ "#{info.full_name}#{parts} - #{info.description}"
+ end
+
+ defp build_tags(info) do
+ for(t <- info.topics || [], do: "##{t}") |> Enum.intersperse(", ") |> Enum.join("")
+ end
+
+ defp build_network(info) do
+ lang = info.language && "#{info.language} - " || ""
+ issues = info.open_issues_count && "#{info.open_issues_count} issues - " || ""
+ last_push =
+ if at = info.pushed_at do
+ {:ok, date, _} = DateTime.from_iso8601(at)
+ " - last pushed #{DateTime.to_string(date)}"
+ else
+ ""
+ end
+ "#{lang}#{issues}#{info.stargazers_count} stars - #{info.subscribers_count} watchers - #{info.forks_count} forks#{last_push}"
+ end
+
+ defp maybe_add(acc, condition, value) do
+ if condition, do: acc ++ [value], else: acc
+ end
end
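
Aside: `build_start/1` leans on the fact that interpolating a list of binaries concatenates it (lists of strings are valid chardata), so `maybe_add/3` can accumulate optional fragments. A quick sketch:

```
iex> parts = [] ++ [" (archived)"] ++ [" (⑂ upstream/repo)"]
iex> "user/repo#{parts} - An IRC bot"
"user/repo (archived) (⑂ upstream/repo) - An IRC bot"
```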
diff --git a/lib/plugins/link/html.ex b/lib/plugins/link/html.ex
index a941aac..5899ed5 100644
--- a/lib/plugins/link/html.ex
+++ b/lib/plugins/link/html.ex
@@ -1,106 +1,134 @@
defmodule Nola.Plugins.Link.HTML do
@behaviour Nola.Plugins.Link
@impl true
def match(_, _), do: false
@impl true
- def post_match(_url, "text/html"<>_, _header, _opts) do
- {:body, nil}
- end
+ def post_match(_url, "text/html" <> _, _header, _opts), do: {:body, nil}
def post_match(_, _, _, _), do: false
@impl true
def post_expand(url, body, _params, _opts) do
html = Floki.parse(body)
- title = collect_title(html)
opengraph = collect_open_graph(html)
- itemprops = collect_itemprops(html)
- text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
- sitename = if sn = Map.get(opengraph, "site_name") do
- "#{sn}"
- else
- ""
- end
- paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
- ""
- else
- "[paywall] "
- end
- section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
- ": #{section}"
- else
- ""
- end
- date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
- {:ok, date, _} ->
- "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
- _ ->
- ""
- end
- uri = URI.parse(url)
-
- prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
- prefix = unless prefix == "" do
- "#{prefix} — "
- else
- ""
- end
- [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ Nola.Irc.Message.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
+
+ text = if has_sufficient_opengraph_data?(opengraph) do
+ generate_text_from_opengraph(url, html, opengraph)
else
- clean_text(title)
+ clean_text(collect_title(html))
end
+
{:ok, text}
end
+ defp has_sufficient_opengraph_data?(opengraph) do
+ Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description")
+ end
+
+ defp generate_text_from_opengraph(url, html, opengraph) do
+ itemprops = collect_itemprops(html)
+ prefix = collect_prefix_and_site_name(url, opengraph, itemprops)
+ description = collect_description(opengraph, itemprops, 500)
+
+ [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ description
+ end
+
defp collect_title(html) do
case Floki.find(html, "title") do
- [{"title", [], [title]} | _] ->
- String.trim(title)
- _ ->
- nil
+ [{"title", [], [title]} | _] -> String.trim(title)
+ _ -> ""
end
end
defp collect_open_graph(html) do
- Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
- case tag do
- {"meta", values, []} ->
- name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
- content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
- case name do
- "og:" <> key ->
- Map.put(acc, key, content)
- "article:"<>_ ->
- Map.put(acc, name, content)
- _other -> acc
- end
- _other -> acc
- end
- end)
+ Floki.find(html, "head meta")
+ |> Enum.reduce(%{}, &extract_meta_tag/2)
end
+ defp extract_meta_tag({"meta", values, []}, acc) do
+ with {_, name} <- List.keyfind(values, "property", 0, {nil, nil}),
+ {_, content} <- List.keyfind(values, "content", 0, {nil, nil}),
+ true <- is_valid_meta_tag?(name) do
+ Map.put(acc, strip_prefix(name), content)
+ else
+ _ -> acc
+ end
+ end
+ defp extract_meta_tag(_, acc), do: acc
+
+ # The nil clause must come first: the general clause also matches nil,
+ # and String.starts_with?(nil, _) raises.
+ defp is_valid_meta_tag?(nil), do: false
+
+ defp is_valid_meta_tag?(name) do
+ String.starts_with?(name, "og:") || String.starts_with?(name, "article:")
+ end
+
+ defp strip_prefix("og:" <> key), do: key
+ defp strip_prefix(other), do: other
+
defp collect_itemprops(html) do
- Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
- case tag do
- {"meta", values, []} ->
- name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
- content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
- case name do
- "article:" <> key ->
- Map.put(acc, name, content)
- _other -> acc
- end
- _other -> acc
- end
- end)
+ Floki.find(html, "[itemprop]")
+ |> Enum.reduce(%{}, &extract_itemprop/2)
end
+ defp extract_itemprop({"meta", values, []}, acc) do
+ with {_, name} <- List.keyfind(values, "itemprop", 0, {nil, nil}),
+ {_, content} <- List.keyfind(values, "content", 0, {nil, nil}),
+ true <- String.starts_with?(name, "article:") do
+ Map.put(acc, name, content)
+ else
+ _ -> acc
+ end
+ end
+ defp extract_itemprop(_, acc), do: acc
+
+ defp collect_prefix_and_site_name(url, opengraph, itemprops) do
+ uri = URI.parse(url)
+ site_name = Map.get(opengraph, "site_name", uri.host)
+ paywall_status = get_paywall_status(opengraph, itemprops)
+ section = get_section(opengraph, itemprops)
+
+ prefix = "#{paywall_status}#{site_name}#{section}"
+ if prefix == "", do: "", else: "#{prefix} — "
+ end
+
+ defp get_paywall_status(opengraph, itemprops) do
+ content_tier = Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free"))
+ if content_tier == "free", do: "", else: "[paywall] "
+ end
+
+ defp get_section(opengraph, itemprops) do
+ section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section"))
+ if section, do: ": #{section}", else: ""
+ end
+
+ defp collect_description(opengraph, itemprops, max_length) do
+ date = get_formatted_date(opengraph, itemprops)
+ description = transform_description(Map.get(opengraph, "description"), max_length)
+
+ Nola.Irc.Message.splitlong(clean_text("#{date}#{description}"))
+ end
+
+ defp get_formatted_date(opengraph, itemprops) do
+ published_time = Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))
+ case DateTime.from_iso8601(published_time) do
+ {:ok, date, _} -> "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
+ _ -> ""
+ end
+ end
+
+ # TODO: Swap with AI description instead of truncating.
+ defp transform_description(string, length) when is_binary(string) do
+ # String.truncate/2 does not exist in Elixir's stdlib; slice instead.
+ if String.length(string) >= length, do: String.slice(string, 0, length), else: string
+ end
+ defp transform_description(nil, _), do: nil
+
defp clean_text(text) do
text
|> String.replace("\n", " ")
|> HtmlEntities.decode()
end
-
end
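
Aside: a sketch of the OpenGraph path end to end, with the expected result shape inferred from the code above (assuming `Nola.Irc.Message.splitlong/1` returns a list of lines):

```
body = """
<html><head>
  <title>Fallback title</title>
  <meta property="og:site_name" content="Example News"/>
  <meta property="og:title" content="Headline"/>
  <meta property="og:description" content="Short summary."/>
  <meta property="article:section" content="Tech"/>
</head></html>
"""

{:ok, text} = Nola.Plugins.Link.HTML.post_expand("https://example.org/a", body, nil, nil)
# text ≈ ["Example News: Tech — Headline", "Short summary."]
```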
diff --git a/lib/plugins/link/reddit.ex b/lib/plugins/link/reddit.ex
index 016e025..707e284 100644
--- a/lib/plugins/link/reddit.ex
+++ b/lib/plugins/link/reddit.ex
@@ -1,119 +1,119 @@
defmodule Nola.Plugins.Link.Reddit do
@behaviour Nola.Plugins.Link
@impl true
def match(uri = %URI{host: "reddit.com", path: path}, _) do
case String.split(path, "/") do
["", "r", sub, "comments", post_id, _slug] ->
{true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
["", "r", sub, "comments", post_id, _slug, ""] ->
{true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
["", "r", sub, ""] ->
{true, %{mode: :sub, path: path, sub: sub}}
["", "r", sub] ->
{true, %{mode: :sub, path: path, sub: sub}}
# ["", "u", user] ->
# {true, %{mode: :user, path: path, user: user}}
_ ->
false
end
end
def match(uri = %URI{host: host, path: path}, opts) do
if String.ends_with?(host, ".reddit.com") do
match(%URI{uri | host: "reddit.com"}, opts)
else
false
end
end
@impl true
def post_match(_, _, _, _), do: false
@impl true
def expand(_, %{mode: :sub, sub: sub}, _opts) do
url = "https://api.reddit.com/r/#{sub}/about"
case HTTPoison.get(url) do
{:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
sr = Jason.decode!(body)
|> Map.get("data")
|> IO.inspect(limit: :infinity)
description = Map.get(sr, "public_description")||Map.get(sr, "description", "")
|> String.split("\n")
|> List.first()
name = if title = Map.get(sr, "title") do
Map.get(sr, "display_name_prefixed") <> ": " <> title
else
Map.get(sr, "display_name_prefixed")
end
nsfw = if Map.get(sr, "over18") do
"[NSFW] "
else
""
end
quarantine = if Map.get(sr, "quarantine") do
"[Quarantined] "
else
""
end
count = "#{Map.get(sr, "subscribers")} subscribers, #{Map.get(sr, "active_user_count")} active"
preview = "#{quarantine}#{nsfw}#{name} — #{description} (#{count})"
{:ok, preview}
_ ->
:error
end
end
def expand(_uri, %{mode: :post, path: path, sub: sub, post_id: post_id}, _opts) do
case HTTPoison.get("https://api.reddit.com#{path}?sr_detail=true") do
{:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
json = Jason.decode!(body)
op = List.first(json)
|> Map.get("data")
|> Map.get("children")
|> List.first()
|> Map.get("data")
|> IO.inspect(limit: :infinity)
sr = get_in(op, ["sr_detail", "display_name_prefixed"])
{self?, url} = if Map.get(op, "selftext") == "" do
{false, Map.get(op, "url")}
else
{true, nil}
end
self_str = if(self?, do: "text", else: url)
up = Map.get(op, "ups")
down = Map.get(op, "downs")
comments = Map.get(op, "num_comments")
nsfw = if Map.get(op, "over_18") do
"[NSFW] "
else
""
end
state = cond do
Map.get(op, "hidden") -> "hidden"
Map.get(op, "archived") -> "archived"
Map.get(op, "locked") -> "locked"
Map.get(op, "quarantine") -> "quarantined"
Map.get(op, "removed_by") || Map.get(op, "removed_by_category") -> "removed"
Map.get(op, "banned_by") -> "banned"
Map.get(op, "pinned") -> "pinned"
Map.get(op, "stickied") -> "stickied"
true -> nil
end
flair = if flair = Map.get(op, "link_flair_text") do
"[#{flair}] "
else
""
end
title = "#{nsfw}#{sr}: #{flair}#{Map.get(op, "title")}"
state_str = if(state, do: "#{state}, ")
- content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{down} down, #{comments} comments - #{self_str}"
+ content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{comments} comments - #{self_str}"
{:ok, [title, content]}
err ->
:error
end
end
end
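
Aside: the `match/2` clauses map URL shapes to a mode; for reference, two hypothetical inputs and the params they yield:

```
iex> alias Nola.Plugins.Link.Reddit
iex> Reddit.match(URI.parse("https://reddit.com/r/elixir"), [])
{true, %{mode: :sub, path: "/r/elixir", sub: "elixir"}}
iex> Reddit.match(URI.parse("https://old.reddit.com/r/elixir/comments/abc123/some-title"), [])
{true, %{mode: :post, path: "/r/elixir/comments/abc123/some-title", sub: "elixir", post_id: "abc123"}}
```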
diff --git a/lib/plugins/link/scraper.ex b/lib/plugins/link/scraper.ex
new file mode 100644
index 0000000..f5487e3
--- /dev/null
+++ b/lib/plugins/link/scraper.ex
@@ -0,0 +1,45 @@
+defmodule Nola.Plugins.Link.Scraper do
+
+ defmodule UseScraper do
+ require Logger
+
+ def get(url, config) do
+ base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
+ api_key = Keyword.get(config, :api_key, "unset api key")
+ options = Keyword.get(config, :http_options, [])
+ headers = [{"user-agent", "nola, href@random.sh"},
+ {"content-type", "application/json"},
+ {"authorization", "Bearer " <> api_key}]
+ Logger.debug("scraper: use_scraper: get: #{url}")
+ with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
+ {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
+ {:ok, %{"status" => "scraped", "html" => body, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <- Poison.decode(body) do
+ {:ok, body, meta}
+ else
+ {:ok, %{"status" => "scraped", "text" => body, "meta" => meta = %{"fetchedUrlStatusCode" => code}}} ->
+ Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
+ status = Plug.Conn.Status.reason_atom(code)
+ {:error, status}
+ {:ok, %{"status" => "failed"}} ->
+ Logger.error("scraper: use_scraper: scraper service failed for #{url}")
+ {:error, :scrape_failed}
+ {:ok, %HTTPoison.Response{status_code: code, body: body}} ->
+ Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
+ status = Plug.Conn.Status.reason_atom(code)
+ {:error, status}
+ {:error, %HTTPoison.Error{reason: reason}} ->
+ Logger.error("scraper: use_scraper: scraper service failed (http #{inspect reason}) for #{url}")
+ {:error, reason}
+ end
+ end
+ end
+
+ def get(url) do
+ config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []
+ case config[:service] do
+ "usescraper" -> UseScraper.get(url, config[:config] || [])
+ _ -> {:error, :scraping_disabled}
+ end
+ end
+
+end
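
Aside: enabling the scraper is configuration only. A sketch using the keys read above (`:service`, `:config`, `:base_url`, `:api_key`, `:http_options`); note that `retry_expand_with_scraper/2` checks for the `:"Nola.Plugins.Link.HTML"` handler key, so the handler must be registered under that exact atom:

```
config :nola, Nola.Plugins.Link,
  handlers: [
    "Nola.Plugins.Link.HTML": []
  ],
  scraper: [
    service: "usescraper",
    config: [
      api_key: System.get_env("USESCRAPER_API_KEY"),
      # base_url defaults to https://api.usescraper.com; http_options are passed to HTTPoison
      http_options: [recv_timeout: :timer.seconds(60)]
    ]
  ]
```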
diff --git a/lib/plugins/link/store.ex b/lib/plugins/link/store.ex
new file mode 100644
index 0000000..566cc9a
--- /dev/null
+++ b/lib/plugins/link/store.ex
@@ -0,0 +1,30 @@
+defmodule Nola.Plugins.Link.Store do
+ require Record
+ import Ex2ms
+
+ @type url() :: String.t()
+
+ Record.defrecord(:link, link: nil, at: nil)
+ @type link :: record(:link, link: String.t(), at: integer() | nil)
+
+ Record.defrecord(:link_entry, key: nil, at: nil)
+ @type link_entry :: record(:link_entry, key: {url(), String.t()}, at: integer() | nil)
+
+ def setup do
+ :ets.new(:links, [:set, :public, :named_table, keypos: 2])
+ end
+
+ @spec insert_link(url()) :: true
+ def insert_link(url) do
+ # NaiveDateTime has no to_unix/1; use DateTime for the unix timestamp.
+ :ets.insert(:links, link(link: url, at: DateTime.utc_now() |> DateTime.to_unix()))
+ end
+
+ @spec get_link(url()) :: link() | nil
+ def get_link(url) do
+ case :ets.lookup(:links, url) do
+ [link] -> link
+ [] -> nil
+ end
+ end
+
+end
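
Aside: a usage sketch for the new Store. `setup/0` runs once (done in `init/1` above); with `keypos: 2`, the URL field is the ETS key, and `get_link/1` returns the whole record tuple or nil:

```
Nola.Plugins.Link.Store.setup()
Nola.Plugins.Link.Store.insert_link("https://example.org/article")
Nola.Plugins.Link.Store.get_link("https://example.org/article")
#=> {:link, "https://example.org/article", 1725105900}
```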