From b8a77c5d704a4a76f227854c1cab560eb98a7382 Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Sun, 13 Jan 2019 02:06:50 +0200 Subject: [PATCH] Add OEmbed parser --- lib/pleroma/web/rich_media/parser.ex | 6 ++- .../web/rich_media/parsers/oembed_parser.ex | 27 ++++++++++++ test/fixtures/rich_media/oembed.html | 3 ++ test/fixtures/rich_media/oembed.json | 1 + test/web/rich_media/parser_test.exs | 41 +++++++++++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 lib/pleroma/web/rich_media/parsers/oembed_parser.ex create mode 100644 test/fixtures/rich_media/oembed.html create mode 100644 test/fixtures/rich_media/oembed.json diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index fe092bf19..6da83c6e4 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -1,5 +1,9 @@ defmodule Pleroma.Web.RichMedia.Parser do - @parsers [Pleroma.Web.RichMedia.Parsers.OGP, Pleroma.Web.RichMedia.Parsers.TwitterCard] + @parsers [ + Pleroma.Web.RichMedia.Parsers.OGP, + Pleroma.Web.RichMedia.Parsers.TwitterCard, + Pleroma.Web.RichMedia.Parsers.OEmbed + ] if Mix.env() == :test do def parse(url), do: parse_url(url) diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex new file mode 100644 index 000000000..ca7226faf --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex @@ -0,0 +1,27 @@ +defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do + def parse(html, _data) do + with elements = [_ | _] <- get_discovery_data(html), + {:ok, oembed_url} <- get_oembed_url(elements), + {:ok, oembed_data} <- get_oembed_data(oembed_url) do + {:ok, oembed_data} + else + _e -> {:error, "No OEmbed data found"} + end + end + + defp get_discovery_data(html) do + html |> Floki.find("link[type='application/json+oembed']") + end + + defp get_oembed_url(nodes) do + {"link", attributes, _children} = nodes |> hd() + + {:ok, Enum.into(attributes, %{})["href"]} + end + + defp get_oembed_data(url) do + {:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url) + + {:ok, Poison.decode!(json)} + end +end diff --git a/test/fixtures/rich_media/oembed.html b/test/fixtures/rich_media/oembed.html new file mode 100644 index 000000000..55f17004b --- /dev/null +++ b/test/fixtures/rich_media/oembed.html @@ -0,0 +1,3 @@ + diff --git a/test/fixtures/rich_media/oembed.json b/test/fixtures/rich_media/oembed.json new file mode 100644 index 000000000..2a5f7a771 --- /dev/null +++ b/test/fixtures/rich_media/oembed.json @@ -0,0 +1 @@ +{"type":"photo","flickr_type":"photo","title":"Bacon Lollys","author_name":"\u202e\u202d\u202cbees\u202c","author_url":"https:\/\/www.flickr.com\/photos\/bees\/","width":"1024","height":"768","url":"https:\/\/farm4.staticflickr.com\/3040\/2362225867_4a87ab8baf_b.jpg","web_page":"https:\/\/www.flickr.com\/photos\/bees\/2362225867\/","thumbnail_url":"https:\/\/farm4.staticflickr.com\/3040\/2362225867_4a87ab8baf_q.jpg","thumbnail_width":150,"thumbnail_height":150,"web_page_short_url":"https:\/\/flic.kr\/p\/4AK2sc","license":"All Rights Reserved","license_id":0,"html":"\"Bacon<\/a>", + "license" => "All Rights Reserved", + "license_id" => 0, + "provider_name" => "Flickr", + "provider_url" => "https://www.flickr.com/", + "thumbnail_height" => 150, + "thumbnail_url" => + "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg", + "thumbnail_width" => 150, + "title" => "Bacon Lollys", + "type" => "photo", + "url" => "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg", + "version" => "1.0", + "web_page" => "https://www.flickr.com/photos/bees/2362225867/", + "web_page_short_url" => "https://flic.kr/p/4AK2sc", + "width" => "1024" + }} + end end