From c684265585ee712cc4606f39f8fe2be467e77e7c Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 17:36:49 +0000 Subject: [PATCH 01/27] Add import functionality --- lib/mix/tasks/pleroma/search.ex | 46 +++++++++++++++++++ .../elasticsearch/document_mappings/note.ex | 13 ++++++ lib/pleroma/elasticsearch/store.ex | 36 +++++++++++++++ mix.exs | 3 +- mix.lock | 4 ++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 lib/mix/tasks/pleroma/search.ex create mode 100644 lib/pleroma/elasticsearch/document_mappings/note.ex create mode 100644 lib/pleroma/elasticsearch/store.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex new file mode 100644 index 000000000..4acd0e34a --- /dev/null +++ b/lib/mix/tasks/pleroma/search.ex @@ -0,0 +1,46 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search do + use Mix.Task + import Mix.Pleroma + import Ecto.Query + alias Pleroma.Elasticsearch + alias Pleroma.Activity + alias Pleroma.Repo + alias Pleroma.Pagination + + @shortdoc "Manages elasticsearch" + + def run(["import" | rest]) do + start_pleroma() + + query = from(a in Activity, where: not ilike(a.actor, "%/relay")) + |> Activity.with_preloaded_object + |> Activity.with_preloaded_user_actor + |> get_all + end + + defp get_all(query, max_id \\ nil) do + params = %{limit: 20} + params = if max_id == nil do + params + else + Map.put(params, :max_id, max_id) + end + + res = query + |> Pagination.fetch_paginated(params) + + if res == [] do + :ok + else + res + |> Pleroma.Elasticsearch.bulk_post(:activities) + + get_all(query, List.last(res).id) + end + end + +end diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex new file mode 100644 index 000000000..f4e3307fe --- /dev/null +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -0,0 +1,13 @@ +defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do + alias Pleroma.Object + + def id(obj), do: obj.id + def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do + %{ + user: activity.user_actor.nickname, + content: activity.object.data["content"], + instance: URI.parse(activity.user_actor.ap_id).host, + hashtags: Object.hashtags(activity.object) + } + end +end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex new file mode 100644 index 000000000..2ff4bf889 --- /dev/null +++ b/lib/pleroma/elasticsearch/store.ex @@ -0,0 +1,36 @@ +defmodule Pleroma.Elasticsearch do + alias Pleroma.Activity + alias Pleroma.Elasticsearch.DocumentMappings + + defp url do + Pleroma.Config.get([:elasticsearch, :url]) + end + + def put(%Activity{} = activity) do + Elastix.Document.index( + url(), + "activities", + "activity", + DocumentMappings.Activity.id(activity), + DocumentMappings.Activity.encode(activity) + ) + end + + def bulk_post(data, :activities) do + d = data + |> Enum.map(fn d -> + [ + %{index: %{_id: DocumentMappings.Activity.id(d)}}, + DocumentMappings.Activity.encode(d) + ] + end) + |> List.flatten() + + IO.inspect Elastix.Bulk.post( + url(), + d, + index: "activities", + type: "activity" + ) + end +end diff --git a/mix.exs b/mix.exs index e69c737dd..f49353f7f 100644 --- a/mix.exs +++ b/mix.exs @@ -91,7 +91,7 @@ defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] defp warnings_as_errors(:prod), do: false - defp warnings_as_errors(_), do: true + defp warnings_as_errors(_), do: false # Specifies OAuth dependencies. defp oauth_deps do @@ -197,6 +197,7 @@ defp deps do ref: "289cda1b6d0d70ccb2ba508a2b0bd24638db2880"}, {:eblurhash, "~> 1.1.0"}, {:open_api_spex, "~> 3.10"}, + {:elastix, ">= 0.0.0"}, # indirect dependency version override {:plug, "~> 1.10.4", override: true}, diff --git a/mix.lock b/mix.lock index 18d5e3bea..bec9d025e 100644 --- a/mix.lock +++ b/mix.lock @@ -34,6 +34,8 @@ "ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"}, "ecto_sql": {:hex, :ecto_sql, "3.6.2", "9526b5f691701a5181427634c30655ac33d11e17e4069eff3ae1176c764e0ba3", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.6.2", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.4.0 or ~> 0.5.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "5ec9d7e6f742ea39b63aceaea9ac1d1773d574ea40df5a53ef8afbd9242fdb6b"}, "eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"}, + "elasticsearch": {:hex, :elasticsearch, "1.0.1", "8339538d90af6b280f10ecd02b1eae372f09373e629b336a13461babf7366495", [:mix], [{:httpoison, ">= 0.0.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, ">= 0.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:sigaws, "~> 0.7", [hex: :sigaws, repo: "hexpm", optional: true]}, {:vex, "~> 0.6", [hex: :vex, repo: "hexpm", optional: false]}], "hexpm", "83e7d8b8bee3e7e19a06ab4d357d24845ac1da894e79678227fd52c0b7f71867"}, + "elastix": {:hex, :elastix, "0.10.0", "7567da885677ba9deffc20063db5f3ca8cd10f23cff1ab3ed9c52b7063b7e340", [:mix], [{:httpoison, "~> 1.4", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, "~> 3.0 or ~> 4.0", [hex: :poison, repo: "hexpm", optional: true]}, {:retry, "~> 0.8", [hex: :retry, repo: "hexpm", optional: false]}], "hexpm", "5fb342ce068b20f7845f5dd198c2dc80d967deafaa940a6e51b846db82696d1d"}, "elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"}, "esshd": {:hex, :esshd, "0.1.1", "d4dd4c46698093a40a56afecce8a46e246eb35463c457c246dacba2e056f31b5", [:mix], [], "hexpm", "d73e341e3009d390aa36387dc8862860bf9f874c94d9fd92ade2926376f49981"}, "eternal": {:hex, :eternal, "1.2.2", "d1641c86368de99375b98d183042dd6c2b234262b8d08dfd72b9eeaafc2a1abd", [:mix], [], "hexpm", "2c9fe32b9c3726703ba5e1d43a1d255a4f3f2d8f8f9bc19f094c7cb1a7a9e782"}, @@ -112,6 +114,7 @@ "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, "recon": {:hex, :recon, "2.5.1", "430ffa60685ac1efdfb1fe4c97b8767c92d0d92e6e7c3e8621559ba77598678a", [:mix, :rebar3], [], "hexpm", "5721c6b6d50122d8f68cccac712caa1231f97894bab779eff5ff0f886cb44648"}, "remote_ip": {:git, "https://git.pleroma.social/pleroma/remote_ip.git", "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8", [ref: "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8"]}, + "retry": {:hex, :retry, "0.15.0", "ba6aaeba92905a396c18c299a07e638947b2ba781e914f803202bc1b9ae867c3", [:mix], [], "hexpm", "93d3310bce78c0a30cc94610684340a14adfc9136856a3f662e4d9ce6013c784"}, "sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"}, @@ -125,6 +128,7 @@ "ueberauth": {:hex, :ueberauth, "0.6.3", "d42ace28b870e8072cf30e32e385579c57b9cc96ec74fa1f30f30da9c14f3cc0", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "afc293d8a1140d6591b53e3eaf415ca92842cb1d32fad3c450c6f045f7f91b60"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, "unsafe": {:hex, :unsafe, "1.0.1", "a27e1874f72ee49312e0a9ec2e0b27924214a05e3ddac90e91727bc76f8613d8", [:mix], [], "hexpm", "6c7729a2d214806450d29766abc2afaa7a2cbecf415be64f36a6691afebb50e5"}, + "vex": {:hex, :vex, "0.9.0", "613ea5eb3055662e7178b83e25b2df0975f68c3d8bb67c1645f0573e1a78d606", [:mix], [], "hexpm", "c69fff44d5c8aa3f1faee71bba1dcab05dd36364c5a629df8bb11751240c857f"}, "web_push_encryption": {:git, "https://github.com/lanodan/elixir-web-push-encryption.git", "026a043037a89db4da8f07560bc8f9c68bcf0cc0", [branch: "bugfix/otp-24"]}, "websocket_client": {:git, "https://github.com/jeremyong/websocket_client.git", "9a6f65d05ebf2725d62fb19262b21f1805a59fbf", []}, } From 3478492945d7069545593cc0e6e6b9e536ddb489 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 18:48:46 +0000 Subject: [PATCH 02/27] integrate search endpoint with ES --- config/config.exs | 3 ++ .../elasticsearch/document_mappings/note.ex | 1 + lib/pleroma/elasticsearch/store.ex | 11 +++++- lib/pleroma/web/common_api.ex | 19 +++++++++- .../controllers/search_controller.ex | 37 +++++++++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) diff --git a/config/config.exs b/config/config.exs index 681b49827..dea05d276 100644 --- a/config/config.exs +++ b/config/config.exs @@ -851,6 +851,9 @@ {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} ] +config :pleroma, :search, + provider: :builtin + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{Mix.env()}.exs" diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex index f4e3307fe..60efde599 100644 --- a/lib/pleroma/elasticsearch/document_mappings/note.ex +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do def id(obj), do: obj.id def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do %{ + _timestamp: activity.inserted_at, user: activity.user_actor.nickname, content: activity.object.data["content"], instance: URI.parse(activity.user_actor.ap_id).host, diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 2ff4bf889..d9e9ed1a7 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -26,11 +26,20 @@ def bulk_post(data, :activities) do end) |> List.flatten() - IO.inspect Elastix.Bulk.post( + Elastix.Bulk.post( url(), d, index: "activities", type: "activity" ) end + + def search(query) do + Elastix.Search.search( + url(), + "activities", + ["activity"], + %{query: %{term: %{content: query}}} + ) + end end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 6f685cb7b..95ac7b71a 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -16,6 +16,8 @@ defmodule Pleroma.Web.CommonAPI do alias Pleroma.Web.ActivityPub.Utils alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Web.CommonAPI.ActivityDraft + alias Pleroma.Elasticsearch + alias Pleroma.Config import Pleroma.Web.Gettext import Pleroma.Web.CommonAPI.Utils @@ -395,9 +397,24 @@ def listen(user, data) do end end + def maybe_put_into_elasticsearch({:ok, activity}) do + if Config.get([:search, :provider]) == :elasticsearch do + actor = Pleroma.Activity.user_actor(activity) + activity + |> Map.put(:user_actor, actor) + |> Elasticsearch.put() + end + end + + def maybe_put_into_elasticsearch(_) do + {:ok, :skipped} + end + def post(user, %{status: _} = data) do with {:ok, draft} <- ActivityDraft.create(user, data) do - ActivityPub.create(draft.changes, draft.preview?) + activity = ActivityPub.create(draft.changes, draft.preview?) + maybe_put_into_elasticsearch(activity) + activity end end diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 64b177eb3..484a959af 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -45,6 +45,43 @@ def search(conn, params), do: do_search(:v1, conn, params) defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do query = String.trim(query) + options = search_options(params, user) + if Pleroma.Config.get([:search, :provider]) == :elasticsearch do + elasticsearch_search(conn, query, options) + else + builtin_search(version, conn, params) + end + end + + defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do + with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do + results = raw_results + |> Map.get(:body) + |> Map.get("hits") + |> Map.get("hits") + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + + json( + conn, + %{ + accounts: [], + hashtags: [], + statuses: StatusView.render("index.json", + activities: results, + for: user, + as: :activity + )} + ) + else + {:error, _} -> + conn + |> put_status(:internal_server_error) + |> json(%{error: "Search failed"}) + end + end + + defp builtin_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do options = search_options(params, user) timeout = Keyword.get(Repo.config(), :timeout, 15_000) default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} From 345eb7b3f8bd241132fc414e20e4f1649d359df7 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 19:56:15 +0000 Subject: [PATCH 03/27] add extra filters --- lib/pleroma/elasticsearch/store.ex | 76 ++++++++++++++++++- .../controllers/search_controller.ex | 6 +- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index d9e9ed1a7..55c459801 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -2,6 +2,10 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.Elasticsearch.DocumentMappings + @searchable [ + "hashtag", "instance", "user" + ] + defp url do Pleroma.Config.get([:elasticsearch, :url]) end @@ -34,12 +38,82 @@ def bulk_post(data, :activities) do ) end + defp parse_term(t) do + if String.contains?(t, ":") and !String.starts_with?(t, "\"") do + [field, query] = String.split(t, ":") + if Enum.member?(@searchable, field) do + {field, query} + else + {"content", query} + end + else + {"content", t} + end + end + + defp search_user(params, q) do + if q["user"] != nil do + params ++ [%{match: %{user: %{ + query: Enum.join(q["user"], " "), + operator: "OR" + }}}] + else + params + end + end + + defp search_instance(params, q) do + if q["instance"] != nil do + params ++ [%{match: %{instance: %{ + query: Enum.join(q["instance"], " "), + operator: "OR" + }}}] + else + params + end + end + + defp search_content(params, q) do + if q["content"] != nil do + params ++ [%{match: %{content: %{ + query: Enum.join(q["content"], " "), + operator: "AND" + }}}] + else + params + end + end + + defp to_es(q) do + [] + |> search_content(q) + |> search_instance(q) + |> search_user(q) + end + + defp parse(query) do + String.split(query, " ") + |> Enum.map(&parse_term/1) + |> Enum.reduce(%{}, fn {field, query}, acc -> + Map.put(acc, field, + Map.get(acc, field, []) ++ [query] + ) + end) + |> to_es() + end + def search(query) do + q = %{query: %{ + bool: %{ + must: parse(query) + } + }} + IO.inspect(q) Elastix.Search.search( url(), "activities", ["activity"], - %{query: %{term: %{content: query}}} + q ) end end diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 484a959af..920ff5980 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -56,9 +56,9 @@ defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do results = raw_results - |> Map.get(:body) - |> Map.get("hits") - |> Map.get("hits") + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) |> Enum.map(fn result -> result["_id"] end) |> Pleroma.Activity.all_by_ids_with_object() From ed3a866f94e8912ee7974c203a5333fb7e6c04ee Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 17:23:44 +0000 Subject: [PATCH 04/27] make search provider configurable --- config/config.exs | 3 +- lib/mix/tasks/pleroma/search.ex | 28 +-- .../elasticsearch/document_mappings/note.ex | 13 +- lib/pleroma/elasticsearch/store.ex | 115 +++--------- lib/pleroma/search.ex | 12 ++ lib/pleroma/search/builtin.ex | 137 ++++++++++++++ lib/pleroma/search/elasticsearch.ex | 80 ++++++++ lib/pleroma/web/common_api.ex | 3 +- .../controllers/search_controller.ex | 172 +----------------- mix.exs | 6 +- mix.lock | 3 +- test/pleroma/web/rich_media/parser_test.exs | 4 +- 12 files changed, 289 insertions(+), 287 deletions(-) create mode 100644 lib/pleroma/search.ex create mode 100644 lib/pleroma/search/builtin.ex create mode 100644 lib/pleroma/search/elasticsearch.ex diff --git a/config/config.exs b/config/config.exs index dea05d276..e8acb5246 100644 --- a/config/config.exs +++ b/config/config.exs @@ -851,8 +851,7 @@ {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} ] -config :pleroma, :search, - provider: :builtin +config :pleroma, :search, provider: Pleroma.Search.Builtin # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 4acd0e34a..9e0f376c3 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -6,32 +6,33 @@ defmodule Mix.Tasks.Pleroma.Search do use Mix.Task import Mix.Pleroma import Ecto.Query - alias Pleroma.Elasticsearch alias Pleroma.Activity - alias Pleroma.Repo alias Pleroma.Pagination @shortdoc "Manages elasticsearch" - def run(["import" | rest]) do + def run(["import" | _rest]) do start_pleroma() - query = from(a in Activity, where: not ilike(a.actor, "%/relay")) - |> Activity.with_preloaded_object - |> Activity.with_preloaded_user_actor + from(a in Activity, where: not ilike(a.actor, "%/relay")) + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_user_actor() |> get_all end defp get_all(query, max_id \\ nil) do params = %{limit: 20} - params = if max_id == nil do - params - else - Map.put(params, :max_id, max_id) - end - res = query - |> Pagination.fetch_paginated(params) + params = + if max_id == nil do + params + else + Map.put(params, :max_id, max_id) + end + + res = + query + |> Pagination.fetch_paginated(params) if res == [] do :ok @@ -42,5 +43,4 @@ defp get_all(query, max_id \\ nil) do get_all(query, List.last(res).id) end end - end diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex index 60efde599..35b70dd4a 100644 --- a/lib/pleroma/elasticsearch/document_mappings/note.ex +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -2,13 +2,14 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do alias Pleroma.Object def id(obj), do: obj.id - def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do + + def encode(%{object: %{data: %{"type" => "Note"}}} = activity) do %{ - _timestamp: activity.inserted_at, - user: activity.user_actor.nickname, - content: activity.object.data["content"], - instance: URI.parse(activity.user_actor.ap_id).host, - hashtags: Object.hashtags(activity.object) + _timestamp: activity.inserted_at, + user: activity.user_actor.nickname, + content: activity.object.data["content"], + instance: URI.parse(activity.user_actor.ap_id).host, + hashtags: Object.hashtags(activity.object) } end end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 55c459801..31f77fadf 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -2,118 +2,45 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.Elasticsearch.DocumentMappings - @searchable [ - "hashtag", "instance", "user" - ] - defp url do Pleroma.Config.get([:elasticsearch, :url]) end def put(%Activity{} = activity) do Elastix.Document.index( - url(), - "activities", - "activity", - DocumentMappings.Activity.id(activity), - DocumentMappings.Activity.encode(activity) + url(), + "activities", + "activity", + DocumentMappings.Activity.id(activity), + DocumentMappings.Activity.encode(activity) ) end def bulk_post(data, :activities) do - d = data - |> Enum.map(fn d -> + d = + data + |> Enum.map(fn d -> [ - %{index: %{_id: DocumentMappings.Activity.id(d)}}, - DocumentMappings.Activity.encode(d) + %{index: %{_id: DocumentMappings.Activity.id(d)}}, + DocumentMappings.Activity.encode(d) ] - end) - |> List.flatten() + end) + |> List.flatten() Elastix.Bulk.post( - url(), - d, - index: "activities", - type: "activity" + url(), + d, + index: "activities", + type: "activity" ) end - defp parse_term(t) do - if String.contains?(t, ":") and !String.starts_with?(t, "\"") do - [field, query] = String.split(t, ":") - if Enum.member?(@searchable, field) do - {field, query} - else - {"content", query} - end - else - {"content", t} - end - end - - defp search_user(params, q) do - if q["user"] != nil do - params ++ [%{match: %{user: %{ - query: Enum.join(q["user"], " "), - operator: "OR" - }}}] - else - params - end - end - - defp search_instance(params, q) do - if q["instance"] != nil do - params ++ [%{match: %{instance: %{ - query: Enum.join(q["instance"], " "), - operator: "OR" - }}}] - else - params - end - end - - defp search_content(params, q) do - if q["content"] != nil do - params ++ [%{match: %{content: %{ - query: Enum.join(q["content"], " "), - operator: "AND" - }}}] - else - params - end - end - - defp to_es(q) do - [] - |> search_content(q) - |> search_instance(q) - |> search_user(q) - end - - defp parse(query) do - String.split(query, " ") - |> Enum.map(&parse_term/1) - |> Enum.reduce(%{}, fn {field, query}, acc -> - Map.put(acc, field, - Map.get(acc, field, []) ++ [query] - ) - end) - |> to_es() - end - - def search(query) do - q = %{query: %{ - bool: %{ - must: parse(query) - } - }} - IO.inspect(q) + def search_activities(q) do Elastix.Search.search( - url(), - "activities", - ["activity"], - q + url(), + "activities", + ["activity"], + q ) end end diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..99bce632c --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,12 @@ +defmodule Pleroma.Search do + @type search_map :: %{ + statuses: [map], + accounts: [map], + hashtags: [map] + } + + @doc """ + Searches for stuff + """ + @callback search(map, map, keyword) :: search_map +end diff --git a/lib/pleroma/search/builtin.ex b/lib/pleroma/search/builtin.ex new file mode 100644 index 000000000..019713f52 --- /dev/null +++ b/lib/pleroma/search/builtin.ex @@ -0,0 +1,137 @@ +defmodule Pleroma.Search.Builtin do + @behaviour Pleroma.Search + + alias Pleroma.Repo + alias Pleroma.User + alias Pleroma.Activity + alias Pleroma.Web.MastodonAPI.AccountView + alias Pleroma.Web.MastodonAPI.StatusView + alias Pleroma.Web.Endpoint + + require Logger + + @impl Pleroma.Search + def search(_conn, %{q: query} = params, options) do + version = Keyword.get(options, :version) + timeout = Keyword.get(Repo.config(), :timeout, 15_000) + default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} + + default_values + |> Enum.map(fn {resource, default_value} -> + if params[:type] in [nil, resource] do + {resource, fn -> resource_search(version, resource, query, options) end} + else + {resource, fn -> default_value end} + end + end) + |> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end, + timeout: timeout, + on_timeout: :kill_task + ) + |> Enum.reduce(default_values, fn + {:ok, {resource, result}}, acc -> + Map.put(acc, resource, result) + + _error, acc -> + acc + end) + end + + defp resource_search(_, "accounts", query, options) do + accounts = with_fallback(fn -> User.search(query, options) end) + + AccountView.render("index.json", + users: accounts, + for: options[:for_user], + embed_relationships: options[:embed_relationships] + ) + end + + defp resource_search(_, "statuses", query, options) do + statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) + + StatusView.render("index.json", + activities: statuses, + for: options[:for_user], + as: :activity + ) + end + + defp resource_search(:v2, "hashtags", query, options) do + tags_path = Endpoint.url() <> "/tag/" + + query + |> prepare_tags(options) + |> Enum.map(fn tag -> + %{name: tag, url: tags_path <> tag} + end) + end + + defp resource_search(:v1, "hashtags", query, options) do + prepare_tags(query, options) + end + + defp prepare_tags(query, options) do + tags = + query + |> preprocess_uri_query() + |> String.split(~r/[^#\w]+/u, trim: true) + |> Enum.uniq_by(&String.downcase/1) + + explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) + + tags = + if Enum.any?(explicit_tags) do + explicit_tags + else + tags + end + + tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) + + tags = + if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do + add_joined_tag(tags) + else + tags + end + + Pleroma.Pagination.paginate(tags, options) + end + + # If `query` is a URI, returns last component of its path, otherwise returns `query` + defp preprocess_uri_query(query) do + if query =~ ~r/https?:\/\// do + query + |> String.trim_trailing("/") + |> URI.parse() + |> Map.get(:path) + |> String.split("/") + |> Enum.at(-1) + else + query + end + end + + defp add_joined_tag(tags) do + tags + |> Kernel.++([joined_tag(tags)]) + |> Enum.uniq_by(&String.downcase/1) + end + + defp joined_tag(tags) do + tags + |> Enum.map(fn tag -> String.capitalize(tag) end) + |> Enum.join() + end + + defp with_fallback(f, fallback \\ []) do + try do + f.() + rescue + error -> + Logger.error("#{__MODULE__} search error: #{inspect(error)}") + fallback + end + end +end diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex new file mode 100644 index 000000000..f16ae58ce --- /dev/null +++ b/lib/pleroma/search/elasticsearch.ex @@ -0,0 +1,80 @@ +defmodule Pleroma.Search.Elasticsearch do + @behaviour Pleroma.Search + + alias Pleroma.Web.MastodonAPI.StatusView + + defp to_es(term) when is_binary(term) do + %{ + match: %{ + content: %{ + query: term, + operator: "AND" + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtags: %{ + value: query + } + } + } + end + + defp to_es({:filter, [field, query]}) do + %{ + term: %{ + field => %{ + value: query + } + } + } + end + + defp parse(query) do + query + |> SearchParser.parse!() + |> Enum.map(&to_es/1) + end + + @impl Pleroma.Search + def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do + q = %{ + query: %{ + bool: %{ + must: parse(query) + } + } + } + + IO.inspect(q) + + out = Pleroma.Elasticsearch.search_activities(q) + + with {:ok, raw_results} <- out do + results = + raw_results + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + + %{ + "accounts" => [], + "hashtags" => [], + "statuses" => + StatusView.render("index.json", + activities: results, + for: user, + as: :activity + ) + } + end + end +end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 95ac7b71a..0c93b1976 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -398,8 +398,9 @@ def listen(user, data) do end def maybe_put_into_elasticsearch({:ok, activity}) do - if Config.get([:search, :provider]) == :elasticsearch do + if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do actor = Pleroma.Activity.user_actor(activity) + activity |> Map.put(:user_actor, actor) |> Elasticsearch.put() diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 920ff5980..c8f820f00 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,13 +5,9 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller - alias Pleroma.Activity - alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper - alias Pleroma.Web.Endpoint alias Pleroma.Web.MastodonAPI.AccountView - alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.Plugs.OAuthScopesPlug alias Pleroma.Web.Plugs.RateLimiter @@ -43,71 +39,13 @@ def account_search(%{assigns: %{user: user}} = conn, %{q: query} = params) do def search2(conn, params), do: do_search(:v2, conn, params) def search(conn, params), do: do_search(:v1, conn, params) - defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do - query = String.trim(query) - options = search_options(params, user) - if Pleroma.Config.get([:search, :provider]) == :elasticsearch do - elasticsearch_search(conn, query, options) - else - builtin_search(version, conn, params) - end - end + defp do_search(version, %{assigns: %{user: user}} = conn, params) do + options = + search_options(params, user) + |> Keyword.put(:version, version) - defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do - with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do - results = raw_results - |> Map.get(:body, %{}) - |> Map.get("hits", %{}) - |> Map.get("hits", []) - |> Enum.map(fn result -> result["_id"] end) - |> Pleroma.Activity.all_by_ids_with_object() - - json( - conn, - %{ - accounts: [], - hashtags: [], - statuses: StatusView.render("index.json", - activities: results, - for: user, - as: :activity - )} - ) - else - {:error, _} -> - conn - |> put_status(:internal_server_error) - |> json(%{error: "Search failed"}) - end - end - - defp builtin_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do - options = search_options(params, user) - timeout = Keyword.get(Repo.config(), :timeout, 15_000) - default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} - - result = - default_values - |> Enum.map(fn {resource, default_value} -> - if params[:type] in [nil, resource] do - {resource, fn -> resource_search(version, resource, query, options) end} - else - {resource, fn -> default_value end} - end - end) - |> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end, - timeout: timeout, - on_timeout: :kill_task - ) - |> Enum.reduce(default_values, fn - {:ok, {resource, result}}, acc -> - Map.put(acc, resource, result) - - _error, acc -> - acc - end) - - json(conn, result) + search_provider = Pleroma.Config.get([:search, :provider]) + json(conn, search_provider.search(conn, params, options)) end defp search_options(params, user) do @@ -124,104 +62,6 @@ defp search_options(params, user) do |> Enum.filter(&elem(&1, 1)) end - defp resource_search(_, "accounts", query, options) do - accounts = with_fallback(fn -> User.search(query, options) end) - - AccountView.render("index.json", - users: accounts, - for: options[:for_user], - embed_relationships: options[:embed_relationships] - ) - end - - defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) - - StatusView.render("index.json", - activities: statuses, - for: options[:for_user], - as: :activity - ) - end - - defp resource_search(:v2, "hashtags", query, options) do - tags_path = Endpoint.url() <> "/tag/" - - query - |> prepare_tags(options) - |> Enum.map(fn tag -> - %{name: tag, url: tags_path <> tag} - end) - end - - defp resource_search(:v1, "hashtags", query, options) do - prepare_tags(query, options) - end - - defp prepare_tags(query, options) do - tags = - query - |> preprocess_uri_query() - |> String.split(~r/[^#\w]+/u, trim: true) - |> Enum.uniq_by(&String.downcase/1) - - explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) - - tags = - if Enum.any?(explicit_tags) do - explicit_tags - else - tags - end - - tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) - - tags = - if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do - add_joined_tag(tags) - else - tags - end - - Pleroma.Pagination.paginate(tags, options) - end - - defp add_joined_tag(tags) do - tags - |> Kernel.++([joined_tag(tags)]) - |> Enum.uniq_by(&String.downcase/1) - end - - # If `query` is a URI, returns last component of its path, otherwise returns `query` - defp preprocess_uri_query(query) do - if query =~ ~r/https?:\/\// do - query - |> String.trim_trailing("/") - |> URI.parse() - |> Map.get(:path) - |> String.split("/") - |> Enum.at(-1) - else - query - end - end - - defp joined_tag(tags) do - tags - |> Enum.map(fn tag -> String.capitalize(tag) end) - |> Enum.join() - end - - defp with_fallback(f, fallback \\ []) do - try do - f.() - rescue - error -> - Logger.error("#{__MODULE__} search error: #{inspect(error)}") - fallback - end - end - defp get_author(%{account_id: account_id}) when is_binary(account_id), do: User.get_cached_by_id(account_id) diff --git a/mix.exs b/mix.exs index f49353f7f..195fd3a9d 100644 --- a/mix.exs +++ b/mix.exs @@ -91,7 +91,7 @@ defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] defp warnings_as_errors(:prod), do: false - defp warnings_as_errors(_), do: false + defp warnings_as_errors(_), do: true # Specifies OAuth dependencies. defp oauth_deps do @@ -198,6 +198,10 @@ defp deps do {:eblurhash, "~> 1.1.0"}, {:open_api_spex, "~> 3.10"}, {:elastix, ">= 0.0.0"}, + {:search_parser, + git: "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git", + ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"}, + {:nimble_parsec, "~> 1.0", override: true}, # indirect dependency version override {:plug, "~> 1.10.4", override: true}, diff --git a/mix.lock b/mix.lock index bec9d025e..20e95c19f 100644 --- a/mix.lock +++ b/mix.lock @@ -83,7 +83,7 @@ "mogrify": {:hex, :mogrify, "0.9.1", "a26f107c4987477769f272bd0f7e3ac4b7b75b11ba597fd001b877beffa9c068", [:mix], [], "hexpm", "134edf189337d2125c0948bf0c228fdeef975c594317452d536224069a5b7f05"}, "mox": {:hex, :mox, "1.0.0", "4b3c7005173f47ff30641ba044eb0fe67287743eec9bd9545e37f3002b0a9f8b", [:mix], [], "hexpm", "201b0a20b7abdaaab083e9cf97884950f8a30a1350a1da403b3145e213c6f4df"}, "myhtmlex": {:git, "https://git.pleroma.social/pleroma/myhtmlex.git", "ad0097e2f61d4953bfef20fb6abddf23b87111e6", [ref: "ad0097e2f61d4953bfef20fb6abddf23b87111e6", submodules: true]}, - "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm", "5c040b8469c1ff1b10093d3186e2e10dbe483cd73d79ec017993fb3985b8a9b3"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.2.0", "b44d75e2a6542dcb6acf5d71c32c74ca88960421b6874777f79153bbbbd7dccc", [:mix], [], "hexpm", "52b2871a7515a5ac49b00f214e4165a40724cf99798d8e4a65e4fd64ebd002c1"}, "nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"}, "nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]}, "oban": {:hex, :oban, "2.3.4", "ec7509b9af2524d55f529cb7aee93d36131ae0bf0f37706f65d2fe707f4d9fd8", [:mix], [{:ecto_sql, ">= 3.4.3", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c70ca0434758fd1805422ea4446af5e910ddc697c0c861549c8f0eb0cfbd2fdf"}, @@ -115,6 +115,7 @@ "recon": {:hex, :recon, "2.5.1", "430ffa60685ac1efdfb1fe4c97b8767c92d0d92e6e7c3e8621559ba77598678a", [:mix, :rebar3], [], "hexpm", "5721c6b6d50122d8f68cccac712caa1231f97894bab779eff5ff0f886cb44648"}, "remote_ip": {:git, "https://git.pleroma.social/pleroma/remote_ip.git", "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8", [ref: "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8"]}, "retry": {:hex, :retry, "0.15.0", "ba6aaeba92905a396c18c299a07e638947b2ba781e914f803202bc1b9ae867c3", [:mix], [], "hexpm", "93d3310bce78c0a30cc94610684340a14adfc9136856a3f662e4d9ce6013c784"}, + "search_parser": {:git, "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git", "08971a81e68686f9ac465cfb6661d51c5e4e1e7f", [ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"]}, "sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"}, diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 2f363b012..2fe7f1b0b 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -133,13 +133,13 @@ test "parses OEmbed" do assert Parser.parse("http://example.com/oembed") == {:ok, %{ - "author_name" => "‮‭‬bees‬", + "author_name" => "\u202E\u202D\u202Cbees\u202C", "author_url" => "https://www.flickr.com/photos/bees/", "cache_age" => 3600, "flickr_type" => "photo", "height" => "768", "html" => - "\"Bacon", + "\"Bacon", "license" => "All Rights Reserved", "license_id" => 0, "provider_name" => "Flickr", From 47833e31adf4484e18e5ad66dae54835f9891008 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 17:26:06 +0000 Subject: [PATCH 05/27] Remove IO inspect --- lib/pleroma/search/elasticsearch.ex | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index f16ae58ce..af2e13e48 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -52,8 +52,6 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d } } - IO.inspect(q) - out = Pleroma.Elasticsearch.search_activities(q) with {:ok, raw_results} <- out do From e835212a7d012d8ebf383627fe3bfc8dec5f86f5 Mon Sep 17 00:00:00 2001 From: sadposter Date: Sun, 12 Dec 2021 18:25:20 +0000 Subject: [PATCH 06/27] enforce visibility --- lib/mix/tasks/pleroma/search.ex | 10 +++++++++- lib/pleroma/search/elasticsearch.ex | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 9e0f376c3..4e8e480d4 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -15,13 +15,15 @@ def run(["import" | _rest]) do start_pleroma() from(a in Activity, where: not ilike(a.actor, "%/relay")) + |> where([a], fragment("(? ->> 'type'::text) = 'Create'", a.data)) |> Activity.with_preloaded_object() |> Activity.with_preloaded_user_actor() |> get_all end defp get_all(query, max_id \\ nil) do - params = %{limit: 20} + IO.puts(max_id) + params = %{limit: 2000} params = if max_id == nil do @@ -38,6 +40,12 @@ defp get_all(query, max_id \\ nil) do :ok else res + |> Enum.filter(fn x -> + t = x.object + |> Map.get(:data, %{}) + |> Map.get("type", "") + t == "Note" + end) |> Pleroma.Elasticsearch.bulk_post(:activities) get_all(query, List.last(res).id) diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index af2e13e48..4c0127f0f 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -2,6 +2,7 @@ defmodule Pleroma.Search.Elasticsearch do @behaviour Pleroma.Search alias Pleroma.Web.MastodonAPI.StatusView + alias Pleroma.Web.ActivityPub.Visibility defp to_es(term) when is_binary(term) do %{ @@ -62,6 +63,7 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d |> Map.get("hits", []) |> Enum.map(fn result -> result["_id"] end) |> Pleroma.Activity.all_by_ids_with_object() + |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) %{ "accounts" => [], From ef9dea66b2b980aabc76b314b36c6b0f38cc27e8 Mon Sep 17 00:00:00 2001 From: sadposter Date: Sun, 12 Dec 2021 19:31:47 +0000 Subject: [PATCH 07/27] fix bug in inbound --- lib/pleroma/elasticsearch/store.ex | 27 +++++++++++++++++++- lib/pleroma/search/elasticsearch.ex | 2 +- lib/pleroma/web/activity_pub/pipeline.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 4 ++- lib/pleroma/web/common_api.ex | 20 +++------------ 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 31f77fadf..17ba71990 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -1,9 +1,34 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.Elasticsearch.DocumentMappings + alias Pleroma.Config defp url do - Pleroma.Config.get([:elasticsearch, :url]) + Config.get([:elasticsearch, :url]) + end + + def put_by_id(id) do + id + |> Activity.get_by_id_with_object() + |> maybe_put_into_elasticsearch() + end + + def maybe_put_into_elasticsearch({:ok, activity}) do + maybe_put_into_elasticsearch(activity) + end + + def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{type: "Note"}}} = activity) do + if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do + actor = Pleroma.Activity.user_actor(activity) + + activity + |> Map.put(:user_actor, actor) + |> put() + end + end + + def maybe_put_into_elasticsearch(_) do + {:ok, :skipped} end def put(%Activity{} = activity) do diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index 4c0127f0f..181009ad6 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -48,7 +48,7 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d q = %{ query: %{ bool: %{ - must: parse(query) + must: parse(String.trim(query)) } } } diff --git a/lib/pleroma/web/activity_pub/pipeline.ex b/lib/pleroma/web/activity_pub/pipeline.ex index 0d6e8aad2..6831e963f 100644 --- a/lib/pleroma/web/activity_pub/pipeline.ex +++ b/lib/pleroma/web/activity_pub/pipeline.ex @@ -27,7 +27,7 @@ defp config, do: Config.get([:pipeline, :config], Config) def common_pipeline(object, meta) do case Repo.transaction(fn -> do_common_pipeline(object, meta) end, Utils.query_timeout()) do {:ok, {:ok, activity, meta}} -> - side_effects().handle_after_transaction(meta) + side_effects().handle_after_transaction(activity, meta) {:ok, activity, meta} {:ok, value} -> diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 701181a14..a4169d41f 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -537,7 +537,9 @@ defp add_notifications(meta, notifications) do end @impl true - def handle_after_transaction(meta) do + def handle_after_transaction(activity, meta) do + Pleroma.Elasticsearch.put_by_id(activity.id) + meta |> send_notifications() |> send_streamables() diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 0c93b1976..f6a131c21 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -16,8 +16,6 @@ defmodule Pleroma.Web.CommonAPI do alias Pleroma.Web.ActivityPub.Utils alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Web.CommonAPI.ActivityDraft - alias Pleroma.Elasticsearch - alias Pleroma.Config import Pleroma.Web.Gettext import Pleroma.Web.CommonAPI.Utils @@ -397,24 +395,12 @@ def listen(user, data) do end end - def maybe_put_into_elasticsearch({:ok, activity}) do - if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do - actor = Pleroma.Activity.user_actor(activity) - - activity - |> Map.put(:user_actor, actor) - |> Elasticsearch.put() - end - end - - def maybe_put_into_elasticsearch(_) do - {:ok, :skipped} - end - def post(user, %{status: _} = data) do with {:ok, draft} <- ActivityDraft.create(user, data) do activity = ActivityPub.create(draft.changes, draft.preview?) - maybe_put_into_elasticsearch(activity) + unless draft.preview? do + Pleroma.Elasticsearch.maybe_put_into_elasticsearch(activity) + end activity end end From dbdf0c005ee155407410e353fb43a8aaa9da559f Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 19:39:07 +0000 Subject: [PATCH 08/27] pipeline it --- lib/pleroma/web/activity_pub/pipeline.ex | 3 ++- lib/pleroma/web/activity_pub/side_effects.ex | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/web/activity_pub/pipeline.ex b/lib/pleroma/web/activity_pub/pipeline.ex index 6831e963f..ed61d850c 100644 --- a/lib/pleroma/web/activity_pub/pipeline.ex +++ b/lib/pleroma/web/activity_pub/pipeline.ex @@ -27,7 +27,8 @@ defp config, do: Config.get([:pipeline, :config], Config) def common_pipeline(object, meta) do case Repo.transaction(fn -> do_common_pipeline(object, meta) end, Utils.query_timeout()) do {:ok, {:ok, activity, meta}} -> - side_effects().handle_after_transaction(activity, meta) + side_effects().handle_after_transaction(meta) + side_effects().handle_after_transaction(activity) {:ok, activity, meta} {:ok, value} -> diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index a4169d41f..913b664ca 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -537,11 +537,16 @@ defp add_notifications(meta, notifications) do end @impl true - def handle_after_transaction(activity, meta) do - Pleroma.Elasticsearch.put_by_id(activity.id) + @impl true + def handle_after_transaction(%{data: %{"type" => "Create"}} = activity) do + Elasticsearch.put_by_id(activity.id) + end + def handle_after_transaction(meta) do meta |> send_notifications() |> send_streamables() end + + def handle_after_transaction(_), do: :ok end From 71126fea0ce2394c6e9fa0cc4fb4a6a011559590 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 19:40:05 +0000 Subject: [PATCH 09/27] fix multi-after-transaction --- lib/pleroma/web/activity_pub/side_effects.ex | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 913b664ca..f87ee93cd 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -536,7 +536,6 @@ defp add_notifications(meta, notifications) do |> Keyword.put(:notifications, notifications ++ existing) end - @impl true @impl true def handle_after_transaction(%{data: %{"type" => "Create"}} = activity) do Elasticsearch.put_by_id(activity.id) @@ -547,6 +546,4 @@ def handle_after_transaction(meta) do |> send_notifications() |> send_streamables() end - - def handle_after_transaction(_), do: :ok end From a504e250a90acc10e4b5669239ba18d051d58ad0 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 19:40:31 +0000 Subject: [PATCH 10/27] fully reference es --- lib/pleroma/web/activity_pub/side_effects.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index f87ee93cd..19e833dec 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -538,7 +538,7 @@ defp add_notifications(meta, notifications) do @impl true def handle_after_transaction(%{data: %{"type" => "Create"}} = activity) do - Elasticsearch.put_by_id(activity.id) + Pleroma.Elasticsearch.put_by_id(activity.id) end def handle_after_transaction(meta) do From 06ab00d62511b958c5a01140c588e9bb83470587 Mon Sep 17 00:00:00 2001 From: sadposter Date: Sun, 12 Dec 2021 20:01:07 +0000 Subject: [PATCH 11/27] fix buggos --- lib/pleroma/elasticsearch/store.ex | 2 +- lib/pleroma/web/activity_pub/side_effects.ex | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 17ba71990..f309bf7a5 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -17,7 +17,7 @@ def maybe_put_into_elasticsearch({:ok, activity}) do maybe_put_into_elasticsearch(activity) end - def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{type: "Note"}}} = activity) do + def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity) do if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do actor = Pleroma.Activity.user_actor(activity) diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 19e833dec..91e9c72e0 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -537,10 +537,18 @@ defp add_notifications(meta, notifications) do end @impl true - def handle_after_transaction(%{data: %{"type" => "Create"}} = activity) do + def handle_after_transaction(%Pleroma.Activity{data: %{"type" => "Create"}} = activity) do Pleroma.Elasticsearch.put_by_id(activity.id) end + def handle_after_transaction(%Pleroma.Activity{}) do + :ok + end + + def handle_after_transaction(%Pleroma.Object{}) do + :ok + end + def handle_after_transaction(meta) do meta |> send_notifications() From 72e22a6dae8cc3e5921e95e281bf37500bdd2762 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Mon, 13 Dec 2021 20:15:17 +0000 Subject: [PATCH 12/27] add mappings --- priv/es-mappings/activity.json | 21 +++++++++++++++++++++ priv/es-mappings/hashtag.json | 7 +++++++ priv/es-mappings/user.json | 14 ++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 priv/es-mappings/activity.json create mode 100644 priv/es-mappings/hashtag.json create mode 100644 priv/es-mappings/user.json diff --git a/priv/es-mappings/activity.json b/priv/es-mappings/activity.json new file mode 100644 index 000000000..e476fd59f --- /dev/null +++ b/priv/es-mappings/activity.json @@ -0,0 +1,21 @@ +{ + "properties": { + "_timestamp": { + "type": "date", + "index": true + }, + "instance": { + "type": "keyword" + }, + "content": { + "type": "text" + }, + "hashtags": { + "type": "keyword" + }, + "user": { + "type": "text" + } + } +} + diff --git a/priv/es-mappings/hashtag.json b/priv/es-mappings/hashtag.json new file mode 100644 index 000000000..5330b8a2e --- /dev/null +++ b/priv/es-mappings/hashtag.json @@ -0,0 +1,7 @@ +{ + "properties": { + "hashtag": { + "type": "text" + } + } +} diff --git a/priv/es-mappings/user.json b/priv/es-mappings/user.json new file mode 100644 index 000000000..20beb75c2 --- /dev/null +++ b/priv/es-mappings/user.json @@ -0,0 +1,14 @@ +{ + "properties": { + "instance": { + "type": "keyword" + }, + "nickname": { + "type": "text" + }, + "bio": { + "type": "text" + } + } +} + From 6bb19454fddb6986de2169d90758d104eb069e2a Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Mon, 13 Dec 2021 20:27:35 +0000 Subject: [PATCH 13/27] add user import --- lib/mix/tasks/pleroma/search.ex | 25 ++++++++++--------- .../{note.ex => activity.ex} | 0 .../elasticsearch/document_mappings/user.ex | 12 +++++++++ lib/pleroma/elasticsearch/store.ex | 25 +++++++++++++++++++ priv/es-mappings/user.json | 4 +++ 5 files changed, 54 insertions(+), 12 deletions(-) rename lib/pleroma/elasticsearch/document_mappings/{note.ex => activity.ex} (100%) create mode 100644 lib/pleroma/elasticsearch/document_mappings/user.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 4e8e480d4..64396b92f 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -8,21 +8,28 @@ defmodule Mix.Tasks.Pleroma.Search do import Ecto.Query alias Pleroma.Activity alias Pleroma.Pagination + alias Pleroma.User @shortdoc "Manages elasticsearch" - def run(["import" | _rest]) do + def run(["import", "activities" | _rest]) do start_pleroma() from(a in Activity, where: not ilike(a.actor, "%/relay")) |> where([a], fragment("(? ->> 'type'::text) = 'Create'", a.data)) |> Activity.with_preloaded_object() |> Activity.with_preloaded_user_actor() - |> get_all + |> get_all(:activities) end - defp get_all(query, max_id \\ nil) do - IO.puts(max_id) + def run(["import", "users" | _rest]) do + start_pleroma() + + from(u in User, where: not ilike(u.ap_id, "%/relay")) + |> get_all(:users) + end + + defp get_all(query, index, max_id \\ nil) do params = %{limit: 2000} params = @@ -40,15 +47,9 @@ defp get_all(query, max_id \\ nil) do :ok else res - |> Enum.filter(fn x -> - t = x.object - |> Map.get(:data, %{}) - |> Map.get("type", "") - t == "Note" - end) - |> Pleroma.Elasticsearch.bulk_post(:activities) + |> Pleroma.Elasticsearch.bulk_post(index) - get_all(query, List.last(res).id) + get_all(query, index, List.last(res).id) end end end diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/activity.ex similarity index 100% rename from lib/pleroma/elasticsearch/document_mappings/note.ex rename to lib/pleroma/elasticsearch/document_mappings/activity.ex diff --git a/lib/pleroma/elasticsearch/document_mappings/user.ex b/lib/pleroma/elasticsearch/document_mappings/user.ex new file mode 100644 index 000000000..5d9945c23 --- /dev/null +++ b/lib/pleroma/elasticsearch/document_mappings/user.ex @@ -0,0 +1,12 @@ +defmodule Pleroma.Elasticsearch.DocumentMappings.User do + def id(obj), do: obj.id + + def encode(%{actor_type: "Person"} = user) do + %{ + timestamp: user.inserted_at, + instance: URI.parse(user.ap_id).host, + nickname: user.nickname, + bio: user.bio + } + end +end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index f309bf7a5..776bad921 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -44,6 +44,12 @@ def put(%Activity{} = activity) do def bulk_post(data, :activities) do d = data + |> Enum.filter(fn x -> + t = x.object + |> Map.get(:data, %{}) + |> Map.get("type", "") + t == "Note" + end) |> Enum.map(fn d -> [ %{index: %{_id: DocumentMappings.Activity.id(d)}}, @@ -60,6 +66,25 @@ def bulk_post(data, :activities) do ) end + def bulk_post(data, :users) do + d = + data + |> Enum.map(fn d -> + [ + %{index: %{_id: DocumentMappings.User.id(d)}}, + DocumentMappings.User.encode(d) + ] + end) + |> List.flatten() + + Elastix.Bulk.post( + url(), + d, + index: "users", + type: "user" + ) + end + def search_activities(q) do Elastix.Search.search( url(), diff --git a/priv/es-mappings/user.json b/priv/es-mappings/user.json index 20beb75c2..77cc66a4b 100644 --- a/priv/es-mappings/user.json +++ b/priv/es-mappings/user.json @@ -1,5 +1,9 @@ { "properties": { + "timestamp": { + "type": "date", + "index": true + }, "instance": { "type": "keyword" }, From 8547cefaff6fcde2c6a35bb9f1056ecc11e42fdb Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Tue, 14 Dec 2021 12:37:10 +0000 Subject: [PATCH 14/27] add hashtag indexing --- lib/mix/tasks/pleroma/search.ex | 11 ++++++++++- .../document_mappings/hashtag.ex | 10 ++++++++++ lib/pleroma/elasticsearch/store.ex | 19 +++++++++++++++++++ priv/es-mappings/hashtag.json | 4 ++++ 4 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 lib/pleroma/elasticsearch/document_mappings/hashtag.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 64396b92f..751e0ca11 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -9,6 +9,7 @@ defmodule Mix.Tasks.Pleroma.Search do alias Pleroma.Activity alias Pleroma.Pagination alias Pleroma.User + alias Pleroma.Hashtag @shortdoc "Manages elasticsearch" @@ -29,8 +30,16 @@ def run(["import", "users" | _rest]) do |> get_all(:users) end + def run(["import", "hashtags" | _rest]) do + start_pleroma() + + from(h in Hashtag) + |> Pleroma.Repo.all() + |> Pleroma.Elasticsearch.bulk_post(:hashtags) + end + defp get_all(query, index, max_id \\ nil) do - params = %{limit: 2000} + params = %{limit: 1000} params = if max_id == nil do diff --git a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex new file mode 100644 index 000000000..1c47d1451 --- /dev/null +++ b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex @@ -0,0 +1,10 @@ +defmodule Pleroma.Elasticsearch.DocumentMappings.Hashtag do + def id(obj), do: obj.id + + def encode(hashtag) do + %{ + hashtag: hashtag.name, + timestamp: hashtag.inserted_at + } + end +end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 776bad921..74c933038 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -85,6 +85,25 @@ def bulk_post(data, :users) do ) end + def bulk_post(data, :hashtags) do + d = + data + |> Enum.map(fn d -> + [ + %{index: %{_id: DocumentMappings.Hashtag.id(d)}}, + DocumentMappings.Hashtag.encode(d) + ] + end) + |> List.flatten() + + Elastix.Bulk.post( + url(), + d, + index: "hashtags", + type: "hashtag" + ) + end + def search_activities(q) do Elastix.Search.search( url(), diff --git a/priv/es-mappings/hashtag.json b/priv/es-mappings/hashtag.json index 5330b8a2e..3a0ade8f8 100644 --- a/priv/es-mappings/hashtag.json +++ b/priv/es-mappings/hashtag.json @@ -1,5 +1,9 @@ { "properties": { + "timestamp": { + "type": "date", + "index": true + }, "hashtag": { "type": "text" } From c03e8d46e8a30511d614cf498d50773ff9436680 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Tue, 14 Dec 2021 13:53:46 +0000 Subject: [PATCH 15/27] Search through users and hashtags as well --- lib/mix/tasks/pleroma/search.ex | 2 +- .../elasticsearch/document_mappings/user.ex | 3 +- lib/pleroma/elasticsearch/store.ex | 105 +++++++++++++--- lib/pleroma/search/elasticsearch.ex | 117 ++++++++++-------- .../search/elasticsearch/activity_parser.ex | 38 ++++++ .../search/elasticsearch/hashtag_parser.ex | 30 +++++ .../search/elasticsearch/user_paser.ex | 53 ++++++++ lib/pleroma/user.ex | 1 + lib/pleroma/web/activity_pub/side_effects.ex | 2 +- 9 files changed, 280 insertions(+), 71 deletions(-) create mode 100644 lib/pleroma/search/elasticsearch/activity_parser.ex create mode 100644 lib/pleroma/search/elasticsearch/hashtag_parser.ex create mode 100644 lib/pleroma/search/elasticsearch/user_paser.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 751e0ca11..2324561c1 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -26,7 +26,7 @@ def run(["import", "activities" | _rest]) do def run(["import", "users" | _rest]) do start_pleroma() - from(u in User, where: not ilike(u.ap_id, "%/relay")) + from(u in User, where: u.nickname not in ["internal.fetch", "relay"]) |> get_all(:users) end diff --git a/lib/pleroma/elasticsearch/document_mappings/user.ex b/lib/pleroma/elasticsearch/document_mappings/user.ex index 5d9945c23..0e57438f2 100644 --- a/lib/pleroma/elasticsearch/document_mappings/user.ex +++ b/lib/pleroma/elasticsearch/document_mappings/user.ex @@ -6,7 +6,8 @@ def encode(%{actor_type: "Person"} = user) do timestamp: user.inserted_at, instance: URI.parse(user.ap_id).host, nickname: user.nickname, - bio: user.bio + bio: user.bio, + display_name: user.name } end end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 74c933038..2d8aeabc2 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -1,24 +1,32 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity + alias Pleroma.User alias Pleroma.Elasticsearch.DocumentMappings alias Pleroma.Config + require Logger defp url do Config.get([:elasticsearch, :url]) end - def put_by_id(id) do + defp enabled? do + Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch + end + + def put_by_id(:activity, id) do id |> Activity.get_by_id_with_object() |> maybe_put_into_elasticsearch() end - def maybe_put_into_elasticsearch({:ok, activity}) do - maybe_put_into_elasticsearch(activity) + def maybe_put_into_elasticsearch({:ok, item}) do + maybe_put_into_elasticsearch(item) end - def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity) do - if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do + def maybe_put_into_elasticsearch( + %{data: %{"type" => "Create"}, object: %{data: %{"type" => "Note"}}} = activity + ) do + if enabled?() do actor = Pleroma.Activity.user_actor(activity) activity @@ -27,27 +35,48 @@ def maybe_put_into_elasticsearch(%{data: %{"type" => "Create"}, object: %{data: end end + def maybe_put_into_elasticsearch(%User{} = user) do + if enabled?() do + put(user) + end + end + def maybe_put_into_elasticsearch(_) do {:ok, :skipped} end def put(%Activity{} = activity) do - Elastix.Document.index( + {:ok, _} = Elastix.Document.index( url(), "activities", "activity", DocumentMappings.Activity.id(activity), DocumentMappings.Activity.encode(activity) ) + {:ok, _} = bulk_post( + activity.object.hashtags, :hashtags + ) + end + + def put(%User{} = user) do + {:ok, _ } = Elastix.Document.index( + url(), + "users", + "user", + DocumentMappings.User.id(user), + DocumentMappings.User.encode(user) + ) end def bulk_post(data, :activities) do d = data |> Enum.filter(fn x -> - t = x.object - |> Map.get(:data, %{}) - |> Map.get("type", "") + t = + x.object + |> Map.get(:data, %{}) + |> Map.get("type", "") + t == "Note" end) |> Enum.map(fn d -> @@ -58,7 +87,7 @@ def bulk_post(data, :activities) do end) |> List.flatten() - Elastix.Bulk.post( + {:ok, %{body: %{"errors" => false}}} = Elastix.Bulk.post( url(), d, index: "activities", @@ -104,12 +133,54 @@ def bulk_post(data, :hashtags) do ) end - def search_activities(q) do - Elastix.Search.search( - url(), - "activities", - ["activity"], - q - ) + def search(:raw, index, type, q) do + with {:ok, raw_results} <- Elastix.Search.search(url(), index, [type], q) do + results = + raw_results + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) + + {:ok, results} + else + {:error, e} -> + Logger.error(e) + {:error, e} + end + end + + def search(:activities, q) do + with {:ok, results} <- search(:raw, "activities", "activity", q) do + results + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + else + e -> + Logger.error(e) + [] + end + end + + def search(:users, q) do + with {:ok, results} <- search(:raw, "users", "user", q) do + results + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.User.get_all_by_ids() + else + e -> + Logger.error(e) + [] + end + end + + def search(:hashtags, q) do + with {:ok, results} <- search(:raw, "hashtags", "hashtag", q) do + results + |> Enum.map(fn result -> result["_source"]["hashtag"] end) + else + e -> + Logger.error(e) + [] + end end end diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index 181009ad6..e770fe536 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -2,79 +2,94 @@ defmodule Pleroma.Search.Elasticsearch do @behaviour Pleroma.Search alias Pleroma.Web.MastodonAPI.StatusView + alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.ActivityPub.Visibility + alias Pleroma.Search.Elasticsearch.Parsers + alias Pleroma.Web.Endpoint - defp to_es(term) when is_binary(term) do + defp es_query(:activity, query) do %{ - match: %{ - content: %{ - query: term, - operator: "AND" + query: %{ + bool: %{ + must: Parsers.Activity.parse(query) } } } end - defp to_es({:quoted, term}), do: to_es(term) - - defp to_es({:filter, ["hashtag", query]}) do + defp es_query(:user, query) do %{ - term: %{ - hashtags: %{ - value: query + query: %{ + bool: %{ + must: Parsers.User.parse(query) } } } end - defp to_es({:filter, [field, query]}) do + defp es_query(:hashtag, query) do %{ - term: %{ - field => %{ - value: query + query: %{ + bool: %{ + must: Parsers.Hashtag.parse(query) } } } end - defp parse(query) do - query - |> SearchParser.parse!() - |> Enum.map(&to_es/1) - end - @impl Pleroma.Search def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do - q = %{ - query: %{ - bool: %{ - must: parse(String.trim(query)) - } - } + parsed_query = + query + |> String.trim() + |> SearchParser.parse!() + + activity_task = + Task.async(fn -> + q = es_query(:activity, parsed_query) + + Pleroma.Elasticsearch.search(:activities, q) + |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) + end) + + user_task = + Task.async(fn -> + q = es_query(:user, parsed_query) + + Pleroma.Elasticsearch.search(:users, q) + |> Enum.filter(fn x -> Pleroma.User.visible_for(x, user) == :visible end) + end) + + hashtag_task = + Task.async(fn -> + q = es_query(:hashtag, parsed_query) + + Pleroma.Elasticsearch.search(:hashtags, q) + end) + + activity_results = Task.await(activity_task) + user_results = Task.await(user_task) + hashtag_results = Task.await(hashtag_task) + + %{ + "accounts" => + AccountView.render("index.json", + users: user_results, + for: user + ), + "hashtags" => + Enum.map(hashtag_results, fn x -> + %{ + url: Endpoint.url() <> "/tag/" <> x, + name: x + } + end), + "statuses" => + StatusView.render("index.json", + activities: activity_results, + for: user, + as: :activity + ) } - - out = Pleroma.Elasticsearch.search_activities(q) - - with {:ok, raw_results} <- out do - results = - raw_results - |> Map.get(:body, %{}) - |> Map.get("hits", %{}) - |> Map.get("hits", []) - |> Enum.map(fn result -> result["_id"] end) - |> Pleroma.Activity.all_by_ids_with_object() - |> Enum.filter(fn x -> Visibility.visible_for_user?(x, user) end) - - %{ - "accounts" => [], - "hashtags" => [], - "statuses" => - StatusView.render("index.json", - activities: results, - for: user, - as: :activity - ) - } - end end end diff --git a/lib/pleroma/search/elasticsearch/activity_parser.ex b/lib/pleroma/search/elasticsearch/activity_parser.ex new file mode 100644 index 000000000..0c124d537 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/activity_parser.ex @@ -0,0 +1,38 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.Activity do + defp to_es(term) when is_binary(term) do + %{ + match: %{ + content: %{ + query: term, + operator: "AND" + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtags: %{ + value: query + } + } + } + end + + defp to_es({:filter, [field, query]}) do + %{ + term: %{ + field => %{ + value: query + } + } + } + end + + def parse(q) do + Enum.map(q, &to_es/1) + end +end diff --git a/lib/pleroma/search/elasticsearch/hashtag_parser.ex b/lib/pleroma/search/elasticsearch/hashtag_parser.ex new file mode 100644 index 000000000..6e2801ed0 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/hashtag_parser.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.Hashtag do + defp to_es(term) when is_binary(term) do + %{ + term: %{ + hashtag: %{ + value: String.downcase(term), + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtag: %{ + value: String.downcase(query) + } + } + } + end + + defp to_es({:filter, _}), do: nil + + def parse(q) do + Enum.map(q, &to_es/1) + |> Enum.filter(fn x -> x != nil end) + end +end diff --git a/lib/pleroma/search/elasticsearch/user_paser.ex b/lib/pleroma/search/elasticsearch/user_paser.ex new file mode 100644 index 000000000..96bfdc7d2 --- /dev/null +++ b/lib/pleroma/search/elasticsearch/user_paser.ex @@ -0,0 +1,53 @@ +defmodule Pleroma.Search.Elasticsearch.Parsers.User do + defp to_es(term) when is_binary(term) do + %{ + bool: %{ + minimum_should_match: 1, + should: [ + %{ + match: %{ + bio: %{ + query: term, + operator: "AND" + } + } + }, + %{ + term: %{ + nickname: %{ + value: term + } + } + }, + %{ + match: %{ + display_name: %{ + query: term, + operator: "AND" + } + } + } + ] + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["user", query]}) do + %{ + term: %{ + nickname: %{ + value: query + } + } + } + end + + defp to_es({:filter, _}), do: nil + + def parse(q) do + Enum.map(q, &to_es/1) + |> Enum.filter(fn x -> x != nil end) + end +end diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 8e40dfc0d..a2cf22e55 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -1088,6 +1088,7 @@ def update_and_set_cache(struct, params) do def update_and_set_cache(changeset) do with {:ok, user} <- Repo.update(changeset, stale_error_field: :id) do + Pleroma.Elasticsearch.maybe_put_into_elasticsearch(user) set_cache(user) end end diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 91e9c72e0..a93961922 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -538,7 +538,7 @@ defp add_notifications(meta, notifications) do @impl true def handle_after_transaction(%Pleroma.Activity{data: %{"type" => "Create"}} = activity) do - Pleroma.Elasticsearch.put_by_id(activity.id) + Pleroma.Elasticsearch.put_by_id(:activity, activity.id) end def handle_after_transaction(%Pleroma.Activity{}) do From cc4c5f22f4e6946c40bfc4c9517eaeb28f39a27d Mon Sep 17 00:00:00 2001 From: sadposter Date: Tue, 14 Dec 2021 13:58:16 +0000 Subject: [PATCH 16/27] fix inbound federation --- lib/mix/tasks/pleroma/activity.ex | 58 +++++++++++++++++++++++++++++++ lib/pleroma/web/common_api.ex | 2 ++ 2 files changed, 60 insertions(+) create mode 100644 lib/mix/tasks/pleroma/activity.ex diff --git a/lib/mix/tasks/pleroma/activity.ex b/lib/mix/tasks/pleroma/activity.ex new file mode 100644 index 000000000..ca9224b67 --- /dev/null +++ b/lib/mix/tasks/pleroma/activity.ex @@ -0,0 +1,58 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2018 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Activity do + alias Pleroma.Activity + alias Pleroma.Activity.Search + alias Pleroma.User + alias Pleroma.Web.CommonAPI + alias Pleroma.Pagination + require Logger + import Mix.Pleroma + import Ecto.Query + + def run(["get", id | _rest]) do + start_pleroma() + + id + |> Activity.get_by_id() + |> IO.inspect() + end + + def run(["delete_by_keyword", user, keyword | _rest]) do + start_pleroma() + u = User.get_by_nickname(user) + + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> Activity.Queries.by_author(u) + |> query_with(keyword) + |> Pagination.fetch_paginated( + %{"offset" => 0, "limit" => 20, "skip_order" => false}, + :offset + ) + |> Enum.map(fn x -> CommonAPI.delete(x.id, u) end) + |> Enum.count() + |> IO.puts() + end + + defp query_with(q, search_query) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end +end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index f6a131c21..3266b1c60 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -398,9 +398,11 @@ def listen(user, data) do def post(user, %{status: _} = data) do with {:ok, draft} <- ActivityDraft.create(user, data) do activity = ActivityPub.create(draft.changes, draft.preview?) + unless draft.preview? do Pleroma.Elasticsearch.maybe_put_into_elasticsearch(activity) end + activity end end From 9d5d964bf87dd153b5f10ad3bb02a7a76459f577 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Tue, 14 Dec 2021 14:28:15 +0000 Subject: [PATCH 17/27] merge --- lib/mix/tasks/pleroma/activity.ex | 1 - lib/mix/tasks/pleroma/search.ex | 4 +- lib/pleroma/elasticsearch/store.ex | 52 +++++++++++-------- .../search/elasticsearch/hashtag_parser.ex | 2 +- .../search/elasticsearch/user_paser.ex | 2 +- 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/lib/mix/tasks/pleroma/activity.ex b/lib/mix/tasks/pleroma/activity.ex index ca9224b67..3a79d8f20 100644 --- a/lib/mix/tasks/pleroma/activity.ex +++ b/lib/mix/tasks/pleroma/activity.ex @@ -4,7 +4,6 @@ defmodule Mix.Tasks.Pleroma.Activity do alias Pleroma.Activity - alias Pleroma.Activity.Search alias Pleroma.User alias Pleroma.Web.CommonAPI alias Pleroma.Pagination diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 2324561c1..1fd880eab 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -24,8 +24,8 @@ def run(["import", "activities" | _rest]) do end def run(["import", "users" | _rest]) do - start_pleroma() - + start_pleroma() + from(u in User, where: u.nickname not in ["internal.fetch", "relay"]) |> get_all(:users) end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 2d8aeabc2..2f8375c48 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -46,26 +46,31 @@ def maybe_put_into_elasticsearch(_) do end def put(%Activity{} = activity) do - {:ok, _} = Elastix.Document.index( - url(), - "activities", - "activity", - DocumentMappings.Activity.id(activity), - DocumentMappings.Activity.encode(activity) - ) - {:ok, _} = bulk_post( - activity.object.hashtags, :hashtags - ) + {:ok, _} = + Elastix.Document.index( + url(), + "activities", + "activity", + DocumentMappings.Activity.id(activity), + DocumentMappings.Activity.encode(activity) + ) + + {:ok, _} = + bulk_post( + activity.object.hashtags, + :hashtags + ) end def put(%User{} = user) do - {:ok, _ } = Elastix.Document.index( - url(), - "users", - "user", - DocumentMappings.User.id(user), - DocumentMappings.User.encode(user) - ) + {:ok, _} = + Elastix.Document.index( + url(), + "users", + "user", + DocumentMappings.User.id(user), + DocumentMappings.User.encode(user) + ) end def bulk_post(data, :activities) do @@ -87,12 +92,13 @@ def bulk_post(data, :activities) do end) |> List.flatten() - {:ok, %{body: %{"errors" => false}}} = Elastix.Bulk.post( - url(), - d, - index: "activities", - type: "activity" - ) + {:ok, %{body: %{"errors" => false}}} = + Elastix.Bulk.post( + url(), + d, + index: "activities", + type: "activity" + ) end def bulk_post(data, :users) do diff --git a/lib/pleroma/search/elasticsearch/hashtag_parser.ex b/lib/pleroma/search/elasticsearch/hashtag_parser.ex index 6e2801ed0..644969c78 100644 --- a/lib/pleroma/search/elasticsearch/hashtag_parser.ex +++ b/lib/pleroma/search/elasticsearch/hashtag_parser.ex @@ -3,7 +3,7 @@ defp to_es(term) when is_binary(term) do %{ term: %{ hashtag: %{ - value: String.downcase(term), + value: String.downcase(term) } } } diff --git a/lib/pleroma/search/elasticsearch/user_paser.ex b/lib/pleroma/search/elasticsearch/user_paser.ex index 96bfdc7d2..76da3bd33 100644 --- a/lib/pleroma/search/elasticsearch/user_paser.ex +++ b/lib/pleroma/search/elasticsearch/user_paser.ex @@ -25,7 +25,7 @@ defp to_es(term) when is_binary(term) do query: term, operator: "AND" } - } + } } ] } From bed6f4e12cc7ff259db3e515243e1275733c389b Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Tue, 14 Dec 2021 14:33:34 +0000 Subject: [PATCH 18/27] add timeouts --- lib/pleroma/search/elasticsearch.ex | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index e770fe536..00ae6b3dc 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -9,6 +9,12 @@ defmodule Pleroma.Search.Elasticsearch do defp es_query(:activity, query) do %{ + size: 50, + terminate_after: 50, + timeout: "5s", + sort: [ + %{"_timestamp" => "desc"} + ], query: %{ bool: %{ must: Parsers.Activity.parse(query) @@ -19,6 +25,9 @@ defp es_query(:activity, query) do defp es_query(:user, query) do %{ + size: 50, + terminate_after: 50, + timeout: "5s", query: %{ bool: %{ must: Parsers.User.parse(query) @@ -29,6 +38,9 @@ defp es_query(:user, query) do defp es_query(:hashtag, query) do %{ + size: 50, + terminate_after: 50, + timeout: "5s", query: %{ bool: %{ must: Parsers.Hashtag.parse(query) From 7f30218b82a71eae20a69317cfed31e65a97e459 Mon Sep 17 00:00:00 2001 From: sadposter Date: Tue, 14 Dec 2021 14:48:24 +0000 Subject: [PATCH 19/27] extra cool --- lib/pleroma/elasticsearch/store.ex | 4 +++- lib/pleroma/search/elasticsearch.ex | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 2f8375c48..93501a2fb 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -120,7 +120,7 @@ def bulk_post(data, :users) do ) end - def bulk_post(data, :hashtags) do + def bulk_post([] = data, :hashtags) do d = data |> Enum.map(fn d -> @@ -139,6 +139,8 @@ def bulk_post(data, :hashtags) do ) end + def bulk_post(_, :hashtags), do: {:ok, nil} + def search(:raw, index, type, q) do with {:ok, raw_results} <- Elastix.Search.search(url(), index, [type], q) do results = diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index 00ae6b3dc..145bdec7b 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -7,7 +7,7 @@ defmodule Pleroma.Search.Elasticsearch do alias Pleroma.Search.Elasticsearch.Parsers alias Pleroma.Web.Endpoint - defp es_query(:activity, query) do + def es_query(:activity, query) do %{ size: 50, terminate_after: 50, @@ -23,7 +23,7 @@ defp es_query(:activity, query) do } end - defp es_query(:user, query) do + def es_query(:user, query) do %{ size: 50, terminate_after: 50, @@ -36,7 +36,7 @@ defp es_query(:user, query) do } end - defp es_query(:hashtag, query) do + def es_query(:hashtag, query) do %{ size: 50, terminate_after: 50, From c50ad91d3167e540599713ba1b8167e8d37c81bb Mon Sep 17 00:00:00 2001 From: sadposter Date: Tue, 14 Dec 2021 15:02:11 +0000 Subject: [PATCH 20/27] fix empty queries returning 50 results --- lib/pleroma/elasticsearch/store.ex | 2 + lib/pleroma/search/elasticsearch.ex | 72 ++++++++++++++++++----------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 93501a2fb..3cf80ebd4 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -141,6 +141,8 @@ def bulk_post([] = data, :hashtags) do def bulk_post(_, :hashtags), do: {:ok, nil} + def search(_, _, _, :skip), do: [] + def search(:raw, index, type, q) do with {:ok, raw_results} <- Elastix.Search.search(url(), index, [type], q) do results = diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index 145bdec7b..eabd2a852 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -8,45 +8,63 @@ defmodule Pleroma.Search.Elasticsearch do alias Pleroma.Web.Endpoint def es_query(:activity, query) do - %{ - size: 50, - terminate_after: 50, - timeout: "5s", - sort: [ - %{"_timestamp" => "desc"} - ], - query: %{ - bool: %{ - must: Parsers.Activity.parse(query) + must = Parsers.Activity.parse(query) + + if must == [] do + :skip + else + %{ + size: 50, + terminate_after: 50, + timeout: "5s", + sort: [ + %{"_timestamp" => "desc"} + ], + query: %{ + bool: %{ + must: must + } } } - } + end end def es_query(:user, query) do - %{ - size: 50, - terminate_after: 50, - timeout: "5s", - query: %{ - bool: %{ - must: Parsers.User.parse(query) + must = Parsers.User.parse(query) + + if must == [] do + :skip + else + %{ + size: 50, + terminate_after: 50, + timeout: "5s", + query: %{ + bool: %{ + must: must + } } } - } + end end def es_query(:hashtag, query) do - %{ - size: 50, - terminate_after: 50, - timeout: "5s", - query: %{ - bool: %{ - must: Parsers.Hashtag.parse(query) + must = Parsers.Hashtag.parse(query) + + if must == [] do + :skip + else + %{ + size: 50, + terminate_after: 50, + timeout: "5s", + query: %{ + bool: %{ + must: Parsers.Hashtag.parse(query) + } } } - } + end end @impl Pleroma.Search From d5c8415496811b6135a39f5a607417012ff07a0a Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Wed, 15 Dec 2021 10:57:47 +0000 Subject: [PATCH 21/27] re-add fetching by url --- lib/pleroma/elasticsearch/store.ex | 1 + lib/pleroma/search/elasticsearch.ex | 34 ++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 3cf80ebd4..caa49cbea 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -164,6 +164,7 @@ def search(:activities, q) do results |> Enum.map(fn result -> result["_id"] end) |> Pleroma.Activity.all_by_ids_with_object() + |> Enum.sort(&(&1.inserted_at >= &2.inserted_at)) else e -> Logger.error(e) diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index eabd2a852..614a48bb9 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -1,6 +1,8 @@ defmodule Pleroma.Search.Elasticsearch do @behaviour Pleroma.Search + alias Pleroma.Activity + alias Pleroma.Object.Fetcher alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.MastodonAPI.AccountView alias Pleroma.Web.ActivityPub.Visibility @@ -18,7 +20,8 @@ def es_query(:activity, query) do terminate_after: 50, timeout: "5s", sort: [ - %{"_timestamp" => "desc"} + "_score", + %{_timestamp: %{order: "desc", format: "basic_date_time"}} ], query: %{ bool: %{ @@ -39,6 +42,9 @@ def es_query(:user, query) do size: 50, terminate_after: 50, timeout: "5s", + sort: [ + "_score" + ], query: %{ bool: %{ must: must @@ -58,6 +64,9 @@ def es_query(:hashtag, query) do size: 50, terminate_after: 50, timeout: "5s", + sort: [ + "_score" + ], query: %{ bool: %{ must: Parsers.Hashtag.parse(query) @@ -67,6 +76,16 @@ def es_query(:hashtag, query) do end end + defp maybe_fetch(:activity, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]) do + activity + else + _ -> nil + end + end + @impl Pleroma.Search def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do parsed_query = @@ -74,6 +93,11 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d |> String.trim() |> SearchParser.parse!() + activity_fetch_task = + Task.async(fn -> + maybe_fetch(:activity, String.trim(query)) + end) + activity_task = Task.async(fn -> q = es_query(:activity, parsed_query) @@ -100,6 +124,14 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d activity_results = Task.await(activity_task) user_results = Task.await(user_task) hashtag_results = Task.await(hashtag_task) + direct_activity = Task.await(activity_fetch_task) + + activity_results = + if direct_activity == nil do + activity_results + else + [direct_activity | activity_results] + end %{ "accounts" => From 25920c10df971b36b77017bb82e3eb40aa3edd77 Mon Sep 17 00:00:00 2001 From: sadposter Date: Wed, 15 Dec 2021 11:05:30 +0000 Subject: [PATCH 22/27] don't try indexing non-people --- lib/pleroma/elasticsearch/store.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index caa49cbea..291fdf075 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -35,7 +35,7 @@ def maybe_put_into_elasticsearch( end end - def maybe_put_into_elasticsearch(%User{} = user) do + def maybe_put_into_elasticsearch(%User{actor_type: "Person"} = user) do if enabled?() do put(user) end From 418212c71c71358557e3d72c64394b790bf60557 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Thu, 16 Dec 2021 15:09:36 +0000 Subject: [PATCH 23/27] fix remote hashtags --- lib/pleroma/elasticsearch/store.ex | 12 ++++++------ lib/pleroma/hashtag.ex | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 291fdf075..0860a5ee5 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -54,12 +54,6 @@ def put(%Activity{} = activity) do DocumentMappings.Activity.id(activity), DocumentMappings.Activity.encode(activity) ) - - {:ok, _} = - bulk_post( - activity.object.hashtags, - :hashtags - ) end def put(%User{} = user) do @@ -101,6 +95,12 @@ def bulk_post(data, :activities) do ) end + def maybe_bulk_post(data, type) do + if enabled?() do + bulk_post(data, type) + end + end + def bulk_post(data, :users) do d = data diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index 53e2e9c89..1eae4d1e7 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -61,6 +61,7 @@ def get_or_create_by_names(names) when is_list(names) do {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} end) |> Repo.transaction() do + Pleroma.Elasticsearch.bulk_post(hashtags, :hashtags) {:ok, hashtags} else {:error, _name, value, _changes_so_far} -> {:error, value} From 1745606ab59863139445c8a5ad157326b7c2eab5 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Thu, 16 Dec 2021 15:20:22 +0000 Subject: [PATCH 24/27] maybe --- lib/pleroma/hashtag.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index 1eae4d1e7..cdbfeab02 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -61,7 +61,7 @@ def get_or_create_by_names(names) when is_list(names) do {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} end) |> Repo.transaction() do - Pleroma.Elasticsearch.bulk_post(hashtags, :hashtags) + Pleroma.Elasticsearch.maybe_bulk_post(hashtags, :hashtags) {:ok, hashtags} else {:error, _name, value, _changes_so_far} -> {:error, value} From 2152832780d7bb67b6d98ddf3714058bac2c03f4 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Thu, 16 Dec 2021 15:58:56 +0000 Subject: [PATCH 25/27] add timestamp --- .../document_mappings/hashtag.ex | 2 +- lib/pleroma/elasticsearch/store.ex | 21 ++++++++++++------- lib/pleroma/hashtag.ex | 1 - 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex index 1c47d1451..ddf91231c 100644 --- a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex +++ b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex @@ -4,7 +4,7 @@ def id(obj), do: obj.id def encode(hashtag) do %{ hashtag: hashtag.name, - timestamp: hashtag.inserted_at + timestamp: hashtag.timestamp } end end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 0860a5ee5..5c9e15022 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -1,6 +1,7 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.User + alias Pleroma.Object alias Pleroma.Elasticsearch.DocumentMappings alias Pleroma.Config require Logger @@ -45,6 +46,12 @@ def maybe_put_into_elasticsearch(_) do {:ok, :skipped} end + def maybe_bulk_post(data, type) do + if enabled?() do + bulk_post(data, type) + end + end + def put(%Activity{} = activity) do {:ok, _} = Elastix.Document.index( @@ -54,6 +61,12 @@ def put(%Activity{} = activity) do DocumentMappings.Activity.id(activity), DocumentMappings.Activity.encode(activity) ) + + activity + |> Map.get(:object) + |> Object.hashtags() + |> Enum.map(fn x -> %{id: x, name: x, timestamp: DateTime.to_iso8601(DateTime.utc_now())} end) + |> bulk_post(:hashtags) end def put(%User{} = user) do @@ -95,12 +108,6 @@ def bulk_post(data, :activities) do ) end - def maybe_bulk_post(data, type) do - if enabled?() do - bulk_post(data, type) - end - end - def bulk_post(data, :users) do d = data @@ -120,7 +127,7 @@ def bulk_post(data, :users) do ) end - def bulk_post([] = data, :hashtags) do + def bulk_post(data, :hashtags) when is_list(data) do d = data |> Enum.map(fn d -> diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex index cdbfeab02..53e2e9c89 100644 --- a/lib/pleroma/hashtag.ex +++ b/lib/pleroma/hashtag.ex @@ -61,7 +61,6 @@ def get_or_create_by_names(names) when is_list(names) do {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))} end) |> Repo.transaction() do - Pleroma.Elasticsearch.maybe_bulk_post(hashtags, :hashtags) {:ok, hashtags} else {:error, _name, value, _changes_so_far} -> {:error, value} From 9134ef5ecb72f6fe2042f978be3deef41c9b4ce7 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Thu, 16 Dec 2021 16:05:18 +0000 Subject: [PATCH 26/27] add timestamp --- lib/pleroma/elasticsearch/document_mappings/hashtag.ex | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex index ddf91231c..508e32131 100644 --- a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex +++ b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex @@ -1,10 +1,17 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Hashtag do def id(obj), do: obj.id + def encode(%{timestamp: _} = hashtag) do + %{ + hashtag: hashtag.name, + timestamp: hashtag.timestamp + } + end + def encode(hashtag) do %{ hashtag: hashtag.name, - timestamp: hashtag.timestamp + timestamp: hashtag.inserted_at } end end From 742b86fdaa8383a24a404047dcfd5ab9ed7bd447 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Thu, 16 Dec 2021 16:05:33 +0000 Subject: [PATCH 27/27] format --- lib/pleroma/elasticsearch/document_mappings/hashtag.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex index 508e32131..ce9a86c8d 100644 --- a/lib/pleroma/elasticsearch/document_mappings/hashtag.ex +++ b/lib/pleroma/elasticsearch/document_mappings/hashtag.ex @@ -4,8 +4,8 @@ def id(obj), do: obj.id def encode(%{timestamp: _} = hashtag) do %{ hashtag: hashtag.name, - timestamp: hashtag.timestamp - } + timestamp: hashtag.timestamp + } end def encode(hashtag) do