phoenix_pubsub

Presence stops working after ~1 week

Open flupke opened this issue 1 year ago • 5 comments

Environment

  • Elixir version (elixir -v): 1.14.0
  • Phoenix version (mix deps): 1.7.7
  • Operating system: ubuntu:jammy-20230126

Actual behavior

After the application has been running for a long time, Presence stops working:

** (SystemLimitError) a system limit has been reached due to errors at the given arguments:
  * 2nd argument: not a valid match specification
    (stdlib 4.3.1) :ets.select(MultiplayerBackendWeb.Presence_shard0, [{{{"project:ubnGwd32md3iDZSwO4mTRz1T", :_, :"$1"}, :"$2", {:"$3", :_}}, [not: {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692810649862196}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692593405495489}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693026009636719}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693380369590663}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693463743616698}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692822408764548}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693580591386986}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692512764076126}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692867520504760}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692872751130110}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693060187174415}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1693408737896420}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", 1692744903811224}}}, {:or, {:"=:=", :"$3", {{:"multiplayer-backend-prod2@...", ...}}}, {:or, {:"=:=", :"$3", ...}, {:or, ...}}}}}}}}}}}}}}}}], [{{:"$1", :"$2"}}]}])
    (phoenix_pubsub 2.1.3) lib/phoenix/tracker/state.ex:167: Phoenix.Tracker.State.tracked_values/3
    (phoenix 1.7.7) lib/phoenix/presence.ex:532: Phoenix.Presence.list/2
    (multiplayer_backend 0.1.0) lib/multiplayer_backend_web/channels/project_channel.ex:56: MultiplayerBackendWeb.ProjectChannel.handle_info/2
    (phoenix 1.7.7) lib/phoenix/channel/server.ex:354: Phoenix.Channel.Server.handle_info/2
    (stdlib 4.3.1) gen_server.erl:1123: :gen_server.try_dispatch/4
    (stdlib 4.3.1) gen_server.erl:1200: :gen_server.handle_msg/6
    (stdlib 4.3.1) proc_lib.erl:240: :proc_lib.init_p_do_apply/3
Last message: :after_join

It fails here:

# Handles the `:after_join` message for this channel process.
#
# In order: takes a presence snapshot for the socket, obtains the project
# coordinator pid and monitors it, tracks this peer in Presence, pushes the
# earlier snapshot to the client as "presence_state", and calls get_project/1.
# Always returns `{:noreply, socket}` with the coordinator pid and monitor ref
# stored in the socket assigns.
#
# The assertive `{:ok, _} =` matches below crash the channel process if
# ProjectCoordinator.ensure_started/2 or Presence.track/3 return anything else.
def handle_info(:after_join, socket) do
  # Snapshot is taken BEFORE this peer is tracked below, and the same snapshot
  # is what gets pushed to the client at the end.
  # This is the call that raises the SystemLimitError in the reported stack
  # trace (Phoenix.Presence.list/2 -> Phoenix.Tracker.State.tracked_values/3).
  presence_list = Presence.list(socket)  # <------------------------

  # NOTE(review): judging by the name, ensure_started/2 starts the coordinator
  # for this project or returns the already-running one — helper not visible
  # here, confirm. The presence snapshot, topic and pubsub server are handed over.
  {:ok, project_coordinator_pid} =
    ProjectCoordinator.ensure_started(socket.assigns.project_id,
      presence: %{
        topic: socket.topic,
        list: presence_list,
        pubsub_server: socket.pubsub_server
      }
    )

  socket = assign(socket, :project_coordinator_pid, project_coordinator_pid)

  # Monitor the coordinator so we can kill the channel if it dies
  ref = Process.monitor(project_coordinator_pid)
  # Keep the monitor reference in the assigns alongside the coordinator pid.
  socket = assign(socket, :coordinator_monitor_ref, ref)

  # Track this peer under its peer_id. Both timestamps are the system time in
  # seconds rendered as a string via inspect/1; they are read by two separate
  # calls, so they are not guaranteed to be equal.
  # NOTE(review): remove_token_from_presence/1 presumably strips a token from
  # user_data before it is shared — helper not visible here, confirm.
  {:ok, _} =
    Presence.track(socket, socket.assigns.peer_id, %{
      joined_at: inspect(System.system_time(:second)),
      online_at: inspect(System.system_time(:second)),
      user_data: remove_token_from_presence(socket.assigns.user_data)
    })

  # Send the snapshot taken at the top of this function to the client.
  push(socket, "presence_state", presence_list)
  # Return value is discarded; get_project/1 is not visible here.
  get_project(socket)
  {:noreply, socket}
end

Restarting the instances of the cluster fixes the issue, but it comes back after a week or two.

Expected behavior

Presence.list/2 should not crash, no matter how long the cluster has been running.

flupke avatar Sep 04 '23 10:09 flupke