sentry-elixir icon indicating copy to clipboard operation
sentry-elixir copied to clipboard

Sentry Structured Logging for Elixir

Open AbhiPrasad opened this issue 8 months ago • 11 comments

Problem Statement

https://develop.sentry.dev/sdk/telemetry/logs/

Sentry is adding support for structured logging. Let's add it to the Elixir SDK!

Solution Brainstorm

  1. Define the logs protocol and log envelope item in the SDK
  2. Add the Public API (SDK options and methods) as per the docs
  3. Make sure the the SDK follow the documented behavior
  4. Attach default attributes to the SDK as per docs
  5. Add logging integrations. We should support both Elixir Logger and erlangs's :logger. With Elixir Logger we should also make sure to add all the metadata directly as log attributes. We should sync about attribute naming before fully implementing.
  6. Create a GH discussion that contains instructions for setting up the SDK in your repo. Eventually this will be moved into the primary docs. See the JS SDK's GH discussion for inspiration.

AbhiPrasad avatar May 06 '25 19:05 AbhiPrasad

Gonna do it once I'm done with logging in the Ruby SDK 😄

solnic avatar May 06 '25 21:05 solnic

@solnic Any updates on this one? I'm asking since it's been a while since the last update, and I'm pondering if I should hack together something quick and dirty just for my own needs, or if this is just around the corner to being worked on?

eriknaslund avatar Aug 31 '25 22:08 eriknaslund

It might not be as robust as Sentry will do it, but we do have a quick-n-dirty implementation. Sharing below if it helps anyone. May need to be decoupled from anything app specific in there!

defmodule Ex.Sentry.LoggerHandler do
  @behaviour :logger_handler

  @impl true
  def adding_handler(config) do
    cfg = merge_config_defaults(get_in(config, [:config]) || %{})

    case Ex.Sentry.LogShipper.start_link(cfg) do
      {:ok, pid} ->
        {:ok, put_in(config, [:config, :shipper_pid], pid)}

      {:error, reason} ->
        {:error, reason}
    end
  end

  @impl true
  def log(log_event, %{config: %{shipper_pid: pid}}) when is_pid(pid) do
    Ex.Sentry.LogShipper.submit(pid, log_event)
    :ok
  end

  def log(_event, _cfg) do
    :ok
  end

  @impl true
  def changing_config(_action, old, new) do
    old_pid = get_in(old, [:config, :shipper_pid])
    new_cfg = merge_config_defaults(get_in(new, [:config]) || %{})

    if is_pid(old_pid) do
      Ex.Sentry.LogShipper.reconfigure(old_pid, new_cfg)
      {:ok, put_in(new, [:config, :shipper_pid], old_pid)}
    else
      case Ex.Sentry.LogShipper.start_link(new_cfg) do
        {:ok, pid} -> {:ok, put_in(new, [:config, :shipper_pid], pid)}
        {:error, reason} -> {:error, reason}
      end
    end
  end

  @impl true
  def removing_handler(%{config: %{shipper_pid: pid}}) when is_pid(pid) do
    Ex.Sentry.LogShipper.flush(pid)
    Process.exit(pid, :normal)
    :ok
  end

  def removing_handler(_),
    do: :ok

  defp merge_config_defaults(cfg) do
    cfg
    |> Map.put_new(:max_batch_size, 50)
    |> Map.put_new(:flush_interval_ms, 5_000)
    |> Map.put_new(:extra_attributes, %{})
    |> Map.put_new(:http_options, [])
  end
end


defmodule Ex.Sentry.LogShipper do
  @moduledoc false
  use GenServer

  def start_link(cfg) when is_map(cfg), do: GenServer.start_link(__MODULE__, cfg)
  def submit(pid, log_event), do: GenServer.cast(pid, {:log, log_event})
  def flush(pid, timeout \\ 10_000), do: GenServer.call(pid, :flush, timeout)
  def reconfigure(pid, new_cfg), do: GenServer.cast(pid, {:reconfigure, new_cfg})

  @impl true
  def init(cfg) do
    state = %{
      max_batch_size: Map.fetch!(cfg, :max_batch_size),
      flush_interval_ms: Map.fetch!(cfg, :flush_interval_ms),
      extra_attributes: Map.get(cfg, :extra_attributes, %{}) |> Map.new(),
      http_options: Map.get(cfg, :http_options, []),
      endpoint_uri: Map.get(cfg, :endpoint_uri, Sentry.Config.dsn().endpoint_uri),
      original_dsn: Map.get(cfg, :original_dsn, Sentry.Config.dsn().original_dsn),
      buffer: [],
      timer_ref: nil
    }

    {:ok, state}
  end

  @impl true
  def handle_cast({:log, event}, state) do
    state
    |> push(to_sentry_log(event, state))
    |> maybe_flush()
    |> noreply()
  end

  @impl true
  def handle_cast({:reconfigure, new_cfg}, state) do
    new_state =
      state
      |> Map.merge(%{
        max_batch_size: Map.get(new_cfg, :max_batch_size, state.max_batch_size),
        flush_interval_ms: Map.get(new_cfg, :flush_interval_ms, state.flush_interval_ms),
        extra_attributes: Map.get(new_cfg, :extra_attributes, state.extra_attributes) |> Map.new(),
        http_options: Map.get(new_cfg, :http_options, state.http_options),
        endpoint_uri: Map.get(new_cfg, :endpoint_uri, state.endpoint_uri),
        original_dsn: Map.get(new_cfg, :original_dsn, state.original_dsn)
      })

    {:noreply, new_state}
  end

  @impl true
  def handle_call(:flush, _from, state), do: {:reply, :ok, do_flush(state)}
  @impl true
  def handle_info(:flush_now, state), do: {:noreply, do_flush(state)}

  defp noreply(state), do: {:noreply, state}
  defp push(%{buffer: buf} = state, item), do: %{state | buffer: [item | buf]}

  defp maybe_flush(%{buffer: buf, max_batch_size: max} = state) when length(buf) >= max,
    do: do_flush(state)

  defp maybe_flush(%{buffer: buf, timer_ref: nil, flush_interval_ms: ms} = state) when buf != [] do
    ref = Process.send_after(self(), :flush_now, ms)
    %{state | timer_ref: ref}
  end

  defp maybe_flush(state), do: state

  defp do_flush(%{buffer: []} = state) do
    cancel_timer(state)
    %{state | timer_ref: nil}
  end

  defp do_flush(%{buffer: logs} = state) do
    cancel_timer(state)
    deliver(Enum.reverse(logs), state)
    %{state | buffer: [], timer_ref: nil}
  end

  defp cancel_timer(%{timer_ref: nil}), do: :ok
  defp cancel_timer(%{timer_ref: ref}), do: Process.cancel_timer(ref)

  defp deliver([], _state), do: :ok

  defp deliver(logs, state) do
    case state.endpoint_uri do
      nil ->
        :ok

      url ->
        Task.start(fn ->
          Req.post!(
            url,
            Keyword.merge(state.http_options,
              headers: [{"content-type", "application/x-sentry-envelope"}],
              body: build_envelope(logs, state.original_dsn)
            )
          )
        end)

        :ok
    end
  end

  defp build_envelope(logs, original_dsn) do
    header = %{
      "dsn" => original_dsn,
      "sent_at" => DateTime.utc_now() |> DateTime.to_iso8601(:extended)
    }

    item_header = %{
      "type" => "log",
      "item_count" => length(logs),
      "content_type" => "application/vnd.sentry.items.log+json"
    }

    item_payload = %{"items" => logs}

    Enum.join(
      [JSON.encode!(header), JSON.encode!(item_header), JSON.encode!(item_payload)],
      "\n"
    )
  end

  defp to_sentry_log(%{level: level, msg: msg, meta: meta}, state) do
    {trace_id, parent_span_id} = current_trace_and_span_ids()
    {body, attrs_from_msg} = message_to_body_and_attrs(msg)

    attributes =
      %{
        "module" => mfa_module(meta[:mfa]),
        "function" => mfa_function(meta[:mfa]),
        "file" => meta[:file],
        "line" => meta[:line],
        "request_id" => meta[:request_id],
        "client_version" => meta[:client_version],
        "ip_address" => meta[:ip_address],
        "sentry.release" => Sentry.Config.release(),
        "sentry.environment" => Sentry.Config.environment_name(),
        "sentry.trace.parent_span_id" => parent_span_id
      }
      |> Map.merge(flatten_map(state.extra_attributes))
      |> Map.merge(attrs_from_msg)
      |> Map.reject(fn {_k, v} -> is_nil(v) end)
      |> Map.new(fn {k, v} -> {k, to_sentry(v)} end)

    %{
      # Sentry expects seconds since the epoch but recognizes fractional seconds
      # :logger provides microseconds in event.time
      "timestamp" => meta[:time] / 1_000_000,
      "trace_id" => trace_id,
      "level" => level_to_sentry(level),
      "severity_number" => severity_number(level),
      "attributes" => attributes,
      "body" => body
    }
  end

  defp message_to_body_and_attrs({:string, chardata}), do: {IO.iodata_to_binary(chardata), %{}}

  defp message_to_body_and_attrs({:report_fun, fun}) when is_function(fun, 0) do
    message_to_body_and_attrs(fun.())
  end

  defp message_to_body_and_attrs({:report, report}) do
    body = inspect(report)

    attrs =
      %{"report" => normalize_report(report)}
      |> flatten_map()

    {body, attrs}
  end

  defp message_to_body_and_attrs({fmt, args}) when is_list(args) do
    text = :io_lib.format(fmt, args) |> IO.iodata_to_binary()
    {text, %{}}
  end

  defp message_to_body_and_attrs(other), do: {inspect(other), %{}}

  defp normalize_report(report) when is_map(report), do: report
  defp normalize_report(report) when is_list(report), do: Map.new(report)
  defp normalize_report(other), do: %{"value" => other}

  defp current_trace_and_span_ids() do
    case :otel_tracer.current_span_ctx() do
      :undefined -> {random_trace_id(), nil}
      span_ctx -> {:otel_span.hex_trace_id(span_ctx), :otel_span.hex_span_id(span_ctx)}
    end
  end

  defp random_trace_id(),
    do: :crypto.strong_rand_bytes(16) |> Base.encode16(case: :lower)

  defp level_to_sentry(level) do
    case level do
      :debug -> "debug"
      :info -> "info"
      :notice -> "info"
      :warning -> "warn"
      :warn -> "warn"
      :error -> "error"
      :critical -> "fatal"
      :alert -> "fatal"
      :emergency -> "fatal"
      _ -> "debug"
    end
  end

  defp severity_number(level) do
    case level_to_sentry(level) do
      "trace" -> 1
      "debug" -> 5
      "info" -> 9
      "warn" -> 13
      "error" -> 17
      "fatal" -> 21
      _ -> 9
    end
  end

  defp to_sentry(value) do
    case value do
      v when is_binary(v) -> %{"value" => v, "type" => "string"}
      v when is_boolean(v) -> %{"value" => v, "type" => "boolean"}
      v when is_integer(v) -> %{"value" => v, "type" => "integer"}
      v when is_float(v) -> %{"value" => v, "type" => "double"}
      v -> %{"value" => to_string(v), "type" => "string"}
    end
  end

  defp flatten_map(map), do: flatten_map(map, nil, %{})

  defp flatten_map(%{} = map, parent, acc) do
    Enum.reduce(map, acc, fn {k, v}, acc2 ->
      key =
        [parent, k]
        |> Enum.reject(&is_nil/1)
        |> Enum.map(&to_string/1)
        |> Enum.join(".")

      flatten_map(v, key, acc2)
    end)
  end

  defp flatten_map(value, key, acc), do: Map.put(acc, key, value)

  defp mfa_module({m, _f, _a}) when is_atom(m), do: inspect(m)
  defp mfa_module(_), do: nil
  defp mfa_function({_m, f, a}) when is_atom(f) and is_integer(a), do: "#{f}/#{a}"
  defp mfa_function(_), do: nil
end

venkatd avatar Sep 01 '25 12:09 venkatd

Thanks a bunch @venkatd , that's awesome! You just saved me from having to do the work of writing all that myself :). I'll be sure to try it out later tonight and I'll let you know how it worked out.

eriknaslund avatar Sep 01 '25 14:09 eriknaslund

@eriknaslund @venkatd I'll be working on support for structured logging very soon, first we need to decide whether we want to add support for distributed tracing before working on logs though. I've been busy with Ruby SDK lately but I'm getting back to Elixir SDK this month 🙂

solnic avatar Sep 02 '25 09:09 solnic

Quick follow-up - we need to add distributed tracing first, as it's required by the structured logging feature. Stay tuned!

solnic avatar Sep 02 '25 14:09 solnic

@venkatd I got it all working, and it's great to finally have structured logs and traces in Sentry :)

I didn't have tracing or logs set up (only crash reporting), so I had to do the following steps to get it working (hopefully useful for others trying to do the same).

Tracing and logging setup

## config.exs (or prod/dev)

# Tracing setup according to: https://docs.sentry.io/platforms/elixir/tracing/
config :opentelemetry, span_processor: {Sentry.OpenTelemetry.SpanProcessor, []}
config :opentelemetry, sampler: {Sentry.OpenTelemetry.Sampler, []}

# Configure the Sentry logger (remember to change :myapp to match your otp app).
config :myapp, :logger, [
  {:handler, :sentry_log, Ex.Sentry.LoggerHandler,
   %{
     config: %{},
     level: :warning
   }}
]
# application.ex

@impl true
def start(_type, _args) do
  # NOTE: The setup varies depending on if you're using cowboy or bandit.
  # See https://docs.sentry.io/platforms/elixir/tracing/ for more details.
  :opentelemetry_cowboy.setup()
  OpentelemetryPhoenix.setup(adapter: :cowboy2)
  OpentelemetryEcto.setup([:ls, :repo], db_statement: :enabled)

  # Add all configured logger handlers (remember to change :myapp to match your otp app).
  Logger.add_handlers(:myapp)    
  ...
# mix.exs

defp deps do
[
    # OpenTelemetry core packages.        
    {:opentelemetry, "~> 1.5"},
    {:opentelemetry_api, "~> 1.4"},
    {:opentelemetry_exporter, "~> 1.8"},
    {:opentelemetry_semantic_conventions, "~> 1.27"},
    
    # OpenTelemetry instrumentation specific libraries.
    {:opentelemetry_cowboy, "~> 1.0"},
    {:opentelemetry_ecto, "~> 1.2"},
    {:opentelemetry_phoenix, "~> 2.0"},
    ...

One change I had to make to your handler

For some reason :otel_tracer.current_span_ctx() always returned :undefined for me. This could perhaps have something to do with my otel setup, I honestly don't know. However I found that I could just fetch :otel_span_id and :otel_trace_id from meta instead. See diff below. The way I'm doing it might not be safe in all situations, but from my (limited) testing it seems that these two keys are always present.

--- orig.ex	2025-09-03 02:32:55.480225993 +0200
+++ mod.ex	2025-09-03 03:19:05.112742533 +0200
@@ -184,7 +184,6 @@
   end
 
   defp to_sentry_log(%{level: level, msg: msg, meta: meta}, state) do
-    {trace_id, parent_span_id} = current_trace_and_span_ids()
     {body, attrs_from_msg} = message_to_body_and_attrs(msg)
 
     attributes =
@@ -198,7 +197,7 @@
         "ip_address" => meta[:ip_address],
         "sentry.release" => Sentry.Config.release(),
         "sentry.environment" => Sentry.Config.environment_name(),
-        "sentry.trace.parent_span_id" => parent_span_id
+        "sentry.trace.parent_span_id" => meta[:otel_span_id]
       }
       |> Map.merge(flatten_map(state.extra_attributes))
       |> Map.merge(attrs_from_msg)
@@ -209,7 +208,7 @@
       # Sentry expects seconds since the epoch but recognizes fractional seconds
       # :logger provides microseconds in event.time
       "timestamp" => meta[:time] / 1_000_000,
-      "trace_id" => trace_id,
+      "trace_id" => meta[:otel_trace_id],
       "level" => level_to_sentry(level),
       "severity_number" => severity_number(level),
       "attributes" => attributes,
@@ -244,16 +243,6 @@
   defp normalize_report(report) when is_list(report), do: Map.new(report)
   defp normalize_report(other), do: %{"value" => other}
 
-  defp current_trace_and_span_ids() do
-    case :otel_tracer.current_span_ctx() do
-      :undefined -> {random_trace_id(), nil}
-      span_ctx -> {:otel_span.hex_trace_id(span_ctx), :otel_span.hex_span_id(span_ctx)}
-    end
-  end
-
-  defp random_trace_id(),
-    do: :crypto.strong_rand_bytes(16) |> Base.encode16(case: :lower)
-
   defp level_to_sentry(level) do
     case level do
       :debug -> "debug"

A sampling function to avoid getting too many spans into Sentry

After enabling tracing I got way too many spans pushed into Sentry. This trace sampler only includes the "top level" liveview and non-liveview requests (and their child-spans get automatically included). I know it's not related to the structured logging, but I thought I might as well share it since I wrote it :). It's just what I use for my own app, and it's not "good" in any way, so perhaps just use it as a starting point if you write your own. It has hard-coded paths and match patterns, only handling certain types of web requests etc.

## config.exs (or prod/dev)

config :sentry,
  traces_sampler: &MyappWeb.Monitoring.SentrySampler.traces_sampler/1,
  ...
defmodule MyappWeb.Monitoring.SentrySampler do
  @moduledoc """
  Functions related to sampling so we don't send too much data.
  """
  @ignored_url_paths Enum.map(MyappWeb.static_paths(), &("/" <> &1)) ++ ["/phoenix", "/health"]

  def traces_sampler(sampling_context) do
    op = sampling_context.transaction_context.op

    cond do
      # Include HTTP requests unless its' URL path starts with one of the ignored paths.
      op in [:GET, :POST, :DELETE] ->
        url_path = sampling_context.transaction_context.attributes[:"url.path"]
        ignore? = Enum.any?(@ignored_url_paths, &String.starts_with?(url_path, &1))

        if ignore?, do: 0.0, else: 1.0

      # Include all of our LiveView requests.
      String.starts_with?(op, "MyappWeb") ->
        1.0

      # Don't include other transactions.
      # It might for example be ops like "myapp.repo.query",which is emitted by OpentelemetryEcto
      # for every single query. Even though we ignore these transactions, they still get included
      # if their parent span is included, e.g. the HTTP GET request.
      true ->
        0.0
    end
  end
end

eriknaslund avatar Sep 03 '25 01:09 eriknaslund

@solnic That's awesome, thanks for the update :)

eriknaslund avatar Sep 03 '25 01:09 eriknaslund

@eriknaslund thanks for sharing the changes needed and glad it's working for you now

@solnic thanks for the hard work :)

venkatd avatar Sep 03 '25 07:09 venkatd

thanks guys 🙏 @venkatd @eriknaslund

patrols avatar Sep 07 '25 01:09 patrols

Thanks @venkatd and @eriknaslund for the code snippets!

I finally got an excuse to write elixir for my job at Sentry, and just made a PR to logflare to add support for a sentry sink: https://github.com/Logflare/logflare/pull/2787. The code that was shared here was a big help :)

AbhiPrasad avatar Sep 20 '25 03:09 AbhiPrasad