poetry2nix

python38Packages.apache-airflow: Webserver fails to start with "No module named 'pkg_resources'"

Open · GuillaumeDesforges opened this issue 4 years ago · 6 comments

Bug description

$ airflow webserver -p 8080
  ____________       _____________
 ____    |__( )_________  __/__  /________      __
____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
 _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
[2020-12-02 12:20:53,022] {__init__.py:50} INFO - Using executor SequentialExecutor
[2020-12-02 12:20:53,022] {dagbag.py:417} INFO - Filling up the DagBag from /home/arsleust/polaranalytics/polar-airflow-etl/.airflow/dags
/nix/store/7rsjwcqvvmh8glk79j5wmn3zm5hw8xvi-python3-3.8.6-env/lib/python3.8/site-packages/airflow/models/dag.py:1337: PendingDeprecationWarning: The requested task could not be added to the DAG because a task with task_id create_tag_template_field_result is already in the DAG. Starting in Airflow 2.0, trying to overwrite a task will raise an exception.
  warnings.warn(
Running the Gunicorn Server with:
Workers: 4 sync
Host: 0.0.0.0:8080
Timeout: 120
Logfiles: - -
=================================================================            
Traceback (most recent call last):
  File "/nix/store/gw3bb8ab28fxkg9m43vvgjydynlbig38-python3.8-gunicorn-20.0.4/bin/.gunicorn-wrapped", line 6, in <module>
    from gunicorn.app.wsgiapp import run
  File "/nix/store/gw3bb8ab28fxkg9m43vvgjydynlbig38-python3.8-gunicorn-20.0.4/lib/python3.8/site-packages/gunicorn/app/wsgiapp.py", line 9, in <module>
    from gunicorn.app.base import Application
  File "/nix/store/gw3bb8ab28fxkg9m43vvgjydynlbig38-python3.8-gunicorn-20.0.4/lib/python3.8/site-packages/gunicorn/app/base.py", line 11, in <module>
    from gunicorn import util
  File "/nix/store/gw3bb8ab28fxkg9m43vvgjydynlbig38-python3.8-gunicorn-20.0.4/lib/python3.8/site-packages/gunicorn/util.py", line 26, in <module>
    import pkg_resources
ModuleNotFoundError: No module named 'pkg_resources'

Reproduce

pyproject.toml

[tool.poetry.dependencies]
python = "3.8"
apache-airflow = "1.10.12"

Then run `poetry lock` to generate the lock file.

shell.nix

# Development shell: the poetry CLI plus a poetry2nix-built Python 3.8
# environment for the project in this directory.
let
  pkgs = import <nixpkgs> { };

  # Interpreter handed to poetry2nix.
  python = pkgs.python38Packages.python;

  # Project-specific overrides from ./nix/overrides.nix, wrapped with
  # poetry2nix's `overrides.withoutDefaults`.
  projectOverrides = pkgs.poetry2nix.overrides.withoutDefaults
    (import ./nix/overrides.nix { inherit pkgs; });

  python-env = pkgs.poetry2nix.mkPoetryEnv {
    inherit python;
    projectDir = ./.;
    overrides = projectOverrides;
  };
in
pkgs.mkShell {
  buildInputs = [ pkgs.poetry python-env ];

  # Exported into the shell as an environment variable; points at ./.airflow
  # next to this file.
  AIRFLOW_HOME = toString ./.airflow;
}

./nix/overrides.nix

# poetry2nix override set for this project.
#
# Takes the nixpkgs set (`pkgs`) and returns a `self: super:` overlay over the
# Python package set. The first half patches packages generated from the lock
# file; the second half defines packages built by hand from PyPI sources.
#
# Every access to an `old.*` list is guarded with `or [ ]` so that an override
# still evaluates when the underlying package does not define that attribute.
#
# NOTE(review): nothing in this file touches gunicorn or setuptools.
{ pkgs }:
self: super: {
  # Package fixes

  # Adds cython to the build-time inputs.
  numpy = super.numpy.overridePythonAttrs (
    old: {
      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
        self.cython
      ];
    }
  );
  # Adds python3-openid as a propagated (runtime) dependency.
  flask-openid = super.flask-openid.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.python3-openid
      ];
    }
  );
  # Sets meta.priority to 1.
  # NOTE(review): presumably to settle a file collision in the final
  # environment -- confirm.
  python3-openid = super.python3-openid.overridePythonAttrs (
    old: {
      meta = (old.meta or { }) // {
        priority = 1;
      };
    }
  );
  # Adds pbr as a propagated dependency.
  tenacity = super.tenacity.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.pbr
      ];
    }
  );
  # Adds twine as a propagated dependency.
  python-daemon = super.python-daemon.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.twine
      ];
    }
  );
  # Adds pbr as a propagated dependency.
  croniter = super.croniter.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.pbr
      ];
    }
  );
  # Adds pbr as a propagated dependency.
  lockfile = super.lockfile.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.pbr
      ];
    }
  );
  # Adds cython to the build-time inputs.
  pandas = super.pandas.overridePythonAttrs (
    old: {
      nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
        self.cython
      ];
    }
  );
  # Adds bowler (defined below) as a propagated dependency.
  apache-airflow = super.apache-airflow.overridePythonAttrs (
    old: {
      propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
        self.bowler
      ];
    }
  );

  # =========================================
  # new packages
  # =========================================

  # Each package below is fetched from PyPI at a pinned version and hash and
  # built with its test phase disabled (`doCheck = false`).

  bowler = super.buildPythonPackage
    rec {
      pname = "bowler";
      version = "0.9.0";

      src = super.fetchPypi {
        inherit pname version;
        sha256 = "1ns57vb3b7ws07950p1m55a14sibmwfmsxaxl415hm5xwzi5rf6d";
      };

      propagatedBuildInputs = [
        self.attrs
        self.click
        self.fissix
        self.moreorless
        self.volatile
      ];

      doCheck = false;
    };
  fissix = super.buildPythonPackage
    rec {
      pname = "fissix";
      version = "20.8.0";

      src = super.fetchPypi {
        inherit pname version;
        sha256 = "06jp0ph0wxaixxawbhxh5nzqid5q5q081cdip0lg7bwvmij0ms6n";
      };

      propagatedBuildInputs = [
        self.appdirs
      ];

      doCheck = false;
    };
  moreorless = super.buildPythonPackage
    rec {
      pname = "moreorless";
      version = "0.3.0";

      src = super.fetchPypi {
        inherit pname version;
        sha256 = "1x3qyry3q12in421nrh4xaqjdzr6zmkc9h1gbbi91rf1sq5my72w";
      };

      nativeBuildInputs = [
        self.setuptools_scm
      ];

      # `self.parameterized` was previously listed twice; one entry suffices.
      propagatedBuildInputs = [
        self.parameterized
        self.click
        self.volatile
      ];

      doCheck = false;
    };
  volatile = super.buildPythonPackage
    rec {
      pname = "volatile";
      version = "2.1.0";

      src = super.fetchPypi {
        inherit pname version;
        sha256 = "1lri7a6pmlx9ghbrsgd702c3n862glwy0p8idh0lwdg313anmqwv";
      };

      doCheck = false;
    };
  # Defined from scratch rather than via `super.psycopg2.overridePythonAttrs`,
  # with pkgs.postgresql as a build-time input.
  psycopg2 = super.buildPythonPackage
    rec {
      pname = "psycopg2";
      version = "2.8.6";

      src = super.fetchPypi {
        inherit pname version;
        sha256 = "0hzmk6b1hb5riqkljr5xics6p4zbvmis6knbczb7zhq7273zc8zv";
      };

      nativeBuildInputs = [ pkgs.postgresql ];

      doCheck = false;
    };
}

Investigation

$ which airflow
/nix/store/7rsjwcqvvmh8glk79j5wmn3zm5hw8xvi-python3-3.8.6-env/bin/airflow

has content

#! /nix/store/k8p54jg8ipvnfz435mayf5bnqhw4qqap-bash-4.4-p23/bin/bash -e
export NIX_PYTHONPREFIX='/nix/store/7rsjwcqvvmh8glk79j5wmn3zm5hw8xvi-python3-3.8.6-env'
export NIX_PYTHONEXECUTABLE='/nix/store/7rsjwcqvvmh8glk79j5wmn3zm5hw8xvi-python3-3.8.6-env/bin/python3.8'
export NIX_PYTHONPATH='/nix/store/7rsjwcqvvmh8glk79j5wmn3zm5hw8xvi-python3-3.8.6-env/lib/python3.8/site-packages'
export PYTHONNOUSERSITE='true'
exec "/nix/store/9mcgpb3z1pnfma0n0hmxaiddi6wnlpdl-python3.8-apache-airflow-1.10.12/bin/airflow"  "$@"

but /nix/store/9mcgpb3z1pnfma0n0hmxaiddi6wnlpdl-python3.8-apache-airflow-1.10.12/bin/airflow starts with

#! /nix/store/k8p54jg8ipvnfz435mayf5bnqhw4qqap-bash-4.4-p23/bin/bash -e
export PATH='/nix/store/346skv0d24rqnf4npknbp9h5bs14j8zy-python3-3.8.6/bin:/nix/store/9mcgpb3z1pnfma0n0hmxaiddi6wnlpdl-python3.8-apache-airflow-1.10.12/bin:/nix/store/s87gb07v6hnj19f1kh517idhr6cgq4hm-python3.8-alembic-1.4.3/bin:/nix/store/mi0bwf9w03ywv1gnbldx8bkxsqhdd4x8-python3.8-mako-1.1.3/bin:/nix/store/v9ayrnp6das51iz467kl1pz6q1r0n3d0-python3.8-argcomplete-1.12.2/bin:/nix/store/6cfj2a1xiccw3icl604ihkyw8vbaci1j-python3.8-natsort-7.1.0/bin:/nix/store/kxj2x1rfvdv2q68wwxhifv5g0x3izyqx-python3.8-pbr-5.4.5/bin:/nix/store/9jifi44sf5icd698ilkdsnr39wf895i5-python3.8-dill-0.3.3/bin:/nix/store/7srgwbi32qb2xbn4xwnvdna5s9gmr5i5-python3.8-email-validator-1.1.2/bin:/nix/store/l9697s5fax6yhaz4myarc14iwzi8aghb-python3.8-flask-1.1.2/bin:/nix/store/v1p1aa4z2kvzjzk13ra7mhcbyl1bqiia-python3.8-flask-appbuilder-2.3.4/bin:/nix/store/wj2sr5ay4g56wgvjywckg7nhpzahwa9p-python3.8-babel-2.9.0/bin:/nix/store/ywn8zcz3n9i7hpas4d7lxy5hdndyhlkr-python3.8-pyjwt-1.7.1/bin:/nix/store/8r5az3pxld7n7sz3f7n59n0fr0q4x8cq-python3.8-jsonschema-3.2.0/bin:/nix/store/0yclxxs10bpnwff2wbvz3cwn4az9jv1y-python3.8-flask-swagger-0.2.14/bin:/nix/store/1z5a8dmc9hwwzzfm33llc0r6rb5vgpjs-python3.8-future-0.18.2/bin:/nix/store/gw3bb8ab28fxkg9m43vvgjydynlbig38-python3.8-gunicorn-20.0.4/bin:/nix/store/qpcdvi62b7gb8cq2kglm4baj35dn8sci-python3.8-json-merge-patch-0.2/bin:/nix/store/q04yn44fgpx17gnrigkjl17m2cwz9b8i-python3.8-markdown-2.6.11/bin:/nix/store/5d5vzi90k4lwfw5d5xccfrs6ny6m0j1y-python3.8-numpy-1.19.4/bin:/nix/store/bhin8cxywcplvcam3kadxqhhzpzwd9p2-python3.8-pygments-2.7.2/bin:/nix/store/ha5jl5261n1f7zq6xikak8n4b8cvpml5-python3.8-docutils-0.16/bin:/nix/store/wpckcwhmb24xjvypl0p7rkwl4ji6v8sy-python3.8-twine-3.2.0/bin:/nix/store/r7ixhz1sh1vpbk6l93giqcns5x3pcr3m-python3.8-keyring-21.3.1/bin:/nix/store/mwbwc9w3mnqg0gwzp5y5m8n44wqmmpgs-python3.8-pkginfo-1.5.0.1/bin:/nix/store/2harf52n7qchz467wlpr6k0f2x5wyzl6-python3.8-setuptools-47.3.1/bin:/nix/store/2fnsdrp7bgh0n5arx06cjyzs2h9hqzs5-python3.8-chardet-3.0.4/bin:/nix/store/v
r2r9l3lddmz7w9q450h6dpyhfggij97-python3.8-tqdm-4.48.2/bin:/nix/store/74pazj5cl7wc8kbdfvw3hw41d0ick2kf-python3.8-python-nvd3-0.15.0/bin:/nix/store/k1ys8x4fl900iwvlgdpbci7qf8hlplls-python3.8-python-slugify-4.0.1/bin:/nix/store/d0v3b4ji92f5xnf45cm422b2f6x8wvhb-python3.8-tabulate-0.8.7/bin:/nix/store/6yy7wz93lbvr5r4776x3qz6yrcykxkml-python3.8-bowler-0.9.0/bin'${PATH:+':'}$PATH
export PYTHONNOUSERSITE='true'
exec -a "$0" "/nix/store/9mcgpb3z1pnfma0n0hmxaiddi6wnlpdl-python3.8-apache-airflow-1.10.12/bin/.airflow-wrapped"  "$@"

so it may not be using the right Python?

GuillaumeDesforges avatar Dec 02 '20 11:12 GuillaumeDesforges

Seems like setuptools is missing. Gunicorn does declare setuptools as a dependency (both wheel and sdist releases), therefore it should be in the final environment. Not sure what's going wrong.

DavHau avatar Dec 03 '20 18:12 DavHau

My hypothesis is that airflow spawns processes, and the spawned processes do not run with the right PYTHONPATH.

GuillaumeDesforges avatar Dec 03 '20 19:12 GuillaumeDesforges

I'm also having this issue of setuptools being missing. Specifically for slugify. Should I make a new issue?

Suya1671 avatar Feb 08 '23 18:02 Suya1671

llama-index has the same pkg_resources error. I resolved it with

defaultPoetryOverrides.extend (self: super: {
  # Append setuptools to llama-index's propagated inputs (an empty list is
  # assumed when the package defines none).
  llama-index = super.llama-index.overridePythonAttrs (prev:
    let
      inherited = prev.propagatedBuildInputs or [ ];
    in
    {
      propagatedBuildInputs = inherited ++ [ super.setuptools ];
    });
});

buildInputs is not sufficient, it has to be in propagatedBuildInputs.

I'm not sure if that applies to airflow, but maybe this will help some searchers.

john-shaffer avatar Mar 14 '23 19:03 john-shaffer

@john-shaffer do you understand why buildInputs wasn't sufficient?

asymmetric avatar Mar 31 '23 14:03 asymmetric

> @john-shaffer do you understand why buildInputs wasn't sufficient?

No, I would have expected setuptools to be available like any other dependency.

john-shaffer avatar Mar 31 '23 15:03 john-shaffer