poetry2nix
poetry2nix copied to clipboard
Collision in unstructured[pdf] for bin/.pytesseract-wrapped
Describe the issue
Using `unstructured[pdf]` doesn't seem to work with poetry2nix, unfortunately:
> error: collision between `/nix/store/3i4016yzs1r0q3bpsilcmgfwzkgczrnw-python3.11-pytesseract-0.3.10/bin/.pytesseract-wrapped' and `/nix/store/35rav0x2whmy02mdy3iipr63zlcdmn7k-python3.11-unstructured-pytesseract-0.3.12/bin/.pytesseract-wrapped'
Additional context
I've tried various overrides to align versions, as `unstructured[pdf]` depends on `unstructured-inference==0.7.31` and `unstructured-pytesseract==0.3.12`, while `unstructured-inference` requires `pytesseract==0.3.10`. I first thought that the issue was 0.3.12 vs 0.3.10 (though poetry handles this fine). But aligning versions with overrides hasn't helped with the binary conflict, unfortunately 😖.
Interestingly the nixpkgs-packaged unstructured works fine as a buildInput for a devShell on its own, but doesn't when using poetry2nix.mkPoetryEnv.extraPackages...
Any help is appreciated. 🙏
`flake.nix`:
# Minimal flake reproducing the `.pytesseract-wrapped` collision: it exposes the
# Poetry project as a package and a dev shell containing the Poetry environment.
{
  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
  inputs.poetry2nix.url = "github:nix-community/poetry2nix";

  outputs =
    { self, nixpkgs, poetry2nix }:
    let
      supportedSystems = [
        "x86_64-linux"
        "x86_64-darwin"
        "aarch64-linux"
        "aarch64-darwin"
      ];

      # Builds an attribute set with one entry per supported system.
      forAllSystems = nixpkgs.lib.genAttrs supportedSystems;

      # nixpkgs package set for each supported system, keyed by system name.
      pkgsFor = forAllSystems (system: nixpkgs.legacyPackages.${system});

      # poetry2nix helper set instantiated against the given system's nixpkgs.
      poetry2nixFor = system: poetry2nix.lib.mkPoetry2Nix { pkgs = pkgsFor.${system}; };
    in
    {
      packages = forAllSystems (system: {
        default = (poetry2nixFor system).mkPoetryApplication { projectDir = self; };
      });

      devShells = forAllSystems (
        system:
        let
          systemPkgs = pkgsFor.${system};
          p2n = poetry2nixFor system;

          # Overlay on the Python package set: appends setuptools (taken from
          # nixpkgs' python3Packages) to iopath's build inputs.
          iopathOverlay = final: prev: {
            iopath = prev.iopath.overridePythonAttrs (old: {
              buildInputs = (old.buildInputs or [ ]) ++ [ systemPkgs.python3Packages.setuptools ];
            });
          };

          # Python environment built from the project's Poetry metadata; this is
          # the derivation whose build reports the collision.
          poetryEnv = p2n.mkPoetryEnv {
            projectDir = self;
            preferWheels = true;
            overrides = p2n.overrides.withDefaults iopathOverlay;
          };
        in
        {
          default = systemPkgs.mkShellNoCC {
            packages = [
              poetryEnv
              systemPkgs.poetry
            ];
          };
        }
      );
    };
}
`pyproject.toml`:
# Poetry metadata for the minimal reproduction project.
[tool.poetry]
name = "sample-project"
version = "0.1.0"
description = ""
# NOTE(review): "[email protected]" looks like a scraper's e-mail obfuscation
# placeholder rather than the original address; confirm before reusing.
authors = ["Author Name <[email protected]>"]
# readme = "README.md"
# license = "BSD"
packages = [{ include = "sample_package" }]

[tool.poetry.dependencies]
# Interpreter is restricted to the 3.11 series.
python = ">=3.11,<3.12"
# The "pdf" extra is the dependency that triggers the reported collision.
unstructured = { version = "^0.14.0", extras = ["pdf"] }

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
`poetry show --tree` (poetry.lock was too long for GH):
unstructured 0.14.0 A library that prepares raw documents for downstream ML tasks.
├── backoff *
├── beautifulsoup4 *
│ └── soupsieve >1.2
├── chardet *
├── dataclasses-json *
│ ├── marshmallow >=3.18.0,<4.0.0
│ │ └── packaging >=17.0
│ └── typing-inspect >=0.4.0,<1
│ ├── mypy-extensions >=0.3.0
│ └── typing-extensions >=3.7.4
├── emoji *
│ └── typing-extensions *
├── filetype *
├── google-cloud-vision *
│ ├── google-api-core >=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev
│ │ ├── google-auth >=1.25.0,<3.0dev
│ │ │ ├── cachetools >=2.0.0,<6.0
│ │ │ ├── pyasn1-modules >=0.2.1
│ │ │ │ └── pyasn1 >=0.4.6,<0.7.0
│ │ │ └── rsa >=3.1.4,<5
│ │ │ └── pyasn1 >=0.1.3 (circular dependency aborted here)
│ │ ├── googleapis-common-protos >=1.52.0,<2.0dev
│ │ │ └── protobuf >=3.15.0
│ │ ├── grpcio >=1.33.2,<2.0dev
│ │ ├── grpcio-status >=1.33.2,<2.0dev
│ │ │ ├── googleapis-common-protos >=1.5.5 (circular dependency aborted here)
│ │ │ ├── grpcio >=1.63.0 (circular dependency aborted here)
│ │ │ └── protobuf >=5.26.1,<6.0dev (circular dependency aborted here)
│ │ ├── protobuf >=3.12.0 (circular dependency aborted here)
│ │ └── requests >=2.18.0,<3.0.0dev
│ │ ├── certifi >=2017.4.17
│ │ ├── charset-normalizer >=2,<4
│ │ ├── idna >=2.5,<4
│ │ └── urllib3 >=1.21.1,<3
│ └── proto-plus >=1.15.0
│ └── protobuf >=3.19.0 (circular dependency aborted here)
├── langdetect *
│ └── six *
├── lxml *
├── nltk *
│ ├── click *
│ │ └── colorama *
│ ├── joblib *
│ ├── regex >=2021.8.3
│ └── tqdm *
│ └── colorama * (circular dependency aborted here)
├── numpy *
├── onnx *
│ ├── numpy >=1.20
│ └── protobuf >=3.20.2
├── pdf2image *
│ └── pillow *
├── pdfminer-six *
│ ├── charset-normalizer >=2.0.0
│ └── cryptography >=36.0.0
│ └── cffi >=1.12
│ └── pycparser *
├── pikepdf *
│ ├── deprecated *
│ │ └── wrapt >=1.10,<2
│ ├── lxml >=4.8
│ ├── packaging *
│ └── pillow >=10.0.1
├── pillow-heif *
│ └── pillow >=9.5.0
├── pypdf *
├── python-iso639 *
├── python-magic *
├── rapidfuzz *
├── requests *
│ ├── certifi >=2017.4.17
│ ├── charset-normalizer >=2,<4
│ ├── idna >=2.5,<4
│ └── urllib3 >=1.21.1,<3
├── tabulate *
├── typing-extensions *
├── unstructured-client *
│ ├── certifi >=2023.7.22
│ ├── charset-normalizer >=3.2.0
│ ├── dataclasses-json >=0.6.4
│ │ ├── marshmallow >=3.18.0,<4.0.0
│ │ │ └── packaging >=17.0
│ │ └── typing-inspect >=0.4.0,<1
│ │ ├── mypy-extensions >=0.3.0
│ │ └── typing-extensions >=3.7.4
│ ├── deepdiff >=6.0
│ │ └── ordered-set >=4.1.0,<4.2.0
│ ├── idna >=3.4
│ ├── jsonpath-python >=1.0.6
│ ├── marshmallow >=3.19.0 (circular dependency aborted here)
│ ├── mypy-extensions >=1.0.0 (circular dependency aborted here)
│ ├── packaging >=23.1 (circular dependency aborted here)
│ ├── pypdf >=4.0
│ ├── python-dateutil >=2.8.2
│ │ └── six >=1.5
│ ├── requests >=2.31.0
│ │ ├── certifi >=2017.4.17 (circular dependency aborted here)
│ │ ├── charset-normalizer >=2,<4 (circular dependency aborted here)
│ │ ├── idna >=2.5,<4 (circular dependency aborted here)
│ │ └── urllib3 >=1.21.1,<3
│ ├── six >=1.16.0 (circular dependency aborted here)
│ ├── typing-extensions >=4.7.1 (circular dependency aborted here)
│ ├── typing-inspect >=0.9.0 (circular dependency aborted here)
│ └── urllib3 >=1.26.18 (circular dependency aborted here)
├── unstructured-inference 0.7.31
│ ├── huggingface-hub *
│ │ ├── filelock *
│ │ ├── fsspec >=2023.5.0
│ │ ├── packaging >=20.9
│ │ ├── pyyaml >=5.1
│ │ ├── requests *
│ │ │ ├── certifi >=2017.4.17
│ │ │ ├── charset-normalizer >=2,<4
│ │ │ ├── idna >=2.5,<4
│ │ │ └── urllib3 >=1.21.1,<3
│ │ ├── tqdm >=4.42.1
│ │ │ └── colorama *
│ │ └── typing-extensions >=3.7.4.3
│ ├── layoutparser *
│ │ ├── effdet *
│ │ │ ├── omegaconf >=2.0
│ │ │ │ ├── antlr4-python3-runtime ==4.9.*
│ │ │ │ └── pyyaml >=5.1.0 (circular dependency aborted here)
│ │ │ ├── pycocotools >=2.0.2
│ │ │ │ ├── matplotlib >=2.1.0
│ │ │ │ │ ├── contourpy >=1.0.1
│ │ │ │ │ │ └── numpy >=1.20
│ │ │ │ │ ├── cycler >=0.10
│ │ │ │ │ ├── fonttools >=4.22.0
│ │ │ │ │ ├── kiwisolver >=1.3.1
│ │ │ │ │ ├── numpy >=1.23 (circular dependency aborted here)
│ │ │ │ │ ├── packaging >=20.0 (circular dependency aborted here)
│ │ │ │ │ ├── pillow >=8
│ │ │ │ │ ├── pyparsing >=2.3.1
│ │ │ │ │ └── python-dateutil >=2.7
│ │ │ │ │ └── six >=1.5
│ │ │ │ └── numpy * (circular dependency aborted here)
│ │ │ ├── timm >=0.9.2
│ │ │ │ ├── huggingface-hub * (circular dependency aborted here)
│ │ │ │ ├── pyyaml * (circular dependency aborted here)
│ │ │ │ ├── safetensors *
│ │ │ │ ├── torch *
│ │ │ │ │ ├── filelock * (circular dependency aborted here)
│ │ │ │ │ ├── fsspec * (circular dependency aborted here)
│ │ │ │ │ ├── jinja2 *
│ │ │ │ │ │ └── markupsafe >=2.0
│ │ │ │ │ ├── mkl >=2021.1.1,<=2021.4.0
│ │ │ │ │ │ ├── intel-openmp ==2021.*
│ │ │ │ │ │ └── tbb ==2021.*
│ │ │ │ │ ├── networkx *
│ │ │ │ │ ├── nvidia-cublas-cu12 12.1.3.1
│ │ │ │ │ ├── nvidia-cuda-cupti-cu12 12.1.105
│ │ │ │ │ ├── nvidia-cuda-nvrtc-cu12 12.1.105
│ │ │ │ │ ├── nvidia-cuda-runtime-cu12 12.1.105
│ │ │ │ │ ├── nvidia-cudnn-cu12 8.9.2.26
│ │ │ │ │ │ └── nvidia-cublas-cu12 * (circular dependency aborted here)
│ │ │ │ │ ├── nvidia-cufft-cu12 11.0.2.54
│ │ │ │ │ ├── nvidia-curand-cu12 10.3.2.106
│ │ │ │ │ ├── nvidia-cusolver-cu12 11.4.5.107
│ │ │ │ │ │ ├── nvidia-cublas-cu12 * (circular dependency aborted here)
│ │ │ │ │ │ ├── nvidia-cusparse-cu12 *
│ │ │ │ │ │ │ └── nvidia-nvjitlink-cu12 *
│ │ │ │ │ │ └── nvidia-nvjitlink-cu12 * (circular dependency aborted here)
│ │ │ │ │ ├── nvidia-cusparse-cu12 12.1.0.106 (circular dependency aborted here)
│ │ │ │ │ ├── nvidia-nccl-cu12 2.20.5
│ │ │ │ │ ├── nvidia-nvtx-cu12 12.1.105
│ │ │ │ │ ├── sympy *
│ │ │ │ │ │ └── mpmath >=0.19
│ │ │ │ │ ├── triton 2.3.0
│ │ │ │ │ │ └── filelock * (circular dependency aborted here)
│ │ │ │ │ └── typing-extensions >=4.8.0 (circular dependency aborted here)
│ │ │ │ └── torchvision *
│ │ │ │ ├── numpy * (circular dependency aborted here)
│ │ │ │ ├── pillow >=5.3.0,<8.3.dev0 || >=8.4.dev0 (circular dependency aborted here)
│ │ │ │ └── torch 2.3.0 (circular dependency aborted here)
│ │ │ ├── torch >=1.12.1 (circular dependency aborted here)
│ │ │ └── torchvision * (circular dependency aborted here)
│ │ ├── iopath *
│ │ │ ├── portalocker *
│ │ │ │ └── pywin32 >=226
│ │ │ ├── tqdm * (circular dependency aborted here)
│ │ │ └── typing-extensions * (circular dependency aborted here)
│ │ ├── numpy * (circular dependency aborted here)
│ │ ├── opencv-python *
│ │ │ └── numpy >=1.23.5 (circular dependency aborted here)
│ │ ├── pandas *
│ │ │ ├── numpy >=1.23.2 (circular dependency aborted here)
│ │ │ ├── python-dateutil >=2.8.2 (circular dependency aborted here)
│ │ │ ├── pytz >=2020.1
│ │ │ └── tzdata >=2022.7
│ │ ├── pdf2image *
│ │ │ └── pillow * (circular dependency aborted here)
│ │ ├── pdfplumber *
│ │ │ ├── pdfminer-six 20231228
│ │ │ │ ├── charset-normalizer >=2.0.0 (circular dependency aborted here)
│ │ │ │ └── cryptography >=36.0.0
│ │ │ │ └── cffi >=1.12
│ │ │ │ └── pycparser *
│ │ │ ├── pillow >=9.1 (circular dependency aborted here)
│ │ │ └── pypdfium2 >=4.18.0
│ │ ├── pillow * (circular dependency aborted here)
│ │ ├── pytesseract *
│ │ │ ├── packaging >=21.3 (circular dependency aborted here)
│ │ │ └── pillow >=8.0.0 (circular dependency aborted here)
│ │ ├── pyyaml >=5.1 (circular dependency aborted here)
│ │ ├── scipy *
│ │ │ └── numpy >=1.22.4,<2.3 (circular dependency aborted here)
│ │ ├── torch * (circular dependency aborted here)
│ │ └── torchvision * (circular dependency aborted here)
│ ├── onnx *
│ │ ├── numpy >=1.20 (circular dependency aborted here)
│ │ └── protobuf >=3.20.2
│ ├── onnxruntime >=1.17.0
│ │ ├── coloredlogs *
│ │ │ └── humanfriendly >=9.1
│ │ │ └── pyreadline3 *
│ │ ├── flatbuffers *
│ │ ├── numpy >=1.21.6 (circular dependency aborted here)
│ │ ├── packaging * (circular dependency aborted here)
│ │ ├── protobuf * (circular dependency aborted here)
│ │ └── sympy * (circular dependency aborted here)
│ ├── opencv-python !=4.7.0.68 (circular dependency aborted here)
│ ├── python-multipart *
│ ├── rapidfuzz *
│ └── transformers >=4.25.1
│ ├── filelock * (circular dependency aborted here)
│ ├── huggingface-hub >=0.23.0,<1.0 (circular dependency aborted here)
│ ├── numpy >=1.17 (circular dependency aborted here)
│ ├── packaging >=20.0 (circular dependency aborted here)
│ ├── pyyaml >=5.1 (circular dependency aborted here)
│ ├── regex !=2019.12.17
│ ├── requests * (circular dependency aborted here)
│ ├── safetensors >=0.4.1 (circular dependency aborted here)
│ ├── tokenizers >=0.19,<0.20
│ │ └── huggingface-hub >=0.16.4,<1.0 (circular dependency aborted here)
│ └── tqdm >=4.27 (circular dependency aborted here)
├── unstructured-pytesseract >=0.3.12
│ ├── packaging >=21.3
│ └── pillow >=8.0.0
└── wrapt *