spaCy
spaCy copied to clipboard
ValueError: spacy.strings.StringStore size changed, may indicate binary incompatibility without python version or packages change
How to reproduce the behaviour
It happened suddenly after an image rebuild.
Our Dockerfile looks something like this, based on the NVIDIA CUDA 12.6.2 / Ubuntu 22.04 image:
FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
# Upgrade ubuntu packages
# Install dependencies with pip based on pyproject.toml (see complete dump at the end)
CMD ["poetry", "run", "gunicorn", "nlp.api.predict:app", "-c" ,"config/gunicorn.config.py"]
With the python file predict.py (2 files simplified into a single one)
import spacy
from fastapi import FastAPI
class Pipeline:
    """Fetch a spaCy model to local disk and load it on the GPU.

    Constructing an instance immediately downloads the model and loads it,
    so ``self.nlp`` is ready (or an exception has been raised) by the time
    ``__init__`` returns.
    """

    def __init__(self) -> None:
        # Both attributes are filled in by get_and_load_model() below.
        self.model_path = None
        self.nlp = None
        self.get_and_load_model()

    def get_and_load_model(self) -> None:
        """Download the model files and load them into ``self.nlp``."""
        self.get_model_from_bucket()

    def load_model(self) -> None:
        """Load the pipeline stored at ``self.model_path`` into ``self.nlp``."""
        # GPU 0 is requested before loading so the pipeline is created on it.
        spacy.require_gpu(0)
        self.nlp = spacy.load(self.model_path)

    def get_model_from_bucket(self) -> None:
        """Download the model folder, remember its path, then load it."""
        self.model_path = self.download_folder_files()
        self.load_model()

    def download_folder_files(self) -> str:
        """Download the model folder and return its local path.

        Returns:
            The local directory holding the model files.
        """
        self.model_path = "/tmp/model"
        # Download ML model from bucket to local folder /tmp/model/
        # The path must be returned: the caller assigns this method's result
        # to self.model_path, and an implicit None would be passed on to
        # spacy.load().
        return self.model_path
# Application object that gunicorn is pointed at ("nlp.api.predict:app").
app = FastAPI()
# Built at import time: Pipeline() downloads and loads the model in its
# constructor, so any load failure surfaces while a worker imports this module.
pipeline = Pipeline()
I get the following error while spawning the server:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/gunicorn/arbiter.py", line 609, in spawn_worker
worker.init_process()
File "/usr/local/lib/python3.10/dist-packages/uvicorn/workers.py", line 66, in init_process
super(UvicornWorker, self).init_process()
File "/usr/local/lib/python3.10/dist-packages/gunicorn/workers/base.py", line 134, in init_process
self.load_wsgi()
File "/usr/local/lib/python3.10/dist-packages/gunicorn/workers/base.py", line 146, in load_wsgi
self.wsgi = self.app.wsgi()
File "/usr/local/lib/python3.10/dist-packages/gunicorn/app/base.py", line 67, in wsgi
self.callable = self.load()
File "/usr/local/lib/python3.10/dist-packages/gunicorn/app/wsgiapp.py", line 58, in load
return self.load_wsgiapp()
File "/usr/local/lib/python3.10/dist-packages/gunicorn/app/wsgiapp.py", line 48, in load_wsgiapp
return util.import_app(self.app_uri)
File "/usr/local/lib/python3.10/dist-packages/gunicorn/util.py", line 371, in import_app
mod = importlib.import_module(module)
File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/opt/app/nlp/api/predict.py", line 17, in <module>
pipeline = Pipeline()
File "/opt/app/nlp/api/pipeline.py", line 40, in __init__
self.get_and_load_model()
File "/opt/app/nlp/api/pipeline.py", line 55, in get_and_load_model
self.get_model_from_bucket()
File "/opt/app/nlp/api/pipeline.py", line 77, in get_model_from_bucket
self.load_model()
File "/opt/app/nlp/api/pipeline.py", line 63, in load_model
self.nlp = spacy.load(self.model_path)
File "/usr/local/lib/python3.10/dist-packages/spacy/__init__.py", line 51, in load
return util.load_model(
File "/usr/local/lib/python3.10/dist-packages/spacy/util.py", line 467, in load_model
return load_model_from_path(Path(name), **kwargs) # type: ignore[arg-type]
File "/usr/local/lib/python3.10/dist-packages/spacy/util.py", line 539, in load_model_from_path
nlp = load_model_from_config(
File "/usr/local/lib/python3.10/dist-packages/spacy/util.py", line 587, in load_model_from_config
nlp = lang_cls.from_config(
File "/usr/local/lib/python3.10/dist-packages/spacy/language.py", line 1855, in from_config
nlp = lang_cls(
File "/usr/local/lib/python3.10/dist-packages/spacy/language.py", line 188, in __init__
util.registry._entry_point_factories.get_all()
File "/usr/local/lib/python3.10/dist-packages/catalogue/__init__.py", line 110, in get_all
result.update(self.get_entry_points())
File "/usr/local/lib/python3.10/dist-packages/catalogue/__init__.py", line 125, in get_entry_points
result[entry_point.name] = entry_point.load()
File "/usr/local/lib/python3.10/dist-packages/importlib_metadata/__init__.py", line 189, in load
module = import_module(match.group('module'))
File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/opt/app/nlp/architecture/ner.py", line 7, in <module>
from .entity_recognizer import EntityRecognizer
File "nlp/architecture/entity_recognizer.pyx", line 1, in init nlp.architecture.entity_recognizer
# cython: infer_types=True, profile=True, binding=True
ValueError: spacy.strings.StringStore size changed, may indicate binary incompatibility. Expected 96 from C header, got 64 from PyObject
We tried:
- Upgrading spaCy to 3.8.2, but it does not work because this version is incompatible with `cupy-cuda12x` because of NumPy, see https://github.com/explosion/spaCy/issues/13669
- Downgrading NumPy to 1.22, which works no better than 1.26.4, even though https://github.com/explosion/spaCy/issues/13528 reports the same error
Your Environment
- spaCy version: 3.7.5
- Platform: Linux-6.10.4-linuxkit-x86_64-with-glibc2.35
- Python version: 3.10.12
- Python packages:
Package Version Editable project location
--------------------------------- ------------- -------------------------
accelerate 0.25.0
aiohttp 3.9.5
aiosignal 1.3.1
altair 5.3.0
anyio 4.4.0
async-timeout 4.0.3
attrs 23.2.0
black 24.4.2
blinker 1.8.2
blis 0.7.11
build 1.2.2.post1
CacheControl 0.14.1
cachetools 5.4.0
catalogue 2.0.10
certifi 2024.7.4
cffi 1.16.0
charset-normalizer 3.3.2
cleo 2.1.0
click 8.1.7
cloudpathlib 0.18.1
confection 0.1.5
coverage 7.6.0
crashtest 0.4.1
cryptography 43.0.1
cupy-cuda12x 12.3.0
cymem 2.0.8
Cython 0.29.37
datasets 2.20.0
dill 0.3.8
distlib 0.3.9
docstring_parser 0.16
dulwich 0.21.7
exceptiongroup 1.2.2
fastapi 0.110.3
fastjsonschema 2.20.0
fastrlock 0.8.2
filelock 3.15.4
freezegun 1.5.1
frozenlist 1.4.1
fsspec 2024.5.0
gitdb 4.0.11
GitPython 3.1.43
google-api-core 2.19.1
google-api-python-client 2.137.0
google-auth 2.32.0
google-auth-httplib2 0.2.0
google-cloud-aiplatform 1.59.0
google-cloud-appengine-logging 1.4.4
google-cloud-audit-log 0.2.5
google-cloud-bigquery 3.25.0
google-cloud-core 2.4.1
google-cloud-error-reporting 1.11.0
google-cloud-kms 2.24.1
google-cloud-logging 3.10.0
google-cloud-pipeline-components 2.15.0
google-cloud-profiler 4.1.0
google-cloud-resource-manager 1.12.4
google-cloud-secret-manager 2.20.1
google-cloud-storage 2.17.0
google-crc32c 1.5.0
google-resumable-media 2.7.1
googleapis-common-protos 1.63.2
grpc-google-iam-v1 0.13.1
grpcio 1.64.1
grpcio-status 1.62.2
gunicorn 22.0.0
h11 0.14.0
httplib2 0.22.0
huggingface-hub 0.23.4
idna 3.7
importlib_metadata 8.5.0
iniconfig 2.0.0
installer 0.7.0
jaraco.classes 3.4.0
jeepney 0.8.0
Jinja2 3.1.4
jsonschema 4.23.0
jsonschema-specifications 2023.12.1
keyring 24.3.1
kfp 2.7.0
kfp-pipeline-spec 0.3.0
kfp-server-api 2.0.5
kubernetes 26.1.0
langcodes 3.4.0
language_data 1.2.0
marisa-trie 1.2.0
markdown-it-py 3.0.0
MarkupSafe 2.1.5
mdurl 0.1.2
mock 5.1.0
more-itertools 10.5.0
mpmath 1.3.0
msgpack 1.1.0
multidict 6.0.5
multiprocess 0.70.16
murmurhash 1.0.10
mypy-extensions 1.0.0
networkx 3.3
nlp 0.3.6 /opt/app
numpy 1.26.4
nvidia-cublas-cu12 12.1.3.1
nvidia-cuda-cupti-cu12 12.1.105
nvidia-cuda-nvrtc-cu12 12.1.105
nvidia-cuda-runtime-cu12 12.1.105
nvidia-cudnn-cu12 8.9.2.26
nvidia-cufft-cu12 11.0.2.54
nvidia-curand-cu12 10.3.2.106
nvidia-cusolver-cu12 11.4.5.107
nvidia-cusparse-cu12 12.1.0.106
nvidia-nccl-cu12 2.19.3
nvidia-nvjitlink-cu12 12.5.82
nvidia-nvtx-cu12 12.1.105
oauthlib 3.2.2
packaging 24.1
pandas 2.2.2
pathspec 0.12.1
pexpect 4.9.0
pillow 10.4.0
pip 24.3.1
pkginfo 1.11.2
platformdirs 4.2.2
pluggy 1.5.0
poetry 1.8.4
poetry-core 1.9.1
poetry-plugin-export 1.8.0
preshed 3.0.9
prometheus_client 0.20.0
prometheus-fastapi-instrumentator 5.11.2
proto-plus 1.24.0
protobuf 4.25.3
psutil 6.0.0
ptyprocess 0.7.0
pyarrow 16.1.0
pyarrow-hotfix 0.6
pyasn1 0.6.0
pyasn1_modules 0.4.0
pycparser 2.22
pydantic 1.10.17
pydeck 0.9.1
Pygments 2.18.0
pynvml 11.5.2
pyparsing 3.1.2
pyproject_hooks 1.2.0
pytest 7.4.4
pytest-cov 4.1.0
pytest-mock 3.14.0
python-dateutil 2.9.0.post0
python-decouple 3.8
python-dotenv 1.0.1
pytz 2024.1
PyYAML 6.0.1
RapidFuzz 3.10.1
referencing 0.35.1
regex 2024.5.15
requests 2.32.3
requests-oauthlib 2.0.0
requests-toolbelt 0.10.1
rich 13.7.1
rpds-py 0.19.0
rsa 4.9
safetensors 0.4.3
SecretStorage 3.3.3
sentencepiece 0.1.99
setuptools 70.3.0
shapely 2.0.5
shellingham 1.5.4
simplejson 3.19.2
six 1.16.0
smart-open 7.0.4
smmap 5.0.1
sniffio 1.3.1
spacy 3.7.5
spacy-alignments 0.9.1
spacy-legacy 3.0.12
spacy-loggers 1.0.5
spacy-transformers 1.3.5
srsly 2.4.8
starlette 0.37.2
streamlit 1.36.0
sympy 1.13.0
tabulate 0.9.0
tenacity 8.5.0
thinc 8.2.5
tokenizers 0.15.2
toml 0.10.2
tomli 2.0.1
tomlkit 0.13.2
toolz 0.12.1
torch 2.2.0
tornado 6.4.1
tqdm 4.66.4
transformers 4.36.2
triton 2.2.0
trove-classifiers 2024.10.21.16
typer 0.9.4
typing_extensions 4.12.2
tzdata 2024.1
uritemplate 4.1.1
urllib3 1.26.19
uvicorn 0.25.0
virtualenv 20.27.1
wasabi 1.1.3
watchdog 4.0.1
weasel 0.4.1
websocket-client 1.8.0
wheel 0.42.0
wrapt 1.16.0
xxhash 3.4.1
yarl 1.9.4
zipp 3.20.2