clearml-server
clearml-server copied to clipboard
Windows 10 Mongo DB fassert failure
Hi,
I just installed clearml-server docker container for windows 10 and all containers start correctly without error. However, if I stop mongo or all of the clearml containers, Mongo fails to restart (continuous loop of restarts):
{"t":{"$date":"2022-09-15T21:48:04.152+00:00"},"s":"I", "c":"CONTROL", "id":23285, "ctx":"main","msg":"Automatically disabling TLS 1.0, to force-enable TLS 1.0 specify --sslDisabledProtocols 'none'"}
{"t":{"$date":"2022-09-15T21:48:04.154+00:00"},"s":"W", "c":"ASIO", "id":22601, "ctx":"main","msg":"No TransportLayer configured during NetworkInterface startup"}
{"t":{"$date":"2022-09-15T21:48:04.154+00:00"},"s":"I", "c":"NETWORK", "id":4648601, "ctx":"main","msg":"Implicit TCP FastOpen unavailable. If TCP FastOpen is required, set tcpFastOpenServer, tcpFastOpenClient, and tcpFastOpenQueueSize."}
{"t":{"$date":"2022-09-15T21:48:04.155+00:00"},"s":"I", "c":"STORAGE", "id":4615611, "ctx":"initandlisten","msg":"MongoDB starting","attr":{"pid":1,"port":27017,"dbPath":"/data/db","architecture":"64-bit","host":"e9d26a6ce632"}}
{"t":{"$date":"2022-09-15T21:48:04.155+00:00"},"s":"I", "c":"CONTROL", "id":23403, "ctx":"initandlisten","msg":"Build Info","attr":{"buildInfo":{"version":"4.4.9","gitVersion":"b4048e19814bfebac717cf5a880076aa69aba481","openSSLVersion":"OpenSSL 1.1.1f 31 Mar 2020","modules":[],"allocator":"tcmalloc","environment":{"distmod":"ubuntu2004","distarch":"x86_64","target_arch":"x86_64"}}}}
{"t":{"$date":"2022-09-15T21:48:04.155+00:00"},"s":"I", "c":"CONTROL", "id":51765, "ctx":"initandlisten","msg":"Operating System","attr":{"os":{"name":"Ubuntu","version":"20.04"}}}
{"t":{"$date":"2022-09-15T21:48:04.155+00:00"},"s":"I", "c":"CONTROL", "id":21951, "ctx":"initandlisten","msg":"Options set by command line","attr":{"options":{"net":{"bindIp":"*"},"setParameter":{"internalQueryMaxBlockingSortMemoryUsageBytes":"196100200"}}}}
{"t":{"$date":"2022-09-15T21:48:04.161+00:00"},"s":"I", "c":"STORAGE", "id":22270, "ctx":"initandlisten","msg":"Storage engine to use detected by data files","attr":{"dbpath":"/data/db","storageEngine":"wiredTiger"}}
{"t":{"$date":"2022-09-15T21:48:04.166+00:00"},"s":"I", "c":"STORAGE", "id":22315, "ctx":"initandlisten","msg":"Opening WiredTiger","attr":{"config":"create,cache_size=3467M,session_max=33000,eviction=(threads_min=4,threads_max=4),config_base=false,statistics=(fast),log=(enabled=true,archive=true,path=journal,compressor=snappy),file_manager=(close_idle_time=100000,close_scan_interval=10,close_handle_minimum=250),statistics_log=(wait=0),verbose=[recovery_progress,checkpoint_progress,compact_progress],"}}
{"t":{"$date":"2022-09-15T21:48:04.630+00:00"},"s":"E", "c":"STORAGE", "id":22435, "ctx":"initandlisten","msg":"WiredTiger error","attr":{"error":1,"message":"[1663278484:630141][1:0x7f351e44ccc0], file:WiredTiger.wt, connection: __posix_open_file, 805: /data/db/WiredTiger.wt: handle-open: open: Operation not permitted"}}
{"t":{"$date":"2022-09-15T21:48:04.668+00:00"},"s":"E", "c":"STORAGE", "id":22435, "ctx":"initandlisten","msg":"WiredTiger error","attr":{"error":1,"message":"[1663278484:668109][1:0x7f351e44ccc0], file:WiredTiger.wt, connection: __posix_open_file, 805: /data/db/WiredTiger.wt: handle-open: open: Operation not permitted"}}
{"t":{"$date":"2022-09-15T21:48:04.705+00:00"},"s":"E", "c":"STORAGE", "id":22435, "ctx":"initandlisten","msg":"WiredTiger error","attr":{"error":1,"message":"[1663278484:705045][1:0x7f351e44ccc0], file:WiredTiger.wt, connection: __posix_open_file, 805: /data/db/WiredTiger.wt: handle-open: open: Operation not permitted"}}
{"t":{"$date":"2022-09-15T21:48:04.707+00:00"},"s":"W", "c":"STORAGE", "id":22347, "ctx":"initandlisten","msg":"Failed to start up WiredTiger under any compatibility version. This may be due to an unsupported upgrade or downgrade."}
{"t":{"$date":"2022-09-15T21:48:04.707+00:00"},"s":"F", "c":"STORAGE", "id":28595, "ctx":"initandlisten","msg":"Terminating.","attr":{"reason":"1: Operation not permitted"}}
{"t":{"$date":"2022-09-15T21:48:04.707+00:00"},"s":"F", "c":"-", "id":23091, "ctx":"initandlisten","msg":"Fatal assertion","attr":{"msgid":28595,"file":"src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp","line":954}}
{"t":{"$date":"2022-09-15T21:48:04.707+00:00"},"s":"F", "c":"-", "id":23092, "ctx":"initandlisten","msg":"\n\n***aborting after fassert() failure\n\n"}
I have confirmed that the "WiredTiger.wt" file does exist in the local filesystem:
C:\opt\clearml\data\mongo_4\db\WiredTiger.wt
There appears to be an article regarding this type of error on medium but I am not sure how to implement the solution:
https://medium.com/vena-engineering/the-case-of-the-hidden-mongo-data-8e8ae25f85a8
Thanks for any help
Hi @jmrichardson,
Sorry, missed this 🙁 I'll try to take a look in the coming days, apologies 🙏
Hi @jmrichardson , can you please share the volumes mapping that you have for the mongo service in the docker-compose? Another question: is it a clean installation or upgrade from the previous version of clearml? If it is an upgrade then from what was the previous version of clearml (or mongo)?
Hi @evg-allegro
This was a clean, new installation on Win10. I have not made any changes to the yml file but it's attached for reference. I am only using it on win10 for dev purposes and running live on Linux so this is not a major issue for me because I am not worried about losing my work in dev. Thought you would like to know about it anyways and appreciate the help
version: "3.6"
services:
apiserver:
command:
- apiserver
container_name: clearml-apiserver
image: allegroai/clearml:latest
restart: unless-stopped
volumes:
- c:/opt/clearml/logs:/var/log/clearml
- c:/opt/clearml/config:/opt/clearml/config
- c:/opt/clearml/data/fileserver:/mnt/fileserver
depends_on:
- redis
- mongo
- elasticsearch
- fileserver
environment:
CLEARML_ELASTIC_SERVICE_HOST: elasticsearch
CLEARML_ELASTIC_SERVICE_PORT: 9200
CLEARML_ELASTIC_SERVICE_PASSWORD: ${ELASTIC_PASSWORD}
CLEARML_MONGODB_SERVICE_HOST: mongo
CLEARML_MONGODB_SERVICE_PORT: 27017
CLEARML_REDIS_SERVICE_HOST: redis
CLEARML_REDIS_SERVICE_PORT: 6379
CLEARML_SERVER_DEPLOYMENT_TYPE: ${CLEARML_SERVER_DEPLOYMENT_TYPE:-win10}
CLEARML__apiserver__pre_populate__enabled: "true"
CLEARML__apiserver__pre_populate__zip_files: "/opt/clearml/db-pre-populate"
CLEARML__apiserver__pre_populate__artifacts_path: "/mnt/fileserver"
ports:
- "8008:8008"
networks:
- backend
- frontend
elasticsearch:
networks:
- backend
container_name: clearml-elastic
environment:
ES_JAVA_OPTS: -Xms2g -Xmx2g -Dlog4j2.formatMsgNoLookups=true
ELASTIC_PASSWORD: ${ELASTIC_PASSWORD}
bootstrap.memory_lock: "true"
cluster.name: clearml
cluster.routing.allocation.node_initial_primaries_recoveries: "500"
cluster.routing.allocation.disk.watermark.low: 500mb
cluster.routing.allocation.disk.watermark.high: 500mb
cluster.routing.allocation.disk.watermark.flood_stage: 500mb
discovery.zen.minimum_master_nodes: "1"
discovery.type: "single-node"
http.compression_level: "7"
node.ingest: "true"
node.name: clearml
reindex.remote.whitelist: '*.*'
xpack.monitoring.enabled: "false"
xpack.security.enabled: "false"
ulimits:
memlock:
soft: -1
hard: -1
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
restart: unless-stopped
volumes:
- c:/opt/clearml/data/elastic_7:/usr/share/elasticsearch/data
- /usr/share/elasticsearch/logs
fileserver:
networks:
- backend
- frontend
command:
- fileserver
container_name: clearml-fileserver
image: allegroai/clearml:latest
restart: unless-stopped
volumes:
- c:/opt/clearml/logs:/var/log/clearml
- c:/opt/clearml/data/fileserver:/mnt/fileserver
- c:/opt/clearml/config:/opt/clearml/config
ports:
- "8081:8081"
mongo:
networks:
- backend
container_name: clearml-mongo
image: mongo:4.4.9
restart: unless-stopped
command: --setParameter internalQueryMaxBlockingSortMemoryUsageBytes=196100200
volumes:
- c:/opt/clearml/data/mongo_4/db:/data/db
- c:/opt/clearml/data/mongo_4/configdb:/data/configdb
redis:
networks:
- backend
container_name: clearml-redis
image: redis:5.0
restart: unless-stopped
volumes:
- c:/opt/clearml/data/redis:/data
webserver:
command:
- webserver
container_name: clearml-webserver
image: allegroai/clearml:latest
restart: unless-stopped
volumes:
- c:/clearml/logs:/var/log/clearml
depends_on:
- apiserver
ports:
- "8080:80"
networks:
- backend
- frontend
networks:
backend:
driver: bridge
frontend:
name: frontend
driver: bridge