HPCCloud-deploy
HPCCloud-deploy copied to clipboard
No SSH key is provided at cluster creation
Hello,
I tried to add a new cluster to my HPCCloud installation. I selected the ComputeNode preset, changed the name of the cluster and pressed the Save button. From here, nothing happened.
I took a look at the network calls and found that two requests were made:
- http://localhost:8888/api/v1/clusters, POST, 201 Created:
{
"_id":"5a607fd80640fd00e5c661df",
"config":{
"host":"192.168.100.100",
"hydra":{
"executablePath":"/some/path/fake"
},
"jobOutputDir":"/home/demo",
"numberOfSlots":1,
"nwchem":{
"enable":false
},
"openfoam":{
"enable":true
},
"parallelEnvironment":"",
"paraview":{
"installDir":"/opt/paraview"
},
"pyfr":{
"cuda":false,
"opencl":[
],
"openmp":[
{
"cblas":"/usr/lib/libblas/libblas.so",
"cblas-type":"",
"cc":"",
"cflags":"",
"name":"Default"
}
]
},
"scheduler":{
"defaultQueue":"",
"maxWallTime":{
"hours":0,
"minutes":0,
"seconds":0
},
"pbs":{
"numberOfCoresPerNode":1,
"numberOfGpusPerNode":0,
"numberOfNodes":1
},
"sge":{
"numberOfGpusPerNode":0,
"numberOfSlots":1
},
"slurm":{
"numberOfCoresPerNode":1,
"numberOfGpusPerNode":0,
"numberOfNodes":1
},
"type":"sge"
},
"ssh":{
"key":"5a607fd80640fd00e5c661df",
"user":"demo"
}
},
"name":"Test",
"status":"creating",
"type":"trad",
"userId":"5a5f67d40640fd70eb473e7d"
}
- http://localhost:8888/api/v1/clusters, GET, 200 OK:
[
{
"_id":"5a607fd80640fd00e5c661df",
"config":{
"host":"192.168.100.100",
"hydra":{
"executablePath":"/some/path/fake"
},
"jobOutputDir":"/home/demo",
"numberOfSlots":1,
"nwchem":{
"enable":false
},
"openfoam":{
"enable":true
},
"parallelEnvironment":"",
"paraview":{
"installDir":"/opt/paraview"
},
"pyfr":{
"cuda":false,
"opencl":[
],
"openmp":[
{
"cblas":"/usr/lib/libblas/libblas.so",
"cblas-type":"",
"cc":"",
"cflags":"",
"name":"Default"
}
]
},
"scheduler":{
"defaultQueue":"",
"maxWallTime":{
"hours":0,
"minutes":0,
"seconds":0
},
"pbs":{
"numberOfCoresPerNode":1,
"numberOfGpusPerNode":0,
"numberOfNodes":1
},
"sge":{
"numberOfGpusPerNode":0,
"numberOfSlots":1
},
"slurm":{
"numberOfCoresPerNode":1,
"numberOfGpusPerNode":0,
"numberOfNodes":1
},
"type":"sge"
},
"ssh":{
"key":"5a607fd80640fd00e5c661df",
"user":"demo"
}
},
"name":"Test",
"status":"creating",
"type":"trad",
"userId":"5a5f67d40640fd70eb473e7d"
}
]
Updating the page later, the cluster is still stuck in the creating status.
Thanks for your help!
Looking at Celery logs on the VM, I found this error:
[2018-01-18 11:07:05,214: ERROR/MainProcess] Task cumulus.ssh.tasks.key.generate_key_pair[a30f3157-22a7-43ac-b3d1-f5e010847cad] raised unexpected: ConnectionError(ProtocolError('Connection aborted.', BadStatusLine("''",)),)
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/opt/hpccloud/cumulus/cumulus/ssh/tasks/key.py", line 80, in generate_key_pair
json={'status': 'error'})
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 137, in patch
return request('patch', url, data=data, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 57, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 475, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 585, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 453, in send
raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', BadStatusLine("''",))
BadStatusLine suggests that the data `requests` is receiving doesn't make sense. If you try the creation again, do you get the same issue?
@cjh1, Yes, I tried multiple times and I got the same result each time.
@felixveysseyre From within the main VM, can you ping 192.168.100.100 (which is the compute VM)?
Actually the failure is when talking to Girder not the compute VM. From within the main VM what happens if you do a curl http://localhost:8080/
. Also take a look in /home/hpccloud/.girder/logs/error.log
for any errors issued by Girder.
@cjh1
- curl http://localhost:8080/ returns:
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Girder</title>
<link rel="stylesheet" href="static/built/googlefonts.css">
<link rel="stylesheet" href="static/built/fontello/css/fontello.css">
<link rel="stylesheet" href="static/built/fontello/css/animation.css">
<link rel="stylesheet" href="static/built/girder_lib.min.css">
<link rel="icon" type="image/png" href="static/img/Girder_Favicon.png">
</head>
<body>
<div id="g-global-info-apiroot" class="hide">api/v1</div>
<div id="g-global-info-staticroot" class="hide">static</div>
<script src="static/built/girder_lib.min.js"></script>
<script src="static/built/girder_app.min.js"></script>
<script type="text/javascript">
$(function () {
girder.events.trigger('g:appload.before');
girder.app = new girder.views.App({
el: 'body',
parentView: null,
contactEmail: 'kitware\u0040kitware\u002Ecom',
brandName: 'Girder',
bannerColor: '\u00233F3B3B',
registrationPolicy: 'open',
enablePasswordLogin: true
}).render();
girder.events.trigger('g:appload.after', girder.app);
});
</script>
</body>
</html>
-
/home/hpccloud/.girder/logs/error.log
:
empty
-
/home/hpccloud/.girder/logs/info.log
[2018-01-19 09:48:07,578] INFO: Running in mode: development
[2018-01-19 09:48:07,799] INFO: Connecting to MongoDB: mongodb://localhost:27017/girder
[2018-01-19 09:48:07,823] INFO: Resolving plugin dependencies...
[2018-01-19 09:48:07,825] INFO: Loaded plugin "pvwproxy"
[2018-01-19 09:48:07,830] INFO: Loaded plugin "hpccloud"
[2018-01-19 09:48:07,867] INFO: Loaded plugin "taskflow"
[2018-01-19 09:48:07,908] INFO: Loaded plugin "cumulus"
[2018-01-19 09:48:07,909] INFO: Loaded plugin "sftp"
[2018-01-19 09:48:07,911] INFO: Loaded plugin "newt"
[19/Jan/2018:09:48:07] ENGINE Bus STARTING
[2018-01-19 09:48:07,912] INFO: Started asynchronous event manager thread.
[19/Jan/2018:09:48:07] ENGINE Started monitor thread '_TimeoutMonitor'.
[19/Jan/2018:09:48:07] ENGINE Started monitor thread 'Autoreloader'.
[19/Jan/2018:09:48:08] ENGINE Serving on http://127.0.0.1:8080
[19/Jan/2018:09:48:08] ENGINE Bus STARTED
[2018-01-19 09:54:32,953] INFO: cumulus.ssh.tasks.key.generate_key_pair
-
/var/log/celery/command.log
:
[2018-01-19 09:48:10,433: INFO/MainProcess] Connected to amqp://guest:**@127.0.0.1:5672//
[2018-01-19 09:48:10,438: INFO/MainProcess] mingle: searching for neighbors
[2018-01-19 09:48:11,449: INFO/MainProcess] mingle: all alone
[2018-01-19 09:48:11,475: WARNING/MainProcess] command@vagrant-ubuntu-trusty-64 ready.
[2018-01-19 09:54:32,968: INFO/MainProcess] Received task: cumulus.ssh.tasks.key.generate_key_pair[45ed647a-4127-401b-bc8e-cd364eb680a5]
[2018-01-19 09:54:33,189: INFO/Worker-3] Starting new HTTP connection (1): Kitwares-iMac.local
[2018-01-19 09:54:33,475: INFO/Worker-3] Starting new HTTP connection (1): Kitwares-iMac.local
[2018-01-19 09:54:33,501: ERROR/MainProcess] Task cumulus.ssh.tasks.key.generate_key_pair[45ed647a-4127-401b-bc8e-cd364eb680a5] raised unexpected: ConnectionError(ProtocolError('Connection aborted.', BadStatusLine("''",)),)
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/opt/hpccloud/cumulus/cumulus/ssh/tasks/key.py", line 80, in generate_key_pair
json={'status': 'error'})
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 137, in patch
return request('patch', url, data=data, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 57, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 475, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 585, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 453, in send
raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', BadStatusLine("''",))
Hi,
I had the same issue (no SSH key provided at cluster creation) when using the prebuilt VMs. I'm not sure if it is the same case, but I fixed it by modifying the existing hostname in /opt/hpccloud/cumulus/cumulus/conf/config.json
for the hpccloud-server VM:
- "baseUrl": "http://Melmac2.local:8080/api/v1",
+ "baseUrl": "http://localhost:8080/api/v1",
I later found that this is also mentioned in the HPCCloud "Deployment with Vagrant" documentation: https://kitware.github.io/HPCCloud/docs/vagrant__introduction.html#Fixing-invalid-Hostname.
With this baseUrl
change, the key pair is now shown when saving the cluster preferences.