gremlin-visualizer
gremlin-visualizer copied to clipboard
Cosmos DB Graph proxy server
Hi @prabushitha, thanks for the great work on the visualizer. I had some issues with Microsoft Cosmos DB, because of the executed query ${query}${nodeLimitQuery}.dedup().as('node').project('id', 'label', 'properties', 'edges').by(__.id()).by(__.label()).by(__.valueMap().by(__.unfold())).by(__.outE().project('id', 'from', 'to', 'label', 'properties').by(__.id()).by(__.select('node').id()).by(__.inV().id()).by(__.label()).by(__.valueMap().by(__.unfold())).fold()).
It contains Gremlin constructs that are not supported by the Cosmos DB Graph implementation of Gremlin: by(__.valueMap().by(__.unfold())).
I created a version that works correctly for Cosmos DB Graph, in which I split the query into three different queries. I also added an additional endpoint to the service to request the number of incoming and outgoing edges of a node. Because Cosmos DB Graph exposes an authenticated endpoint, I configure the proxy server with an included config.js file:
config.js:
// Settings read by proxy-server.js. Replace every <...> placeholder with the
// values of your own Cosmos DB Graph account before starting the server.
const config = {
  // WebSocket endpoint handed to the Gremlin client.
  endpoint: "wss://<yourservername>.gremlin.cosmosdb.azure.com:443/gremlin",
  // Key passed to the SASL authenticator as the password.
  primaryKey: "<your primary key as can be found under keys in Azure>",
  // Database and collection are combined into `/dbs/<database>/colls/<collection>`.
  database: "<name of the database>",
  collection: "<name of the collection>",
  // Port the proxy server listens on.
  port: 3001,
};

module.exports = config;
proxy-server.js:
const express = require('express');
const bodyParser = require('body-parser');
const Gremlin = require('gremlin');
const cors = require('cors');
const app = express();
const fs = require('fs');
const path = require('path');
const config = require('./config');
// Register the cors middleware for every route; `credentials: true` is passed
// through to it as its only option.
app.use(cors({
credentials: true,
}));
// Parse application/json request bodies so the route handlers can read req.body.
app.use(bodyParser.json());
/**
 * Flattens a vertex property map (as returned by `valueMap()`) into a plain object.
 *
 * Each property value is expected to be an array of length 1; only the first
 * element is kept. Values that are not arrays are copied unchanged instead of
 * being indexed (indexing a string would silently keep only its first
 * character), and a missing map (null/undefined) yields an empty object
 * instead of throwing.
 *
 * @param {Object<string, *>} propInObj - Property map keyed by property name.
 * @returns {Object<string, *>} New object holding one plain value per property.
 */
function mapVertexPropertiesToObj(propInObj) {
  const propOutObj = {};
  if (propInObj == null) {
    return propOutObj;
  }
  for (const [key, value] of Object.entries(propInObj)) {
    propOutObj[key] = Array.isArray(value) ? value[0] : value;
  }
  return propOutObj;
}
/**
 * Converts raw edge records into the edge objects sent to the front-end.
 *
 * Only `id`, `from`, `to`, `label` and `properties` are copied. An `id` that is
 * not already a string is serialized with JSON.stringify.
 *
 * @param {Array<Object>|null|undefined} edges - Raw edges; any falsy value means "no edges".
 * @returns {Array<Object>} Edge objects, or an empty array when `edges` is falsy.
 */
function edgesToVisualizationStructure(edges) {
  // Guard clause: nodes without an edge list simply have no edges.
  if (!edges) {
    return [];
  }

  return edges.map((rawEdge) => {
    const rawId = rawEdge.id;
    const edgeId = typeof rawId === "string" ? rawId : JSON.stringify(rawId);
    return {
      id: edgeId,
      from: rawEdge.from,
      to: rawEdge.to,
      label: rawEdge.label,
      properties: rawEdge.properties,
    };
  });
}
/**
 * Converts raw node records into the node objects sent to the front-end.
 *
 * `id` and `label` are copied as-is; `properties` is passed through
 * mapVertexPropertiesToObj and `edges` through edgesToVisualizationStructure.
 *
 * @param {Array<Object>} nodeList - Raw nodes, each with id, label, properties and optionally edges.
 * @returns {Array<Object>} One `{ id, label, properties, edges }` object per input node.
 */
function nodesToVisualizationStructure(nodeList) {
  return nodeList.map((rawNode) => {
    const properties = mapVertexPropertiesToObj(rawNode.properties);
    const edges = edgesToVisualizationStructure(rawNode.edges);
    return {
      id: rawNode.id,
      label: rawNode.label,
      properties,
      edges,
    };
  });
}
/**
 * Wraps a user query so that every resulting vertex is projected as
 * `{ id, label, properties }`, without any edge information.
 *
 * @param {string} query - Gremlin traversal text to extend.
 * @returns {string} The query followed by the projection steps, one step per line.
 */
function makeSelfQuery(query) {
  // Steps appended after the user's query; consecutive steps are joined by ".\n".
  const projectionSteps = [
    "as('node')",
    "project('id', 'label', 'properties')",
    "by(__.id())",
    "by(__.label())",
    "by(__.valueMap())",
  ];
  return `${query}.\n${projectionSteps.join(".\n")}\n`;
}
/**
 * Builds the query that projects each vertex of `query` as
 * `{ id, label, properties, edges }`, where `edges` holds the vertex's
 * OUTGOING edges (`from` = the vertex itself, `to` = the edge's in-vertex).
 *
 * The /query route uses this builder for queries ending in `.in()`.
 *
 * This replaces the upstream single query that used
 * `by(__.valueMap().by(__.unfold()))`, which the issue text reports as
 * unsupported by Cosmos DB Graph; plain `valueMap()` is used here instead.
 *
 * @param {string} query - Gremlin traversal text to extend.
 * @param {number|string} [nodeLimit] - Maximum number of vertices. Only a
 *   number or numeric string that floors to a positive integer adds a
 *   `.limit(n)` step; anything else adds no limit.
 * @returns {string} The complete Gremlin query text, one step per line.
 */
function makeInQuery(query, nodeLimit) {
  // Interpolate the normalized integer, never the raw input: values such as
  // true, 2.5, "1e3" or " 7 " pass a coercing isNaN() check but would yield an
  // invalid `.limit(...)` step if inserted verbatim.
  const isLimitCandidate =
    typeof nodeLimit === 'number' ||
    (typeof nodeLimit === 'string' && nodeLimit.trim() !== '');
  const limitValue = isLimitCandidate ? Math.floor(Number(nodeLimit)) : NaN;
  const nodeLimitQuery =
    Number.isSafeInteger(limitValue) && limitValue > 0 ? `.limit(${limitValue})` : '';

  // Possible refinement (not applied) for edges whose target vertex is missing:
  // coalesce(select('outEdge').inV().count().is(gt(0)).select('outEdge').inV().id(), constant("NO_TO_VERTEX"))
  return [
    `${query}${nodeLimitQuery}.`,
    "dedup().",
    "as('node').",
    "project('id', 'label', 'properties', 'edges').",
    "by(__.id()).",
    "by(__.label()).",
    "by(__.valueMap()).",
    "by(__.outE().as('outEdge').",
    "project('id', 'from', 'to', 'label', 'properties').",
    "by(__.id()).",
    "by(select('node').id()).",
    "by(__.inV().id()).",
    "by(__.label()).",
    "by(__.valueMap()).",
    "fold()",
    ")",
    "",
  ].join("\n");
}
/**
 * Builds the query that projects each vertex of `query` as
 * `{ id, label, properties, edges }`, where `edges` holds the vertex's
 * INCOMING edges (`from` = the edge's out-vertex, `to` = the vertex itself).
 *
 * The /query route uses this builder for queries ending in `.out()`.
 *
 * This replaces the upstream single query that used
 * `by(__.valueMap().by(__.unfold()))`, which the issue text reports as
 * unsupported by Cosmos DB Graph; plain `valueMap()` is used here instead.
 *
 * @param {string} query - Gremlin traversal text to extend.
 * @param {number|string} [nodeLimit] - Maximum number of vertices. Only a
 *   number or numeric string that floors to a positive integer adds a
 *   `.limit(n)` step; anything else adds no limit.
 * @returns {string} The complete Gremlin query text, one step per line.
 */
function makeOutQuery(query, nodeLimit) {
  // Interpolate the normalized integer, never the raw input: values such as
  // true, 2.5, "1e3" or " 7 " pass a coercing isNaN() check but would yield an
  // invalid `.limit(...)` step if inserted verbatim.
  const isLimitCandidate =
    typeof nodeLimit === 'number' ||
    (typeof nodeLimit === 'string' && nodeLimit.trim() !== '');
  const limitValue = isLimitCandidate ? Math.floor(Number(nodeLimit)) : NaN;
  const nodeLimitQuery =
    Number.isSafeInteger(limitValue) && limitValue > 0 ? `.limit(${limitValue})` : '';

  return [
    `${query}${nodeLimitQuery}.`,
    "dedup().",
    "as('node').",
    "project('id', 'label', 'properties', 'edges').",
    "by(__.id()).",
    "by(__.label()).",
    "by(__.valueMap()).",
    "by(__.inE().",
    "project('id', 'from', 'to', 'label', 'properties').",
    "by(__.id()).",
    "by(__.outV().id()).",
    "by(select('node').id()).",
    "by(__.label()).",
    "by(__.valueMap()).",
    "fold()",
    ")",
    "",
  ].join("\n");
}
/**
 * Submits a Gremlin query to the endpoint configured in config.js.
 *
 * A new client is created for every call, authenticated with
 * `/dbs/<database>/colls/<collection>` as the user name and the primary key as
 * the password. The query and its result are logged to the console.
 *
 * @param {string} query - Gremlin query text to execute.
 * @returns {Promise<Object|null>} The result returned by the driver, or null
 *   when submitting the query failed (the error is logged, not rethrown).
 */
async function executeQuery(query) {
  const authenticator = new Gremlin.driver.auth.PlainTextSaslAuthenticator(
    `/dbs/${config.database}/colls/${config.collection}`,
    config.primaryKey
  );
  const client = new Gremlin.driver.Client(config.endpoint, {
    authenticator,
    traversalsource: "g",
    rejectUnauthorized: true,
    mimeType: "application/vnd.gremlin-v2.0+json",
  });

  console.log(query);
  try {
    const result = await client.submit(query, {});
    console.log(JSON.stringify(result, null, 2));
    return result;
  } catch (err) {
    // Best effort: callers treat null as "query failed".
    console.error(err);
    return null;
  } finally {
    // Every call opens its own client, so close it here; otherwise each
    // request would leave a connection to the server open.
    try {
      await client.close();
    } catch (closeErr) {
      // A failure to close must not replace the query outcome.
      console.error(closeErr);
    }
  }
}
/**
 * POST /query
 *
 * Body: `{ query: string, nodeLimit?: number|string }`.
 *
 * - `sample:<name>` returns the content of `samples/<name>.json` next to this
 *   file without contacting a Gremlin server.
 * - Any other query is wrapped by makeOutQuery (ends with ".out()"),
 *   makeInQuery (ends with ".in()") or makeSelfQuery, executed, and converted
 *   with nodesToVisualizationStructure.
 *
 * Responds with a pretty-printed JSON array of nodes; the array is empty when
 * the sample cannot be read or the query fails.
 */
app.post('/query', async (req, res, next) => {
  try {
    const nodeLimit = req.body.nodeLimit;
    const query = String(req.body.query);
    let visualizationNodesAndEdges = [];

    // Support for sample files to show possible visualizations.
    if (query.startsWith("sample:")) {
      const sample = query.split(":")[1];
      // The sample name comes from the request body: only allow plain file
      // names so it cannot escape the samples folder (e.g. "sample:../../x").
      if (/^[\w.-]+$/.test(sample)) {
        try {
          const samplePath = path.join(__dirname, "samples", `${sample}.json`);
          visualizationNodesAndEdges = JSON.parse(fs.readFileSync(samplePath, 'utf8'));
        } catch (err) {
          // Missing or malformed sample file: answer with an empty list.
          console.error(err);
        }
      } else {
        console.error(`Rejected invalid sample name: ${JSON.stringify(sample)}`);
      }
    } else {
      let theQuery;
      if (query.endsWith(".out()")) {
        theQuery = makeOutQuery(query, nodeLimit);
      } else if (query.endsWith(".in()")) {
        theQuery = makeInQuery(query, nodeLimit);
      } else {
        theQuery = makeSelfQuery(query);
      }
      const result = await executeQuery(theQuery);
      if (result !== null) {
        visualizationNodesAndEdges = nodesToVisualizationStructure(result._items);
      }
    }

    const visualizationNodesAndEdgesPrettyfiedJSon = JSON.stringify(visualizationNodesAndEdges, null, 2);
    console.log(visualizationNodesAndEdgesPrettyfiedJSon);
    // The payload is JSON, so label it as such rather than letting
    // res.send() pick a default content type for a string body.
    res.type('json').send(visualizationNodesAndEdgesPrettyfiedJSon);
  } catch (err) {
    // Errors thrown inside an async handler must be handed to Express
    // explicitly; otherwise the request never gets a response.
    next(err);
  }
});
/**
 * GET /edgecount/:nodeId
 *
 * Responds with pretty-printed JSON `{ inEdgesCount, outEdgesCount }` for the
 * vertex with the given id. Both counts are -1 when the query fails or returns
 * no item (for example because the vertex does not exist).
 */
app.get('/edgecount/:nodeId', async (req, res, next) => {
  try {
    const nodeId = req.params.nodeId;
    // nodeId comes straight from the URL: escape it and embed it as a
    // single-quoted literal so it cannot terminate the string and append
    // arbitrary traversal steps to the query.
    const escapedNodeId = nodeId
      .replace(/\\/g, '\\\\')
      .replace(/'/g, "\\'")
      .replace(/\r/g, '\\r')
      .replace(/\n/g, '\\n');
    const query = `g.V('${escapedNodeId}').project("inEdgesCount", "outEdgesCount").by(__.inE().count()).by(__.outE().count())`;

    // result._items in format: [ { "inEdgesCount": 2, "outEdgesCount": 0 } ]
    const result = await executeQuery(query);
    let countInfo;
    if (result === null || result._items.length === 0) {
      countInfo = { 'inEdgesCount': -1, 'outEdgesCount': -1 }; // error - node does not exist?
    } else {
      countInfo = result._items[0];
    }
    // The payload is JSON, so label it as such.
    res.type('json').send(JSON.stringify(countInfo, null, 2));
  } catch (err) {
    // Errors thrown inside an async handler must be handed to Express
    // explicitly; otherwise the request never gets a response.
    next(err);
  }
});
// Start the HTTP server on the port from config.js and log a message once it is listening.
app.listen(config.port, () => console.log(`Simple Gremlin proxy-server listening on port ${config.port}!`));
I also added support for sample graph visualization files, which work without a Gremlin server (drilling through the graph is not supported for samples). Next to the proxy-server.js file, create a folder named samples and add, for example, the following file:
modern.json: (patterned after the graph created with TinkerFactory.createModern(), as described in the TinkerPop Reference Documentation)
[
{
"id": "1",
"label": "person",
"properties": {
"name": "marko",
"age": 29
},
"edges": [
{
"id": "7",
"from": "1",
"to": "2",
"label": "knows",
"properties": {
"weight": 0.5
}
},
{
"id": "8",
"from": "1",
"to": "4",
"label": "knows",
"properties": {
"weight": 1.0
}
},
{
"id": "9",
"from": "1",
"to": "3",
"label": "created",
"properties": {
"weight": 0.4
}
}
]
},
{
"id": "2",
"label": "person",
"properties": {
"name": "vadas",
"age": 27
}
},
{
"id": "3",
"label": "software",
"properties": {
"name": "lop",
"lang": "java"
}
},
{
"id": "4",
"label": "person",
"properties": {
"name": "josh",
"age": 32
},
"edges": [
{
"id": "11",
"from": "4",
"to": "3",
"label": "created",
"properties": {
"weight": 0.4
}
},
{
"id": "10",
"from": "4",
"to": "5",
"label": "created",
"properties": {
"weight": 1.0
}
}
]
},
{
"id": "5",
"label": "software",
"properties": {
"name": "ripple",
"lang": "java"
}
},
{
"id": "6",
"label": "person",
"properties": {
"name": "peter",
"age": 35
},
"edges": [
{
"id": "12",
"from": "6",
"to": "3",
"label": "created",
"properties": {
"weight": 0.2
}
}
]
}
]
If, instead of a query, the string sample:modern is specified, the sample JSON is read and returned.
Was able to run stuff. But what is the format of the JSON? It is not GraphSON.
It is the format sent by the proxy-server.js to the front-end:-) I did some additional improvements on the proxy-server.js so other queries are handled correctly as well. I will post an updated version soon.
Did you run it against Cosmos DB, or did it work for other Gremlin servers as well?
@svdoever did you complete this? Is your fork available for me to play around with? I'm playing with Cosmos DB a bit and would like to try this out.