Janusgraph 难以理解如何从 gremlin 查询中操作数据答案

【问题标题】：Janusgraph troubles to understand how to manipulate data from gremlin query（Janusgraph：难以理解如何处理 gremlin 查询返回的数据）
【发布时间】：2019-06-14 19:45:28
【问题描述】：

我正在尝试重建使用 gremlin 查询从图表中检索到的数据。确切地说，我很难找到一种有效的方法来处理返回的数据。

我使用的 JanusGraph 版本是 0.3.1，它在 Cassandra + ES 上运行，它还配置了 ConfiguredGraphFactory。所以我可以动态创建图表。

我正在使用 gremlin javascript 版本 3.4.2（3.3.3 无法正常工作）

我构建了一个小例子来更好地解释我的意思。此示例中的图表是在没有架构的情况下创建的。

首先，我将使用以下函数连接到图形：

const gremlin = require('gremlin');

const { Graph } = gremlin.structure;
const { DriverRemoteConnection } = gremlin.driver;

const __ = gremlin.process.statics;
const P = gremlin.process.P;

const GREMLIN_URL = "ws://localhost:8182/gremlin";
const GRAPH_NAME = "graphtest";

let connection;

/**
 * Opens a remote connection to the Gremlin server and returns a traversal
 * source bound to it.
 *
 * Side effect: the new connection is stored in the module-level `connection`
 * variable, which is what closeConnection() later inspects and closes.
 *
 * @returns {object} the result of `graph.traversal().withRemote(connection)`
 */
function getTraversal() {
    const graph = new Graph();
    connection = new DriverRemoteConnection(GREMLIN_URL, { traversalSource: GRAPH_NAME });
    // Return the traversal source directly: assigning it to an undeclared `g`
    // leaks an implicit global and is a ReferenceError in strict mode / ES modules.
    return graph.traversal().withRemote(connection);
}

/**
 * Closes the module-level `connection` when one is set and exposes `close`.
 *
 * @returns {*} whatever `connection.close()` returns, so callers may await
 *   it; `undefined` when there is nothing to close.
 */
function closeConnection() {
    if (!connection || !connection.close) {
        return undefined;
    }
    // Hand the result back instead of dropping it so shutdown can be awaited.
    return connection.close();
}

然后我将创建一些顶点和边。

我们将有一个用户、两个机构，以及两条将用户连接到机构的 “trained” 边：

/**
 * Seeds the graph with one 'user' vertex, two 'institution' vertices and two
 * 'trained' edges going from the user to each institution, logging the result
 * of every insertion.
 *
 * The connection is closed in a `finally` block so it is released even when
 * one of the traversals rejects.
 *
 * @returns {Promise<void>} settles after all traversals have been submitted;
 *   rejects with the first traversal error.
 */
async function createVerticesAndEdges() {
    const g = getTraversal();
    try {
        const userV = await g.addV('user')
            .property('name', 'Emily')
            .property('identityId', '1234')
            .next();
        console.log('user', userV);

        const institutionV = await g.addV('institution')
            .property('name', 'University of California')
            .property('identityId', 'CA83')
            .next();
        console.log('institution', institutionV);

        const institutionV2 = await g.addV('institution')
            .property('name', 'University of Illinois')
            .property('identityId', 'IL847')
            .next();
        console.log('institution2', institutionV2);

        // Edge endpoints are looked up by their 'identityId' property.
        const trainedE = await g.addE('trained')
            .property('title', 'MS in Computer Science')
            .property('grade', 'B')
            .from_(__.V().has('identityId', '1234'))
            .to(__.V().has('identityId', 'CA83'))
            .next();
        console.log('trained', trainedE);

        const trainedE2 = await g.addE('trained')
            .property('title', 'Political Science')
            .property('grade', 'A')
            .from_(__.V().has('identityId', '1234'))
            .to(__.V().has('identityId', 'IL847'))
            .next();
        console.log('trained2', trainedE2);
    } finally {
        // Always release the connection, including on failure.
        closeConnection();
    }
}

然后，假设我想检索用户参加的所有培训，并且我还想获得参加培训的机构的名称。

所以我运行的查询是这样的：

/**
 * Fetches the 'trained' edges leaving user '1234' together with the vertices
 * they point to, and logs the resulting list.
 *
 * The traversal walks user -> outgoing 'trained' edge -> in-vertex, takes the
 * path, unfolds it into individual elements and applies
 * `where(P.neq('u'))` against the step labelled 'u' (the user).
 *
 * The connection is closed in a `finally` block so it is released even when
 * the query rejects.
 *
 * @returns {Promise<void>} settles after the result has been logged; rejects
 *   with the traversal error.
 */
async function getUserTrainings() {
    const g = getTraversal();
    let result;
    try {
        result = await g.V()
            .hasLabel('user')
            .has('identityId', '1234')
            .as('u')
            .outE()
            .hasLabel('trained')
            .inV()
            .path()
            .unfold()
            .where(P.neq('u'))
            .toList();
    } finally {
        // Always release the connection, including on failure.
        closeConnection();
    }
    console.log(result);
}

我得到这个输出：

[ 
    Edge {
    id: { relationId: 'odxqw-3b4-27th-38o'
        },
    label: 'trained',
    outV: 4288,
    inV: 4200,
    properties: {}
    },
  Vertex { id: 4200, label: 'institution', properties: undefined
    },
  Edge {
    id: { relationId: 'odxco-3b4-27th-3ao'
        },
    label: 'trained',
    outV: 4288,
    inV: 4272,
    properties: {}
    },
  Vertex { id: 4272, label: 'institution', properties: undefined
    }
]

这还不错，我可以使用顶点 id 和边的 inV 来重建关系并按照我的意愿返回数据，但问题是，如您所见，此查询不返回属性，所以有点没用。

但是，通过 gremlin 文档，我发现了 valueMap() 步骤，所以我可以像这样稍微编辑前面的查询：

/**
 * Same query as the plain path version, but with a trailing `valueMap(true)`
 * step applied to each unfolded element, then logs the resulting list.
 *
 * The traversal walks user '1234' -> outgoing 'trained' edge -> in-vertex,
 * takes the path, unfolds it, applies `where(P.neq('u'))` against the step
 * labelled 'u' (the user) and finally maps each element with `valueMap(true)`.
 *
 * The connection is closed in a `finally` block so it is released even when
 * the query rejects.
 *
 * @returns {Promise<void>} settles after the result has been logged; rejects
 *   with the traversal error.
 */
async function getUserTrainings() {
    const g = getTraversal();
    let result;
    try {
        result = await g.V()
            .hasLabel('user')
            .has('identityId', '1234')
            .as('u')
            .outE()
            .hasLabel('trained')
            .inV()
            .path()
            .unfold()
            .where(P.neq('u'))
            .valueMap(true)
            .toList();
    } finally {
        // Always release the connection, including on failure.
        closeConnection();
    }
    console.log(result);
}

得到这个输出：

[ 
    Map {
    EnumValue { typeName: 'T', elementName: 'id'
        } => { relationId: 'odxqw-3b4-27th-38o'
        },
    'title' => 'Political Science',
    EnumValue { typeName: 'T', elementName: 'label'
        } => 'trained',
    'grade' => 'A'
    },
  Map {
    'name' => [ 'University of Illinois'
        ],
    EnumValue { typeName: 'T', elementName: 'id'
        } => 4200,
    'identityId' => [ 'IL847'
        ],
    EnumValue { typeName: 'T', elementName: 'label'
        } => 'institution'
    },
  Map {
    EnumValue { typeName: 'T', elementName: 'id'
        } => { relationId: 'odxco-3b4-27th-3ao'
        },
    'title' => 'MS in Computer Science',
    EnumValue { typeName: 'T', elementName: 'label'
        } => 'trained',
    'grade' => 'B'
    },
  Map {
    'name' => [ 'University of California'
        ],
    EnumValue { typeName: 'T', elementName: 'id'
        } => 4272,
    'identityId' => [ 'CA83'
        ],
    EnumValue { typeName: 'T', elementName: 'label'
        } => 'institution'
    }
]

所以，除了返回的数据不太清晰（我的意思是，每个顶点都相同的“typeName: 'T'”是什么？）之外，现在我确实拿回了属性，但却丢失了边的 outV 和 inV，因此无法按照我需要的方式重建数据（即了解哪个顶点由哪条边连接）。

我想我可以只使用第一个查询而不使用 valueMap() 步骤，然后对于我返回的每个顶点和边，将执行另一个查询以使用 id 检索属性。

对于像这样的简单情况，这可能没问题，但我认为对于可能涉及数百个顶点和边的查询来说，这并不是真正有效的。

那么最后一个问题是：从路径重构数据（包括顶点和边属性）最有效的方法是什么？

【问题讨论】：

标签： javascript gremlin janusgraph

【解决方案1】：

我将只讨论我认为您问题的核心，因为我认为直接回答可能会澄清其他子问题：

假设我想检索用户参加的所有培训，并且我还想知道参加培训的机构的名称

gremlin> g.V().has('user','identityId', '1234').
......1>   outE('trained').
......2>   project('institution','title','grade').
......3>     by(inV().values('name')).
......4>     by('title').
......5>     by('grade')
==>[institution:University of California,title:MS in Computer Science,grade:B]
==>[institution:University of Illinois,title:Political Science,grade:A]

我认为可以澄清的一个附带问题：

每个顶点都相同的“typeName: 'T'”是什么？

T是Gremlin中的一个枚举值，代表了图元素的一些核心结构属性，具体来说是label和id。

【讨论】：

非常感谢斯蒂芬的回答。实际上，我现在刚刚发现，通过 gremlin 库，我可以在从 gremlin.process.t 导入 t 的 Map 中访问这些 EnumValues。对于遍历的问题，我想对 gremlin 不熟悉是主要问题。无论如何，你建议我的遍历真的很整洁，有没有办法也用类似的东西来获取relationIds和vertexIds？再次感谢您的宝贵时间
当然……只需向 project() 添加更多键，然后为每个键添加一个 by() 调制器，将 “trained” 边转换为您想要的值。正如您在第一个 by() 中看到的那样，您可以将匿名遍历作为参数传递，这样您就可以借助 Gremlin 的全部功能，把任何想要的内容放入该键中。