可以按类别分组,但不能按您呈现的方式。这确实是一件好事,因为您的“类别”实际上是数据,您真的不应该在存储或输出中将“数据”表示为“键”。
所以真的建议这样改造:
// Group one user's documents by category, then fold all category groups
// into a single result document that exposes a generic "categories" array.
db.collection.aggregate([
    // NOTE: equality matching is type-sensitive; use the same BSON type
    // (number vs. string) that is actually stored in the "user" field.
    { "$match": { "user": 1 } },

    // One document per category, collecting that category's items.
    { "$group": {
        "_id": "$category",
        "items": {
            "$push": {
                // Carry the source document's _id so each item stays identifiable.
                "_id": "$_id",
                "field1": "$field1",
                "field2": "$field2",
                "tags": "$tags"
            }
        }
    }},

    // Collapse every category group into one document; the category name
    // travels as data ("_id") rather than as a key.
    { "$group": {
        "_id": null,
        "categories": {
            "$push": {
                "_id": "$_id",
                "items": "$items"
            }
        }
    }}
])
你会得到这样的输出:
{
"_id" : null,
"categories" : [
{
"_id" : "bikes",
"items" : [
{
"_id": 3,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
},
{
"_id" : "phones",
"items" : [
{
"_id": 1,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id": 2,
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
]
}
]
}
拥有不随数据变化而改变的通用键名确实更好。这其实就是面向对象的模式。
如果您真的认为您需要“数据作为键”,那么对于聚合框架,您要么知道您期望的“类别”,要么准备生成管道阶段:
// Produce one document whose keys are the known category names
// ("phones", "bikes"). The category names must be known up front (or the
// stages generated programmatically), because the aggregation framework
// cannot turn field values into key names.
db.collection.aggregate([
    { "$match": { "user": "1" } },

    // Build one array per category. Documents of other categories leave a
    // `false` placeholder in the array, removed by the stages below.
    { "$group": {
        "_id": null,
        "phones": {
            "$push": {
                "$cond": [
                    { "$eq": ["$category","phones"] },
                    {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    false
                ]
            }
        },
        "bikes": {
            "$push": {
                "$cond": [
                    { "$eq": ["$category","bikes"] },
                    {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    false
                ]
            }
        }
    }},

    // Strip the `false` placeholders from "phones": unwind, filter, regroup.
    // CAVEAT: if the user has no documents in this category, the $match
    // removes every document and the whole pipeline returns nothing.
    { "$unwind": "$phones" },
    { "$match": { "phones": { "$ne": false } }},
    { "$group": {
        "_id": "$_id",
        "phones": { "$push": "$phones" },
        // Every unwound document carries the same "bikes" array; keep one copy.
        "bikes": { "$first": "$bikes" }
    }},

    // Same unwind/filter/regroup for "bikes" (same caveat applies).
    { "$unwind": "$bikes" },
    { "$match": { "bikes": { "$ne": false } }},
    { "$group": {
        "_id": "$_id",
        "phones": { "$first": "$phones" },
        "bikes": { "$push": "$bikes" }
    }},

    // Drop the null grouping key from the final output.
    { "$project": {
        "_id": 0,
        "phones": 1,
        "bikes": 1
    }}
])
您可以使用 MongoDB 2.6 将其缩短一点,因为您可以使用 $setDifference 运算符过滤掉 false 值:
// MongoDB 2.6+ variant: build one array per known category, then remove the
// `false` placeholders with $setDifference instead of unwind/match/regroup.
db.collection.aggregate([
    { "$match": { "user": "1" } },

    // A single group over all matched documents. Each accumulator pushes the
    // item when the category matches and a `false` placeholder otherwise.
    { "$group": {
        "_id": null,
        "phones": {
            "$push": {
                "$cond": {
                    "if": { "$eq": ["$category","phones"] },
                    "then": {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    "else": false
                }
            }
        },
        "bikes": {
            "$push": {
                "$cond": {
                    "if": { "$eq": ["$category","bikes"] },
                    "then": {
                        "_id": "$_id",
                        "field1": "$field1",
                        "field2": "$field2",
                        "tags": "$tags"
                    },
                    "else": false
                }
            }
        }
    }},

    // Subtract the set [false] from each array and hide the null group key.
    { "$project": {
        "_id": 0,
        "phones": { "$setDifference": ["$phones",[false]] },
        "bikes": { "$setDifference": ["$bikes",[false]] }
    }}
])
两者都按照您的要求产生输出:
{
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
这里的一般情况是聚合框架不会允许将字段数据用作键,因此您需要对数据进行分组或自己指定键名。
获得“动态”键名的唯一方法是改用mapReduce:
// Build a single result whose keys are the category names found in the data.
// Both functions run inside the server's JavaScript engine, so they are kept
// to plain ES5 syntax.
db.collection.mapReduce(
    // map: emit each document under the single key `null`, wrapped as
    // { <category>: [document] } so the emitted shape matches what reduce returns.
    function () {
        var categoryName = this.category;

        // Strip the fields that should not appear in the output items.
        delete this.user;
        delete this.category;

        var grouped = {};
        grouped[categoryName] = [this];
        emit(null, grouped);
    },
    // reduce: merge the per-category arrays of all partial values into one object.
    function (key, values) {
        var merged = {};
        for (var i = 0; i < values.length; i++) {
            var partial = values[i];
            var categoryNames = Object.keys(partial);
            for (var j = 0; j < categoryNames.length; j++) {
                var name = categoryNames[j];
                if (!merged.hasOwnProperty(name)) {
                    merged[name] = [];
                }
                var items = partial[name];
                for (var k = 0; k < items.length; k++) {
                    merged[name].push(items[k]);
                }
            }
        }
        return merged;
    },
    {
        "query": { "user": "1" },
        "out": { "inline": 1 }
    }
)
所以现在键的生成是动态的,但输出采用的是 mapReduce 特有的格式:
{
"_id" : null,
"value" : {
"phones" : [
{
"_id" : "1",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag1",
"tag3"
]
},
{
"_id" : "2",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag2",
"tag3"
]
}
],
"bikes" : [
{
"_id" : "3",
"field1" : "value1",
"field2" : "value2",
"tags" : [
"tag3",
"tag4"
]
}
]
}
}
因此输出受限于 mapReduce 组织结果的方式,并且这里需要执行 JavaScript,速度会比聚合框架的原生操作慢。它在数据处理上更灵活、更强大,但这是一种权衡。
总结一下,如果您坚持使用通用键名的模式,那么使用聚合框架的第一种方法是最快、最好的做法,而且结果从服务器返回之后,您随时可以在客户端重新组织其结构。如果您执意打破这种模式,并且需要由服务器生成动态键,那么在聚合框架的上述方法被认为不切实际的情况下,可以用 mapReduce 来实现。