【问题标题】:MongoDb Aggregation Data manipulation - Objects to Arrays(MongoDb 聚合数据操作 - 对象到数组)
【发布时间】:2021-11-03 21:29:18
【问题描述】:

我有以下示例数据集

[{
  "_id": {
    "$oid": "60f83d3cd66842301905aa77"
  },
  "id": 527438,
  "name": "CryptoPunk #4050",
  "asset_contract": {
    "name": "CryptoPunks",
    "address": "0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb"
  },
  "traits": [
    {
      "trait_type": "type",
      "value": "Male",
      "display_type": null,
      "max_value": null,
      "trait_count": 6039,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Mohawk",
      "display_type": null,
      "max_value": null,
      "trait_count": 441,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Earring",
      "display_type": null,
      "max_value": null,
      "trait_count": 2459,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Frown",
      "display_type": null,
      "max_value": null,
      "trait_count": 261,
      "order": null
    }
  ],
  "token_id": "4050",
  "permalink": "https://opensea.io/assets/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb/4050",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-",
  "image_preview_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-=s250",
  "animation_url": null,
  "vault_contract": "0x269616d549d7e8eaa82dfb17028d0b212d11232a"
},{
  "_id": { "$oid": "60f83d3cbc3f0161da2141f7" },
  "id": 17736625,
  "name": "OJ Simpson",
  "asset_contract": {
    "name": "Hashmasks",
    "address": "0xc2c747e0f7004f9e8817db2ca4997657a7746928"
  },
  "traits": [
    {
      "trait_type": "Character",
      "value": "Male",
      "display_type": null,
      "max_value": null,
      "trait_count": 8659,
      "order": null
    },
    {
      "trait_type": "Mask",
      "value": "Doodle",
      "display_type": null,
      "max_value": null,
      "trait_count": 2187,
      "order": null
    },
    {
      "trait_type": "Eye Color",
      "value": "Dark",
      "display_type": null,
      "max_value": null,
      "trait_count": 7419,
      "order": null
    },
    {
      "trait_type": "Item",
      "value": "No Item",
      "display_type": null,
      "max_value": null,
      "trait_count": 14533,
      "order": null
    },
    {
      "trait_type": "Skin Color",
      "value": "Dark",
      "display_type": null,
      "max_value": null,
      "trait_count": 3784,
      "order": null
    },
    {
      "trait_type": "Token ID",
      "value": 3535,
      "display_type": "number",
      "max_value": null,
      "trait_count": 0,
      "order": null
    },
    {
      "trait_type": "Background",
      "value": "Doodle",
      "display_type": null,
      "max_value": null,
      "trait_count": 5538,
      "order": null
    }
  ],
  "token_id": "3535",
  "permalink": "https://opensea.io/assets/0xc2c747e0f7004f9e8817db2ca4997657a7746928/3535",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu",
  "image_preview_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu=s250",
  "animation_url": null,
  "vault_contract": "0xc7a8b45e184138114e6085c82936a8db93dd156a"
}]

我想把它更新成下面这样

[{
    "_id": {
      "$oid": "60f83d3cd66842301905aa77"
    },
    "id": 527438,
    "name": "CryptoPunk #4050",
    "asset_contract": {
      "name": "CryptoPunks",
      "address": "0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb"
    },
    "traits":
      {
        "type": "Male",
        "accessory": ["Mohawk", "Earring", "Frown"]
      },
    "token_id": "4050",
    "permalink": "https://opensea.io/assets/0xb47e3cd837ddf8e4c57f05d70ab865de6e193bbb/4050",
    "background_color": null,
    "image_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-",
    "image_preview_url": "https://lh3.googleusercontent.com/sO18rDQYhC5yIcj12RVsv31pbbsZo_2muQQbTJMQHn47EKGhnirs8mxzohm58HAZ7taBoe4pU6x1qntlExk_TtJ-=s250",
    "animation_url": null,
    "vault_contract": "0x269616d549d7e8eaa82dfb17028d0b212d11232a"
  },{
  "_id": { "$oid": "60f83d3cbc3f0161da2141f7" },
  "id": 17736625,
  "name": "OJ Simpson",
  "asset_contract": {
    "name": "Hashmasks",
    "address": "0xc2c747e0f7004f9e8817db2ca4997657a7746928"
  },
  "traits": {
      "character": "Male",
      "mask": "Doodle",
      "eye_color": "Dark",
      "item": "No Item",
      "skin_color": "Dark",
      "token_id": 3535,
      "background": "Doodle"
    },
  "token_id": "3535",
  "permalink": "https://opensea.io/assets/0xc2c747e0f7004f9e8817db2ca4997657a7746928/3535",
  "background_color": null,
  "image_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu",
  "image_preview_url": "https://lh3.googleusercontent.com/NZQu7CNjgJ_1uhbUVwEb-14rZPJmPCaqaXy0qnUpgm5Qll0BvmmF7tPMjBhFH6ZZp_qzOPxHi0NFmRkOjHoBQ0BODcWI8NlyBXLu=s250",
  "animation_url": null,
  "vault_contract": "0xc7a8b45e184138114e6085c82936a8db93dd156a"
}]

其背后的逻辑是

  • 查看 Traits 数组对象
  • 获取 trait_type 的值,将其转为小写并把空格替换为下划线,用作新键的名称
  • 将新键的值设置为"value"的值

所以,

      "trait_type": "type",
      "value": "Male",

//becomes

            "type": "Male"
  • 如果有多个相同特征类型的实例,请创建一个值数组。

所以,

{
      "trait_type": "accessory",
      "value": "Mohawk",
      "display_type": null,
      "max_value": null,
      "trait_count": 441,
      "order": null
    },
    {
      "trait_type": "accessory",
      "value": "Earring",
      "display_type": null,
      "max_value": null,
      "trait_count": 2459,
      "order": null
    },

// becomes

            "accessory": ["Mohawk", "Earring"]

【问题讨论】:

    标签: json mongodb elasticsearch aggregation-framework algolia


    【解决方案1】:

    查询

    • 这是一个聚合查询,而不是管道更新,因为管道更新不允许使用此处用到的 $group、$lookup 等阶段。(您可以在之后用 $out 替换整个集合,或用 $merge 替换文档,效果类似于更新)

    • 第一个 $map

      • 对于每个特征(traits 数组中的每个文档),把它的字段放入一个键值对数组
        [["trait_type", "type"], ["value", "Male"], ["display_type", null], ...]
      • 对该数组做 $reduce,只用其中的 trait_type 和 value 构造 1 个文档
        {"type": "type", "value": "Male"}(type 的值会转为小写,并把空格替换为 "_")
    • 现在的特征就像

      "traits": [
        {
          "type": "type",
          "value": "Male"
        },
        {
          "type": "accessory",
          "value": "Mohawk"
        },
        {
          "type": "accessory",
          "value": "Earring"
        },
        {
          "type": "accessory",
          "value": "Frown"
        }
      ]
      
    • 使用虚拟集合 [{}] 进行查找(我们这样做是为了在该数组中创建一个组),这就像一个技巧,允许我们在 1 个文档中使用阶段运算符

      • 查找管道按类型展开和分组
      "traits": [
        {
          "values": [
            "Mohawk",
            "Earring",
            "Frown"
          ],
          "type": "accessory"
        },
        {
          "values": [
            "Male"
          ],
          "type": "type"
        }
      ]
      
      • 然后用 $replaceRoot 把 type 的值作为字段名、把 values 作为该字段的值(如果数组大小为 1,则去掉数组,只保留其中的单个值)
    • 查找后我们有

      "traits": [
        {
          "accessory": [
            "Mohawk",
            "Earring",
            "Frown"
          ]
        },
        {
          "type": "Male"
        }
      ]
      
    • 所以我们要做的就是减少这些特征并合并对象 (无论如何,键都是唯一的,因为我们按它们分组)

    • 我们得到了预期的输出(至少我认为还可以)

    Test code here

    // Reshape each document's `traits` array into a single object keyed by the
    // normalised trait_type (lower-cased, spaces replaced by "_").
    // A trait_type that occurs once maps to its bare value; one that occurs
    // several times maps to an array of its values.
    // Needs a helper collection named "dummy" that holds one document (e.g. {}).
    db.collection.aggregate([
      // Stage 1: shrink every trait to { type: <normalised trait_type>, value: <value> }.
      {
        "$set": {
          "traits": {
            "$map": {
              "input": "$traits",
              "as": "trait",
              "in": {
                "$reduce": {
                  // Turn the trait sub-document into [key, value] pairs.
                  "input": {
                    "$map": {
                      "input": { "$objectToArray": "$$trait" },
                      "as": "field",
                      "in": ["$$field.k", "$$field.v"]
                    }
                  },
                  "initialValue": {},
                  "in": {
                    "$let": {
                      "vars": {
                        "fieldName": { "$arrayElemAt": ["$$this", 0] },
                        "fieldValue": { "$arrayElemAt": ["$$this", 1] }
                      },
                      // `$$value` below is the $reduce accumulator built so far.
                      "in": {
                        "$cond": [
                          { "$eq": ["$$fieldName", "value"] },
                          { "$mergeObjects": ["$$value", { "value": "$$fieldValue" }] },
                          {
                            "$cond": [
                              { "$eq": ["$$fieldName", "trait_type"] },
                              {
                                "$mergeObjects": [
                                  "$$value",
                                  {
                                    "type": {
                                      "$replaceAll": {
                                        "input": { "$toLower": "$$fieldValue" },
                                        "find": " ",
                                        "replacement": "_"
                                      }
                                    }
                                  }
                                ]
                              },
                              // Any other field of the trait is dropped.
                              "$$value"
                            ]
                          }
                        ]
                      }
                    }
                  }
                }
              }
            }
          }
        }
      },
      // Stage 2: the $lookup on "dummy" is only a vehicle for running
      // $unwind/$group over this one document's traits.
      {
        "$lookup": {
          "from": "dummy",
          "let": { "parentTraits": "$traits" },
          "pipeline": [
            { "$set": { "traits": "$$parentTraits" } },
            { "$unwind": "$traits" },
            { "$replaceWith": "$traits" },
            // Collect every value that shares the same normalised type.
            { "$group": { "_id": "$type", "values": { "$push": "$value" } } },
            { "$project": { "_id": 0, "type": "$_id", "values": 1 } },
            // Emit { <type>: <value> } for a single value, { <type>: [values] } otherwise.
            {
              "$replaceWith": {
                "$arrayToObject": [
                  [
                    {
                      "k": "$type",
                      "v": {
                        "$cond": [
                          { "$eq": [{ "$size": "$values" }, 1] },
                          { "$arrayElemAt": ["$values", 0] },
                          "$values"
                        ]
                      }
                    }
                  ]
                ]
              }
            }
          ],
          "as": "traits"
        }
      },
      // Stage 3: fold the one-key objects into a single traits object.
      {
        "$set": {
          "traits": { "$mergeObjects": "$traits" }
        }
      }
    ])
    

    【讨论】:

    • 非常感谢@Takis_。我正在运行它的集合有 57k 条记录,我无法将 dummy 附加到记录集的末尾。当我尝试使用 Mongo shell(以及 Compass Aggregation)运行它时,我也收到了 Invalid $set :: caused by :: Unrecognized expression '$setField'
    • 实际用途是在一个集合上执行聚合并将输出移动到另一个集合,因此db.rawCollection.aggregation() 并输出所有结果以更新辅助集合db.cleanCollection。这将使用脚本每 5 分钟运行一次。
    • 为此,您还需要 1 个名为 dummy 的数据库集合,它将有 1 个空文档 collection=[{}](或任何名称,只要您在查询时也更改它)。 $setField 错误是因为您没有运行 MongoDB 5,没关系,还有另一种方法,我将更新查询。在您的情况下,您需要 $out 保存到另一个集合。
    • 我更新了查询,删除了 $setField 并用 $mergeObjects 代替,现在应该没问题了。dummy 是您数据库中的一个新集合,里面只有一个空文档 dummy=[{}],您只需要有创建该集合的权限。您可以给它起任何名字,但也必须在 $lookup 中替换成那个名字。至于 $out,请查看如何在您的驱动程序中使用它:它会把结果输出到一个新集合(不是更新),新集合的内容就是聚合的结果。
    • 感谢 Takis,我又奖励了 50 分并分配给您,非常感谢您一直以来的支持。
    猜你喜欢
    • 2017-07-14
    • 2021-06-21
    • 1970-01-01
    • 1970-01-01
    • 2020-07-27
    • 1970-01-01
    • 2020-11-27
    • 2020-08-20
    • 2020-07-02
    相关资源
    最近更新 更多