【问题标题】:Doctrine big query performances学说大查询性能
【发布时间】:2024-04-17 22:15:02
【问题描述】:

你好 * :)

我在一个大的学说查询中遇到了很多困难,数据库中有 17000 个联系人,并且有近 100 万行用于统计和订单。

如您所见,有很多计算值,我确实需要这些值来执行过滤。

起初我尝试使用 DQL 进行查询,但在性能方面同样糟糕,查询时间超过 15 或 30 秒..

有没有人可以给我一些建议,我确定我不是唯一需要做这种查询的人!

提前致谢:)

这是查询

 $qb = $this->createQueryBuilder('contact');

    $qb->select("contact.id");
    $qb->addSelect("contact.source");
    $qb->addSelect("contact.updatedAt");
    $qb->addSelect("contact.createdAt");

    // orders
    $qb->addSelect("COUNT(distinct shop_order.id) AS totalOrders");
    $qb->addSelect("SUM(distinct shop_order.price) AS totalSpentOrders");
    $qb->addSelect("AVG(distinct shop_order.price) AS averageCart");
    $qb->addSelect("MAX(shop_order.createdAt) AS lastOrderDatedAt");
    $qb->addSelect("MIN(shop_order.createdAt) AS firstOrderDatedAt");
    $qb->addSelect("last_shop_order.price AS totalSpentLastOrder");

    // orders statuses
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_WAITING . " THEN shop_order.id ELSE :empty END)) AS totalOrdersWaitingOrError");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_PAID . " THEN shop_order.id ELSE :empty END)) AS totalOrdersPaid");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_DELIVERED . " THEN shop_order.id ELSE :empty END)) AS totalOrdersDelivered");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_PAYMENT_ERROR . " THEN shop_order.id ELSE :empty END)) AS totalOrdersPaymentError");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_SHIPPED . " THEN shop_order.id ELSE :empty END)) AS totalOrdersShipped");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN shop_order.status = " . Order::STATUS_ABORTED . " THEN shop_order.id ELSE :empty END)) AS totalOrdersAborted");

    $qb->leftJoin("contact.orders", "shop_order", Join::WITH, "shop_order.isValid = 1 AND shop_order.contact = contact");
    $qb->leftJoin("contact.orders", "last_shop_order", Join::WITH, "last_shop_order = FIRST(SELECT lso FROM App:Order lso WHERE lso.isValid = 1 AND lso.contact = contact ORDER BY lso.createdAt DESC)");

    // order cart
    $qb->leftJoin("shop_order.cart", "cart");
    $qb->leftJoin("cart.productCarts", "product_carts");
    $qb->leftJoin("product_carts.product", "product");

    // abandonned carts
    $qb->addSelect("COUNT(distinct abandonned_cart.id) AS totalAbandonnedCarts");
    $qb->addSelect("SUM(distinct abandonned_product.price) AS totalAmountAbandonnedCarts");

    $qb->leftJoin("contact.carts", "abandonned_cart");
    $qb->leftJoin("abandonned_cart.shopOrder", "abandonned_shop_order");
    $qb->leftJoin("abandonned_cart.productCarts", "abandonned_product_cart");
    $qb->leftJoin("abandonned_product_cart.product", "abandonned_product");
    $qb->andWhere("abandonned_shop_order.id IS NULL");

    $qb->addSelect("SUM(distinct last_abandonned_cart.amount) AS totalAmountLastAbandonnedCart");

    $qb->leftJoin("contact.carts", "last_abandonned_cart", Join::WITH, "last_abandonned_cart = FIRST(SELECT lac FROM App:Cart lac LEFT JOIN lac.shopOrder lacso WHERE lacso IS NULL AND lac.contact = contact ORDER BY lac.createdAt DESC)");
    $qb->leftJoin("last_abandonned_cart.productCarts", "last_abandonned_product_cart");
    $qb->leftJoin("last_abandonned_product_cart.product", "last_abandonned_product");

    // behavior
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_SENT . " THEN contact_stat.id ELSE :empty END)) AS totalEmailSent");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_CLICKED . " THEN contact_stat.id ELSE :empty END)) AS totalEmailClicked");
    $qb->addSelect("COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_OPENED . " THEN contact_stat.id ELSE :empty END)) AS totalEmailOpened");
    $qb->addSelect("(COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_CLICKED . " THEN contact_stat.id ELSE :empty END)) / COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_SENT . " THEN contact_stat.id ELSE :empty END))) * 100 AS emailClickedRate");
    $qb->addSelect("(COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_OPENED . " THEN contact_stat.id ELSE :empty END)) / COUNT(DISTINCT(CASE WHEN contact_stat.type = " . ContactStat::TYPE_EMAIL_SENT . " THEN contact_stat.id ELSE :empty END))) * 100  AS emailOpenedRate");
    $qb->leftJoin("contact.contactStats", "contact_stat");

    // other contact informations
    $qb->leftJoin("contact.address", "address");
    $qb->leftJoin("contact.contactOperations", "contact_operation");
    $qb->leftJoin("contact_operation.operation", "operation");
    $qb->leftJoin("contact.accountTypes", "account_type");
    $qb->leftJoin("contact.medicalInformation", "medical_information");

    $qb->setParameter("empty", null);

    $qb->orderBy("contact.updatedAt", "DESC");
    $qb->groupBy("contact.id");

我试图删除一些连接和计算值这是 SQL 查询:

SELECT c0_.id AS id_0, c0_.email AS email_1, c0_.first_name AS first_name_2, c0_.last_name AS last_name_3, c0_.type AS type_4, c0_.updated_at AS updated_at_5, c0_.source AS source_6, c0_.created_at AS created_at_7, a1_.postcode AS postcode_8, a1_.country AS country_9, m2_.skin_type AS skin_type_10, c0_.is_optin_sms AS is_optin_sms_11 FROM contact c0_ LEFT JOIN address a1_ ON c0_.address_id = a1_.id LEFT JOIN medical_information m2_ ON c0_.id = m2_.contact_id LEFT JOIN contact_operation c3_ ON c0_.id = c3_.contact_id LEFT JOIN operation o4_ ON c3_.operation_id = o4_.id LEFT JOIN account_type_contact a6_ ON c0_.id = a6_.contact_id LEFT JOIN account_type a5_ ON a5_.id = a6_.account_type_id LEFT JOIN contact_stat c7_ ON c0_.id = c7_.contact_id LEFT JOIN shop_order s8_ ON c0_.id = s8_.contact_id AND (s8_.is_valid = 1 AND s8_.contact_id = c0_.id) LEFT JOIN shop_order s9_ ON c0_.id = s9_.contact_id AND (s9_.id = (SELECT s10_.id FROM shop_order s10_ WHERE s10_.is_valid = 1 AND s10_.contact_id = c0_.id ORDER BY s10_.created_at DESC LIMIT 1)) LEFT JOIN cart c11_ ON s8_.cart_id = c11_.id LEFT JOIN product_cart p12_ ON c11_.id = p12_.cart_id LEFT JOIN product p13_ ON p12_.product_id = p13_.id LEFT JOIN cart c14_ ON c0_.id = c14_.contact_id LEFT JOIN shop_order s15_ ON c14_.id = s15_.cart_id AND (s15_.id IS NULL AND s15_.contact_id = c0_.id) GROUP BY c0_.id ORDER BY c0_.updated_at DESC;

查询耗时超过 8 秒..

有查询的解释


【问题讨论】:

    标签: sql performance doctrine query-builder


    【解决方案1】:

    百万行很多吗?这取决于基础设施,但取决于现代硬件,这应该不是什么大问题。

    我担心的是您的查询有 sum ( distinct .... )。这可能是因为如果没有这个,您将无法获得正确的结果;编写查询时的一个典型错误。即没有选择正确的功能,因为需要它而不是没有它没有得到正确的结果。像这样做 sum 似乎有问题。

    所以首先执行您的查询而不使用 group by 并分析结果。很有可能您的数据中出现了不受欢迎的笛卡尔积……这可能导致您走上了放置差异化的道路。这需要先解决。如果最后需要对一百万的总数据集进行分组;基于硬件,这应该没什么大不了的。

    【讨论】:

    • 如果我没有按联系人分组,我只有一行,所需的结果是每行计算值的 17000 个联系人
    • 百万行仅用于获取设置的计算值,例如订单总和或他们支付的总金额
    • 所以没有 group by 你得到正确的结果?该查询需要多长时间? (对于这样的分析,我经常做一个计数(*),这样就不会因为结果显示需要时间而得到歪斜的结果)。那么,如果你从 sum 中删除了所有不同的值,你的结果仍然正确吗?
    • 没有不同的计算值是不正确的,查询需要超过 30 秒的所有连接.. 如果我删除所有的选择总和 ou count 也是一样
    • 我用一个没有选择总和的更简单的查询添加了更多信息,等等。查询仍然用了 8 秒