【问题标题】:Find Duplicates in array of transactions objects using JavaScript, and combined duplicates in array of array objects使用 JavaScript 在交易对象数组中查找重复项,并在数组对象数组中组合重复项
【发布时间】:2019-04-28 02:51:09
【问题描述】:

我有一个交易对象数组,需要根据属性在其中查找重复项:如果两个对象除 id 和 time 之外的所有值都相同,并且它们的时间差在 1 分钟以内,则视为重复。我需要把互为重复的交易组合到同一个数组中,最终得到一个由这些数组组成的数组。

以下是交易的输入。

我尝试使用 Reduce 函数,但无法获得预期的输出。

// Asker's attempt: fold `transactions` into `acc`, looking for an already
// accumulated entry with the same sourceAccount, targetAccount and amount
// whose time is less than the current time plus one minute.
// NOTE(review): as written this snippet does not parse or run; the individual
// problems are flagged inline below.
var newArray = transactions.reduce(function(acc, curr) {
      // Look for the index of an accumulated entry that matches `curr`.
      // NOTE(review): the callback never returns a value, so findIndex would
      // always yield -1 even if the rest of the callback were valid.
      var findIfduplicateExist = acc.findIndex(function(item) {
        let accepoch = new Date(item.time).valueOf();
        // NOTE(review): `valueof` (lower-case "o") is not a Date method; the
        // method is `valueOf`, so this call would throw a TypeError.
        let currepoch= new Date(curr.time).valueof();
        // NOTE(review): a `let` declaration cannot be the unbraced body of an
        // `if`; this is a SyntaxError. `obj` would also be scoped to this
        // callback and therefore not visible to the code after it.
        if(item.sourceAccount === curr.sourceAccount &&
        item.targetAccount===curr.targetAccount &&
        item.amount===curr.amount&&
        accepoch<currepoch+(1*60*1000))
          let obj={
           'id':curr.id,
            'sourceAccount':curr.sourceAccount,
            'targetAccount':curr.targetAccount,
            'time':curr.time,
            'category':curr.category,
            'amount':curr.amount 
          }
      })
      // If no matching entry exists in the accumulator, add a new object.
      // NOTE(review): `findIfNameExist` is not declared anywhere in this
      // snippet (the variable declared above is `findIfduplicateExist`).
      if (findIfNameExist === -1) {
        acc.push(obj)
      } else {
        // If the attributes match, append the current transaction.
        // NOTE(review): nothing in this snippet ever sets a `value` property
        // on the accumulated entries.
        acc[findIfNameExist].value.push(curr)
      }
  return acc;

}, []);

输入交易:

[
  {
    id: 3,
    sourceAccount: 'A',
    targetAccount: 'B',
    amount: 100,
    category: 'eating_out',
    time: '2018-03-02T10:34:30.000Z'
  },
  {
    id: 1,
    sourceAccount: 'A',
    targetAccount: 'B',
    amount: 100,
    category: 'eating_out',
    time: '2018-03-02T10:33:00.000Z'
  },
  {
    id: 6,
    sourceAccount: 'A',
    targetAccount: 'C',
    amount: 250,
    category: 'other',
    time: '2018-03-02T10:33:05.000Z'
  },
  {
    id: 4,
    sourceAccount: 'A',
    targetAccount: 'B',
    amount: 100,
    category: 'eating_out',
    time: '2018-03-02T10:36:00.000Z'
  },
  {
    id: 2,
    sourceAccount: 'A',
    targetAccount: 'B',
    amount: 100,
    category: 'eating_out',
    time: '2018-03-02T10:33:50.000Z'
  },
  {
    id: 5,
    sourceAccount: 'A',
    targetAccount: 'C',
    amount: 250,
    category: 'other',
    time: '2018-03-02T10:33:00.000Z'
  }
];

预期输出如下:

[   
  [
    {
      id: 1,
      sourceAccount: "A",
      targetAccount: "B",
      amount: 100,
      category: "eating_out",
      time: "2018-03-02T10:33:00.000Z"
    },
    {
      id: 2,
      sourceAccount: "A",
      targetAccount: "B",
      amount: 100,
      category: "eating_out",
      time: "2018-03-02T10:33:50.000Z"
    },
    {
      id: 3,
      sourceAccount: "A",
      targetAccount: "B",
      amount: 100,
      category: "eating_out",
      time: "2018-03-02T10:34:30.000Z"
    }  
  ], 
  [
    {
      id: 5,
      sourceAccount: "A",
      targetAccount: "C",
      amount: 250,
      category: "other",
      time: "2018-03-02T10:33:00.000Z"
    },
    {
      id: 6,
      sourceAccount: "A",
      targetAccount: "C",
      amount: 250,
      category: "other",
      time: "2018-03-02T10:33:05.000Z"
    }   
  ] 
]

【问题讨论】:

  • 向我们展示你的尝试,即使它没有让你一路走到那里。
  • 你需要支持哪个JS-Version?
  • @HerrErker 运行环境是 Node 8.1.3
  • @misorude 用我尝试过的代码更新了我的问题。

标签: javascript arrays duplicates javascript-objects


【解决方案1】:

如果先得到一份按 id 排序的交易副本,处理起来会更容易(也更高效)。我假设 id 是递增的数字,因此较晚的交易总是有更大的 id。这样只需将当前交易的时间戳与累加器中最后一笔交易的时间戳进行比较:

// Example data
const transactions = [
    { id: 3, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:34:30.000Z' },
    { id: 1, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:00.000Z' },
    { id: 6, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:05.000Z' },
    { id: 4, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:36:00.000Z' },
    { id: 2, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:50.000Z' },
    { id: 5, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:00.000Z' }
];

// Maximum gap between two consecutive transactions of the same group.
const DUPLICATE_WINDOW_MS = 1 * 60 * 1000;

/**
 * Groups transactions that duplicate each other.
 *
 * Two transactions belong to the same group when sourceAccount,
 * targetAccount, amount and category are strictly equal and the later one is
 * at most `windowMs` after the previous member of the group. Groups are
 * chained: each transaction is compared with the latest member only.
 *
 * The input is processed in ascending id order, which assumes that ids grow
 * with time. The input array itself is not modified.
 *
 * @param {Array<Object>} list - Transactions with id, sourceAccount,
 *   targetAccount, amount, category and an ISO-8601 `time` string.
 * @param {number} [windowMs=DUPLICATE_WINDOW_MS] - Allowed gap in milliseconds.
 * @returns {Array<Array<Object>>} Groups holding at least two transactions,
 *   in the order in which the groups were opened.
 */
function groupDuplicateTransactions(list, windowMs = DUPLICATE_WINDOW_MS) {
    // Latest group per key. Looking the group up by key (instead of only
    // checking the last group of the accumulator) keeps duplicates together
    // even when transactions with other keys have ids in between.
    const openGroupByKey = new Map();

    return [...list]
        .sort((a, b) => a.id - b.id)
        .reduce((acc, curr) => {
            // JSON keeps value types apart (100 vs '100'), like `!==` would.
            const key = JSON.stringify([
                curr.sourceAccount,
                curr.targetAccount,
                curr.amount,
                curr.category
            ]);
            const group = openGroupByKey.get(key);
            const prev = group && group[group.length - 1];
            if (!prev || Date.parse(prev.time) + windowMs < Date.parse(curr.time)) {
                // First transaction for this key, or the gap is too large:
                // open a new group.
                const fresh = [curr];
                openGroupByKey.set(key, fresh);
                acc.push(fresh);
            } else {
                group.push(curr);
            }
            return acc;
        }, [])
        // A transaction without any partner is not a duplicate.
        .filter((group) => group.length > 1);
}

const newArray = groupDuplicateTransactions(transactions);

console.log(newArray);

【讨论】:

  • 这个方案看起来很不错,只是想就您所做的假设请教一下:如果我们按时间排序,会不会是更好的解决方案?
  • 当然,如果我们不能假设 id 是递增的,那么 .sort((a,b) => a.time.localeCompare(b.time)) 应该可以解决问题。
  • 不过要小心:在我看来,时间戳在理论上可能相等(这取决于毫秒部分的精度)……在这种情况下用 id 会更好,因为它保证永远不会相等。
【解决方案2】:

这可以通过 Array.sort、Array.reduce 和 Object.values 以简洁的方式完成:

const data = [
  { id: 3, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:34:30.000Z' },
  { id: 1, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:00.000Z' },
  { id: 6, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:05.000Z' },
  { id: 4, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:36:00.000Z' },
  { id: 2, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:50.000Z' },
  { id: 5, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:00.000Z' }
]

// Largest gap (in milliseconds) allowed between two consecutive duplicates.
const WINDOW_MS = 60 * 1000

// Epoch milliseconds of a transaction's `time` string.
const getTime = obj => new Date(obj.time).getTime()

// Returns a chronologically sorted copy (ties broken by numeric id).
// The comparison is numeric: comparing `${id}${time}` as strings would put
// id 10 before id 2. The caller's array is left untouched.
const sort = arr => [...arr].sort((a, b) => getTime(a) - getTime(b) || a.id - b.id)

// True when `obj` is at most WINDOW_MS away from the last element of `arr`,
// i.e. when it continues that group of duplicates.
const isDub = (arr, obj) =>
  arr.length > 0 && Math.abs(getTime(arr[arr.length - 1]) - getTime(obj)) <= WINDOW_MS

/**
 * Collects groups of duplicate transactions.
 *
 * Transactions are duplicates when sourceAccount, targetAccount, amount and
 * category match and each one is at most WINDOW_MS after the previous member
 * of its group. A transaction that is too late for the current group of its
 * key starts a new group instead of being discarded.
 *
 * @param {Array<Object>} list - Transactions to inspect; not modified.
 * @returns {Array<Array<Object>>} Groups containing at least two
 *   transactions, ordered by the first appearance of their key.
 */
const collectDuplicates = list => {
  const groupsByKey = sort(list).reduce((r, c) => {
    const key = JSON.stringify([c.sourceAccount, c.targetAccount, c.amount, c.category])
    const groups = r[key] || (r[key] = [])
    const last = groups[groups.length - 1]
    if (last && isDub(last, c)) {
      last.push(c)
    } else {
      groups.push([c])
    }
    return r
  }, {})

  return Object.values(groupsByKey)
    .reduce((all, groups) => all.concat(groups), [])
    // A group of one has no duplicate partner.
    .filter(group => group.length > 1)
}

const result = collectDuplicates(data)

console.log(result)

您确实需要对数组进行预排序,以便在根据您的分钟内要求比较重复项时只处理最后一个条目。

【讨论】:

    【解决方案3】:

    您可以进行多列排序,然后在每个组中查找重复项。

    // Number of seconds in one minute; multiplied with MILLISECONDS to build
    // the one-minute threshold used when comparing transaction times.
    const SECONDS = 60;
    // Number of milliseconds in one second.
    const MILLISECONDS = 1000;
    
    /**
     * Signed distance between two points in time.
     *
     * @param {*} t1 - Any value accepted by the Date constructor.
     * @param {*} t2 - Any value accepted by the Date constructor.
     * @returns {number} Milliseconds from t2 to t1 (negative when t1 is
     *   earlier, NaN when either value is not a valid date).
     */
    const getTimeDifference = (t1, t2) =>
      new Date(t1).getTime() - new Date(t2).getTime();
    
    /**
     * Sorts `transactions` in place by several columns, in priority order.
     *
     * The first column whose values differ decides the order of two rows.
     * The "time" column is compared through getTimeDifference; every other
     * column is compared with the `>` operator.
     *
     * @param {Array<Object>} [transactions=[]] - Rows to sort (mutated).
     * @param {Array<string>} [colsToSort=[]] - Column names, most significant first.
     * @returns {Array<Object>} The same array instance, now sorted.
     */
    const multiLevelSort = (transactions = [], colsToSort = []) => {
      const compareRows = (a, b) => {
        for (const col of colsToSort) {
          const left = a[col];
          const right = b[col];
          // Loose equality: values that are equal after coercion count as a tie.
          if (left == right) {
            continue;
          }
          if (col === "time") {
            return getTimeDifference(left, right) > 0 ? 1 : -1;
          }
          return left > right ? 1 : -1;
        }
        // All columns tied.
        return 0;
      };

      return transactions.sort(compareRows);
    };
    
    /**
     * Decides whether `curr` differs from `prev` on any of the given keys.
     *
     * For the "time" key the transactions differ when `curr` is at least one
     * minute later than `prev`; for every other key they differ when the
     * values are not strictly equal.
     *
     * @param {Object} prev - Previous transaction (may be missing).
     * @param {Object} curr - Current transaction (may be missing).
     * @param {Array<string>} [matchKeys=[]] - Keys to compare.
     * @returns {boolean} true when either transaction is missing or at least
     *   one key differs; false otherwise (including for an empty key list).
     */
    const isUniqueTransaction = (prev, curr, matchKeys = []) => {
      // Without two transactions there is nothing to be a duplicate of.
      if (!(prev && curr)) {
        return true;
      }

      return matchKeys.some((key) => {
        if (key !== "time") {
          return prev[key] !== curr[key];
        }
        // A gap of one minute or more makes the transaction unique.
        const elapsed = getTimeDifference(curr[key], prev[key]);
        return elapsed >= 1 * SECONDS * MILLISECONDS;
      });
    };
    
    /**
     * Finds groups of duplicate transactions.
     *
     * Transactions are sorted by sourceAccount, targetAccount, amount,
     * category and time, then walked once; consecutive transactions that
     * isUniqueTransaction does not report as unique are collected into the
     * same group. Only groups with more than one transaction are returned.
     *
     * @param {Array<Object>} [transactions=[]] - Transactions to inspect. The
     *   array is not modified.
     * @returns {Array<Array<Object>>} Duplicate groups, ordered by the time of
     *   the first transaction in each group.
     */
    function findDuplicateTransactions(transactions = []) {
      const matchingKeys = [
        "sourceAccount",
        "targetAccount",
        "amount",
        "category",
        "time"
      ];
      // multiLevelSort sorts the array it is given in place, so hand it a
      // shallow copy to avoid reordering the caller's array.
      const sortedTransactions = multiLevelSort([...transactions], matchingKeys);
      const duplicates = [];
      let group = [];
      sortedTransactions.forEach((curr) => {
        // Compare with the last transaction of the group being built
        // (undefined while the group is still empty).
        const prev = group[group.length - 1];
        if (isUniqueTransaction(prev, curr, matchingKeys)) {
          // The current group is finished; keep it only if it has duplicates.
          if (group.length > 1) {
            duplicates.push(group);
          }
          group = [];
        }
        group.push(curr);
      });

      // Push last group if it has more than 1 transaction
      if (group.length > 1) {
        duplicates.push(group);
      }

      // Sort duplicate transaction groups based on first transaction in group
      return duplicates.sort((a, b) => {
        return getTimeDifference(a[0].time, b[0].time);
      });
    }
    

    【讨论】:

      【解决方案4】:

      您也可以像下面这样使用 Array.sort 和 Array.forEach 来实现此目的

      我首先将数组按照属性值拼接成的键(不包括 id 和 time)排序,键相同时再按时间戳升序排序

      let arr = [
        { id: 3, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:34:30.000Z' },
        { id: 1, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:00.000Z' },
        { id: 6, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:05.000Z' },
        { id: 4, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:36:00.000Z' },
        { id: 2, sourceAccount: 'A', targetAccount: 'B', amount: 100, category: 'eating_out', time: '2018-03-02T10:33:50.000Z' },
        { id: 5, sourceAccount: 'A', targetAccount: 'C', amount: 250, category: 'other', time: '2018-03-02T10:33:00.000Z' }
      ];

      // Builds a comparison key from every property except `id` and `time`.
      // Property names are sorted first so that two objects with the same
      // values yield the same key regardless of property insertion order.
      const getKey = ({id, time, ...rest}) =>
        Object.keys(rest).sort().map(k => `${k}-${rest[k]}`).join(';');

      // Absolute distance between two timestamps, in milliseconds.
      const getTimeDiff = (t1, t2) => Math.abs(new Date(t1).getTime() - new Date(t2).getTime());

      /**
       * Groups duplicate transactions.
       *
       * A sorted copy of `list` (by key, then by ascending time) is walked
       * once. A transaction joins the current group when it has the same key
       * as its predecessor and is less than 60 seconds after it; otherwise it
       * opens a new group.
       *
       * @param {Array<Object>} list - Transactions to inspect; not modified.
       * @returns {Array<Array<Object>>} Groups with at least two transactions.
       */
      const groupDuplicates = (list) => {
        const sorted = [...list].sort((a, b) =>
          getKey(a).localeCompare(getKey(b)) || +new Date(a.time) - +new Date(b.time));

        const groups = [];
        sorted.forEach((d, i) => {
          const prev = sorted[i - 1];
          const continuesGroup = i > 0
            && getKey(d) === getKey(prev)
            && getTimeDiff(prev.time, d.time) / 1000 < 60;
          if (continuesGroup) {
            groups[groups.length - 1].push(d);
          } else {
            groups.push([d]);
          }
        });

        // A transaction without a partner is not a duplicate.
        return groups.filter(g => g.length > 1);
      };

      let res = groupDuplicates(arr);

      console.log(res)

      【讨论】:

        猜你喜欢
        • 2014-08-08
        • 2018-12-18
        • 2018-08-13
        • 2020-07-26
        • 2020-03-11
        • 1970-01-01
        • 2020-05-20
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多