为什么我的程序内存写得比读快？答案

【问题标题】：Why seem my program memory write faster than read?为什么我的程序内存写得比读快？
【发布时间】：2018-01-02 19:34:39
【问题描述】：

我的简单程序：

//usage:
//indent ./a.c;gcc -O0 ./a.c
//./a.out max r/w repeat timeout

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Parse TEXT as a base-10 int.  Stores the value in *OUT and returns 0 on
   success; returns -1 when TEXT holds no digits, has trailing characters,
   or does not fit in an int.  */
static int
parse_int_arg (const char *text, int *out)
{
  char *end = NULL;
  long value;

  errno = 0;
  value = strtol (text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE
      || value < INT_MIN || value > INT_MAX)
    {
      return -1;
    }

  *out = (int) value;
  return 0;
}

/* Return non-zero once more than TIMEOUT seconds have elapsed since
   START_TIME.  A TIMEOUT <= 0 disables the limit.  */
static int
timed_out (time_t start_time, time_t timeout)
{
  return (timeout > ((time_t) (0)))
    && ((time (NULL) - start_time) > timeout);
}

/* Free the first COUNT entries of BLOCK_ARRAY and then the table itself.
   Entries that were never allocated must be NULL (free (NULL) is a no-op).  */
static void
free_blocks (unsigned char **block_array, size_t count)
{
  size_t k;

  for (k = 0; k < count; k++)
    {
      free (block_array[k]);
    }
  free (block_array);
}

/* Memory read/write micro-benchmark.

   argv[1]  max      number of 1 MiB blocks to allocate (must be >= 0)
   argv[2]  r/w      first character selects the test: 'r' copies every
                     block byte into the small DATA array, 'w' copies DATA
                     into every block byte; anything else skips the test
   argv[3]  repeat   number of passes over all blocks; a value <= 0 sets
                     no pass limit
   argv[4]  timeout  wall-clock limit in seconds measured from program
                     start; a value <= 0 disables the limit

   Prints "init ok" once every block is filled and "score:N" at the end,
   where N is the number of blocks processed by the test.  Returns 1 on
   bad usage or when the block table cannot be allocated, 0 otherwise.  */
int
main (int argc, char **argv)
{
  time_t const start_time = time (NULL);
  int max;
  int repeat;
  int timeout_seconds;

  if (argc != 5)
    {
      return 1;
    }
  /* A negative block count used to be converted to a huge unsigned value;
     reject it (and any non-numeric argument) as a usage error instead.  */
  if (parse_int_arg (argv[1], &max) != 0 || max < 0
      || parse_int_arg (argv[3], &repeat) != 0
      || parse_int_arg (argv[4], &timeout_seconds) != 0)
    {
      return 1;
    }

  time_t const timeout = (time_t) timeout_seconds;
  size_t const block_count = (size_t) max;
  size_t const block_length = (size_t) (1024u * 1024u);

  /* calloc may return NULL for a zero-sized request, so always ask for at
     least one slot; zeroed slots let free_blocks run at any point.  */
  unsigned char **block_array =
    calloc (block_count > 0 ? block_count : 1, sizeof *block_array);
  if (NULL == block_array)
    {
      perror ("calloc");
      return 1;
    }

  unsigned char data[3];
  data[0] = 'a';
  data[1] = 'b';
  data[2] = 'c';

  /* The indices stay `unsigned`, as in the original benchmark, so the
     per-byte index arithmetic being timed is unchanged.  */
  unsigned i = 0u;

  /* Allocate and fill every block; a failed malloc is retried until it
     succeeds or the timeout expires.  */
  for (i = 0u; i < block_count; i++)
    {
      do
        {
          if (timed_out (start_time, timeout))
            {
              puts ("timeouted!");
              free_blocks (block_array, block_count);
              return 0;
            }
          block_array[i] = malloc (block_length);
          if (block_array[i] != NULL)
            {
              unsigned bi = 0u;
              for (bi = 0u; bi < block_length; bi++)
                {
                  block_array[i][bi] =
                    data[bi % ((unsigned) (sizeof (data)))];
                }
            }
          else
            {
              printf ("%u error\n", i);
            }
        }
      while (NULL == block_array[i]);
    }
  puts ("init ok");

  unsigned score = 0u;
  char const mode = argv[2][0];

  if ('r' == mode || 'w' == mode)
    {
      for (;;)
        {
          for (i = 0u; i < block_count; i++)
            {
              /* The timeout is checked once per block, not per byte.  */
              if (timed_out (start_time, timeout))
                {
                  puts ("timeouted!");
                  goto show_score;
                }

              unsigned bi = 0u;
              if ('r' == mode)
                {
                  /* Page read test: block bytes -> data.  */
                  for (bi = 0u; bi < block_length; bi++)
                    {
                      data[bi % ((unsigned) (sizeof (data)))] =
                        block_array[i][bi];
                    }
                }
              else
                {
                  /* Page write test: data -> block bytes.  */
                  for (bi = 0u; bi < block_length; bi++)
                    {
                      block_array[i][bi] =
                        data[bi % ((unsigned) (sizeof (data)))];
                    }
                }
              score++;
            }
          /* Count down only from a non-negative value: starting at 0
             drops to -1 and therefore never ends the loop here.  */
          if (repeat >= 0)
            {
              repeat--;
              if (0 == repeat)
                {
                  goto show_score;
                }
            }
        }
    }

show_score:
  printf ("score:%u\n", score);
  free_blocks (block_array, block_count);
  return 0;
}

我在 Debian Jessie（Linux 3.16，测试次数较少）和 Debian Stretch（Linux 4.9，测试次数较多，结果更确定）上都做了同样的测试。

我已经重复了很多次相同的测试来确定这一点，所以我只发布一个简短的结果。

测试结果：

$ cat /proc/meminfo |grep SwapTotal
SwapTotal:             0 kB
$ time ./a.out 100 r  5 -1
init ok
score:500

real    0m2.689s
user    0m2.604s
sys 0m0.080s
$ time ./a.out 100 w  5 -1
init ok
score:500

real    0m2.567s
user    0m2.496s
sys 0m0.060s
$

【问题讨论】：

这是一个编码问题，属于 Stack Overflow。

标签： linux debian kernel memory

【解决方案1】：

在“r”和“w”两种情况下，循环内的主要赋值语句都是先从内存读取、再写回内存，即它们本质上是相同的——你并没有真正分别测试内存读取和内存写入。事实也证明，两种情况的耗时非常接近。

'w' 情况可能会稍微快一些，因为缓存可能包含您要从内存中读取的值，因为在这种情况下您并没有更改源地址。

【讨论】：

“aren't changing the source address in that case”（即 'w' 的情况）是什么意思？我不明白我在哪里更改了地址，因为这两种情况在我看来是对称的。
您“读取”的地址不会改变，因此在第一次从内存中“读取”之后，该位置的内容可能会在缓存中找到，而根本不会从内存中加载。这可以解释“w”案例的执行速度稍快。
你是说a=b，b的速度比a更重要吗？
不（不确定你的意思）……我是说在“w”的情况下，因为你在赋值右侧使用的值在循环的每次迭代中都不变，CPU 不需要每次都从内存中获取它——它会改为从缓存中获取，这样更快。在“r”的情况下，右侧每次都不同，因此该值不会在缓存中，每次都必须从内存中读取。但最重要的是，两种情况本质上是相同的——我只是想解释为什么“w”情况稍微快一点，而这正是你问题的要点。
据我所知，对于 left = right：在“w”情况下，right 是缓存命中，left 是缓存未命中；在“r”情况下，right 是缓存未命中，left 是缓存命中（我认为小的 data 数组总是在缓存中，因为它足够小并且足够“热”）。那么为什么“w”情况下的缓存命中更多？