如何避免 std::vector<> 初始化其所有元素？答案

【问题标题】：How can I avoid std::vector<> to initialize all its elements?如何避免 std::vector<> 初始化其所有元素？
【发布时间】：2011-08-22 22:45:25
【问题描述】：

编辑：我编辑了问题及其标题以更准确。

考虑以下源代码：

#include <vector>
// Minimal element type from the question: a single int wrapped in a struct
// whose default constructor has an empty body and no initializer list.
struct xyz {
    int v; // never mentioned by the default constructor below

    // Intentionally does nothing.
    xyz()
    {
    }

    // Copies the wrapped int from the source object.
    xyz(const xyz& o)
        : v(o.v)
    {
    }

    // Overwrites the wrapped int and yields *this for chaining.
    xyz& operator=(const xyz& o)
    {
        this->v = o.v;
        return *this;
    }
};

// Builds and returns a vector holding 1024 xyz elements.
// The question's complaint is that this constructor call shows up in the
// generated code as a memory-filling loop.
std::vector<xyz> test() {
    std::vector<xyz> elements(1024);
    return elements;
}

……如何才能避免 vector 分配的内存被其第一个元素的副本初始化？这是一个 O(n) 操作，为了速度我宁愿跳过它，因为我的默认构造函数什么都不做。

如果不存在通用解决方案，则可以使用 g++ 特定解决方案（但我找不到任何属性来做到这一点）。

编辑：生成的代码如下（命令行：arm-elf-g++-4.5 -O3 -S -fno-verbose-asm -o - test.cpp | arm-elf-c++filt | grep -vE '^[[:space:]]+[.@].*$' ）

test():
    mov r3, #0
    stmfd   sp!, {r4, lr}
    mov r4, r0
    str r3, [r0, #0]
    str r3, [r0, #4]
    str r3, [r0, #8]
    mov r0, #4096
    bl  operator new(unsigned long)
    add r1, r0, #4096
    add r2, r0, #4080
    str r0, [r4, #0]
    stmib   r4, {r0, r1}
    add r2, r2, #12
    b       .L4          @
.L8:                     @
    add     r0, r0, #4   @
.L4:                     @
    cmp     r0, #0       @  fill the memory
    movne   r3, #0       @
    strne   r3, [r0, #0] @
    cmp     r0, r2       @
    bne     .L8          @
    str r1, [r4, #4]
    mov r0, r4
    ldmfd   sp!, {r4, pc}

编辑：为了完整起见，这里是 x86_64 的程序集：

.globl test()
test():
LFB450:
    pushq   %rbp
LCFI0:
    movq    %rsp, %rbp
LCFI1:
    pushq   %rbx
LCFI2:
    movq    %rdi, %rbx
    subq    $8, %rsp
LCFI3:
    movq    $0, (%rdi)
    movq    $0, 8(%rdi)
    movq    $0, 16(%rdi)
    movl    $4096, %edi
    call    operator new(unsigned long)
    leaq    4096(%rax), %rcx
    movq    %rax, (%rbx)
    movq    %rax, 8(%rbx)
    leaq    4092(%rax), %rdx
    movq    %rcx, 16(%rbx)
    jmp     L4          @
L8:                     @
    addq    $4, %rax    @
L4:                     @
    testq   %rax, %rax  @ memory-filling loop
    je      L2          @
    movl    $0, (%rax)  @
L2:                     @
    cmpq    %rdx, %rax  @
    jne     L8          @
    movq    %rcx, 8(%rbx)
    movq    %rbx, %rax
    addq    $8, %rsp
    popq    %rbx
    leave
LCFI4:
    ret
LFE450:
EH_frame1:
LSCIE1:
LECIE1:
LSFDE1:
LASFDE1:
LEFDE1:

编辑：我认为结论是：当你想避免不必要的初始化时，不要使用 std::vector<>。我最终自己编写了模板化容器，它的性能更好（并且有针对 NEON 和 ARMv7 的专门版本）。

【问题讨论】：

纯粹出于好奇：为什么？
g++ 不会将值自动初始化为 0。一些编译器在调试模式下会这样做，但不会在优化模式下。
@ildjarn：因为用零填充内存是有代价的，我希望编译器在我保留未初始化的值时尊重我的选择。
您认为v 是值初始化的假设是不正确的：它不是值初始化的。编译器、库、运行时或操作系统可能会为您对内存进行零初始化，但这不是值初始化。
@jcayzac：出于好奇，为什么不简单地调用reserve，而不是实际放置元素？或者，您如何使用 vector 之后未初始化？

标签： c++ optimization gcc default-value

【解决方案1】：

我也很好奇。您只想随机初始化内存吗？

向量元素存储在连续的内存位置，因此可以进行随机初始化。

【讨论】：

我不希望内存随机初始化，我希望它根本不初始化。我不希望我生成的程序集包含一个用任何东西填充内存的循环。我希望它保持原样。

【解决方案2】：

您将所有原语包装在一个结构中：

// Wrapper around a single int so that the element type has a user-declared
// default constructor. The constructor is only declared here; the surrounding
// text asks for it to be defined elsewhere with an empty body.
struct IntStruct
{
    IntStruct();

    int myInt;
}; // the terminating semicolon is required after a struct definition

将 IntStruct() 定义为空构造函数。因此，您将v 声明为IntStruct v;，因此当xyzs 的vector 全部值初始化时，它们所做的只是值初始化v，这是一个无操作。

编辑：我误读了这个问题。如果您有原始类型的vector，这就是您应该做的，因为vector 被定义为在通过resize() 方法创建元素时进行值初始化。结构不需要在构造时对其成员进行值初始化，尽管这些“未初始化”值仍然可以通过其他方式设置为 0——嘿，它们可以是任何东西。

【讨论】：

【解决方案3】：

我没有看到内存已初始化。默认的 int() 构造函数什么都不做，就像在 C 中一样。

程序：

#include <iostream>
#include <vector>

// Element type used by the demo program: one int member, an empty default
// constructor, and hand-written copy operations.
struct xyz {
    int v;

    // Empty body, no initializer list: v is not assigned here.
    xyz() {}

    // Copy construction duplicates the int.
    xyz(const xyz& o)
        : v(o.v)
    {}

    // Copy assignment duplicates the int and returns the target.
    xyz& operator=(const xyz& o)
    {
        v = o.v;
        return *this;
    }
};

// Returns a vector sized to 1024 xyz elements using the count constructor.
std::vector<xyz> test() {
    std::vector<xyz> result(1024);
    return result;
}

// Prints the v member of the first ten elements returned by test(),
// one "index: value" pair per line.
int main()
{
    std::vector<xyz> foo = test();
    int i = 0;
    while (i < 10)
    {
        std::cout << i << ": " << foo[i].v << std::endl;
        ++i;
    }
    return 0;
}

输出：

$ g++ -o foo foo.cc
$ ./foo 
0: 1606418432
1: 1606418432
2: 1606418432
3: 1606418432
4: 1606418432
5: 1606418432
6: 1606418432
7: 1606418432
8: 1606418432
9: 1606418432

编辑：

如果您只是尝试将向量初始化为一些重要的东西，并且不想浪费时间默认构造其内容，您可能想尝试创建自定义迭代器并将其传递给向量的构造函数。

修改示例：

#include <iostream>
#include <vector>
#include <iterator>

// Element type for the iterator-based example: like the earlier xyz, plus a
// converting constructor that stores a caller-supplied int.
struct xyz {
    int v;

    // Leaves v untouched.
    xyz() {}

    // Stores init in v.
    xyz(int init)
        : v(init)
    {}

    // Copies v from the source.
    xyz(const xyz& o)
        : v(o.v)
    {}

    // Assigns v from the source and returns the target.
    xyz& operator=(const xyz& o)
    {
        v = o.v;
        return *this;
    }
};

// Input iterator that manufactures xyz values from a running integer counter.
// Dereferencing yields xyz(count); two iterators compare equal when their
// counters match, so the pair (XYZInitIterator(a), XYZInitIterator(b))
// describes the values a, a+1, ..., b-1.
//
// The increment operators return the iterator itself (pre-increment by
// reference, post-increment as a copy of the previous position), as an input
// iterator is expected to, instead of an xyz value.
class XYZInitIterator: public std::iterator<std::input_iterator_tag, xyz>
{
public:
    // Starts the sequence at init.
    XYZInitIterator(int init): count(init) {}
    XYZInitIterator(const XYZInitIterator& iter): count(iter.count) {}
    XYZInitIterator& operator=(const XYZInitIterator& iter)
    {
        count = iter.count;
        return *this;
    }

    // Produces the element for the current position (returned by value).
    value_type operator*() const { return xyz(count); }

    bool operator==(const XYZInitIterator& other) const
    {
        return count == other.count;
    }
    bool operator!=(const XYZInitIterator& other) const
    {
        return count != other.count;
    }

    // Pre-increment: advance, then return this iterator.
    XYZInitIterator& operator++()
    {
        ++count;
        return *this;
    }

    // Post-increment: advance, but return the position held before the call.
    XYZInitIterator operator++(int)
    {
        XYZInitIterator previous(*this);
        ++count;
        return previous;
    }

private:
    int count; // value handed to xyz(int) on dereference
};

std::vector<xyz> test() {
    XYZInitIterator start(0), end(1024);
    return std::vector<xyz>(start, end);
}

// Prints the first ten elements returned by test(): the index in decimal,
// followed by the element's v member in hexadecimal.
int main()
{
    std::vector<xyz> foo = test();
    int i = 0;
    while (i < 10)
    {
        std::cout << std::dec << i << ": " << std::hex << foo[i].v << std::endl;
        ++i;
    }
    return 0;
}

输出：

$ g++ -o foo foo.cc
$ ./foo 
0: 0
1: 1
2: 2
3: 3
4: 4
5: 5
6: 6
7: 7
8: 8
9: 9

【讨论】：

您刚刚显示它已初始化。仅限于值 1606418432（由 Tony 解释）。

【解决方案4】：

如果你想要一个只保留内存但没有初始化元素的向量，请使用reserve 而不是构造函数：

// Start from an empty vector and request room for 1024 elements.
std::vector<xyz> v;
v.reserve(1024);
// The storage is at least as large as requested...
assert(v.capacity() >= 1024);
// ...but the vector still contains no elements.
assert(v.size() == 0);

【讨论】：

...但是你不能使用它^^
不保证保留区域不会被运行时或操作系统初始化为0。

【解决方案5】：

由于目前声明struct 的方式，没有默认初始化结构的int 成员的机制，因此您会得到默认的C 行为，即不确定的初始化。为了使用默认初始化值初始化int 成员变量，您必须将其添加到结构构造函数的初始化列表中。例如，

// Variant whose default constructor names v in the member-initializer list.
struct xyz {
    int v;

    // v() value-initializes the int, which sets it to 0.
    xyz()
        : v()
    {
    }
};

而下面的写法：

// Variant whose default constructor has no member-initializer list at all.
// (A constructor without initializers must not have the ':' either; a lone
// colon is a syntax error.)
struct xyz {
    xyz() { } // no initialization list, therefore 'v' remains uninitialized
    int v;
};

【讨论】：

吹毛求疵：没有“int 构造函数”之类的东西。只有类类型有构造函数，而 int 不是类类型。 OP 在任何时候都不会显式初始化 int 对象，这意味着它未初始化。
C++03 标准 8.5/5 怎么样？不是说在第二种情况下 v 也应该设置为 0 吗？
这是我对第8.5节的理解……如果你写xyz object;或xyz object = xyz();，那么xyz的默认构造函数就会被调用。在情况 #1 中，存在显式成员初始化列表，这会将 v 的值初始化为 0，但在情况 #2 中，由于默认构造函数不会初始化 v，因此它的值将是不确定的。

【解决方案6】：

这是 vector 的一个奇怪角落。问题并不在于您的元素被初始化了……而在于第一个原型元素中的随机内容被复制到了向量中的所有其他元素。（这种行为在 C++11 中发生了变化：它会对每个元素进行值初始化。）

这样做（/was）有一个很好的理由：考虑一些引用计数的对象......如果你构造一个vector 要求初始化为这样一个对象的 1000 个元素，你显然想要一个引用计数为1000 个，而不是拥有 1000 个独立的“克隆”。我说“显然”是因为首先计算对象引用意味着这是非常可取的。

无论如何，你几乎不走运。实际上，vector 确保所有元素都是相同的，即使它正在同步的内容恰好是未初始化的垃圾。

在非标准 g++ 特定的快乐黑客领域中，我们可以利用 vector 接口中的任何公共模板化成员函数作为后门，只需将模板专门用于某些新类型即可更改私有成员数据。

警告：不仅仅是为了这个“解决方案”，而是为了避免默认构造的整个努力......不要对具有重要不变量的类型这样做 - 你打破封装，很容易让vector 本身或您尝试调用operator=() 的某些操作，复制构造函数和/或析构函数，其中*this/left- 和/或右侧参数不尊重这些不变量。例如，避免使用您希望为 NULL 或指向有效对象、引用计数器、资源句柄等指针的值类型。

#include <iostream>
#include <vector>

// Tag type used to select the specialised std::vector<int>::assign() overload
// in this example. n_ carries the element count the vector should report.
struct Uninitialised_Resize
{
    // Carries the requested element count.
    explicit Uninitialised_Resize(int n) : n_(n) { }

    // Placeholder instance for the unused second argument. n_ is set to 0 so
    // that passing the object by value never copies an indeterminate int.
    explicit Uninitialised_Resize() : n_(0) { }

    int n_;
};

// Explicit specialisation of std::vector<int>'s member template assign() for a
// pair of Uninitialised_Resize arguments. It is used purely as a back door to
// the vector's internal pointers (_M_impl._M_start / _M_finish), which are
// implementation details of the standard library the example was written for.
// NOTE(review): non-standard; whether this compiles depends on how the
// library declares assign() - confirm against the library version in use.
namespace std
{
    template <>
    template <>
    void vector<int>::assign(Uninitialised_Resize ur, Uninitialised_Resize)
    {
        // Move the end-of-elements pointer to start + n_ without writing to
        // any element. The second argument is unused.
        this->_M_impl._M_finish = this->_M_impl._M_start + ur.n_;

        // Note: a simpler alternative (which doesn't need "n_") is to set
        //   this->_M_impl._M_finish = this->_M_impl._M_end_of_storage;
        // which means size() becomes capacity(). That may be more than you
        // reserve()d (due to rounding; good) or more than you have data for
        // (bad if you then have to track in-use elements elsewhere, which
        // makes the situation equivalent to just calling reserve()),
        // but if you can somehow use the extra elements then all is good.
    }
}

// Demo for the assign() back door: fill and free one heap block, then look
// for those values in a second vector that is reserved and "resized" via the
// Uninitialised_Resize specialisation without writing to its elements.
int main()
{
    {
        // try to get some non-0 values on heap ready for recycling...
        std::vector<int> x(10000);
        for (int i = 0; i < x.size(); ++i)
            x[i] = i;
    }

    std::vector<int> x;
    x.reserve(10000);
    // NOTE(review): size() is still 0 here, so these x[i] reads index past the
    // vector's elements into the reserved storage; this is part of the hack.
    for (int i = 1; i < x.capacity(); ++i)
        if (x[0] != x[i])
        {
            // Two reserved slots differ, i.e. the storage is not uniformly filled.
            std::cout << "lucky\n";
            break;
        }
    // Second argument is only there to select the two-argument specialisation.
    x.assign(Uninitialised_Resize(1000), Uninitialised_Resize());

    // After the call, report the first element that differs from x[0].
    for (int i = 1; i < x.size(); ++i)
        if (x[0] != x[i])
        {
            std::cout << "success [0] " << x[0] << " != [" << i << "] "
                << x[i] << '\n';
            break;
        }
}

我的输出：

lucky
success [0] 0 != [1] 1

这表明新的vector 被重新分配了第一个向量在超出范围时释放的堆，并显示这些值没有被分配破坏。当然，如果不仔细检查vector 源，就无法知道其他一些重要的类不变量是否已失效，并且私有成员的确切名称/导入可能随时变化......

【讨论】：

同意；被调用的std::vector<> 构造函数默认初始化xyz 的一个实例，并使用该值作为源复制初始化std::vector<> 中的1024 个元素中的每一个。完全巧合的是，对于 OP，在这种特殊情况下默认初始化 xyz 会导致 xyz::v 成为 0。
这解释了 Mike 看到的奇怪的 1606418432 值。这是一个很大的关闭，优化方面。初始化在代码中无论如何都会被覆盖的内存听起来像是对资源的极大浪费。
如果你要将未初始化的向量传递给其他东西，也许你不想要向量而真的想要一个数组，即new int[1024]?
"你显然想要复制一个原型对象" 这一点都不明显，事实上这不再是 C++0x 中的行为。在 C++0x 中，这个构造函数现在不是复制构造 N 个元素，而是初始化 N 个元素。
@jcayzac：嗯，好消息：如果你有一个符合 C++0x 的 C++ 标准库实现，这正是这个构造函数所做的。 :-)

【解决方案7】：

分配元素的初始化是由分配器模板参数控制的，如果你需要定制，定制它。但是请记住，这很容易在肮脏的黑客领域中结束，因此请谨慎使用。例如，这是一个非常肮脏的解决方案。它将避免初始化，但很可能性能会更差，但为了演示（正如人们所说，这是不可能的！...不可能不在 C++ 程序员的词汇中！）：

// Allocator derived from std::allocator<T> whose construct() can be switched
// off at run time through an external bool.
//
// The allocator stores a pointer to that bool (possibly NULL). construct()
// copy-constructs the element only when the pointer is non-NULL and the bool
// it points to is true; otherwise the memory is left as it is.
template <typename T>
class switch_init_allocator : public std::allocator< T > {
  public:
    // Member types taken over from std::allocator<T>.
    typedef typename std::allocator<T>::value_type      value_type;
    typedef typename std::allocator<T>::pointer         pointer;
    typedef typename std::allocator<T>::const_pointer   const_pointer;
    typedef typename std::allocator<T>::reference       reference;
    typedef typename std::allocator<T>::const_reference const_reference;
    typedef typename std::allocator<T>::size_type       size_type;
    typedef typename std::allocator<T>::difference_type difference_type;

    // Rebinding must yield this allocator family, not plain std::allocator.
    template <typename U>
    struct rebind {
      typedef switch_init_allocator<U> other;
    };

    // Non-throwing constructors and destructor.
    switch_init_allocator(bool* aShouldInit = NULL) throw()
      : std::allocator<T>(),
        should_init(aShouldInit)
    {
    }

    switch_init_allocator(const switch_init_allocator<T>& rhs) throw()
      : std::allocator<T>(rhs),
        should_init(rhs.should_init)
    {
    }

    // Conversion from an allocator for another element type. The flag pointer
    // comes from the second argument, not from rhs.
    template <typename U>
    switch_init_allocator(const switch_init_allocator<U>& rhs, bool* aShouldInit = NULL) throw()
      : std::allocator<T>(rhs),
        should_init(aShouldInit)
    {
    }

    ~switch_init_allocator() throw()
    {
    }

    // Hides the base-class construct(): copy-construct *p from cr only while
    // the switch is present and on.
    void construct( pointer p, const_reference cr) {
      if (!should_init || !*should_init)
        return; // switch absent or off: leave the memory untouched
      new (static_cast<void*>(p)) T ( cr );
    }

  private:
    bool* should_init; // not owned; may be NULL
};

// Wrapper that pairs a std::vector using switch_init_allocator with the bool
// that allocator consults. The wrapped vector is created with the switch off,
// so the initial aCount elements are not constructed by the allocator.
template <typename T>
class my_vector : public std::vector<T, switch_init_allocator<T> > {
  public:
    typedef switch_init_allocator<T> allocator_type;
    typedef std::vector<T, allocator_type > vector_type;
    typedef std::vector<T, switch_init_allocator<T> > base_type;
    typedef typename base_type::size_type size_type;

    // Creates the inner vector with aCount elements while the switch is off.
    my_vector(size_type aCount)
      : switch_flag(false),
        vec(aCount, allocator_type(&switch_flag))
    {
    }
    //... and the rest of this wrapper class...

    // Access to the wrapped vector.
    vector_type& get_vector() { return vec; }
    const vector_type& get_vector() const { return vec; }

    // Turns element construction in the allocator on (true) or off (false).
    void set_switch(bool value) { switch_flag = value; }

  private:
    // Declaration order matters: members are initialized in this order, and
    // vec's allocator is handed the address of switch_flag.
    bool switch_flag;
    vector_type vec;
};

// Empty placeholder element type used to instantiate my_vector in main().
class xyz{};

int main(){
  my_vector<xyz> v(1024); //this won't initialize the memory at all.
  v.set_switch(true); //set back to true to turn initialization back on (needed for resizing and such)
}

当然，上面的方法很笨拙，不推荐使用，也肯定不会比让内存填满第一个元素的副本更好（特别是因为这种标志检查会拖慢每个元素的构造）。但在寻求优化 STL 容器中元素的分配和初始化时，这是一条值得探索的途径，所以我想展示它。关键是，您可以注入代码来阻止 std::vector 容器调用复制构造函数来初始化元素的唯一位置，是向量分配器对象的 construct 函数。

此外，您可以取消“开关”并简单地执行“no-init-allocator”，但随后，您还关闭了在调整大小期间复制数据所需的复制构造（这将使矢量类不太有用）。

【讨论】：

我刚刚测试了这个想法：修改你的代码，强制 should_init == false（因此分配器被命名为 uninitialized_allocator），但它不起作用。它仍然生成相同的汇编。
我已经添加了 asm volatile("@构造发射");在construct()中，并且该行永远不会出现在程序集中，这意味着没有代码会调用该方法。
通过从头开始定义分配器（而不是从 std::allocator 派生），我设法让它工作（使用无初始化版本）。问题在于嵌套在 std::allocator 中的“重新绑定”类模板。有了这个，确实没有初始化发生，只是一个 malloc() 然后一个 free() （我简单的 no-init 分配函数的一部分）。
我在其他方法中也添加了 asm 注释，结果分配器的默认构造函数和析构函数是 vector 进行的仅有的两个调用。至于模板化的复制构造函数，我认为 GNU libstdc++ 使用了一个名为 rebind 的模板成员类。对于 construct()，我仍在查看它的头文件，其中的东西似乎很复杂……看起来你的覆盖并没有覆盖 GNU 版本的 std::allocator<> 中的任何东西。
请查看我添加的 rebind 类模板（这是一个正式的要求，我只是忘记了它；缺少它会使 vector 在某种程度上退回到 std::allocator 的内部实现）。

【解决方案8】：

作为参考，下面的代码导致了 g++ 中的最佳组装： 我并不是说我会使用它，也不鼓励你这样做。这不是正确的 C++！这是一个非常非常肮脏的 hack！ 我猜它甚至可能取决于 g++ 版本，所以，真的，不要使用它。如果我看到它在某个地方使用，我会呕吐。

#include <vector>

// Creates a vector-like container T that reports `size` elements and at least
// max(size, capacity) elements of storage.
//
// On libstdc++ the container's internal pointers are set directly, so the
// vector itself never runs its element-initialization loop. This relies on
// implementation details (_Vector_base / _M_impl) and is a hack, not portable
// C++. With any other standard library the portable reserve() + resize()
// fallback is used, which does initialize the elements.
//
// size     - number of elements the returned container reports
// capacity - requested storage, raised to `size` when smaller
template<typename T>
static T create_uninitialized(size_t size, size_t capacity) {
    T v;
    // Clamp before anything is allocated so the storage is always large
    // enough for `size` elements.
    capacity=std::max(size, capacity); // ensure size<=capacity
#if defined(__GLIBCXX__)
    // The internals used below belong to libstdc++, so test for the library
    // rather than for the compiler.
    typedef typename T::value_type     value_type;
    typedef typename T::allocator_type allocator_type;
    typedef std::_Vector_base<value_type, allocator_type> base_type;
    // _M_impl is publicly reachable through _Vector_base.
    base_type& xb(reinterpret_cast<base_type&>(v));
    // NOTE(review): this storage comes from new[], but the vector will
    // presumably release it through its allocator - confirm this pairing is
    // acceptable for the target library.
    value_type* p(new value_type[capacity]);
#if !defined(__EXCEPTIONS)
    // Without exceptions an allocation failure is checked for as a null pointer.
    size=p?size:0;         // size=0 if p is null
    capacity=p?capacity:0; // capacity=0 if p is null
#endif
    xb._M_impl._M_start = p;
    xb._M_impl._M_finish = p+size;
    xb._M_impl._M_end_of_storage = p+capacity;
#else
    // Fallback, for the other standard libraries
    v.reserve(capacity);
    v.resize(size);
#endif
    return v;
}

// Test element type: an int with an empty default constructor and hand-written
// copy operations, plus a convenience alias for a vector of itself.
struct xyz {
    typedef std::vector<xyz> vector;

    int v;

    // Empty on purpose: v is not assigned.
    xyz()
    {
    }

    // Copies v from the source.
    xyz(const xyz& o)
        : v(o.v)
    {
    }

    // Assigns v from the source and returns the target.
    xyz& operator=(const xyz& o)
    {
        this->v = o.v;
        return *this;
    }
};

// test functions for assembly dump
// Test entry point for the assembly dump: returns a vector that reports 12
// elements and has room for 256, obtained via create_uninitialized().
extern xyz::vector xyz_create() {
    const size_t element_count = 12;
    const size_t reserved_slots = 256;
    return create_uninitialized<xyz::vector>(element_count, reserved_slots);
}

// Writes each element's own index into its v member (0, 1, 2, ...).
extern void xyz_fill(xyz::vector& x) {
    int index = 0;
    while (index < x.size()) {
        x[index].v = index;
        ++index;
    }
}

// test
#include <iostream>
// Creates the vector via xyz_create(), fills it with xyz_fill(), and writes
// every element's v member to std::cerr, one per line.
int main() {
    xyz::vector x(xyz_create());
    xyz_fill(x);
    // Dump the vector
    int i = 0;
    while (i < x.size()) {
        std::cerr << x[i].v << "\n";
        ++i;
    }
    return 0;
}

编辑：意识到_Vector_impl 是公开的，这简化了事情。

编辑：这里是为 xyz_create() 生成的 ARM 汇编，使用 -fno-exceptions 编译（符号名已用 c++filt 还原），其中没有任何内存初始化循环：

xyz_create():
    mov r3, #0
    stmfd   sp!, {r4, lr}
    mov r4, r0
    str r3, [r0, #0]
    str r3, [r0, #4]
    str r3, [r0, #8]
    mov r0, #1024
    bl  operator new[](unsigned long)(PLT)
    cmp r0, #0
    moveq   r3, r0
    movne   r3, #1024
    moveq   r2, r0
    movne   r2, #48
    add r2, r0, r2
    add r3, r0, r3
    stmia   r4, {r0, r2, r3}    @ phole stm
    mov r0, r4
    ldmfd   sp!, {r4, pc}

..这里是 x86_64：

xyz_create():
    pushq   %rbp
    movq    %rsp, %rbp
    pushq   %rbx
    movq    %rdi, %rbx
    subq    $8, %rsp
    movq    $0, (%rdi)
    movq    $0, 8(%rdi)
    movq    $0, 16(%rdi)
    movl    $1024, %edi
    call    operator new[](unsigned long)
    cmpq    $1, %rax
    movq    %rax, (%rbx)
    sbbq    %rdx, %rdx
    notq    %rdx
    andl    $1024, %edx
    cmpq    $1, %rax
    sbbq    %rcx, %rcx
    leaq    (%rax,%rdx), %rdx
    notq    %rcx
    andl    $48, %ecx
    movq    %rdx, 16(%rbx)
    leaq    (%rax,%rcx), %rcx
    movq    %rbx, %rax
    movq    %rcx, 8(%rbx)
    addq    $8, %rsp
    popq    %rbx
    leave
    ret

【讨论】：

好吧，你已经把我弄脏了……废话！修复它的方法多么痛苦……！ ;-}

【解决方案9】：

你无法避免 std::vector 的元素初始化。

出于这个原因，我使用了一个 std::vector 派生类。 resize() 在本例中实现。你也必须实现构造函数。

虽然这不是标准的 C++ 而是编译器实现 :-(

#include <vector>

// std::vector subclass whose resize() grows the vector without initializing
// the new elements. It reaches into the base class's _M_impl member, so it
// depends on the standard library implementation rather than on standard C++.
// Only resize() is provided here; constructors are left to the user.
template<typename T, typename Alloc = std::allocator<T>>
class uvector : public std::vector<T, Alloc>
{
    typedef std::vector<T, Alloc> parent;
    using parent::_M_impl;

public:
    using typename parent::size_type;
    using parent::size;
    using parent::capacity;
    using parent::reserve;

    // Shrinking (or keeping the size) is delegated to std::vector::resize().
    // Growing makes sure the storage is large enough and then just moves the
    // end-of-elements pointer; the added elements are not written to.
    void resize(size_type sz)
    {
        if (sz > size())
        {
            if (capacity() < sz)
                reserve(sz);
            _M_impl._M_finish = _M_impl._M_start + sz;
            return;
        }
        parent::resize(sz);
    }
};

【讨论】：