【问题标题】:Base64 decode snippet in C++(C++ 中的 Base64 解码代码片段)
【发布时间】:2010-09-15 21:47:18
【问题描述】:

C++ 中有可以免费使用的 Base64 解码代码片段(snippet)吗?

【问题讨论】:

标签: c++ base64


【解决方案1】:

这是我对 René Nyffenegger 最初编写的实现(the implementation that was originally written by René Nyffenegger)所做的修改。为什么要修改它?因为在我看来,用 std::string 对象来存放和处理二进制数据并不合适 ;)

base64.h

#ifndef _BASE64_H_
#define _BASE64_H_

#include <vector>
#include <string>
typedef unsigned char BYTE;

// Encodes the bufLen bytes starting at buf and returns the Base64 text.
std::string base64_encode(BYTE const* buf, unsigned int bufLen);
// Decodes Base64 text and returns the decoded bytes.
std::vector<BYTE> base64_decode(std::string const&);

#endif

base64.cpp

#include "base64.h"
#include <iostream>

// Base64 alphabet: the position of a character in this string is the 6-bit
// value that the character encodes.
static const std::string base64_chars(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");


// Reports whether c can appear in Base64 data: '+', '/', or any character
// that isalnum() classifies as alphanumeric. The padding character '=' is
// not accepted.
static inline bool is_base64(BYTE c) {
  if (c == '+' || c == '/')
    return true;
  return isalnum(c) != 0;
}

// Encodes bufLen bytes starting at buf as Base64 text.
//
// Every complete group of three input bytes becomes four alphabet characters.
// A final group of one or two bytes is zero-extended, encoded, and completed
// with '=' so that the result length is always a multiple of four.
// Returns an empty string when bufLen is 0.
std::string base64_encode(BYTE const* buf, unsigned int bufLen) {
  std::string encoded;

  const unsigned int tail = bufLen % 3;           // bytes left after the full groups
  BYTE const* const fullEnd = buf + (bufLen - tail);

  // Complete 3-byte groups: 24 bits in, four 6-bit indices out.
  for (BYTE const* p = buf; p != fullEnd; p += 3) {
    encoded += base64_chars[p[0] >> 2];
    encoded += base64_chars[((p[0] & 0x03) << 4) | (p[1] >> 4)];
    encoded += base64_chars[((p[1] & 0x0f) << 2) | (p[2] >> 6)];
    encoded += base64_chars[p[2] & 0x3f];
  }

  if (tail != 0) {
    // Missing bytes of the last group are treated as zero.
    const BYTE first = fullEnd[0];
    const BYTE second = (tail == 2) ? fullEnd[1] : static_cast<BYTE>(0);

    encoded += base64_chars[first >> 2];
    encoded += base64_chars[((first & 0x03) << 4) | (second >> 4)];
    if (tail == 2)
      encoded += base64_chars[(second & 0x0f) << 2];

    // One '=' per missing input byte.
    encoded.append(3 - tail, '=');
  }

  return encoded;
}

std::vector<BYTE> base64_decode(std::string const& encoded_string) {
  int in_len = encoded_string.size();
  int i = 0;
  int j = 0;
  int in_ = 0;
  BYTE char_array_4[4], char_array_3[3];
  std::vector<BYTE> ret;

  while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
    char_array_4[i++] = encoded_string[in_]; in_++;
    if (i ==4) {
      for (i = 0; i <4; i++)
        char_array_4[i] = base64_chars.find(char_array_4[i]);

      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

      for (i = 0; (i < 3); i++)
          ret.push_back(char_array_3[i]);
      i = 0;
    }
  }

  if (i) {
    for (j = i; j <4; j++)
      char_array_4[j] = 0;

    for (j = 0; j <4; j++)
      char_array_4[j] = base64_chars.find(char_array_4[j]);

    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

    for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
  }

  return ret;
}

用法如下:

// Round trip: encode the raw bytes held in myData, then decode them again.
// Note: &myData[0] is only valid when myData is not empty.
std::vector<BYTE> myData;
...
std::string encodedData = base64_encode(&myData[0], myData.size());
std::vector<BYTE> decodedData = base64_decode(encodedData);

【讨论】:

  • 谢谢。对您的代码的使用有任何限制吗? (我的个人案例将是学术 CFD 代码)。
  • 我尝试使用您的方法解码 jpg 文件,方法是将整个矢量写入具有其大小的 CFile,但图像标题已损坏并不令人意外。不过,大小是相等的。有更好的方法来恢复图像文件吗?
  • @masche:这是关于在字节级别上对任何类型的数据进行编码和解码。图像->原始数据(字节)->编码为base64字符串,然后返回base64字符串->解码为原始数据(字节)->构建一些输入流或对象或在其之上的任何内容以将其用作图像再次...
  • 抱歉 - 您的代码运行良好,甚至可以恢复图像。简单地将整个向量压缩到 CFile 中,这太愚蠢了,这是行不通的!如果我迭代向量并将每个字节写入文件,它就可以工作。也许文件流在这里是一个更好的解决方案。
  • 在 std::string 中存储二进制数据有什么问题?
【解决方案2】:

Encoding and decoding base 64 with C++

这是该页面的实现:

/*
   base64.cpp and base64.h

   Copyright (C) 2004-2008 René Nyffenegger

   This source code is provided 'as-is', without any express or implied
   warranty. In no event will the author be held liable for any damages
   arising from the use of this software.

   Permission is granted to anyone to use this software for any purpose,
   including commercial applications, and to alter it and redistribute it
   freely, subject to the following restrictions:

   1. The origin of this source code must not be misrepresented; you must not
      claim that you wrote the original source code. If you use this source code
      in a product, an acknowledgment in the product documentation would be
      appreciated but is not required.

   2. Altered source versions must be plainly marked as such, and must not be
      misrepresented as being the original source code.

   3. This notice may not be removed or altered from any source distribution.

   René Nyffenegger rene.nyffenegger@adp-gmbh.ch

*/

// Base64 alphabet; a character's index is the 6-bit value it stands for.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"   // values  0..25
    "abcdefghijklmnopqrstuvwxyz"   // values 26..51
    "0123456789"                   // values 52..61
    "+/";                          // values 62 and 63


// Reports whether c can appear in Base64 data: '+', '/', or any character
// that isalnum() classifies as alphanumeric. The padding character '=' is
// not accepted.
static inline bool is_base64(unsigned char c) {
  if (c == '+' || c == '/')
    return true;
  return isalnum(c) != 0;
}

// Encodes in_len bytes starting at bytes_to_encode as Base64 text.
//
// Each group of three input bytes yields four alphabet characters. A final
// group of one or two bytes is zero-extended and completed with '=' padding,
// so the result length is always a multiple of four. Returns an empty string
// when in_len is 0.
//
// The output size is known in advance, so the string is reserved once instead
// of growing character by character.
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
  std::string ret;
  ret.reserve((static_cast<std::string::size_type>(in_len) + 2) / 3 * 4);

  // Full groups: pack three bytes into 24 bits and slice out four sextets.
  while (in_len >= 3) {
    const unsigned long triple = (static_cast<unsigned long>(bytes_to_encode[0]) << 16) |
                                 (static_cast<unsigned long>(bytes_to_encode[1]) << 8) |
                                 static_cast<unsigned long>(bytes_to_encode[2]);
    ret += base64_chars[(triple >> 18) & 0x3f];
    ret += base64_chars[(triple >> 12) & 0x3f];
    ret += base64_chars[(triple >> 6) & 0x3f];
    ret += base64_chars[triple & 0x3f];

    bytes_to_encode += 3;
    in_len -= 3;
  }

  // Remainder of one or two bytes: the missing bytes count as zero and each
  // missing byte is represented by one '=' in the output.
  if (in_len != 0) {
    unsigned long triple = static_cast<unsigned long>(bytes_to_encode[0]) << 16;
    if (in_len == 2)
      triple |= static_cast<unsigned long>(bytes_to_encode[1]) << 8;

    ret += base64_chars[(triple >> 18) & 0x3f];
    ret += base64_chars[(triple >> 12) & 0x3f];
    if (in_len == 2)
      ret += base64_chars[(triple >> 6) & 0x3f];
    else
      ret += '=';
    ret += '=';
  }

  return ret;
}
// Decodes Base64 text and returns the decoded bytes in a std::string (the
// result may contain embedded '\0' bytes; use size(), not c_str(), to read it).
//
// Decoding stops at the first '=' or at the first character outside the
// Base64 alphabet; everything decoded so far is returned. A trailing
// incomplete group of k characters contributes k - 1 bytes.
//
// Characters are classified with a 256-entry reverse table instead of a
// linear search through the alphabet, the classification does not depend on
// the current locale, and the output is reserved up front.
std::string base64_decode(std::string const& encoded_string) {
  // Reverse lookup table, built once on first use:
  // value[c] is the 6-bit value of character c, or -1 if c is not in the alphabet.
  struct ReverseTable {
    signed char value[256];
    ReverseTable() {
      for (int k = 0; k < 256; ++k)
        value[k] = -1;
      const char alphabet[] =
          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
      for (int k = 0; k < 64; ++k)
        value[static_cast<unsigned char>(alphabet[k])] = static_cast<signed char>(k);
    }
  };
  static const ReverseTable table;

  std::string ret;
  ret.reserve(encoded_string.size() / 4 * 3 + 2);

  unsigned int window = 0;  // most recent sextets, newest in the low bits
  int pendingBits = 0;      // bits in window that have not been emitted yet

  for (std::string::size_type pos = 0; pos < encoded_string.size(); ++pos) {
    const int sextet = table.value[static_cast<unsigned char>(encoded_string[pos])];
    if (sextet < 0)
      break;  // '=' padding or a foreign character ends the payload

    // Unsigned arithmetic: bits that were already emitted fall off the top.
    window = (window << 6) | static_cast<unsigned int>(sextet);
    pendingBits += 6;

    if (pendingBits >= 8) {
      pendingBits -= 8;
      ret += static_cast<char>((window >> pendingBits) & 0xFF);
    }
  }

  return ret;
}

【讨论】:

  • 我们应该避免所有这些不必要的字符串连接——既然我们知道in_len,我们知道ret的长度,为什么不在初始化时给它一个固定的长度呢?
  • 在解码函数中,if (i) 之后的第一个 for 循环并不是必需的。如果把 char_array_4 中剩余的字节填充为 0,find() 会返回 -1,但这个结果根本没有被用到。因此,第二个 for 循环可以写成:for (j = 0; j < i; j++)。另外,计算 char_array_3[2] 的第三行完全没用,可以去掉。为什么?因为第二个代码块只处理余下的部分(当 i 不为零时),而这只可能对应原始数据中的一个或两个字节(如果有三个字节,就不会有余下的部分,因为三个字节恰好可以编码成 4 个字符)。
  • Google 并不总是您的朋友。这个实现几乎是你可能选择的最糟糕的一个。看到这个:stackoverflow.com/questions/342409/…
  • 这个代码有问题。将解码后的值存储在字符串中是错误的!想象一下你有一个字节数组被编码,一些值是'\0',编码部分绝对没问题。但是当您将其反转时,该值将被截断为 '\0'。请参考@LihO 的回答。
【解决方案3】:

这里已经有好几个代码片段了。不过,下面这个紧凑、高效,并且对 C++11 友好:

// Encodes the bytes of `in` as Base64 text, padded with '=' to a multiple of
// four characters. Returns an empty string for empty input.
//
// The bit accumulator is unsigned: it is shifted left on every input byte and
// its high bits are deliberately allowed to fall off, which would be undefined
// behaviour with a signed type. Only the low bits are ever read back.
static std::string base64_encode(const std::string &in) {

    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string out;
    out.reserve((in.size() + 2) / 3 * 4);

    unsigned int val = 0;  // accumulated input bits, newest byte in the low bits
    int valb = -6;         // bit offset of the next sextet to emit, minus 6
    for (unsigned char c : in) {
        val = (val << 8) + c;
        valb += 8;
        while (valb >= 0) {
            out.push_back(alphabet[(val >> valb) & 0x3F]);
            valb -= 6;
        }
    }
    // Leftover bits (2 or 4 of them): zero-extend to a full sextet.
    if (valb > -6) out.push_back(alphabet[((val << 8) >> (valb + 8)) & 0x3F]);
    while (out.size() % 4) out.push_back('=');
    return out;
}

// Decodes Base64 text and returns the decoded bytes.
//
// Decoding stops at the first character that is not in the Base64 alphabet
// (which includes the '=' padding); the bytes decoded so far are returned.
//
// The reverse lookup table is built once, not on every call, and the bit
// accumulator is unsigned so that repeatedly shifting it left is well defined.
static std::string base64_decode(const std::string &in) {

    // T[c] is the 6-bit value of character c, or -1 if c is not in the alphabet.
    static const std::vector<int> T = [] {
        std::vector<int> table(256, -1);
        const char alphabet[] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        for (int i = 0; i < 64; i++) table[static_cast<unsigned char>(alphabet[i])] = i;
        return table;
    }();

    std::string out;
    out.reserve(in.size() / 4 * 3 + 2);

    unsigned int val = 0;  // accumulated sextets, newest in the low bits
    int valb = -8;         // bit offset of the next byte to emit, minus 8
    for (unsigned char c : in) {
        if (T[c] == -1) break;
        val = (val << 6) + static_cast<unsigned int>(T[c]);
        valb += 6;
        if (valb >= 0) {
            out.push_back(char((val >> valb) & 0xFF));
            valb -= 8;
        }
    }
    return out;
}

【讨论】:

  • 我应该收回它...我不应该在调试模式下进行性能测试,抱歉。它至少比公认的解决方案快。
  • 将“int val”移出其范围是 UB。 "unsigned val=0; int valb=..." 是正确的。
  • typedef unsigned char uchar;
【解决方案4】:

我认为这个效果更好:

#include <string>

// Base64 alphabet used by the encoder; a character's index is its 6-bit value.
static const char* B64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Reverse lookup used by the decoder, indexed by character code (0..255).
// Besides the standard alphabet it also maps '-' and '.' to 62 and ',' and '_'
// to 63. Every character that is not listed (including '=') maps to 0, so
// invalid input is decoded as zero bits rather than rejected. Entries after
// 'z' are not spelled out and are therefore zero-initialized.
static const int B64index[256] =
{
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  62, 63, 62, 62, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0,  0,  0,  0,  0,  0,
    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0,  0,  0,  0,  63,
    0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

// Encodes len bytes starting at data as Base64 text with '=' padding.
//
// The result string is created at its final size, pre-filled with '=', and
// the encoded characters are written over it; whatever is not overwritten at
// the end remains as padding.
const std::string b64encode(const void* data, const size_t &len)
{
    const unsigned char *bytes = (const unsigned char*) data;
    const size_t remainder = len % 3;       // bytes in the final partial group
    const size_t whole = len - remainder;   // bytes covered by complete groups

    std::string result((len + 2) / 3 * 4, '=');
    char *out = &result[0];

    // Complete groups: three bytes -> four characters.
    for (size_t i = 0; i < whole; i += 3)
    {
        const int triple = (int(bytes[i]) << 16) | (int(bytes[i + 1]) << 8) | bytes[i + 2];
        *out++ = B64chars[triple >> 18];
        *out++ = B64chars[(triple >> 12) & 0x3F];
        *out++ = B64chars[(triple >> 6) & 0x3F];
        *out++ = B64chars[triple & 0x3F];
    }

    if (remainder == 1)
    {
        // One byte left: two characters, then "==".
        const int n = bytes[whole];
        *out++ = B64chars[n >> 2];
        *out++ = B64chars[(n << 4) & 0x3F];
        *out++ = '=';
    }
    else if (remainder == 2)
    {
        // Two bytes left: three characters, then "=".
        const int n = (int(bytes[whole]) << 8) | bytes[whole + 1];
        *out++ = B64chars[(n >> 10) & 0x3F];
        *out++ = B64chars[(n >> 4) & 0x3F];
        *out++ = B64chars[(n << 2) & 0x3F];
    }
    return result;
}

// Decodes len characters of Base64 text starting at data.
//
// Up to two trailing '=' characters are treated as padding and ignored, so
// both padded and unpadded input are accepted. Characters are translated
// through B64index without validation. A final partial group of two
// characters yields one byte and of three characters yields two bytes; a lone
// trailing character carries only six bits and is dropped.
//
// Only the bytes data[0 .. len-1] are read.
const std::string b64decode(const void* data, const size_t &len)
{
    if (len == 0) return "";

    const unsigned char *p = static_cast<const unsigned char*>(data);

    // Strip at most two trailing '=' padding characters.
    size_t used = len;
    while (used > 0 && len - used < 2 && p[used - 1] == '=') --used;

    const size_t tail = used % 4;                   // characters in the partial group
    const size_t last = used - tail;                // characters in complete groups
    const size_t extra = tail > 1 ? tail - 1 : 0;   // bytes produced by the partial group

    std::string result(last / 4 * 3 + extra, '\0');
    unsigned char *str = reinterpret_cast<unsigned char*>(&result[0]);
    size_t j = 0;

    // Complete groups: four characters -> three bytes.
    for (size_t i = 0; i < last; i += 4)
    {
        int n = B64index[p[i]] << 18 | B64index[p[i + 1]] << 12 | B64index[p[i + 2]] << 6 | B64index[p[i + 3]];
        str[j++] = n >> 16;
        str[j++] = n >> 8 & 0xFF;
        str[j++] = n & 0xFF;
    }

    // Partial group: p[last + 1] exists because tail >= 2, and p[last + 2]
    // is only touched when tail == 3.
    if (tail > 1)
    {
        int n = B64index[p[last]] << 18 | B64index[p[last + 1]] << 12;
        str[j++] = n >> 16;
        if (tail > 2)
        {
            n |= B64index[p[last + 2]] << 6;
            str[j++] = n >> 8 & 0xFF;
        }
    }
    return result;
}

// Convenience overload: Base64-encodes the bytes held in str.
std::string b64encode(const std::string& str)
{
    const char* const bytes = str.c_str();
    const size_t count = str.size();
    return b64encode(bytes, count);
}

// Convenience overload: decodes the Base64 text held in str64.
std::string b64decode(const std::string& str64)
{
    const char* const text = str64.c_str();
    const size_t count = str64.size();
    return b64decode(text, count);
}

感谢Jens Alfke for pointing out a performance issue,我对这篇帖子做了一些修改。这个工作方式比以前更快。它的另一个优点是可以顺利处理损坏的数据。

上一版:虽然在这类问题中,速度似乎有点过头了,但只是为了好玩,我做了一些其他修改,以使这个算法成为目前最快的算法据我所知。特别感谢 GaspardP for his valuable suggestions 和不错的基准测试。

【讨论】:

  • 所有对strchr 的调用都会减慢解码器的速度——每个解码的字节平均循环32 次。大多数解决方案使用 256 项查找表来避免这种情况,这样会快很多。
  • 我在stackoverflow.com/questions/342409/… 上发表了评论——虽然这段代码不是编码最快的,但它是解码最快的(与其他 16 个实现相比)。
  • 在解码器中,您可以通过使用char* 而不是std::string(只需执行char* out = &amp;str[0],然后使用out[j++] 而不是 str[j++]) 通过这样做,您将跳过 std::string::operator[] 所做的不必要检查。另外,避免最后一个push_back,通过分配一个多字节(std::string str;str.resize(3*((len+3)/4));,然后在最后使用out[j++]everywere 和str.resize(j);,这可能会导致非常昂贵
  • 您在上次编辑中添加了内存泄漏,更不用说缓冲区副本了。不要在没有delete 的情况下使用new。实际上,根本不要使用new。 @Gaspard,我不知道有任何“std::string::operator[] 进行了不必要的检查”(事实上,我有理由确定没有,至少在发布时),但如果真的绝望,你可以使用 vector&lt;char&gt; -无论如何,您的替换代码不是 Gaspard 建议的,除了元素访问之外,它仍然使用字符串进行所有操作,并且无论我认为这是不必要的事实,它都是安全/快速的;)
  • @polfosolఠ_ఠ,谢谢。请考虑在编码器中使用 unsigned char *p,而不是 char *p。如果我的输入包含 >= 0x80 的字节,生成的 base64 字符串就会损坏。加上 unsigned 之后,似乎一切正常。
【解决方案5】:

使用base-n mini lib,你可以做到以下几点:

// Input to encode; the element type and the contents are placeholders.
some_data_t in[] { ... };
constexpr int len = sizeof(in)/sizeof(in[0]);  // number of elements in `in`

std::string encoded;
// Encode the range [in, in + len); the output iterator appends to `encoded`.
bn::encode_b64(in, in + len, std::back_inserter(encoded));

// Decode back into a buffer with the same element count as the input.
some_data_t out[len];
bn::decode_b64(encoded.begin(), encoded.end(), out);

API 是通用的,基于迭代器的。

披露:我是作者。

【讨论】:

  • std::size 会比sizeof hax 更好
  • 确实,从 C++17 开始,这显然才是正确的做法。
  • 从一开始就是正确的做法。只是在 C++17 之前你必须自己实现它(但它实际上是单行的)。 sizeof hack 在 C++ 中从不没问题。
【解决方案6】:

根据 GaspardP 提出的this excellent comparison,我不会选择这个解决方案。这不是最差的,但也不是最好的。唯一的原因是它可能更容易理解。

我发现其他两个答案很难理解。它们还会在我的编译器中产生一些警告,并且在解码部分使用 find 函数会导致效率非常低。所以我决定自己动手。

标题:

#ifndef _BASE64_H_
#define _BASE64_H_

#include <vector>
#include <string>
typedef unsigned char BYTE;

// Stateless helper grouping the Base64 encode/decode functions.
class Base64
{
public:
    // Encodes the bytes in buf; returns an empty string for an empty vector.
    static std::string encode(const std::vector<BYTE>& buf);
    // Encodes bufLen bytes starting at buf.
    static std::string encode(const BYTE* buf, unsigned int bufLen);
    // Decodes Base64 text. The argument is taken by value because the
    // implementation appends '=' to it until its length is a multiple of 4.
    static std::vector<BYTE> decode(std::string encoded_string);
};

#endif

主体:

// Reverse lookup table for decoding, indexed by character code 0..127.
// 255 marks a character that carries no data (this includes '=').
// Both '+' and '-' map to 62 and both '/' and '_' map to 63, so the standard
// and the URL-safe alphabets are accepted.
static const BYTE from_base64[] = {    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
                                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
                                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255,  62, 255,  63,
                                     52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 255, 255, 255,
                                    255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
                                     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255,  63,
                                    255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
                                     41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255};

// Encoding alphabet: to_base64[v] is the character for the 6-bit value v.
static const char to_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";


std::string Base64::encode(const std::vector<BYTE>& buf)
{
    if (buf.empty())
        return ""; // Avoid dereferencing buf if it's empty
    return encode(&buf[0], (unsigned int)buf.size());
}

// Encodes bufLen bytes starting at buf as Base64 text with '=' padding.
//
// The input is treated as if it were zero-padded to a multiple of three
// bytes; every 3-byte group becomes four characters, and the characters that
// stem purely from the zero padding are then overwritten with '='.
std::string Base64::encode(const BYTE* buf, unsigned int bufLen)
{
    // Number of 3-byte groups after rounding the input length up
    const size_t groups = (static_cast<size_t>(bufLen) + 2) / 3;
    // Bytes that had to be added to reach a multiple of 3
    const size_t missing = groups * 3 - bufLen;
    // Four output characters per group
    const size_t ret_size = groups * 4;

    std::string ret;
    ret.reserve(ret_size);

    for (size_t group = 0; group < groups; ++group)
    {
        // Fetch the group, substituting 0 for bytes past the end of the input
        const size_t index = group * 3;
        BYTE b3[3];
        for (size_t k = 0; k < 3; ++k)
            b3[k] = (index + k < bufLen) ? buf[index + k] : 0;

        // Slice the 24 bits into four 6-bit alphabet indices
        ret.push_back(to_base64[b3[0] >> 2]);
        ret.push_back(to_base64[((b3[0] & 0x03) << 4) | (b3[1] >> 4)]);
        ret.push_back(to_base64[((b3[1] & 0x0f) << 2) | (b3[2] >> 6)]);
        ret.push_back(to_base64[b3[2] & 0x3f]);
    }

    // One '=' per missing input byte, written from the end backwards
    for (size_t end = ret_size; end > ret_size - missing; --end)
        ret[end - 1] = '=';

    return ret;
}

// Decodes Base64 text (standard or URL-safe alphabet) into raw bytes.
//
// The argument is taken by value so that it can be padded with '=' up to a
// multiple of four characters. Within each group of four characters, an
// output byte is only emitted when the character that completes it carries
// data; '=' and any other character without a table value (0xff) suppress it.
//
// Characters are converted to unsigned char before they are compared with 'z'
// and used as a table index: on platforms where char is signed, bytes >= 0x80
// would otherwise compare as "<= 'z'" and index from_base64 with a negative
// offset.
std::vector<BYTE> Base64::decode(std::string encoded_string)
{
    // Make sure string length is a multiple of 4
    while ((encoded_string.size() % 4) != 0)
        encoded_string.push_back('=');

    size_t encoded_size = encoded_string.size();
    std::vector<BYTE> ret;
    ret.reserve(3*encoded_size/4);

    for (size_t i=0; i<encoded_size; i += 4)
    {
        // Get values for each group of four base 64 characters
        BYTE b4[4];
        for (size_t k=0; k<4; ++k)
        {
            const unsigned char ch = static_cast<unsigned char>(encoded_string[i+k]);
            b4[k] = (ch <= 'z') ? from_base64[ch] : 0xff;
        }

        // Transform into a group of three bytes
        BYTE b3[3];
        b3[0] = ((b4[0] & 0x3f) << 2) + ((b4[1] & 0x30) >> 4);
        b3[1] = ((b4[1] & 0x0f) << 4) + ((b4[2] & 0x3c) >> 2);
        b3[2] = ((b4[2] & 0x03) << 6) + ((b4[3] & 0x3f) >> 0);

        // Add the byte to the return value if it isn't part of an '=' character (indicated by 0xff)
        if (b4[1] != 0xff) ret.push_back(b3[0]);
        if (b4[2] != 0xff) ret.push_back(b3[1]);
        if (b4[3] != 0xff) ret.push_back(b3[2]);
    }

    return ret;
}

用法:

// buf holds the four characters plus the terminating '\0'; only the first
// four bytes are encoded.
BYTE buf[] = "ABCD";
std::string encoded = Base64::encode(buf, 4);
// encoded = "QUJDRA=="
// Decoding the text yields the original four bytes again.
std::vector<BYTE> decoded = Base64::decode(encoded);

这里的好处是解码函数还可以解码 Base64 编码的 URL 变体。

【讨论】:

  • 没有找到()的奖励积分,以及输出的储备()。 1 点关闭,因为您将输入作为副本(因此如果需要,您可以在末尾添加一个 =)。如果它是一个不复制的东西会很好。
  • longer:我最喜欢这个答案......也非常适合行数。没有 find() 的奖励积分和输出的 Reserve()。我要改进的事情(这会增加代码的容量):您将输入作为副本(因此如果需要,您可以在末尾添加一个 = )。如果它是一个无副本的东西会很好。也可以写成普通函数——不需要类。并且应该在取消引用 buf[0] 之前检查空 buf 向量。并添加接口以将数据写入引用(因此调用者可以重用内存)。
  • 感谢您的反馈,我按值取字符串的原因是,如果我总能保证它具有有效长度,它会简化代码。我认为在大多数情况下,RVO 无论如何都应该阻止字符串复制,所以这应该不是问题。至于取消引用 buf[0] - 很好,我会解决这个问题 :)
  • 我完成了我自己的修改版本,我会在这里发布作为答案,以供您高兴...我通过添加测试消除了对字符串作为副本的需求(您已经进行了测试,所以它并没有真正增加任何性能问题)。
  • 我可能错了,但是 from_base64[] 中索引 60 和 61 的值不应该交换吗?我猜这个想法是通过返回 255 来忽略“
【解决方案7】:

使用更紧凑的查找表和使用 C++17 功能的一些变化:

// Decodes Base64 text (standard or URL-safe alphabet) and returns the bytes.
//
// Decoding stops at the first character that is not part of either alphabet,
// which includes the '=' padding; the bytes decoded so far are returned.
//
// Two fixes over the earlier table-driven version: '<' no longer decodes as
// the value 0 (it is now rejected like any other foreign character), and the
// bit accumulator is unsigned so that shifting it left on long inputs is well
// defined.
std::string base64_decode(const std::string_view in) {
  // table from '+' to 'z'; 255 marks characters that are not Base64 digits
  static constexpr uint8_t lookup[] = {
      62,  255, 62,  255, 63,  52,  53, 54, 55, 56, 57, 58, 59, 60, 61, 255,
      255, 255, 255, 255, 255, 255, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
      10,  11,  12,  13,  14,  15,  16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
      255, 255, 255, 255, 63,  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
      36,  37,  38,  39,  40,  41,  42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
  static_assert(sizeof(lookup) == 'z' - '+' + 1);

  std::string out;
  out.reserve(in.size() / 4 * 3 + 2);

  unsigned int val = 0;  // accumulated sextets, newest in the low bits
  int valb = -8;         // bit offset of the next byte to emit, minus 8
  for (const uint8_t c : in) {
    if (c < '+' || c > 'z')
      break;
    const uint8_t sextet = lookup[c - '+'];
    if (sextet >= 64)
      break;
    val = (val << 6) + sextet;
    valb += 6;
    if (valb >= 0) {
      out.push_back(char((val >> valb) & 0xFF));
      valb -= 8;
    }
  }
  return out;
}

如果您没有 std::string_view,请尝试使用 std::experimental::string_view。

【讨论】:

    【解决方案8】:

    这是我在【解决方案6】基础上所做的改动(原文此处的链接占位符已损坏):

    它避免了以多次测试为代价来复制参数。

    使用 uint8_t 而不是 BYTE。

    为处理字符串添加了一些方便的函数,尽管通常输入数据是二进制的并且内部可能有零字节,因此通常不应将其作为字符串进行操作(这通常意味着以空值结尾的数据)。

    还添加了一些强制转换来修复编译器警告(至少在 GCC 上,我还没有通过 MSVC 运行它)。

    部分文件base64.hpp

    // Encoders: each overload clears `out` and fills it with the Base64 text
    // of the given bytes.
    void base64_encode(string & out, const vector<uint8_t>& buf);
    void base64_encode(string & out, const uint8_t* buf, size_t bufLen);
    void base64_encode(string & out, string const& buf);
    
    // Decoder: clears `out` and fills it with the decoded bytes.
    void base64_decode(vector<uint8_t> & out, string const& encoded_string);
    
    // Use this if you know the output should be a valid string
    void base64_decode(string & out, string const& encoded_string);
    

    文件base64.cpp

    // Reverse lookup table for decoding, indexed by character code 0..127.
    // 255 marks a character that carries no data; this must include '=' so
    // that padding suppresses output bytes. Both '+' and '-' map to 62 and
    // both '/' and '_' map to 63 (standard and URL-safe alphabets).
    // Index 60 ('<') used to hold 0, which made '<' decode like 'A'; it is
    // now 255 like every other character outside the alphabets.
    static const uint8_t from_base64[128] = {
        // 8 rows of 16 = 128
        // Note: only requires 123 entries, as we only lookup for <= z , which z=122
    
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255,  62, 255,  63,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 255, 255, 255,
        255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
         15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255,  63,
        255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
         41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255
    };
    
    // Encoding alphabet: to_base64[v] is the character for the 6-bit value v.
    // The 65th element is the terminating '\0' of the literal.
    static const char to_base64[65] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    
    
    // Encodes the bytes of buf into out. An empty buf is forwarded as a null
    // pointer with length 0, so &buf[0] is never taken on an empty string.
    void base64_encode(string & out, string const& buf)
    {
       if (!buf.empty())
       {
          uint8_t const* const bytes = reinterpret_cast<uint8_t const*>(&buf[0]);
          base64_encode(out, bytes, buf.size());
          return;
       }
       base64_encode(out, NULL, 0);
    }
    
    
    // Encodes the bytes of buf into out. An empty buf is forwarded as a null
    // pointer with length 0, so &buf[0] is never taken on an empty vector.
    void base64_encode(string & out, std::vector<uint8_t> const& buf)
    {
       if (!buf.empty())
       {
          uint8_t const* const bytes = &buf[0];
          base64_encode(out, bytes, buf.size());
          return;
       }
       base64_encode(out, NULL, 0);
    }
    
    // Encodes bufLen bytes starting at buf and stores the Base64 text in ret
    // (any previous contents of ret are discarded).
    //
    // The input is handled as if it were zero-padded to a multiple of three
    // bytes; the characters that stem only from that padding are replaced by
    // '=' at the end.
    void base64_encode(string & ret, uint8_t const* buf, size_t bufLen)
    {
       // Bytes that must be added to reach a multiple of 3
       const size_t missing = (3 - bufLen % 3) % 3;
    
       // Four output characters per padded 3-byte group
       const size_t ret_size = 4*(bufLen + missing)/3;
    
       ret.clear();
       ret.reserve(ret_size);
    
       for (size_t index = 0; index < bufLen; index += 3)
       {
          // Bytes past the end of the input count as 0
          const uint8_t first  = buf[index];
          const uint8_t second = (index+1 < bufLen) ? buf[index+1] : 0;
          const uint8_t third  = (index+2 < bufLen) ? buf[index+2] : 0;
    
          // Slice the 24 bits into four 6-bit alphabet indices
          ret.push_back(to_base64[first >> 2]);
          ret.push_back(to_base64[((first & 0x03) << 4) | (second >> 4)]);
          ret.push_back(to_base64[((second & 0x0f) << 2) | (third >> 6)]);
          ret.push_back(to_base64[third & 0x3f]);
       }
    
       // One '=' per missing input byte, written from the end backwards
       for (size_t i = 0; i != missing; ++i)
          ret[ret_size - i - 1] = '=';
    }
    
    
    // Decodes Base64 text (standard or URL-safe alphabet) from `in` into
    // `ret`, which may be any container with clear(), reserve(), push_back()
    // and a value_type constructible from uint8_t.
    //
    // The input is processed in groups of four characters; characters missing
    // from a short final group are treated like '=' padding. An output byte is
    // only emitted when the character that completes it carries data.
    //
    // Characters are converted to uint8_t *before* the "<= 'z'" range check.
    // Comparing the raw char let bytes >= 0x80 (negative where char is signed)
    // pass the check and then index from_base64 beyond its 128 entries.
    template <class Out>
    void base64_decode_any( Out & ret, std::string const& in)
    {
       typedef typename Out::value_type T;
    
       const size_t N = in.size();
    
       // The *intended* string length: N rounded up to a multiple of 4
       const size_t encoded_size = (N + 3) / 4 * 4;
    
       ret.clear();
       ret.reserve(3*encoded_size/4);
    
       for (size_t i = 0; i < encoded_size; i += 4)
       {
          // Get values for each group of four base 64 characters;
          // 0xff means "no data" (padding, out of range, or past the end)
          uint8_t b4[4];
          for (size_t k = 0; k < 4; ++k)
          {
             b4[k] = 0xff;
             if (i + k < N)
             {
                const uint8_t ch = static_cast<uint8_t>(in[i + k]);
                if (ch <= 'z')   // Note: 'z' == 122, inside the 128-entry table
                   b4[k] = from_base64[ch];
             }
          }
    
          // Transform into a group of three bytes
          const uint8_t b3_0 = ((b4[0] & 0x3f) << 2) + ((b4[1] & 0x30) >> 4);
          const uint8_t b3_1 = ((b4[1] & 0x0f) << 4) + ((b4[2] & 0x3c) >> 2);
          const uint8_t b3_2 = ((b4[2] & 0x03) << 6) + ((b4[3] & 0x3f) >> 0);
    
          // Add the byte to the return value if it isn't part of an '=' character (indicated by 0xff)
          if (b4[1] != 0xff) ret.push_back( static_cast<T>(b3_0) );
          if (b4[2] != 0xff) ret.push_back( static_cast<T>(b3_1) );
          if (b4[3] != 0xff) ret.push_back( static_cast<T>(b3_2) );
       }
    }
    
    // Decodes encoded_string into a byte vector (previous contents of out are
    // discarded by the shared implementation).
    void base64_decode(vector<uint8_t> & out, string const& encoded_string)
    {
       base64_decode_any< vector<uint8_t> >(out, encoded_string);
    }
    
    // Decodes encoded_string into a string (previous contents of out are
    // discarded by the shared implementation).
    void base64_decode(string & out, string const& encoded_string)
    {
       base64_decode_any<string>(out, encoded_string);
    }
    

    【讨论】:

    • 我不知道 && 和 ||在 C++ 中有一个定义的评估顺序,所以我今天学到了一些新东西。在这种情况下,您想检查索引的条件,但同时确保索引没有超出范围,那么它非常有用。
    • 是的,我一直使用这种技术。
    【解决方案9】:

    我的版本是用于 C++Builder 的 Base64 的简单快速编码器(解码器)。

    // ---------------------------------------------------------------------------
    // Encodes `length` bytes starting at `data` as Base64 text with '='
    // padding. Returns an empty string when length <= 0.
    UnicodeString __fastcall TExample::Base64Encode(void *data, int length)
    {
        if (length <= 0)
            return L"";
        static const char set[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        unsigned char *in = (unsigned char*)data;
        // Temporary output buffer: four characters per (possibly partial) 3-byte group.
        char *pos, *out = pos = new char[((length - 1) / 3 + 1) << 2];
        // Complete groups. `length` is decremented before the test, so when the
        // loop ends it is -3 (nothing left), -2 (one byte left) or -1 (two left).
        while ((length -= 3) >= 0)
        {
            pos[0] = set[in[0] >> 2];
            pos[1] = set[((in[0] & 0x03) << 4) | (in[1] >> 4)];
            pos[2] = set[((in[1] & 0x0F) << 2) | (in[2] >> 6)];
            pos[3] = set[in[2] & 0x3F];
            pos += 4;
            in += 3;
        };
        // In two's complement, bit 1 is set for -2 and -1 but clear for -3,
        // so this tests "is there a partial group?".
        if ((length & 2) != 0)
        {
            pos[0] = set[in[0] >> 2];
            // Bit 0 separates -1 (two bytes left) from -2 (one byte left).
            if ((length & 1) != 0)
            {
                pos[1] = set[((in[0] & 0x03) << 4) | (in[1] >> 4)];
                pos[2] = set[(in[1] & 0x0F) << 2];
            }
            else
            {
                pos[1] = set[(in[0] & 0x03) << 4];
                pos[2] = '=';
            };
            pos[3] = '=';
            pos += 4;
        };
        // Copy the characters written so far into the result, then release the buffer.
        UnicodeString code = UnicodeString(out, pos - out);
        delete[] out;
        return code;
    };
    
    // ---------------------------------------------------------------------------
    // Decodes the Base64 text in `code` into a newly allocated byte buffer.
    //
    // On success *data receives a buffer allocated with new[] (the caller is
    // responsible for delete[]) and the number of decoded bytes is returned.
    // Returns 0 and leaves *data untouched when `code` is empty, when its
    // length is not a multiple of 4, or when it contains a character outside
    // the Base64 alphabet or a '=' in the first or second position of a group.
    int __fastcall TExample::Base64Decode(const UnicodeString &code, unsigned char **data)
    {
        int length;
        if (((length = code.Length()) == 0) || ((length & 3) != 0))
            return 0;
        wchar_t *str = code.c_str();
        // Three output bytes per group of four characters is the upper bound.
        unsigned char *pos, *out = pos = new unsigned char[(length >> 2) * 3];
        while (*str != 0)
        {
            // From here on `length` is the index of the character being read
            // within the current group; afterwards it equals the number of
            // bytes that the group produced.
            length = -1;
            int shift = 18, bits = 0;
            do
            {
                wchar_t s = str[++length];
                if ((s >= L'A') && (s <= L'Z'))
                    bits |= (s - L'A') << shift;
                else if ((s >= L'a') && (s <= L'z'))
                       bits |= (s - (L'a' - 26)) << shift;
                else if (((s >= L'0') && (s <= L'9')))
                       bits |= (s - (L'0' - 52)) << shift;
                else if (s == L'+')
                       bits |= 62 << shift;
                else if (s == L'/')
                       bits |= 63 << shift;
                else if (s == L'=')
                {
                    // Padding may only replace the third and/or fourth
                    // character of a group. A '=' in the first position would
                    // make `length` negative below and move `pos` backwards,
                    // so a following group would be written before `out`.
                    if (length < 2)
                    {
                        delete[] out;
                        return 0;
                    }
                    length--;
                    break;
                }
                else
                {
                    delete[] out;
                    return 0;
                };
            }
            while ((shift -= 6) >= 0);
            // Always store three bytes (the buffer has room for them), but
            // only advance by the number of bytes that carry data.
            pos[0] = bits >> 16;
            pos[1] = bits >> 8;
            pos[2] = bits;
            pos += length;
            str += 4;
        };
        *data = out;
        return pos - out;
    };
    //---------------------------------------------------------------------------
    

    【讨论】:

      【解决方案10】:

      我用这个:

      // Growable sequence of bits stored most-significant-bit first in `bytes`.
      class BinaryVector {
      public:
          // Backing storage; the last byte may be only partially filled.
          std::vector<char> bytes;
      
          // Number of bits pushed so far.
          uint64_t bit_count = 0;
      
      public:
          /* Add a bit to the end */
          void push_back(bool bit);
      
          /* Return false if character is unrecognized */
          bool pushBase64Char(char b64_c);
      };
      
      // Appends one bit. Bits fill each byte from the most significant
      // position downwards; a new byte is started every eighth bit.
      void BinaryVector::push_back(bool bit)
      {
          const uint64_t offset = bit_count % 8;  // bits already used in the current byte
          if (offset == 0) {
              // Start a new byte with the bit in the top position.
              bytes.push_back(bit << 7);
          }
          else {
              const uint8_t shift = 8 - offset - 1;
              bytes[bit_count / 8] |= bit << shift;
          }
          ++bit_count;
      }
      
      /* Converts one Base64 character to 6 bits */
      // Appends the six bits encoded by one Base64 character, most significant
      // bit first. The padding character '=' is accepted and appends six zero
      // bits. Returns false, appending nothing, for any other character
      // outside the Base64 alphabet.
      bool BinaryVector::pushBase64Char(char c)
      {
          uint8_t d;
      
          if (c >= 'A' && c <= 'Z') {
              d = c - 65;  // Base64 A is 0
          }
          else if (c >= 'a' && c <= 'z') {
              d = c - 97 + 26;  // Base64 a is 26
          }
          else if (c >= '0' && c <= '9') {
              d = c - 48 + 52;  // Base64 0 is 52
          }
          else {
              switch (c) {
              case '+': d = 62; break;
              case '/': d = 63; break;
              case '=': d = 0;  break;
              default:  return false;
              }
          }
      
          // Emit bit 5 down to bit 0.
          for (int bitIndex = 5; bitIndex >= 0; --bitIndex) {
              push_back(((d >> bitIndex) & 1) != 0);
          }
      
          return true;
      }
      
      // Feeds every character of b64_bin into vec. Stops and returns false at
      // the first character vec does not recognize (bits pushed before that
      // point stay in vec); returns true when all characters were accepted.
      bool loadBase64(std::vector<char>& b64_bin, BinaryVector& vec)
      {
          for (std::vector<char>::size_type i = 0; i < b64_bin.size(); ++i) {
              const bool accepted = vec.pushBase64Char(b64_bin[i]);
              if (!accepted) {
                  return false;
              }
          }
          return true;
      }
      
      

      使用vec.bytes 访问转换后的数据。

      【讨论】:

        【解决方案11】:

        我先做了自己的版本,然后找到了这个话题。

        为什么我的版本看起来比这里展示的其他版本更简单?难道我做错了什么?我没有测试它的速度。

        // Base64 alphabet; a character's index is the 6-bit value it represents.
        inline char const* b64units =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz"
            "0123456789+/";
        
        // Encodes b bytes starting at a as NUL-terminated Base64 text with '='
        // padding.
        //
        // Returns a buffer allocated with new[] that the caller must release
        // with delete[], or nullptr when b <= 0. `a` must not be null.
        //
        // Only the bytes a[0 .. b-1] are read: the earlier bit-window version
        // also fetched the byte after the current one for every sextet, which
        // read one byte past the input whenever a sextet started in the last
        // byte. The output is unchanged.
        inline char* b64encode(void const* a, int64_t b) {
            ASSERT(a != nullptr);
            if (b <= 0) return nullptr;

            uint8_t const* const src = static_cast<uint8_t const*>(a);
            int64_t const total = ((b + 2) / 3) * 4;   // encoded length without the NUL
            char* const out = new char[total + 1]();   // value-initialized, so NUL-terminated
            int64_t w = 0;                             // next write position in out
            int64_t i = 0;                             // next read position in src

            // Complete groups: three bytes -> four characters.
            for (; i + 2 < b; i += 3) {
                out[w++] = b64units[src[i] >> 2];
                out[w++] = b64units[((src[i] & 0x03) << 4) | (src[i + 1] >> 4)];
                out[w++] = b64units[((src[i + 1] & 0x0F) << 2) | (src[i + 2] >> 6)];
                out[w++] = b64units[src[i + 2] & 0x3F];
            }

            int64_t const rest = b - i;   // 0, 1 or 2 bytes left
            if (rest == 1) {
                out[w++] = b64units[src[i] >> 2];
                out[w++] = b64units[(src[i] & 0x03) << 4];
                out[w++] = '=';
                out[w++] = '=';
            } else if (rest == 2) {
                out[w++] = b64units[src[i] >> 2];
                out[w++] = b64units[((src[i] & 0x03) << 4) | (src[i + 1] >> 4)];
                out[w++] = b64units[(src[i + 1] & 0x0F) << 2];
                out[w++] = '=';
            }
            return out;
        }
        

        P.S.:我的方法效果很好。我用Node.js测试了它:

        // Cross-check: Base64-encode a sample string with Node.js.
        // Buffer.from() replaces the deprecated `new Buffer(string)` constructor;
        // both interpret the string as UTF-8 by default.
        let data = 'stackabuse.com';
        let buff = Buffer.from(data);
        let base64data = buff.toString('base64');
        

        【讨论】:

          【解决方案12】:

          我喜欢this solution on GitHub

          这是一个单独的 hpp 文件,它用 vector<byte> 类型来存放原始数据,这一点与被采纳的答案不同。

          #pragma once
          
          #include <string>
          #include <vector>
          #include <stdexcept>
          #include <cstdint>
          
          /// Header-only Base64 (standard alphabet, '=' padding) for raw byte vectors.
          namespace base64
          {
              /// Maps a 6-bit value to its Base64 character.
              inline constexpr char kEncodeLookup[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
              /// Fills the last group of four characters when the input is not a multiple of three bytes.
              inline constexpr char kPadCharacter = '=';

              using byte = std::uint8_t;

              /// Encodes @p input as padded Base64 text.
              /// @param input raw bytes; may be empty (yields an empty string).
              /// @return the encoded text, whose length is always a multiple of four.
              inline std::string encode(const std::vector<byte>& input)
              {
                  std::string encoded;
                  encoded.reserve(((input.size() / 3) + (input.size() % 3 > 0)) * 4);

                  std::uint32_t temp{};
                  auto it = input.begin();

                  // Whole 3-byte groups: 24 bits -> four 6-bit indices.
                  for(std::size_t i = 0; i < input.size() / 3; ++i)
                  {
                      temp  = static_cast<std::uint32_t>(*it++) << 16;
                      temp += static_cast<std::uint32_t>(*it++) << 8;
                      temp += static_cast<std::uint32_t>(*it++);
                      encoded.append(1, kEncodeLookup[(temp & 0x00FC0000) >> 18]);
                      encoded.append(1, kEncodeLookup[(temp & 0x0003F000) >> 12]);
                      encoded.append(1, kEncodeLookup[(temp & 0x00000FC0) >> 6 ]);
                      encoded.append(1, kEncodeLookup[(temp & 0x0000003F)      ]);
                  }

                  // Trailing one or two bytes: emit the significant characters, then pad.
                  switch(input.size() % 3)
                  {
                  case 1:
                      temp = static_cast<std::uint32_t>(*it++) << 16;
                      encoded.append(1, kEncodeLookup[(temp & 0x00FC0000) >> 18]);
                      encoded.append(1, kEncodeLookup[(temp & 0x0003F000) >> 12]);
                      encoded.append(2, kPadCharacter);
                      break;
                  case 2:
                      temp  = static_cast<std::uint32_t>(*it++) << 16;
                      temp += static_cast<std::uint32_t>(*it++) << 8;
                      encoded.append(1, kEncodeLookup[(temp & 0x00FC0000) >> 18]);
                      encoded.append(1, kEncodeLookup[(temp & 0x0003F000) >> 12]);
                      encoded.append(1, kEncodeLookup[(temp & 0x00000FC0) >> 6 ]);
                      encoded.append(1, kPadCharacter);
                      break;
                  }

                  return encoded;
              }

              /// Decodes padded Base64 text back into raw bytes.
              /// @param input text whose length is a multiple of four; may be empty.
              /// @return the decoded bytes.
              /// @throws std::runtime_error if the length is not a multiple of four,
              ///         a character is outside the alphabet, or padding appears
              ///         anywhere other than as the final one or two characters.
              ///
              /// Declared inline so this header can be included from several
              /// translation units without violating the one-definition rule.
              inline std::vector<byte> decode(const std::string& input)
              {
                  if(input.length() % 4)
                      throw std::runtime_error("Invalid base64 length!");

                  // Only used to size the output buffer; validation happens in the loop.
                  std::size_t padding{};

                  if(input.length())
                  {
                      if(input[input.length() - 1] == kPadCharacter) padding++;
                      if(input[input.length() - 2] == kPadCharacter) padding++;
                  }

                  std::vector<byte> decoded;
                  decoded.reserve(((input.length() / 4) * 3) - padding);

                  // Stale high bits from earlier groups are harmless: every byte
                  // extracted below is masked to the low 24 bits of the accumulator.
                  std::uint32_t temp{};
                  auto it = input.begin();

                  while(it < input.end())
                  {
                      for(std::size_t i = 0; i < 4; ++i)
                      {
                          temp <<= 6;
                          const char c = *it;
                          if     (c >= 'A' && c <= 'Z') temp |= c - 'A';
                          else if(c >= 'a' && c <= 'z') temp |= c - 'a' + 26;
                          else if(c >= '0' && c <= '9') temp |= c - '0' + 52;
                          else if(c == '+')             temp |= 0x3E;
                          else if(c == '/')             temp |= 0x3F;
                          else if(c == kPadCharacter)
                          {
                              // The length is a multiple of four, so the distance to the
                              // end also tells us the position inside the final group.
                              switch(input.end() - it)
                              {
                              case 1:
                                  // "xxx=": three sextets already shifted in -> two bytes.
                                  decoded.push_back((temp >> 16) & 0x000000FF);
                                  decoded.push_back((temp >> 8 ) & 0x000000FF);
                                  return decoded;
                              case 2:
                                  // "xx==": the character after this one must be padding too.
                                  if(*(it + 1) != kPadCharacter)
                                      throw std::runtime_error("Invalid padding in base64!");
                                  decoded.push_back((temp >> 10) & 0x000000FF);
                                  return decoded;
                              default:
                                  throw std::runtime_error("Invalid padding in base64!");
                              }
                          }
                          else throw std::runtime_error("Invalid character in base64!");

                          ++it;
                      }

                      decoded.push_back((temp >> 16) & 0x000000FF);
                      decoded.push_back((temp >> 8 ) & 0x000000FF);
                      decoded.push_back((temp      ) & 0x000000FF);
                  }

                  return decoded;
              }
          }
          

          【讨论】:

            【解决方案13】:

            这是我写的一个实现,它使用联合体(union)和位域(bit-field)来获得最大的效率和可读性。

            // Character that completes the final group of four output characters.
            const char PADDING_CHAR = '=';
            // Maps a 6-bit value to its Base64 character.
            const char* ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
            // Reverse table: 7-bit character code -> 6-bit value. Characters outside
            // the alphabet (including '=') map to 0; the decoder does not reject them.
            const uint8_t DECODED_ALPHBET[128]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,0,0,0,0,0};

            /**
             * Encodes the bytes of binaryText as '='-padded Base64.
             *
             * @param binaryText  arbitrary bytes (embedded NULs and bytes >= 0x80 are fine)
             * @return            the encoded text; its length is a multiple of four
             *
             * NOTE(review): the sextets are extracted through a union of a uint32_t
             * and a bit-field struct. That relies on the compiler allocating
             * bit-fields starting from the least significant bit and on union-based
             * type punning being supported -- confirm for the target toolchain.
             */
            std::string encodeBase64(const std::string& binaryText)
            {
                // Start from an all-padding string of the final length, so the tail
                // handling below only has to overwrite the data characters.
                std::string encoded((binaryText.size()/3 + (binaryText.size()%3 > 0)) << 2, PADDING_CHAR);

                // Read the input as unsigned bytes. Plain char may be signed, in which
                // case a byte >= 0x80 would be sign-extended by the shifts below and
                // overwrite the bits contributed by the other bytes of the group.
                const uint8_t* bytes = reinterpret_cast<const uint8_t*>(binaryText.data());
                union
                {
                    uint32_t temp = 0;
                    struct
                    {
                        uint32_t first : 6, second : 6, third : 6, fourth : 6;
                    } tempBytes;
                };
                std::string::iterator currEncoding = encoded.begin();

                // Whole 3-byte groups; "fourth" holds the most significant sextet.
                for(std::size_t i = 0, lim = binaryText.size() / 3; i < lim; ++i, bytes+=3)
                {
                    temp = static_cast<uint32_t>(bytes[0]) << 16 | static_cast<uint32_t>(bytes[1]) << 8 | bytes[2];
                    (*currEncoding++) = ALPHABET[tempBytes.fourth];
                    (*currEncoding++) = ALPHABET[tempBytes.third];
                    (*currEncoding++) = ALPHABET[tempBytes.second];
                    (*currEncoding++) = ALPHABET[tempBytes.first];
                }

                // Trailing one or two bytes; the remaining positions keep their padding.
                switch(binaryText.size() % 3)
                {
                case 1:
                    temp = static_cast<uint32_t>(bytes[0]) << 16;
                    (*currEncoding++) = ALPHABET[tempBytes.fourth];
                    (*currEncoding++) = ALPHABET[tempBytes.third];
                    break;
                case 2:
                    temp = static_cast<uint32_t>(bytes[0]) << 16 | static_cast<uint32_t>(bytes[1]) << 8;
                    (*currEncoding++) = ALPHABET[tempBytes.fourth];
                    (*currEncoding++) = ALPHABET[tempBytes.third];
                    (*currEncoding++) = ALPHABET[tempBytes.second];
                    break;
                }

                return encoded;
            }

            /**
             * Decodes '='-padded Base64 text.
             *
             * @param base64Text  encoded text; its length must be a multiple of four
             *                    (checked with assert only). An empty string is allowed.
             * @return            the decoded bytes
             *
             * Characters outside the Base64 alphabet are not rejected; they decode
             * as the value 0, exactly like the zero entries of DECODED_ALPHBET.
             *
             * NOTE(review): bytes are picked out of a union of uint32_t and char[4],
             * which yields the intended order only on little-endian targets --
             * confirm for the target platform.
             */
            std::string decodeBase64(const std::string& base64Text)
            {
                if( base64Text.empty() )
                    return "";

                assert((base64Text.size()&3) == 0 && "The base64 text to be decoded must have a length devisible by 4!");

                const std::size_t len = base64Text.size();
                uint32_t numPadding =  (base64Text[len - 1] == PADDING_CHAR) + (base64Text[len - 2] == PADDING_CHAR);

                std::string decoded((len*3>>2) - numPadding, '.');

                union
                {
                    uint32_t temp;
                    char tempBytes[4];
                };
                const uint8_t* bytes = reinterpret_cast<const uint8_t*>(base64Text.data());

                // The table has 128 entries but an input byte can be as large as 255;
                // treat anything beyond the table like any other non-alphabet character.
                const auto sextet = [](uint8_t c) -> uint32_t { return c < 128 ? DECODED_ALPHBET[c] : 0u; };

                std::string::iterator currDecoding = decoded.begin();

                // Every group except a padded final one yields three bytes.
                for(std::size_t i = 0, lim = (len >> 2) - (numPadding!=0); i < lim; ++i, bytes+=4)
                {
                    temp = sextet(bytes[0]) << 18 | sextet(bytes[1]) << 12 | sextet(bytes[2]) << 6 | sextet(bytes[3]);
                    (*currDecoding++) = tempBytes[2];
                    (*currDecoding++) = tempBytes[1];
                    (*currDecoding++) = tempBytes[0];
                }

                // Padded final group: only the significant sextets are combined.
                switch (numPadding)
                {
                case 2:
                    temp = sextet(bytes[0]) << 18 | sextet(bytes[1]) << 12;
                    (*currDecoding++) = tempBytes[2];
                    break;

                case 1:
                    temp = sextet(bytes[0]) << 18 | sextet(bytes[1]) << 12 | sextet(bytes[2]) << 6;
                    (*currDecoding++) = tempBytes[2];
                    (*currDecoding++) = tempBytes[1];
                    break;
                }

                return decoded;
            }
            

            【讨论】:

              猜你喜欢
              • 1970-01-01
              • 1970-01-01
              • 2011-11-14
              • 1970-01-01
              • 2013-04-06
              • 2020-04-22
              • 1970-01-01
              • 1970-01-01
              • 2012-10-24
              相关资源
              最近更新 更多