来自长十六进制值的向量

Vector from long hex value

在 C++ 中,我可以用如下方式初始化向量:
std::vector<uint8_t> data = {0x01, 0x02, 0x03};

为方便起见(我有 python 字节字符串自然输出为十六进制转储),我想初始化为以下形式的非定界十六进制值:

std::vector<uint8_t> data = 0x229597354972973aabbe7;

是否有有效的 c++ 变体?

结合 Evg、JHbonarius 和 1201ProgramAlarm 的评论:

答案是没有直接的方法将一个长的十六进制值分组到一个向量中,但是,使用 user defined literals 提供了一种巧妙的符号改进。

首先,在代码中的任何地方直接使用右侧的 0x229597354972973aabbe7 都会编译失败:没有后缀的十六进制字面量会依次尝试 int、unsigned int、long、unsigned long、long long、unsigned long long,取第一个能容纳该值的类型;而这个值超过了 64 位,任何标准整数类型都放不下。在 MSVC 中,这会导致 E0023“整数常量太大”。使用后缀只适用于较短的十六进制序列,否则就得借助更大的数据类型,但这都违背了追求简洁的初衷。

需要手动转换,但用户定义的字面量(user-defined literals)可以提供稍微更优雅的表示法。例如,我们可以用如下写法实现十六进制序列到向量的转换:
std::vector<uint8_t> val1 = 0x229597354972973aabbe7_hexvec;
std::vector<uint8_t> val2 = "229597354972973aabbe7"_hexvec;

使用以下代码:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>


// Quick utility function to view results: writes every byte of `v` to `os`
// as a hexadecimal number followed by a single space.
//
// The vector is taken by const reference so that const vectors and
// temporaries can be printed as well.
// Note: std::hex is sticky, so once a byte has been written `os` stays in
// hexadecimal mode for later integer output.
std::ostream& operator<<(std::ostream& os, const std::vector<uint8_t>& v)
{
    for (const auto byte : v)
        os << std::hex << static_cast<int>(byte) << ' '; // widen: uint8_t would print as a character

    return os;
}

// Converts a string of hexadecimal digits into a vector of bytes.
//
// Accepts strings of the form "FFAA54", "0x11234" or "0X000"; a leading
// "0x"/"0X" prefix is skipped. Digits are paired from the right, so an odd
// number of digits leaves a lone high nibble in the first byte
// ("fff" -> {0x0f, 0xff}).
//
//   str  pointer to the first character (does not have to be null-terminated)
//   len  number of characters to read from str
//
// Returns one byte per digit pair, most significant byte first; the result is
// empty when there are no digits.
// Throws std::invalid_argument if any character after the optional prefix is
// not a hexadecimal digit.
std::vector<uint8_t> convertHexToVec(const char * str, size_t len)
{
    // Only look for the prefix when there really are two characters to inspect.
    const bool hasPrefix = len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X');
    const size_t offset = hasPrefix ? 2 : 0;
    const size_t digitCount = len - offset;

    // Value of a single hex digit; rejects everything else instead of guessing.
    const auto nibble = [](char c) -> unsigned {
        if (c >= '0' && c <= '9') return static_cast<unsigned>(c - '0');
        if (c >= 'a' && c <= 'f') return static_cast<unsigned>(c - 'a') + 10u;
        if (c >= 'A' && c <= 'F') return static_cast<unsigned>(c - 'A') + 10u;
        throw std::invalid_argument("convertHexToVec: invalid hexadecimal digit");
    };

    // Round up so that "fff" still yields two bytes.
    std::vector<uint8_t> result;
    result.reserve((digitCount + 1) / 2);

    size_t pos = offset;

    // An odd digit count means the leftmost digit has no partner.
    if (digitCount % 2 != 0)
        result.push_back(static_cast<uint8_t>(nibble(str[pos++])));

    // The remaining digits form complete high/low nibble pairs.
    for (; pos < len; pos += 2)
        result.push_back(static_cast<uint8_t>((nibble(str[pos]) << 4) | nibble(str[pos + 1])));

    return result;
}

// String-literal form: handles the conversion from "0xFFAABB"_hexvec or
// "12441AA"_hexvec. Receives the literal's characters together with their
// count and forwards both unchanged to convertHexToVec.
//
// Spelled without whitespace between "" and the suffix: the separated
// spelling is deprecated in newer C++ standards.
std::vector<uint8_t> operator""_hexvec(const char* str, std::size_t len)
{
    return convertHexToVec(str, len);
}

// Raw numeric-literal form: handles 0xFFaaBB_hexvec and 0Xf_hexvec. The
// literal arrives as a null-terminated character string, so its length is
// measured here before handing it to convertHexToVec.
//
// Spelled without whitespace between "" and the suffix: the separated
// spelling is deprecated in newer C++ standards.
std::vector<uint8_t> operator""_hexvec(const char* str)
{
    return convertHexToVec(str, std::strlen(str));
}

// Demonstrates both literal forms on the same digits and prints the
// resulting bytes, one vector per line.
int main()
{
    // Numeric literal form: the digits are passed to the literal operator as
    // characters, so the value never has to fit into an integer type.
    std::vector<uint8_t> val1 = 0x229597354972973aabbe7_hexvec;

    // String literal form of the same digits (no 0x prefix needed).
    std::vector<uint8_t> val2 = "229597354972973aabbe7"_hexvec;

    std::cout << val1 << "\n";
    std::cout << val2 << "\n";

    return 0;
}

编码人员必须决定这是否优于实现和使用更传统的 convertHexToVec("0x41243124FF") 函数。

Is there a variant of this that is valid c++?

我认为不是。


以下代码是有效的 C++,并使用了更“传统的十六进制转换”过程。

  • 确认并删除前导'0x',同时确认所有字符都是 十六进制字符。

  • modifyFor_SDFE() - 'space delimited format extraction'

此函数在两个字符字节描述符周围插入 spaces。

请注意,此函数还会在修改后的字符串前后添加一个space字符。这个新字符串用于创建和初始化 std::stringstream (ss1).

  • 通过插入 spaces,正常流“格式化提取”工作干净

代码逐个提取每个十六进制值,并将每个值推入向量,并在推入最后一个字节时结束 (stream.eof())。请注意,向量会根据需要自动增长(不会发生溢出)。

请注意,不需要“0x”前缀..因为流模式设置为十六进制。

请注意,溢出问题(即上文所说的“0x22...be7 可能会溢出”)在这里被直接绕开了,因为一次只读取一个字节。将来需要处理更长的十六进制字符串时,这一点会很方便。


#include <iostream>
using std::cout, std::cerr, std::endl, std::hex,
      std::dec, std::cin, std::flush; // c++17

#include <iomanip>
using std::setw, std::setfill;

#include <string>
using std::string;

#include <sstream>
using std::stringstream;

#include <vector>
using std::vector;
typedef vector<uint8_t>  UI8Vec_t;

#include <cstdint>
#include <cassert>


// Functor that converts a hard-coded "0x..." hex string into a vector of
// bytes and prints the result to cout.
// Functor ctor and dtor use compiler provided defaults.
class F889_t
{
  bool    verbose = false;   // extra diagnostics; set by operator() from argv[1]

public:
  // Functor entry. Turns on verbose mode when the first command-line argument
  // starts with 'v' or 'V', then runs exec() and returns its result.
  int operator()(int argc, char* argv[])
    {
      // compared directly rather than through toupper(): handing toupper a
      // negative char value is undefined behavior
      verbose = ( (argc > 1) && ('V' == argv[1][0] || 'v' == argv[1][0]) );
      return exec(argc, argv);
    }
  // 2 lines

private:

  // Converts the example input and prints the resulting bytes.
  // Both parameters are unused. Always returns 0.
  int exec(int , char** )
    {
      UI8Vec_t   resultVec;                            // output

      // example1 input
      // string data1 = "0x229597354972973aabbe7";     // 23 chars, hex string
      // to_ui8_vec(resultVec, data1);
      // cout << (verbose ? "" : "\n") << "  vector result       "
      //      << show(resultVec);  // show results

      // example2 input   46 chars (no size limit)
      string data = "0x330508465083084bBCcf87eBBaa379279543795922fF";

      to_ui8_vec (resultVec, data);

      cout << (verbose ? "  vector elements      " : "\n  ")
           << show(resultVec) << endl; // show results

      if(verbose) { cout << "\n  F889_t::exec()  (verbose)  ("
                         <<  __cplusplus  << ")" << endl; }

      return 0;
    } // int exec(int, char**)
  // 8 lines

  // Appends the bytes described by sData to retVal.
  // sData must start with "0x" and otherwise contain only hex digits; both
  // conditions are checked with assert, so they are enforced only in builds
  // where assert is active.
  void to_ui8_vec(UI8Vec_t& retVal,         // output (pass by reference)
                  string    sData)          //  input (pass by value)
    {
      if(verbose) { cout << "\n  input data        '" << sData
         << "'                       (" << sData.size() << " chars)" << endl;}

      // misc format checks:
      // confirm leading hex indicator exists
      assert(sData.compare(0, 2, "0x") == 0);
      sData.erase(0,2);                 // discard leading "0x"
      // to test that this will detect any typos in data:
      //    temporarily append or insert an invalid char, i.e. sData += 'q';
      assert(sData.find_first_not_of("0123456789abcdefABCDEF") == std::string::npos);

      modifyFor_SDFE (sData); // SDFE - 'Space Delimited Formatted Extraction'

      stringstream ss1(sData); // create / initialize stream with SDFE

      if(verbose) { cout << "  SDFE  data         '" << ss1.str() // echo init
                         << "' (" << sData.size() << " chars)" << endl; }

      extract_values_from_SDFE_push_back_into_vector(retVal, ss1);

    } // void to_ui8_vec (vector<uint8_t>&, string)
  // 10 lines

  // modify s (of any size) for 'Space Delimited Formatted Extraction':
  // working from the right, a space is inserted before every two-character
  // group, so an odd-length string keeps a single leading character.
  // One more space is added at the front and one at the tail.
  void modifyFor_SDFE (string& s)
    {
      size_t indx = s.size();
      while (indx > 2)
      {
        indx -= 2;
        s.insert (indx, 1, ' ');  // indx, count, delimiter
      }
      s.insert(0, 1, ' '); // delimiter at front of s
      s += ' ';            // delimiter at tail of s
    } // void modifyFor_SDFE (string&)
  // 6 lines

  // Reads space-delimited hex fields from ss1 and appends the low 8 bits of
  // each one to retVal. Running out of input ends the loop quietly; any other
  // extraction failure is reported on cerr.
  void extract_values_from_SDFE_push_back_into_vector(UI8Vec_t&      retVal,
                                                      stringstream&  ss1)
    {
      unsigned int n = 0;

      // use SDFE, hex mode - extract one field at a time. Testing the
      // extraction itself keeps a value that was read right up to the end of
      // the stream.
      while (ss1 >> hex >> n)
      {
        retVal.push_back(static_cast<uint8_t>(n & 0xff)); // append to vector
      }

      if(!ss1.eof())  // eof is the normal stream exit; otherwise make some noise
      {
        cerr << "\n  err: data input line invalid [" << ss1.str() << ']' << endl;
      }
    } // void extract_values_from_SDFE_push_back_into_vector(UI8Vec_t&, stringstream&)
  // 5 lines

  // Formats every byte of ui8Vec as two lowercase hex digits followed by a
  // space; in verbose mode the element count is appended.
  string show(const UI8Vec_t& ui8Vec) const
    {
      // starts empty: initial text handed to the constructor would simply be
      // overwritten by the first insertion
      stringstream ss;
      for (const uint8_t byte : ui8Vec) {
        ss << setfill('0') << setw(2) << hex
           << static_cast<int>(byte) << ' '; }
      if(verbose) { ss << "  (" << dec << ui8Vec.size() << " elements)"; }
      return ss.str();
    }
  // 6 lines

}; // class F889_t

int main(int argc, char* argv[]) { return F889_t()(argc, argv); }

以 'verbose' 作为第二个参数调用时的典型输出:
$ ./dumy889 verbose

  input data        '0x330508465083084bBCcf87eBBaa379279543795922fF'                       (46 chars)
  SDFE  data         ' 33 05 08 46 50 83 08 4b BC cf 87 eB Ba a3 79 27 95 43 79 59 22 fF ' (67 chars)
  vector elements      33 05 08 46 50 83 08 4b bc cf 87 eb ba a3 79 27 95 43 79 59 22 ff   (22 elements)

不带参数调用时

$ ./dumy889 

  33 05 08 46 50 83 08 4b bc cf 87 eb ba a3 79 27 95 43 79 59 22 ff 

行数不包括空行,也不包括仅为注释或大括号的行。您可以根据需要计算行数。