C++ 位操作:如何将 char 解码/解压缩为 int,并显示正确的字符串?

C++ Bit Operations: How to decode / decompress char to int, and display correct string?

我正在学习位运算。从我的解码/解压缩(decode/decompress)算法的第 8 次迭代开始,解码出来的位开始错位,之后的结果全部出错。我需要更有经验的人帮我找出原因,让这些位不再错位。我已经在调试器里单步跟踪了很多遍,但仍然无法确定问题所在。我怀疑在某个地方多向左或向右移了 1 位或更多位。完整程序在 Github 上(here)。

decode/decompress 的输入是:

// Packed input for the decoder: 25 bytes (200 bits), which the decoder below
// consumes in 5-bit groups.
unsigned char bytes[25] = { 9,0,207,176,159,163,255,33,58,115,
                              199,255,255,181,223,67,102,69,173,
                              6,35,103,245,160,164 };

参考数组是:

// Lookup table for decoded values: indices 0-25 are 'A'-'Z', index 26 is a space.
// NOTE(review): a 5-bit value can be as large as 31, but only 27 entries exist.
const char symbols[27] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
                          'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
                          'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '};

解码后的 'bytes' 字符数组的输出是:

'BEAM_ME_UP_SGO_TY____N__INTU_LYGENT__IFE' //Underscores represent space.

使用库:

#include <bitset>
#include <iostream>
#include <queue>
#include <stack>
#include <cassert>
#include <string>
#include <sstream>

算法函数:

/* Reads the input one byte at a time, extracts 5-bit values from the
 * accumulated bits, and uses each value as an index into `symbols`
 * (27 chars: A-Z and space). The looked-up characters are collected in a
 * string stream and printed to cout followed by endl.
 *
 * bytey       - pointer to the packed input bytes.
 * sizeOfArray - number of bytes to read; asserted to be greater than zero.
 *
 * NOTE(review): this is the version under discussion and it has known
 * problems, flagged inline below: `result` and `buffer` are read before they
 * are ever assigned, and `|=` is used where a copy is intended.
 * */

void bitWiseOpsDemo(unsigned char *bytey, size_t sizeOfArray){ // Input byte array and its size.
    assert(sizeOfArray>0);
    const int READIBLE_AMOUNT = 5; // Number of bits consumed per symbol.
    int result; // NOTE(review): no initializer, yet first used with `|=` below.
    stringstream stringOfCharacters;
    string words;
    int numBitsReadible = 0; // Count of buffered bits not yet turned into symbols.
    int shift;
    int indexInto;
    short buffer; // Holds the bits. NOTE(review): no initializer, and it is a signed type.
    int whileLoopIndex = 0;

    // NOTE(review): compares a signed int against the unsigned size_t parameter.
    while(whileLoopIndex<sizeOfArray) {
        // OR the next input byte into the low 8 bits of buffer.
        (buffer |= (bytey[whileLoopIndex]) & 0xFF);
        numBitsReadible += 8;
        // Intended as a copy, but `|=` only adds bits: whatever result already
        // held stays set.
        result |= (buffer & 0xFFFF);
        while(numBitsReadible>=READIBLE_AMOUNT){ // While at least 5 bits are buffered, read a symbol.
            result |= (buffer & 0xFFFF); // Same OR-instead-of-copy as above.
            indexInto = result;
            result <<=5;
            numBitsReadible -= 5; // Five bits (not bytes) have now been consumed.

            // The bits still unread sit below the symbol, so shift by that count.
            shift = numBitsReadible;
            result <<= shift;
            indexInto >>=shift;

            (indexInto &= 0x1F); // Keep only the low five bits.
            // NOTE(review): indexInto can be 0-31 here, but symbols has 27 entries.
            stringOfCharacters << symbols[indexInto]; // Look up the symbol and append it to the stream.
            if(numBitsReadible==0){
                (result &= 0x0000); // Clear result when no buffered bits remain.
            }
        }
        // Masks result down to its second byte (bits 8-15) and ORs that into buffer.
        buffer |= (result &= 0xFF00); // Change 'Y', not 'x': YYYYYYYY xxxxxxxx
        buffer <<= 8; // Make room in the low byte for the next input byte.
        ++whileLoopIndex;
    }
    words = stringOfCharacters.str();
    cout << words << endl;
}

解决此问题的一种方法是把“逐个提取编码”的过程封装成一个迭代器(为简单起见,本例中只实现了只能向前移动的单向迭代器)。

这允许您对逻辑使用标准算法,同时让迭代器提取 5 位的块。

这是一个(简单的)示例。

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <string>
#include <type_traits>
#include <utility>

/// Input iterator over a byte buffer that yields consecutive `Bits`-wide
/// unsigned values, reading each byte from its most significant bit down.
///
/// The iterator stores a pointer to the buffer and an absolute bit offset.
/// Dereferencing reads `Bits` bits starting at that offset and has no side
/// effects; incrementing moves the offset forward by `Bits`. (Earlier
/// revisions advanced inside `operator*` and left `operator++` empty, which
/// only worked when every dereference was followed by exactly one increment.)
///
/// The buffer is assumed to use 8-bit bytes and must outlive the iterator.
template<std::size_t Bits>
struct bit_iterator
{
    static_assert(Bits >= 1 && Bits <= sizeof(std::size_t) * 8,
                  "Bits must be between 1 and the width of std::size_t");

    // Member types consumed by std::iterator_traits; standard algorithms such
    // as std::copy and std::transform look these up.
    using iterator_category = std::input_iterator_tag;
    using value_type        = std::size_t;
    using difference_type   = std::ptrdiff_t;
    using pointer           = const std::size_t*;
    using reference         = std::size_t;   // values are computed, so returned by value

    /// @param buffer  first byte of the packed data.
    /// @param bitnum  absolute bit offset this iterator points at. For an end
    ///                iterator pass the total number of bits in the buffer.
    bit_iterator(const unsigned char* buffer, std::size_t bitnum)
    : _buffer(buffer)
    , _bitnum(bitnum)
    {}

    /// Returns the `Bits`-wide value at the current offset; the first bit read
    /// becomes the most significant bit of the result.
    std::size_t operator*() const {
        std::size_t result = 0;
        for (std::size_t count = 0; count < Bits; ++count)
        {
            const std::size_t pos = _bitnum + count;
            const unsigned mask = 1u << (7 - (pos % 8));
            // Shift one position at a time so no shift ever reaches the full
            // width of std::size_t, whatever Bits is.
            result <<= 1;
            if (_buffer[pos / 8] & mask) {
                result |= 1;
            }
        }
        return result;
    }

    /// Advances to the next `Bits`-wide group.
    bit_iterator<Bits>& operator++() {
        _bitnum += Bits;
        return *this;
    }

    /// Post-increment: advances and returns the previous position.
    bit_iterator<Bits> operator++(int) {
        bit_iterator<Bits> previous(*this);
        ++*this;
        return previous;
    }

    /// "Reached the end" test rather than strict equality: true when both
    /// iterators share a buffer and fewer than `Bits` bits remain before `r`.
    /// This stops iteration before a partial trailing group would be read.
    /// It is intended to be used as `current == end`.
    bool operator==(const bit_iterator<Bits>& r) const {
        return _buffer == r._buffer && (_bitnum + Bits) > r._bitnum;
    }

    bool operator!=(const bit_iterator<Bits>& r) const {
        return !(*this == r);
    }

    const unsigned char* _buffer;   // start of the packed data (not owned)
    std::size_t _bitnum;            // absolute offset, in bits, from _buffer
};

/// Function object that converts a decoded code into its display character.
struct to_char
{
    /// Returns `symbols[index]` for codes 0-26. A 5-bit code can be as large
    /// as 31, which has no entry in the table; such codes yield '?' instead of
    /// reading past the end of the array.
    char operator()(std::size_t index) const {
        const std::size_t symbol_count = sizeof(symbols) / sizeof(symbols[0]);
        return index < symbol_count ? symbols[index] : '?';
    }

    /// Indices 0-25 are 'A'-'Z'; index 26 is a space.
    static const char symbols[27];
};

const char to_char::symbols[27] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
    'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
    'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '
};

int main()
{
    static const unsigned char bytes[25] = { 9,0,207,176,159,163,255,33,58,115,
        199,255,255,181,223,67,102,69,173,
        6,35,103,245,160,164 };


    // stream to stdout

    std::transform(bit_iterator<5>(bytes, 0),
                   bit_iterator<5>(bytes, std::extent<decltype(bytes)>::value * 8),
                   std::ostream_iterator<char>(std::cout),
                   to_char());
    std::cout << std::endl;

    // or to a string
    std::string result;
    std::transform(bit_iterator<5>(bytes, 0),
                   bit_iterator<5>(bytes, std::extent<decltype(bytes)>::value * 8),
                   std::back_inserter(result),
                   to_char());
    std::cout << result << std::endl;

    // or just print the codes
    std::copy(bit_iterator<5>(bytes, 0),
              bit_iterator<5>(bytes, std::extent<decltype(bytes)>::value * 8),
              std::ostream_iterator<std::size_t>(std::cout, ", "));
    std::cout << std::endl;

    return 0;
}

预期输出:

BEAM?ME?UP?SCOTTY????NO?INTELLIGENT?LIFE
BEAM?ME?UP?SCOTTY????NO?INTELLIGENT?LIFE
1, 4, 0, 12, 31, 12, 4, 31, 20, 15, 31, 18, 2, 14, 19, 19, 24, 31, 31, 31, 31, 13, 14, 31, 8, 13, 19, 4, 11, 11, 8, 6, 4, 13, 19, 31, 11, 8, 5, 4, 

打印出代码可能有助于诊断编码问题。

这一行没有按照您的想法行事,我已将其注释掉...

result |= (buffer & 0xFFFF); //copy buffer to result.

这一行并不是在“复制”:`|=` 是按位或,只会把 buffer 中为 1 的位叠加到 result 原有的值上,result 里已有的位不会被清除,所以它不等同于赋值。下面是可以正常工作的代码;请注意,我还把位运算中用到的变量类型改成了无符号类型。该代码的输出为:

BEAMMEUPSCOTTYNOINTELLIGENTLIFE

我相信这就是您想要得到的。代码如下,注意,我认为SO上的C++高手可以使它看起来更整洁。

#include <bitset>
#include <iostream>
#include <queue>
#include <stack>
#include <cassert>
#include <string>
#include <sstream>

using namespace std;

// Width, in bits, of one packed symbol code.
const int READIBLE_AMOUNT = 5;

// Lookup table for decoded codes: indices 0-25 are 'A'-'Z', index 26 is a space.
const char symbols[27] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
  'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
  'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '};

/* Decodes a buffer of packed READIBLE_AMOUNT-bit codes and prints the
 * resulting text to cout, followed by endl.
 *
 * Bytes are consumed in order and each byte is read from its most significant
 * bit down. Every complete group of READIBLE_AMOUNT bits is used as an index
 * into `symbols`.
 *
 * bytey       - pointer to the packed bytes; may be null only when
 *               sizeOfArray is 0.
 * sizeOfArray - number of bytes to decode. Any length is accepted; an empty
 *               input prints just the line ending.
 *
 * Codes with no entry in `symbols` (27-31) are skipped rather than read past
 * the end of the table. Trailing bits that do not form a complete group are
 * discarded.
 */
inline void bitWiseOpsDemo(unsigned char *bytey, size_t sizeOfArray){
  assert(bytey != 0 || sizeOfArray == 0);

  const size_t symbolCount = sizeof(symbols) / sizeof(symbols[0]);
  const unsigned int codeMask = (1u << READIBLE_AMOUNT) - 1u;

  // Unread bits, right-aligned. At most READIBLE_AMOUNT - 1 bits are carried
  // over between bytes, so this never holds more than 12 meaningful bits.
  unsigned int pending = 0;
  int numBitsReadible = 0; // how many low bits of `pending` are still unread
  string words;

  for (size_t i = 0; i < sizeOfArray; ++i) {
    // Append the new byte below the leftover bits. Unsigned arithmetic is
    // used throughout so shifts never touch a sign bit.
    pending = (pending << 8) | bytey[i];
    numBitsReadible += 8;

    while (numBitsReadible >= READIBLE_AMOUNT) {
      // The oldest unread bits are the highest ones: shift them down to the
      // bottom and mask off everything above the code.
      numBitsReadible -= READIBLE_AMOUNT;
      const unsigned int indexInto = (pending >> numBitsReadible) & codeMask;
      if (indexInto < symbolCount) {
        words += symbols[indexInto];
      }
    }

    // Drop the bits already consumed so stale data cannot leak into later codes.
    pending &= (1u << numBitsReadible) - 1u;
  }

  cout << words << endl;
}


// Number of packed bytes in the sample message.
const size_t SIZE = 25;

// Decodes the sample message and prints the result.
int main() {
  unsigned char bytes[SIZE] = {
      9,   0,   207, 176, 159, 163, 255, 33,  58,  115,
      199, 255, 255, 181, 223, 67,  102, 69,  173,
      6,   35,  103, 245, 160, 164
  };

  bitWiseOpsDemo(bytes, SIZE);

  return 0;
}