"Occasional" 分段错误
"Occasional" Segmentation Fault
问题与 GRIB 解析器有关(link 到 GRIB 文件 https://github.com/Gifciak/GRIB),
当我执行我的代码时(通过代码块或通过控制台 linux - g++ main.cpp -pedantic
)我收到错误,分段错误但它并不总是发生。
例如,我编译了10次,其中8次会出错,2次一切正常,这会为我提供控制台输出和信息。
根据我的研究,问题出在 std::copy
,因为它可能正在尝试复制一个不再存在的迭代器。
有人可以解释为什么会这样吗?
为什么它不总是崩溃或成功?
#include <iostream>
#include <vector>
#include <fstream>
#include <iterator>
#include <algorithm>
using ByteVec = std::vector<uint8_t>;
template<typename T, size_t size = sizeof(T)>
auto getReverseEndianValue(const auto & iter) {
union {
T result;
char tmp[size];
} buffer;
auto reverseIter = std::make_reverse_iterator(std::next(iter, size));
std::copy(reverseIter, std::next(reverseIter, size), buffer.tmp);
return buffer.result;
}
enum Edition {
Edition_Unknown = -1,
Edition_GRIB1 = 1,
};
namespace section {
class IS {
public:
uint32_t magicFlag;
uint32_t size;
Edition edition;
static IS read(const auto & iter) {
IS result;
result.magicFlag = getReverseEndianValue<uint32_t>(iter);
result.size = getReverseEndianValue<uint32_t, 3>(iter + 4);
result.edition = (*(iter + 7) == 1 ? Edition_GRIB1 : Edition_Unknown);
return result;
}
};
class PDS {
public:
uint32_t size;
uint8_t tableVersion;
uint8_t indentificatorOfCenter;
uint8_t numProcessID;
uint8_t gridIndentification;
uint8_t flagForGDSorBMS;
uint8_t indParamAndUnit;
uint8_t indTypeOfLevelOrLayer;
uint16_t levelOrLayer;
uint8_t year;
uint8_t month;
uint8_t day;
uint8_t hour;
uint8_t minute;
uint8_t forecastTimeUnit;
uint8_t p1;
uint8_t p2;
uint8_t indTimeRange;
uint16_t averageOrAccumulate;
uint8_t missing;
uint8_t century;
uint8_t subcenterId;
uint16_t decimalScale;
ByteVec data;
static PDS read(const auto& iter) {
PDS result;
result.size = getReverseEndianValue<uint32_t, 3>(iter);
result.tableVersion = getReverseEndianValue<uint8_t>(iter + 3);
result.indentificatorOfCenter = getReverseEndianValue<uint8_t>(iter + 4);
result.numProcessID = getReverseEndianValue<uint8_t>(iter + 5);
result.gridIndentification = getReverseEndianValue<uint8_t>(iter + 6);
result.flagForGDSorBMS = getReverseEndianValue<uint8_t>(iter + 7);
result.indParamAndUnit = getReverseEndianValue<uint8_t>(iter + 8);
result.indTypeOfLevelOrLayer = getReverseEndianValue<uint8_t>(iter + 9);
result.levelOrLayer = getReverseEndianValue<uint16_t>(iter + 10);
result.year = getReverseEndianValue<uint8_t>(iter + 12);
result.month = getReverseEndianValue<uint8_t>(iter + 13);
result.day = getReverseEndianValue<uint8_t>(iter + 14);
result.hour = getReverseEndianValue<uint8_t>(iter + 15);
result.minute = getReverseEndianValue<uint8_t>(iter + 16);
result.forecastTimeUnit = getReverseEndianValue<uint8_t>(iter + 17);
result.p1 = getReverseEndianValue<uint8_t>(iter + 18);
result.p2 = getReverseEndianValue<uint8_t>(iter + 19);
result.indTimeRange = getReverseEndianValue<uint8_t>(iter + 20);
result.averageOrAccumulate = getReverseEndianValue<uint16_t>(iter + 21);
result.missing = getReverseEndianValue<uint8_t>(iter + 23);
result.century = getReverseEndianValue<uint8_t>(iter + 24);
result.subcenterId = getReverseEndianValue<uint8_t>(iter + 25);
result.decimalScale = getReverseEndianValue<uint16_t>(iter + 26);
return result;
}
};
}
class GribData {
private:
section::IS secIS;
section::PDS secPDS;
public:
void print() {
std::cout
<< "### Section IS ###\n"
<< "magicFlag: " << +secIS.magicFlag << "\n"
<< "size: " << +secIS.size << "\n"
<< "edition: " << +secIS.edition << "\n"
<< "\n### Section PDS ###\n"
<< "size: " << +secPDS.size << "\n"
<< "tableVersion: " << +secPDS.tableVersion << "\n"
<< "indentificatorOfCenter: " << +secPDS.indentificatorOfCenter << "\n"
<< "numProcessID: " << +secPDS.numProcessID << "\n"
<< "gridIndentification: " << +secPDS.gridIndentification << "\n"
<< "flagForGDSorBMS: " << +secPDS.flagForGDSorBMS << "\n"
<< "indParamAndUnit: " << +secPDS.indParamAndUnit << "\n"
<< "indTypeOfLevelOrLayer: " << +secPDS.indTypeOfLevelOrLayer << "\n"
<< "levelOrLayer: " << +secPDS.levelOrLayer << "\n"
<< "year: " << +secPDS.year << "\n"
<< "month: " << +secPDS.month << "\n"
<< "day: " << +secPDS.day << "\n"
<< "hour: " << +secPDS.hour << "\n"
<< "minute: " << +secPDS.minute << "\n"
<< "forecastTimeUnit: " << +secPDS.forecastTimeUnit << "\n"
<< "p1: " << +secPDS.p1 << "\n"
<< "p2: " << +secPDS.p2 << "\n"
<< "indTimeRange: " << +secPDS.indTimeRange << "\n"
<< "averageOrAccumulate: " << +secPDS.averageOrAccumulate << "\n"
<< "missing: " << +secPDS.missing << "\n"
<< "century: " << +secPDS.century << "\n"
<< "subcenterId: " << +secPDS.subcenterId << "\n"
<< "decimalScale: " << +secPDS.decimalScale << "\n";
}
static GribData loadData(const ByteVec& rawdata) {
GribData result;
constexpr char MAGIC_START[4] = { 'G', 'R', 'I', 'B' };
constexpr char MAGIC_END[4] = { '7', '7', '7', '7' };
auto start = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_START),
std::end(MAGIC_START));
auto end = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_END),
std::end(MAGIC_END));
ByteVec data(start, end + sizeof(MAGIC_END));
result.secIS = section::IS::read(data.cbegin());
result.secPDS = section::PDS::read(data.cbegin() + 8);
auto size = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 4);
auto sec1 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8);
auto sec2 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8 + sec1);
auto sec3 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8 + sec1 + sec2);
std::cout
<< "size: " << size << "\n"
<< "sec0: " << 8 << "\n"
<< "sec1: " << sec1 << "\n"
<< "sec2: " << sec2 << "\n"
<< "sec3: " << sec3 << "\n"
<< "end flag: " << sizeof(MAGIC_END) << "\n"
<< "sum: " << 8 + sec1 + sec2 + sec3 + sizeof(MAGIC_END) << "\n\n";
return result;
}
static GribData loadDataFromFile(const std::string& path) {
std::ifstream file(path, std::ios::binary);
ByteVec data;
std::copy(std::istreambuf_iterator<char>(file),
{},
std::back_inserter(data));
return loadData(data);
}
};
int main() {
auto grib = GribData::loadDataFromFile("message_2_G1.grib");
grib.print();
}
这是预期的结果,因为我从控制台复制了它
size: 4538
sec0: 8
sec1: 28
sec2: 178
sec3: 4320
end flag: 4
sum: 4538
### Section IS ###
magicFlag: 1196575042
size: 1191186874
edition: 1
### Section PDS ###
size: 28
tableVersion: 2
indentificatorOfCenter: 7
numProcessID: 81
gridIndentification: 37
flagForGDSorBMS: 128
indParamAndUnit: 33
indTypeOfLevelOrLayer: 100
levelOrLayer: 850
year: 15
month: 3
day: 10
hour: 0
minute: 0
forecastTimeUnit: 1
p1: 0
p2: 0
indTimeRange: 10
averageOrAccumulate: 0
missing: 0
century: 21
subcenterId: 0
decimalScale: 1
首先,使用g++ main.cpp -pedantic
不是很有用,因为你没有启用任何警告。将 -Wall -Wextra
添加到您的编译器标志,以及 -g
以便您可以对其进行调试。
使用 -fsanitize=undefined
编译显示 运行时间错误,这是由于在需要有效指针的地方使用空指针导致的:
/usr/include/c++/8/bits/stl_algobase.h:368:23: runtime error: null pointer passed as argument 2, which is declared to never be null
Segmentation fault (core dumped)
这意味着您的程序有错误。
使用 -D_GLIBCXX_DEBUG
编译会向 std::vector
添加额外的检查,这会告诉您问题所在:
/usr/include/c++/8/debug/safe_iterator.h:374:
Error: attempt to advance a past-the-end iterator 4 steps, which falls
outside its valid range.
Objects involved in the operation:
iterator @ 0x0x7fffb09ceb90 {
type = __gnu_debug::_Safe_iterator<__gnu_cxx::__normal_iterator<unsigned char const*, std::__cxx1998::vector<unsigned char, std::allocator<unsigned char> > >, std::__debug::vector<unsigned char, std::allocator<unsigned char> > > (constant iterator);
state = past-the-end;
references sequence with type 'std::__debug::vector<unsigned char, std::allocator<unsigned char> >' @ 0x0x7fffb09cf050
}
Aborted (core dumped)
您应该 运行 在调试器下运行程序以查看此无效迭代器增量发生的位置。 运行 GDB 中的程序然后使用其 up
命令向上移动堆栈显示错误来自此处,在 loadData
:
constexpr char MAGIC_START[4] = { 'G', 'R', 'I', 'B' };
constexpr char MAGIC_END[4] = { '7', '7', '7', '7' };
auto start = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_START),
std::end(MAGIC_START));
auto end = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_END),
std::end(MAGIC_END));
ByteVec data(start, end + sizeof(MAGIC_END));
^^^^^^^^^^^^^^^^^^^^^^^
考虑当 rawdata
不包含 MAGIC_START
个字符但包含 MAGIC_END
个字符时会发生什么。 start
和 end
会形成有效的迭代器范围吗?
考虑当 rawdata
不包含 MAGIC_END
字符时会发生什么。 end + sizeof(MAGIC_END)
有效吗?
您不应该假设对 std::search
的两次调用按预期工作。您应该通过测试 start == rawdata.end()
或 end == rawdata.end()
来添加一些错误检查。如果其中任何一个为真,则说明出了问题(可能是 rawdata
字符串中的错误输入)。
您还应该学习如何使用调试器,并了解您的编译器提供的用于检测错误的其他工具(例如,应该使用 GCC 的 -fsanitize=undefined
和 -D_GLIBCXX_DEBUG
选项来帮助确认存在错误,应该使用 GDB 来查找这些错误发生的位置)。
问题与 GRIB 解析器有关(link 到 GRIB 文件 https://github.com/Gifciak/GRIB),
当我执行我的代码时(通过代码块或通过控制台 linux - g++ main.cpp -pedantic
)我收到错误,分段错误但它并不总是发生。
例如,我编译了10次,其中8次会出错,2次一切正常,这会为我提供控制台输出和信息。
根据我的研究,问题出在 std::copy
,因为它可能正在尝试复制一个不再存在的迭代器。
有人可以解释为什么会这样吗? 为什么它不总是崩溃或成功?
#include <iostream>
#include <vector>
#include <fstream>
#include <iterator>
#include <algorithm>
using ByteVec = std::vector<uint8_t>;
template<typename T, size_t size = sizeof(T)>
auto getReverseEndianValue(const auto & iter) {
union {
T result;
char tmp[size];
} buffer;
auto reverseIter = std::make_reverse_iterator(std::next(iter, size));
std::copy(reverseIter, std::next(reverseIter, size), buffer.tmp);
return buffer.result;
}
enum Edition {
Edition_Unknown = -1,
Edition_GRIB1 = 1,
};
namespace section {
class IS {
public:
uint32_t magicFlag;
uint32_t size;
Edition edition;
static IS read(const auto & iter) {
IS result;
result.magicFlag = getReverseEndianValue<uint32_t>(iter);
result.size = getReverseEndianValue<uint32_t, 3>(iter + 4);
result.edition = (*(iter + 7) == 1 ? Edition_GRIB1 : Edition_Unknown);
return result;
}
};
class PDS {
public:
uint32_t size;
uint8_t tableVersion;
uint8_t indentificatorOfCenter;
uint8_t numProcessID;
uint8_t gridIndentification;
uint8_t flagForGDSorBMS;
uint8_t indParamAndUnit;
uint8_t indTypeOfLevelOrLayer;
uint16_t levelOrLayer;
uint8_t year;
uint8_t month;
uint8_t day;
uint8_t hour;
uint8_t minute;
uint8_t forecastTimeUnit;
uint8_t p1;
uint8_t p2;
uint8_t indTimeRange;
uint16_t averageOrAccumulate;
uint8_t missing;
uint8_t century;
uint8_t subcenterId;
uint16_t decimalScale;
ByteVec data;
static PDS read(const auto& iter) {
PDS result;
result.size = getReverseEndianValue<uint32_t, 3>(iter);
result.tableVersion = getReverseEndianValue<uint8_t>(iter + 3);
result.indentificatorOfCenter = getReverseEndianValue<uint8_t>(iter + 4);
result.numProcessID = getReverseEndianValue<uint8_t>(iter + 5);
result.gridIndentification = getReverseEndianValue<uint8_t>(iter + 6);
result.flagForGDSorBMS = getReverseEndianValue<uint8_t>(iter + 7);
result.indParamAndUnit = getReverseEndianValue<uint8_t>(iter + 8);
result.indTypeOfLevelOrLayer = getReverseEndianValue<uint8_t>(iter + 9);
result.levelOrLayer = getReverseEndianValue<uint16_t>(iter + 10);
result.year = getReverseEndianValue<uint8_t>(iter + 12);
result.month = getReverseEndianValue<uint8_t>(iter + 13);
result.day = getReverseEndianValue<uint8_t>(iter + 14);
result.hour = getReverseEndianValue<uint8_t>(iter + 15);
result.minute = getReverseEndianValue<uint8_t>(iter + 16);
result.forecastTimeUnit = getReverseEndianValue<uint8_t>(iter + 17);
result.p1 = getReverseEndianValue<uint8_t>(iter + 18);
result.p2 = getReverseEndianValue<uint8_t>(iter + 19);
result.indTimeRange = getReverseEndianValue<uint8_t>(iter + 20);
result.averageOrAccumulate = getReverseEndianValue<uint16_t>(iter + 21);
result.missing = getReverseEndianValue<uint8_t>(iter + 23);
result.century = getReverseEndianValue<uint8_t>(iter + 24);
result.subcenterId = getReverseEndianValue<uint8_t>(iter + 25);
result.decimalScale = getReverseEndianValue<uint16_t>(iter + 26);
return result;
}
};
}
class GribData {
private:
section::IS secIS;
section::PDS secPDS;
public:
void print() {
std::cout
<< "### Section IS ###\n"
<< "magicFlag: " << +secIS.magicFlag << "\n"
<< "size: " << +secIS.size << "\n"
<< "edition: " << +secIS.edition << "\n"
<< "\n### Section PDS ###\n"
<< "size: " << +secPDS.size << "\n"
<< "tableVersion: " << +secPDS.tableVersion << "\n"
<< "indentificatorOfCenter: " << +secPDS.indentificatorOfCenter << "\n"
<< "numProcessID: " << +secPDS.numProcessID << "\n"
<< "gridIndentification: " << +secPDS.gridIndentification << "\n"
<< "flagForGDSorBMS: " << +secPDS.flagForGDSorBMS << "\n"
<< "indParamAndUnit: " << +secPDS.indParamAndUnit << "\n"
<< "indTypeOfLevelOrLayer: " << +secPDS.indTypeOfLevelOrLayer << "\n"
<< "levelOrLayer: " << +secPDS.levelOrLayer << "\n"
<< "year: " << +secPDS.year << "\n"
<< "month: " << +secPDS.month << "\n"
<< "day: " << +secPDS.day << "\n"
<< "hour: " << +secPDS.hour << "\n"
<< "minute: " << +secPDS.minute << "\n"
<< "forecastTimeUnit: " << +secPDS.forecastTimeUnit << "\n"
<< "p1: " << +secPDS.p1 << "\n"
<< "p2: " << +secPDS.p2 << "\n"
<< "indTimeRange: " << +secPDS.indTimeRange << "\n"
<< "averageOrAccumulate: " << +secPDS.averageOrAccumulate << "\n"
<< "missing: " << +secPDS.missing << "\n"
<< "century: " << +secPDS.century << "\n"
<< "subcenterId: " << +secPDS.subcenterId << "\n"
<< "decimalScale: " << +secPDS.decimalScale << "\n";
}
static GribData loadData(const ByteVec& rawdata) {
GribData result;
constexpr char MAGIC_START[4] = { 'G', 'R', 'I', 'B' };
constexpr char MAGIC_END[4] = { '7', '7', '7', '7' };
auto start = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_START),
std::end(MAGIC_START));
auto end = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_END),
std::end(MAGIC_END));
ByteVec data(start, end + sizeof(MAGIC_END));
result.secIS = section::IS::read(data.cbegin());
result.secPDS = section::PDS::read(data.cbegin() + 8);
auto size = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 4);
auto sec1 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8);
auto sec2 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8 + sec1);
auto sec3 = getReverseEndianValue<uint32_t, 3>(data.cbegin() + 8 + sec1 + sec2);
std::cout
<< "size: " << size << "\n"
<< "sec0: " << 8 << "\n"
<< "sec1: " << sec1 << "\n"
<< "sec2: " << sec2 << "\n"
<< "sec3: " << sec3 << "\n"
<< "end flag: " << sizeof(MAGIC_END) << "\n"
<< "sum: " << 8 + sec1 + sec2 + sec3 + sizeof(MAGIC_END) << "\n\n";
return result;
}
static GribData loadDataFromFile(const std::string& path) {
std::ifstream file(path, std::ios::binary);
ByteVec data;
std::copy(std::istreambuf_iterator<char>(file),
{},
std::back_inserter(data));
return loadData(data);
}
};
int main() {
auto grib = GribData::loadDataFromFile("message_2_G1.grib");
grib.print();
}
这是预期的结果,因为我从控制台复制了它
size: 4538 sec0: 8 sec1: 28 sec2: 178 sec3: 4320 end flag: 4 sum: 4538 ### Section IS ### magicFlag: 1196575042 size: 1191186874 edition: 1 ### Section PDS ### size: 28 tableVersion: 2 indentificatorOfCenter: 7 numProcessID: 81 gridIndentification: 37 flagForGDSorBMS: 128 indParamAndUnit: 33 indTypeOfLevelOrLayer: 100 levelOrLayer: 850 year: 15 month: 3 day: 10 hour: 0 minute: 0 forecastTimeUnit: 1 p1: 0 p2: 0 indTimeRange: 10 averageOrAccumulate: 0 missing: 0 century: 21 subcenterId: 0 decimalScale: 1
首先,使用g++ main.cpp -pedantic
不是很有用,因为你没有启用任何警告。将 -Wall -Wextra
添加到您的编译器标志,以及 -g
以便您可以对其进行调试。
使用 -fsanitize=undefined
编译显示 运行时间错误,这是由于在需要有效指针的地方使用空指针导致的:
/usr/include/c++/8/bits/stl_algobase.h:368:23: runtime error: null pointer passed as argument 2, which is declared to never be null
Segmentation fault (core dumped)
这意味着您的程序有错误。
使用 -D_GLIBCXX_DEBUG
编译会向 std::vector
添加额外的检查,这会告诉您问题所在:
/usr/include/c++/8/debug/safe_iterator.h:374:
Error: attempt to advance a past-the-end iterator 4 steps, which falls
outside its valid range.
Objects involved in the operation:
iterator @ 0x0x7fffb09ceb90 {
type = __gnu_debug::_Safe_iterator<__gnu_cxx::__normal_iterator<unsigned char const*, std::__cxx1998::vector<unsigned char, std::allocator<unsigned char> > >, std::__debug::vector<unsigned char, std::allocator<unsigned char> > > (constant iterator);
state = past-the-end;
references sequence with type 'std::__debug::vector<unsigned char, std::allocator<unsigned char> >' @ 0x0x7fffb09cf050
}
Aborted (core dumped)
您应该 运行 在调试器下运行程序以查看此无效迭代器增量发生的位置。 运行 GDB 中的程序然后使用其 up
命令向上移动堆栈显示错误来自此处,在 loadData
:
constexpr char MAGIC_START[4] = { 'G', 'R', 'I', 'B' };
constexpr char MAGIC_END[4] = { '7', '7', '7', '7' };
auto start = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_START),
std::end(MAGIC_START));
auto end = std::search(rawdata.cbegin(),
rawdata.cend(),
std::begin(MAGIC_END),
std::end(MAGIC_END));
ByteVec data(start, end + sizeof(MAGIC_END));
^^^^^^^^^^^^^^^^^^^^^^^
考虑当 rawdata
不包含 MAGIC_START
个字符但包含 MAGIC_END
个字符时会发生什么。 start
和 end
会形成有效的迭代器范围吗?
考虑当 rawdata
不包含 MAGIC_END
字符时会发生什么。 end + sizeof(MAGIC_END)
有效吗?
您不应该假设对 std::search
的两次调用按预期工作。您应该通过测试 start == rawdata.end()
或 end == rawdata.end()
来添加一些错误检查。如果其中任何一个为真,则说明出了问题(可能是 rawdata
字符串中的错误输入)。
您还应该学习如何使用调试器,并了解您的编译器提供的用于检测错误的其他工具(例如,应该使用 GCC 的 -fsanitize=undefined
和 -D_GLIBCXX_DEBUG
选项来帮助确认存在错误,应该使用 GDB 来查找这些错误发生的位置)。