使用 fstream 从文件中读取缺失值 "NA"
reading missing values "NA" from file using fstream
我正在使用这些代码从 txt 文件中读取数据,如果我的数据包含所有数字,它就可以正常工作。遗憾的是,文件中目前存在缺失值,表示为 "NA"。请您建议任何 way/codes 来处理这个问题,并且不会影响以后的计算。谢谢
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
struct Data {
vector<double> cow_id;
vector<double> age_obs;
vector<double> dim_obs;
vector<double> my_obs;
vector<double> mcf_obs;
vector<double> mcp_obs;
vector<double> mcl_obs;
vector<double> bw_obs;
vector<double> bcs_obs;
void read_input (const string filepath)
{
ifstream data_in (filepath.c_str());
if (!data_in)
{cout<<"Failed to open"<<endl;}
else
{
double id,age, dim, my, mcf, mcp, mcl, bw, bcs;
string dummy_line;
getline(data_in, dummy_line);
string line;
while (data_in>>id>>age>>dim>>my>>mcf>>mcp>>mcl>>bw>>bcs)
{
cow_id.push_back(id);
age_obs.push_back(age);
dim_obs.push_back(dim);
my_obs.push_back(my);
mcf_obs.push_back(mcf);
mcp_obs.push_back(mcp);
mcl_obs.push_back(mcl);
bw_obs.push_back(bw);
bcs_obs.push_back(bcs);
}
data_in.close();
}
size_t size=age_obs.size();
for (size_t i=0; i<size; i++)
{
cout<<cow_id[i]<<'\t'<<age_obs[i]<<'\t'<<dim_obs[i]<<'\t'<<my_obs[i]<<'\t'<<mcf_obs[i]<<'\t'<<mcp_obs[i]<<'\t'<<mcl_obs[i]<<'\t'<<bw_obs[i]<<'\t'<<bcs_obs[i]<<endl;
}
}
};
int main()
{
Data input;
input.read_input("C:\Data\C++\learncpp\data.txt");
}
您需要以字符串标记的形式读取文件。如果不等于 "NA".
,则将标记转换为整数
// Helper function to get a double from a token.
// If the string is "NA", decide what's an appropriate value to return.
double get_number(string const& str)
{
if ( str == "NA" )
{
return 0.0; // Maybe?
}
else
{
return std::stod(str);
}
}
void read_input (const string filepath)
{
ifstream data_in (filepath.c_str());
if (!data_in)
{cout<<"Failed to open"<<endl;}
else
{
// Read tokens as strings.
string id, age, dim, my, mcf, mcp, mcl, bw, bcs;
string dummy_line;
getline(data_in, dummy_line);
string line;
while (data_in >> id >> age >> dim >> my >> mcf >> mcp >> mcl >> bw >> bcs)
{
// Get the number from the string and add to the vectors.
cow_id.push_back(get_number(id));
age_obs.push_back(get_number(age));
dim_obs.push_back(get_number(dim));
my_obs.push_back(get_number(my));
mcf_obs.push_back(get_number(mcf));
mcp_obs.push_back(get_number(mcp));
mcl_obs.push_back(get_number(mcl));
bw_obs.push_back(get_number(bw));
bcs_obs.push_back(get_number(bcs));
}
data_in.close();
}
size_t size=age_obs.size();
for (size_t i=0; i<size; i++)
{
cout<<cow_id[i]<<'\t'<<age_obs[i]<<'\t'<<dim_obs[i]<<'\t'<<my_obs[i]<<'\t'<<mcf_obs[i]<<'\t'<<mcp_obs[i]<<'\t'<<mcl_obs[i]<<'\t'<<bw_obs[i]<<'\t'<<bcs_obs[i]<<endl;
}
}
实施对 OP 有效的 get_number
。
double get_number(string const& str)
{
if ( str == "NA" )
{
return 0.0; // Maybe?
}
else
{
std:istringstream iss(str);
double val;
iss >> val;
return val;
}
}
我正在使用这些代码从 txt 文件中读取数据,如果我的数据包含所有数字,它就可以正常工作。遗憾的是,文件中目前存在缺失值,表示为 "NA"。请您建议任何 way/codes 来处理这个问题,并且不会影响以后的计算。谢谢
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
struct Data {
vector<double> cow_id;
vector<double> age_obs;
vector<double> dim_obs;
vector<double> my_obs;
vector<double> mcf_obs;
vector<double> mcp_obs;
vector<double> mcl_obs;
vector<double> bw_obs;
vector<double> bcs_obs;
void read_input (const string filepath)
{
ifstream data_in (filepath.c_str());
if (!data_in)
{cout<<"Failed to open"<<endl;}
else
{
double id,age, dim, my, mcf, mcp, mcl, bw, bcs;
string dummy_line;
getline(data_in, dummy_line);
string line;
while (data_in>>id>>age>>dim>>my>>mcf>>mcp>>mcl>>bw>>bcs)
{
cow_id.push_back(id);
age_obs.push_back(age);
dim_obs.push_back(dim);
my_obs.push_back(my);
mcf_obs.push_back(mcf);
mcp_obs.push_back(mcp);
mcl_obs.push_back(mcl);
bw_obs.push_back(bw);
bcs_obs.push_back(bcs);
}
data_in.close();
}
size_t size=age_obs.size();
for (size_t i=0; i<size; i++)
{
cout<<cow_id[i]<<'\t'<<age_obs[i]<<'\t'<<dim_obs[i]<<'\t'<<my_obs[i]<<'\t'<<mcf_obs[i]<<'\t'<<mcp_obs[i]<<'\t'<<mcl_obs[i]<<'\t'<<bw_obs[i]<<'\t'<<bcs_obs[i]<<endl;
}
}
};
int main()
{
Data input;
input.read_input("C:\Data\C++\learncpp\data.txt");
}
您需要以字符串标记的形式读取文件。如果不等于 "NA".
,则将标记转换为整数// Helper function to get a double from a token.
// If the string is "NA", decide what's an appropriate value to return.
double get_number(string const& str)
{
if ( str == "NA" )
{
return 0.0; // Maybe?
}
else
{
return std::stod(str);
}
}
void read_input (const string filepath)
{
ifstream data_in (filepath.c_str());
if (!data_in)
{cout<<"Failed to open"<<endl;}
else
{
// Read tokens as strings.
string id, age, dim, my, mcf, mcp, mcl, bw, bcs;
string dummy_line;
getline(data_in, dummy_line);
string line;
while (data_in >> id >> age >> dim >> my >> mcf >> mcp >> mcl >> bw >> bcs)
{
// Get the number from the string and add to the vectors.
cow_id.push_back(get_number(id));
age_obs.push_back(get_number(age));
dim_obs.push_back(get_number(dim));
my_obs.push_back(get_number(my));
mcf_obs.push_back(get_number(mcf));
mcp_obs.push_back(get_number(mcp));
mcl_obs.push_back(get_number(mcl));
bw_obs.push_back(get_number(bw));
bcs_obs.push_back(get_number(bcs));
}
data_in.close();
}
size_t size=age_obs.size();
for (size_t i=0; i<size; i++)
{
cout<<cow_id[i]<<'\t'<<age_obs[i]<<'\t'<<dim_obs[i]<<'\t'<<my_obs[i]<<'\t'<<mcf_obs[i]<<'\t'<<mcp_obs[i]<<'\t'<<mcl_obs[i]<<'\t'<<bw_obs[i]<<'\t'<<bcs_obs[i]<<endl;
}
}
实施对 OP 有效的 get_number
。
double get_number(string const& str)
{
if ( str == "NA" )
{
return 0.0; // Maybe?
}
else
{
std:istringstream iss(str);
double val;
iss >> val;
return val;
}
}