rcpp中的排列顺序
Permutation order in rcpp
我有跟进 this question。在 R
中,您可以在辅助键之后订购重复项。我怎样才能在 Rcpp
中做到这一点?我正在尝试以下操作(灵感来自 Kevin Ushey 对原始问题的回答),这似乎适用于简单的情况。我如何概括这一点?
// [[Rcpp::export]]
Rcpp::IntegerVector order_(Rcpp::NumericVector x, Rcpp::NumericVector y) {
// sort x
Rcpp::NumericVector sorted_x = clone(x).sort();
// find order of x only
Rcpp::IntegerVector order = Rcpp::match(sorted_x, x);
// find order of y for each order of x
for (int i = 1; i <= Rcpp::max(order); ++i){
Rcpp::LogicalVector duplicates = order == i;
int k = which_max(duplicates);
Rcpp::NumericVector duplicated_y = y[duplicates];
Rcpp::NumericVector sorted_dy = clone(duplicated_y).sort();
Rcpp::IntegerVector order_y = Rcpp::match(sorted_dy, duplicated_y);
for (int j = 0; j < order_y.length(); ++j){
order(k + j) = order(k + j) + order_y(j) - 1;
}
}
return order;
}
如果我理解您的要求,一种方法是利用 comparison operators of std::tuple
,应该 "do the right thing"。
// [[Rcpp::plugins(cpp11)]]
#include <Rcpp.h>
#include <tuple>
template <typename... Ts>
struct Point {
std::size_t index;
using tuple_t = std::tuple<Ts...>;
tuple_t data;
template <typename... Vs>
Point(std::size_t i, const Vs&... vs)
: index(i),
data(std::forward<decltype(vs[i])>(vs[i])...)
{}
bool operator<(const Point& other) const {
return data < other.data;
}
};
template <typename... Ts>
class PointVector {
public:
std::size_t sz;
using vector_t = std::vector<Point<Ts...>>;
vector_t data;
template <typename... Vs>
PointVector(const Vs&... vs)
: sz(min_size(vs...))
{
data.reserve(sz);
for (std::size_t i = 0; i < sz; i++) {
data.emplace_back(i, vs...);
}
}
Rcpp::IntegerVector sorted_index() const {
vector_t tmp(data);
std::stable_sort(tmp.begin(), tmp.end());
Rcpp::IntegerVector res(sz);
for (std::size_t i = 0; i < sz; i++) {
res[i] = tmp[i].index + 1;
}
return res;
}
private:
template <typename V>
std::size_t min_size(const V& v) {
return v.size();
}
template <typename T, typename S, typename... Vs>
std::size_t min_size(const T& t, const S& s, const Vs&... vs) {
return t.size() < s.size() ?
min_size(t, vs...) :
min_size(s, vs...);
}
};
using namespace Rcpp;
// [[Rcpp::export]]
IntegerVector order2(NumericVector x, NumericVector y) {
PointVector<double, double> pv(x, y);
return pv.sorted_index();
}
// [[Rcpp::export]]
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) {
PointVector<double, double, double> pv(x, y, z);
return pv.sorted_index();
}
使用此数据进行演示,
x <- rep(1:2 + 0.5, 10)
y <- rep(1:4 + 0.5, 5)
z <- rep(1:5 + 0.5, 4)
(df <- data.frame(x, y, z))
# x y z
# 1 1.5 1.5 1.5
# 2 2.5 2.5 2.5
# 3 1.5 3.5 3.5
# 4 2.5 4.5 4.5
# 5 1.5 1.5 5.5
# 6 2.5 2.5 1.5
# 7 1.5 3.5 2.5
# 8 2.5 4.5 3.5
# 9 1.5 1.5 4.5
# 10 2.5 2.5 5.5
# 11 1.5 3.5 1.5
# 12 2.5 4.5 2.5
# 13 1.5 1.5 3.5
# 14 2.5 2.5 4.5
# 15 1.5 3.5 5.5
# 16 2.5 4.5 1.5
# 17 1.5 1.5 2.5
# 18 2.5 2.5 3.5
# 19 1.5 3.5 4.5
# 20 2.5 4.5 5.5
C++ 版本产生与基本 R 等价物相同的结果:
all.equal(base::order(x, y, z), order3(x, y, z))
# [1] TRUE
df[order3(x, y, z),]
# x y z
# 1 1.5 1.5 1.5
# 17 1.5 1.5 2.5
# 13 1.5 1.5 3.5
# 9 1.5 1.5 4.5
# 5 1.5 1.5 5.5
# 11 1.5 3.5 1.5
# 7 1.5 3.5 2.5
# 3 1.5 3.5 3.5
# 19 1.5 3.5 4.5
# 15 1.5 3.5 5.5
# 6 2.5 2.5 1.5
# 2 2.5 2.5 2.5
# 18 2.5 2.5 3.5
# 14 2.5 2.5 4.5
# 10 2.5 2.5 5.5
# 16 2.5 4.5 1.5
# 12 2.5 4.5 2.5
# 8 2.5 4.5 3.5
# 4 2.5 4.5 4.5
# 20 2.5 4.5 5.5
编辑: 这里有一个更容易接受的版本,它允许您编写 auto pv = MakePointVector(...)
而不是指定所有类型参数:
// Point class as before
template <typename... Vecs>
class PointVector {
public:
std::size_t sz;
using point_t = Point<typename std::remove_reference<decltype(Vecs{}[0])>::type...>;
using vector_t = std::vector<point_t>;
vector_t data;
template <typename... Vs>
PointVector(const Vecs&... vs)
: sz(min_size(vs...))
{
data.reserve(sz);
for (std::size_t i = 0; i < sz; i++) {
data.emplace_back(i, vs...);
}
}
Rcpp::IntegerVector sorted_index() const {
vector_t tmp(data);
std::stable_sort(tmp.begin(), tmp.end());
Rcpp::IntegerVector res(sz);
for (std::size_t i = 0; i < sz; i++) {
res[i] = tmp[i].index + 1;
}
return res;
}
private:
template <typename V>
std::size_t min_size(const V& v) {
return v.size();
}
template <typename T, typename S, typename... Vs>
std::size_t min_size(const T& t, const S& s, const Vs&... vs) {
return t.size() < s.size() ?
min_size(t, vs...) :
min_size(s, vs...);
}
};
template <typename... Vecs>
PointVector<Vecs...> MakePointVector(const Vecs&... vecs) {
return PointVector<Vecs...>(vecs...);
}
using namespace Rcpp;
// [[Rcpp::export]]
IntegerVector order2(NumericVector x, NumericVector y) {
auto pv = MakePointVector(x, y);
return pv.sorted_index();
}
// [[Rcpp::export]]
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) {
auto pv = MakePointVector(x, y, z);
return pv.sorted_index();
}
/*** R
all.equal(base::order(x, y), order2(x, y))
# [1] TRUE
all.equal(base::order(x, y, z), order3(x, y, z))
# [1] TRUE
*/
我有跟进 this question。在 R
中,您可以在辅助键之后订购重复项。我怎样才能在 Rcpp
中做到这一点?我正在尝试以下操作(灵感来自 Kevin Ushey 对原始问题的回答),这似乎适用于简单的情况。我如何概括这一点?
// [[Rcpp::export]]
Rcpp::IntegerVector order_(Rcpp::NumericVector x, Rcpp::NumericVector y) {
// sort x
Rcpp::NumericVector sorted_x = clone(x).sort();
// find order of x only
Rcpp::IntegerVector order = Rcpp::match(sorted_x, x);
// find order of y for each order of x
for (int i = 1; i <= Rcpp::max(order); ++i){
Rcpp::LogicalVector duplicates = order == i;
int k = which_max(duplicates);
Rcpp::NumericVector duplicated_y = y[duplicates];
Rcpp::NumericVector sorted_dy = clone(duplicated_y).sort();
Rcpp::IntegerVector order_y = Rcpp::match(sorted_dy, duplicated_y);
for (int j = 0; j < order_y.length(); ++j){
order(k + j) = order(k + j) + order_y(j) - 1;
}
}
return order;
}
如果我理解您的要求,一种方法是利用 comparison operators of std::tuple
,应该 "do the right thing"。
// [[Rcpp::plugins(cpp11)]]
#include <Rcpp.h>
#include <tuple>
template <typename... Ts>
struct Point {
std::size_t index;
using tuple_t = std::tuple<Ts...>;
tuple_t data;
template <typename... Vs>
Point(std::size_t i, const Vs&... vs)
: index(i),
data(std::forward<decltype(vs[i])>(vs[i])...)
{}
bool operator<(const Point& other) const {
return data < other.data;
}
};
template <typename... Ts>
class PointVector {
public:
std::size_t sz;
using vector_t = std::vector<Point<Ts...>>;
vector_t data;
template <typename... Vs>
PointVector(const Vs&... vs)
: sz(min_size(vs...))
{
data.reserve(sz);
for (std::size_t i = 0; i < sz; i++) {
data.emplace_back(i, vs...);
}
}
Rcpp::IntegerVector sorted_index() const {
vector_t tmp(data);
std::stable_sort(tmp.begin(), tmp.end());
Rcpp::IntegerVector res(sz);
for (std::size_t i = 0; i < sz; i++) {
res[i] = tmp[i].index + 1;
}
return res;
}
private:
template <typename V>
std::size_t min_size(const V& v) {
return v.size();
}
template <typename T, typename S, typename... Vs>
std::size_t min_size(const T& t, const S& s, const Vs&... vs) {
return t.size() < s.size() ?
min_size(t, vs...) :
min_size(s, vs...);
}
};
using namespace Rcpp;
// [[Rcpp::export]]
IntegerVector order2(NumericVector x, NumericVector y) {
PointVector<double, double> pv(x, y);
return pv.sorted_index();
}
// [[Rcpp::export]]
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) {
PointVector<double, double, double> pv(x, y, z);
return pv.sorted_index();
}
使用此数据进行演示,
x <- rep(1:2 + 0.5, 10)
y <- rep(1:4 + 0.5, 5)
z <- rep(1:5 + 0.5, 4)
(df <- data.frame(x, y, z))
# x y z
# 1 1.5 1.5 1.5
# 2 2.5 2.5 2.5
# 3 1.5 3.5 3.5
# 4 2.5 4.5 4.5
# 5 1.5 1.5 5.5
# 6 2.5 2.5 1.5
# 7 1.5 3.5 2.5
# 8 2.5 4.5 3.5
# 9 1.5 1.5 4.5
# 10 2.5 2.5 5.5
# 11 1.5 3.5 1.5
# 12 2.5 4.5 2.5
# 13 1.5 1.5 3.5
# 14 2.5 2.5 4.5
# 15 1.5 3.5 5.5
# 16 2.5 4.5 1.5
# 17 1.5 1.5 2.5
# 18 2.5 2.5 3.5
# 19 1.5 3.5 4.5
# 20 2.5 4.5 5.5
C++ 版本产生与基本 R 等价物相同的结果:
all.equal(base::order(x, y, z), order3(x, y, z))
# [1] TRUE
df[order3(x, y, z),]
# x y z
# 1 1.5 1.5 1.5
# 17 1.5 1.5 2.5
# 13 1.5 1.5 3.5
# 9 1.5 1.5 4.5
# 5 1.5 1.5 5.5
# 11 1.5 3.5 1.5
# 7 1.5 3.5 2.5
# 3 1.5 3.5 3.5
# 19 1.5 3.5 4.5
# 15 1.5 3.5 5.5
# 6 2.5 2.5 1.5
# 2 2.5 2.5 2.5
# 18 2.5 2.5 3.5
# 14 2.5 2.5 4.5
# 10 2.5 2.5 5.5
# 16 2.5 4.5 1.5
# 12 2.5 4.5 2.5
# 8 2.5 4.5 3.5
# 4 2.5 4.5 4.5
# 20 2.5 4.5 5.5
编辑: 这里有一个更容易接受的版本,它允许您编写 auto pv = MakePointVector(...)
而不是指定所有类型参数:
// Point class as before
template <typename... Vecs>
class PointVector {
public:
std::size_t sz;
using point_t = Point<typename std::remove_reference<decltype(Vecs{}[0])>::type...>;
using vector_t = std::vector<point_t>;
vector_t data;
template <typename... Vs>
PointVector(const Vecs&... vs)
: sz(min_size(vs...))
{
data.reserve(sz);
for (std::size_t i = 0; i < sz; i++) {
data.emplace_back(i, vs...);
}
}
Rcpp::IntegerVector sorted_index() const {
vector_t tmp(data);
std::stable_sort(tmp.begin(), tmp.end());
Rcpp::IntegerVector res(sz);
for (std::size_t i = 0; i < sz; i++) {
res[i] = tmp[i].index + 1;
}
return res;
}
private:
template <typename V>
std::size_t min_size(const V& v) {
return v.size();
}
template <typename T, typename S, typename... Vs>
std::size_t min_size(const T& t, const S& s, const Vs&... vs) {
return t.size() < s.size() ?
min_size(t, vs...) :
min_size(s, vs...);
}
};
template <typename... Vecs>
PointVector<Vecs...> MakePointVector(const Vecs&... vecs) {
return PointVector<Vecs...>(vecs...);
}
using namespace Rcpp;
// [[Rcpp::export]]
IntegerVector order2(NumericVector x, NumericVector y) {
auto pv = MakePointVector(x, y);
return pv.sorted_index();
}
// [[Rcpp::export]]
IntegerVector order3(NumericVector x, NumericVector y, NumericVector z) {
auto pv = MakePointVector(x, y, z);
return pv.sorted_index();
}
/*** R
all.equal(base::order(x, y), order2(x, y))
# [1] TRUE
all.equal(base::order(x, y, z), order3(x, y, z))
# [1] TRUE
*/