使用 mmap() 在进程之间共享 std::map
Share std::map between processes with mmap()
我正在尝试在进程之间共享一个 std::map<std::string, std::chrono::system_clock::time_point>:每个字符串都是标识站点的主机名,time_point 是进程最后一次访问该站点的时间。
我尝试使用 mmap(),但每个进程看到的仍然是它自己的那份 map 副本。
这是我的代码(我删除了所有与问题无关的方法和变量):
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <map>
#include <new>
#include <stdexcept>
#include <string>
// Host name -> time of the most recent visit to that host.
typedef std::map<std::string, std::chrono::system_clock::time_point> mymap;
typedef mymap::iterator iter;
typedef mymap* mapPointer;

// Singleton owning a std::map that is constructed inside an anonymous
// MAP_SHARED mapping.
//
// Only the std::map object itself is placed in the mapping. Its nodes and the
// std::string keys still come from the default std::allocator, i.e. from the
// heap of whichever process performs the insertion, so entries are NOT
// visible to other processes.
class MmapManager {
private:
    // Maps a region large enough for one mymap and constructs the map in it.
    // Throws std::runtime_error when mmap() fails.
    MmapManager() : frequency(nullptr) {
        // sizeof(mymap), not sizeof(frequency): the latter is a pointer size.
        void* region = mmap(nullptr, sizeof(mymap), PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (region == MAP_FAILED) {
            std::cout << "mapping failed" << std::endl;
            throw std::runtime_error("mapping failed");
        }
        // mmap() returns raw bytes; the map must be constructed in place
        // before any of its member functions may be called.
        frequency = new (region) mymap();
    }

    // Destroys the map and releases the mapping.
    ~MmapManager() {
        std::cout << "~MmapManager()" << std::endl;
        frequency->~mymap();
        munmap(frequency, sizeof(mymap));
    }

    MmapManager(const MmapManager&) = delete;
    MmapManager& operator=(const MmapManager&) = delete;

public:
    // my class was designed with the singleton pattern
    static MmapManager& getInstance() {
        static MmapManager instance;
        return instance;
    }

private:
    // pointer to the map living inside the mapped region
    mapPointer frequency;

public:
    // Returns true when "host" already has an entry in the map.
    bool isHostAlreadyVisited(const std::string& host) {
        return frequency->find(host) != frequency->end();
    }

    // Records the current time as the moment "host" was visited.
    void addHost(const std::string& host) {
        (*frequency)[host] = std::chrono::system_clock::now();
        std::cout << "PROC " << getpid() << " added " << host << std::endl;
    }

    // Returns the recorded visit time for "host", or a default-constructed
    // time_point when the host is unknown. Uses find() rather than
    // operator[] so that a lookup never inserts an entry.
    std::chrono::system_clock::time_point getElement(const std::string& host) {
        const auto pos = frequency->find(host);
        if (pos == frequency->end()) {
            return std::chrono::system_clock::time_point();
        }
        return pos->second;
    }

    // Prints every key of the map, one per line.
    void showMap(void) {
        std::cout << "PROC " << getpid() << " prints map keys" << std::endl;
        for (const auto& entry : *frequency) {
            std::cout << entry.first << std::endl;
        }
    }
};
int main(void) {
// simulate the processes
for (int i=0; i<5; i++) {
// child process
if (fork() == 0) {
// if child never visited this site...
if (! MmapManager::getInstance().isHostAlreadyVisited("www.google.com")) {
std::cout << "PID " << getpid() << " www.google.com is new" << std::endl;
// ...add it to the map
MmapManager::getInstance().addHost("www.google.com");
}
else {
// if child already visited it, calculate
// how much time passed since last visit
auto now = std::chrono::system_clock::now();
auto before = MmapManager::getInstance().getElement("www.google.com");
std::chrono::duration<double> diff = now-before;
std::cout << "PID " << getpid() << " visited www.google.com " << diff.count() << " seconds ago" << std::endl;
}
MmapManager::getInstance().showMap();
_exit(EXIT_SUCCESS);
}
}
return 0;
}
这是可能的输出之一:
PID 12457 www.google.com is new
PID 12459 www.google.com is new
PID 12458 www.google.com is new
PID 12460 www.google.com is new
PID 12461 www.google.com is new
我不能使用 Boost 等其他外部库,也不能使用线程:我知道线程之间共享内存,但程序就是这样设计的(由子进程完成工作),而且我不能修改它(原始代码不是我写的)。
为什么每个进程看到的仍然是自己的那份 map 副本?
编辑:我想我已经做了你们建议的所有事情:
- 对 map 的插入受到锁定机制的保护(感谢 kfsone);
- 为 string 创建了一个自定义分配器,并为 map 创建了另一个(感谢 Maxim Egorushkin 提供这两个建议);
- map 在 fork 之前分配(感谢 Zan Lynx)。
输出没有区别,map 仍然没有被共享:
MmapManager()
printMap
map empty
PID 5085 www.google.com is new
PID 5086 www.google.com is new
PROC 5086 added www.goole.com
PROC 5085 added www.goole.com
PID 5087 www.google.com is new
PROC 5087 added www.goole.com
你们建议我使用 Boost,但我想等自己的代码能运行之后再使用它:我不是在重新发明轮子,只是想通过更艰难的方式来学习。
下面是我的新代码:
#include <sys/mman.h>
#include <unistd.h>
#include <sys/shm.h> /* shmat(), IPC_RMID */
#include <semaphore.h> /* sem_open(), sem_destroy(), sem_wait().. */
#include <fcntl.h> /* O_CREAT, O_EXEC */
#include <stdlib.h>
#include <iostream>
#include <map>
#include <string>
#include <chrono>
#include <cstddef>
#include <vector>
#include <limits>
#include <memory>
// Stateless allocator backed by malloc()/free(), used as the allocator of
// myString. All instances compare equal, so memory obtained from one
// instance may be released through any other.
//
// The memory comes from the calling process's heap; nothing here places it
// in shared memory.
template<typename T> class stringAllocator {
public:
    typedef T value_type;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;
    typedef T* pointer;
    typedef T const* const_pointer;
    typedef T& reference;
    typedef T const& const_reference;

    template<typename U> struct rebind {
        typedef stringAllocator<U> other;
    };

    stringAllocator() noexcept {}
    stringAllocator(stringAllocator const&) noexcept {}
    template<typename U>
    stringAllocator(stringAllocator<U> const&) noexcept {}
    ~stringAllocator() noexcept {}

    pointer address(reference value) const {
        return &value;
    }
    const_pointer address(const_reference value) const {
        return &value;
    }

    // Largest element count whose byte size still fits in size_type.
    size_type max_size() const noexcept {
        return std::numeric_limits<size_type>::max() / sizeof(T);
    }

    // Returns uninitialized storage for n objects of type T.
    // Throws std::bad_alloc when n exceeds max_size() or malloc() fails.
    pointer allocate(size_type n) {
        // Reject counts whose byte size would wrap around size_type.
        if (n > max_size()) {
            throw std::bad_alloc();
        }
        void* raw = malloc(n * sizeof(value_type));
        // malloc(0) is allowed to return a null pointer; that is not a failure.
        if (raw == nullptr && n != 0) {
            throw std::bad_alloc();
        }
        return static_cast<pointer>(raw);
    }

    // Releases storage obtained from allocate(); the count is not needed.
    void deallocate(pointer p, size_type /*n*/) {
        free(p);
    }

    // Copy-constructs a T at p.
    void construct(pointer p, const_reference value) {
        new(p) T(value);
    }

    // Runs the destructor of the object at p without releasing its storage.
    void destroy(pointer p) {
        p->~T();
    }
};

// The allocator holds no state, so any two instances are interchangeable.
template <class T1, class T2>
bool operator==(const stringAllocator<T1>&, const stringAllocator<T2>&) noexcept {
    return true;
}
template <class T1, class T2>
bool operator!=(const stringAllocator<T1>&, const stringAllocator<T2>&) noexcept {
    return false;
}

// String type whose character buffer is obtained through stringAllocator.
typedef std::basic_string<
    char,
    std::char_traits<char>,
    stringAllocator<char>
> myString;
/*************************************** map allocator ****************************************/
// Stateless allocator backed by ::operator new / ::operator delete, used as
// the node allocator of Map. All instances compare equal.
//
// The memory comes from the calling process's heap; nothing here places it
// in shared memory.
template<typename T> class mapAllocator {
public:
    typedef T value_type;
    typedef value_type* pointer;
    typedef const value_type* const_pointer;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;

    template<typename U>
    struct rebind {
        typedef mapAllocator<U> other;
    };

    mapAllocator() noexcept {}
    mapAllocator(mapAllocator const&) noexcept {}
    template<typename U>
    mapAllocator(mapAllocator<U> const&) noexcept {}
    ~mapAllocator() noexcept {}

    pointer address(reference r) const { return &r; }
    const_pointer address(const_reference r) const { return &r; }

    // Returns uninitialized storage for cnt objects of type T. The second
    // argument is an ignored locality hint; it is spelled const void* because
    // std::allocator<void>::const_pointer was removed in C++20.
    // Throws std::bad_alloc when cnt exceeds max_size() or ::operator new fails.
    pointer allocate(size_type cnt, const void* = 0) {
        // Reject counts whose byte size would wrap around size_type.
        if (cnt > max_size()) {
            throw std::bad_alloc();
        }
        return static_cast<pointer>(::operator new(cnt * sizeof(T)));
    }

    // Releases storage obtained from allocate(); the count is not needed.
    void deallocate(pointer p, size_type /*n*/) {
        ::operator delete(p);
    }

    // Largest element count whose byte size still fits in size_type.
    size_type max_size() const noexcept {
        return std::numeric_limits<size_type>::max() / sizeof(T);
    }

    // Copy-constructs a T at p.
    void construct(pointer p, const T& t) {
        new(p) T(t);
    }

    // Runs the destructor of the object at p without releasing its storage.
    void destroy(pointer p) {
        p->~T();
    }
};

// The allocator holds no state, so any two instances are interchangeable.
template <class T1, class T2>
bool operator==(const mapAllocator<T1>&, const mapAllocator<T2>&) noexcept {
    return true;
}
template <class T1, class T2>
bool operator!=(const mapAllocator<T1>&, const mapAllocator<T2>&) noexcept {
    return false;
}
/*************************************** end map allocator ****************************************/
// Comparator for maps keyed by myString: orders keys by the result of
// basic_string::compare(), i.e. "first" sorts before "second" when the
// comparison is negative.
class strless {
public:
    // Takes both keys by const reference so that a comparison never copies
    // the strings.
    bool operator() (const myString& first, const myString& second) const {
        return first.compare(second) < 0;
    }
};
// std::map variant that orders its keys with strless (instead of the default
// std::less<Key>) and obtains its nodes from mapAllocator (instead of the
// default std::allocator).
template<typename Key, typename T>
using Map = std::map<Key, T, strless, mapAllocator<std::pair<const Key, T>>>;

// The concrete map meant to be shared between processes:
// host name -> time of the last visit.
using frequencyMap = Map<myString, std::chrono::system_clock::time_point>;
class MmapManager {
private:
MmapManager() {
std::cout << "MmapManager()" << std::endl;
semMmap = sem_open("semaphore", O_CREAT|O_EXCL, 0644, 1);
sem_unlink("semaphore");
};
~MmapManager() {
std::cout << "~MmapManager()" << std::endl;
}
public:
static MmapManager& getInstance() {
static MmapManager instance;
return instance;
}
private:
frequencyMap fmap;
sem_t *semMmap;
public:
void start(void) {}
bool isHostAlreadyVisited(myString host) {
return fmap.find(host) != fmap.end();
}
void addHost(myString host) {
sem_wait(semMmap);
fmap[host] = std::chrono::system_clock::now();
sem_post(semMmap);
std::cout << "PROC " << getpid() << " added " << host << std::endl;
}
// get time of the visit for site "host"
std::chrono::system_clock::time_point getElement(myString host) {
return fmap[host];
}
void printMap(void) {
std::cout << "printMap" << std::endl;
if (!fmap.empty()) {
for (auto it : fmap) {
std::cout << it.first << ' ';
}
std::cout << std::endl;
} else {
std::cout << "map empty" << std::endl;
}
}
};
int main(void) {
MmapManager::getInstance().start();
for (int i=0; i<3; i++) {
if (fork() == 0) {
if (!MmapManager::getInstance().isHostAlreadyVisited("www.google.com")) {
std::cout << "PID " << getpid() << " www.google.com is new" << std::endl;
MmapManager::getInstance().addHost("www.goole.com");
}
else {
// if child already visited it, calculate
// how much time passed since last visit
auto now = std::chrono::system_clock::now();
auto before = MmapManager::getInstance().getElement("www.google.com");
std::chrono::duration<double> diff = now-before;
std::cout << "PID " << getpid() << " visited www.google.com " << diff.count() << " seconds ago" << std::endl;
}
_exit(EXIT_SUCCESS);
}
}
MmapManager::getInstance().printMap();
return 0;
}
这样行不通,因为虽然您把容器对象本身放进了共享内存,但它的元素仍然是从堆上分配的,因此其他进程无法访问它们。
您需要一个自定义分配器,在共享内存中分配元素。具体做法请参阅 “Creating maps in shared memory”。
请注意,您使用的字符串类也必须从共享内存中分配内存。
换句话说,共享内存中不能存放指向堆内存的指针,因为堆内存不在进程之间共享。std 的各个类都有一个分配器模板参数,默认从堆中分配内存;需要把它换成共享内存分配器(shared memory allocator),才能通过共享内存共享这类对象。
您的代码不起作用的另一个原因是:您是在调用 fork() 之后才创建 map 的。
如果想让所有子进程都看到同一个 MAP_SHARED|MAP_ANONYMOUS 映射,就必须在 fork 之前调用 mmap()。
我正在尝试在进程之间共享一个 std::map<std::string, std::chrono::system_clock::time_point>:每个字符串都是标识站点的主机名,time_point 是进程最后一次访问该站点的时间。
我尝试使用 mmap(),但每个进程看到的仍然是它自己的那份 map 副本。
这是我的代码(我删除了所有与问题无关的方法和变量):
#include <sys/mman.h>
#include <unistd.h>
#include <iostream>
#include <map>
#include <string>
#include <chrono>
// Host name -> time of the most recent visit to that host.
typedef std::map<std::string, std::chrono::system_clock::time_point> mymap;
typedef mymap::iterator iter;
typedef mymap* mapPointer;

// Singleton owning a std::map that is constructed inside an anonymous
// MAP_SHARED mapping.
//
// Only the std::map object itself is placed in the mapping. Its nodes and the
// std::string keys still come from the default std::allocator, i.e. from the
// heap of whichever process performs the insertion, so entries are NOT
// visible to other processes.
class MmapManager {
private:
    // Maps a region large enough for one mymap and constructs the map in it.
    // Throws std::runtime_error when mmap() fails.
    MmapManager() : frequency(nullptr) {
        // sizeof(mymap), not sizeof(frequency): the latter is a pointer size.
        void* region = mmap(nullptr, sizeof(mymap), PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (region == MAP_FAILED) {
            std::cout << "mapping failed" << std::endl;
            throw std::runtime_error("mapping failed");
        }
        // mmap() returns raw bytes; the map must be constructed in place
        // before any of its member functions may be called.
        frequency = new (region) mymap();
    }

    // Destroys the map and releases the mapping.
    ~MmapManager() {
        std::cout << "~MmapManager()" << std::endl;
        frequency->~mymap();
        munmap(frequency, sizeof(mymap));
    }

    MmapManager(const MmapManager&) = delete;
    MmapManager& operator=(const MmapManager&) = delete;

public:
    // my class was designed with the singleton pattern
    static MmapManager& getInstance() {
        static MmapManager instance;
        return instance;
    }

private:
    // pointer to the map living inside the mapped region
    mapPointer frequency;

public:
    // Returns true when "host" already has an entry in the map.
    bool isHostAlreadyVisited(const std::string& host) {
        return frequency->find(host) != frequency->end();
    }

    // Records the current time as the moment "host" was visited.
    void addHost(const std::string& host) {
        (*frequency)[host] = std::chrono::system_clock::now();
        std::cout << "PROC " << getpid() << " added " << host << std::endl;
    }

    // Returns the recorded visit time for "host", or a default-constructed
    // time_point when the host is unknown. Uses find() rather than
    // operator[] so that a lookup never inserts an entry.
    std::chrono::system_clock::time_point getElement(const std::string& host) {
        const auto pos = frequency->find(host);
        if (pos == frequency->end()) {
            return std::chrono::system_clock::time_point();
        }
        return pos->second;
    }

    // Prints every key of the map, one per line.
    void showMap(void) {
        std::cout << "PROC " << getpid() << " prints map keys" << std::endl;
        for (const auto& entry : *frequency) {
            std::cout << entry.first << std::endl;
        }
    }
};
int main(void) {
// simulate the processes
for (int i=0; i<5; i++) {
// child process
if (fork() == 0) {
// if child never visited this site...
if (! MmapManager::getInstance().isHostAlreadyVisited("www.google.com")) {
std::cout << "PID " << getpid() << " www.google.com is new" << std::endl;
// ...add it to the map
MmapManager::getInstance().addHost("www.google.com");
}
else {
// if child already visited it, calculate
// how much time passed since last visit
auto now = std::chrono::system_clock::now();
auto before = MmapManager::getInstance().getElement("www.google.com");
std::chrono::duration<double> diff = now-before;
std::cout << "PID " << getpid() << " visited www.google.com " << diff.count() << " seconds ago" << std::endl;
}
MmapManager::getInstance().showMap();
_exit(EXIT_SUCCESS);
}
}
return 0;
}
这是可能的输出之一:
PID 12457 www.google.com is new
PID 12459 www.google.com is new
PID 12458 www.google.com is new
PID 12460 www.google.com is new
PID 12461 www.google.com is new
我不能使用 Boost 等其他外部库,也不能使用线程:我知道线程之间共享内存,但程序就是这样设计的(由子进程完成工作),而且我不能修改它(原始代码不是我写的)。
为什么每个进程看到的仍然是自己的那份 map 副本?
编辑:我想我已经做了你们建议的所有事情:
- 对 map 的插入受到锁定机制的保护(感谢 kfsone);
- 为 string 创建了一个自定义分配器,并为 map 创建了另一个(感谢 Maxim Egorushkin 提供这两个建议);
- map 在 fork 之前分配(感谢 Zan Lynx)。
输出没有区别,map 仍然没有被共享:
MmapManager()
printMap
map empty
PID 5085 www.google.com is new
PID 5086 www.google.com is new
PROC 5086 added www.goole.com
PROC 5085 added www.goole.com
PID 5087 www.google.com is new
PROC 5087 added www.goole.com
你们建议我使用 Boost,但我想等自己的代码能运行之后再使用它:我不是在重新发明轮子,只是想通过更艰难的方式来学习。
下面是我的新代码:
#include <sys/mman.h>
#include <unistd.h>
#include <sys/shm.h> /* shmat(), IPC_RMID */
#include <semaphore.h> /* sem_open(), sem_destroy(), sem_wait().. */
#include <fcntl.h> /* O_CREAT, O_EXEC */
#include <stdlib.h>
#include <iostream>
#include <map>
#include <string>
#include <chrono>
#include <cstddef>
#include <vector>
#include <limits>
#include <memory>
// Stateless allocator backed by malloc()/free(), used as the allocator of
// myString. All instances compare equal, so memory obtained from one
// instance may be released through any other.
//
// The memory comes from the calling process's heap; nothing here places it
// in shared memory.
template<typename T> class stringAllocator {
public:
    typedef T value_type;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;
    typedef T* pointer;
    typedef T const* const_pointer;
    typedef T& reference;
    typedef T const& const_reference;

    template<typename U> struct rebind {
        typedef stringAllocator<U> other;
    };

    stringAllocator() noexcept {}
    stringAllocator(stringAllocator const&) noexcept {}
    template<typename U>
    stringAllocator(stringAllocator<U> const&) noexcept {}
    ~stringAllocator() noexcept {}

    pointer address(reference value) const {
        return &value;
    }
    const_pointer address(const_reference value) const {
        return &value;
    }

    // Largest element count whose byte size still fits in size_type.
    size_type max_size() const noexcept {
        return std::numeric_limits<size_type>::max() / sizeof(T);
    }

    // Returns uninitialized storage for n objects of type T.
    // Throws std::bad_alloc when n exceeds max_size() or malloc() fails.
    pointer allocate(size_type n) {
        // Reject counts whose byte size would wrap around size_type.
        if (n > max_size()) {
            throw std::bad_alloc();
        }
        void* raw = malloc(n * sizeof(value_type));
        // malloc(0) is allowed to return a null pointer; that is not a failure.
        if (raw == nullptr && n != 0) {
            throw std::bad_alloc();
        }
        return static_cast<pointer>(raw);
    }

    // Releases storage obtained from allocate(); the count is not needed.
    void deallocate(pointer p, size_type /*n*/) {
        free(p);
    }

    // Copy-constructs a T at p.
    void construct(pointer p, const_reference value) {
        new(p) T(value);
    }

    // Runs the destructor of the object at p without releasing its storage.
    void destroy(pointer p) {
        p->~T();
    }
};

// The allocator holds no state, so any two instances are interchangeable.
template <class T1, class T2>
bool operator==(const stringAllocator<T1>&, const stringAllocator<T2>&) noexcept {
    return true;
}
template <class T1, class T2>
bool operator!=(const stringAllocator<T1>&, const stringAllocator<T2>&) noexcept {
    return false;
}

// String type whose character buffer is obtained through stringAllocator.
typedef std::basic_string<
    char,
    std::char_traits<char>,
    stringAllocator<char>
> myString;
/*************************************** map allocator ****************************************/
// Stateless allocator backed by ::operator new / ::operator delete, used as
// the node allocator of Map. All instances compare equal.
//
// The memory comes from the calling process's heap; nothing here places it
// in shared memory.
template<typename T> class mapAllocator {
public:
    typedef T value_type;
    typedef value_type* pointer;
    typedef const value_type* const_pointer;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;

    template<typename U>
    struct rebind {
        typedef mapAllocator<U> other;
    };

    mapAllocator() noexcept {}
    mapAllocator(mapAllocator const&) noexcept {}
    template<typename U>
    mapAllocator(mapAllocator<U> const&) noexcept {}
    ~mapAllocator() noexcept {}

    pointer address(reference r) const { return &r; }
    const_pointer address(const_reference r) const { return &r; }

    // Returns uninitialized storage for cnt objects of type T. The second
    // argument is an ignored locality hint; it is spelled const void* because
    // std::allocator<void>::const_pointer was removed in C++20.
    // Throws std::bad_alloc when cnt exceeds max_size() or ::operator new fails.
    pointer allocate(size_type cnt, const void* = 0) {
        // Reject counts whose byte size would wrap around size_type.
        if (cnt > max_size()) {
            throw std::bad_alloc();
        }
        return static_cast<pointer>(::operator new(cnt * sizeof(T)));
    }

    // Releases storage obtained from allocate(); the count is not needed.
    void deallocate(pointer p, size_type /*n*/) {
        ::operator delete(p);
    }

    // Largest element count whose byte size still fits in size_type.
    size_type max_size() const noexcept {
        return std::numeric_limits<size_type>::max() / sizeof(T);
    }

    // Copy-constructs a T at p.
    void construct(pointer p, const T& t) {
        new(p) T(t);
    }

    // Runs the destructor of the object at p without releasing its storage.
    void destroy(pointer p) {
        p->~T();
    }
};

// The allocator holds no state, so any two instances are interchangeable.
template <class T1, class T2>
bool operator==(const mapAllocator<T1>&, const mapAllocator<T2>&) noexcept {
    return true;
}
template <class T1, class T2>
bool operator!=(const mapAllocator<T1>&, const mapAllocator<T2>&) noexcept {
    return false;
}
/*************************************** end map allocator ****************************************/
// Comparator for maps keyed by myString: orders keys by the result of
// basic_string::compare(), i.e. "first" sorts before "second" when the
// comparison is negative.
class strless {
public:
    // Takes both keys by const reference so that a comparison never copies
    // the strings.
    bool operator() (const myString& first, const myString& second) const {
        return first.compare(second) < 0;
    }
};
// std::map variant that orders its keys with strless (instead of the default
// std::less<Key>) and obtains its nodes from mapAllocator (instead of the
// default std::allocator).
template<typename Key, typename T>
using Map = std::map<Key, T, strless, mapAllocator<std::pair<const Key, T>>>;

// The concrete map meant to be shared between processes:
// host name -> time of the last visit.
using frequencyMap = Map<myString, std::chrono::system_clock::time_point>;
class MmapManager {
private:
MmapManager() {
std::cout << "MmapManager()" << std::endl;
semMmap = sem_open("semaphore", O_CREAT|O_EXCL, 0644, 1);
sem_unlink("semaphore");
};
~MmapManager() {
std::cout << "~MmapManager()" << std::endl;
}
public:
static MmapManager& getInstance() {
static MmapManager instance;
return instance;
}
private:
frequencyMap fmap;
sem_t *semMmap;
public:
void start(void) {}
bool isHostAlreadyVisited(myString host) {
return fmap.find(host) != fmap.end();
}
void addHost(myString host) {
sem_wait(semMmap);
fmap[host] = std::chrono::system_clock::now();
sem_post(semMmap);
std::cout << "PROC " << getpid() << " added " << host << std::endl;
}
// get time of the visit for site "host"
std::chrono::system_clock::time_point getElement(myString host) {
return fmap[host];
}
void printMap(void) {
std::cout << "printMap" << std::endl;
if (!fmap.empty()) {
for (auto it : fmap) {
std::cout << it.first << ' ';
}
std::cout << std::endl;
} else {
std::cout << "map empty" << std::endl;
}
}
};
int main(void) {
MmapManager::getInstance().start();
for (int i=0; i<3; i++) {
if (fork() == 0) {
if (!MmapManager::getInstance().isHostAlreadyVisited("www.google.com")) {
std::cout << "PID " << getpid() << " www.google.com is new" << std::endl;
MmapManager::getInstance().addHost("www.goole.com");
}
else {
// if child already visited it, calculate
// how much time passed since last visit
auto now = std::chrono::system_clock::now();
auto before = MmapManager::getInstance().getElement("www.google.com");
std::chrono::duration<double> diff = now-before;
std::cout << "PID " << getpid() << " visited www.google.com " << diff.count() << " seconds ago" << std::endl;
}
_exit(EXIT_SUCCESS);
}
}
MmapManager::getInstance().printMap();
return 0;
}
这样行不通,因为虽然您把容器对象本身放进了共享内存,但它的元素仍然是从堆上分配的,因此其他进程无法访问它们。
您需要一个自定义分配器,在共享内存中分配元素。具体做法请参阅 “Creating maps in shared memory”。
请注意,您使用的字符串类也必须从共享内存中分配内存。
换句话说,共享内存中不能存放指向堆内存的指针,因为堆内存不在进程之间共享。std 的各个类都有一个分配器模板参数,默认从堆中分配内存;需要把它换成共享内存分配器(shared memory allocator),才能通过共享内存共享这类对象。
您的代码不起作用的另一个原因是:您是在调用 fork() 之后才创建 map 的。
如果想让所有子进程都看到同一个 MAP_SHARED|MAP_ANONYMOUS 映射,就必须在 fork 之前调用 mmap()。