Embedding Python in C++: Importing modules in python script works during one function call but not another

I am writing a program that uses C++ as a data-management backend and can call user-created Python scripts for various tasks. I have run into a problem, though, related to importing modules in the Python script. I pass values from C++ to Python, perform a Gaussian process regression on those values with sklearn, and then simply return either the optimized model values or the GPR uncertainty back to C++. I have these two cases (model optimization and model validation) as two separate functions in the Python script because they are called from different places in the C++ code.

When I run the first function (model optimization), everything works fine and I get the optimized hyperparameters back into C++ without incident. During the second function call, however, the script fails because it cannot import the GPR module from sklearn (the same module that the previous function imported successfully). I am not very familiar with embedding Python in C++, so it is quite possible that I am simply missing something or do not fully understand the rules. It is not possible to provide code from the C++ side that is runnable on its own, so I will provide as much of the embedding code as I can. The Python script is shown in full below. If you need more information, please let me know and I will be happy to provide it. Thanks for any help.

C++: main

//other stuff

Py_Initialize();

//do more other stuff (embedding happens here)

Py_Finalize();

//do even more other stuff
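
Both call sites below repeat the same script/function lookup, so for reference here is a minimal sketch of that lookup with the error checks spelled out. The helper name loadFunction, the includes, and the std::string parameters are my own assumptions rather than part of the original program; the Python 2 C API (PyString_FromString) is assumed to match the snippets below.

// Hypothetical helper (not in the original code): look up "function" inside the
// script "file" and return a new reference to the callable, or NULL after
// printing the Python error.
#include <Python.h>
#include <cstdio>
#include <string>

static PyObject* loadFunction(const std::string& file, const std::string& function)
{
    PyRun_SimpleString("import sys");
    PyRun_SimpleString("sys.path.append(\".\")");

    PyObject* pName = PyString_FromString(file.c_str());
    PyObject* pModule = PyImport_Import(pName);
    Py_DECREF(pName);
    if (pModule == NULL) {
        PyErr_Print();                          // show why the import failed
        return NULL;
    }

    PyObject* pFunc = PyObject_GetAttrString(pModule, function.c_str());
    Py_DECREF(pModule);
    if (pFunc == NULL) {
        PyErr_Print();                          // attribute lookup failed
        return NULL;
    }
    if (!PyCallable_Check(pFunc)) {
        fprintf(stderr, "\"%s\" is not callable\n", function.c_str());
        Py_DECREF(pFunc);
        return NULL;
    }
    return pFunc;                               // caller owns this reference
}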

C++: Model optimization

PyRun_SimpleString("import sys");
PyRun_SimpleString("sys.path.append(\".\")");

pName = PyString_FromString(file.c_str());

pModule = PyImport_Import(pName);
Py_DECREF(pName);

if (pModule != NULL) {
    pFunc = PyObject_GetAttrString(pModule, function.c_str());

    pArgs = PyTuple_New(size);

    PyTuple_SetItem(pArgs, 0, PyLong_FromLong(gp->getInnerFPSize()));
    PyTuple_SetItem(pArgs, 1, PyLong_FromLong(ntrain));

    k = 2;

    for(i = 0; i < trainingFP[modelNumber].size(); i++){
        for(j = 0; j < trainingFP[modelNumber][i].size(); j++){
            PyTuple_SetItem(pArgs, k, 
            PyFloat_FromDouble(trainingFP[modelNumber][i][j]));
            k++;
        }           
    }
    for(i = 0; i < trainingForces[modelNumber].size(); i++){
        PyTuple_SetItem(pArgs, k, 
        PyFloat_FromDouble(trainingForces[modelNumber][i]));
        k++;
    }
    Py_INCREF(pValue);
    pValue = PyObject_CallObject(pFunc, pArgs);
    Py_DECREF(pArgs);

 }else {
    PyErr_Print();
    fprintf(stderr, "Failed to load \"%s\"\n", function.c_str());
    return 1;
}

Py_XDECREF(pFunc);
Py_DECREF(pModule);

optimalSigma = PyFloat_AsDouble(PyList_GetItem(pValue, 1));
optimalSigmaN = PyFloat_AsDouble(PyList_GetItem(pValue, 0));
optimalSigmaF = PyFloat_AsDouble(PyList_GetItem(pValue, 2));

Py_DECREF(pValue);
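
A side note on the call above: Py_INCREF(pValue) runs before pValue has been assigned, so it increments whatever pValue happened to point at, and the return value of PyObject_CallObject (a new reference, or NULL on error) is never checked before PyList_GetItem is used on it. A minimal sketch of a checked version of that step, reusing the names from the snippet; the early return mirrors the existing return 1 error path:

pValue = PyObject_CallObject(pFunc, pArgs);   // new reference; no Py_INCREF needed
Py_DECREF(pArgs);

if (pValue == NULL) {
    PyErr_Print();                            // print the actual Python traceback
    fprintf(stderr, "Call to \"%s\" failed\n", function.c_str());
    return 1;
}
if (!PyList_Check(pValue) || PyList_Size(pValue) < 3) {
    fprintf(stderr, "\"%s\" did not return the expected list of 3 hyperparameters\n",
            function.c_str());
    Py_DECREF(pValue);
    return 1;
}

With a check like this, a failing call surfaces its traceback immediately instead of handing a bad pointer to PyList_GetItem.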

C++: Model validation

PyRun_SimpleString("import sys");
PyRun_SimpleString("sys.path.append(\".\")");

pName = PyString_FromString(file.c_str());

pModule = PyImport_Import(pName);
Py_DECREF(pName);

if (pModule != NULL) {
    pFunc = PyObject_GetAttrString(pModule, function.c_str());

    pArgs = PyTuple_New(size);

    PyTuple_SetItem(pArgs, 0, PyFloat_FromDouble(testFP[0].size()));
    PyTuple_SetItem(pArgs, 1, PyFloat_FromDouble(testFP.size()));
    PyTuple_SetItem(pArgs, 2, PyFloat_FromDouble(trainingFP.size()));
    PyTuple_SetItem(pArgs, 3, PyFloat_FromDouble(sigma));
    PyTuple_SetItem(pArgs, 4, PyFloat_FromDouble(sigmaN));
    PyTuple_SetItem(pArgs, 5, PyFloat_FromDouble(sigmaF));

    k = 6;

    for(i = 0; i < testFP.size(); i++){
        for(j = 0; j < testFP[i].size(); j++){
            PyTuple_SetItem(pArgs, k, PyFloat_FromDouble(testFP[i][j]));
            k++;
        }           
    }
    for(i = 0; i < trainingFP.size(); i++){
        for(j = 0; j < trainingFP[i].size(); j++){
            PyTuple_SetItem(pArgs, k, PyFloat_FromDouble(trainingFP[i][j]));
            k++;
        }           
    }
    for(i = 0; i < trainingFP.size(); i++){
        PyTuple_SetItem(pArgs, k, PyFloat_FromDouble(trainingForces[i]));
        k++;
    }
    Py_INCREF(pValue);
    pValue = PyObject_CallObject(pFunc, pArgs);
    Py_DECREF(pArgs);

 }else {
    PyErr_Print();
    fprintf(stderr, "Failed to load \"%s\"\n", function.c_str());
}

Py_XDECREF(pFunc);
Py_DECREF(pModule);

for(i = 0; i < testFP.size(); i++)
    prediction[i] = PyFloat_AsDouble(PyList_GetItem(pValue, i));

Py_DECREF(pValue);
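
The same caveat applies here: pValue is indexed by PyList_GetItem in the prediction loop without verifying that the call succeeded or that the returned list is long enough. A sketch under the same assumptions (the return 1 error path is an assumption about the surrounding function's signature):

pValue = PyObject_CallObject(pFunc, pArgs);
Py_DECREF(pArgs);

if (pValue == NULL) {
    PyErr_Print();
    fprintf(stderr, "Call to \"%s\" failed\n", function.c_str());
    return 1;
}
// GPR_unc returns the std values followed by their squares, so the list should
// hold at least one entry per test fingerprint before the loop indexes it.
if (!PyList_Check(pValue) || (size_t)PyList_Size(pValue) < testFP.size()) {
    fprintf(stderr, "Unexpected return value from \"%s\"\n", function.c_str());
    Py_DECREF(pValue);
    return 1;
}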

Python

def GPR(*X):

    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, WhiteKernel
    import re

    #initialize local variables
    counter = 0
    sigma_l_initial = 1
    sigma_n_initial = 1
    sigma_f_initial = 2
    innerFPSize = int(X[0])
    ntrain = int(X[1])

    optimized_hyperparameters = []
    forces = []
    fp = []
    sigma_l_bounds = [.01,100]
    sigma_n_bounds = [.001,.1]

    fp.append([])
    #pass values from c++ conversion tuple to local lists
    for x in X:
        if counter > 1 and counter < 2 + innerFPSize * ntrain:
            fp[len(fp) - 1].append(x)
        elif counter >= 2 + innerFPSize * ntrain:
            forces.append(x)

        counter += 1

        if len(fp[len(fp) - 1]) == innerFPSize:
            if len(fp) < ntrain:
                fp.append([])

    #GPR routine
    krbf = sigma_f_initial*RBF(length_scale=sigma_l_initial,length_scale_bounds=(sigma_l_bounds[0],sigma_l_bounds[1]))
    noise_kernel = WhiteKernel(noise_level=sigma_n_initial,noise_level_bounds=(sigma_n_bounds[0],sigma_n_bounds[1]))

    gp = GaussianProcessRegressor(kernel=krbf + noise_kernel,normalize_y=True,n_restarts_optimizer=25)
    gp.fit(fp, forces)

    #get optimized hyperparameters
    rr = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", str(gp.kernel_))
    optimized_hyperparameters.append(float(rr[-1]))
    optimized_hyperparameters.append(float(rr[-2]))
    optimized_hyperparameters.append(float(rr[0]))

    return optimized_hyperparameters





def GPR_unc(*X):

    try:
        from sklearn.gaussian_process import GaussianProcessRegressor
        from sklearn.gaussian_process.kernels import RBF, WhiteKernel
    except:
        print 'THIS REFUSES TO WORK'

    #initialize variables
    uncertainty = []
    testFP = []
    trainingFP = []
    trainingForces = []

    innerFPSize = int(X[0])
    testSize = int(X[1])
    ntrain = int(X[2])
    sigma = float(X[3])
    sigma_n = float(X[4])
    sigma_f = float(X[5])

    counter = 0

    setTrainFP = setTrainForces = False
    setTestFP = True

    testFP.append([])
    trainingFP.append([])

    #parse data from C++ arrays
    for x in X:

        try:
            if counter > 5 and setTestFP == True:
                testFP[len(testFP) - 1].append(x)
            elif setTrainFP == True:
                trainingFP[len(trainingFP) - 1].append(x)
            elif setTrainForces == True:
                trainingForces.append(x)

            if counter > 5 and setTestFP == True:
                if len(testFP[len(testFP) - 1]) == innerFPSize:
                    if len(testFP) + 1 <= testSize:
                        testFP.append([])
                    else:
                        setTestFP = False
                        setTrainFP = True
            elif setTrainFP == True:
                if len(trainingFP[len(trainingFP) - 1]) == innerFPSize:
                    if (len(trainingFP)) + 1 <= ntrain:
                        trainingFP.append([])
                    else:
                        setTrainFP = False
                        setTrainForces = True
            counter += 1
        except:
            print 'ERROR'

    #perform static "optimization" of gpr kernel to get gpr object
    krbf = sigma_f**2*RBF(length_scale=sigma,length_scale_bounds=(sigma,sigma))
    noise_kernel = WhiteKernel(noise_level=sigma_n,noise_level_bounds=(sigma_n,sigma_n))
    gp = GaussianProcessRegressor(kernel=krbf + noise_kernel,normalize_y=True, optimizer=None)
    gp.fit(trainingFP, trainingForces)

    #get uncertainties on test set
    val,std = gp.predict(testFP,return_std=True)

    #ensure that the uncertainty is loaded into a float list in order to be sent back to c++
    for x in std:
        uncertainty.append(float(x))
    for x in std:
        uncertainty.append(float(x) * float(x))

    return uncertainty

The Python script fails when it tries to import the modules inside the GPR_unc function (the second function in the Python code).

The cause turned out to be on the C++ side: in the model validation function, the Python tuple

pArgs = PyTuple_New(size);

was created with a size that did not match the number of items subsequently packed into it. The crash ultimately came down to a simple case of writing past the end of the array without resizing it.
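
For concreteness, one way to keep the two sides in agreement is to compute the tuple size for the validation call from the data that is actually packed into it, so the count handed to PyTuple_New always matches the number of PyTuple_SetItem calls. A sketch using the container names from the validation snippet above:

// 6 leading scalars, then every test fingerprint value, every training
// fingerprint value, and one training force per training configuration.
Py_ssize_t size = 6;
for (size_t i = 0; i < testFP.size(); i++)
    size += testFP[i].size();
for (size_t i = 0; i < trainingFP.size(); i++)
    size += trainingFP[i].size();
size += trainingFP.size();        // trainingForces entries

pArgs = PyTuple_New(size);        // now matches the loops that fill the tuple

Alternatively, packing the fingerprints and forces into Python list objects and passing a small fixed number of arguments sidesteps the manual index bookkeeping altogether.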