AttributeError: 'numpy.ndarray' object has no attribute 'transform'

AttributeError: 'numpy.ndarray' object has no attribute 'transform'

我想创建一个包含两个步骤的 sklearn 管道:

  1. 自定义转换器函数
  2. Keras 分类模型

这是我的数据集(当然,我提供了一个简化的子集来显示数据格式):

x_train

array([[[0.45977011, 0.16666667, 0.18373494, ..., 0.33333333,
         0.71317829, 0.7246617 ],
        [0.6091954 , 0.25      , 0.28313253, ..., 0.33333333,
         0.66666667, 0.73101353],
        [0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        ...,
        [0.6091954 , 0.58333333, 0.20481928, ..., 0.33333333,
         0.62015504, 0.65009666],
        [0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
         0.71317829, 0.76719138],
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269]],

       [[0.6091954 , 0.25      , 0.28313253, ..., 0.33333333,
         0.66666667, 0.73101353],
        [0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        [0.54022989, 0.5       , 0.34337349, ..., 0.33333333,
         0.57364341, 0.66238608],
        ...,
        [0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
         0.71317829, 0.76719138],
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269],
        [0.44252874, 0.75      , 0.48192771, ..., 0.41666667,
         0.62015504, 0.65023474]],

       [[0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        [0.54022989, 0.5       , 0.34337349, ..., 0.33333333,
         0.57364341, 0.66238608],
        [0.3908046 , 0.33333333, 0.34939759, ..., 0.41666667,
         0.58914729, 0.70450152],
        ...,
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269],
        [0.44252874, 0.75      , 0.48192771, ..., 0.41666667,
         0.62015504, 0.65023474],
        [0.60344828, 0.41666667, 0.46686747, ..., 0.25      ,
         0.66666667, 0.61391881]]]

y_train

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

这是我当前的代码:

import numpy as np
from keras.wrappers.scikit_learn import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import *
from tensorflow.keras.callbacks import *
from sklearn.pipeline import Pipeline

# Custom transformer
class Transformer():

    def transform(self, x):
        x_img = np.apply_along_axis(self.rec_plot, 1, x).astype('float16')
        return x_img
    
    def rec_plot(s, eps=0.10, steps=10):
        d = pdist(s[:,None])
        d = np.floor(d/eps)
        d[d>steps] = steps
        Z = squareform(d)
        return Z
    
    def fit(self, x, y=None):
        return x

def create_model():
    model = Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 17)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))

    #sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(model.summary())
    
    return model

transformer = Transformer()
clf = KerasClassifier(build_fn=create_model, verbose=0)

blackbox_model = Pipeline([('transform', transformer),('clf',clf)])
blackbox_model.fit(x_train, y_train)

当我在我的数据集上 运行 这段代码时,出现以下错误:

AttributeError: 'numpy.ndarray' object has no attribute 'transform'

好像和数据格式有关(所以上面分享了我的数据格式)。但是我不确定如何解决这个问题。

方法 Transformer().fit() 应该 return self.

因为您的 Transformer 对象是无状态的,所以使用起来可能更容易 sklearn.preprocessing.FunctionTransformer。您可以使用转换函数实例化 class 。类似于以下内容(未经测试):

import sklearn.preprocessing

def _rec_plot(s, eps=0.10, steps=10):
    d = pdist(s[:,None])
    d = np.floor(d/eps)
    d[d>steps] = steps
    Z = squareform(d)
    return Z

def fun(x, y=None):
    return np.apply_along_axis(_rec_plot, 1, x).astype('float16')

transformer = sklearn.preprocessing.FunctionTransformer(func=fun)

我还建议不要使用语法 from module import *,因为这会污染您的命名空间。当我第一次读到你的问题时,我想知道问题是不是因为所有不必要的导入而导致函数名称冲突。