如何将 One-Hot Encoding 层添加到 Tensorflow 模型?
How to add One-Hot Encoding layer to Tensorflow model?
我想为 Tensorflow 2 模型添加一个 One-Hot 编码层。这是我目前所拥有的:
import pandas as pd
import tensorflow as tf
# import CSV file to pandas DataFrame called df
# set categorical (CAT_COLUMNS) and numerical (NUM_COLUMNS) features
# Build the list of feature columns, one entry per input feature.
# NOTE: df, CAT_COLUMNS and NUM_COLUMNS must already be defined by the
# setup steps described in the comments above.
feature_cols = []

# Create an IndicatorColumn (one-hot encoding) for each categorical feature.
for feature in CAT_COLUMNS:
    # The vocabulary is the set of distinct values observed in the column.
    vocab = df[feature].unique()
    feature_cols.append(
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(feature, vocab)
        )
    )

# Create a NumericColumn for each numerical feature (no encoding applied).
for feature in NUM_COLUMNS:
    feature_cols.append(tf.feature_column.numeric_column(feature, dtype=tf.int32))

print(feature_cols)
我应该如何在 Tensorflow 模型中使用 feature_cols,以便 One-Hot Encoding 仅应用于分类特征?
# Feed-forward network whose input width equals the number of columns in df.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=[len(df.columns)]),
    tf.keras.layers.Dense(units=128, activation=tf.nn.relu),
    # Softmax over a single unit always outputs 1.0, so a one-unit output
    # layer needs sigmoid to produce a usable (binary) probability.
    tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid),
])
我认为您可以将分类特征和数值特征作为单独的输入提供,然后使用 tf.keras.layers.Concatenate 将它们组合起来。
使用 tf.keras.layers.experimental.preprocessing 中的预处理层。
示例请参阅 https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers 。
@Fluxy - 多输入(multi-input)示例(分类特征和数值特征分别作为单独的输入):
import numpy as np
import tensorflow as tf
import keras
# Toy data: 8 samples with 2 numerical features each.
x = np.array(
    [[1, 2], [3, 4], [5, 6], [7, 8], [1, 2], [3, 4], [5, 6], [7, 8]],
    dtype="int32",
)
print(x)

# Categorical feature with 3 classes (= num_tokens in CategoryEncoding below).
y = np.array([0, 0, 1, 2, 0, 0, 1, 2])
print(y)

ds = tf.data.Dataset.from_tensor_slices((x, y))  # dataset of (x, y) tuples
print(ds)

# Alternative: one-hot encode inside the input pipeline instead of the model:
#   ds.map(lambda x, y: (x, tf.one_hot(y, depth=3)))

# Two separate inputs: numerical features pass through unchanged, while the
# categorical feature is one-hot encoded by a preprocessing layer.
numerical_input = tf.keras.layers.Input(shape=(2,), dtype=tf.float32)
categorical_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32)
encoded = tf.keras.layers.CategoryEncoding(num_tokens=3, output_mode="one_hot")(
    categorical_input
)

# Join the raw numerical features with the one-hot vector.
concat = tf.keras.layers.concatenate([numerical_input, encoded])
model = tf.keras.models.Model(
    inputs=[numerical_input, categorical_input], outputs=[concat]
)

# categorical_input is declared with shape (1,), so give y an explicit
# trailing axis rather than relying on implicit expansion of 1-D inputs.
predicted = model.predict([x, y.reshape(-1, 1)])
print(predicted)

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None").
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True)
P.S. 建议:如果类别特征的维度太高,请使用嵌入层(Embedding),而不是直接拼接 One-Hot 编码。
我想为 Tensorflow 2 模型添加一个 One-Hot 编码层。这是我目前所拥有的:
import pandas as pd
import tensorflow as tf
# import CSV file to pandas DataFrame called df
# set categorical (CAT_COLUMNS) and numerical (NUM_COLUMNS) features
# Build the list of feature columns, one entry per input feature.
# NOTE: df, CAT_COLUMNS and NUM_COLUMNS must already be defined by the
# setup steps described in the comments above.
feature_cols = []

# Create an IndicatorColumn (one-hot encoding) for each categorical feature.
for feature in CAT_COLUMNS:
    # The vocabulary is the set of distinct values observed in the column.
    vocab = df[feature].unique()
    feature_cols.append(
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(feature, vocab)
        )
    )

# Create a NumericColumn for each numerical feature (no encoding applied).
for feature in NUM_COLUMNS:
    feature_cols.append(tf.feature_column.numeric_column(feature, dtype=tf.int32))

print(feature_cols)
我应该如何在 Tensorflow 模型中使用 feature_cols,以便 One-Hot Encoding 仅应用于分类特征?
# Feed-forward network whose input width equals the number of columns in df.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=[len(df.columns)]),
    tf.keras.layers.Dense(units=128, activation=tf.nn.relu),
    # Softmax over a single unit always outputs 1.0, so a one-unit output
    # layer needs sigmoid to produce a usable (binary) probability.
    tf.keras.layers.Dense(units=1, activation=tf.nn.sigmoid),
])
我认为您可以将分类特征和数值特征作为单独的输入提供,然后使用 tf.keras.layers.Concatenate 将它们组合起来。
使用 tf.keras.layers.experimental.preprocessing 中的预处理层。
示例请参阅 https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers 。
@Fluxy - 多输入(multi-input)示例(分类特征和数值特征分别作为单独的输入):
import numpy as np
import tensorflow as tf
import keras
# Toy data: 8 samples with 2 numerical features each.
x = np.array(
    [[1, 2], [3, 4], [5, 6], [7, 8], [1, 2], [3, 4], [5, 6], [7, 8]],
    dtype="int32",
)
print(x)

# Categorical feature with 3 classes (= num_tokens in CategoryEncoding below).
y = np.array([0, 0, 1, 2, 0, 0, 1, 2])
print(y)

ds = tf.data.Dataset.from_tensor_slices((x, y))  # dataset of (x, y) tuples
print(ds)

# Alternative: one-hot encode inside the input pipeline instead of the model:
#   ds.map(lambda x, y: (x, tf.one_hot(y, depth=3)))

# Two separate inputs: numerical features pass through unchanged, while the
# categorical feature is one-hot encoded by a preprocessing layer.
numerical_input = tf.keras.layers.Input(shape=(2,), dtype=tf.float32)
categorical_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32)
encoded = tf.keras.layers.CategoryEncoding(num_tokens=3, output_mode="one_hot")(
    categorical_input
)

# Join the raw numerical features with the one-hot vector.
concat = tf.keras.layers.concatenate([numerical_input, encoded])
model = tf.keras.models.Model(
    inputs=[numerical_input, categorical_input], outputs=[concat]
)

# categorical_input is declared with shape (1,), so give y an explicit
# trailing axis rather than relying on implicit expansion of 1-D inputs.
predicted = model.predict([x, y.reshape(-1, 1)])
print(predicted)

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None").
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True)
建议:如果类别特征的维度太高,请使用嵌入层(Embedding),而不是直接拼接 One-Hot 编码。