Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Optional weighted variable-length column, added only when 'weight' is listed
# in `sequence_feature`.
if 'weight' in sequence_feature:
    # Name the shared values once so the column definition and the generated
    # arrays cannot drift apart.
    weighted_seq_name = prefix + "weighted_seq"
    weighted_len_name = weighted_seq_name + "_seq_length"
    weighted_weight_name = prefix + "weight"
    weighted_vocab_size = 2
    weighted_maxlen = 3

    feature_columns.append(
        VarLenSparseFeat(
            weighted_seq_name,
            maxlen=weighted_maxlen,
            vocabulary_size=weighted_vocab_size,
            embedding_dim=embedding_size,
            length_name=weighted_len_name,
            weight_name=weighted_weight_name,
        )
    )

    # Sequence ids and their lengths, then one random weight per position.
    s_input, s_len_input = gen_sequence(
        weighted_vocab_size, weighted_maxlen, sample_size)
    model_input[weighted_seq_name] = s_input
    model_input[weighted_weight_name] = np.random.randn(
        sample_size, weighted_maxlen, 1)
    model_input[weighted_len_name] = s_len_input

    # 'weight' has been consumed here; drop it so that code iterating over
    # `sequence_feature` afterwards does not see it again.
    sequence_feature.remove('weight')
# Excerpt: declare randomly-sized feature columns, then begin generating
# random inputs for them.
# NOTE(review): leading indentation was lost in this excerpt; each loop body
# is the statement(s) immediately following its `for` line.

# Sparse columns: `dim` is drawn from [1, 10) and passed as the second
# positional argument (presumably the vocabulary size — confirm against
# SparseFeat's signature).
for i in range(sparse_feature_num):
dim = np.random.randint(1, 10)
feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,use_hash=hash_flag,dtype=tf.int32))
# Dense columns: one float32 feature of dimension 1 each.
for i in range(dense_feature_num):
feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=tf.float32))
# Sequence columns: one per entry of `sequence_feature`; the entry is used both
# in the column name and as its combiner. `dim` is drawn before `maxlen`.
for i, mode in enumerate(sequence_feature):
dim = np.random.randint(1, 10)
maxlen = np.random.randint(1, 10)
feature_columns.append(
VarLenSparseFeat(prefix +'sequence_' + mode, maxlen=maxlen,vocabulary_size=dim, embedding_dim=embedding_size, combiner=mode))
# Fill `model_input` with random data matching each column's type.
for fc in feature_columns:
if isinstance(fc,SparseFeat):
# Integer ids in [0, vocabulary_size).
model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
elif isinstance(fc,DenseFeat):
# Uniform floats in [0, 1).
model_input[fc.name] = np.random.random(sample_size)
else:
# Any other column type is treated as a variable-length sequence.
s_input, s_len_input = gen_sequence(
fc.vocabulary_size, fc.maxlen, sample_size)
# NOTE(review): the excerpt is cut off here; the code that stores `s_input`
# and `s_len_input` is not visible in this fragment.
# NOTE(review): this excerpt starts mid-loop — the `for` header that owns the
# next two lines (and binds `i`) is not visible here. Indentation was lost.
dim = np.random.randint(1, 10)
feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,use_hash=hash_flag,dtype=tf.int32))
# Dense columns: one float32 feature of dimension 1 each.
for i in range(dense_feature_num):
feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=tf.float32))
# Sequence columns: one per entry of `sequence_feature`; the entry is used both
# in the column name and as its combiner. `dim` is drawn before `maxlen`.
for i, mode in enumerate(sequence_feature):
dim = np.random.randint(1, 10)
maxlen = np.random.randint(1, 10)
feature_columns.append(
VarLenSparseFeat(prefix +'sequence_' + mode, maxlen=maxlen,vocabulary_size=dim, embedding_dim=embedding_size, combiner=mode))
# Generate random input arrays for every declared feature column.
for fc in feature_columns:
    if isinstance(fc, SparseFeat):
        # Integer ids in [0, vocabulary_size).
        model_input[fc.name] = np.random.randint(
            0, fc.vocabulary_size, sample_size)
    elif isinstance(fc, DenseFeat):
        # Uniform floats in [0, 1).
        model_input[fc.name] = np.random.random(sample_size)
    else:
        # Any other column type is treated as a variable-length sequence.
        s_input, s_len_input = gen_sequence(
            fc.vocabulary_size, fc.maxlen, sample_size)
        model_input[fc.name] = s_input
        if include_length:
            # Derive the length key from the column's own name so each
            # sequence column gets a distinct length input. An index variable
            # left over from an earlier loop would give every column the same
            # key (each overwriting the last) and would be undefined if no
            # earlier loop had run.
            length_name = fc.name + '_seq_length'
            fc.length_name = length_name
            model_input[length_name] = s_len_input
# Random binary labels (0 or 1), one per sample, when `classification` is set.
# NOTE(review): the excerpt ends here; any alternative branch is not visible.
if classification:
y = np.random.randint(0, 2, sample_size)
# Column names: 26 categorical fields C1..C26 and 13 numeric fields I1..I13.
sparse_features = ['C' + str(idx) for idx in range(1, 27)]
dense_features = ['I' + str(idx) for idx in range(1, 14)]

# Missing values: the string '-1' for categorical fields, 0 for numeric ones.
data[sparse_features] = data[sparse_features].fillna('-1')
data[dense_features] = data[dense_features].fillna(0)
target = ['label']

# 1. Rescale the dense features into the [0, 1] range.
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])

# 2. Declare the columns: sparse fields are hashed into a space of 1000
#    (inputs are strings), dense fields are 1-dimensional.
hashed_sparse_columns = [
    SparseFeat(col, vocabulary_size=1000, embedding_dim=4, use_hash=True,
               dtype='string')
    for col in sparse_features
]
scalar_dense_columns = [DenseFeat(col, 1) for col in dense_features]
fixlen_feature_columns = hashed_sparse_columns + scalar_dense_columns

# The linear and DNN parts use the very same column list.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(
    linear_feature_columns + dnn_feature_columns)

# 3. Split the frame and build the per-feature input dicts.
train, test = train_test_split(data, test_size=0.2)
train_model_input = {}
test_model_input = {}
for name in feature_names:
    train_model_input[name] = train[name]
    test_model_input[name] = test[name]

# 4. Define the model (training / prediction / evaluation follow).
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
# Missing values: the string '-1' for categorical fields, 0 for numeric ones.
data[sparse_features] = data[sparse_features].fillna('-1')
data[dense_features] = data[dense_features].fillna(0)
target = ['label']

# 1. Label-encode every sparse field, then rescale the dense fields to [0, 1].
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])

# 2. Declare the columns: each sparse field's vocabulary size is its number
#    of distinct (encoded) values; dense fields are 1-dimensional.
encoded_sparse_columns = [
    SparseFeat(col, vocabulary_size=data[col].nunique(), embedding_dim=4)
    for col in sparse_features
]
scalar_dense_columns = [DenseFeat(col, 1) for col in dense_features]
fixlen_feature_columns = encoded_sparse_columns + scalar_dense_columns

# The DNN and linear parts use the very same column list.
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(
    linear_feature_columns + dnn_feature_columns)

# 3. Split the frame and build the per-feature input dicts.
train, test = train_test_split(data, test_size=0.2)
train_model_input = {}
test_model_input = {}
for name in feature_names:
    train_model_input[name] = train[name]
    test_model_input[name] = test[name]

# 4. Define the model (training / prediction / evaluation follow).
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
def get_xy_fd():
    """Build a three-sample toy input with two padded history sequences.

    Declares the user/item feature columns plus `hist_item` and
    `hist_item_gender`, which point at the `item` and `item_gender`
    embeddings through `embedding_name`, and assembles the matching input
    dict `x` keyed by feature name.

    NOTE(review): the visible excerpt ends once `x` is built; no return
    statement is shown here.
    """
    feature_columns = [
        SparseFeat('user', 3, embedding_dim=10),
        SparseFeat('gender', 2, embedding_dim=4),
        SparseFeat('item', 3 + 1, embedding_dim=8),
        SparseFeat('item_gender', 2 + 1, embedding_dim=4),
        DenseFeat('score', 1),
    ]
    # Each history column reuses an existing embedding, so its vocabulary size
    # is kept equal to that of the column it shares with (item: 3 + 1,
    # item_gender: 2 + 1).
    feature_columns += [
        VarLenSparseFeat('hist_item', maxlen=4, vocabulary_size=3 + 1,
                         embedding_dim=8, embedding_name='item'),
        VarLenSparseFeat('hist_item_gender', maxlen=4, vocabulary_size=2 + 1,
                         embedding_dim=4, embedding_name='item_gender'),
    ]

    behavior_feature_list = ["item", "item_gender"]

    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    # One row per sample, right-padded with the mask value 0 up to maxlen=4.
    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {
        'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
        'hist_item': hist_iid, 'hist_item_gender': hist_igender,
        'score': score,
    }
    # Keep only the entries the declared columns actually ask for.
    x = {name: feature_dict[name]
         for name in get_feature_names(feature_columns)}
# NOTE(review): orphaned excerpt. The first line is the tail of a statement
# whose beginning is not visible (it keeps the DenseFeat entries of
# `dnn_feature_columns`, or yields [] when that list is empty/None); the second
# line opens a statement that is cut off before its argument.
filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
varlen_sparse_feature_columns = list(
# Build a three-sample toy input with two session sequences per item field.
# `hash_flag` is forwarded as `use_hash` to every sparse / variable-length
# column declared below.
# NOTE(review): indentation was lost and the excerpt is cut off after the
# session arrays; the code that assembles and returns the inputs is not
# visible here.
def get_xy_fd(hash_flag=False):
feature_columns = [SparseFeat('user', 3, embedding_dim=10,use_hash=hash_flag),
SparseFeat('gender', 2, embedding_dim=4,use_hash=hash_flag),
SparseFeat('item', 3 + 1, embedding_dim=4,use_hash=hash_flag),
SparseFeat('item_gender', 2 + 1, embedding_dim=4,use_hash=hash_flag),
DenseFeat('score', 1)]
# Session columns point at the 'item' / 'item_gender' embeddings through
# `embedding_name` and use the same vocabulary sizes as those columns.
feature_columns += [VarLenSparseFeat('sess_0_item',maxlen=4,vocabulary_size=3+1,embedding_dim=4,use_hash=hash_flag,embedding_name='item'),VarLenSparseFeat('sess_0_item_gender',maxlen=4,vocabulary_size=2+1,embedding_dim=4,use_hash=hash_flag,embedding_name='item_gender')]
feature_columns += [VarLenSparseFeat('sess_1_item', maxlen= 4,vocabulary_size=3 + 1,embedding_dim=4, use_hash=hash_flag, embedding_name='item'),VarLenSparseFeat('sess_1_item_gender', maxlen= 4,vocabulary_size=2 + 1, embedding_dim=4,use_hash=hash_flag,embedding_name='item_gender')]
behavior_feature_list = ["item", "item_gender"]
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3]) # 0 is mask value
igender = np.array([1, 2, 1]) # 0 is mask value
score = np.array([0.1, 0.2, 0.3])
# Session arrays: one row per sample, padded with 0 to length 4; an all-zero
# row contains only the mask value.
# NOTE(review): the arrays are numbered 1/2 while the columns above are named
# sess_0/sess_1 — presumably sess1_* feeds sess_0_*; confirm in the cut-off part.
sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])
sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
# NOTE(review): the first line is the tail of a statement whose beginning is
# not visible (it keeps the DenseFeat entries of `dnn_feature_columns`, or
# yields [] when that list is empty/None).
filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
# Keep only the VarLenSparseFeat entries; [] when no columns were given.
varlen_sparse_feature_columns = list(filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
# Construct a DenseFeat by forwarding (name, dimension, dtype) positionally to
# the parent class's __new__. `dimension` defaults to 1 and `dtype` to
# "float32".
# NOTE(review): the enclosing class header is not visible in this excerpt, so
# the parent type (and the order of fields it expects) cannot be confirmed here.
def __new__(cls, name, dimension=1, dtype="float32"):
return super(DenseFeat, cls).__new__(cls, name, dimension, dtype)