Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment from the body of a random test-data builder; the enclosing `def` line is outside this excerpt.
# Store the weighted-sequence ids, random weights of shape (sample_size, 3, 1) and the
# sequence lengths under prefix-qualified keys.
model_input[prefix+"weighted_seq"] = s_input
model_input[prefix+'weight'] = np.random.randn(sample_size,3,1)
model_input[prefix+"weighted_seq"+"_seq_length"] = s_len_input
# Drop 'weight' so the loop over sequence_feature below does not pass it on as a combiner mode.
# NOTE(review): pop() edits the list in place, so a caller-supplied (or default) list is changed too -- confirm intended.
sequence_feature.pop(sequence_feature.index('weight'))
# One int32 SparseFeat per requested sparse feature, each with a random vocabulary size drawn from [1, 10).
for i in range(sparse_feature_num):
dim = np.random.randint(1, 10)
feature_columns.append(SparseFeat(prefix+'sparse_feature_'+str(i), dim,embedding_size,use_hash=hash_flag,dtype=tf.int32))
# One float32 DenseFeat per requested dense feature.
for i in range(dense_feature_num):
feature_columns.append(DenseFeat(prefix+'dense_feature_'+str(i), 1,dtype=tf.float32))
# One VarLenSparseFeat per remaining sequence mode; the mode string is used in the column name
# and as the combiner. Vocabulary size and maxlen are both random draws from [1, 10).
for i, mode in enumerate(sequence_feature):
dim = np.random.randint(1, 10)
maxlen = np.random.randint(1, 10)
feature_columns.append(
VarLenSparseFeat(prefix +'sequence_' + mode, maxlen=maxlen,vocabulary_size=dim, embedding_dim=embedding_size, combiner=mode))
# Fill model_input with random data that matches each column's type.
for fc in feature_columns:
if isinstance(fc,SparseFeat):
# Random ids in [0, vocabulary_size).
model_input[fc.name]= np.random.randint(0, fc.vocabulary_size, sample_size)
elif isinstance(fc,DenseFeat):
# Random floats in [0, 1).
model_input[fc.name] = np.random.random(sample_size)
else:
# Every other column type is handled as a sequence column: ids and lengths come from gen_sequence.
s_input, s_len_input = gen_sequence(
fc.vocabulary_size, fc.maxlen, sample_size)
model_input[fc.name] = s_input
if include_length:
# NOTE(review): `i` is not bound by this `for fc` loop; it appears to hold whatever an earlier
# `for i ...` loop left behind, so every column handled here would share one length name -- confirm.
fc.length_name = prefix+"sequence_"+str(i)+'_seq_length'
model_input[prefix+"sequence_"+str(i)+'_seq_length'] = s_len_input
def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                  sequence_feature=['sum', 'mean', 'max', 'weight'], classification=True, include_length=False,
                  hash_flag=False, prefix=''):
    """Build random feature columns and matching model inputs for tests.

    Args:
        sample_size: Number of rows generated for every input array.
        embedding_size: Embedding dimension passed to the sparse and
            sequence feature columns.
        sparse_feature_num: Number of ``SparseFeat`` columns to create.
        dense_feature_num: Number of ``DenseFeat`` columns to create.
        sequence_feature: Sequence modes to generate. The entry ``'weight'``
            requests an extra weighted sequence column. The argument is
            never modified.
        classification: Not used in the portion of the function shown here.
        include_length: Not used in the portion of the function shown here.
        hash_flag: Forwarded as ``use_hash`` to every ``SparseFeat``.
        prefix: String prepended to every generated feature/input name.
    """
    # Work on a private copy: the code below removes 'weight' from the list,
    # and doing that on the shared default list (or on a caller's list) would
    # silently drop the weighted sequence from every later call.
    sequence_feature = list(sequence_feature)

    feature_columns = []
    model_input = {}

    if 'weight' in sequence_feature:
        weighted_name = prefix + "weighted_seq"
        length_key = prefix + "weighted_seq" + "_seq_length"
        feature_columns.append(
            VarLenSparseFeat(weighted_name, maxlen=3, vocabulary_size=2, embedding_dim=embedding_size,
                             length_name=length_key, weight_name=prefix + "weight"))
        s_input, s_len_input = gen_sequence(2, 3, sample_size)
        model_input[weighted_name] = s_input
        model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
        model_input[length_key] = s_len_input
        # 'weight' is not a combiner mode, so keep it out of the loop below.
        sequence_feature.remove('weight')

    for i in range(sparse_feature_num):
        # Random vocabulary size in [1, 10).
        dim = np.random.randint(1, 10)
        feature_columns.append(
            SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size,
                       use_hash=hash_flag, dtype=tf.int32))

    for i in range(dense_feature_num):
        feature_columns.append(DenseFeat(prefix + 'dense_feature_' + str(i), 1, dtype=tf.float32))

    for i, mode in enumerate(sequence_feature):
        # NOTE(review): only the vocabulary-size draw is visible for this loop
        # in the excerpt; the rest of the body is not shown here.
        dim = np.random.randint(1, 10)
# Fragment from the body of a model-building function; the enclosing `def` line is outside this excerpt.
# Linear part of the model, computed from the linear feature columns.
linear_logit = get_linear_logit(features, linear_feature_columns, init_std=init_std, seed=seed, prefix='linear',
l2_reg=l2_reg_linear)
# Split the DNN columns by type; both lists are empty when dnn_feature_columns is falsy.
sparse_feature_columns = list(
filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
varlen_sparse_feature_columns = list(
filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
# Nested lookup: outer key is fc_j.embedding_name, inner key is fc_i.embedding_name. Each entry is
# its own Embedding layer sized by fc_j.vocabulary_size, so every field gets a separate table per
# partner field (both loops run over the same list, so a field is also paired with itself).
# mask_zero is enabled only when fc_j is a VarLenSparseFeat.
sparse_embedding = {fc_j.embedding_name: {fc_i.embedding_name: Embedding(fc_j.vocabulary_size, embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=0.0001, seed=seed),
embeddings_regularizer=l2(
l2_reg_embedding),
mask_zero=isinstance(fc_j,
VarLenSparseFeat),
name='sparse_emb_' + str(
fc_j.embedding_name) + '_' + fc_i.embedding_name)
for fc_i in
sparse_feature_columns + varlen_sparse_feature_columns} for fc_j in
sparse_feature_columns + varlen_sparse_feature_columns}
dense_value_list = get_dense_input(features, dnn_feature_columns)
embed_list = []
# Visit every unordered pair of sparse / variable-length sparse columns.
for fc_i, fc_j in itertools.combinations(sparse_feature_columns + varlen_sparse_feature_columns, 2):
i_input = features[fc_i.name]
# Columns flagged with use_hash have their raw input passed through a Hash layer first.
if fc_i.use_hash:
i_input = Hash(fc_i.vocabulary_size)(i_input)
j_input = features[fc_j.name]
if fc_j.use_hash:
j_input = Hash(fc_j.vocabulary_size)(j_input)
# Length of every genre list; the longest one fixes the padded width.
genres_length = np.array([len(genres) for genres in genres_list])
max_len = max(genres_length)
# Notice : padding=`post`
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post')

# 2.count #unique features for each sparse field and generate feature config for sequence feature
fixlen_feature_columns = [
    SparseFeat(feat, data[feat].nunique(), embedding_dim=4)
    for feat in sparse_features
]

use_weighted_sequence = False
# Notice : value 0 is for padding for sequence input feature, hence the +1 on the vocabulary size.
# The weight input is attached only when use_weighted_sequence is switched on.
varlen_feature_columns = [
    VarLenSparseFeat(
        'genres',
        maxlen=max_len,
        vocabulary_size=len(key2index) + 1,
        embedding_dim=4,
        combiner='mean',
        weight_name='genres_weight' if use_weighted_sequence else None,
    )
]

linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
model_input = {name: data[name] for name in sparse_features}
model_input.update(
    genres=genres_list,
    genres_weight=np.random.randn(data.shape[0], max_len, 1),
)

# 4.Define Model,compile and train
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
# Fragment from the tail of a toy-dataset builder; the `def` line and the uid / ugender / iid
# definitions are outside this excerpt.
igender = np.array([1, 2, 1])# 0 is mask value
score = np.array([0.1, 0.2, 0.3])
# History inputs: one row per sample, width 4, with trailing zeros as padding.
hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])
# Count of non-zero entries in each history row above.
behavior_length = np.array([3,3,2])
# Every candidate input keyed by name; "seq_length" carries the history lengths.
feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
'hist_item': hist_iid, 'hist_item_gender': hist_igender,
'score': score,"seq_length":behavior_length}
# When use_neg is set, add the 'neg_hist_*' histories (presumably negative samples -- confirm).
# Their columns reuse the 'item' / 'item_gender' embeddings and the shared "seq_length" input.
if use_neg:
feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
feature_columns += [VarLenSparseFeat('neg_hist_item', maxlen=4,vocabulary_size=3+1,embedding_dim=8, embedding_name='item',length_name="seq_length"),
VarLenSparseFeat('neg_hist_item_gender', maxlen=4, vocabulary_size=3+1,embedding_dim=4,embedding_name='item_gender',length_name="seq_length")]
# Keep only the entries whose names get_feature_names reports for feature_columns.
x = {name:feature_dict[name] for name in get_feature_names(feature_columns)}
# One label per sample.
y = [1, 0, 1]
return x, y, feature_columns, behavior_feature_list
filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
# Fragment from the tail of a toy-dataset builder; the `def` line and the uid / ugender / iid /
# igender definitions are outside this excerpt.
score = np.array([0.1, 0.2, 0.3])
# History inputs: one row per sample, width 4, with trailing zeros as padding.
hist_iid = np.array([[ 1, 2, 3,0], [ 1, 2, 3,0], [ 1, 2, 0,0]])
hist_igender = np.array([[1, 1, 2,0 ], [2, 1, 1, 0], [2, 1, 0, 0]])
# Count of non-zero entries in each history row above.
behavior_length = np.array([3,3,2])
# Every candidate input keyed by name; "seq_length" carries the history lengths.
feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
'hist_item': hist_iid, 'hist_item_gender': hist_igender,
'score': score,"seq_length":behavior_length}
# When use_neg is set, add the 'neg_hist_*' histories (presumably negative samples -- confirm).
# Their columns reuse the 'item' / 'item_gender' embeddings and the shared "seq_length" input.
if use_neg:
feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
feature_columns += [VarLenSparseFeat('neg_hist_item', maxlen=4,vocabulary_size=3+1,embedding_dim=8, embedding_name='item',length_name="seq_length"),
VarLenSparseFeat('neg_hist_item_gender', maxlen=4, vocabulary_size=3+1,embedding_dim=4,embedding_name='item_gender',length_name="seq_length")]
# Keep only the entries whose names get_feature_names reports for feature_columns.
x = {name:feature_dict[name] for name in get_feature_names(feature_columns)}
# One label per sample.
y = [1, 0, 1]
return x, y, feature_columns, behavior_feature_list
# Keep only the variable-length sparse columns; an empty/None column list yields [].
varlen_sparse_feature_columns = (
    [column for column in dnn_feature_columns if isinstance(column, VarLenSparseFeat)]
    if dnn_feature_columns
    else []
)
def get_xy_fd():
    """Build a three-sample toy dataset with item-history sequence features.

    Returns:
        tuple: ``(x, y, feature_columns, behavior_feature_list)`` where

        * ``x`` maps each name reported by ``get_feature_names`` for the
          feature columns to its input array,
        * ``y`` is the list of three labels,
        * ``feature_columns`` is the list of feature column definitions,
        * ``behavior_feature_list`` names the features whose ``hist_*``
          sequences are provided.
    """
    feature_columns = [
        SparseFeat('user', 3, embedding_dim=10),
        SparseFeat('gender', 2, embedding_dim=4),
        SparseFeat('item', 3 + 1, embedding_dim=8),
        SparseFeat('item_gender', 2 + 1, embedding_dim=4),
        DenseFeat('score', 1),
    ]
    # History sequences reuse the embeddings of the features they belong to.
    feature_columns += [
        VarLenSparseFeat('hist_item', maxlen=4, vocabulary_size=3 + 1, embedding_dim=8,
                         embedding_name='item'),
        VarLenSparseFeat('hist_item_gender', maxlen=4, vocabulary_size=3 + 1, embedding_dim=4,
                         embedding_name='item_gender'),
    ]
    behavior_feature_list = ["item", "item_gender"]

    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])  # 0 is mask value
    igender = np.array([1, 2, 1])  # 0 is mask value
    score = np.array([0.1, 0.2, 0.3])

    # One row per sample, width 4, with trailing zeros as padding.
    hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])

    feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
                    'hist_item': hist_iid, 'hist_item_gender': hist_igender, 'score': score}
    # Keep only the inputs that the feature columns actually reference.
    x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
    y = [1, 0, 1]
    # Without this return the function would build everything and hand back None.
    return x, y, feature_columns, behavior_feature_list
filter(lambda x: isinstance(x, VarLenSparseFeat), feature_columns)) if feature_columns else []