Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_materialize():
    """Check that ``materialize()`` clears the row index of every column.

    A frame is assembled with ``cbind`` from a sliced frame, a repeated
    frame and a plain frame; the test pins the row-index type of each
    column before materializing and asserts that none remains afterwards.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    DT = dt.cbind(sliced, repeated, plain, force=True)

    # Before materializing: column 0 reports a "slice" row index,
    # column 1 an "arr32" row index, and column 2 has none.
    assert frame_column_rowindex(DT, 0).type == "slice"
    assert frame_column_rowindex(DT, 1).type == "arr32"
    assert frame_column_rowindex(DT, 2) is None

    DT.materialize()

    # After materializing no column may keep a row index.
    for column in range(3):
        assert frame_column_rowindex(DT, column) is None
def join(names1, names2):
    """Return the column names produced by cbind-ing two name-only frames.

    Both frames are built from ``names=`` alone. The whole operation runs
    under ``pytest.warns(DatatableWarning)``, so the calling test fails if
    no such warning is emitted.

    :param names1: column names of the first frame
    :param names2: column names of the second frame
    :return: the ``names`` attribute of the combined frame
    """
    with pytest.warns(DatatableWarning):
        left = dt.Frame(names=names1)
        right = dt.Frame(names=names2)
        # Returning from inside the context keeps every call covered by
        # the warning check.
        return dt.cbind(left, right).names
def test_cbind_api():
    """Check that ``cbind`` gives the same result for every argument shape.

    The varargs call is the reference; a list, a tuple, several lists,
    a mix of frames and lists, and a generator must all match it.
    """
    DT1 = dt.Frame(A=[1, 2, 3])
    DT2 = dt.Frame(B=[-4, -5, None])
    DT3 = dt.Frame(X=["makes", "gonna", "make"])

    expected = dt.cbind(DT1, DT2, DT3)
    variants = [
        dt.cbind([DT1, DT2, DT3]),
        dt.cbind((DT1, DT2, DT3)),  # tuple
        dt.cbind([DT1], [DT2, DT3]),
        dt.cbind(DT1, [DT2], DT3),
        dt.cbind((frame for frame in [DT1, DT2, DT3])),  # generator
    ]

    for variant in variants:
        assert_equals(expected, variant)
def test_create_from_doublestar_expansion():
    """Check that ``dt.Frame(**a, **b)`` equals ``dt.cbind(a, b)``."""
    left = dt.Frame(A=range(3), B=["df", "qe;r", None])
    right = dt.Frame(D=[7.99, -12.5, 0.1], E=[None]*3)

    # Build the frame by double-star expanding both inputs as keyword args.
    expanded = dt.Frame(**left, **right)
    expected = dt.cbind(left, right)

    assert_equals(expanded, expected)
def test_cbind_expanded_frame():
    """Check that ``cbind`` over a star-unpacked Frame reproduces that Frame."""
    source = dt.Frame(A=[1, 2], B=['a', "E"], C=[7, 1000], D=[-3.14, 159265])

    # Star-unpack the frame into positional arguments and bind them back.
    rebuilt = dt.cbind(*source)

    assert_equals(source, rebuilt)
:func:`~tmtoolkit.topicmod.model_io.ldamodel_full_doc_topics` to retrieve the full document-topic
distribution as datatable Frame
:param topic_word_distrib: topic-word distribution; shape KxM, where K is number of topics, M is vocabulary size
:param vocab: vocabulary list/array of length M
:param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
:param row_labels: format string for each row index where ``{i0}`` or ``{i1}`` are replaced by the respective
zero- or one-indexed topic numbers or an array with individual row labels
:return: datatable Frame
"""
if isinstance(row_labels, str):
rownames = [row_labels.format(i0=i, i1=i + 1) for i in range(topic_word_distrib.shape[0])]
else:
rownames = row_labels
return dt.cbind(dt.Frame({colname_rowindex: rownames}),
dt.Frame(topic_word_distrib, names=list(vocab)))
distribution as datatable Frame
:param doc_topic_distrib: document-topic distribution; shape NxK, where N is the number of documents, K is the
number of topics
:param doc_labels: list/array of length N with a string label for each document
:param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
:param topic_labels: format string for each topic column name where ``{i0}`` or ``{i1}`` are replaced by the respective
zero- or one-indexed topic numbers or an array with individual topic labels
:return: datatable Frame
"""
if isinstance(topic_labels, str):
colnames = [topic_labels.format(i0=i, i1=i+1) for i in range(doc_topic_distrib.shape[1])]
else:
colnames = topic_labels
return dt.cbind(dt.Frame({colname_rowindex: doc_labels}),
dt.Frame(doc_topic_distrib, names=list(colnames)))
def transform(self, X: dt.Frame, y: np.array = None):
from aif360.datasets import BinaryLabelDataset
# Transformation should only occur during training when y is present
if self.fitted and (self.label_names in X.names or y is not None):
if self.label_names not in X.names:
X = dt.cbind(X, dt.Frame(y))
X_pd = X.to_pandas()
X = dt.Frame(X_pd.fillna(X_pd.mean()))
transformed_X: BinaryLabelDataset = self.lfr.transform(
BinaryLabelDataset(
df=X.to_pandas(),
favorable_label=self.favorable_label,
unfavorable_label=self.unfavorable_label,
label_names=self.label_names,
protected_attribute_names=self.protected_attribute_names,
)
)
return dt.Frame(
transformed_X.features,
names=[name+"_lfr" for name in transformed_X.feature_names],