Returns
-------
int
    the optimal k (number of KMeans clusters)
"""
check_types([
("X", X, [list], False),
("input_relation", input_relation, [str], False),
("n_cluster", n_cluster, [list, tuple], False),
("init", init, ["kmeanspp", "random"], True),
("max_iter", max_iter, [int, float], False),
("tol", tol, [int, float], False),
("elbow_score_stop", elbow_score_stop, [int, float], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
    if not isinstance(n_cluster, list):
        # range() cannot be sorted in place; materialize the bounds as a list
        L = list(range(n_cluster[0], n_cluster[1]))
    else:
        L = n_cluster
    L.sort()
schema, relation = schema_relation(input_relation)
schema = str_column(schema)
relation_alpha = ''.join(ch for ch in relation if ch.isalnum())
for i in L:
cursor.execute("DROP MODEL IF EXISTS {}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha))
model = KMeans("{}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha), cursor, i, init, max_iter, tol)
model.fit(input_relation, X)
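    # Usage sketch for the scan above, assuming the enclosing helper is
    # exposed as `best_k` (hypothetical name) and that `cur` is an open
    # Vertica cursor over an existing "public.iris" relation:
    #
    # k = best_k(["PetalLengthCm", "PetalWidthCm"], "public.iris", cursor=cur,
    #            n_cluster=(1, 15), elbow_score_stop=0.8)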
def __init__(self,
name: str,
cursor = None,
n_cluster: int = 8,
init: str = "kmeanspp",
max_iter: int = 300,
tol: float = 1e-4):
check_types([
("name", name, [str], False),
("n_cluster", n_cluster, [int, float], False),
("max_iter", max_iter, [int, float], False),
("tol", tol, [int, float], False)])
if not(cursor):
cursor = read_auto_connect().cursor()
else:
check_cursor(cursor)
self.type = "clustering"
self.name = name
self.cursor = cursor
self.n_cluster = n_cluster
    self.init = init.lower() if isinstance(init, str) else init
self.max_iter = max_iter
self.tol = tol
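
# Constructor usage sketch for the __init__ above; `cur` and the
# "public.iris" relation are assumptions, while fit's argument order
# (input_relation, X) matches the earlier snippet:
def example_kmeans(cur):
    model = KMeans("public.kmeans_iris", cur, n_cluster=4, init="kmeanspp")
    model.fit("public.iris", ["PetalLengthCm", "PetalWidthCm"])
    return model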
#---#
"(SELECT ... FROM ...) x" as long as an alias is given at the end of the
relation.
cursor: DBcursor, optional
Vertica DB cursor.
Returns
-------
float
score
"""
check_types([
("y_true", y_true, [str], False),
("y_score", y_score, [str], False),
("input_relation", input_relation, [str], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
query = "SELECT MAX(ABS({} - {})) FROM {}".format(y_true, y_score, input_relation)
cursor.execute(query)
result = cursor.fetchone()[0]
if (conn):
conn.close()
return (result)
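
# For reference, a pure-Python sketch of the metric the query above computes
# (the values below are illustrative only):
def _max_error_reference(y_true, y_score):
    # Largest absolute deviation between observed and predicted values.
    return max(abs(t - s) for t, s in zip(y_true, y_score))

# _max_error_reference([1.0, 2.0, 3.0], [1.1, 1.8, 3.4]) -> ~0.4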
#---#
cursor: DBcursor, optional
Vertica DB cursor.
Returns
-------
tablesample
An object containing the result. For more information, check out
utilities.tablesample.
"""
check_types([
("y_true", y_true, [str], False),
("y_score", y_score, [str], False),
("input_relation", input_relation, [str], False),
("labels", labels, [list], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
num_classes = str(len(labels))
query = "SELECT CONFUSION_MATRIX(obs, response USING PARAMETERS num_classes = {}) OVER() FROM (SELECT DECODE({}".format(num_classes, y_true)
for idx, item in enumerate(labels):
query += ", '{}', {}".format(item, idx)
query += ") AS obs, DECODE({}".format(y_score)
    for idx, item in enumerate(labels):
query += ", '{}', {}".format(item, idx)
query += ") AS response FROM {}) x".format(input_relation)
result = to_tablesample(query, cursor)
if (conn):
conn.close()
result.table_info = False
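
# For labels = ['setosa', 'virginica'], the string building above produces a
# query of this shape (reconstructed with line breaks for readability;
# y_true, y_score, and my_relation stand for the caller's arguments):
#
# SELECT CONFUSION_MATRIX(obs, response USING PARAMETERS num_classes = 2) OVER()
# FROM (SELECT DECODE(y_true, 'setosa', 0, 'virginica', 1) AS obs,
#              DECODE(y_score, 'setosa', 0, 'virginica', 1) AS response
#       FROM my_relation) x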
Returns
-------
tablesample
An object containing the result. For more information, check out
utilities.tablesample.
"""
check_types([
("y_true", y_true, [str], False),
("y_score", y_score, [str], False),
("input_relation", input_relation, [str], False),
("nbins", nbins, [int, float], False),
("auc_roc", auc_roc, [bool], False),
("best_threshold", best_threshold, [bool], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
query = "SELECT ROC(obs, prob USING PARAMETERS num_bins = {}) OVER() FROM (SELECT (CASE WHEN {} = '{}' THEN 1 ELSE 0 END) AS obs, {}::float AS prob FROM {}) AS prediction_output"
query = query.format(nbins, y_true, pos_label, y_score, input_relation)
cursor.execute(query)
query_result = cursor.fetchall()
if (conn):
conn.close()
    threshold = [item[0] for item in query_result]
    false_positive = [item[1] for item in query_result]
    true_positive = [item[2] for item in query_result]
    auc = 0
    for i in range(len(false_positive) - 1):
        if (false_positive[i + 1] - false_positive[i] != 0.0):
            a = (true_positive[i + 1] - true_positive[i]) / (false_positive[i + 1] - false_positive[i])
            b = true_positive[i + 1] - a * false_positive[i + 1]
            # Integrate the segment y = a * x + b between the two consecutive
            # false-positive rates to accumulate the area under the curve.
            auc += a * (false_positive[i + 1] ** 2 - false_positive[i] ** 2) / 2 + b * (false_positive[i + 1] - false_positive[i])
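
# The loop above integrates each linear ROC segment exactly, which is the
# trapezoidal rule over the piecewise-linear curve; a compact standalone
# equivalent for illustration:
def _auc_reference(false_positive, true_positive):
    auc = 0.0
    for i in range(len(false_positive) - 1):
        dx = false_positive[i + 1] - false_positive[i]
        # Trapezoid area between consecutive (fpr, tpr) points
        auc += dx * (true_positive[i + 1] + true_positive[i]) / 2
    return auc

# _auc_reference([0.0, 0.2, 1.0], [0.0, 0.8, 1.0]) -> 0.8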
Char which is enclosing the str values.
escape: str, optional
    Escape character.
Returns
-------
dict
dictionary containing for each column its type.
See Also
--------
read_csv : Ingests a CSV file in the Vertica DB.
read_json : Ingests a JSON file in the Vertica DB.
"""
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
flex_name = "VERTICAPY_{}_FLEX".format(random.randint(0, 10000000))
cursor.execute("CREATE FLEX LOCAL TEMP TABLE {}(x int) ON COMMIT PRESERVE ROWS;".format(flex_name))
header_names = '' if not(header_names) else "header_names = '{}',".format(sep.join(header_names))
try:
with open(path, "r") as fs:
cursor.copy("COPY {} FROM STDIN PARSER FCSVPARSER(type = 'traditional', delimiter = '{}', header = {}, {} enclosed_by = '{}', escape = '{}') NULL '{}';".format(flex_name, sep, header, header_names, quotechar, escape, na_rep), fs)
except:
cursor.execute("COPY {} FROM LOCAL '{}' PARSER FCSVPARSER(type = 'traditional', delimiter = '{}', header = {}, {} enclosed_by = '{}', escape = '{}') NULL '{}';".format(flex_name, path, sep, header, header_names, quotechar, escape, na_rep))
cursor.execute("SELECT compute_flextable_keys('{}');".format(flex_name))
cursor.execute("SELECT key_name, data_type_guess FROM {}_keys".format(flex_name))
result = cursor.fetchall()
dtype = {}
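    # Plausible continuation sketch (not in the original snippet): map each
    # flex key to its guessed type, assuming `result` holds
    # (key_name, data_type_guess) rows as selected above.
    for key_name, data_type_guess in result:
        dtype[key_name] = data_type_guess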
name: str
Text index name.
cursor: DBcursor, optional
Vertica DB cursor.
print_info: bool, optional
    If set to True, displays the result of the query.
raise_error: bool, optional
If the text index couldn't be dropped, raises the entire error instead
of displaying a warning.
"""
check_types([
("name", name, [str], False),
("print_info", print_info, [bool], False),
("raise_error", raise_error, [bool], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
try:
query="DROP TEXT INDEX {};".format(name)
cursor.execute(query)
if (conn):
conn.close()
if (print_info):
print("The text index {} was successfully dropped.".format(name))
except:
if (conn):
conn.close()
if (raise_error):
raise
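
# Usage sketch; the helper name `drop_text_index` and the index name are
# assumptions, and `cur` stands for an open Vertica cursor:
def example_drop_index(cur):
    drop_text_index("product_description_idx", cursor=cur, print_info=True)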
load_amazon : Ingests the amazon dataset in the Vertica DB.
(Time Series / Regression).
load_iris : Ingests the iris dataset in the Vertica DB.
(Clustering / Classification).
load_market : Ingests the market dataset in the Vertica DB.
(Basic Data Exploration).
load_titanic : Ingests the titanic dataset in the Vertica DB.
(Classification).
load_winequality : Ingests the winequality dataset in the Vertica DB.
(Regression / Classification).
"""
check_types([
("schema", schema, [str], False),
("name", name, [str], False)])
if not(cursor):
cursor = read_auto_connect().cursor()
else:
check_cursor(cursor)
try:
vdf = vDataFrame(name, cursor, schema = schema)
except:
cursor.execute("CREATE TABLE {}.{}(\"time\" Timestamp, \"val\" Numeric(11,7), \"id\" Integer);".format(str_column(schema), str_column(name)))
try:
path = os.path.dirname(verticapy.__file__) + "/learn/data/smart_meters.csv"
query = "COPY {}.{}(\"time\", \"val\", \"id\") FROM {} DELIMITER ',' NULL '' ENCLOSED BY '\"' ESCAPE AS '\\' SKIP 1;".format(str_column(schema), str_column(name), "{}")
if ("vertica_python" in str(type(cursor))):
with open(path, "r") as fs:
cursor.copy(query.format('STDIN'), fs)
else:
cursor.execute(query.format("LOCAL '{}'".format(path)))
vdf = vDataFrame(name, cursor, schema = schema)
        except:
            # Surface the ingestion failure to the caller
            raise
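    # Usage sketch, assuming this loader is exposed as `load_smart_meters`
    # (hypothetical name) and that `cur` is an open Vertica cursor:
    #
    # vdf = load_smart_meters(cursor=cur, schema="public", name="smart_meters")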
max_depth: int = 5,
min_samples_leaf: int = 1,
min_info_gain: float = 0.0,
nbins: int = 32):
check_types([
("name", name, [str], False),
("n_estimators", n_estimators, [int, float], False),
("max_features", max_features, [str, int, float], False),
("max_leaf_nodes", max_leaf_nodes, [int, float], False),
("sample", sample, [int, float], False),
("max_depth", max_depth, [int, float], False),
("min_samples_leaf", min_samples_leaf, [int, float], False),
("min_info_gain", min_info_gain, [int, float], False),
("nbins", nbins, [int, float], False)])
if not(cursor):
cursor = read_auto_connect().cursor()
else:
check_cursor(cursor)
self.type = "classifier"
self.cursor = cursor
self.name = name
self.n_estimators = n_estimators
self.max_features = max_features
self.max_leaf_nodes = max_leaf_nodes
self.sample = sample
self.max_depth = max_depth
self.min_samples_leaf = min_samples_leaf
self.min_info_gain = min_info_gain
self.nbins = nbins
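
# Construction sketch; the class name is an assumption, and the keyword
# values shown are the defaults visible in the snippet above:
def example_forest(cur):
    return RandomForestClassifier("public.rf_model", cursor=cur, max_depth=5,
                                  min_samples_leaf=1, min_info_gain=0.0,
                                  nbins=32)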
#---#
"(SELECT ... FROM ...) x" as long as an alias is given at the end of the
relation.
cursor: DBcursor, optional
Vertica DB cursor.
Returns
-------
float
score
"""
check_types([
("y_true", y_true, [str], False),
("y_score", y_score, [str], False),
("input_relation", input_relation, [str], False)])
if not(cursor):
conn = read_auto_connect()
cursor = conn.cursor()
else:
conn = False
check_cursor(cursor)
query = "SELECT MSE({}, {}) OVER () FROM {}".format(y_true, y_score, input_relation)
cursor.execute(query)
result = cursor.fetchone()[0]
if (conn):
conn.close()
return (result)
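
# For reference, a pure-Python sketch of the MSE the query above computes:
def _mse_reference(y_true, y_score):
    # Mean of the squared deviations between observations and predictions.
    return sum((t - s) ** 2 for t, s in zip(y_true, y_score)) / len(y_true)

# _mse_reference([3.0, 5.0, 2.5], [2.5, 5.0, 4.0]) -> ~0.833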
#---#