Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def getDS(from_data):
    '''
    Wrap the input in an appropriate DataSource.

    :param from_data: an existing DataSource (returned as-is), a pandas
                      DataFrame (wrapped in a DataSource), or anything else,
                      which is assumed to be a file path/URL (wrapped in FileDS)
    :return: a DataSource for ``from_data``
    '''
    if isinstance(from_data, DataSource):
        from_ds = from_data
    elif isinstance(from_data, DataFrame):
        from_ds = DataSource(from_data)
    else:  # assume it is a file path or URL
        from_ds = FileDS(from_data)

    # Defensive guard: none of the branches above can currently produce None,
    # but keep the log so a future branch that can is still reported.
    if from_ds is None:
        log.error('No data matched the input data')

    return from_ds
import pandas
from mindsdb.libs.data_types.data_source import DataSource
class WindowDS(DataSource):
    # NOTE(review): this fragment is truncated — the for-loop body (and the
    # rest of _setup) continues beyond what is visible here.
    def _setup(self, df, col_max, col_min, window_size_samples=300, step_size=30, min_size = 100):
        # Sliding-window slicing over df, presumably keyed on the columns
        # named by col_max/col_min — TODO confirm against the full source.
        header = list(df.columns.values)
        data = df.values.tolist()
        # Positional indices of the boundary columns within each row list.
        max_index = header.index(col_max)
        min_index = header.index(col_min)
        ret = []
        for row in data:
            # NOTE(review): 'max'/'min' shadow the builtins inside this loop.
            max = row[max_index]
            min = row[min_index]
            new_max = max
def getDS(from_data):
    '''
    Resolve ``from_data`` into a DataSource instance.

    Accepts a DataSource (returned unchanged), a DataFrame (wrapped in a
    DataSource), or anything else, treated as a file reference for FileDS.
    '''
    if isinstance(from_data, DataSource):
        resolved = from_data
    else:
        wrap = DataSource if isinstance(from_data, DataFrame) else FileDS
        resolved = wrap(from_data)

    if resolved is None:
        log.error('No data matched the input data')
    return resolved
import pandas
import json
from mindsdb.libs.data_types.data_source import DataSource
class ArrayToColsDS(DataSource):
    # NOTE(review): this fragment is truncated — the body of the
    # `if cell is None:` branch (and the rest of _setup) is not visible here.
    # NOTE(review): `cols_to_split = {}` is a mutable default argument; it
    # appears to be only read in the visible code, but a None default with
    # an in-body fallback would be safer.
    def _setup(self, df, cols_to_split = {}):
        header = list(df.columns.values)
        data = df.values.tolist()
        # Rebuild rows, splitting array-valued cells of the columns listed
        # in cols_to_split into multiple scalar columns (presumably —
        # TODO confirm against the full source).
        new_data = []
        new_header = []
        for row in data:
            n_row = []
            for i, col in enumerate(header):
                cell = row[i]
                if col in cols_to_split:
                    # cols_to_split maps column name -> number of columns
                    # to expand into, judging by this lookup.
                    ncols = cols_to_split[col]
                    if cell is None:
import os
import pandas as pd
import psycopg2
from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log
class PostgresDS(DataSource):
    """DataSource that loads a PostgreSQL query result (or whole table) into a DataFrame."""

    def _setup(self, query=None, host='localhost', user='postgres', password='', database='postgres', port=5432, table=None):
        """
        Run ``query`` (or ``SELECT * FROM table`` when ``query`` is None)
        and return the result as a DataFrame.

        :param query: SQL text to execute; when None, ``table`` must be given
        :param host: PostgreSQL server host
        :param user: database user name
        :param password: database password
        :param database: database name to connect to
        :param port: server port
        :param table: table to dump when no explicit query is provided
        :return: (df, col_map) where col_map maps each column name to itself
        """
        if query is None:
            # Table names cannot be bound as query parameters, so this
            # f-string interpolation must only receive trusted identifiers.
            query = f'SELECT * FROM {table}'

        con = psycopg2.connect(dbname=database, user=user, password=password, host=host, port=port)
        try:
            df = pd.read_sql(query, con=con)
        finally:
            # Close the connection even if read_sql raises, so failed
            # queries do not leak server connections.
            con.close()

        col_map = {col: col for col in df.columns}
        return df, col_map
import os
import pandas as pd
import MySQLdb
from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log
class MySqlDS(DataSource):
    """DataSource that loads a MySQL query result (or whole table) into a DataFrame."""

    def _setup(self, query=None, host='localhost', user='root', password='', database='mysql', port=3306, table=None):
        """
        Run ``query`` (or ``SELECT * FROM table`` when ``query`` is None)
        and return the result as a DataFrame.

        :param query: SQL text to execute; when None, ``table`` must be given
        :param host: MySQL server host
        :param user: database user name
        :param password: database password
        :param database: database name to connect to
        :param port: server port
        :param table: table to dump when no explicit query is provided
        :return: (df, col_map) where col_map maps each column name to itself
        """
        if query is None:
            # Table names cannot be bound as query parameters, so this
            # f-string interpolation must only receive trusted identifiers.
            query = f'SELECT * FROM {table}'

        con = MySQLdb.connect(host, user, password, database, port=port)
        try:
            df = pd.read_sql(query, con=con)
        finally:
            # Close the connection even if read_sql raises, so failed
            # queries do not leak server connections.
            con.close()

        col_map = {col: col for col in df.columns}
        return df, col_map
import pandas
import re
from io import BytesIO, StringIO
import csv
import codecs
import json
import traceback
import codecs
import requests
from mindsdb.libs.data_types.data_source import DataSource
from pandas.io.json import json_normalize
from mindsdb.libs.data_types.mindsdb_logger import log
class FileDS(DataSource):
    def cleanRow(self, row):
        """Return a copy of *row* with empty/NaN marker cells replaced by None."""
        markers = ['', ' ', ' ', 'NaN', 'nan', 'NA']
        return [None if str(cell) in markers else cell for cell in row]
    def _getDataIo(self, file):
        """
        Open *file* (a local path or a URL) and determine its data format
        and CSV dialect.

        NOTE(review): this fragment is truncated — the implementation body
        is not visible here.

        :param file: file path or url
        :return: data_io, format, dialect
        """
import os
import boto3
from botocore import UNSIGNED
from botocore.client import Config
from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log
from mindsdb.libs.data_sources.file_ds import FileDS
class S3DS(DataSource):
    def _setup(self, bucket_name, file_path, access_key=None, secret_key=None, use_default_credentails=False):
        """
        Download s3://<bucket_name>/<file_path> to a local temp file and
        delegate parsing to FileDS.

        Credential resolution: explicit key pair if both are given, else the
        default boto3 credential chain when requested, else anonymous
        (unsigned) access for public buckets.
        """
        if access_key is not None and secret_key is not None:
            client = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key)
        elif use_default_credentails:
            client = boto3.client('s3')
        else:
            # Unsigned config allows reading public buckets without credentials.
            client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

        self.tmp_file_name = '.tmp_mindsdb_data_file'
        with open(self.tmp_file_name, 'wb') as tmp:
            client.download_fileobj(bucket_name, file_path, tmp)

        delegate = FileDS(self.tmp_file_name)
        return delegate._df, delegate._col_map
import pandas
import logging
import csv
import mindsdb.config as CONFIG
import re
from mindsdb.libs.data_types.data_source import DataSource
class CSVFileDS(DataSource):
    # NOTE(review): this fragment is truncated — the loop body (and the rest
    # of clean) continues beyond what is visible here.
    def clean(self, header):
        """
        Sanitize CSV column names: replace punctuation and whitespace with
        '_', collapse runs of '_', strip a trailing '_', and track duplicate
        counts per sanitized name.
        """
        clean_header = []
        col_count={}
        # Every character in this string gets mapped to '_'.
        replace_chars = """ ,./;'[]!@#$%^&*()+{-=+~`}\\|:"<>?"""
        for col in header:
            orig_col = col
            for char in replace_chars:
                col = col.replace(char,'_')
            # Collapse consecutive underscores produced by adjacent replacements.
            col = re.sub('_+','_',col)
            # NOTE(review): col[-1] raises IndexError if a name sanitizes to
            # '' — confirm upstream guarantees non-empty header names.
            if col[-1] == '_':
                col = col[:-1]
            # Occurrence count per sanitized name (for de-duplication).
            col_count[col] = 1 if col not in col_count else col_count[col]+1