How to use the mindsdb.libs.data_types.data_source.DataSource class in MindsDB

To help you get started, we’ve selected a few MindsDB examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mindsdb / mindsdb / mindsdb / libs / helpers / multi_data_source.py View on Github external
def getDS(from_data):
    '''
    Get a datasource given the input

    :param from_data: a DataSource (returned as is), a DataFrame (wrapped in a
        DataSource), or anything else, which is treated as a file and handed to FileDS
    :return: a datasource
    '''

    # Already a datasource: hand it back untouched
    if isinstance(from_data, DataSource):
        return from_data

    # A raw DataFrame only needs to be wrapped
    if isinstance(from_data, DataFrame):
        return DataSource(from_data)

    # Anything else is assumed to be a file.
    # A constructor call never evaluates to None, so the former
    # `if from_ds is None: log.error(...)` check could not fire and was dropped;
    # a failure to load can only surface as an exception raised by FileDS.
    return FileDS(from_data)
github mindsdb / mindsdb / mindsdb / libs / data_sources / window_ds.py View on Github external
import pandas


from mindsdb.libs.data_types.data_source import DataSource




class WindowDS(DataSource):

    def _setup(self, df, col_max, col_min, window_size_samples=300, step_size=30, min_size = 100):

        header = list(df.columns.values)
        data = df.values.tolist()

        max_index = header.index(col_max)
        min_index = header.index(col_min)

        ret = []

        for row in data:
            max = row[max_index]
            min = row[min_index]

            new_max = max
github mindsdb / mindsdb / mindsdb / libs / helpers / multi_data_source.py View on Github external
def getDS(from_data):
    '''
    Get a datasource given the input

    :param from_data: a DataSource (returned as is), a DataFrame (wrapped in a
        DataSource), or anything else, which is treated as a file and handed to FileDS
    :return: a datasource
    '''

    # Already a datasource: hand it back untouched
    if isinstance(from_data, DataSource):
        return from_data

    # A raw DataFrame only needs to be wrapped
    if isinstance(from_data, DataFrame):
        return DataSource(from_data)

    # Anything else is assumed to be a file.
    # A constructor call never evaluates to None, so the former
    # `if from_ds is None: log.error(...)` check could not fire and was dropped;
    # a failure to load can only surface as an exception raised by FileDS.
    return FileDS(from_data)
github mindsdb / mindsdb / mindsdb / libs / data_sources / array_to_cols_ds.py View on Github external
import pandas
import json

from mindsdb.libs.data_types.data_source import DataSource



class ArrayToColsDS(DataSource):

    def _setup(self, df, cols_to_split = {}):

        header = list(df.columns.values)
        data = df.values.tolist()

        new_data = []
        new_header = []

        for row in data:
            n_row = []
            for i, col in enumerate(header):
                cell = row[i]
                if col in cols_to_split:
                    ncols = cols_to_split[col]
                    if cell is None:
github mindsdb / mindsdb / mindsdb / libs / data_sources / postgres_ds.py View on Github external
import os

import pandas as pd
import psycopg2

from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log


class PostgresDS(DataSource):
    """DataSource whose content is the result of a query run against PostgreSQL."""

    def _setup(self, query=None, host='localhost', user='postgres', password='', database='postgres', port=5432, table=None):
        """
        Run a query against a PostgreSQL database and load the result

        :param query: SQL to execute; when None, all rows of `table` are selected
        :param host: server host passed to psycopg2.connect
        :param user: user name passed to psycopg2.connect
        :param password: password passed to psycopg2.connect
        :param database: database name (passed as `dbname`)
        :param port: server port passed to psycopg2.connect
        :param table: table to select from when no query is given
        :return: tuple (DataFrame with the query result, dict mapping every column name to itself)
        :raises ValueError: if neither `query` nor `table` is given
        """

        if query is None:
            if table is None:
                # Previously this silently produced 'SELECT * FROM None'
                raise ValueError('Either a query or a table must be provided')
            # NOTE(review): `table` is interpolated verbatim into the SQL text,
            # so it must come from a trusted caller.
            query = f'SELECT * FROM {table}'

        con = psycopg2.connect(dbname=database, user=user, password=password, host=host, port=port)
        try:
            df = pd.read_sql(query, con=con)
        finally:
            # Release the connection even when the query fails
            con.close()

        # Identity mapping: every column keeps its own name
        col_map = {col: col for col in df.columns}

        return df, col_map
github mindsdb / mindsdb / mindsdb / libs / data_sources / mysql_ds.py View on Github external
import os

import pandas as pd
import MySQLdb

from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log


class MySqlDS(DataSource):
    """DataSource whose content is the result of a query run against MySQL."""

    def _setup(self, query=None, host='localhost', user='root', password='', database='mysql', port=3306, table=None):
        """
        Run a query against a MySQL database and load the result

        :param query: SQL to execute; when None, all rows of `table` are selected
        :param host: server host passed to MySQLdb.connect
        :param user: user name passed to MySQLdb.connect
        :param password: password passed to MySQLdb.connect
        :param database: database name passed to MySQLdb.connect
        :param port: server port passed to MySQLdb.connect
        :param table: table to select from when no query is given
        :return: tuple (DataFrame with the query result, dict mapping every column name to itself)
        :raises ValueError: if neither `query` nor `table` is given
        """

        if query is None:
            if table is None:
                # Previously this silently produced 'SELECT * FROM None'
                raise ValueError('Either a query or a table must be provided')
            # NOTE(review): `table` is interpolated verbatim into the SQL text,
            # so it must come from a trusted caller.
            query = f'SELECT * FROM {table}'

        con = MySQLdb.connect(host, user, password, database, port=port)
        try:
            df = pd.read_sql(query, con=con)
        finally:
            # Release the connection even when the query fails
            con.close()

        # Identity mapping: every column keeps its own name
        col_map = {col: col for col in df.columns}

        return df, col_map
github mindsdb / mindsdb / mindsdb / libs / data_sources / file_ds.py View on Github external
import pandas
import re
from io import BytesIO, StringIO
import csv
import codecs
import json
import traceback
import codecs
import requests

from mindsdb.libs.data_types.data_source import DataSource
from pandas.io.json import json_normalize
from mindsdb.libs.data_types.mindsdb_logger import log


class FileDS(DataSource):
    
    def cleanRow(self, row):
        """
        Build a copy of a row in which blank or NaN-like cells become None

        A cell counts as empty when its string form is one of:
        '', ' ', '  ', 'NaN', 'nan', 'NA'. All other cells are kept unchanged.

        :param row: iterable of cell values
        :return: new list with one entry per input cell
        """
        empty_markers = ['', ' ', '  ', 'NaN', 'nan', 'NA']
        return [None if str(value) in empty_markers else value for value in row]

    def _getDataIo(self, file):
        """
        This gets a file either url or local file and defiens what the format is as well as dialect
        :param file: file path or url
        :return: data_io, format, dialect
        """
github mindsdb / mindsdb / mindsdb / libs / data_sources / s3_ds.py View on Github external
import os

import boto3
from botocore import UNSIGNED
from botocore.client import Config


from mindsdb.libs.data_types.data_source import DataSource
from mindsdb.libs.data_types.mindsdb_logger import log
from mindsdb.libs.data_sources.file_ds import FileDS


class S3DS(DataSource):
    """DataSource that downloads one S3 object to a local file and parses it with FileDS."""

    def _setup(self, bucket_name, file_path, access_key=None, secret_key=None, use_default_credentails=False):
        """
        Download an object from S3 and load it through FileDS

        :param bucket_name: bucket holding the object
        :param file_path: key of the object inside the bucket
        :param access_key: access key id; only used when secret_key is also given
        :param secret_key: secret access key; only used when access_key is also given
        :param use_default_credentails: when no key pair is given, create the
            client without explicit credentials instead of an unsigned one
        :return: tuple (_df, _col_map) taken from the FileDS built on the downloaded file
        """
        # Pick the client options: explicit key pair, boto3 defaults, or unsigned requests
        has_key_pair = access_key is not None and secret_key is not None
        if has_key_pair:
            client_options = {
                'aws_access_key_id': access_key,
                'aws_secret_access_key': secret_key,
            }
        elif use_default_credentails:
            client_options = {}
        else:
            client_options = {'config': Config(signature_version=UNSIGNED)}
        s3 = boto3.client('s3', **client_options)

        # The download target is a fixed file name, also kept on the instance
        local_path = '.tmp_mindsdb_data_file'
        self.tmp_file_name = local_path

        with open(local_path, 'wb') as target:
            s3.download_fileobj(bucket_name, file_path, target)

        # Delegate parsing to FileDS and reuse its results
        parsed = FileDS(local_path)
        return parsed._df, parsed._col_map
github mindsdb / mindsdb / mindsdb / libs / data_sources / csv_file_ds.py View on Github external
import pandas
import logging
import csv
import mindsdb.config as CONFIG
import re

from mindsdb.libs.data_types.data_source import DataSource

class CSVFileDS(DataSource):

    def clean(self, header):

        clean_header = []
        col_count={}

        replace_chars = """ ,./;'[]!@#$%^&*()+{-=+~`}\\|:"<>?"""

        for col in header:
            orig_col = col
            for char in replace_chars:
                col = col.replace(char,'_')
            col = re.sub('_+','_',col)
            if col[-1] == '_':
                col = col[:-1]
            col_count[col] = 1 if col not in col_count else col_count[col]+1