How to use the omniduct.filesystems.base.FileSystemFileDesc class in omniduct

To help you get started, we’ve selected a few omniduct examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github airbnb / omniduct / omniduct / filesystems / s3.py View on Github external
def _dir(self, path):
        """
        Yield a `FileSystemFileDesc` for each entry listed under `path`.

        The listing is consumed page by page from `self.__dir_paginator(path)`.
        Within each page, every 'CommonPrefixes' entry is reported as a
        directory and every 'Contents' entry as a file.

        Args:
            path: The path whose entries should be listed.

        Yields:
            FileSystemFileDesc: One descriptor per directory or file found.
        """
        for page in self.__dir_paginator(path):
            # Directory-like entries: the prefix carries a trailing separator,
            # which is stripped before deriving the path and the name.
            for common_prefix in page.get('CommonPrefixes', []):
                separator = self.path_separator
                dir_path = common_prefix['Prefix'][:-len(separator)]  # Remove trailing slash
                yield FileSystemFileDesc(
                    fs=self,
                    path=dir_path,
                    name=dir_path.split(separator)[-1],
                    type='directory',
                )

            # File entries: optionally skip keys ending in '_$folder$'.
            for obj in page.get('Contents', []):
                key = obj['Key']
                if self.skip_hadoop_artifacts and key.endswith('_$folder$'):
                    continue

                # Owner information is only used when the entry provides it.
                if 'Owner' in obj:
                    owner = obj['Owner']['DisplayName']
                else:
                    owner = None

                yield FileSystemFileDesc(
                    fs=self,
                    path=key,
                    name=key.split(self.path_separator)[-1],
                    type='file',
                    bytes=obj['Size'],
                    owner=owner,
                    last_modified=obj['LastModified']
                )
github airbnb / omniduct / omniduct / filesystems / s3.py View on Github external
def _dir(self, path):
        iterator = self.__dir_paginator(path)

        for response_data in iterator:
            for prefix in response_data.get('CommonPrefixes', []):
                yield FileSystemFileDesc(
                    fs=self,
                    path=prefix['Prefix'][:-len(self.path_separator)],
                    name=prefix['Prefix'][:-len(self.path_separator)].split(self.path_separator)[-1],  # Remove trailing slash
                    type='directory',
                )
            for prefix in response_data.get('Contents', []):
                if self.skip_hadoop_artifacts and prefix['Key'].endswith('_$folder$'):
                    continue
                yield FileSystemFileDesc(
                    fs=self,
                    path=prefix['Key'],
                    name=prefix['Key'].split(self.path_separator)[-1],
                    type='file',
                    bytes=prefix['Size'],
                    owner=prefix['Owner']['DisplayName'] if 'Owner' in prefix else None,
                    last_modified=prefix['LastModified']
github airbnb / omniduct / omniduct / filesystems / base.py View on Github external
def __new__(cls, fs, path, name, type, bytes=None, owner=None,
                group=None, permissions=None, created=None, last_modified=None,
                last_accessed=None, **extra):
        """
        Create a new file descriptor record.

        Args:
            fs: The filesystem object the entry belongs to.
            path: The path of the entry.
            name: The name of the entry.
            type: Either 'directory' or 'file'; any other value fails the
                assertion below.
            bytes: Optional size of the entry.
            owner: Optional owner of the entry.
            group: Optional group of the entry.
            permissions: Optional permissions of the entry.
            created: Optional creation time.
            last_modified: Optional last-modification time.
            last_accessed: Optional last-access time.
            **extra: Any additional keyword arguments; stored together as a
                dict in the `extra` field.

        Returns:
            The instance produced by the parent class' `__new__`.
        """
        assert type in ('directory', 'file')

        # Gather every field by name so the parent constructor receives them
        # as keyword arguments.
        fields = {
            'fs': fs,
            'path': path,
            'name': name,
            'type': type,
            'bytes': bytes,
            'owner': owner,
            'group': group,
            'permissions': permissions,
            'created': created,
            'last_modified': last_modified,
            'last_accessed': last_accessed,
            'extra': extra,
        }
        parent = super(FileSystemFileDesc, cls)
        return parent.__new__(cls, **fields)
github airbnb / omniduct / omniduct / filesystems / local.py View on Github external
if os.name == 'posix':
                import grp
                import pwd

                stat = os.stat(f_path)

                attrs.update({
                    'owner': pwd.getpwuid(stat.st_uid).pw_name,
                    'group': grp.getgrgid(stat.st_gid).gr_name,
                    'permissions': oct(stat.st_mode),
                    'created': str(datetime.datetime.fromtimestamp(stat.st_ctime)),
                    'last_modified': str(datetime.datetime.fromtimestamp(stat.st_mtime)),
                    'last_accessed': str(datetime.datetime.fromtimestamp(stat.st_atime)),
                })

            yield FileSystemFileDesc(
                fs=self,
                path=f_path,
                name=f,
                type='directory' if os.path.isdir(f_path) else 'file',
                bytes=os.path.getsize(f_path),
                **attrs
            )
github airbnb / omniduct / omniduct / remotes / ssh_paramiko.py View on Github external
def _dir(self, path):
        """
        Yield a `FileSystemFileDesc` for each entry returned by
        `self.__client_sftp.listdir_attr(path)`.

        Args:
            path: The remote directory to list.

        Yields:
            FileSystemFileDesc: One descriptor per listed entry, typed as
                'directory' when the entry's mode marks it as a directory and
                as 'file' otherwise.
        """
        for entry in self.__client_sftp.listdir_attr(path):
            entry_path = posixpath.join(path, entry.filename)

            # TODO: What about links, which are of form: lrwxrwxrwx?
            if stat.S_ISDIR(entry.st_mode):
                entry_type = 'directory'
            else:
                entry_type = 'file'

            yield FileSystemFileDesc(
                fs=self,
                path=entry_path,
                name=entry.filename,
                type=entry_type,
                bytes=entry.st_size,
                owner=entry.st_uid,
                group=entry.st_gid,
                last_modified=entry.st_mtime,
            )
github airbnb / omniduct / omniduct / filesystems / webhdfs.py View on Github external
def _dir(self, path):
        """
        Yield a `FileSystemFileDesc` for each file status returned by
        `self.__webhdfs.list_dir(path)`.

        Args:
            path: The directory to list.

        Yields:
            FileSystemFileDesc: One descriptor per status record. The record's
                'type' is lower-cased, and its 'replication' value is passed
                through as an extra keyword argument.
        """
        listing = self.__webhdfs.list_dir(path)
        statuses = listing['FileStatuses']['FileStatus']

        for status in statuses:
            # The path suffix serves both as the entry name and as the final
            # component of the full path.
            suffix = status['pathSuffix']
            yield FileSystemFileDesc(
                fs=self,
                path=posixpath.join(path, suffix),
                name=suffix,
                type=status['type'].lower(),
                bytes=status['length'],
                owner=status['owner'],
                group=status['group'],
                last_modified=status['modificationTime'],
                last_accessed=status['accessTime'],
                permissions=status['permission'],
                replication=status['replication']
            )
github airbnb / omniduct / omniduct / remotes / ssh.py View on Github external
if len(dir) == 0:  # Directory is empty
            return

        dir = dir.assign(
            last_modified=lambda x: x.apply(convert_to_datetime, axis=1),
            type=lambda x: x.apply(lambda x: 'directory' if x.file_mode.startswith('d') else 'file', axis=1)
        ).drop(
            ['month', 'day', 'time'],
            axis=1
        ).sort_values(
            ['type', 'path']
        ).reset_index(drop=True)

        for i, row in dir.iterrows():
            yield FileSystemFileDesc(
                fs=self,
                path=posixpath.join(path, row.path),
                name=row.path,
                type='directory' if row.file_mode.startswith('d') else 'file',  # TODO: What about links, which are of form: lrwxrwxrwx?
                bytes=row.bytes,
                owner=row.owner,
                group=row.group,
                last_modified=row.last_modified,
            )