How to use the pyemma.coordinates.source function in pyEMMA

To help you get started, we’ve selected a few pyEMMA examples based on popular ways pyemma.coordinates.source is used in public projects.

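Before looking at the project snippets below, here is a minimal sketch of the typical call pattern (the topology and trajectory file names are placeholders): build a featurizer from a topology, register some features, and pass the trajectory files to pyemma.coordinates.source, which returns a reader that streams the data in chunks instead of loading everything into memory.

import pyemma.coordinates as coor

# Placeholder input files -- replace with your own topology and trajectories
top = 'protein.pdb'
trajs = ['traj-01.xtc', 'traj-02.xtc']

# Choose which features to extract from every frame
feat = coor.featurizer(top)
feat.add_backbone_torsions(cossin=True)

# source() returns a lazy reader; downstream estimators (TICA, clustering, ...)
# pull the data from it chunk by chunk
inp = coor.source(trajs, features=feat)
print(inp.dimension(), inp.n_frames_total())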

github markovmodel / molPX / molpx / unused_untested.py
            xtcs.append(these_trajs)
            if struct is None:
                struct = '%s-%u-protein.pdb'%(proj,ii)
                struct_already = True
                struct = os.path.join(subdir, struct)
                struct = sorted(glob(struct))

            elif isinstance(struct, str) and not struct_already:
                struct = os.path.join(subdir,struct)
                struct = sorted(glob(struct))

            if isinstance(struct,list):
                struct=struct[0]
            ii += 1

    src = _source(xtcs, top=struct)

    return src, xtcs
github markovmodel / PyEMMA / pyemma / coordinates / estimation / covariance.py
def partial_fit(self, X):
        """ incrementally update the estimates

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.
        """
        from pyemma.coordinates import source

        self._estimate(source(X), partial_fit=True)
        self._estimated = True

        return self
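Note that source() is not limited to trajectory files: it also wraps in-memory ndarrays (and lists of ndarrays) in a reader, which is why partial_fit above can simply call source(X) on whatever input it receives. A minimal sketch with placeholder random data:

import numpy as np
from pyemma.coordinates import source

X = np.random.randn(1000, 3)   # placeholder (n_frames, n_dimensions) array
reader = source(X)             # wraps the array in an in-memory reader
print(reader.dimension(), reader.n_frames_total())   # -> 3 1000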
github markovmodel / molPX / molpx / generate.py
pos :
        ndarray with the positions of the sample
    geom_smpl :
        sampled geometries. Can be of two types:

        * default: :obj:`mdtraj.Trajectory` with :obj:`n_points`-frames
        * if keep_all_samples = True: list of length :obj:`n_geom_samples`. Each element is an :obj:`mdtraj.Trajectory` object of :obj:`n_points`-frames.

    """

    MD_trajectories = _bmutils.listify_if_not_list(MD_trajectories)
    if isinstance(MD_trajectories[0], _md.Trajectory):
        src = MD_trajectories
    else:
        src = _source(MD_trajectories, top=MD_top)


    # Find out if we already have a clustering object
    try:
        projected_trajectories.dtrajs
        cl = projected_trajectories
    except AttributeError:
        idata = _bmutils.data_from_input(projected_trajectories)
        cl = _bmutils.regspace_cluster_to_target([dd[:,proj_idxs] for dd in idata], n_points, n_try_max=10, verbose=verbose)

    pos = cl.clustercenters
    cat_smpl = cl.sample_indexes_by_cluster(_np.arange(cl.n_clusters), n_geom_samples)

    geom_smpl = _bmutils.save_traj_wrapper(src, _np.vstack(cat_smpl), None, top=MD_top, stride=proj_stride)

    atom_slice = _bmutils.parse_atom_sel(atom_selection, geom_smpl.top)
github MobleyLab / blues / utils / kpca.py
prot_lig = np.concatenate((prot_index, lig_coord), axis=0)

#print 'prot_lig', prot_lig
#feat.add_selection(lig_heavy)
#feat.add_backbone_torsions(selstr="(resid >= 105) and (resid <= 115)")
#feat.add_backbone_torsions(selstr="(resid >= 132) and (resid <= 146)")
#feat.add_angles(np.array([[1635, 1639, 2640]]), cossin=False)
#feat.add_angles(np.array([[1635, 1639, 2640]]), cossin=False)

feat.add_dihedrals(np.array([[1638, 1622, 2634, 2640]]), cossin=False)
#feat.add_dihedrals(np.array([[1638, 2634, 2638, 2640]]), cossin=False)
feat.add_distances(prot_lig)
print(feat.dimension())


inp = coor.source(traj_list, feat)

#pca_obj = coor.pca(inp, dim=5)
pca_obj = coor.tica(inp, dim=5, lag=2)

Y = pca_obj.get_output()
print(Y)
print(Y[0][:,0])
num_clusters = 4
input_data = coor.cluster_kmeans(data=Y, k=num_clusters, max_iter=1000)
mapped_data = [np.array(dtraj)[:,0] for dtraj in input_data.get_output()]
frame_dict = {}
for num in range(num_clusters):
    frame_dict[num] = []

for index, entry in enumerate(mapped_data[0]):
    frame_dict[entry].append(index)
github markovmodel / molPX / projX / generate.py
* ``paths_dict[idxs][type_of_path]["proj"]`` : ndarray of shape (n_points, proj_dim) with the
            coordinates of the projection along the path

            * ``paths_dict[idxs][type_of_path]["geom"]`` : geometries along the path

    idata :
        list of ndarrays with the data in :obj:`projected_trajectories`
    """

    if not isinstance(MD_trajectories, list):
        MD_trajectories = [MD_trajectories]

    if isinstance(MD_trajectories[0], _md.Trajectory):
        src = MD_trajectories
    else:
        src = _source(MD_trajectories, top=MD_top)


    idata = _data_from_input(projected_trajectories)
    assert  len(MD_trajectories) == len(idata), "Mismatch between the number of " \
                                                "MD_trajectories and projected_trajectories: %u vs %u"%(len(MD_trajectories), len(idata))

    #TODO: assert total_n_frames (strided) coincides with the n_frames in data
    # What's the highest dimensionality that the input data allows?
    input_dim = idata[0].shape[1]
    if proj_idxs is None:
        proj_idxs = _np.arange(n_projs)
    else:
        if isinstance(proj_idxs, int):
            proj_idxs = [proj_idxs]

    proj_dim = _np.max((proj_dim, _np.max(proj_idxs)+1))
github MobleyLab / blues / blues / id-bindingmodes.py
def main(fpath, molid, args):
    prefix = os.path.join(fpath,molid,molid)
    trajfiles = glob.glob(prefix+'*-centered.dcd')
    topfiles = glob.glob(prefix+'*-centered.pdb')
    jsonfile = os.path.join(prefix+'-acc_its.json')

    #Pre-process trajectory files, check for unbound ligands
    trajfiles = utils.check_bound_ligand(trajfiles,
                                        topfiles[0])

    #Select features to analyze in trajectory
    feat = coor.featurizer(topfiles[0])
    lig_atoms = feat.select("resname LIG and not type H")
    feat.add_selection(lig_atoms)
    inp = coor.source(trajfiles, feat)

    #Define some input parameters
    dt = 8 #Frames in MD taken every 8ps == 0.008 ns
    lag_list = np.arange(1, 40,5) #Give a range of lag times to try

    #Initialize object to assign trajectories to clusters
    data = msm.ConstructMSM(inp)

    #Select the appropriate lag time from the implied timescale plots
    #data.plotImpliedTimescales(data.Y, dt, lag_list, outfname=prefix)

    #Selecting a lagtime of 150ps, every BLUES iteration is 176ps
    lagtime = 150

    #Discretize the trajectories
    #Estimate our Markov model from the discrete trajectories
github markovmodel / molPX / projX / api.py
keep_all_samples : boolean, default is False
        By default, once the geometry closest to the reference has been kept, the remaining sampled
        geometries are discarded and the output sample contains only n_points geometries. Set this to
        True to keep all sampled geometries; this is useful when n_points is low and several
        representatives per cluster center are more informative than a single one.


    projected_data : ndarray, list of ndarrays, or pyemma clustering object
        Although 2D is the most common case, the dimensionality of the clustering and that of the
        visualization (2D) do not necessarily have to be the same.
    """


    src = _source(MDtrajectory_files, top=topology)

    # Find out if we already have a clustering object
    try:
        projected_data.dtrajs
        cl = projected_data
    except AttributeError:
        idata = _data_from_input(projected_data)
        cl = _cluster_to_target([dd[:,idxs] for dd in idata], n_points, n_try_max=10, verbose=verbose)

    pos = cl.clustercenters
    cat_smpl = cl.sample_indexes_by_cluster(_np.arange(cl.n_clusters), n_geom_samples)
    geom_smpl = _save_traj(src, _np.vstack(cat_smpl), None, stride=proj_stride)
    if n_geom_samples>1:
        if not keep_all_samples:
            geom_smpl = _re_warp(geom_smpl, [n_geom_samples] * cl.n_clusters)
            # Of the most populated geom, get the most compact
github markovmodel / molPX / molpx / _bmutils.py
-------

    moldata: pyemma FeatureReader or list of md.Trajectories, depending on the input
    """

    if isinstance(inp, (_FeatureReader, _FragmentedTrajectoryReader)):
        moldata = inp

    # Everything else gets listified
    else:
        inp = listify_if_not_list(inp)
        # We have geometries so don't do anything
        if isinstance(inp[0], _md.Trajectory):
            moldata = inp
        elif isinstance(inp[0], str):
            moldata = _source(inp, top=MD_top)
        # TODO consider letting pyemma fail here instead of catching this
        else:
            raise TypeError("Please revise your input, it should be a str, got %s" % type(inp[0]))
    return moldata
github MobleyLab / blues / utils / kpca_circle.py
prot_lig = np.concatenate((prot_index, lig_coord), axis=0)

#print 'prot_lig', prot_lig
#feat.add_selection(lig_heavy)
#feat.add_backbone_torsions(selstr="(resid >= 105) and (resid <= 115)")
#feat.add_backbone_torsions(selstr="(resid >= 132) and (resid <= 146)")
feat.add_angles(np.array([[132, 126, 141]]), cossin=False)
feat.add_angles(np.array([[134, 128, 141]]), cossin=False)

#feat.add_dihedrals(np.array([[1638, 1622, 2634, 2640]]), cossin=False)
#feat.add_dihedrals(np.array([[1638, 2634, 2638, 2640]]), cossin=False)
feat.add_distances(prot_lig)
print(feat.dimension())


inp = coor.source(traj_list, feat)

pca_obj = coor.pca(inp, dim=4)
#pca_obj = coor.tica(inp, dim=4, lag=2)

Y = pca_obj.get_output()
print(Y)
print(Y[0][:,0])
num_clusters = 4
input_data = coor.cluster_kmeans(data=Y, k=num_clusters, max_iter=1000)
mapped_data = [np.array(dtraj)[:,0] for dtraj in input_data.get_output()]
frame_dict = {}
for num in range(num_clusters):
    frame_dict[num] = []

for index, entry in enumerate(mapped_data[0]):
    frame_dict[entry].append(index)