How to use the pyemma.coordinates.source function in pyEMMA

To help you get started, we’ve selected a few pyEMMA examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

markovmodel / molPX / molpx / unused_untested.py View on Github

xtcs.append(these_trajs)
            if struct is None:
                struct = '%s-%u-protein.pdb'%(proj,ii)
                struct_already = True
                struct = os.path.join(subdir, struct)
                struct = sorted(glob(struct))

            elif isinstance(struct, str) and not struct_already:
                struct = os.path.join(subdir,struct)
                struct = sorted(glob(struct))

            if isinstance(struct,list):
                struct=struct[0]
            ii += 1

    src = _source(xtcs, top=struct)

    return src, xtcs

markovmodel / PyEMMA / pyemma / coordinates / estimation / covariance.py View on Github

def partial_fit(self, X):
        """ incrementally update the estimates

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.
        """
        from pyemma.coordinates import source

        self._estimate(source(X), partial_fit=True)
        self._estimated = True

        return self

markovmodel / molPX / molpx / generate.py View on Github

pos :
        ndarray with the positions of the sample
    geom_smpl :
        sampled geometries. Can be of two types:

        * default: :obj:`mdtraj.Trajectory` with :obj:`n_points`-frames
        * if keep_all_samples = True: list of length :obj:`n_geom_samples`. Each element is an :obj:`mdtraj.Trajectory` object of :obj:`n_points`-frames.

    """

    MD_trajectories = _bmutils.listify_if_not_list(MD_trajectories)
    if isinstance(MD_trajectories[0], _md.Trajectory):
        src = MD_trajectories
    else:
        src = _source(MD_trajectories, top=MD_top)


    # Find out if we already have a clustering object
    try:
        projected_trajectories.dtrajs
        cl = projected_trajectories
    except:
        idata = _bmutils.data_from_input(projected_trajectories)
        cl = _bmutils.regspace_cluster_to_target([dd[:,proj_idxs] for dd in idata], n_points, n_try_max=10, verbose=verbose)

    pos = cl.clustercenters
    cat_smpl = cl.sample_indexes_by_cluster(_np.arange(cl.n_clusters), n_geom_samples)

    geom_smpl = _bmutils.save_traj_wrapper(src, _np.vstack(cat_smpl), None, top=MD_top, stride=proj_stride)

    atom_slice = _bmutils.parse_atom_sel(atom_selection, geom_smpl.top)

MobleyLab / blues / utils / kpca.py View on Github

prot_lig = np.concatenate((prot_index, lig_coord), axis=0)

#print 'prot_lig', prot_lig
#feat.add_selection(lig_heavy)
#feat.add_backbone_torsions(selstr="(resid &gt;= 105) and (resid &lt;= 115)")
#feat.add_backbone_torsions(selstr="(resid &gt;= 132) and (resid &lt;= 146)")
#feat.add_angles(np.array([[1635, 1639, 2640]]), cossin=False)
#feat.add_angles(np.array([[1635, 1639, 2640]]), cossin=False)

feat.add_dihedrals(np.array([[1638, 1622, 2634, 2640]]), cossin=False)
#feat.add_dihedrals(np.array([[1638, 2634, 2638, 2640]]), cossin=False)
feat.add_distances(prot_lig)
print feat.dimension()


inp = coor.source(traj_list, feat)

#pca_obj = coor.pca(inp, dim=5)
pca_obj = coor.tica(inp, dim=5, lag=2)

Y = pca_obj.get_output()
print Y
print Y[0][:,0]
num_clusters = 4
input_data = coor.cluster_kmeans(data=Y, k=num_clusters, max_iter=1000)
mapped_data = [np.array(dtraj)[:,0] for dtraj in input_data.get_output()]
frame_dict = {}
for num in range(num_clusters):
    frame_dict[num] = []

for index, entry in enumerate(mapped_data[0]):
    frame_dict[entry].append(index)

markovmodel / molPX / projX / generate.py View on Github

* ``paths_dict[idxs][type_of_path]["proj"]`` : ndarray of shape (n_points, proj_dim) with the
            coordinates of the projection along the path

            * paths_dict[idxs][type_of_path]["proj"] : geometries along the path

    idata :
        list of ndarrays with the the data in  :obj:`projected_trajectories`
    """

    if not isinstance(MD_trajectories, list):
        MD_trajectories = [MD_trajectories]

    if isinstance(MD_trajectories[0], _md.Trajectory):
        src = MD_trajectories
    else:
        src = _source(MD_trajectories, top=MD_top)


    idata = _data_from_input(projected_trajectories)
    assert  len(MD_trajectories) == len(idata), "Mismatch between the number of " \
                                                "MD_trajectories and projected_trajectories: %u vs %u"%(len(MD_trajectories), len(idata))

    #TODO: assert total_n_frames (strided) coincies with the n_frames in data
    # What's the hightest dimensionlatiy that the input data allows?
    input_dim = idata[0].shape[1]
    if proj_idxs is None:
       proj_idxs = _np.arange(n_projs)
    else:
        if isinstance(proj_idxs, int):
            proj_idxs = [proj_idxs]

    proj_dim = _np.max((proj_dim, _np.max(proj_idxs)+1))

MobleyLab / blues / blues / id-bindingmodes.py View on Github

def main(fpath, molid, args):
    prefix = os.path.join(fpath,molid,molid)
    trajfiles = glob.glob(prefix+'*-centered.dcd')
    topfiles = glob.glob(prefix+'*-centered.pdb')
    jsonfile = os.path.join(prefix+'-acc_its.json')

    #Pre-process trajectory files, check for unbound ligands
    trajfiles = utils.check_bound_ligand(trajfiles,
                                        topfiles[0])

    #Select features to analyze in trajectory
    feat = coor.featurizer(topfiles[0])
    lig_atoms = feat.select("resname LIG and not type H")
    feat.add_selection(lig_atoms)
    inp = coor.source(trajfiles, feat)

    #Define some input parameters
    dt = 8 #Frames in MD taken every 8ps == 0.008 ns
    lag_list = np.arange(1, 40,5) #Give a range of lag times to try

    #Initailize object to assign trajectories to clusters
    data = msm.ConstructMSM(inp)

    #Select the apprioriate lagtime from the implied timescale plots
    #data.plotImpliedTimescales(data.Y, dt, lag_list, outfname=prefix)

    #Selecting a lagtime of 150ps, every BLUES iteration is 176ps
    lagtime = 150

    #Discretize the trajectories
    #Estimate our Markov model from the discrete trajectories

markovmodel / molPX / projX / api.py View on Github

keep_all_samples = boolean, default is False
        In principle, once the closest-to-ref geometry has been kept, the other geometries are discarded, and the
        output sample contains only n_point geometries. HOWEVER, there are special cases where the user might
        want to keep all sampled geometries. Typical use-case is when the n_points is low and many representatives
        per clustercenters will be much more informative than the other way around
        (i know, this is confusing TODO: write this better)


    projected data: nd.array or list of nd.arrays OR pyemma.clustering object
        Although 2D is the most usual case, the dimensionality of the clustering and the one of the visualization (2D)
        do not necessarily have to be the same
    """


    src = _source(MDtrajectory_files, top=topology)

    # Find out if we already have a clustering object
    try:
        projected_data.dtrajs
        cl = projected_data
    except:
        idata = _data_from_input(projected_data)
        cl = _cluster_to_target([dd[:,idxs] for dd in idata], n_points, n_try_max=10, verbose=verbose)

    pos = cl.clustercenters
    cat_smpl = cl.sample_indexes_by_cluster(_np.arange(cl.n_clusters), n_geom_samples)
    geom_smpl = _save_traj(src, _np.vstack(cat_smpl), None, stride=proj_stride)
    if n_geom_samples>1:
        if not keep_all_samples:
            geom_smpl = _re_warp(geom_smpl, [n_geom_samples] * cl.n_clusters)
            # Of the most populated geom, get the most compact

markovmodel / molPX / molpx / _bmutils.py View on Github

-------

    moldata: pyemma FeatureReader or list of md.Trajectories, depending on the input
    """

    if isinstance(inp, (_FeatureReader, _FragmentedTrajectoryReader)):
        moldata = inp

    # Everything else gets listified
    else:
        inp = listify_if_not_list(inp)
        # We have geometries so don't do anything
        if isinstance(inp[0], _md.Trajectory):
            moldata = inp
        elif isinstance(inp[0], str):
            moldata = _source(inp, top=MD_top)
        # TODO consider letting pyemma fail here instead of catching this
        else:
            raise TypeError("Please revise tyour input, it should be a str ",type(inp[0]))
    return moldata

MobleyLab / blues / utils / kpca_circle.py View on Github

prot_lig = np.concatenate((prot_index, lig_coord), axis=0)

#print 'prot_lig', prot_lig
#feat.add_selection(lig_heavy)
#feat.add_backbone_torsions(selstr="(resid &gt;= 105) and (resid &lt;= 115)")
#feat.add_backbone_torsions(selstr="(resid &gt;= 132) and (resid &lt;= 146)")
feat.add_angles(np.array([[132, 126, 141]]), cossin=False)
feat.add_angles(np.array([[134, 128, 141]]), cossin=False)

#feat.add_dihedrals(np.array([[1638, 1622, 2634, 2640]]), cossin=False)
#feat.add_dihedrals(np.array([[1638, 2634, 2638, 2640]]), cossin=False)
feat.add_distances(prot_lig)
print feat.dimension()


inp = coor.source(traj_list, feat)

pca_obj = coor.pca(inp, dim=4)
#pca_obj = coor.tica(inp, dim=4, lag=2)

Y = pca_obj.get_output()
print Y
print Y[0][:,0]
num_clusters = 4
input_data = coor.cluster_kmeans(data=Y, k=num_clusters, max_iter=1000)
mapped_data = [np.array(dtraj)[:,0] for dtraj in input_data.get_output()]
frame_dict = {}
for num in range(num_clusters):
    frame_dict[num] = []

for index, entry in enumerate(mapped_data[0]):
    frame_dict[entry].append(index)

How to use the pyemma.coordinates.source function in pyEMMA

To help you get started, we’ve selected a few pyEMMA examples, based on popular ways it is used in public projects.

pyEMMA

Package Health Score

Popular pyEMMA functions

Similar packages