Source code for polychrom.polymerutils

"""
Loading and saving individual conformations
===========================================


The module :py:mod:`polychrom.polymerutils` provides tools for saving and loading individual conformations. Note that
saving and loading trajectories should generally be done using :py:mod:`polychrom.hdf5_format` module. This module
provides tools for loading/saving invividual conformations, or for working with projects that have both  old-style
and new-style trajectories.

For projects using both old-style and new-style trajectories(e.g. in a project that was switched to polychrom,
and new files were added), a function :py:func:`polychrom.polymerutils.fetch_block` can be helpful as it provides the
same interface for fetching a conformation from both old-style and new-style trajectory. Note however that it is not
the fastest way to iterate over conformations in the new-style trajectory, and the
:py:func:`polychrom.hdf5_format.list_URIs` is faster.

A typical workflow with the new-style trajectories should be:

.. code-block:: python

    URIs = polychrom.hdf5_format.list_URIs(folder)
    for URI in URIs:
        data = polychrom.hdf5_format.load_URI(URI)
        xyz = data["pos"]
"""


from __future__ import absolute_import, division, print_function, unicode_literals

import glob
import io
import os

import joblib
import numpy as np
import six

from polychrom.hdf5_format import load_URI

from . import hdf5_format


[docs]def load(filename): """Universal load function for any type of data file It always returns just XYZ positions - use fetch_block or hdf5_format.load_URI for loading the whole metadata Accepted file types ------------------- New-style URIs (HDF5 based storage) Text files in openmm-polymer format joblib files in openmm-polymer format Parameters ---------- filename: str filename to load or a URI """ if "::" in filename: return hdf5_format.load_URI(filename)["pos"] if not os.path.exists(filename): raise IOError("File not found :( \n %s" % filename) try: # loading from a joblib file here return dict(joblib.load(filename)).pop("data") except Exception: # checking for a text file data_file = open(filename) line0 = data_file.readline() try: N = int(line0) except (ValueError, UnicodeDecodeError): raise TypeError("Could not read the file. Not text or joblib.") data = [list(map(float, i.split())) for i in data_file.readlines()] if len(data) != N: raise ValueError("N does not correspond to the number of lines!") return np.array(data)
[docs]def fetch_block(folder, ind, full_output=False): """ A more generic function to fetch block number "ind" from a trajectory in a folder This function is useful both if you want to load both "old style" trajectories (block1.dat), and "new style" trajectories ("blocks_1-50.h5") It will be used in files "show" Parameters ---------- folder: str, folder with a trajectory ind: str or int, number of a block to fetch full_output: bool (default=False) If set to true, outputs a dict with positions, eP, eK, time etc. if False, outputs just the conformation (relevant only for new-style URIs, so default is False) Returns ------- data, Nx3 numpy array if full_output==True, then dict with data and metadata; XYZ is under key "pos" """ blocksh5 = glob.glob(os.path.join(folder, "blocks*.h5")) blocksdat = glob.glob(os.path.join(folder, "block*.dat")) ind = int(ind) if (len(blocksh5) > 0) and (len(blocksdat) > 0): raise ValueError("both .h5 and .dat files found in folder - exiting") if (len(blocksh5) == 0) and (len(blocksdat) == 0): raise ValueError("no blocks found") if len(blocksh5) > 0: fnames = [os.path.split(i)[-1] for i in blocksh5] inds = [i.split("_")[-1].split(".")[0].split("-") for i in fnames] exists = [(int(i[0]) <= ind) and (int(i[1]) >= ind) for i in inds] if True not in exists: raise ValueError(f"block {ind} not found in files") if exists.count(True) > 1: raise ValueError("Cannot find the file uniquely: names are wrong") pos = exists.index(True) block = load_URI(blocksh5[pos] + f"::{ind}") if not full_output: block = block["pos"] if len(blocksdat) > 0: block = load(os.path.join(folder, f"block{ind}.dat")) return block
[docs]def save(data, filename, mode="txt", pdbGroups=None): """ Basically unchanged polymerutils.save function from openmm-polymer It can save into txt or joblib formats used by old openmm-polymer It is also very useful for saving files to PDB format to make them compatible with nglview, pymol_show and others """ data = np.asarray(data, dtype=np.float32) if mode.lower() == "joblib": joblib.dump({"data": data}, filename=filename, compress=9) return if mode.lower() == "txt": lines = [str(len(data)) + "\n"] for particle in data: lines.append("{0:.3f} {1:.3f} {2:.3f}\n".format(*particle)) if filename is None: return lines elif isinstance(filename, six.string_types): with open(filename, "w") as myfile: myfile.writelines(lines) elif hasattr(filename, "writelines"): filename.writelines(lines) else: raise ValueError("Not sure what to do with filename {0}".format(filename)) elif mode == "pdb": data = data - np.minimum(np.min(data, axis=0), np.zeros(3, float) - 100)[None, :] retret = "" def add(st, n): if len(st) > n: return st[:n] else: return st + " " * (n - len(st)) if pdbGroups is None: pdbGroups = ["A" for i in range(len(data))] else: pdbGroups = [str(int(i)) for i in pdbGroups] for i, line, group in zip(list(range(len(data))), data, pdbGroups): atomNum = (i + 1) % 9000 segmentNum = (i + 1) // 9000 + 1 line = [float(j) for j in line] ret = add("ATOM", 6) ret = add(ret + "{:5d}".format(atomNum), 11) ret = ret + " " ret = add(ret + "CA", 17) ret = add(ret + "ALA", 21) ret = add(ret + group[0], 22) ret = add(ret + str(atomNum), 26) ret = add(ret + " ", 30) # ret = add(ret + "%i" % (atomNum), 30) ret = add(ret + ("%8.3f" % line[0]), 38) ret = add(ret + ("%8.3f" % line[1]), 46) ret = add(ret + ("%8.3f" % line[2]), 54) ret = add(ret + (" 1.00"), 61) ret = add(ret + str(float(i % 8 > 4)), 67) ret = add(ret, 73) ret = add(ret + str(segmentNum), 77) retret += ret + "\n" with open(filename, "w") as f: f.write(retret) f.flush() elif mode == "pyxyz": with open(filename, "w") as f: for i in data: filename.write("C {0} {1} {2}".format(*i)) else: raise ValueError("Unknown mode : %s, use h5dict, joblib, txt or pdb" % mode)
[docs]def rotation_matrix(rotate): """Calculates rotation matrix based on three rotation angles""" tx, ty, tz = rotate Rx = np.array([[1, 0, 0], [0, np.cos(tx), -np.sin(tx)], [0, np.sin(tx), np.cos(tx)]]) Ry = np.array([[np.cos(ty), 0, -np.sin(ty)], [0, 1, 0], [np.sin(ty), 0, np.cos(ty)]]) Rz = np.array([[np.cos(tz), -np.sin(tz), 0], [np.sin(tz), np.cos(tz), 0], [0, 0, 1]]) return np.dot(Rx, np.dot(Ry, Rz))