import numpy as np
import parmed
from scipy.spatial import distance_matrix
from .model import ArbdModel
from .core_objects import Group, ParticleType, PointParticle
from .interactions import HarmonicBond
from .logger import logger
[docs]
class ParmedArbd(ArbdModel):
"""
Class for converting ParmEd structures to ARBD models for simulation.
This class facilitates the conversion of molecular structures loaded with ParmEd
into ARBD simulation models, preserving bonded and non-bonded interactions.
It also supports creating dual topology models for free energy calculations.
Parameters:
parmed_structure: The original ParmEd structure
atom_types: Dictionary mapping atom type names to ParticleType objects
atoms_map: Mapping from ParmEd atoms to ARBD atoms
"""
def __init__(self, parmed_structure=None, psf=None, pdb=None, parameter_files=None,
system_type='charmm', integrator='MD', cutoff=12, **kwargs):
"""
Initialize ParmedArbd model from a ParmEd structure or PSF/PDB files.
Args:
parmed_structure: Existing ParmEd structure object (optional)
psf: Path to PSF file (optional if parmed_structure is provided)
pdb: Path to PDB file (optional if parmed_structure is provided)
parameter_files: List of parameter files for force field (optional)
system_type: Type of system ('charmm', etc.)
integrator: Simulation integrator type
cutoff: Non-bonded interaction cutoff distance
**kwargs: Additional arguments for ArbdModel
"""
# Initialize empty model first
ArbdModel.__init__(self, [], integrator=integrator, cutoff=cutoff, **kwargs)
self.atom_types = {}
self.atoms_map = {}
# Load structure if provided
if parmed_structure is not None:
self.parmed_structure = parmed_structure
elif psf is not None and pdb is not None:
self.parmed_structure = self._read_files(psf, pdb, parameter_files, system_type)
else:
self.parmed_structure = None
logger.warning("No structure provided; use load_structure() to load one")
return
# Convert the structure to ARBD model
if self.parmed_structure is not None:
self._build_model_from_structure()
def _read_files(self, psf, pdb, parameter_files=None, system_type='charmm'):
"""
Read PSF and PDB files into a ParmEd structure.
Args:
psf: Path to PSF file
pdb: Path to PDB file
parameter_files: List of parameter files
system_type: Type of system ('charmm', etc.)
Returns:
ParmEd structure object
"""
if system_type == 'charmm':
p1 = parmed.charmm.CharmmPsfFile(psf)
c1 = parmed.load_file(pdb)
for a, b in zip(p1.atoms, c1.atoms):
a.xx = b.xx
a.xy = b.xy
a.xz = b.xz
a.bfactor = b.bfactor
if parameter_files is not None:
if isinstance(parameter_files, str):
parameter_files = [parameter_files]
params = parmed.charmm.CharmmParameterSet(*parameter_files)
p1.load_parameters(params)
else:
raise NotImplementedError(f'Cannot import a "{system_type}" model')
return p1
[docs]
def load_structure(self, parmed_structure=None, psf=None, pdb=None,
parameter_files=None, system_type='charmm'):
"""
Load a ParmEd structure or read from PSF/PDB files.
Args:
parmed_structure: Existing ParmEd structure object (optional)
psf: Path to PSF file (optional if parmed_structure is provided)
pdb: Path to PDB file (optional if parmed_structure is provided)
parameter_files: List of parameter files (optional)
system_type: Type of system ('charmm', etc.)
Returns:
self for method chaining
"""
if parmed_structure is not None:
self.parmed_structure = parmed_structure
elif psf is not None and pdb is not None:
self.parmed_structure = self._read_files(psf, pdb, parameter_files, system_type)
else:
raise ValueError("Either parmed_structure or both psf and pdb must be provided")
# Clear existing model and rebuild
self.clear_all()
self._build_model_from_structure()
return self
def _validate_atom(self, atom):
"""
Validate that atom properties are compatible with ARBD model.
Args:
atom: ParmEd atom object to validate
Raises:
NotImplementedError: If atom has unsupported properties
"""
try:
atom.multipoles
raise NotImplementedError(f'Atom {atom} uses multipoles attribute for AMEOBA')
except AttributeError:
pass
for k in 'tortors other_locations anisou hybridization irotat tree screen solvent_radius join altloc marked cmaps children aromatic formal_charge'.split():
_a = atom.__getattribute__(k)
if _a is not None:
_isnum = isinstance(_a, float) or isinstance(_a, int)
if (_isnum and (_a != 0)) or ((not _isnum) and len(_a) > 0):
_msg = f'Atom {atom.idx} "{k}" is non-null ({_a})'
logger.warning(_msg)
if len(atom.other_locations) > 0:
_msg = f'Atom {atom.idx} has other locations which is not supported'
raise NotImplementedError(_msg)
def _validate_topology(self):
"""
Validate that the topology is compatible with ARBD model.
Raises:
NotImplementedError: If topology has unsupported features
"""
part = self.parmed_structure
if len(part.rb_torsions) > 0:
_msg = f'Topology includes RB torsions'
raise NotImplementedError(_msg)
if len(part.urey_bradleys) > 0:
_msg = f'Topology includes Urey Bradley terms'
logger.warning(_msg)
for k in ['impropers', 'cmaps', 'trigonal_angles', 'out_of_plane_bends',
'pi_torsions', 'stretch_bends', 'torsion_torsions', 'chiral_frames',
'multipole_frames', 'adjusts', 'links']:
_a = part.__getattribute__(k)
if len(_a) > 0:
_msg = f'Topology includes {len(_a)} "{k}"'
logger.warning(_msg)
if part.unknown_functional != False:
_msg = f'Topology unknown_functional is "{part.unknown_functional}" (not "False")'
raise NotImplementedError(_msg)
if part.symmetry is not None:
_msg = f'Topology symmetry is "{part.symmetry}" (not "None")'
raise NotImplementedError(_msg)
if part.nrexcl > 4:
_msg = f'Topology nrexcl is "{part.nrexcl}" (not <= "4")'
raise NotImplementedError(_msg)
if part._combining_rule != 'lorentz':
_msg = f'Unrecognized non-bonded combining rule "{part._combining_rule}"'
raise NotImplementedError(_msg)
# Validate first atom as representative
self._validate_atom(part.atoms[0])
def _create_particle_types(self):
"""
Create ARBD ParticleType objects from atom types in the ParmEd structure.
Returns:
Dictionary mapping atom type names to ParticleType objects
"""
_atypes = set(a.atom_type for a in self.parmed_structure.atoms)
logger.info(f'Importing {len(_atypes)} types from ParmEd')
_new_types_d = {}
for t in _atypes:
_kwargs = {k: t.__getattribute__(k)
for k in 'number mass atomic_number epsilon rmin charge'.split()}
for k in 'epsilon_14 rmin_14'.split():
_a1 = t.__getattribute__(k)
_a2 = t.__getattribute__(k.split('_')[0])
if _a1 != _a2:
_msg = f'{t}.{k} differs from normal term ({_a1} != {_a2})'
logger.warning(_msg)
for k in 'nbfix nbthole _bond_type'.split():
_a = t.__getattribute__(k)
if _a is not None and len(_a) > 0:
_msg = f'{t}.{k} is non-null ({_a})'
logger.warning(_msg)
assert(t.name not in _new_types_d)
_new_types_d[t.name] = ParticleType(t.name,
damping_coefficient=1e-4, # 0.1 ps
**_kwargs)
self.atom_types = _new_types_d
return _new_types_d
def _build_model_from_structure(self):
"""
Build ARBD model from the loaded ParmEd structure.
"""
# Validate topology first
self._validate_topology()
# Create particle types
atom_types = self._create_particle_types()
# Create model structure
allsegs = Group(name='allsegs', _segments={})
atoms_d = {}
bond_t_d = {}
# Helper function to get or create residue group
def _get_residue(res):
segid = res.segid if res.segid is not None else res.chain
if segid not in allsegs._segments:
allsegs._segments[segid] = Group(name=segid, _residues={}, parent=allsegs)
logger.info(f'Creating segment {segid}')
seg = allsegs._segments[segid]
key = (segid, res.name, res.number)
if key not in seg._residues:
seg._residues[key] = Group(name=res.name, resid=res.number, chain=res.chain, parent=seg)
logger.debug((seg, segid, res.name, res.number, res._idx))
return seg._residues[key]
# Helper function to create an atom
def _add_atom(atom):
a = atom
mapping = dict(bfactor='beta',
occupancy=None)
for k, v in list(mapping.items()):
if v is None:
mapping[k] = k
res = _get_residue(a.residue)
_a = PointParticle(name=a.name, type_=atom_types[a.type],
position=np.array((a.xx, a.xy, a.xz)), parent=res,
**{mapping[k]: a.__getattribute__(k) for k in mapping.keys()})
atoms_d[a] = _a
return _a
# Helper function to get or create a bond type
def _get_bond_type(bond):
t = bond.type
if t not in bond_t_d:
if t.penalty is not None:
logger.warning(f'Bond.penalty for bond {t} not null')
width = 0.76565392/np.sqrt(np.abs(t.k)) # units conversion
b = HarmonicBond(k=t.k, r0=t.req,
resolution=0.02*width,
range_=[max(0, t.req-5*width), t.req+5*width])
bond_t_d[t] = b
return bond_t_d[t]
# Add atoms to model
logger.info(f'Importing {len(self.parmed_structure.atoms)} atoms from ParmEd')
for i, a in enumerate(self.parmed_structure.atoms):
assert(a.idx == i)
_add_atom(a)
# Add bonds to model
logger.info(f'Importing {len(self.parmed_structure.bonds)} bonds from ParmEd')
for b0 in self.parmed_structure.bonds:
t = _get_bond_type(b0)
a1, a2 = [atoms_d[a] for a in (b0.atom1, b0.atom2)]
seg = a1.parent.parent
seg.add_bond(a1, a2, t, self.parmed_structure.nrexcl >= 1)
# Add the segment group to the model
self.add(allsegs)
# Store mapping from ParmEd atoms to ARBD atoms
self.atoms_map = atoms_d
return self
[docs]
def simulate(self, output_name, output_directory='output', log_file=None,
directory='.', binary=None, num_procs=None, dry_run=False, **conf_params):
"""
Run simulation with the ARBD model.
Args:
output_name: Base name for output files
output_directory: Directory for output files
log_file: File for logging
directory: Working directory
binary: Path to simulation binary
num_procs: Number of processors to use
dry_run: If True, don't actually run
**conf_params: Additional configuration parameters
Returns:
Result of the simulation
"""
from .engine import ArbdEngine
engine = ArbdEngine(timestep=conf_params.get('timestep', 1e-6))
return engine.simulate(
self, output_name,
output_directory=output_directory,
directory=directory,
log_file=log_file,
binary=binary,
num_procs=num_procs,
dry_run=dry_run,
**conf_params
)
[docs]
@classmethod
def create_dual_topology_model(cls, p1, p2, u1=None, u2=None, **kwargs):
"""
Create a dual topology ARBD model from two ParmEd structures.
Args:
p1: First ParmEd structure
p2: Second ParmEd structure
u1: Optional MDAnalysis universe for p1
u2: Optional MDAnalysis universe for p2
**kwargs: Additional arguments for ParmedArbd
Returns:
ParmedArbd model with dual topology
"""
# Create the dual topology
dual_structure, pairs, bp_pairs = cls.create_dual_topology(p1, p2, u1, u2)
# Create ParmedArbd model with the dual structure
model = cls(parmed_structure=dual_structure, **kwargs)
# Store the extra bond information for use in simulation
model.dual_topology_pairs = pairs
model.bp_pairs = bp_pairs
return model
[docs]
def write_restraint_files(self, fep_file='dual.fep.exb', min_fep_file='dual.min.fep.exb',
bp_file='dual.bp.exb'):
"""
Write restraint files for dual topology simulations.
Args:
fep_file: Output file for FEP restraints
min_fep_file: Output file for minimization FEP restraints
bp_file: Output file for base pair restraints
"""
if not hasattr(self, 'dual_topology_pairs'):
raise AttributeError("This model doesn't have dual topology information. "
"Use create_dual_topology_model to create one.")
with open(fep_file, 'w') as fh:
with open(min_fep_file, 'w') as fh2:
for i, j, r0 in self.dual_topology_pairs:
fh.write(f'bond {i} {j} 1 {r0}\n')
fh2.write(f'bond {i} {j} 100 {r0}\n')
if hasattr(self, 'bp_pairs') and self.bp_pairs:
with open(bp_file, 'w') as fh:
for i, j in self.bp_pairs:
fh.write(f'bond {i} {j} 1 2.8\n')
[docs]
@staticmethod
def convert_sod_to_mg(structure):
"""
Convert sodium ions to magnesium ions in a structure.
Args:
structure: ParmEd structure to modify
Returns:
Modified structure
"""
for a in structure:
if a.residue.name == 'WAT':
break
elif a.residue.name != 'Na+':
continue
# Find appropriate magnesium atom type
try:
mg_type = structure.atom_types['Mg2+']
except KeyError:
# If Mg2+ atom type doesn't exist, create it based on Na+ properties
na_type = a.atom_type
mg_type = parmed.AtomType('Mg2+', 12, 24.305, 2.0)
mg_type.epsilon = na_type.epsilon
mg_type.rmin = na_type.rmin
mg_type.epsilon_14 = na_type.epsilon_14
mg_type.rmin_14 = na_type.rmin_14
structure.atom_types[mg_type.name] = mg_type
# Update atom properties to magnesium
a.atom_type = mg_type
a.residue.name = 'Mg2+'
a.name = 'Mg2+'
a.type = 'Mg2+'
a.atomic_number = 12
a._charge = 2
a.charge = 2
a.mass = 24.305
return structure
[docs]
@staticmethod
def create_dual_topology(p1, p2, u1=None, u2=None):
"""
Create a dual topology structure from two ParmEd structures.
Args:
p1: First ParmEd structure
p2: Second ParmEd structure
u1: Optional MDAnalysis universe for p1
u2: Optional MDAnalysis universe for p2
Returns:
Tuple of (combined structure, restraint pairs, base pair bonds)
"""
# Ensure structures have same number of residues
assert len(p1.residues) == len(p2.residues)
# Check index alignment
i = p1.residues[5].atoms[0].idx
assert i == p2.residues[5].atoms[0].idx
# Verify compatibility
assert len(p2.urey_bradleys) == 0
assert len(p2.cmaps) == 0
# Create new empty structure with same parameters as p1
new = p1[:0]
# Mapping dictionaries
p2_to_p1 = dict()
u_p2_to_new = dict()
# Create maps for bonded terms
bond_map = {a: [] for a in p2.atoms}
angle_map = {a: [] for a in p2.atoms}
dihed_map = {a: [] for a in p2.atoms}
improper_map = {a: [] for a in p2.atoms}
# Populate bonded term maps
for b in p2.bonds:
bond_map[b.atom1].append((b, 0))
bond_map[b.atom2].append((b, 1))
for b in p2.angles:
angle_map[b.atom1].append((b, 0))
angle_map[b.atom2].append((b, 1))
angle_map[b.atom3].append((b, 2))
for b in p2.dihedrals:
dihed_map[b.atom1].append((b, 0))
dihed_map[b.atom2].append((b, 1))
dihed_map[b.atom3].append((b, 2))
dihed_map[b.atom4].append((b, 3))
for b in p2.impropers:
improper_map[b.atom1].append((b, 0))
improper_map[b.atom2].append((b, 1))
improper_map[b.atom3].append((b, 2))
improper_map[b.atom4].append((b, 3))
pairs = []
# Process each segment of p1
last_p1_i = 0
first_p1_atom = dict() # keys are segids
first_new_atom = dict() # keys are segids
last_p1_new_atom = dict() # keys are segids
processed_segments = set()
# First pass: Add all atoms from p1 and unique atoms from p2
for res1, res2 in zip(p1.residues, p2.residues):
seg = res1.segid
assert seg == res2.segid
# Add complete segment from p1 to new molecule if not already processed
if seg not in processed_segments:
_next = [r.atoms[-1].idx + 1 for r in p1.residues if r.segid == seg][-1]
first_p1_atom[seg] = last_p1_i
first_new_atom[seg] = len(new.atoms)
new = new + p1[last_p1_i:_next]
last_p1_new_atom[seg] = len(new.atoms)
last_p1_i = _next
processed_segments.add(seg)
# If residue is different in p1 and p2, add unique atoms from p2
if res1.name != res2.name:
# Perform distance search to match atoms
c1 = np.array([p1.coordinates[a.idx] for a in res1.atoms])
c2 = np.array([p2.coordinates[a.idx] for a in res2.atoms])
dists = distance_matrix(c1, c2)
common_ids_j = set()
common_atoms_j = set()
# Find common atoms based on name, type, and charge
for i, j in zip(*np.where(dists < 0.1)):
a1, a2 = res1.atoms[i], res2.atoms[j]
p2_to_p1[a2] = a1
if a1.name[0] == 'H':
continue # Handle hydrogens separately
if a1.name == a2.name and (a1.type == a2.type) and (a1.charge == a2.charge):
common_ids_j.add(j)
common_atoms_j.add(a2)
# Handle hydrogen atoms
for i, j in zip(*np.where(dists < 0.1)):
a1, a2 = res1.atoms[i], res2.atoms[j]
if a1.name[0] != 'H':
continue # Only consider hydrogens
if a1.name == a2.name and (a1.type == a2.type) and (a1.charge == a2.charge):
# Check that parent atoms of hydrogens are already considered common
assert len(a1.bonds) == 1 and len(a2.bonds) == 1
b1 = [a for a in (a1.bonds[0].atom1, a1.bonds[0].atom2) if a != a1][0]
b2 = [a for a in (a2.bonds[0].atom1, a2.bonds[0].atom2) if a != a2][0]
if b2 in common_atoms_j:
common_ids_j.add(j)
common_atoms_j.add(a2)
# Special handling for H2' atoms (fix for topology issues)
if any(a.name == "H2'" for a in res1.atoms) and any(a.name == "H2'" for a in res2.atoms):
a1 = [a for a in res1.atoms if a.name == "H2'"][0]
j, a2 = next((i, a) for i, a in enumerate(res2.atoms) if a.name == "H2'")
if a2 not in p2_to_p1:
p2_to_p1[a2] = a1
common_ids_j.add(j)
common_atoms_j.add(a2)
# Find unique atoms in res2
unique_atoms_j = [
res2.atoms[j] for j in range(len(res2.atoms))
if j not in common_ids_j
]
unique_ids_j = [a2.idx for a2 in unique_atoms_j]
# Add unique atoms from res2 to new structure
if unique_ids_j:
logger.info(f'Adding {len(unique_ids_j)} unique atoms from residue {res2.name}')
new = new + p2[unique_ids_j]
# Second pass: Add bonded terms and set up dual topology information
current_seg = None
current_new_atom = None
for res1, res2 in zip(p1.residues, p2.residues):
seg = res1.segid
assert seg == res2.segid
if current_seg != seg:
current_seg = seg
current_new_atom = last_p1_new_atom[seg]
p1_to_new_di = first_new_atom[seg] - first_p1_atom[seg]
if res1.name != res2.name:
# Perform distance search to match atoms
c1 = np.array([p1.coordinates[a.idx] for a in res1.atoms])
c2 = np.array([p2.coordinates[a.idx] for a in res2.atoms])
dists = distance_matrix(c1, c2)
common_ids_i = set()
common_ids_j = set()
common_atoms_j = set()
# Find common atoms
for i, j in zip(*np.where(dists < 0.1)):
a1, a2 = res1.atoms[i], res2.atoms[j]
p2_to_p1[a2] = a1
if a1.name[0] == 'H':
continue
if a1.name == a2.name and (a1.type == a2.type) and (a1.charge == a2.charge):
common_ids_i.add(i)
common_ids_j.add(j)
common_atoms_j.add(a2)
# Handle hydrogen atoms
for i, j in zip(*np.where(dists < 0.1)):
a1, a2 = res1.atoms[i], res2.atoms[j]
if a1.name[0] != 'H':
continue
if a1.name == a2.name and (a1.type == a2.type) and (a1.charge == a2.charge):
assert len(a1.bonds) == 1 and len(a2.bonds) == 1
b1 = [a for a in (a1.bonds[0].atom1, a1.bonds[0].atom2) if a != a1][0]
b2 = [a for a in (a2.bonds[0].atom1, a2.bonds[0].atom2) if a != a2][0]
if b2 in common_atoms_j:
common_ids_i.add(i)
common_ids_j.add(j)
common_atoms_j.add(a2)
# Special handling for H2' atoms
if any(a.name == "H2'" for a in res1.atoms) and any(a.name == "H2'" for a in res2.atoms):
i, a1 = next((i, a) for i, a in enumerate(res1.atoms) if a.name == "H2'")
j, a2 = next((i, a) for i, a in enumerate(res2.atoms) if a.name == "H2'")
if a2 in p2_to_p1:
assert p2_to_p1[a2] == a1
p2_to_p1[a2] = a1
common_ids_i.add(i)
common_ids_j.add(j)
common_atoms_j.add(a2)
# Get unique atoms in res2
unique_atoms_j = [
res2.atoms[j] for j in range(len(res2.atoms))
if j not in common_ids_j
]
unique_ids_j = [a2.idx for a2 in unique_atoms_j]
# Find mapping of unique atoms from res2 to new structure
_num_new = len(unique_ids_j)
_tmp = {a.name: a for a in new.atoms[current_new_atom:current_new_atom + _num_new]}
current_new_atom += _num_new
u_p2_to_new.update({a2: _tmp[a2.name] for a2 in unique_atoms_j})
# Find pairs of dual-topology atoms for restraints, excluding hydrogens
for i in range(len(c1)):
if i in common_ids_i or res1.atoms[i].name[0] == 'H':
dists[i, :] += 100 # Exclude from pairing
for j in range(len(c2)):
if j in common_ids_j or res2.atoms[j].name[0] == 'H':
dists[:, j] += 100 # Exclude from pairing
# Create restraint pairs
if len(c1) < len(c2):
for i, a1 in enumerate(res1.atoms):
if i in common_ids_i or a1.name[0] == 'H':
continue
j = np.argmin(dists[i])
pairs.append((p1_to_new_di + a1.idx, u_p2_to_new[res2.atoms[j]].idx, dists[i, j]))
else:
for j, a2 in enumerate(res2.atoms):
if j in common_ids_j or a2.name[0] == 'H':
continue
i = np.argmin(dists[:, j])
pairs.append((p1_to_new_di + res1.atoms[i].idx, u_p2_to_new[a2].idx, dists[i, j]))
# Set beta factor for FEP atoms
for i, a in enumerate(new.atoms[p1_to_new_di + res1.atoms[0].idx:p1_to_new_di + res1.atoms[-1].idx + 1]):
if i not in common_ids_i:
a.bfactor = -1.0
for _, a in u_p2_to_new.items():
a.bfactor = 1.0
# Add bonded potentials from p2 that include unique atoms
processed = set() # avoid duplicate entries
for a2 in unique_atoms_j:
for pot_map in (bond_map, angle_map, dihed_map, improper_map):
for b, rank in pot_map[a2]:
# Avoid double-counting
if b in processed:
continue
processed.add(b)
# Get atoms involved in the potential
if pot_map == bond_map:
atoms = (b.atom1, b.atom2)
gen_pot = lambda new_atoms: parmed.Bond(*new_atoms, type=b.type)
pot_list = new.bonds
elif pot_map == angle_map:
atoms = (b.atom1, b.atom2, b.atom3)
gen_pot = lambda new_atoms: parmed.Angle(*new_atoms, type=b.type)
pot_list = new.angles
elif pot_map == dihed_map:
atoms = (b.atom1, b.atom2, b.atom3, b.atom4)
gen_pot = lambda new_atoms: parmed.Dihedral(*new_atoms, type=b.type, improper=b.improper)
pot_list = new.dihedrals
elif pot_map == improper_map:
atoms = (b.atom1, b.atom2, b.atom3, b.atom4)
gen_pot = lambda new_atoms: parmed.Improper(*new_atoms, type=b.type)
pot_list = new.impropers
else:
raise Exception("Unknown potential map")
# Skip if no atoms in bond are unique atoms
if all(a in unique_atoms_j for a in atoms):
continue
# Find corresponding atoms in new structure
new_atoms = []
for a in atoms:
if a in unique_atoms_j:
new_atoms.append(u_p2_to_new[a])
else:
new_atoms.append(new.atoms[p1_to_new_di + p2_to_p1[a].idx])
assert all(a in new.atoms for a in new_atoms)
# Add potential to new structure
pot_list.append(gen_pot(new_atoms))
# Handle base-pair information if MDAnalysis universes are provided
bp_pairs = None
if u1 is not None and u2 is not None:
bp_pairs = []
# Function to find base pairs in a nucleic acid structure
def find_bp(u):
# Find segment termini
seg_ends = [a.position for seg in u.segments
for a in [seg.atoms[0], seg.atoms[-1]]]
seg_ends = np.array(seg_ends)
dists = distance_matrix(seg_ends, seg_ends) + 100 * np.eye(len(seg_ends))
seg_end_map = {}
for i, d in enumerate(dists):
j = np.argmin(d)
if i in seg_end_map:
assert seg_end_map[i] == j
seg_end_map[i] = j
bp = -1 * np.ones(len(u.residues), dtype=int)
processed = set()
for I, J in seg_end_map.items():
if I in processed:
continue
processed.add(I)
processed.add(J)
reses1, reses2 = [u.segments[K // 2].residues[::1 - 2 * (K % 2)] for K in (I, J)]
for i, (r1, r2) in enumerate(zip(reses1, reses2)):
if i == len(reses1) // 2:
break
bp[r1.resindex] = r2.resindex
bp[r2.resindex] = r1.resindex
assert np.all(bp >= 0)
return bp
# Find base pairs in both structures
bp1, bp2 = [find_bp(u) for u in (u1, u2)]
# Define atom pairs for base-pairing
bp_bond_atoms = {
'DA': [('N1', 'N3'), ('N6', 'O4')],
'DT': [('N3', 'N1'), ('O4', 'N6')],
'DC': [('N3', 'N1'), ('O2', 'N2')],
'DG': [('N1', 'N3'), ('N2', 'O2')],
'A': [('N1', 'N3'), ('N6', 'O4')],
'T': [('N3', 'N1'), ('O4', 'N6')],
'U': [('N3', 'N1'), ('O4', 'N6')],
'C': [('N3', 'N1'), ('O2', 'N2')],
'G': [('N1', 'N3'), ('N2', 'O2')]
}
# Map residue names to keys in bp_bond_atoms
resname_to_key = {}
for key in bp_bond_atoms:
resname_to_key[key] = key
# Handle modified bases
if key in ('DA', 'DT', 'DC', 'DG'):
for i in range(1, 10):
mod_name = f"{key}{i}"
resname_to_key[mod_name] = key
# Create base pair bonds
for ra, (r1b, r2b) in enumerate(zip(bp1, bp2)):
# Ra pairs with r1b; r2a pairs with r2b
R1a = p1.residues[ra]
R2a = p2.residues[ra]
R1b = p1.residues[r1b] if r1b >= 0 else None
R2b = p2.residues[r2b] if r2b >= 0 else None
# Add base pair bonds for structure 1
if R1b is not None and ra < r1b:
try:
res_key = resname_to_key[R1a.name]
for n1, n2 in bp_bond_atoms[res_key]:
a = next(x for x in R1a.atoms if x.name == n1)
b = next(x for x in R1b.atoms if x.name == n2)
# Account for index offsets
seg = R1a.segid
ai = a.idx + first_new_atom[seg] - first_p1_atom[seg]
seg = R1b.segid
bi = b.idx + first_new_atom[seg] - first_p1_atom[seg]
bp_pairs.append((ai, bi))
except KeyError:
logger.warning(f"Residue {R1a.name} not found in base pair definitions")
continue
# Add base pair bonds for structure 2 (if different)
if R2b is not None and ra < r2b:
# Skip redundant base pairs
if R1b is not None and R1a.name == R2a.name and R1b.name == R2b.name:
continue
try:
res_key = resname_to_key[R2a.name]
for n1, n2 in bp_bond_atoms[res_key]:
a = next(x for x in R2a.atoms if x.name == n1)
b = next(x for x in R2b.atoms if x.name == n2)
try:
ai = u_p2_to_new[a].idx
assert u_p2_to_new[a].name == n1
except KeyError:
seg = R1a.segid
a1 = next(x for x in R1a.atoms if x.name == n1)
ai = a1.idx + first_new_atom[seg] - first_p1_atom[seg]
try:
bi = u_p2_to_new[b].idx
assert u_p2_to_new[b].name == n2
except KeyError:
seg = R1b.segid
b1 = next(x for x in R1b.atoms if x.name == n2)
bi = b1.idx + first_new_atom[seg] - first_p1_atom[seg]
bp_pairs.append((ai, bi))
except KeyError:
logger.warning(f"Residue {R2a.name} not found in base pair definitions")
continue
return new, pairs, bp_pairs