Source code for arbdmodel.simplearbd

#!/usr/bin/env python3
import re
import sys
import argparse
from pathlib import Path

from .structure_rigidbody import StructureRigidBodyModel, SimpleArbdEngine
from .sim_config import SimConf
from .logger import logger

class SimpleArbdConfig:
    """
    Parse and manage a SimpleARBD configuration file.

    The file is read once when the object is constructed.  The parsed values
    are stored in ``self.config`` (a plain dict keyed by snake_case parameter
    names) and the subset used to build the simulation configuration is
    wrapped in a ``SimConf`` stored in ``self.simconf``.
    """

    # Scalar parameters: config key -> regex whose group 1 is the raw value.
    # Free-form values (paths) only skip blanks/tabs after the colon; with
    # ``\s*`` an empty value would swallow the newline and capture the key of
    # the following line instead.
    # NOTE: the numeric patterns accept plain decimals only; scientific
    # notation such as ``2e-4`` is not recognised by these expressions.
    _PARAMETER_PATTERNS = {
        'salt_concentration': r'SaltConcentration:(\s*[0-9]*\.?[0-9]*)',
        'temperature': r'Temperature \(K\):(\s*[0-9]*\.?[0-9]*)',
        'viscosity': r'Viscosity:(\s*[0-9]*\.?[0-9]*)',
        'solvent_density': r'Solvent_density:(\s*[0-9]*\.?[0-9]*)',
        'num_heavy_cluster': r'Number_of_heavy_cluster \(Integer\):(\s*[0-9]+)',
        'gaussian_width': r'GaussianWidth:(\s*[0-9]*\.?[0-9]*)',
        'skip_parametrizing_diffusible': r'Skip_parametrizing_diffusible \(Yes/No\):([ \w]+)',
        'gigantic_stat_objects': r'Gigantic_stat_objects \(Yes/No\):([ \w]+)',
        'python_path': r'Python_path:([ \t]*\S+)',
        'hydro_path': r'Hydro_path:([ \t]*\S+)',
        'apbs_path': r'Apbs_path:([ \t]*\S+)',
        'vmd_path': r'Vmd_path:([ \t]*\S+)',
        'parameters_folder': r'Parameters_folder:([ \t]*\S+)',
        'num_replicas': r'Num_replicas \(Integer\):(\s*[0-9]+)',
        'timestep': r'Timestep \(Float\):(\s*[0-9]*\.?[0-9]*)',
        'steps': r'Steps \(Integer\):(\s*[0-9]+)',
        'interactive': r'Interactive \(Yes/No\):([ \w]+)',
        'grid_path': r'Grid_path:([ \t]*\S+)',
        'well_depth': r'WellDepth \(Positive\):\s*([0-9]+[\.]*[0-9]*)',
        'well_resolution': r'WellResolution \(Positive\):\s*([0-9]+[\.]*[0-9]*)',
        'arbd_path': r'ARBD_path:([ \t]*\S+)',
        'simulation_path': r'Path_for_ARBD_simulations:([ \t]*\S+)',
    }

    # Three single-space-separated numbers; origins may carry a leading minus.
    _UNSIGNED_TRIPLE = r'[0-9]+[\.]*[0-9]* [0-9]+[\.]*[0-9]* [0-9]+[\.]*[0-9]*'
    _SIGNED_TRIPLE = r'-*[0-9]+[\.]*[0-9]* -*[0-9]+[\.]*[0-9]* -*[0-9]+[\.]*[0-9]*'

    # Vector parameters: config key -> regex whose group 1 is "x y z".
    _VECTOR_PATTERNS = {
        'cell_basis_vector1': r'CellBasisVector1:\s*(' + _UNSIGNED_TRIPLE + r')',
        'cell_basis_vector2': r'CellBasisVector2:\s*(' + _UNSIGNED_TRIPLE + r')',
        'cell_basis_vector3': r'CellBasisVector3:\s*(' + _UNSIGNED_TRIPLE + r')',
        'cell_origin': r'CellOrigin:\s*(' + _SIGNED_TRIPLE + r')',
        'initial_coor_basis_vector1': r'InitialCoorBasisVector1:\s*(' + _UNSIGNED_TRIPLE + r')',
        'initial_coor_basis_vector2': r'InitialCoorBasisVector2:\s*(' + _UNSIGNED_TRIPLE + r')',
        'initial_coor_basis_vector3': r'InitialCoorBasisVector3:\s*(' + _UNSIGNED_TRIPLE + r')',
        'initial_coor_origin': r'InitialCoorOrigin:\s*(' + _SIGNED_TRIPLE + r')',
    }

    # Scalar parameters that are converted from their raw string form.
    _TYPE_CONVERSIONS = {
        'salt_concentration': float,
        'temperature': float,
        'viscosity': float,
        'solvent_density': float,
        'num_heavy_cluster': int,
        'gaussian_width': float,
        'num_replicas': int,
        'timestep': float,
        'steps': int,
        'well_depth': float,
        'well_resolution': float,
    }

    # Yes/No parameters; anything other than "yes" (case-insensitive) is False.
    _BOOLEAN_PARAMETERS = (
        'skip_parametrizing_diffusible',
        'gigantic_stat_objects',
        'interactive',
    )

    def __init__(self, config_path):
        """
        Initialize SimpleArbdConfig.

        Args:
            config_path: Path to the SimpleARBD configuration file

        Raises:
            FileNotFoundError: If ``config_path`` does not exist
        """
        self.config_path = Path(config_path)
        if not self.config_path.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_path}")

        self.config = self._parse_config()
        self.simconf = self._create_simconf()

    def _parse_config(self):
        """
        Read the SimpleARBD configuration file and parse its contents.

        Returns:
            Dict containing configuration parameters
        """
        logger.info(f"Parsing config file: {self.config_path}")
        with open(self.config_path) as f:
            text = f.read()
        return self._parse_text(text)

    @classmethod
    def _parse_text(cls, text):
        """
        Parse the text of a SimpleARBD configuration file.

        Only parameters that are found in ``text`` appear in the result.
        Values that match their pattern but cannot be converted to the
        expected numeric type are logged and left out, so that consumers fall
        back to their own defaults instead of receiving a raw string.

        Args:
            text: Full contents of the configuration file

        Returns:
            Dict containing configuration parameters
        """
        config = {}

        # Diffusible objects: whitespace-separated names on one line
        match = re.search(r'Diffusible_objects:([ \w\.]+)', text)
        if match:
            config['diffusible_objects'] = match.group(1).strip().split()

        # Static objects: the literal "NA" means there are none
        match = re.search(r'Static_objects \(Enter NA for no static object\):([ \w\.]+)', text)
        if match:
            val = match.group(1).strip()
            config['static_objects'] = [] if val == 'NA' else val.split()

        # Scalar parameters, kept as stripped strings until converted below
        for param, pattern in cls._PARAMETER_PATTERNS.items():
            match = re.search(pattern, text)
            if match:
                config[param] = match.group(1).strip()

        # Vector parameters, converted to lists of floats
        for param, pattern in cls._VECTOR_PATTERNS.items():
            match = re.search(pattern, text)
            if not match:
                continue
            try:
                config[param] = [float(x) for x in match.group(1).split()]
            except ValueError:
                # The pattern tolerates repeated dots (e.g. "1..2"), which
                # float() rejects; skip the vector rather than abort parsing.
                logger.warning(f"Could not convert {param} to a list of floats")

        # Copies per object: counts are paired with the diffusible objects in
        # order; surplus entries on either side are ignored.
        match = re.search(r'Number_of_copies_per_object \(Integer\(s\)\):([ 0-9]+)', text)
        if match and 'diffusible_objects' in config:
            copies = match.group(1).split()
            config['copies_per_object'] = dict(
                zip(config['diffusible_objects'], map(int, copies))
            )

        # Extra potentials: every "(path.dx, vdw_type)" pair after the tag.
        # No trailing newline is required, so a file whose last line holds the
        # tags is handled as well.
        match = re.search(r'Extra_potentials_tags \(Path, vdw cluster group\):([\s\S]*)', text)
        if match:
            extra_potentials = []
            for tag in re.findall(r'\((\S+\.dx,\s*\w+)\)', match.group(1)):
                # Split on the last comma: the vdw type never contains one,
                # whereas the path may.
                path, _, vdw_type = tag.rpartition(',')
                extra_potentials.append({
                    'path': path.strip(),
                    'vdw_type': vdw_type.strip(),
                })
            config['extra_potentials'] = extra_potentials

        # Numeric conversions
        for param, convert in cls._TYPE_CONVERSIONS.items():
            if param not in config:
                continue
            try:
                config[param] = convert(config[param])
            except (ValueError, TypeError):
                logger.warning(f"Could not convert {param} to {convert.__name__}")
                # Drop the unusable raw string so defaults apply downstream
                del config[param]

        # Boolean conversions
        for param in cls._BOOLEAN_PARAMETERS:
            if param in config:
                config[param] = config[param].lower() == 'yes'

        return config

    def _create_simconf(self) -> "SimConf":
        """
        Create a SimConf object from the parsed configuration.

        Missing parameters fall back to the defaults listed below; the
        output period is always 1000.  Binary paths are forwarded only when
        they are present in the configuration.

        Returns:
            SimConf object
        """
        params = {
            'temperature': self.config.get('temperature', 300),
            'viscosity': self.config.get('viscosity', 0.01),
            'solvent_density': self.config.get('solvent_density', 1.0),
            'num_heavy_cluster': self.config.get('num_heavy_cluster', 3),
            'timestep': self.config.get('timestep', 0.0002),
            'num_steps': self.config.get('steps', 10000000),
            'output_period': 1000,  # Default
        }

        # Binary paths use the same key in the config and in SimConf
        for key in ('hydro_path', 'apbs_path', 'vmd_path', 'arbd_path'):
            if key in self.config:
                params[key] = self.config[key]

        return SimConf(**params)

    def create_model(self) -> "StructureRigidBodyModel":
        """
        Create a StructureRigidBodyModel from the configuration.

        Cell vectors are passed only when all three basis vectors were
        parsed; otherwise ``None`` is passed.  The boundary is enabled when
        at least one extra potential was parsed.

        Returns:
            StructureRigidBodyModel instance
        """
        basis_keys = ('cell_basis_vector1', 'cell_basis_vector2', 'cell_basis_vector3')
        cell_vectors = None
        if all(key in self.config for key in basis_keys):
            cell_vectors = [self.config[key] for key in basis_keys]

        return StructureRigidBodyModel(
            cell_vectors=cell_vectors,
            cell_origin=self.config.get('cell_origin'),
            configuration=self.simconf,
            use_boundary=bool(self.config.get('extra_potentials')),
            boundary_params={
                'well_depth': self.config.get('well_depth', 1.0),
                'resolution': self.config.get('well_resolution', 2.0),
            }
        )

    def create_engine(self) -> "SimpleArbdEngine":
        """
        Create a SimpleArbdEngine from the configuration.

        Returns:
            SimpleArbdEngine instance built with ``self.simconf`` and no
            extra BD file lines
        """
        return SimpleArbdEngine(
            configuration=self.simconf,
            extra_bd_file_lines=''
        )

    def setup_diffusible_objects(self, model: "StructureRigidBodyModel"):
        """
        Set up diffusible objects in the model.

        For every configured diffusible object, ``<name>.psf`` and
        ``<name>.pdb`` are looked up relative to the current working
        directory; objects whose files are missing are skipped with a
        warning.  When ``skip_parametrizing_diffusible`` is set, no object is
        added at all.

        Args:
            model: StructureRigidBodyModel to add diffusible objects to
        """
        if 'diffusible_objects' not in self.config:
            logger.warning("No diffusible objects specified in configuration")
            return

        # The initial region is used only when it is fully specified
        region_keys = {
            'bv1': 'initial_coor_basis_vector1',
            'bv2': 'initial_coor_basis_vector2',
            'bv3': 'initial_coor_basis_vector3',
            'origin': 'initial_coor_origin',
        }
        initial_region = None
        if all(key in self.config for key in region_keys.values()):
            initial_region = {
                name: self.config[key] for name, key in region_keys.items()
            }

        for obj_name in self.config['diffusible_objects']:
            if self.config.get('skip_parametrizing_diffusible', False):
                logger.info(f"Skipping parametrization for {obj_name} (as requested in config)")
                continue

            copies = self.config.get('copies_per_object', {}).get(obj_name, 1)

            psf_file = Path(f"{obj_name}.psf")
            pdb_file = Path(f"{obj_name}.pdb")
            if not (psf_file.exists() and pdb_file.exists()):
                logger.warning(f"Structure files for {obj_name} not found: {psf_file}, {pdb_file}")
                continue

            logger.info(f"Adding diffusible object: {obj_name} with {copies} copies")

            work_dir = Path(self.config.get('parameters_folder', './parameters')) / obj_name

            model.add_diffusible_object(
                structure_path=psf_file,  # Use PSF as primary file
                copies=copies,
                name=obj_name,
                initial_region=initial_region,
                random_seed=42,  # Fixed seed for reproducibility
                work_dir=work_dir
            )

    def setup_static_objects(self, model: "StructureRigidBodyModel"):
        """
        Set up static objects in the model.

        For every configured static object, ``<name>.psf`` and
        ``<name>.pdb`` are looked up relative to the current working
        directory; objects whose files are missing are skipped with a
        warning.

        Args:
            model: StructureRigidBodyModel to add static objects to
        """
        if not self.config.get('static_objects'):
            logger.info("No static objects specified in configuration")
            return

        # One flag applies to every static object in the configuration
        is_gigantic = self.config.get('gigantic_stat_objects', False)

        for obj_name in self.config['static_objects']:
            psf_file = Path(f"{obj_name}.psf")
            pdb_file = Path(f"{obj_name}.pdb")
            if not (psf_file.exists() and pdb_file.exists()):
                logger.warning(f"Structure files for static object {obj_name} not found: {psf_file}, {pdb_file}")
                continue

            logger.info(f"Adding static object: {obj_name} (gigantic: {is_gigantic})")

            work_dir = Path(self.config.get('parameters_folder', './parameters')) / f"static_{obj_name}"

            model.add_static_object(
                structure_path=psf_file,  # Use PSF as primary file
                work_dir=work_dir,
                is_gigantic=is_gigantic,
                threshold=300  # Default threshold
            )

    def run_simulation(self, model: "StructureRigidBodyModel", engine: "SimpleArbdEngine"):
        """
        Run the simulation.

        Creates ``<simulation_path>/output`` (default simulation path:
        ``./simulation``) and runs the engine with the configured number of
        replicas (default 1), naming the output after the config file stem.

        Args:
            model: StructureRigidBodyModel to simulate
            engine: SimpleArbdEngine to use for simulation
        """
        sim_path = self.config.get('simulation_path', './simulation')
        output_dir = Path(sim_path) / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)

        replicas = self.config.get('num_replicas', 1)

        logger.info(f"Running simulation with {replicas} replicas")
        engine.run_simulation(
            model=model,
            output_name=self.config_path.stem,
            replicas=replicas,
            output_directory=str(output_dir),
            directory=str(sim_path)
        )
def main():
    """
    Command-line entry point: load a SimpleARBD configuration file, build the
    model and engine from it, and run the simulation unless ``--setup-only``
    was given.

    Returns:
        0 on success, 1 if the configuration could not be loaded
    """
    cli = argparse.ArgumentParser(description='Process SimpleARBD configuration file')
    cli.add_argument('config_file', help='Path to SimpleARBD configuration file')
    cli.add_argument(
        '--setup-only',
        action='store_true',
        help='Only set up the simulation, do not run it',
    )
    options = cli.parse_args()

    # Any failure while loading the configuration is reported and turned into
    # a non-zero exit status instead of a traceback.
    try:
        arbd_config = SimpleArbdConfig(options.config_file)
    except Exception as e:
        logger.error(f"Error parsing configuration file: {e}")
        return 1

    # Build the model and engine, then populate the model
    simulation_model = arbd_config.create_model()
    simulation_engine = arbd_config.create_engine()
    arbd_config.setup_diffusible_objects(simulation_model)
    arbd_config.setup_static_objects(simulation_model)

    if options.setup_only:
        logger.info("Setup complete. Simulation not started (--setup-only flag used)")
    else:
        arbd_config.run_simulation(simulation_model, simulation_engine)

    return 0
# Run the command-line interface when executed as a script; the return value
# of main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())