{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "**Import required libraries and scripts**" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Import required libraries and scripts\n", "from scripts.library_preparation import *\n", "from scripts.utilities import *\n", "from scripts.docking_functions import *\n", "from scripts.clustering_functions import *\n", "from scripts.rescoring_functions import *\n", "from scripts.consensus_methods import *\n", "from scripts.performance_calculation import *\n", "from scripts.dogsitescorer import *\n", "from scripts.get_pocket import *\n", "from scripts.postprocessing import *\n", "from scripts.protein_preparation import *" ] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"**Set up**\n",
"- **software**: The path to the software folder. In most cases this is where the DockM8 repository was downloaded to.\n",
"- **receptors**: The list of paths to the protein files (.pdb).\n",
"- **prepare_protein**: Whether or not protein files should be prepared using Protoss (True or False).\n",
"- **pocket**: The method to use for pocket determination. Must be one of 'Reference', 'RoG' or 'Dogsitescorer'.\n",
"- **ref_files**: The list of paths to the reference ligands used to define the binding pockets (.sdf files), one per receptor and in the same order as **receptors**.\n",
"- **docking_library**: The path to the docking library file (.sdf).\n",
"- **id_column**: The unique identifier column used in the docking library.\n",
"- **conformers**: The method to use for conformer generation, must be one of 'GypsumDL', 'MMFF' or 'RDKit' (RDKit and MMFF are equivalent). \n",
"- **protonation**: The method to use for compound protonation. Must be one of 'GypsumDL' or 'None'.\n",
"- **docking_programs**: The method(s) to use for docking. Must be one or more of 'GNINA', 'SMINA', 'QVINA2', 'QVINAW' or 'PLANTS'.\n",
"- **n_poses**: The number of poses to generate for each docking software. Default=10\n",
"- **exhaustiveness**: The precision used if docking with SMINA/GNINA. Default=8\n",
"- **pose_selection**: The method(s) to use for pose clustering. Must be one or more of 'RMSD', 'spyRMSD', 'espsim', 'USRCAT', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_QVINA2', 'bestpose_QVINAW' or 'bestpose_PLANTS'. You can also specify any of the scoring functions to select the poses.\n",
"- **clustering_method**: Which algorithm to use for clustering. Must be one of 'KMedoids' or 'Aff_prop'. Only valid for the descriptor based pose_selection methods (RMSD, spyRMSD, espsim, USRCAT).\n",
"- **rescoring_functions**: A list of scoring functions to use for rescoring. Must be one or more of 'GNINA-Affinity','CNN-Score','CNN-Affinity', 'AD4', 'CHEMPLP', 'RFScoreVS', 'LinF9', 'SCORCH', 'Vinardo', 'PLECScore', 'NNScore', 'KORP-PL', 'ConvexPLR', 'RTMScore', 'AAScore'.\n",
"- **consensus_method**: Which consensus method to use. Must be one of 'ECR_best', 'ECR_avg', 'avg_ECR', 'RbR', 'RbV', 'Zscore_best', 'Zscore_avg'.\n",
"- **ncpus**: The number of CPU cores to use. By default, 90% of the available cores.\n",
"\n",
"We recommend using the command line or GUI versions of DockM8 to generate decoys."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# --- Configuration ---\n",
"# All input paths are built relative to the current working directory.\n",
"CWD = os.getcwd()\n",
"software = Path(CWD+'/software')\n",
"receptors = [Path(CWD+'/dockm8_testing/1ffv_p.pdb'), Path(CWD+'/dockm8_testing/4kd1_p.pdb')]\n",
"pocket = 'Reference'\n",
"prepare_protein = True\n",
"# One reference ligand per receptor, listed in the same order as `receptors`.\n",
"ref_files = [Path(CWD+'/dockm8_testing/1ffv_l.sdf'), Path(CWD+'/dockm8_testing/4kd1_l.sdf')]\n",
"docking_library = Path(CWD+'/dockm8_testing/library.sdf')\n",
"id_column = 'ID'\n",
"conformers = 'GypsumDL'\n",
"protonation = 'GypsumDL'\n",
"docking_programs = ['PLANTS']\n",
"n_poses = 10\n",
"exhaustiveness = 8\n",
"# Must be a list: it is iterated below, and iterating a bare string would\n",
"# yield single characters instead of method names.\n",
"pose_selection = ['bestpose_PLANTS']\n",
"clustering_method = 'KMedoids'\n",
"# 'GNINA-Affinity' (hyphenated) is the spelling listed in the set-up notes of this notebook.\n",
"rescoring_functions = ['GNINA-Affinity','CNN-Score','CNN-Affinity', 'AD4', 'CHEMPLP', 'RFScoreVS']\n",
"consensus_method = 'ECR_best'\n",
"# Use about 90% of the cores, but never fewer than one; os.cpu_count() may return None.\n",
"ncpus = max(1, int((os.cpu_count() or 1)*0.9))\n",
"# Create (or truncate) the log file.\n",
"open('log.txt', 'w').close()\n",
"\n",
"print('DockM8 is running in ensemble mode...')\n",
"\n",
"# Pair each receptor with its reference ligand by position.\n",
"if len(receptors) != len(ref_files):\n",
"    raise ValueError(f'Expected one reference ligand per receptor, got {len(receptors)} receptors and {len(ref_files)} reference files.')\n",
"receptor_dict = dict(zip(receptors, ref_files))\n",
"\n",
"# --- Per-receptor workflow: prepare, dock, select poses, rescore, consensus ---\n",
"for receptor, ref_file in receptor_dict.items():\n",
"\n",
"    # Each receptor gets its own working directory next to its .pdb file.\n",
"    w_dir = Path(receptor).parent / Path(receptor).stem\n",
"    print('The working directory has been set to:', w_dir)\n",
"    w_dir.mkdir(exist_ok=True)\n",
"\n",
"    if prepare_protein:\n",
"        # Prepare the protein\n",
"        prepared_receptor = prepare_protein_protoss(receptor)\n",
"    else:\n",
"        prepared_receptor = receptor\n",
"\n",
"    # Determine the binding pocket. The comparison is case-insensitive so that\n",
"    # 'reference' and 'Reference' are both accepted; an unknown value fails\n",
"    # here instead of leaving pocket_definition undefined for the docking step.\n",
"    pocket_mode = pocket.lower()\n",
"    if pocket_mode == 'reference':\n",
"        # NOTE(review): the third argument (8) is presumably a distance cutoff around the ligand - confirm.\n",
"        pocket_definition = get_pocket(ref_file, prepared_receptor, 8)\n",
"    elif pocket_mode == 'rog':\n",
"        pocket_definition = get_pocket_RoG(ref_file, prepared_receptor)\n",
"    elif pocket_mode == 'dogsitescorer':\n",
"        pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='Volume')\n",
"    else:\n",
"        raise ValueError(f'Unknown pocket method {pocket!r}: expected one of Reference, RoG or Dogsitescorer.')\n",
"    print(pocket_definition)\n",
"\n",
"    # Skip library preparation when a prepared library is already present.\n",
"    if not (w_dir / 'final_library.sdf').is_file():\n",
"        prepare_library(docking_library, w_dir, id_column, conformers, protonation, software, ncpus)\n",
"\n",
"    docking(w_dir, prepared_receptor, pocket_definition, software, docking_programs, exhaustiveness, n_poses, ncpus, 'concurrent_process')\n",
"    concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus, bust_poses=False)\n",
"\n",
"    print('Loading all poses SDF file...')\n",
"    tic = time.perf_counter()\n",
"    all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)\n",
"    print(f'Loaded {len(all_poses)} poses.')\n",
"    toc = time.perf_counter()\n",
"    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')\n",
"\n",
"    # Pose selection is skipped for methods whose clustered file already exists.\n",
"    for method in pose_selection:\n",
"        clustered_file = w_dir / 'clustering' / f'{method}_clustered.sdf'\n",
"        if not clustered_file.is_file():\n",
"            select_poses(method, clustering_method, w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)\n",
"    for method in pose_selection:\n",
"        rescore_poses(w_dir, prepared_receptor, pocket_definition, software, w_dir / 'clustering' / f'{method}_clustered.sdf', rescoring_functions, ncpus)\n",
"    for method in pose_selection:\n",
"        apply_consensus_methods(w_dir, method, consensus_method, rescoring_functions, 'min_max')\n",
"\n",
"# --- Ensemble consensus across all receptors ---\n",
"# Print inside the loop so every pose selection method is reported, not only the last one.\n",
"for method in pose_selection:\n",
"    ensemble_results = ensemble_consensus(receptors, method, consensus_method, 1)\n",
"    print(ensemble_results)\n"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.13 ('wocondock')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "version": "3.10.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "ac9acbc93af693fb4e01b586b9d883cf48eab2850c268069ebbf85c5f9fbe2b9" } } }, "nbformat": 4, "nbformat_minor": 2 }