**Import required libraries and scripts**

In [None]:
#Import required libraries and scripts
from scripts.library_preparation import *
from scripts.utilities import *
from scripts.docking_functions import *
from scripts.clustering_functions import *
from scripts.rescoring_functions import *
from scripts.consensus_methods import *
from scripts.performance_calculation import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.postprocessing import *
from scripts.protein_preparation import *

**Set up**
- **software**: The path to the software folder. In most cases this is where the DockM8 repository was downloaded to.
- **receptors**: The list of paths to the protein files (.pdb).
- **prepare_protein**: Whether or not the protein files should be prepared using Protoss (True or False).
- **pocket**: The method to use for pocket determination. Must be one of 'Reference', 'RoG' or 'Dogsitescorer' (case-sensitive).
- **ref_files**: The list of paths to the reference ligands used to define the binding pockets (.sdf files), one per receptor and in the same order as **receptors**.
- **docking_library**: The path to the docking library file (.sdf).
- **id_column**: The unique identifier column used in the docking library.
- **conformers**: The method to use for conformer generation, must be one of 'GypsumDL', 'MMFF' or 'RDKit' (RDKit and MMFF are equivalent). 
- **protonation**: The method to use for compound protonation. Must be one of 'GypsumDL' or 'None'.
- **docking_programs**: The method(s) to use for docking. Must be one or more of 'GNINA', 'SMINA', 'QVINA2', 'QVINAW' or 'PLANTS'.
- **n_poses**: The number of poses to generate with each docking program. Default=10
- **exhaustiveness**: The precision used if docking with SMINA/GNINA. Default=8
- **pose_selection**: A list of the method(s) to use for pose selection/clustering. Must contain one or more of 'RMSD', 'spyRMSD', 'espsim', 'USRCAT', '3DScore', 'bestpose', 'bestpose_GNINA', 'bestpose_SMINA', 'bestpose_QVINA2', 'bestpose_QVINAW' or 'bestpose_PLANTS'. You can also specify any of the scoring functions to select the poses.
- **clustering_method**: Which algorithm to use for clustering. Must be one of 'KMedoids', 'Aff_prop'. Only valid for the descriptor based pose_selection methods (RMSD, spyRMSD, espsim, USRCAT)
- **rescoring_functions**: A list of scoring functions to use for rescoring. Must be one or more of 'GNINA-Affinity', 'CNN-Score', 'CNN-Affinity', 'AD4', 'CHEMPLP', 'RFScoreVS', 'LinF9', 'SCORCH', 'Vinardo', 'PLECScore', 'NNScore', 'KORP-PL', 'ConvexPLR', 'RTMScore', 'AAScore'.
- **consensus_method**: Which consensus method to use. Must be one of 'ECR_best', 'ECR_avg', 'avg_ECR', 'RbR', 'RbV', 'Zscore_best', 'Zscore_avg'.
We recommend using the command-line or GUI version of DockM8 to generate decoys.

In [None]:
# ---------------------------------------------------------------------------
# Configuration for an ensemble (multi-receptor) DockM8 run.
# Edit the values below; the pipeline underneath reads them as globals.
# ---------------------------------------------------------------------------
CWD = os.getcwd()
software = Path(CWD) / 'software'
receptors = [
    Path(CWD) / 'dockm8_testing' / '1ffv_p.pdb',
    Path(CWD) / 'dockm8_testing' / '4kd1_p.pdb',
]
pocket = 'Reference'
prepare_protein = True
# One reference ligand per receptor, in the same order as `receptors`.
ref_files = [
    Path(CWD) / 'dockm8_testing' / '1ffv_l.sdf',
    Path(CWD) / 'dockm8_testing' / '4kd1_l.sdf',
]
docking_library = Path(CWD) / 'dockm8_testing' / 'library.sdf'
id_column = 'ID'
conformers = 'GypsumDL'
protonation = 'GypsumDL'
docking_programs = ['PLANTS']
n_poses = 10
exhaustiveness = 8
# Must be a list: the pipeline iterates over it. A bare string would be
# iterated character by character.
pose_selection = ['bestpose_PLANTS']
clustering_method = 'KMedoids'
# NOTE(review): the notebook documentation spells the first entry
# 'GNINA-Affinity' (hyphen) -- confirm which spelling rescore_poses expects.
rescoring_functions = ['GNINA_Affinity', 'CNN-Score', 'CNN-Affinity', 'AD4', 'CHEMPLP', 'RFScoreVS']
consensus_method = 'ECR_best'
# Use ~90% of the available cores, but never fewer than one.
# os.cpu_count() may return None when the count cannot be determined.
ncpus = max(1, int((os.cpu_count() or 1) * 0.9))

# Create (or truncate) the log file.
with open('log.txt', 'w'):
    pass

# ---------------------------------------------------------------------------
# Validate the configuration before starting any expensive work.
# ---------------------------------------------------------------------------
if isinstance(pose_selection, str):
    # Tolerate a single method given as a plain string.
    pose_selection = [pose_selection]

if pocket not in ('Reference', 'RoG', 'Dogsitescorer'):
    raise ValueError(
        f"Unknown pocket method {pocket!r}; "
        "expected 'Reference', 'RoG' or 'Dogsitescorer'."
    )

if len(receptors) != len(ref_files):
    raise ValueError(
        f'Got {len(receptors)} receptors but {len(ref_files)} reference '
        'ligands; exactly one reference ligand is required per receptor.'
    )

print('DockM8 is running in ensemble mode...')

# Pair each receptor with its reference ligand.
receptor_dict = dict(zip(receptors, ref_files))

# ---------------------------------------------------------------------------
# Per-receptor pipeline: prepare -> pocket -> dock -> select -> rescore ->
# consensus. Each receptor gets its own working directory next to its file.
# ---------------------------------------------------------------------------
for receptor, ref_file in receptor_dict.items():

    w_dir = Path(receptor).parent / Path(receptor).stem
    print('The working directory has been set to:', w_dir)
    w_dir.mkdir(exist_ok=True)

    # Optionally prepare the protein structure before docking.
    if prepare_protein:
        prepared_receptor = prepare_protein_protoss(receptor)
    else:
        prepared_receptor = receptor

    # Determine the binding pocket (exactly one branch runs; `pocket` was
    # validated above, so pocket_definition is always bound afterwards).
    if pocket == 'Reference':
        pocket_definition = get_pocket(ref_file, prepared_receptor, 8)
    elif pocket == 'RoG':
        pocket_definition = get_pocket_RoG(ref_file, prepared_receptor)
    elif pocket == 'Dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='Volume')
    print(pocket_definition)

    # Skip library preparation when a prepared library already exists.
    if not (w_dir / 'final_library.sdf').is_file():
        prepare_library(docking_library, w_dir, id_column, conformers, protonation, software, ncpus)

    docking(w_dir, prepared_receptor, pocket_definition, software, docking_programs, exhaustiveness, n_poses, ncpus, 'concurrent_process')
    concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus, bust_poses=False)

    print('Loading all poses SDF file...')
    tic = time.perf_counter()
    all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)
    print(f'Loaded {len(all_poses)} poses.')
    toc = time.perf_counter()
    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!...')

    # Pose selection: skip methods whose clustered output already exists.
    for method in pose_selection:
        clustered_sdf = w_dir / 'clustering' / f'{method}_clustered.sdf'
        if not clustered_sdf.is_file():
            select_poses(method, clustering_method, w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)
    # Rescore the selected poses of every method.
    for method in pose_selection:
        clustered_sdf = w_dir / 'clustering' / f'{method}_clustered.sdf'
        rescore_poses(w_dir, prepared_receptor, pocket_definition, software, clustered_sdf, rescoring_functions, ncpus)
    # Combine the rescoring results with the chosen consensus method.
    for method in pose_selection:
        apply_consensus_methods(w_dir, method, consensus_method, rescoring_functions, 'min_max')

# ---------------------------------------------------------------------------
# Ensemble consensus across all receptors, reported per pose-selection method.
# `ensemble_results` holds the result of the last method after the loop.
# ---------------------------------------------------------------------------
for method in pose_selection:
    ensemble_results = ensemble_consensus(receptors, method, consensus_method, 1)
    print(ensemble_results)
