# parsing the input import json import datetime import socket import re, os import time import sys import random import logging import pymol from pymol import cmd from pymol import stored from chempy.cif import * # logging logger = logging.getLogger() logger.setLevel(logging.DEBUG) # API calls server_root = "https://www.ebi.ac.uk/pdbe/api/" summaryURL = server_root + "pdb/entry/summary/" poly_seq_schemeURL = server_root + "pdb/entry/residue_listing/" protein_mappingURL = server_root + "mappings/" nucleic_mappingURL = server_root + "nucleic_mappings/" # seqURL = server_root + "mappings/sequence_domains/entry/" moleculesURL = server_root + "pdb/entry/molecules/" validationURL = server_root + 'validation/global-percentiles/entry/' validation_residueURL = server_root + 'validation/protein-RNA-DNA-geometry-outlier-residues/entry/' validation_ramaURL = server_root + 'validation/protein-ramachandran-sidechain-outliers/entry/' # ftp site ebi_ftp = 'ftp://ftp.ebi.ac.uk/pub/databases/pdb/data/structures/divided/mmCIF/%s/%s.cif.gz' # clean mmcif updated_ftp = 'https://www.ebi.ac.uk/pdbe/static/entry/%s_updated.cif.gz' # dictionaries stored.domain_dict = {} stored.seq_scheme = {} stored.molecule_dict = {} stored.poly_count = 0 stored.residue_dict = {} stored.ca_p_only = [] stored.assemblies = {} stored.operators = {} stored.cif_items = {} stored.asym_to_chain = {} stored.assembly_objects = {} stored.entities = {} sphere_scale_dict = {'sticks': '0.25', 'spheres': '0.5', 'cartoon': '', 'ribbon': ''} pdb_id_re = re.compile(r'\d[A-z0-9]{3}') mmCIF_file_RE = re.compile(r'mmCIF_file=(.*)') # cmd.set('connect_mode', 4) empty_cif_list = ["", ".", "?", None] class ApiCall: '''functions specific to API calls''' def __init__(self, url): self.url = url self.data = {} def return_data(self): try: import requests except Exception as e: logging.error(e) return self.data self.encode_url() r = requests.get(url=self.url, timeout=60) if r.status_code == 200: json_data = r.json() self.data = json_data elif r.status_code == 404: self.data = {} else: print(r.status_code, r.reason) self.data = {} return self.data def encode_url(self): self.url = re.sub(" ", "%20", self.url) # url handling def url_response_urllib2(url, description): '''try several times to get a response from the API''' data = None try: import urllib2 except Exception as e: logging.error(e) return data data_response = False tries = 1 limit = 5 logging.debug(url) date = datetime.datetime.now().strftime("%Y,%m,%d %H:%M") while tries < limit: try: response = urllib2.urlopen(url, None, 60) except urllib2.HTTPError as e: logging.debug("##############") logging.debug("%s HTTP API error - %s, error code - %s, try %s" % (date, description, e.code, str(tries))) # logging.debug(e.code) if e.code == 404: data_response = True data = {} break else: # logging.debug(entry_url) time.sleep(random.randint(1, 20)) # sleep 1-20 seconds tries += 1 logging.debug(e) data = {} except urllib2.URLError as e: logging.debug("##############") logging.debug("%s URL API error - %s, try %s" % (date, e, str(tries))) # logging.debug(entry_url) time.sleep(random.randint(1, 20)) # sleep 1-20 seconds tries += 1 logging.debug(e) data = {} except socket.timeout as e: logging.debug("##############") logging.debug("%s Timeout API error - %s, try %s" % (date, e, str(tries))) # logging.debug(entry_url) time.sleep(random.randint(1, 20)) # sleep 1-20 seconds tries += 1 logging.debug(e) data = {} else: logging.debug("received a response from the " + description + " API after " + str(tries) + " tries.") data = json.load(response) data_response = True break if not data_response: logging.debug("No response from the %s API" % description) # cmd.quit(1) return data # url handling def url_response(url, description): logging.debug(description) try: import requests data = ApiCall(url=url).return_data() except: data = url_response_urllib2(url=url, description=description) return data def getColor(val, crange=[2, 5], inverted=1, absolute=1, buffer=32): """convert colour value in rgb for pymol""" if absolute == 1: val = abs(val) # Convert value to integer in range [0,510] integercolor = int((val - crange[0]) / (crange[1] - crange[0]) * 510) if integercolor < 0: integercolor = 0 if integercolor > 510: integercolor = 510 if inverted == 1: integercolor = 510 - integercolor # first 255 values are converted in color RED --> YELLOW if integercolor <= 255: red = 255 - buffer green = int(integercolor * (1 - buffer / 255)) blue = 0 # other values are converted in color YELLOW --> GREEN elif (integercolor <= 510): integercolor -= 255 red = int(255 + integercolor * (buffer / 255 - 1)) green = 255 - buffer blue = 0 else: red = 150 green = 150 blue = 150 red = float(red) / 255.0 green = float(green) / 255.0 blue = float(blue) / 255.0 # return color format used for drawing in Pymol colour_list = [] colour_list.append(red) colour_list.append(green) colour_list.append(blue) return colour_list # return "[" + int(red) + ',' + int(green) + ',' + int(blue) + "]" pymolcolours = [ "red", "green", "blue", "yellow", "magenta", "cyan", "tv_red", "tv_green", "tv_blue", "tv_yellow", "lightmagenta", "palecyan", "raspberry", "chartreuse", "marine", "paleyellow", "hotpink", "aquamarine", "darksalmon", "splitpea", "slate", "yelloworange", "pink", "greencyan", "salmon", "smudge", "lightblue", "limon", "lightpink", "teal", "deepsalmon", "palegreen", "skyblue", "wheat", "dirtyviolet", "deepteal", "warmpink", "limegreen", "purpleblue", "sand", "violet", "lightteal", "firebrick", "lime", "deepblue", "violetpurple", "ruby", "limon", "density", "purple", "chocolate", "forest", "deeppurple", "brown", ] validation_background_colour = [0.4, 1.0, 0.4] cmd.set_color("validation_green_background", validation_background_colour) validation_colours = ['yellow', 'orange', 'red'] angle_bond_dict = {'angles': {'description': 'bond angles', 'colour': 'yellow', 'display': 'spheres'}, 'bonds': {'description': 'bond length', 'colour': 'red', 'display': 'spheres'}} rama_dict = {'ramachandran_outliers': {'selection': ' and name C+CA+N+O', 'colour': 'yellow', 'display': 'spheres'}, 'sidechain_outliers': {'selection': ' and not name N+CA+C+O', 'colour': 'red', 'display': 'spheres'}} def colour_return(num): colournum = "" factor = 1 while (num >= len(pymolcolours)): # print (num) num = num - (len(pymolcolours) * factor) # print (num) factor = factor + 1 else: colournum = num colour = pymolcolours[colournum] return colour def poly_display_type(asym, mol_type, length): """return either ribbon or cartoon depending on number of polymer chains""" ##start with it being cartoon - then change as needed. display_type = "cartoon" if stored.poly_count > 50: display_type = "ribbon" elif asym in stored.ca_p_only: print('set ribbon trace on') cmd.set('ribbon_trace_atoms', 1) display_type = "ribbon" else: if re.search('polypeptide', mol_type): if length < 20: display_type = 'sticks' if re.search('nucleotide', mol_type): if length < 3: display_type = 'sticks' if re.search('saccharide', mol_type): display_type = 'sticks' # print ('asym: %s, mol_type: %s, length: %s, display_type: %s' %(asym, mol_type, length, display_type)) return display_type class worker_functions(): def trans_setting(self, selection, trans_value): cmd.set("cartoon_transparency", trans_value, selection) cmd.set("ribbon_transparency", trans_value, selection) cmd.set("stick_transparency", trans_value, selection) cmd.set("sphere_transparency", trans_value, selection) def count_poly(self, pdbid): url = moleculesURL + pdbid # global molecule_dict if stored.molecule_dict == {}: stored.molecule_dict = url_response(url, 'molecules') if pdbid in stored.molecule_dict: for entity in stored.molecule_dict[pdbid]: # add ca only list if entity['ca_p_only'] != False: for a in entity['in_struct_asyms']: if a not in stored.ca_p_only: stored.ca_p_only.append(a) if entity['molecule_type'] not in ['water', 'bound']: stored.poly_count += 1 def poly_seq_scheme(pdbid): """build a dictionary like poly_seq_scheme""" url = poly_seq_schemeURL + pdbid data = url_response(url, 'seq_scheme') if pdbid in data: for entity in data[pdbid]['molecules']: for chain in entity['chains']: chain_id = chain['chain_id'] asym_id = chain['struct_asym_id'] for residue in chain['residues']: CIFnum = residue['residue_number'] PDBnum = residue['author_residue_number'] PDBinsCode = residue['author_insertion_code'] residueName = residue['residue_name'] if residue['observed_ratio'] != 0: PDBobs = True else: PDBobs = False try: stored.seq_scheme[asym_id].update({CIFnum: {'PDBnum': PDBnum, 'PDBinsCode': PDBinsCode, 'observed': PDBobs, 'residueName': residueName, 'chainID': chain_id}}) except: stored.seq_scheme.update({asym_id: {}}) stored.seq_scheme[asym_id].update({CIFnum: {'PDBnum': PDBnum, 'PDBinsCode': PDBinsCode, 'observed': PDBobs, 'residueName': residueName, 'chainID': chain_id}}) # pprint(seq_scheme) def asym_from_entity(pdbid, entity_id): url = moleculesURL + pdbid # global molecule_dict if stored.molecule_dict == {}: stored.molecule_dict = url_response(url, 'molecules') asym_id_list = [] if pdbid in stored.molecule_dict: # print (entity_id) for entity in stored.molecule_dict[pdbid]: # add ca only list if entity['ca_p_only'] != False: for a in entity['in_struct_asyms']: if a not in stored.ca_p_only: stored.ca_p_only.append(a) # pprint(entity['entity_id']) for a in entity['in_struct_asyms']: if str(entity_id) == str(entity['entity_id']): asym_id_list.append(str(a)) # print (asym_id_list) return asym_id_list def check_range_observed(asym_id, start, end, unobs): range_dict = [] firstResidue = 1 lastResidue = 1 for r in stored.seq_scheme[asym_id]: if r <= firstResidue: firstResidue = r if r >= lastResidue: lastResidue = r if end > lastResidue: # print ('ERROR: residue %s from SIFTS is greater than the lastSeqRes number %s' %(end, lastSeqRes)) end = lastResidue # print ("first residue: %s, last residue %s" %(firstResidue, lastResidue)) # print (seq_scheme[asym_id][start]) if start in stored.seq_scheme[asym_id]: chain = stored.seq_scheme[asym_id][start]['chainID'] while stored.seq_scheme[asym_id][start]['observed'] == False: # print ("PDB start is not observed, adding 1 to residue number %s" %(start)) if start == lastResidue: # print ("domain not observed") unobs = True break else: start += 1 if start == lastResidue: # print ("domain not observed") unobs = True break elif start == end: # print ("domain not observed") unobs = True break if end in stored.seq_scheme[asym_id]: while stored.seq_scheme[asym_id][end]['observed'] == False: # print ("PDB end is not observed, minusing 1 from residue number %s" %(end)) if end == firstResidue: # print ("domain not observed") unobs = True break else: end -= 1 if end == firstResidue: # print ("domain not observed") unobs = True break elif start == end: # print ("domain not observed") unobs = True break if unobs == False: # print ("CIF start: %s, CIF end %s" %(start, end)) # print ("PDB start: %s, PDB end %s" %(PDBstart, PDBend)) # need to do something about non continuous ranges which pymol doesn't cope with. """find all sections where the residue number increases by one. If it jumps then store this as a separate residue block while number is less than endbegin with number = start plus 1. see how the numbering differs from startif its 1 then its continuous - move to the next residue if its zero then there must be insert codes - this is ok - move to the next residue if its greater than 1 then store the previous residues as a block. """ currentCIF = start blockStartCIF = start nextCIF = start + 1 while nextCIF < end: currentPDB = stored.seq_scheme[asym_id][currentCIF]['PDBnum'] nextPDB = stored.seq_scheme[asym_id][nextCIF]['PDBnum'] if currentPDB not in empty_cif_list: if nextPDB in empty_cif_list: startPDB = stored.seq_scheme[asym_id][blockStartCIF]['PDBnum'] swap = checkOrder(startPDB, currentPDB) startPDBins = insert_code(asym_id, blockStartCIF) currentPDB = insert_code(asym_id, currentCIF) if swap == False: range_dict.append({"PDBstart": startPDBins, "PDBend": currentPDB, "chainID": chain}) else: range_dict.append({"PDBstart": currentPDB, "PDBend": startPDBins, "chainID": chain}) # move start to next block blockStartCIF = nextCIF else: currentPDB = int(stored.seq_scheme[asym_id][currentCIF]['PDBnum']) nextPDB = int(stored.seq_scheme[asym_id][nextCIF]['PDBnum']) if nextPDB - currentPDB > 1: # print ("numbering not continues, positive jump - store as section") startPDB = stored.seq_scheme[asym_id][blockStartCIF]['PDBnum'] swap = checkOrder(startPDB, currentPDB) startPDBins = insert_code(asym_id, blockStartCIF) currentPDB = insert_code(asym_id, currentCIF) if swap == False: range_dict.append({"PDBstart": startPDBins, "PDBend": currentPDB, "chainID": chain}) else: range_dict.append({"PDBstart": currentPDB, "PDBend": startPDBins, "chainID": chain}) # move start to next block blockStartCIF = nextCIF elif currentPDB - nextPDB > 1: # print ("numbering not continues, negative jump - store as section") startPDB = stored.seq_scheme[asym_id][blockStartCIF]['PDBnum'] swap = checkOrder(startPDB, currentPDB) startPDBins = insert_code(asym_id, blockStartCIF) currentPDB = insert_code(asym_id, currentCIF) if swap == False: range_dict.append({"PDBstart": startPDBins, "PDBend": currentPDB, "chainID": chain}) else: range_dict.append({"PDBstart": currentPDB, "PDBend": startPDBins, "chainID": chain}) # move start to next block blockStartCIF = nextCIF else: blockStartCIF = nextCIF nextCIF += 1 currentCIF += 1 startPDB = stored.seq_scheme[asym_id][blockStartCIF]['PDBnum'] swap = checkOrder(blockStartCIF, nextCIF) startPDBins = insert_code(asym_id, blockStartCIF) nextPDB = insert_code(asym_id, nextCIF) if swap == False: range_dict.append({"PDBstart": startPDBins, "PDBend": nextPDB, "chainID": chain}) else: range_dict.append({"PDBstart": nextPDB, "PDBend": startPDBins, "chainID": chain}) # pprint(range_dict) else: print("domain unobserved") return range_dict def checkOrder(start, end): swap = False # print ('PRE: start: %s, end %s, swap %s' %(start, end, swap)) if type(start) is str: start = start.split('\\')[-1] if type(end) is str: end = end.split('\\')[-1] if int(start) > int(end): swap = True # print ('POST: start: %s, end: %s, swap %s' %(start, end, swap)) return swap def insert_code(asym_id, CIFnum): if stored.seq_scheme[asym_id][CIFnum]['PDBinsCode'] == None: PDBnum = stored.seq_scheme[asym_id][CIFnum]['PDBnum'] else: PDBnum = "%s%s" % ( stored.seq_scheme[asym_id][CIFnum]['PDBnum'], stored.seq_scheme[asym_id][CIFnum]['PDBinsCode']) if re.search('-', str(PDBnum)): PDBnum = re.sub('-', '\-', str(PDBnum)) return PDBnum polymeric_types = [''] class Validation: def launch_validation(self, pdbid): url = validationURL + pdbid val_data = url_response(url, 'validation') if val_data: print('There is validation for this entry') url = validation_residueURL + pdbid res_data = url_response(url, 'residue validation') url = validation_ramaURL + pdbid rama_data = url_response(url, 'rama validation') self.per_residue_validation(pdbid, res_data, rama_data) else: print("No validation for this entry") ###this takes too long for really large entries. # Validation().per_chain_per_residue_validation(pdbid, res_data, rama_data) """validation of all polymeric enties""" def validation_selection(self, selection, display_id): # print (selection) # uses a list so 0 means that there is one outlier for this residue. if selection in stored.residue_dict: colour_num = stored.residue_dict[selection] + 1 stored.residue_dict[selection] = colour_num else: stored.residue_dict.update({selection: 0}) colour_num = 0 if colour_num > 2: colour_num = 2 residue_colour = validation_colours[colour_num] # print (residue_colour) cmd.color(residue_colour, selection) def geometric_validation(self, pdbid, res_data, chain=False, model=1): """check for geometric validation outliers in res_data """ if res_data: if pdbid in res_data: if 'molecules' in res_data[pdbid]: if res_data[pdbid]['molecules']: for entity in res_data[pdbid]['molecules']: # print (entity) entity_id = entity['entity_id'] for chain in entity['chains']: chain_id = chain['chain_id'] # print (chain_id) for model in chain['models']: model_id = int(model['model_id']) for outlier_type in model['outlier_types']: outliers = model['outlier_types'][outlier_type] # print (outlier_type) # print (outliers) for outlier in outliers: PDB_res_num = outlier['author_residue_number'] PDB_ins_code = outlier['author_insertion_code'] alt_code = outlier['alt_code'] if PDB_ins_code not in [None, " "]: PDB_res_num = '%s%s' % (PDB_res_num, PDB_ins_code) # print (PDB_res_num) selection = "chain %s and resi %s" % (chain_id, PDB_res_num) if model == 1 or model_id: if chain == False or chain_id: self.validation_selection(selection, pdbid) else: print("no residue validation for this entry") def ramachandran_validation(self, pdbid, rama_data, chain=False, model=1): """display ramachandran outliers""" if rama_data: if pdbid in rama_data: for k in rama_data[pdbid]: v = rama_data[pdbid][k] if v: print("ramachandran %s for this entry" % k) for outlier in rama_data[pdbid][k]: # print (outlier) entity_id = outlier['entity_id'] model_id = int(outlier['model_id']) chain_id = outlier['chain_id'] PDB_res_num = outlier['author_residue_number'] PDB_ins_code = outlier['author_insertion_code'] alt_code = outlier['alt_code'] if PDB_ins_code not in [None, " "]: PDB_res_num = '%s%s' % (PDB_res_num, PDB_ins_code) selection = "chain %s and resi %s" % (chain_id, PDB_res_num) if model == 1 or model_id: if chain == False or chain_id: self.validation_selection(selection, pdbid) else: print("is multimodel!!!, outlier not in model 1, not shown.") else: print("no %s" % (k)) def per_residue_validation(self, pdbid, res_data, rama_data): """validation of all outliers, coloured by number of outliers""" # display only polymers if stored.molecule_dict == {}: url = moleculesURL + pdbid stored.molecule_dict = url_response(url, 'molecules') if pdbid in stored.molecule_dict: for entity in stored.molecule_dict[pdbid]: # pprint(entity) mol_type = entity['molecule_type'] ca_only_list = [] if entity['ca_p_only'] != False: ca_only_list = entity['ca_p_only'] if mol_type not in ['Water', 'Bound']: for a in entity['in_struct_asyms']: if stored.seq_scheme == {}: poly_seq_scheme(pdbid) unobs = False start = 1 end = entity['length'] length = entity['length'] asym_id = a display_type = poly_display_type(asym_id, 'polypeptide', length) # print (asym_id) x = check_range_observed(asym_id, start, end, unobs) if x: for y in x: start = y['PDBstart'] end = y['PDBend'] chain = y['chainID'] selection = "chain %s and resi %s-%s and %s" % (chain, start, end, pdbid) # print (selection) cmd.show(display_type, selection) cmd.colour("validation_green_background", pdbid) # display(pdbid, image_type) cmd.enable(pdbid) self.geometric_validation(pdbid, res_data) self.ramachandran_validation(pdbid, rama_data) # pprint(stored.residue_dict) def entities(pdbid): print('Display entities') cmd.set("cartoon_transparency", 0.3, pdbid) cmd.set("ribbon_transparency", 0.3, pdbid) url = moleculesURL + pdbid num = 1 if stored.molecule_dict == {}: stored.molecule_dict = url_response(url, 'molecules') if pdbid in stored.molecule_dict: for entity in stored.molecule_dict[pdbid]: entity_name = '' if entity['molecule_name']: if len(entity['molecule_name']) > 1: for instance, mol in enumerate(entity['molecule_name']): if mol != entity['molecule_name'][-1]: mol = mol + "-" entity_name += mol else: entity_name += mol entity_name = entity_name + '_chimera' else: entity_name = entity['molecule_name'][0] else: print("No name from the API") entity_name = "entity_%s" % (entity['entity_id']) # print ('entity %s: %s' %(entity['entity_id'], entity_name)) # print (entity['entity_id']) ####see if its polymeric mol_type = entity['molecule_type'] # entity_name = entity['molecule_name'] if entity_name: objectName = re.sub(' ', '_', entity_name) objectName = re.sub(r'\W+', "", objectName) else: objectName = "entity_%s" % (entity['entity_id']) # print (objectName) display_type = "" object_selection = [] pymol_selection = "" if mol_type != 'Water': # use asym to find residue number and chain ID for a in entity['in_struct_asyms']: if stored.seq_scheme == {}: poly_seq_scheme(pdbid) if mol_type == 'Bound': ##bound molecules have no CIFresidue number and this is defaulted to 1 in the API # print (entity_name) # print (stored.seq_scheme[a]) for res in stored.seq_scheme[a]: # print (stored.seq_scheme[a][res]) short = stored.seq_scheme[a][res] chain = short['chainID'] res = "" display_type = 'spheres' # print (short) if short['PDBinsCode'] == None: res = str(short['PDBnum']) else: res = str(short['PDBnum']) + short['PDBinsCode'] selection = "chain %s and resi %s and %s" % (chain, res, pdbid) object_selection.append(selection) # print (selection) else: ###find the first and last residues unobs = False start = 1 end = entity['length'] asym_id = a ###need to work out if cartoon is the right thing to display length = entity['length'] display_type = poly_display_type(asym_id, mol_type, length) # print (asym_id) x = check_range_observed(asym_id, start, end, unobs) # print (x) if x: for y in x: start = y['PDBstart'] end = y['PDBend'] chain = y['chainID'] selection = "chain %s and resi %s-%s and %s" % (chain, start, end, pdbid) # print (selection) object_selection.append(selection) for o in object_selection: if len(object_selection) > 1: if o == object_selection[-1]: pymol_selection += "(%s)" % (o) else: pymol_selection += "(%s) or " % (o) else: pymol_selection += "%s" % (o) try: stored.entities[pdbid]['entity'][entity['entity_id']]['selection']['selection_list'].append( pymol_selection) print('appended %s' % (pymol_selection)) except: stored.entities.setdefault(pdbid, {}) \ .setdefault('entity', {}) \ .setdefault(entity['entity_id'], {}) \ .setdefault('selection', {}) \ .update({'display_type': display_type, 'selection_list': [pymol_selection]}) # print (pymol_selection) if len(objectName) > 250: objectName = objectName[:249] cmd.select("test_select", pymol_selection) cmd.create(objectName, "test_select") # print (display_type) cmd.show(display_type, objectName) # colour by entity. colour = colour_return(int(entity['entity_id'])) # print (colour) cmd.color(colour, objectName) cmd.delete("test_select") # cmd.show("cartoon", pdbid) def show_assemblies(pdbid, mmCIF_file): """iterate through the assemblies and output images""" print('Generating assemblies') # cmd.hide('everything') worker_functions().trans_setting('all', 0.0) try: assemblies = cmd.get_assembly_ids(pdbid) # list or None print(assemblies) if assemblies: for a_id in assemblies: print("Assembly: %s" % (a_id)) assembly_name = pdbid + '_assem_' + a_id print(assembly_name) cmd.set('assembly', a_id) cmd.load(mmCIF_file, assembly_name, format='cif') print('finished Assembly: %s' % (a_id)) except: print('pymol version does not support assemblies') def mapping(pdbid): """make domain objects""" if not stored.seq_scheme: poly_seq_scheme(pdbid) domain_to_make = ['CATH', 'SCOP', 'Pfam', 'Rfam'] segment_identifier = {'CATH': 'domain', 'SCOP': 'scop_id', 'Pfam': '', 'Rfam': ''} protein_url = protein_mappingURL + pdbid nucleic_url = nucleic_mappingURL + pdbid protein_domains = url_response(protein_url, "protein_domains") nucleic_domains = url_response(nucleic_url, "nucleic_domains") for data in [protein_domains, nucleic_domains]: if data == {}: print("no domain information for this entry") else: for domain_type in data[pdbid]: if domain_type in domain_to_make: for domain in data[pdbid][domain_type]: if data[pdbid][domain_type][domain]['mappings']: identifier = data[pdbid][domain_type][domain]['identifier'] # print (domain_type) # print (domain) for mapping in data[pdbid][domain_type][domain]['mappings']: unobs = False PDBstart = "" PDBend = "" domain_segment_id = "" domain_name = "" # check segment id if 'segment_id' in mapping: domain_segment_id = mapping['segment_id'] else: # force it to be one if its missing domain_segment_id = 1 if segment_identifier[domain_type] in mapping: domain_name = str(mapping[segment_identifier[domain_type]]) # print ("%s: %s" %(domain_name, domain_segment_id)) # then check it exits start = mapping['start']['residue_number'] end = mapping['end']['residue_number'] chain = mapping['chain_id'] entity_id = mapping['entity_id'] asym_id = mapping['struct_asym_id'] if asym_id in stored.seq_scheme: x = check_range_observed(asym_id, start, end, unobs) if x: for y in x: start = y['PDBstart'] end = y['PDBend'] try: stored.domain_dict[domain_type][domain][domain_name].append( {'asym_id': asym_id, 'chain': chain, 'start': start, 'end': end, 'entity_id': entity_id}) except: stored.domain_dict.setdefault(domain_type, {}) \ .setdefault(domain, {}) \ .update({domain_name: [ {'asym_id': asym_id, 'chain': chain, 'start': start, 'end': end, 'entity_id': entity_id}]}) def domains(pdbid): mapping(pdbid) obj_dict = {} chain_dict = {} if stored.domain_dict: # pprint(domain_dict) for domain_type in stored.domain_dict: for domain in stored.domain_dict[domain_type]: # print (domain) for instance in stored.domain_dict[domain_type][domain]: asym_list = [] entity_list = [] # print (instance) object_selection = [] pymol_selection = "" for segment in stored.domain_dict[domain_type][domain][instance]: PDBstart = segment['start'] PDBend = segment['end'] chain = segment['chain'] asym_id = segment['asym_id'] entity_id = segment['entity_id'] asym_list.append(asym_id) entity_list.append(entity_id) chain_dict.update({asym_id: {'chain': chain, 'entity_id': entity_id}}) selection = "chain %s and resi %s-%s and %s" % (chain, PDBstart, PDBend, pdbid) object_selection.append(selection) for o in object_selection: if len(object_selection) > 1: if o == object_selection[-1]: pymol_selection += "(%s)" % (o) else: pymol_selection += "(%s) or " % (o) else: pymol_selection += "%s" % (o) # print (pymol_selection) objectName = "%s_%s_%s" % (domain_type, domain, instance) obj_dict.update({objectName: {'asym_list': asym_list, 'entity_list': entity_list}}) cmd.select("test_select", pymol_selection) cmd.create(objectName, "test_select") for chain in chain_dict: print(chain) c_select = "chain %s and %s" % (chain_dict[chain]['chain'], pdbid) entity_id = chain_dict[chain]['entity_id'] length = None for entity in stored.molecule_dict[pdbid]: if entity_id == entity['entity_id']: length = entity['length'] display_type = poly_display_type(chain, 'polypeptide', length) cmd.show(display_type, c_select) cmd.color("grey", c_select) num = 1 # print (obj_dict) for obj in obj_dict: # print (obj) cmd.enable(obj) # domain can span multiple asym_id's, could be different type for i, a in enumerate(obj_dict[obj]['asym_list']): entity_id = obj_dict[obj]['entity_list'][i] length = None for entity in stored.molecule_dict[pdbid]: if entity_id == entity['entity_id']: length = entity['length'] display_type = poly_display_type(a, 'polypeptide', length) cmd.show(display_type, obj) colour = colour_return(num) # print (colour) cmd.color(colour, obj) num += 1 cmd.delete("test_select") def PDBe_startup(pdbid, method, mmCIF_file=None, file_path=None): if pdbid: pdbid = pdbid.lower() if mmCIF_file: filename = os.path.basename(mmCIF_file) if re.search('_', filename): pdbid = re.split('_', filename)[0].lower() else: pdbid = re.split('\.', filename)[0].lower() try: cmd.set('cif_keepinmemory') except: print('version of pymol does not support keeping all cif items') # check the PDB code actually exists. url = summaryURL + pdbid summary = url_response(url, "summary") if summary: # clear the dictionaries stored.domain_dict = {} stored.seq_scheme = {} stored.molecule_dict = {} stored.residue_dict = {} print('pdbid: %s' % (pdbid)) mid_pdb = pdbid[1:3] obj_list = cmd.get_object_list('all') if pdbid not in obj_list: if mmCIF_file: file_path = mmCIF_file else: # if local file exists then use it if os.path.exists("%s.cif" % (pdbid)): print("CIF found in current directory") file_path = "%s.cif" % (pdbid) else: print("fetching %s" % (pdbid)) try: # connect mode 4 works only with version 1.7 of pymol cmd.set('assembly', '') file_path = updated_ftp % pdbid print('setting connect mode to mode 4') cmd.set('connect_mode', 4) except: print('pymol version does not support assemblies or connect mode 4') file_path = ebi_ftp % (mid_pdb, pdbid) print('File to load: %s' % file_path) cmd.load(file_path, pdbid, format="cif") cmd.hide("everything", pdbid) worker_functions().count_poly(pdbid) if method == "entity": entities(pdbid) if method == "domains": entities(pdbid) domains(pdbid) elif method == "validation": Validation().launch_validation(pdbid) elif method == 'assemblies': entities(pdbid) show_assemblies(pdbid, file_path) elif method == 'show_assembly': show_assemblies(pdbid, file_path) elif method == 'all': entities(pdbid) domains(pdbid) show_assemblies(pdbid, file_path) Validation().launch_validation(pdbid) else: print("provide a method") cmd.zoom(pdbid, complete=1) elif mmCIF_file: print('no PDB ID, show assemblies from mmCIF file') cmd.load(mmCIF_file, pdbid, format="cif") # worker_functions().count_poly(pdbid) show_assemblies(pdbid, mmCIF_file) else: print("please provide a 4 letter PDB code") def __init__(self): try: # Simply add the menu entry and callback self.menuBar.addmenuitem('Plugin', 'command', 'PDB Loader', label='PDB Analysis - All', command=lambda s=self: PDBeLoaderDialog(s)) self.menuBar.addmenuitem('Plugin', 'command', 'PDB Molecules', label='PDB Analysis - Molecules', command=lambda s=self: PDBeEntityDialog(s)) self.menuBar.addmenuitem('Plugin', 'command', 'PDB Domains', label='PDB Analysis - Domains', command=lambda s=self: PDBeDomainDialog(s)) self.menuBar.addmenuitem('Plugin', 'command', 'PDB Validation', label='PDB Analysis - Validation', command=lambda s=self: PDBeValidationDialog(s)) self.menuBar.addmenuitem('Plugin', 'command', 'Assemblies', label='PDB Analysis - Assemblies', command=lambda s=self: PDBeAssemblyDialog(s)) except Exception as e: logging.error('unable to make menu items') logging.error(e) def PDBeLoaderDialog(app): import tkSimpleDialog pdbCode = tkSimpleDialog.askstring('PDB Loader Service', 'Highlight chemically distinct molecules, domains and assemblies in a PDB entry. Please enter a 4-digit pdb code:', parent=app.root) PDBe_startup(pdbCode, "all") def PDBeEntityDialog(app): import tkSimpleDialog pdbCode = tkSimpleDialog.askstring('PDB Loader Service', 'Highlight chemically distinct molecules in a PDB entry. Please enter a 4-digit pdb code:', parent=app.root) PDBe_startup(pdbCode, "entity") def PDBeDomainDialog(app): import tkSimpleDialog pdbCode = tkSimpleDialog.askstring('PDB Loader Service', 'Display Pfam, SCOP, CATH and Rfam domains on a PDB entry. Please enter a 4-digit pdb code:', parent=app.root) PDBe_startup(pdbCode, "domains") def PDBeValidationDialog(app): import tkSimpleDialog pdbCode = tkSimpleDialog.askstring('PDB Loader Service', 'Display geometric outliers on a PDB entry. Please enter a 4-digit pdb code:', parent=app.root) PDBe_startup(pdbCode, "validation") def PDBeAssemblyDialog(app): import tkSimpleDialog pdbCode = tkSimpleDialog.askstring('PDB Loader Service', 'Display assemblies for a PDB entry. Please enter a 4-digit pdb code:', parent=app.root) PDBe_startup(pdbCode, "show_assembly") def PDBe_entities(pdbid): """launch PDBe entity""" PDBe_startup(pdbid, "entity") def PDBe_domains(pdbid): PDBe_startup(pdbid, "domains") def PDBe_validation(pdbid): PDBe_startup(pdbid, "validation") def PDBe_assemblies(pdbid): PDBe_startup(pdbid, "show_assembly") pymol.cmd.extend("PDB_Analysis_Molecules", PDBe_entities) pymol.cmd.extend("PDB_Analysis_Domains", PDBe_domains) pymol.cmd.extend("PDB_Analysis_Validation", PDBe_validation) pymol.cmd.extend("PDB_Analysis_Assemblies", PDBe_assemblies) def chain_append(chain): if chain not in stored.chain: pymol.stored.chain.append(chain) def count_chain(selection): pymol.stored.chain = [] pymol.cmd.iterate(selection, 'chain_append(chain)') print(stored.chain) print("Number of chains: %s" % (len(stored.chain))) pymol.cmd.extend("count_chain", count_chain) def usage(): usage = """ Usage pymol PDB_plugin.py mmCIF_file_name """ print((usage)) # pymol.cmd.quit() if __name__ == '__main__': mmCIF_file = None pdbid = None if len(sys.argv) > 1: print(sys.argv) for arr in sys.argv: if pdb_id_re.match(arr): pdbid = pdb_id_re.match(arr).group(0) if mmCIF_file_RE.match(arr): mmCIF_file = mmCIF_file_RE.match(arr).group(1) if pdbid or mmCIF_file: print('pdbid: %s' % pdbid) print('mmCIF file: %s' % mmCIF_file) PDBe_startup(pdbid, 'all', mmCIF_file=mmCIF_file) else: print("Please provide a pdbid or mmCIF_file=FILE after -- ")