"""Source code for ndb_adapter.ndb_download: helpers for downloading structure files from NDB."""
import os
import zlib
from enum import Enum
from io import BytesIO
from typing import List, Optional, Union

import requests

from ndb_adapter.ndb_base import NDBBase
class _Pdb(NDBBase):
    """Settings describing how a pdb file is located and stored

    The remote address is assembled as ``Url + PreName + <lower-cased id> + PostName + UrlExt``;
    a saved copy is named ``PreName + <lower-cased id> + PostName + Ext``.

    :cvar Url: base url the remote file name is appended to
    :cvar Ext: extension used for the locally saved file
    :cvar UrlExt: extension of the file on the server
    :cvar PreName: text put in front of the structure id in the file name
    :cvar PostName: text put behind the structure id in the file name
    """

    # Where the file lives
    Url = NDBBase._chiralCorrectUrl

    # How the file name is built around the structure id
    PreName = 'pdb'
    PostName = ''

    # Remote (compressed) and local extensions
    UrlExt = ".ent.gz"
    Ext = ".ent"
class _PdbNmr(_Pdb):
    """Settings for pdb nmr files: same naming as ``_Pdb``, different base url"""

    Url = NDBBase._nmrUrl
class _PdbBioAssembly(NDBBase):
    """Settings for pdb biological assembly files

    The local and remote extensions are the same and no name prefix/postfix is used.
    """

    # Where the file lives
    Url = NDBBase._bioAssemblyCoordinateUrl

    # The structure id is used as the file name without additions
    PreName = ''
    PostName = ''

    # Remote and local extensions
    UrlExt = ".pdb"
    Ext = ".pdb"
class _Cif(NDBBase):
    """Settings describing how a cif file is located and stored"""

    # Where the file lives
    Url = NDBBase._mmCifUrl

    # The structure id is used as the file name without additions
    PreName = ''
    PostName = ''

    # Remote (compressed) and local extensions
    UrlExt = ".cif.gz"
    Ext = ".cif"
class _CifNmr(_Cif):
    """Settings for cif nmr files: same naming as ``_Cif``, different base url"""

    Url = NDBBase._nmrMmCifUrl
class _CifStructureFactors(_Pdb):
    """Settings for cif structure factors files

    Extensions are inherited from ``_Pdb``; only the base url and the name
    prefix/postfix are overridden.
    """

    Url = NDBBase._structureFactorsUrl

    # File name becomes 'r' + <structure id> + 'sf'
    PostName = 'sf'
    PreName = 'r'
class _CifNmrRestraints(NDBBase):
    """Settings describing how an nmr restraints file is located and stored"""

    # Where the file lives
    Url = NDBBase._nmrRestraintsUrl

    # The structure id is used as the file name without additions
    PreName = ''
    PostName = ''

    # Remote (compressed) and local extensions
    UrlExt = ".mr.gz"
    Ext = ".mr"
class _XmlComplete(NDBBase):
    """Settings describing how a complete Xml file is located and stored"""

    # Where the file lives
    Url = NDBBase._xmlCompleteUrl

    # The structure id is used as the file name without additions
    PreName = ''
    PostName = ''

    # Remote (compressed) and local extensions
    UrlExt = ".xml.gz"
    Ext = ".xml"
class _XmlCoordinates(_XmlComplete):
    """Settings for Xml coordinates files

    Extensions and prefix come from ``_XmlComplete``; the base url and the
    name postfix are overridden.
    """

    PostName = '-extatom'
    Url = NDBBase._xmlCoordinatesUrl
class _XmlHeader(_XmlComplete):
    """Settings for Xml header files

    Extensions and prefix come from ``_XmlComplete``; the base url and the
    name postfix are overridden.
    """

    PostName = '-noatom'
    Url = NDBBase._xmlHeaderUrl
class DownloadType(Enum):
    """Available file download formats

    The value of every member is the helper class holding the url and the
    file naming settings for that format.

    :cvar Pdb: Asymmetric Unit coordinates (pdb format, Unix compressed(.gz))
    :cvar PdbNmr: Coordinates (pdb format, Unix compressed(.gz))
    :cvar PdbBioAssembly: Biological Assembly coordinates
    :cvar Cif: Asymmetric Unit coordinates (cif format, Unix compressed(.gz))
    :cvar CifNmr: Coordinates (cif format, Unix compressed(.gz))
    :cvar CifStructureFactors: Structure Factors (cif format)
    :cvar CifNmrRestraints: NMR Restraints (cif format, Unix compressed(.gz))
    :cvar XmlComplete: XML | Complete with coordinates (xml format, GNU compressed(.gz))
    :cvar XmlCoordinates: XML | Coordinates only (xml format, GNU compressed(.gz))
    :cvar XmlHeader: XML | Header only (xml format, GNU compressed(.gz))
    """

    # pdb formats
    Pdb = _Pdb
    PdbNmr = _PdbNmr
    PdbBioAssembly = _PdbBioAssembly

    # cif formats
    Cif = _Cif
    CifNmr = _CifNmr
    CifStructureFactors = _CifStructureFactors
    CifNmrRestraints = _CifNmrRestraints

    # xml formats
    XmlComplete = _XmlComplete
    XmlCoordinates = _XmlCoordinates
    XmlHeader = _XmlHeader
class DownloadHelper(object):
    """Helper class for downloading from NDB"""

    @staticmethod
    def download(structure_id: str, download_type: DownloadType = DownloadType.Pdb,
                 save: bool = False, target_dir: str = '') -> Optional[Union[str, List[str]]]:
        """Download a structure file from NDB

        For ``DownloadType.PdbBioAssembly`` the numbered files ``<name>.pdb1``, ``<name>.pdb2``, ...
        are fetched without decompression until the server answers with 404, so the result is a
        list (empty when not even the first file exists). Every other type fetches one file and
        decompresses it. For ``DownloadType.Pdb`` a missing file is retried once without the
        name prefix.

        :param structure_id: structure NDB ID or PDB ID e.g. 4Z6C
        :type structure_id: str
        :param download_type: file download type (default value is DownloadType.Pdb)
        :type download_type: DownloadType
        :param save: tells if file should be saved or not (default value = False)
        :type save: bool
        :param target_dir: where to save file (default value is current dir)
        :type target_dir: str
        :return: file text (list of file texts for PdbBioAssembly), or None when ``save`` is True
        :rtype: str or list of str or None
        :raise AttributeError: when structure id is empty
        :raise FileNotFoundError: when file is not present on server
        :raise BufferError: when the downloaded file cannot be decompressed
        """
        if not structure_id:
            raise AttributeError("structure id is empty")

        d_type = download_type.value
        structure = structure_id.lower()
        file_name = d_type.PreName + structure + d_type.PostName

        if d_type is DownloadType.PdbBioAssembly.value:
            # Assemblies are numbered from 1; the first 404 marks the end of the series.
            results = []
            index = 1
            while True:
                proper_url = d_type.Url + file_name + d_type.UrlExt + str(index)
                try:
                    results.append(DownloadHelper._download_prepare(proper_url, decompress=False))
                except FileNotFoundError:
                    break
                index += 1

            if not save:
                return results
            for index, text in enumerate(results, start=1):
                DownloadHelper._save_text(target_dir, file_name + d_type.Ext + str(index), text)
            return None

        try:
            file_text = DownloadHelper._download_prepare(d_type.Url + file_name + d_type.UrlExt)
        except FileNotFoundError:
            if d_type is not DownloadType.Pdb.value:
                raise
            # Plain pdb files get a second attempt under the name without the prefix;
            # the saved file then uses that shorter name as well.
            file_name = structure + d_type.PostName
            file_text = DownloadHelper._download_prepare(d_type.Url + file_name + d_type.UrlExt)

        if not save:
            return file_text
        DownloadHelper._save_text(target_dir, file_name + d_type.Ext, file_text)
        return None

    @staticmethod
    def _save_text(target_dir: str, file_name: str, text: str) -> None:
        """Write text to a file in target dir

        :param target_dir: directory to write to (current dir when empty)
        :type target_dir: str
        :param file_name: name of the file including extension
        :type file_name: str
        :param text: content to write
        :type text: str
        """
        target = target_dir if target_dir else os.getcwd()
        # The text was decoded as utf-8, so it is written back with the same encoding
        # instead of the platform default.
        with open(os.path.join(target, file_name), 'w', encoding="utf-8") as file:
            file.write(text)

    @staticmethod
    def _download_prepare(url: str, decompress: bool = True) -> str:
        """To download and prepare if needed

        :param url: url to download from
        :type url: str
        :param decompress: tells if decompress (default value = True)
        :type decompress: bool
        :return: file string
        :rtype: str
        :raise FileNotFoundError: when file is not present on server
        :raise BufferError: when decompression fails
        """
        raw = DownloadHelper.download_file(url).read()
        if decompress:
            try:
                # wbits of 32 + MAX_WBITS makes zlib detect a gzip or zlib header automatically
                raw = zlib.decompress(raw, 32 + zlib.MAX_WBITS)
            except zlib.error as error:
                raise BufferError("File corrupted") from error
        return raw.decode("utf-8")

    @staticmethod
    def download_file(url: str) -> BytesIO:
        """Function to download file and convert to BytesIO

        :param url: file url
        :type url: str
        :return: file as BytesIO
        :rtype: BytesIO
        :raise FileNotFoundError: when server answers with status 404
        """
        with requests.session() as session:
            resp = session.get(url)
            # NOTE(review): only 404 is treated as an error; the body of any other
            # response status is handed back as the file content.
            if resp.status_code == 404:
                raise FileNotFoundError("No file on server")
            return BytesIO(resp.content)