#-----------------------------------------------------------------------------# Copyright (c) 2012 - 2022, Anaconda, Inc., and Bokeh Contributors.# All rights reserved.## The full license is in the file LICENSE.txt, distributed with this software.#-----------------------------------------------------------------------------''' Helper functions for downloading and accessing sample data.'''#-----------------------------------------------------------------------------# Boilerplate#-----------------------------------------------------------------------------from__future__importannotationsimportlogging# isort:skiplog=logging.getLogger(__name__)#-----------------------------------------------------------------------------# Imports#-----------------------------------------------------------------------------# NOTE: since downloading sampledata is not a common occurrence, non-stdlib# imports are generally deferred in this module# Standard library importsimporthashlibimportjsonfromosimportmkdir,removefromos.pathimport(abspath,dirname,exists,expanduser,isdir,isfile,join,splitext,)fromsysimportstdoutfromtypingimportAny,TextIO,castfromurllib.parseimporturljoinfromurllib.requestimporturlopen#-----------------------------------------------------------------------------# Globals and constants#-----------------------------------------------------------------------------__all__=('download',)DataFrame=Any#-----------------------------------------------------------------------------# General API#-----------------------------------------------------------------------------
def _file_md5(path: str, chunk_size: int = 1 << 16) -> str:
    ''' Return the hex MD5 digest of the file at ``path``.

    The file is hashed in ``chunk_size`` byte pieces so that large data
    files are never held in memory all at once, and the handle is always
    closed.

    '''
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def download(progress: bool = True) -> None:
    ''' Download larger data sets for various Bokeh examples.

    The list of files to fetch is read from ``sampledata.json``, located next
    to this module. It is iterated as ``(filename, md5)`` pairs. A file that
    is already present in the data directory with a matching MD5 checksum is
    skipped; a missing or mismatching file is (re-)downloaded.

    Args:
        progress (bool, optional) :
            Whether to print a running progress indicator for each download
            (default: True)

    Returns:
        None

    '''
    data_dir = external_data_dir(create=True)
    print("Using data directory: %s" % data_dir)

    # HTTP requests are cheaper for us, and there is nothing private to protect
    s3 = 'http://sampledata.bokeh.org'

    with open(join(dirname(__file__), "sampledata.json"), encoding="utf-8") as f:
        files = json.load(f)

    for filename, md5 in files:
        # Work out the name the file has on disk *after* download: zip
        # archives are unpacked (to a .csv unless the inner name already
        # carries an extension), everything else keeps its own name.
        real_name, ext = splitext(filename)
        if ext == '.zip':
            if not splitext(real_name)[1]:
                real_name += ".csv"
        else:
            real_name += ext
        real_path = join(data_dir, real_name)

        if exists(real_path):
            if _file_md5(real_path) == md5:
                print(f"Skipping {filename!r} (checksum match)")
                continue
            print(f"Re-fetching {filename!r} (checksum mismatch)")

        _download_file(s3, filename, data_dir, progress=progress)
#-----------------------------------------------------------------------------
# Dev API
#-----------------------------------------------------------------------------

def external_csv(module: str, name: str, **kw: Any) -> DataFrame:
    ''' Read a downloaded (external) sample data CSV file with Pandas.

    Args:
        module (str) :
            Name of the requesting module; only interpolated into the message
            handed to ``import_required`` for the Pandas import

        name (str) :
            File name, resolved against the external sample data directory

        **kw :
            Passed through unchanged to ``pandas.read_csv``

    Returns:
        DataFrame

    '''
    from .dependencies import import_required
    pd = import_required('pandas', '%s sample data requires Pandas (http://pandas.pydata.org) to be installed' % module)
    return cast(Any, pd).read_csv(external_path(name), **kw)


def external_data_dir(create: bool = False) -> str:
    ''' Return the directory that holds downloaded sample data.

    The default is the ``data`` subdirectory of the Bokeh config directory
    (``~/.bokeh``). If the YAML file ``~/.bokeh/config`` can be read and has
    a ``sampledata_dir`` entry, that (user-expanded) path is used instead.

    Args:
        create (bool, optional) :
            Whether to create missing directories (default: False)

    Returns:
        str

    Raises:
        RuntimeError
            If ``yaml`` cannot be imported, if the directory is missing and
            ``create`` is False, if it cannot be created, or if the path
            exists but is not a directory.

    '''
    try:
        import yaml
    except ImportError as e:
        raise RuntimeError("'yaml' and 'pyyaml' are required to use bokeh.sampledata functions") from e

    bokeh_dir = _bokeh_dir(create=create)
    data_dir = join(bokeh_dir, "data")

    try:
        with open(join(bokeh_dir, 'config')) as f:
            config = yaml.safe_load(f)
        data_dir = expanduser(config['sampledata_dir'])
    except (OSError, TypeError, KeyError):
        # No config file, an empty/non-mapping config, or a config without a
        # 'sampledata_dir' entry: silently keep the default location.
        pass

    if not exists(data_dir):
        if not create:
            raise RuntimeError('bokeh sample data directory does not exist, please execute bokeh.sampledata.download()')
        print("Creating %s directory" % data_dir)
        try:
            mkdir(data_dir)
        except OSError as e:
            raise RuntimeError("could not create bokeh data directory at %s" % data_dir) from e
    elif not isdir(data_dir):
        raise RuntimeError("%s exists but is not a directory" % data_dir)

    return data_dir


def external_path(filename: str) -> str:
    ''' Return the full path of a downloaded sample data file.

    Args:
        filename (str) :
            File name, relative to the external sample data directory

    Returns:
        str

    Raises:
        RuntimeError
            If the external data directory is unavailable, or if ``filename``
            is not an existing regular file inside it.

    '''
    data_dir = external_data_dir()
    fn = join(data_dir, filename)
    # isfile() is False both for missing paths and for non-file paths, which
    # are exactly the two cases that must be reported.
    if not isfile(fn):
        raise RuntimeError('Could not locate external data file %s. Please execute bokeh.sampledata.download()' % fn)
    return fn


def package_csv(module: str, name: str, **kw: Any) -> DataFrame:
    ''' Read a sample data CSV file that ships inside the package with Pandas.

    Args:
        module (str) :
            Name of the requesting module; only interpolated into the message
            handed to ``import_required`` for the Pandas import

        name (str) :
            File name, resolved against the packaged sample data directory

        **kw :
            Passed through unchanged to ``pandas.read_csv``

    Returns:
        DataFrame

    '''
    from .dependencies import import_required
    pd = import_required('pandas', '%s sample data requires Pandas (http://pandas.pydata.org) to be installed' % module)
    return cast(Any, pd).read_csv(package_path(name), **kw)


def package_dir() -> str:
    ''' Return the absolute path of the packaged sample data directory,
    i.e. ``../sampledata/_data`` relative to this module's directory.

    '''
    return abspath(join(dirname(__file__), "..", "sampledata", "_data"))


def package_path(filename: str) -> str:
    ''' Return the path of ``filename`` inside the packaged sample data
    directory. No check is made that the file exists.

    '''
    return join(package_dir(), filename)


def open_csv(filename: str) -> TextIO:
    ''' Open ``filename`` for reading as UTF-8 text with newline translation
    disabled (``newline=''``), as expected by the ``csv`` module.

    The caller is responsible for closing the returned file object.

    '''
    return open(filename, 'r', newline='', encoding='utf8')

#-----------------------------------------------------------------------------
# Private API
#-----------------------------------------------------------------------------

def _bokeh_dir(create: bool = False) -> str:
    ''' Return the path of the Bokeh config directory, ``~/.bokeh``.

    Args:
        create (bool, optional) :
            Whether to create the directory when it is missing. When False,
            the path is returned even if nothing exists there.
            (default: False)

    Returns:
        str

    Raises:
        RuntimeError
            If the directory cannot be created, or if the path exists but is
            not a directory.

    '''
    bokeh_dir = join(expanduser("~"), ".bokeh")
    if not exists(bokeh_dir):
        if not create:
            return bokeh_dir
        print("Creating %s directory" % bokeh_dir)
        try:
            mkdir(bokeh_dir)
        except OSError as e:
            raise RuntimeError("could not create bokeh config directory at %s" % bokeh_dir) from e
    elif not isdir(bokeh_dir):
        raise RuntimeError("%s exists but is not a directory" % bokeh_dir)
    return bokeh_dir


def _download_file(base_url: str, filename: str, data_dir: str, progress: bool = True) -> None:
    ''' Download ``filename`` from ``base_url`` into ``data_dir``.

    A ``.zip`` download is unpacked afterwards: the member named like the
    archive stem (with ``.csv`` appended when the stem has no extension of
    its own) is extracted into ``data_dir`` and the archive is deleted.

    Args:
        base_url (str) :
            URL that ``filename`` is joined onto

        filename (str) :
            Name of the remote file, also used as the local file name

        data_dir (str) :
            Existing directory to write into

        progress (bool, optional) :
            Whether to write a running byte count / percentage to stdout
            (default: True)

    Returns:
        None

    '''
    # These are actually somewhat expensive imports that added ~5% to overall
    # typical bokeh import times. Since downloading sampledata is not a common
    # action, we defer them to inside this function.
    from zipfile import ZipFile

    file_url = urljoin(base_url, filename)
    file_path = join(data_dir, filename)

    # Both the network response and the local file are closed on exit, even
    # when the transfer fails part-way through.
    with urlopen(file_url) as response, open(file_path, 'wb') as file:
        file_size = int(response.headers["Content-Length"])
        print("Downloading: %s (%d bytes)" % (filename, file_size))

        fetch_size = 0
        block_size = 16384

        while True:
            data = response.read(block_size)
            if not data:
                break

            fetch_size += len(data)
            file.write(data)

            if progress:
                status = "\r%10d [%6.2f%%]" % (fetch_size, fetch_size*100.0/file_size)
                stdout.write(status)
                stdout.flush()

    if progress:
        # terminate the "\r"-rewritten status line
        print()

    real_name, ext = splitext(filename)

    if ext == '.zip':
        if not splitext(real_name)[1]:
            real_name += ".csv"

        print("Unpacking: %s" % real_name)

        with ZipFile(file_path, 'r') as zip_file:
            zip_file.extract(real_name, data_dir)

        remove(file_path)

#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------