#-----------------------------------------------------------------------------
# Copyright (c) 2012 - 2023, Anaconda, Inc., and Bokeh Contributors.
# All rights reserved.
#
# The full license is in the file LICENSE.txt, distributed with this software.
#-----------------------------------------------------------------------------
''' Helper functions for downloading and accessing sample data.

'''

#-----------------------------------------------------------------------------
# Boilerplate
#-----------------------------------------------------------------------------
from __future__ import annotations # isort:skip

# NOTE: skip logging imports so that this module may be run as a script

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

# Standard library imports
import hashlib
import json
from os.path import splitext
from pathlib import Path
from sys import stdout
from typing import TYPE_CHECKING, Any, TextIO
from urllib.parse import urljoin
from urllib.request import urlopen

# NOTE: since downloading sampledata is not a common occurrence, non-stdlib
# imports are generally deferred in this module

if TYPE_CHECKING:
    import pandas as pd

#-----------------------------------------------------------------------------
# Globals and constants
#-----------------------------------------------------------------------------

__all__ = (
    'download',
)

#-----------------------------------------------------------------------------
# General API
#-----------------------------------------------------------------------------

def download(progress: bool = True) -> None:
    ''' Download larger data sets for various Bokeh examples.

    The ``sampledata.json`` manifest located next to this module is read as a
    sequence of ``(filename, md5)`` pairs. For each entry, the name of the
    file as it is stored locally is derived (zip archives are stored
    unpacked), and the entry is skipped when a local copy with a matching MD5
    checksum already exists. Everything else is (re-)fetched.

    Args:
        progress (bool, optional) :
            Whether to display download progress; forwarded to the
            per-file download helper (default: True)

    Returns:
        None

    '''
    data_dir = external_data_dir(create=True)
    print(f"Using data directory: {data_dir}")

    # HTTP requests are cheaper for us, and there is nothing private to protect
    s3 = 'http://sampledata.bokeh.org'

    # A context manager closes the manifest handle deterministically instead
    # of leaving it to the garbage collector.
    with open(Path(__file__).parent / "sampledata.json") as manifest:
        files = json.load(manifest)

    for filename, md5 in files:
        # Work out the name the file has on disk once it has been downloaded:
        # "x.zip" unpacks to "x.csv", "x.ext.zip" unpacks to "x.ext", and
        # anything that is not a zip archive keeps its own name.
        real_name, ext = splitext(filename)
        if ext == '.zip':
            if not splitext(real_name)[1]:
                real_name += ".csv"
        else:
            real_name += ext
        real_path = data_dir / real_name

        if real_path.exists():
            # read_bytes() opens, reads and closes the file in one step, so no
            # handle is leaked per checked file.
            local_md5 = hashlib.md5(real_path.read_bytes()).hexdigest()
            if local_md5 == md5:
                print(f"Skipping {filename!r} (checksum match)")
                continue
            print(f"Re-fetching {filename!r} (checksum mismatch)")

        _download_file(s3, filename, data_dir, progress=progress)

#-----------------------------------------------------------------------------
# Dev API
#-----------------------------------------------------------------------------

def external_csv(module: str, name: str, **kw: Any) -> pd.DataFrame:
    ''' Load a downloaded sample data CSV file into a pandas DataFrame.

    Args:
        module (str) :
            Accepted for API symmetry; not used by this function.

        name (str) :
            File name, resolved inside the external sample data directory.

        **kw :
            Extra keyword arguments passed on to ``pandas.read_csv``.

    Returns:
        pd.DataFrame

    Raises:
        RuntimeError : if the file cannot be located (see ``external_path``)

    '''
    import pandas as pd
    return pd.read_csv(external_path(name), **kw)

def external_data_dir(create: bool = False) -> Path:
    ''' Return the directory that holds downloaded sample data.

    The default location is ``~/.bokeh/data``. It can be overridden with a
    ``sampledata_dir`` entry in the YAML file ``~/.bokeh/config``.

    Args:
        create (bool, optional) :
            Whether to create missing directories (default: False)

    Returns:
        Path

    Raises:
        RuntimeError : if PyYAML is not importable, if the directory is
            missing and ``create`` is False, if it cannot be created, or if
            the path exists but is not a directory

    '''
    try:
        import yaml
    except ImportError as err:
        raise RuntimeError("'yaml' and 'pyyaml' are required to use bokeh.sampledata functions") from err

    bokeh_dir = _bokeh_dir(create=create)
    data_dir = bokeh_dir / "data"

    # The config file is optional. A missing/unreadable file (OSError), an
    # empty or non-mapping document (TypeError) or a mapping without the
    # 'sampledata_dir' key (KeyError) all mean "keep the default location".
    try:
        with open(bokeh_dir / 'config') as config_file:
            config = yaml.safe_load(config_file)
        # Build a Path first: Path.expanduser is an instance method and fails
        # with AttributeError when handed the raw string from the config.
        data_dir = Path(config['sampledata_dir']).expanduser()
    except (OSError, TypeError, KeyError):
        pass

    if not data_dir.exists():
        if not create:
            raise RuntimeError('bokeh sample data directory does not exist, please execute bokeh.sampledata.download()')
        print(f"Creating {data_dir} directory")
        try:
            data_dir.mkdir()
        except OSError as err:
            raise RuntimeError(f"could not create bokeh data directory at {data_dir}") from err
    elif not data_dir.is_dir():
        raise RuntimeError(f"{data_dir} exists but is not a directory")

    return data_dir

def external_path(filename: str | Path) -> Path:
    ''' Return the path of a file inside the external sample data directory.

    Args:
        filename (str or Path) :
            Name of the file, relative to the sample data directory.

    Returns:
        Path

    Raises:
        RuntimeError : if the path does not refer to an existing regular file

    '''
    data_dir = external_data_dir()
    fn = data_dir / filename
    # is_file() is already False for paths that do not exist
    if not fn.is_file():
        raise RuntimeError(f"Could not locate external data file {fn}. Please execute bokeh.sampledata.download()")
    return fn

def package_csv(module: str, name: str, **kw: Any) -> pd.DataFrame:
    ''' Load a CSV file shipped inside the package into a pandas DataFrame.

    Args:
        module (str) :
            Accepted for API symmetry; not used by this function.

        name (str) :
            File name, resolved inside the packaged sample data directory.

        **kw :
            Extra keyword arguments passed on to ``pandas.read_csv``.

    Returns:
        pd.DataFrame

    '''
    import pandas as pd
    return pd.read_csv(package_path(name), **kw)

def package_dir() -> Path:
    ''' Return the resolved ``sampledata/_data`` directory, located relative
    to the parent of the directory containing this module.

    Returns:
        Path

    '''
    return Path(__file__).parents[1].joinpath("sampledata", "_data").resolve()

def package_path(filename: str | Path) -> Path:
    ''' Return the path of ``filename`` inside the packaged data directory.

    No check is made that the file exists.

    Returns:
        Path

    '''
    return package_dir() / filename

def open_csv(filename: str | Path) -> TextIO:
    ''' Open a file for reading as UTF-8 text with newline translation
    disabled (``newline=''``), which is the mode the ``csv`` module expects.

    The caller is responsible for closing the returned file object.

    Returns:
        TextIO

    '''
    return open(filename, newline='', encoding='utf8')

#-----------------------------------------------------------------------------
# Private API
#-----------------------------------------------------------------------------

def _bokeh_dir(create: bool = False) -> Path:
    ''' Return the ``~/.bokeh`` directory, optionally creating it.

    When the directory is missing and ``create`` is False, the path is
    returned anyway without being created.

    Raises:
        RuntimeError : if the directory cannot be created, or if the path
            exists but is not a directory

    '''
    bokeh_dir = Path("~").expanduser() / ".bokeh"
    if not bokeh_dir.exists():
        if not create:
            return bokeh_dir
        print(f"Creating {bokeh_dir} directory")
        try:
            bokeh_dir.mkdir()
        except OSError as err:
            raise RuntimeError(f"could not create bokeh config directory at {bokeh_dir}") from err
    elif not bokeh_dir.is_dir():
        raise RuntimeError(f"{bokeh_dir} exists but is not a directory")
    return bokeh_dir

def _download_file(base_url: str, filename: str, data_dir: Path, progress: bool = True) -> None:
    ''' Fetch ``filename`` from ``base_url`` into ``data_dir``.

    Files ending in ``.zip`` are unpacked after the download: the single
    member named after the archive (with ``.csv`` appended when the archive
    name carries no inner extension) is extracted into ``data_dir`` and the
    archive itself is deleted.

    Args:
        base_url (str) :
            URL that ``filename`` is joined onto.

        filename (str) :
            Name of the remote file; also the local file name.

        data_dir (Path) :
            Existing directory to store the file in.

        progress (bool, optional) :
            Whether to write a running byte count / percentage to stdout
            (default: True)

    Returns:
        None

    '''
    # These are actually somewhat expensive imports that added ~5% to overall
    # typical bokeh import times. Since downloading sampledata is not a common
    # action, we defer them to inside this function.
    from zipfile import ZipFile

    file_url = urljoin(base_url, filename)
    file_path = data_dir / filename

    # The request is opened before the local file, so a failed request does
    # not create an empty file. Both handles are closed when the block exits,
    # including on errors.
    with urlopen(file_url) as response, open(file_path, 'wb') as file:
        # The header may be absent (e.g. chunked transfer); then only the
        # running byte count can be reported.
        content_length = response.headers["Content-Length"]
        file_size = int(content_length) if content_length is not None else None
        size_text = f"{file_size} bytes" if file_size is not None else "unknown size"
        print(f"Downloading: {filename} ({size_text})")

        fetch_size = 0
        block_size = 16384

        while True:
            data = response.read(block_size)
            if not data:
                break

            fetch_size += len(data)
            file.write(data)

            if progress:
                # "\r" rewrites the same terminal line on every block
                status = f"\r{fetch_size:< 10d}"
                if file_size:
                    # a single "%": f-strings do not treat "%%" as an escape
                    status += f" [{fetch_size*100.0/file_size:6.2f}%]"
                stdout.write(status)
                stdout.flush()

    if progress:
        # terminate the progress line
        print()

    real_name, ext = splitext(filename)

    if ext == '.zip':
        if not splitext(real_name)[1]:
            real_name += ".csv"

        print(f"Unpacking: {real_name}")

        with ZipFile(file_path, 'r') as zip_file:
            zip_file.extract(real_name, data_dir)

        file_path.unlink()

#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------

# This is necessary so that we can run the sampledata download code in the
# release build, before an actual package exists.
if __name__ == "__main__":
    download(progress=False)