This docs on this page refers to a PREVIOUS VERSION. For the latest stable release, go to https://docs.bokeh.org/

Archived docs for versions <= 1.0.4 have had to be modified from their original published configuration, and may be missing some features (e.g. source listing)

All users are encourage to update to version 1.1 or later, as soon as they are able.

bokeh.util.sampledata — Bokeh 1.0.3 documentation

Source code for bokeh.util.sampledata

#-----------------------------------------------------------------------------
# Copyright (c) 2012 - 2017, Anaconda, Inc. All rights reserved.
#
# Powered by the Bokeh Development Team.
#
# The full license is in the file LICENSE.txt, distributed with this software.
#-----------------------------------------------------------------------------
''' Helper functions for downloading and accessing sample data.

'''

#-----------------------------------------------------------------------------
# Boilerplate
#-----------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
log = logging.getLogger(__name__)

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

# NOTE: since downloading sampledata is not a common occurrnce, non-stdlib
# imports are generally deferrered in this module

# Standard library imports
from os import mkdir, remove
from os.path import abspath, dirname, exists, expanduser, isdir, isfile, join, splitext
from sys import stdout

# External imports
import six
from six.moves.urllib_parse import urljoin

# Bokeh imports

#-----------------------------------------------------------------------------
# Globals and constants
#-----------------------------------------------------------------------------

__all__ = (
    'download',
)

#-----------------------------------------------------------------------------
# General API
#-----------------------------------------------------------------------------

[docs]def download(progress=True): ''' Download larger data sets for various Bokeh examples. ''' data_dir = external_data_dir(create=True) print("Using data directory: %s" % data_dir) s3 = 'https://bokeh-sampledata.s3.amazonaws.com' files = [ (s3, 'CGM.csv'), (s3, 'US_Counties.zip'), (s3, 'us_cities.json'), (s3, 'unemployment09.csv'), (s3, 'AAPL.csv'), (s3, 'FB.csv'), (s3, 'GOOG.csv'), (s3, 'IBM.csv'), (s3, 'MSFT.csv'), (s3, 'WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip'), (s3, 'gapminder_fertility.csv'), (s3, 'gapminder_population.csv'), (s3, 'gapminder_life_expectancy.csv'), (s3, 'gapminder_regions.csv'), (s3, 'world_cities.zip'), (s3, 'airports.json'), (s3, 'movies.db.zip'), (s3, 'airports.csv'), (s3, 'routes.csv'), (s3, 'haarcascade_frontalface_default.xml'), ] for base_url, filename in files: _download_file(base_url, filename, data_dir, progress=progress)
#----------------------------------------------------------------------------- # Dev API #----------------------------------------------------------------------------- def external_csv(module, name, **kw): ''' ''' from .dependencies import import_required pd = import_required('pandas', '%s sample data requires Pandas (http://pandas.pydata.org) to be installed' % module) return pd.read_csv(external_path(name), **kw) def external_data_dir(create=False): ''' ''' try: import yaml except ImportError: raise RuntimeError("'yaml' and 'pyyaml' are required to use bokeh.sampledata functions") bokeh_dir = _bokeh_dir(create=create) data_dir = join(bokeh_dir, "data") try: config = yaml.load(open(join(bokeh_dir, 'config'))) data_dir = expanduser(config['sampledata_dir']) except (IOError, TypeError): pass if not exists(data_dir): if not create: raise RuntimeError('bokeh sample data directory does not exist, please execute bokeh.sampledata.download()') print("Creating %s directory" % data_dir) try: mkdir(data_dir) except OSError: raise RuntimeError("could not create bokeh data directory at %s" % data_dir) else: if not isdir(data_dir): raise RuntimeError("%s exists but is not a directory" % data_dir) return data_dir def external_path(filename): data_dir = external_data_dir() fn = join(data_dir, filename) if not exists(fn) and isfile(fn): raise RuntimeError('Could not locate external data file %e. Please execute bokeh.sampledata.download()' % fn) return fn def package_csv(module, name, **kw): ''' ''' from .dependencies import import_required pd = import_required('pandas', '%s sample data requires Pandas (http://pandas.pydata.org) to be installed' % module) return pd.read_csv(package_path(name), **kw) def package_dir(): ''' ''' return abspath(join(dirname(__file__), "..", "sampledata", "_data")) def package_path(filename): ''' ''' return join(package_dir(), filename) def open_csv(filename): ''' ''' # csv differs in Python 2.x and Python 3.x. Open the file differently in each. if six.PY2: return open(filename, 'rb') else: return open(filename, 'r', newline='', encoding='utf8') #----------------------------------------------------------------------------- # Private API #----------------------------------------------------------------------------- def _bokeh_dir(create=False): ''' ''' bokeh_dir = join(expanduser("~"), ".bokeh") if not exists(bokeh_dir): if not create: return bokeh_dir print("Creating %s directory" % bokeh_dir) try: mkdir(bokeh_dir) except OSError: raise RuntimeError("could not create bokeh config directory at %s" % bokeh_dir) else: if not isdir(bokeh_dir): raise RuntimeError("%s exists but is not a directory" % bokeh_dir) return bokeh_dir def _download_file(base_url, filename, data_dir, progress=True): ''' ''' # These is actually a somewhat expensive imports that added ~5% to overall # typical bokeh import times. Since downloading sampledata is not a common # action, we defer them to inside this function. from six.moves.urllib.request import urlopen from zipfile import ZipFile file_url = urljoin(base_url, filename) file_path = join(data_dir, filename) url = urlopen(file_url) with open(file_path, 'wb') as file: file_size = int(url.headers["Content-Length"]) print("Downloading: %s (%d bytes)" % (filename, file_size)) fetch_size = 0 block_size = 16384 while True: data = url.read(block_size) if not data: break fetch_size += len(data) file.write(data) if progress: status = "\r%10d [%6.2f%%]" % (fetch_size, fetch_size*100.0/file_size) stdout.write(status) stdout.flush() if progress: print() real_name, ext = splitext(filename) if ext == '.zip': if not splitext(real_name)[1]: real_name += ".csv" print("Unpacking: %s" % real_name) with ZipFile(file_path, 'r') as zip_file: zip_file.extract(real_name, data_dir) remove(file_path) #----------------------------------------------------------------------------- # Code #-----------------------------------------------------------------------------