bitbake: implement checksums for local files in SRC_URI

Gathers a list of paths to have checksums calculated at parse time, and
processes these when calculating task hashes. Checksums are cached against
the file's current mtime, so changing any local file in SRC_URI will now
change the do_fetch taskhash, forcing a rebuild.
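
In miniature, the caching scheme works like this (an illustrative sketch
only; the real implementation, which is persistent and multiprocess-safe,
is the new lib/bb/checksum.py below):

    import hashlib
    import os
    import stat

    _cache = {}  # sketch of the cache entry shape: path -> (mtime, md5)

    def cached_checksum(path):
        # Re-hash the file only when its mtime differs from the cached one
        mtime = os.stat(path)[stat.ST_MTIME]
        entry = _cache.get(path)
        if entry and entry[0] == mtime:
            return entry[1]
        with open(path, "rb") as f:
            hashval = hashlib.md5(f.read()).hexdigest()
        _cache[path] = (mtime, hashval)
        return hashval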

This change adds roughly an 8% increase in parse time (a few seconds) and
a few more seconds during runqueue generation - a fairly moderate
performance hit.

Note that since paths are resolved at parse time, this will not force
a rebuild when newly introduced files change which path is resolved - for
example, when a machine-specific version of a file is added without the
recipe itself changing. This will need to be handled in a future update.

Code to hook this into the signature generator was courtesy of
Richard Purdie <richard.purdie@linuxfoundation.org>.

Implements [YOCTO #2044].

(Bitbake rev: c993b7c457f8b7776e8a5dff253bfa0724bc2cae)

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
commit 8b8be74ed2 (parent d7b818b51f)
Author: Paul Eggleton, 2012-05-23 00:23:32 +01:00; committed by Richard Purdie
5 changed files with 211 additions and 3 deletions

--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py

@@ -43,7 +43,7 @@ except ImportError:
     logger.info("Importing cPickle failed. "
                 "Falling back to a very slow implementation.")
 
-__cache_version__ = "143"
+__cache_version__ = "144"
 
 def getCacheFile(path, filename, data_hash):
     return os.path.join(path, filename + "." + data_hash)
@@ -76,9 +76,13 @@ class RecipeInfoCommon(object):
                     for task in tasks)
 
     @classmethod
-    def flaglist(cls, flag, varlist, metadata):
-        return dict((var, metadata.getVarFlag(var, flag, True))
+    def flaglist(cls, flag, varlist, metadata, squash=False):
+        out_dict = dict((var, metadata.getVarFlag(var, flag, True))
                     for var in varlist)
+        if squash:
+            return dict((k,v) for (k,v) in out_dict.iteritems() if v)
+        else:
+            return out_dict
 
     @classmethod
     def getvar(cls, var, metadata):
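
The effect of the new squash parameter, with hypothetical flag values:

    # flaglist('file-checksums', ['do_fetch', 'do_unpack'], metadata)
    #   -> {'do_fetch': '/path/to/file.patch', 'do_unpack': None}
    # flaglist('file-checksums', ['do_fetch', 'do_unpack'], metadata, squash=True)
    #   -> {'do_fetch': '/path/to/file.patch'}

Squashing means only tasks that actually carry the flag end up in the
cached data.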
@@ -128,6 +132,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         self.stamp = self.getvar('STAMP', metadata)
         self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata)
         self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata)
+        self.file_checksums = self.flaglist('file-checksums', self.tasks, metadata, True)
         self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata)
         self.depends = self.depvar('DEPENDS', metadata)
         self.provides = self.depvar('PROVIDES', metadata)
@@ -154,6 +159,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         cachedata.stamp = {}
         cachedata.stamp_base = {}
         cachedata.stamp_extrainfo = {}
+        cachedata.file_checksums = {}
         cachedata.fn_provides = {}
         cachedata.pn_provides = defaultdict(list)
         cachedata.all_depends = []
@@ -185,6 +191,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         cachedata.stamp[fn] = self.stamp
         cachedata.stamp_base[fn] = self.stamp_base
         cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo
+        cachedata.file_checksums[fn] = self.file_checksums
 
         provides = [self.pn]
         for provide in self.provides:

--- /dev/null
+++ b/lib/bb/checksum.py

@@ -0,0 +1,90 @@
+# Local file checksum cache implementation
+#
+# Copyright (C) 2012 Intel Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os
+import stat
+import bb.utils
+import logging
+
+from bb.cache import MultiProcessCache
+
+logger = logging.getLogger("BitBake.Cache")
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+    logger.info("Importing cPickle failed. "
+                "Falling back to a very slow implementation.")
+
+
+# mtime cache (non-persistent)
+# based upon the assumption that files do not change during bitbake run
+class FileMtimeCache(object):
+    cache = {}
+
+    def cached_mtime(self, f):
+        if f not in self.cache:
+            self.cache[f] = os.stat(f)[stat.ST_MTIME]
+        return self.cache[f]
+
+    def cached_mtime_noerror(self, f):
+        if f not in self.cache:
+            try:
+                self.cache[f] = os.stat(f)[stat.ST_MTIME]
+            except OSError:
+                return 0
+        return self.cache[f]
+
+    def update_mtime(self, f):
+        self.cache[f] = os.stat(f)[stat.ST_MTIME]
+        return self.cache[f]
+
+    def clear(self):
+        self.cache.clear()
+
+# Checksum + mtime cache (persistent)
+class FileChecksumCache(MultiProcessCache):
+    cache_file_name = "local_file_checksum_cache.dat"
+    CACHE_VERSION = 1
+
+    def __init__(self):
+        self.mtime_cache = FileMtimeCache()
+        MultiProcessCache.__init__(self)
+
+    def get_checksum(self, f):
+        entry = self.cachedata[0].get(f)
+        cmtime = self.mtime_cache.cached_mtime(f)
+        if entry:
+            (mtime, hashval) = entry
+            if cmtime == mtime:
+                return hashval
+            else:
+                bb.debug(2, "file %s changed mtime, recompute checksum" % f)
+
+        hashval = bb.utils.md5_file(f)
+        self.cachedata_extras[0][f] = (cmtime, hashval)
+        return hashval
+
+    def merge_data(self, source, dest):
+        for h in source[0]:
+            if h in dest[0]:
+                (smtime, _) = source[0][h]
+                (dmtime, _) = dest[0][h]
+                if smtime > dmtime:
+                    dest[0][h] = source[0][h]
+            else:
+                dest[0][h] = source[0][h]
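
A minimal usage sketch for the non-persistent mtime cache above
(FileChecksumCache additionally requires the MultiProcessCache init/save
lifecycle, wired up via the fetch2 hooks below):

    cache = FileMtimeCache()
    mtime = cache.cached_mtime_noerror("/etc/hostname")  # 0 if the file is missing
    cache.update_mtime("/etc/hostname")                  # refresh a single entry
    cache.clear()                                        # drop all cached mtimes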

--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py

@@ -1570,6 +1570,7 @@ class CookerParser(object):
         def init():
             Parser.cfg = self.cfgdata
             multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1)
+            multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, args=(self.cfgdata,), exitpriority=1)
 
         self.feeder_quit = multiprocessing.Queue(maxsize=1)
         self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes)
@@ -1618,6 +1619,7 @@ class CookerParser(object):
         sync.start()
         multiprocessing.util.Finalize(None, sync.join, exitpriority=-100)
         bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data)
+        bb.fetch.fetcher_parse_done(self.cooker.configuration.data)
 
     def load_cached(self):
         for filename, appends in self.fromcache:
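
Together with the fetch2 changes below, the assumed cache lifecycle is:

    # _checksum_cache.init_cache(d)   - main process loads the cache file (fetcher_init)
    # _checksum_cache.save_extras(d)  - each parser process saves its new entries at exit
    # _checksum_cache.save_merge(d)   - main process merges the per-process extras after parsing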

--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py

@@ -8,6 +8,7 @@ BitBake build tools.
 """
 
 # Copyright (C) 2003, 2004 Chris Larson
+# Copyright (C) 2012 Intel Corporation
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -30,9 +31,11 @@ import os, re
 import logging
 import urllib
 import bb.persist_data, bb.utils
+import bb.checksum
 from bb import data
 
 __version__ = "2"
+_checksum_cache = bb.checksum.FileChecksumCache()
 
 logger = logging.getLogger("BitBake.Fetcher")
@@ -233,10 +236,18 @@ def fetcher_init(d):
     else:
         raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
 
+    _checksum_cache.init_cache(d)
+
     for m in methods:
         if hasattr(m, "init"):
             m.init(d)
 
+def fetcher_parse_save(d):
+    _checksum_cache.save_extras(d)
+
+def fetcher_parse_done(d):
+    _checksum_cache.save_merge(d)
+
 def fetcher_compare_revisions(d):
     """
     Compare the revisions in the persistant cache with current values and
@@ -553,6 +564,80 @@ def srcrev_internal_helper(ud, d, name):
 
     return rev
 
+def get_checksum_file_list(d):
+    """ Get a list of files to checksum in SRC_URI
+
+    Returns the resolved local paths of all local file entries in
+    SRC_URI as a space-separated string
+    """
+    fetch = Fetch([], d)
+    dl_dir = d.getVar('DL_DIR', True)
+    filelist = []
+    for u in fetch.urls:
+        ud = fetch.ud[u]
+
+        if isinstance(ud.method, local.Local):
+            ud.setup_localpath(d)
+            f = ud.localpath
+            if f.startswith(dl_dir):
+                # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else
+                if os.path.exists(f):
+                    bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN', True), os.path.basename(f)))
+                else:
+                    bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN', True), os.path.basename(f)))
+                    continue
+            filelist.append(f)
+
+    return " ".join(filelist)
+
+def get_file_checksums(filelist, pn):
+    """Get a list of the checksums for a list of local files
+
+    Returns the checksums for a list of local files, caching the results as
+    it proceeds
+    """
+
+    def checksum_file(f):
+        try:
+            checksum = _checksum_cache.get_checksum(f)
+        except OSError as e:
+            bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
+            return None
+        return checksum
+
+    checksums = []
+    for pth in filelist.split():
+        checksum = None
+        if '*' in pth:
+            # Handle globs
+            import glob
+            for f in glob.glob(pth):
+                checksum = checksum_file(f)
+                if checksum:
+                    checksums.append((f, checksum))
+        elif os.path.isdir(pth):
+            # Handle directories
+            for root, dirs, files in os.walk(pth):
+                for name in files:
+                    fullpth = os.path.join(root, name)
+                    checksum = checksum_file(fullpth)
+                    if checksum:
+                        checksums.append((fullpth, checksum))
+        else:
+            checksum = checksum_file(pth)
+            if checksum:
+                checksums.append((pth, checksum))
+
+    checksums.sort()
+    return checksums
+
 class FetchData(object):
     """
     A class which represents the fetcher state for a given URI.
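
Nothing in this patch sets the file-checksums varflag itself; a metadata
class is expected to wire it up, presumably along these lines (hypothetical
usage of the datastore API):

    # Attach the parse-time resolved file list to do_fetch; CoreRecipeInfo
    # above collects the flag and the signature generator below consumes it.
    d.setVarFlag('do_fetch', 'file-checksums', '${@bb.fetch2.get_checksum_file_list(d)}')

get_file_checksums() then turns that space-separated list into a sorted
list of (path, checksum) tuples, expanding globs and walking directories as
shown above.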

--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py

@@ -60,6 +60,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         self.taskhash = {}
         self.taskdeps = {}
         self.runtaskdeps = {}
+        self.file_checksum_values = {}
         self.gendeps = {}
         self.lookupcache = {}
         self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
@@ -152,6 +153,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         k = fn + "." + task
         data = dataCache.basetaskhash[k]
         self.runtaskdeps[k] = []
+        self.file_checksum_values[k] = {}
         recipename = dataCache.pkg_fn[fn]
         for dep in sorted(deps, key=clean_basepath):
             depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
@@ -161,6 +163,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
                 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
             data = data + self.taskhash[dep]
             self.runtaskdeps[k].append(dep)
 
+        if task in dataCache.file_checksums[fn]:
+            checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
+            for (f,cs) in checksums:
+                self.file_checksum_values[k][f] = cs
+                data = data + cs
+
         h = hashlib.md5(data).hexdigest()
         self.taskhash[k] = h
         #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
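
In effect the task hash becomes md5(base hash + dependency hashes + file
checksums). A toy recomputation with made-up values:

    import hashlib

    basehash = "d41d8cd98f00b204e9800998ecf8427e"      # hypothetical base task hash
    dep_hashes = ["0cc175b9c0f1b6a831c399e269772661"]  # hypothetical dependent task hashes
    file_checksums = [("files/init.sh", "92eb5ffee6ae2fec3ad71c777531578f")]  # hypothetical

    data = basehash + "".join(dep_hashes)
    for _, cs in file_checksums:
        data = data + cs
    taskhash = hashlib.md5(data.encode('utf-8')).hexdigest()
    # Changing a local file changes its checksum, hence data, hence the task hash.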
@@ -197,6 +205,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         if runtime and k in self.taskhash:
             data['runtaskdeps'] = self.runtaskdeps[k]
+            data['file_checksum_values'] = self.file_checksum_values[k]
             data['runtaskhashes'] = {}
             for dep in data['runtaskdeps']:
                 data['runtaskhashes'][dep] = self.taskhash[dep]
@@ -304,6 +313,18 @@ def compare_sigfiles(a, b):
         for dep in changed:
             print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
 
+    changed, added, removed = dict_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
+    if changed:
+        for f in changed:
+            print "Checksum for file %s changed from %s to %s" % (f, a_data['file_checksum_values'][f], b_data['file_checksum_values'][f])
+    if added:
+        for f in added:
+            print "Dependency on checksum of file %s was added" % (f)
+    if removed:
+        for f in removed:
+            print "Dependency on checksum of file %s was removed" % (f)
+
     if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
         a = clean_basepaths(a_data['runtaskhashes'])
         b = clean_basepaths(b_data['runtaskhashes'])
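
For reference, the dict_diff() helper used above is assumed to behave like
this sketch: keys present in both with differing values, keys only in b,
and keys only in a:

    def dict_diff(a, b):
        changed = set(k for k in a if k in b and a[k] != b[k])
        added = set(b) - set(a)      # new checksum dependencies
        removed = set(a) - set(b)    # dropped checksum dependencies
        return changed, added, removed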
@@ -353,6 +374,9 @@ def dump_sigfile(a):
     if 'runtaskdeps' in a_data:
         print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
 
+    if 'file_checksum_values' in a_data:
+        print "This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])
+
     if 'runtaskhashes' in a_data:
         for dep in a_data['runtaskhashes']:
             print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])