% Technical report 2003-058: StatCache (Berg, Hagersten). "StatCache" is brace-protected in the title so sentence-casing styles keep its capitalisation.
@TechReport{it:2003-058,
  author      = {Berg, Erik and Hagersten, Erik},
  title       = {{StatCache}: A Probabilistic Approach to Efficient and Accurate Data Locality Analysis},
  institution = {Department of Information Technology, Uppsala University},
  department  = {Division of Computer Systems},
  year        = {2003},
  month       = dec,
  number      = {2003-058},
  abstract    = {The widening memory gap reduces performance of applications with poor data locality. This problem can be analyzed using working-set graphs. Current methods to generate such graphs include set sampling and time sampling, but cold start effects and unrepresentative set selection impair accuracy. In this paper we present StatCache, a novel sample-based method that can perform data-locality analysis on realistic workloads. During the execution of an application, sparse discrete memory accesses are sampled, and their reuse distances are measured using a simple watchpoint mechanism. StatCache uses the information collected from a single run to accurately estimate miss ratios of fully-associative caches of arbitrary sizes and generates working-set graphs. We evaluate StatCache using the SPEC CPU2000 benchmarks and show that StatCache gives accurate results with a sampling rate as low as $10^{-4}$. We also provide a proof-of-concept implementation, and discuss potentially very fast implementation alternatives.}
}