%% README for probcomp bibliography file
%%
%% Maintainer: Feras A. Saad
%%
%% This .bib file uses the following conventions
%%
%% - The most important convention is consistency. When making an addition, it
%% is very likely that you can find an existing entry that matches the
%% template for the new entry and requires only minor modifications.
%%
%% - Entries are sorted by year.
%%
%% - Entries are keyed by the last name of first author, followed by the year
%% followed by keyword of paper title (towner2019gen).
%%
%% - Each entry should have its fields aligned at the = sign.
%%
%% - Values of non-numeric fields are enclosed between curly braces. The month
%% field is written in lower case abbreviation without curly braces.
%%
%% - Use a trailing comma for the last key in an entry.
%%
%% - Publication titles are lower case, except for:
%% - The first word in the title;
%% - Proper nouns (e.g. {Bayesian}, {Markov}). Make sure to enclose
%% the whole word (not only first letter) in curly braces to guarantee
%% it is capitalized with correct kerning.
%% - First word after a colon (Picture: A ...). These should be
%% capitalized in the source and do not require explicit curly braces.
%%
%% - Author names should follow the style
%% author = {Doe, Jane C. and Foe, John E.},
%%
%% - Enclose compound last names in braces, i.e. {van de Meent}, Jan-Willem
%%
%% - Names of conference proceedings should be first spelled out in full,
%% followed by the acronym e.g.
%% booktitle = {PLDI 2019: Proceedings of the 40th ACM...},
%%
%% - Workshop presentations, use inproceedings (even though most workshops
%% do not have formal proceedings), and write (co-located with
%% ). See examples below.
%%
%% - BibBase specific keys:
%% - url_paper is mandatory when possible, and must be a direct link to a PDF.
%% - url_link is strongly recommended, and usually has an abstract etc.
%% - url_[foo] can be used for creating a custom link with text [foo],
%% such as url_supplement, url_slides, url_video, etc.
%% - note can be used for [Oral], [In review at ...], and so on, and should
%% always terminate with a period.
@inproceedings{saad2021hirm,
title = {Hierarchical Infinite Relational Model},
author = {Saad, Feras, and Mansinghka, Vikash K.},
booktitle = {UAI 2021: Proceedings of the 37th Conference on Uncertainty in Artificial Intelligence},
year = 2021,
fvolume = {},
fpages = {},
publisher = {PMLR},
url_paper = {https://www.auai.org/uai2021/pdf/uai2021.405.preliminary.pdf},
url_code = {https://github.com/probcomp/hierarchical-irm},
abstract = {This paper describes the hierarchical infinite relational model (HIRM), a new probabilistic generative model for noisy, sparse, and heterogeneous relational data. Given a set of relations defined over a collection of domains, the model first infers multiple non-overlapping clusters of relations using a top-level Chinese restaurant process. Within each cluster of relations, a Dirichlet process mixture is then used to partition the domain entities and model the probability distribution of relation values. The HIRM generalizes the standard infinite relational model and can be used for a variety of data analysis tasks including dependence detection, clustering, and density estimation. We present new algorithms for fully Bayesian posterior inference via Gibbs sampling. We illustrate the efficacy of the method on a density estimation benchmark of twenty object-attribute datasets with up to 18 million cells and use it to discover relational structure in real-world datasets from politics and genomics.},
}
@inproceedings{lew2021pclean,
title = {{PClean:} {Bayesian} Data Cleaning at Scale with Domain-Specific Probabilistic Programming},
author = {Lew, Alex K. and Agrawal, Monica and Sontag, David and Mansinghka, Vikash K.},
booktitle = {AISTATS 2021: Proceedings of The 24th International Conference on Artificial Intelligence and Statistics},
volume = 130,
pages = {1927--1935},
series = {Proceedings of Machine Learning Research},
publisher = {PMLR},
year = 2021,
url_link = {http://proceedings.mlr.press/v130/lew21a.html},
url_paper = {http://proceedings.mlr.press/v130/lew21a/lew21a.pdf},
url_MIT_News = {https://news.mit.edu/2021/system-cleans-messy-data-tables-automatically-0511},
abstract = {Data cleaning is naturally framed as probabilistic inference in a generative model of ground truth data and likely errors, but the diversity of real-world error patterns and the hardness of inference make Bayesian approaches difficult to automate. We present PClean, a probabilistic programming language (PPL) for leveraging dataset-specific knowledge to automate Bayesian cleaning. Compared to general-purpose PPLs, PClean tackles a restricted problem domain, enabling three modeling and inference innovations: (1) a non-parametric model of relational database instances, which users’ programs customize; (2) a novel sequential Monte Carlo inference algorithm that exploits the structure of PClean’s model class; and (3) a compiler that generates near-optimal SMC proposals and blocked-Gibbs rejuvenation kernels based on the user’s model and data. We show empirically that short (<50-line) PClean programs can: be faster and more accurate than generic PPL inference on data-cleaning benchmarks; match state-of-the-art data-cleaning systems in terms of accuracy and runtime (unlike generic PPL inference in the same runtime); and scale to real-world datasets with millions of records.},
}
@inproceedings{saad2021sppl,
title = {SPPL: Probabilistic Programming with Fast Exact Symbolic Inference},
author = {Saad, Feras A. and Rinard, Martin C. and Mansinghka, Vikash K.},
year = 2021,
booktitle = {PLDI 2021: Proceedings of the 42nd ACM SIGPLAN Conference on Programming Language Design and Implementation},
pages = {804--819},
publisher = {ACM},
url_link = {https://dl.acm.org/doi/10.1145/3453483.3454078},
url_paper = {https://dl.acm.org/doi/pdf/10.1145/3453483.3454078},
abstract = {We present the Sum-Product Probabilistic Language (SPPL), a new probabilistic programming language that automatically delivers exact solutions to a broad range of probabilistic inference queries. SPPL translates probabilistic programs into sum-product expressions, a new symbolic representation and associated semantic domain that extends standard sum-product networks to support mixed-type distributions, numeric transformations, logical formulas, and pointwise and set-valued constraints. We formalize SPPL via a novel translation strategy from probabilistic programs to sum-product expressions and give sound exact algorithms for conditioning on and computing probabilities of events. SPPL imposes a collection of restrictions on probabilistic programs to ensure they can be translated into sum-product expressions, which allow the system to leverage new techniques for improving the scalability of translation and inference by automatically exploiting probabilistic structure. We implement a prototype of SPPL with a modular architecture and evaluate it on benchmarks the system targets, showing that it obtains up to 3500x speedups over state-of-the-art symbolic systems on tasks such as verifying the fairness of decision tree classifiers, smoothing hidden Markov models, conditioning transformed random variables, and computing rare event probabilities.},
}
@inproceedings{matheos2021oupms,
title = {Transforming Worlds: Automated Involutive {MCMC} for Open-Universe Probabilistic Models},
author = {Matheos, George and Lew, Alex K. and Ghavamizadeh, Matin and Russell, Stuart and Cusumano-Towner, Marco and Mansinghka, Vikash K.},
booktitle = {AABI 2021: Third Symposium on Advances in Approximate Bayesian Inference},
year = 2021,
url_link = {https://openreview.net/forum?id=8Itm8dQnJRc},
url_paper = {https://openreview.net/pdf?id=8Itm8dQnJRc},
abstract = {Open-universe probabilistic models enable Bayesian inference about how many objects underlie data, and how they are related. Effective inference in OUPMs remains a challenge, however, often requiring the use of custom, transdimensional MCMC kernels, based on heuristics, deep learning, or domain knowledge, that can be difficult to derive and to implement correctly. This paper adapts the recently introduced involutive MCMC framework to the open-universe setting, and shows how error-prone aspects of kernel design and implementation (e.g., the computation of valid accept/reject probabilities) can be automated, using techniques from probabilistic and differentiable programming. The result is an intuitive design space for MCMC kernels for OUPMs: users write programs that propose incremental changes to possible worlds, creating, deleting, or modifying objects according to arbitrary application-specific logic, and their proposals are automatically converted into stationary MCMC kernels. We demonstrate in preliminary experiments that data-driven involutive MCMC kernels outperform generic probabilistic programming language inference, as well as generic birth/death reversible-jump kernels without application-specific logic.},
}
@inproceedings{tan2021genify,
title = {Genify.jl: Transforming {Julia} into {Gen} to enable programmable inference},
author = {Zhi-Xuan, Tan and Becker, McCoy R. and Mansinghka, Vikash K.},
booktitle = {Workshop on Languages for Inference (LAFI, co-located with POPL 2021)},
year = 2021,
url_link = {https://popl21.sigplan.org/details/lafi-2021-papers/5/Genify-jl-Transforming-Julia-into-Gen-to-enable-programmable-inference},
code = {https://github.com/probcomp/Genify.jl},
abstract = {A wide variety of libraries written in Julia implement stochastic simulators of natural and social phenomena for the purposes of computational science. However, these simulators are not generally amenable to Bayesian inference, as they do not provide likelihoods for execution traces, support constraining of observed random variables, or allow random choices and subroutines to be selectively updated in Monte Carlo algorithms. To address these limitations, we present Genify.jl, an approach to transforming plain Julia code into generative functions in Gen, a universal probabilistic programming system with programmable inference. We accomplish this via lightweight transformation of lowered Julia code into Gen’s dynamic modeling language, combined with a user-friendly random variable addressing scheme that enables straightforward implementation of custom inference programs. We demonstrate the utility of this approach by transforming an existing agent-based simulator from plain Julia into Gen, and designing custom inference programs that increase accuracy and efficiency relative to generic SMC and MCMC methods. This performance improvement is achieved by proposing, constraining, or re-simulating random variables that are internal to the simulator, which is made possible by transformation into Gen.},
}
@mastersthesis{garrett2021thesis,
title = {Infrastructure for modeling and inference engineering with {3D} generative scene graphs},
author = {Garrett, Austin},
year = 2021,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/130688/1251779760-MIT.pdf?sequence=1&isAllowed=y},
url_link = {https://dspace.mit.edu/handle/1721.1/130688},
abstract = {Recent advances in probabilistic programming have enabled the development of probabilistic generative models for visual perception using a rich abstract representation of 3D scene geometry called a scene graph. However, there remain several challenges in the practical implementation of scene graph models, including human-editable specification, visualization, priors, structure inference, hyperparameters tuning, and benchmarking. In this thesis, I describe the development of infrastructure to enable the development and research of scene graph models by researchers and practitioners. A description of a preliminary scene graph model and inference program for 3D scene structure is provided, along with an implementation in the probabilistic programming language Gen. Utilities for visualizing and understanding distributions over scene graphs are developed. Synthetic enumerative tests of the posterior and inference algorithm are conducted, and conclusions drawn for the improvement of the proposed modeling components. Finally, I collect and analyze real-world scene graph data, and use it to optimize model hyperparameters; the preliminary structure inference program is then tested in a structure prediction task with both the unoptimizedand optimized models.},
}
@mastersthesis{mann2021thesis,
title = {Neural {Bayesian} goal inference for symbolic planning domains},
author = {Mann, Jordyn},
year = 2021,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/130701/1251800415-MIT.pdf?sequence=1&isAllowed=y},
url_link = {https://dspace.mit.edu/handle/1721.1/130701},
abstract = {There are several reasons for which one may aim to infer the short- and long-term goals of agents in diverse physical domains. As increasingly powerful autonomous systems come into development, it is conceivable that they may eventually need to accurately infer the goals of humans. There are also more immediate reasons for which this sort of inference may be desirable, such as in the use case of intelligent personal assistants. This thesis introduces a neural Bayesian approach to goal inference in multiple symbolic planning domains and compares the results of this approach to the results of a recently developed Monte Carlo Bayesian inference method knownas Sequential Inverse Plan Search (SIPS). SIPS is based on sequential Monte Carlo inference for Bayesian inversion of probabilistic plan search in Planning Domain Definition Language (PDDL) domains. In addition to the neural architectures, the thesis also introduces approaches for converting PDDL predicate state representations to numerical arrays and vectors suitable for input to the neural networks. The experimental results presented indicate that for the domains investigated, in cases where the training set is representative of the test set, the neural approach provides similar accuracy results to SIPS in the later portions of the observation sequences with a far shorter amortized time cost. However, in earlier timesteps of those observation sequences and in cases where the training set is less similar to the testing set, SIPS outperforms the neural approach in terms of accuracy. These results indicate that a model-based inference method where SIPS uses a neural proposal based on the neural networks designed in this thesis could have the potential to combine the advantages of both goal inference approaches by improving the speed of SIPS inference while maintaining generalizability and high accuracy throughout the timesteps of the observation sequences.},
}
@inproceedings{tan2020sips,
title = {Online {Bayesian} goal inference for boundedly-rational planning agents},
author = {Zhi-Xuan, Tan and Mann, Jordyn L. and Silver, Tom and Tenenbaum, Joshua B. and Mansinghka, Vikash K.},
booktitle = {NeurIPS 2020: Advances in Neural Information Processing Systems 33},
year = 2020,
url_paper = {https://papers.nips.cc/paper/2020/file/df3aebc649f9e3b674eeb790a4da224e-Paper.pdf},
url_link = {https://papers.nips.cc/paper/2020/hash/df3aebc649f9e3b674eeb790a4da224e-Abstract.html},
url_supplement = {https://papers.nips.cc/paper/2020/file/df3aebc649f9e3b674eeb790a4da224e-Supplemental.zip},
url_MIT_News = {https://news.mit.edu/2020/building-machines-better-understand-human-goals-1214},
abstract = {People routinely infer the goals of others by observing their actions over time. Remarkably, we can do so even when those actions lead to failure, enabling us to assist others when we detect that they might not achieve their goals. How might we endow machines with similar capabilities? Here we present an architecture capable of inferring an agent's goals online from both optimal and non-optimal sequences of actions. Our architecture models agents as boundedly-rational planners that interleave search with execution by replanning, thereby accounting for sub-optimal behavior. These models are specified as probabilistic programs, allowing us to represent and perform efficient Bayesian inference over an agent's goals and internal planning processes. To perform such inference, we develop Sequential Inverse Plan Search (SIPS), a sequential Monte Carlo algorithm that exploits the online replanning assumption of these models, limiting computation by incrementally extending inferred plans as new actions are observed. We present experiments showing that this modeling and inference architecture outperforms Bayesian inverse reinforcement learning baselines, accurately inferring goals from both optimal and non-optimal trajectories involving failure and back-tracking, while generalizing across domains with compositional structure and sparse rewards.},
}
@inproceedings{saad2020fldr,
title = {The fast loaded dice roller: A near-optimal exact sampler for discrete probability distributions},
author = {Saad, Feras A. and Freer, Cameron E. and Rinard, Martin C. and Mansinghka, Vikash K.},
booktitle = {AISTATS 2020: Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics},
volume = 108,
pages = {1036--1046},
series = {Proceedings of Machine Learning Research},
address = {Palermo, Sicily, Italy},
publisher = {PMLR},
year = 2020,
keywords = {random variate generation, sampling, discrete random variables},
abstract = {This paper introduces a new algorithm for the fundamental problem of generating a random integer from a discrete probability distribution using a source of independent and unbiased random coin flips. This algorithm, which we call the Fast Loaded Dice Roller (FLDR), has efficient complexity properties in space and time: the size of the sampler is guaranteed to be linear in the number of bits needed to encode the target distribution and the sampler consumes (in expectation) at most 6.5 bits of entropy more than the information-theoretically minimal rate, independently of the values or size of the target distribution. We present an easy-to-implement, linear-time preprocessing algorithm and a fast implementation of the FLDR using unsigned integer arithmetic. Empirical evaluations establish that the FLDR is 2x--10x faster than multiple baseline algorithms for exact sampling, including the widely-used alias and interval samplers. It also uses up to 10000x less space than the information-theoretically optimal sampler, at the expense of a less than 1.5x runtime overhead.},
url_paper = {http://proceedings.mlr.press/v108/saad20a/saad20a.pdf},
url_supplement = {http://proceedings.mlr.press/v108/saad20a/saad20a-supp.pdf},
url_link = {http://proceedings.mlr.press/v108/saad20a.html},
url_code = {https://github.com/probcomp/fast-loaded-dice-roller.git},
url_experiments = {https://github.com/probcomp/fast-loaded-dice-roller-experiments.git},
url_MIT_News = {https://news.mit.edu/2020/algorithm-simulates-roll-loaded-dice-0528},
url_Quanta = {https://www.quantamagazine.org/how-and-why-computers-roll-loaded-dice-20200708/},
}
@inproceedings{witty2020causal,
title = {Causal inference using {Gaussian} processes with structured latent confounders},
author = {Witty, Sam and Takatsu, Kenta and Jensen, David and Mansinghka, Vikash},
booktitle = {ICML 2020: Proceedings of the Thirty-Seventh International Conference on Machine Learning},
year = 2020,
series = {Proceedings of Machine Learning Research},
publisher = {PMLR},
url_paper = {https://proceedings.icml.cc/static/paper_files/icml/2020/5045-Paper.pdf},
url_link = {https://proceedings.icml.cc/static/paper_files/icml/2020/5045-Paper.pdf},
abstract = {Latent confounders---unobserved variables that influence both treatment and outcome---can bias estimates of causal effects. In some cases, these confounders are shared across observations, e.g. all students in a school are influenced by the school\textquotesingle s culture in addition to any educational interventions they receive individually. This paper shows how to model latent confounders that have this structure and thereby improve estimates of causal effects. The key innovations are a hierarchical Bayesian model, Gaussian processes with structured latent confounders (GP-SLC), and a Monte Carlo inference algorithm for this model based on elliptical slice sampling. GP-SLC provides principled Bayesian uncertainty estimates of individual treatment effect without requiring parametric assumptions about the functional forms relating confounders, covariates, treatment, and outcomes. This paper also proves that, for linear functional forms, accounting for the structure in latent confounders is sufficient for asymptotically consistent estimates of causal effect. Finally, this paper shows GP-SLC is competitive with or more accurate than widely used causal inference techniques such as multi-level linear models and Bayesian additive regression trees. Benchmark datasets include the Infant Health and Development Program and a dataset showing the effect of changing temperatures on state-wide energy consumption across New England.},
}
@article{saad2020sampling,
title = {Optimal approximate sampling from discrete probability distributions},
author = {Saad, Feras A. and Freer, Cameron E. and Rinard, Martin C. and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 4,
number = {POPL},
month = jan,
year = 2020,
pages = {36:1--36:31},
articleno = 36,
numpages = 31,
publisher = {ACM},
keywords = {discrete random variables, random variate generation},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3371104},
url_supplement = {https://dl.acm.org/action/downloadSupplement?doi=10.1145%2F3371104&file=popl20main-p126-p-aux.zip&download=true},
url_video = {https://www.youtube.com/watch?v=Eb0CtuK7K3g},
url_code = {https://github.com/probcomp/optimal-approximate-sampling},
url_experiments = {https://doi.org/10.1145/3373106},
url_link = {https://doi.org/10.1145/3371104},
abstract = {This paper addresses a fundamental problem in random variate generation: given access to a random source that emits a stream of independent fair bits, what is the most accurate and entropy-efficient algorithm for sampling from a discrete probability distribution $(p_1, \dots, p_n)$, where the probabilities of the output distribution $(\hat{p}_1, \dots, \hat{p}_n)$ of the sampling algorithm must be specified using at most $k$ bits of precision? We present a theoretical framework for formulating this problem and provide new techniques for finding sampling algorithms that are optimal both statistically (in the sense of sampling accuracy) and information-theoretically (in the sense of entropy consumption). We leverage these results to build a system that, for a broad family of measures of statistical accuracy, delivers a sampling algorithm whose expected entropy usage is minimal among those that induce the same distribution (i.e., is ``entropy-optimal'') and whose output distribution $(\hat{p}_1, \dots, \hat{p}_n)$ is a closest approximation to the target distribution $(p_1, \dots, p_n)$ among all entropy-optimal sampling algorithms that operate within the specified $k$-bit precision. This optimal approximate sampler is also a closer approximation than any (possibly entropy-suboptimal) sampler that consumes a bounded amount of entropy with the specified precision, a class which includes floating-point implementations of inversion sampling and related methods found in many software libraries. We evaluate the accuracy, entropy consumption, precision requirements, and wall-clock runtime of our optimal approximate sampling algorithms on a broad set of distributions, demonstrating the ways that they are superior to existing approximate samplers and establishing that they often consume significantly fewer resources than are needed by exact samplers.},
}
@phdthesis{towner2020thesis,
title = {Gen: A high-level programming platform for probabilistic inference},
author = {Cusumano-Towner, Marco},
year = 2020,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/129247/1227518338-MIT.pdf?sequence=1&isAllowed=y},
url_link = {https://dspace.mit.edu/handle/1721.1/129247},
abstract = {Probabilistic inference provides a powerful theoretical framework for engineering intelligent systems. However, diverse modeling approaches and inference algorithms are needed to navigate engineering tradeoffs between robustness, adaptability, accuracy, safety, interpretability, data efficiency, and computational efficiency. Structured generative models represented as symbolic programs provide interpretability. Structure learning of these models provides data-efficient adaptability. Uncertainty quantification is needed for safety. Bottom-up, discriminative inference provides computational efficiency. Iterative “model-in-the-loop” algorithms can improve accuracy by fine-tuning inferences and improve robustness to out-of-distribution data. Recent probabilistic programming systems fully or partially automate inference, but are too restrictive for many applications. Differentiable programming systems are also inadequate: they do not support structure learning of generative models or hybrids of “model-in-the-loop” and discriminative inference. Therefore, probabilistic inference is still often implemented by translating tedious mathematical derivations into low-level numerical programs, which are error-prone and difficult to modify and maintain. This thesis presents the design and implementation of the Gen programming platform for probabilistic inference. Gen automates the low-level implementation of probabilistic inference algorithms while remaining flexible enough to support heterogeneous algorithmic approaches and extensible enough for practical inference engineering. Gen users define their models explicitly using probabilistic programs, but instead of compiling the model directly into an inference algorithm implementation, Gen compiles the model into data types that encapsulate low-level inference operations whose semantics are derived from the model, like sampling, density evaluation, and gradients. Users write their inference application in a general-purpose programming language using Gen’s abstract data types as primitives. This thesis defines Gen’s data types and shows that they can be used to compose a variety of inference techniques including sophisticated Monte Carlo algorithms and hybrids of Monte Carlo, variational, and discriminative techniques. The same data types can be generated from multiple probabilistic programming languages that strike different expressiveness and performance tradeoffs. By decoupling probabilistic programming language implementations from inference algorithm design, Gen enables more flexible specialization of both, leading to performance improvements over existing probabilistic programming systems.},
}
@article{lew2020traces,
title = {Trace types and denotational semantics for sound programmable inference in probabilistic languages},
author = {Lew, Alexander K. and Cusumano-Towner, Marco F. and Sherman, Benjamin and Carbin, Michale and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 4,
number = {POPL},
month = jan,
year = 2020,
pages = {19:1--19:32},
articleno = 19,
numpages = 32,
publisher = {ACM},
keywords = {type systems, probabilistic programming, programmable inference},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3371087},
url_link = {https://doi.org/10.1145/3371087},
abstract = {Modern probabilistic programming languages aim to formalize and automate key aspects of probabilistic modeling and inference. Many languages provide constructs for programmable inference that enable developers to improve inference speed and accuracy by tailoring an algorithm for use with a particular model or dataset. Unfortunately, it is easy to use these constructs to write unsound programs that appear to run correctly but produce incorrect results. To address this problem, we present a denotational semantics for programmable inference in higher-order probabilistic programming languages, along with a type system that ensures that well-typed inference programs are sound by construction. A central insight is that the type of a probabilistic expression can track the space of its possible execution traces, not just the type of value that it returns, as these traces are often the objects that inference algorithms manipulate. We use our semantics and type system to establish soundness properties of custom inference programs that use constructs for variational, sequential Monte Carlo, importance sampling, and Markov chain Monte Carlo inference.},
}
@article{saad2020sppl,
author = {Saad, Feras A. and Rinard, Martin C. and Mansinghka, Vikash K.},
title = {Exact symbolic inference in probabilistic programs via sum-product representations},
year = {2020},
journal = {arXiv preprint},
volume = {arXiv:2010.03485},
url_paper = {https://arxiv.org/pdf/2010.03485.pdf},
url_link = {https://arxiv.org/abs/2010.03485},
abstract = {We present the Sum-Product Probabilistic Language (SPPL), a new system that automatically delivers exact solutions to a broad range of probabilistic inference queries. SPPL symbolically represents the full distribution on execution traces specified by a probabilistic program using a generalization of sum-product networks. SPPL handles continuous and discrete distributions, many-to-one numerical transformations, and a query language that includes general predicates on random variables. We formalize SPPL in terms of a novel translation strategy from probabilistic programs to a semantic domain of sum-product representations, present new algorithms for exactly conditioning on and computing probabilities of queries, and prove their soundness under the semantics. We present techniques for improving the scalability of translation and inference by automatically exploiting conditional independences and repeated structure in SPPL programs. We implement a prototype of SPPL with a modular architecture and evaluate it on a suite of common benchmarks, which establish that our system is up to 3500x faster than state-of-the-art systems for fairness verification; up to 1000x faster than state-of-the-art symbolic algebra techniques; and can compute exact probabilities of rare events in milliseconds.},
note = {Preprint},
}
@article{spanbauer2020imcmc,
title = {Deep involutive generative models for neural {MCMC}},
author = {Spanbauer, Span and Freer, Cameron E. and Mansinghka, Vikash. K.},
year = 2020,
journal = {arXiv preprint},
volume = {arXiv:2006.15167},
url_paper = {https://arxiv.org/pdf/2006.15167.pdf},
url_link = {https://arxiv.org/abs/2006.15167},
abstract = {We introduce deep involutive generative models, a new architecture for deep generative modeling, and use them to define Involutive Neural MCMC, a new approach to fast neural MCMC. An involutive generative model represents a probability kernel $G(\phi \mapsto \phi')$ as an involutive (i.e., self-inverting) deterministic function $f(\phi,\pi)$ on an enlarged state space containing auxiliary variables $\pi$. We show how to make these models volume preserving, and how to use deep volume-preserving involutive generative models to make valid Metropolis-Hastings updates based on an auxiliary variable scheme with an easy-to-calculate acceptance ratio. We prove that deep involutive generative models and their volume-preserving special case are universal approximators for probability kernels. This result implies that with enough network capacity and training time, they can be used to learn arbitrarily complex MCMC updates. We define a loss function and optimization algorithm for training parameters given simulated data. We also provide initial experiments showing that Involutive Neural MCMC can efficiently explore multi-modal distributions that are intractable for Hybrid Monte Carlo, and can converge faster than A-NICE-MC, a recently introduced neural MCMC technique.},
}
@article{towner2020imcmc,
title = {Automating involutive {MCMC} using probabilistic and differentiable programming},
author = {Cusumano-Towner, Marco and Lew, Alexander K. and Mansinghka, Vikash K.},
year = 2020,
journal = {arXiv preprint},
volume = {arXiv:2007.09871},
url_paper = {https://arxiv.org/pdf/2007.09871.pdf},
url_link = {https://arxiv.org/abs/2007.09871},
abstract = {Involutive MCMC is a unifying mathematical construction for MCMC kernels that generalizes many classic and state-of-the-art MCMC algorithms, from reversible jump MCMC to kernels based on deep neural networks. But as with MCMC samplers more generally, implementing involutive MCMC kernels is often tedious and error-prone, especially when sampling on complex state spaces. This paper describes a technique for automating the implementation of involutive MCMC kernels given (i) a pair of probabilistic programs defining the target distribution and an auxiliary distribution respectively and (ii) a differentiable program that transforms the execution traces of these probabilistic programs. The technique, which is implemented as part of the Gen probabilistic programming system, also automatically detects user errors in the specification of involutive MCMC kernels and exploits sparsity in the kernels for improved efficiency. The paper shows example Gen code for a split-merge reversible jump move in an infinite Gaussian mixture model and a state-dependent mixture of proposals on a combinatorial space of covariance functions for a Gaussian process.},
}
@inproceedings{lew2020thoughtppl,
title = {Leveraging unstructured statistical knowledge in a probabilistic language of thought},
author = {Lew, Alexander K. and Tessler, Michael H. and Mansinghka, Vikash K. and Tenenbaum, Joshua B.},
booktitle = {CogSci 2020: Proceedings of the Forty-Second Annual Virtual Meeting of the Cognitive Science Society},
year = 2020,
series = {Proceedings of the Annual Conference of the Cognitive Science Society},
url_link = {https://cognitivesciencesociety.org/cogsci20/papers/0520/index.html},
url_paper = {https://cognitivesciencesociety.org/cogsci20/papers/0520/0520.pdf},
abstract = {One hallmark of human reasoning is that we can bring to bear a diverse web of common-sense knowledge in any situation. The vastness of our knowledge poses a challenge for the practical implementation of reasoning systems as well as for our cognitive theories --- how do people represent their common-sense knowledge? On the one hand, our best models of sophisticated reasoning are top-down, making use primarily of symbolically-encoded knowledge. On the other, much of our understanding of the statistical properties of our environment may arise in a bottom-up fashion, for example through associationist learning mechanisms. Indeed, recent advances in AI have enabled the development of billion-parameter language models that can scour for patterns in gigabytes of text from the web, picking up a surprising amount of common-sense knowledge along the way---but they fail to learn the structure of coherent reasoning. We propose combining these approaches, by embedding language-model-backed primitives into a state-of-the-art probabilistic programming language (PPL). On two open-ended reasoning tasks, we show that our PPL models with neural knowledge components characterize the distribution of human responses more accurately than the neural language models alone, raising interesting questions about how people might use language as an interface to common-sense knowledge, and suggesting that building probabilistic models with neural language-model components may be a promising approach for more human-like AI.},
}
@mastersthesis{charchut2020thesis,
title = {Implementation of a cross-platform automated {Bayesian} data modeling system},
author = {Charchut, Nicholas},
year = 2020,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/129078/1227274665-MIT.pdf?sequence=1&isAllowed=y},
url_link = {https://dspace.mit.edu/handle/1721.1/129078},
abstract = {Understanding the underlying structure of a high-dimensional dataset is a quintessential task in data science and multivariate statistics. The CrossCat model class provides an automated solution by using Bayesian Non-Parametric processes to identify dependencies within the data without any user input necessary. This thesis provides an implementation of the CrossCat model class in the functional programming language Clojure. This implementation, called ClojureCat, was designed to be part of a probabilistic programming platform and implemented to be able to cross-compile into JavaScript, allowing complex inference procedures to be run in any JavaScript-supporting web browser. The implementation is thoroughly tested and benchmarked with respect to existing CrossCat implementations and other baselines, showing that ClojureCat is not only performant, but accurate in its implementation of Cross-Cat inference procedures. Also included in ClojureCat are several implementations of few-shot learning, in which for several real-world datasets, we utilize extremely sparse label sets and CrossCat’s learned structure of the data to draw meaningful conclusions, make predictions, and further analyze the high-dimensional data.},
}
@article{saad2019bayesian,
title = {Bayesian synthesis of probabilistic programs for automatic data modeling},
author = {Saad, Feras A. and Cusumano-Towner, Marco F. and Schaechtle, Ulrich and Rinard, Martin C. and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 3,
number = {POPL},
month = jan,
year = 2019,
pages = {37:1--37:32},
articleno = {37},
numpages = {29},
publisher = {ACM},
url_paper = {https://dl.acm.org/doi/pdf/10.1145/3290350},
url_link = {https://doi.org/10.1145/3290350},
url_video = {https://www.youtube.com/watch?v=T5fdUmYJsjM},
url_MIT_News = {https://news.mit.edu/2019/nonprogrammers-data-science-0115},
url_ZDNet_News = {https://www.zdnet.com/article/mit-aims-to-help-data-scientists-by-having-ai-do-the-heavy-lifting/},
abstract = {We present new techniques for automatically constructing probabilistic programs for data analysis, interpretation, and prediction. These techniques work with probabilistic domain-specific data modeling languages that capture key properties of a broad class of data generating processes, using Bayesian inference to synthesize probabilistic programs in these modeling languages given observed data. We provide a precise formulation of Bayesian synthesis for automatic data modeling that identifies sufficient conditions for the resulting synthesis procedure to be sound. We also derive a general class of synthesis algorithms for domain-specific languages specified by probabilistic context-free grammars and establish the soundness of our approach for these languages. We apply the techniques to automatically synthesize probabilistic programs for time series data and multivariate tabular data. We show how to analyze the structure of the synthesized programs to compute, for key qualitative properties of interest, the probability that the underlying data generating process exhibits each of these properties. Second, we translate probabilistic programs in the domain-specific language into probabilistic programs in Venture, a general-purpose probabilistic programming system. The translated Venture programs are then executed to obtain predictions of new time series data and new multivariate data records. Experimental results show that our techniques can accurately infer qualitative structure in multiple real-world data sets and outperform standard data analysis methods in forecasting and predicting new data.},
}
@inproceedings{towner2019gen,
title = {Gen: a general-purpose probabilistic programming system with programmable inference},
author = {Cusumano-Towner, Marco F. and Saad, Feras A. and Lew, Alexander K. and Mansinghka, Vikash K.},
year = 2019,
booktitle = {PLDI 2019: Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation},
pages = {221--236},
publisher = {ACM},
address = {Phoenix, Arizona},
url_link = {https://dl.acm.org/citation.cfm?id=3314221.3314642},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3314642},
url_MIT_News = {http://news.mit.edu/2019/ai-programming-gen-0626},
url_Venture_Beat_News = {https://venturebeat.com/2019/06/27/mits-gen-programming-system-allows-users-to-easily-create-computer-vision-statistical-ai-and-robotics-programs},
abstract = {Although probabilistic programming is widely used for some restricted classes of statistical models, existing systems lack the flexibility and efficiency needed for practical use with more challenging models arising in fields like computer vision and robotics. This paper introduces Gen, a general-purpose probabilistic programming system that achieves modeling flexibility and inference efficiency via several novel language constructs: (i) the generative function interface for encapsulating probabilistic models; (ii) interoperable modeling languages that strike different flexibility/efficiency trade-offs; (iii) combinators that exploit common patterns of conditional independence; and (iv) an inference library that empowers users to implement efficient inference algorithms at a high level of abstraction. We show that Gen outperforms state-of-the-art probabilistic programming systems, sometimes by multiple orders of magnitude, on diverse problems including object tracking, estimating 3D body pose from a depth image, and inferring the structure of a time series.},
}
@inproceedings{zinberg2019structured,
title = {Structured differentiable models of {3D} scenes via generative scene graphs},
author = {Zinberg, Ben and Cusumano-Towner, Marco and Mansinghka, Vikash},
booktitle = {Workshop on Perception as Generative Reasoning (co-located with NeurIPS 2019)},
year = 2019,
url_paper = {https://pgr-workshop.github.io/img/PGR025.pdf},
abstract = {Generative approaches to 3D scene perception and physical reasoning are increasingly common. There is a widespread need for scene models that can incorporate planar and point-wise contacts between physical objects, and ensure that objects do not inter-penetrate. Generic priors on 6DOF poses---and bottom up neural networks that de-render images into scenes---typically violate these constraints. This abstract introduces a family of generative models for 3D scenes that respect pairwise physical contact constraints between objects. The innovation is to represent scenes in terms of hybrid symbolic-numerical scene graphs over objects, where the edges correspond to parameterized contract relationships (see Figure 1). Given this representation, 3D poses can be generated via a graph traversal. We show how to define prior distributions on scene graphics, and how 3D scene understanding can be phrased as posterior inference over the scene graph. This abstract also shows preliminary evidence that it is possible to infer scene graph parameters from empirical data, "derendering" images into structured 3D scene representations. This is implemented using the Gen probabilistic programming language. Finally, this abstract briefly discusses representation and inference challenges that need to be addressed to scale up the approach to more complex, real-world scenes.}
}
@inproceedings{felip2019real,
title = {Real-time approximate inference for scene understanding with generative models},
author = {Felip, Javier and Ahuja, Nilesh and G{\'o}mez-Guti{\'e}rrez, David and Tickoo, Omesh and Mansinghka, Vikash},
booktitle = {Workshop on Perceptions as Generative Reasoning (co-located with NeurIPS 2019)},
year = 2019,
url_paper = {https://pgr-workshop.github.io/img/PGR019.pdf},
abstract = {Consider scene understanding problems such as predicting where a person is probably reaching, or inferring the pose of 3D objects from depth images, or inferring the probable street crossings of pedestrians. This paper shows how to solve these problems using inference in generative models, by introducing new techniques for real-time inference. The underlying generative models are built from realistic simulation software, wrapped in a Bayesian error model for the gap between simulation outputs and real data. This paper shows that it is possible to perform approximately Bayesian inference in these models, obtaining high-quality approximations to the full posterior over scenes, without using bottom-up networks. Instead, this paper introduces two general real-time inference techniques. The first is to train neural surrogates of the simulators. The second is to adaptively discretize the latent variables using a Tree-Pyramid (TP) approach. THis paper also shows that by combining these techniques, it is possible to perform accurate, approximately Bayesian inference in realistic generative models, in real time.}
}
@article{handa2019inference,
title = {Compositional inference metaprogramming with convergence guarantees},
author = {Handa, Shivam and Mansinghka, Vikash and Rinard, Martin},
year = 2019,
journal = {arXiv preprint},
volume = {arXiv:1907.05451},
url_paper = {https://arxiv.org/pdf/1907.05451.pdf},
url_link = {https://arxiv.org/abs/1907.05451},
abstract = {Inference metaprogramming enables effective probabilistic programming by supporting the decomposition of executions of probabilistic programs into subproblems and the deployment of hybrid probabilistic inference algorithms that apply different probabilistic inference algorithms to different subproblems. We introduce the concept of independent subproblem inference (as opposed to entangled subproblem inference in which the subproblem inference algorithm operates over the full program trace) and present a mathematical framework for studying convergence properties of hybrid inference algorithms that apply different Markov-Chain Monte Carlo algorithms to different parts of the inference problem. We then use this formalism to prove asymptotic convergence results for probablistic programs with inference metaprogramming. To the best of our knowledge this is the first asymptotic convergence result for hybrid probabilistic inference algorithms defined by (subproblem-based) inference metaprogramming.},
}
@article{witty2019causal,
title = {Bayesian causal inference via probabilistic program synthesis},
author = {Witty, Sam and Lew, Alexander and Jensen, David and Mansinghka, Vikash},
year = 2019,
journal = {arXiv preprint},
volume = {arXiv:1910.14124},
url_paper = {https://arxiv.org/pdf/1910.14124.pdf},
url_link = {https://arxiv.org/abs/1910.14124},
abstract = {Causal inference can be formalized as Bayesian inference that combines a prior distribution over causal models and likelihoods that account for both observations and interventions. We show that it is possible to implement this approach using a sufficiently expressive probabilistic programming language. Priors are represented using probabilistic programs that generate source code in a domain specific language. Interventions are represented using probabilistic programs that edit this source code to modify the original generative process. This approach makes it straightforward to incorporate data from atomic interventions, as well as shift interventions, variance-scaling interventions, and other interventions that modify causal structure. This approach also enables the use of general-purpose inference machinery for probabilistic programs to infer probable causal structures and parameters from data. This abstract describes a prototype of this approach in the Gen probabilistic programming language.},
}
@article {bolton2019zebra,
title = {Elements of a stochastic {3D} prediction engine in larval zebrafish prey capture},
author = {Bolton, Andrew D. and Haesemeyer, Martin and Jordi, Joshua and Schaechtle, Ulrich and Saad, Feras A. and Mansinghka, Vikash K. and Tenenbaum, Joshua B. and Engert, Florian},
editor = {Berman, Gordon J},
volume = 8,
year = 2019,
month = nov,
pages = {e51975},
journal = {eLife},
fdoi = {10.7554/eLife.51975},
url_link = {https://doi.org/10.7554/eLife.51975},
abstract = {The computational principles underlying predictive capabilities in animals are poorly understood. Here, we wondered whether predictive models mediating prey capture could be reduced to a simple set of sensorimotor rules performed by a primitive organism. For this task, we chose the larval zebrafish, a tractable vertebrate that pursues and captures swimming microbes. Using a novel naturalistic 3D setup, we show that the zebrafish combines position and velocity perception to construct a future positional estimate of its prey, indicating an ability to project trajectories forward in time. Importantly, the stochasticity in the fish's sensorimotor transformations provides a considerable advantage over equivalent noise-free strategies. This surprising result coalesces with recent findings that illustrate the benefits of biological stochasticity to adaptive behavior. In sum, our study reveals that zebrafish are equipped with a recursive prey capture algorithm, built up from simple stochastic rules, that embodies an implicit predictive model of the world.},
publisher = {eLife Sciences Publications, Ltd},
}
@inproceedings{ackerman2019barriers,
title = {Algorithmic barriers to representing conditional independence},
author = {Ackerman, Nathanael L. and Avigad, Jeremy and Freer, Cameron E. and Roy, Daniel M. and Rute, Jason M.},
booktitle = {LICS 2019: Proceedings of the 34th Annual ACM/IEEE Symposium on Logic in Computer Science},
address = {Vancouver, British Columbia},
publisher = {ACM},
year = 2019,
url_paper = {http://math.mit.edu/~freer/papers/AAFRR-algorithmic-barriers.pdf},
url_doi = {10.1109/LICS.2019.8785762},
keywords = {conditional independence, computable probability distributions, graphons, cut distance, invariant measures, random-freeness},
abstract = {We define a representation of conditional independence in terms of products of probability kernels, and ask when such representations are computable. We pursue this question in the context of exchangeable sequences and arrays of random variables, which arise in statistical contexts. Exchangeable sequences are conditionally i.i.d. by de Finetti’s theorem. Known results about the computability of de Finetti’s theorem imply that these conditional independences are computable. The conditional independences underlying exchangeable arrays are characterized by the Aldous–Hoover theorem. In the special case of adjacency matrices of undirected graphs, i.e., symmetric binary arrays, this representation theorem expresses the conditional independences in terms of graphons. We prove that there exist exchangeable random graphs that can be computably sampled but whose corresponding graphons are not computable as functions or even as L1 equivalence classes. We also give results on the approximability of graphons in certain special cases.},
}
@inproceedings{saad2019gof,
title = {A family of exact goodness-of-fit tests for high-dimensional discrete distributions},
author = {Saad, Feras A. and Freer, Cameron E. and Ackerman, Nathanael L. and Mansinghka, Vikash K.},
booktitle = {AISTATS 2019: Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics},
volume = 89,
pages = {1640--1649},
series = {Proceedings of Machine Learning Research},
address = {Naha, Okinawa, Japan},
publisher = {PMLR},
year = 2019,
url_paper = {http://proceedings.mlr.press/v89/saad19a/saad19a.pdf},
url_supplement = {http://proceedings.mlr.press/v89/saad19a/saad19a-supp.pdf},
url_link = {http://proceedings.mlr.press/v89/saad19a.html},
keywords = {hypothesis testing, statistical inference, goodness-of-fit tests},
abstract = {The objective of goodness-of-fit testing is to assess whether a dataset of observations is likely to have been drawn from a candidate probability distribution. This paper presents a rank-based family of goodness-of-fit tests that is specialized to discrete distributions on high-dimensional domains. The test is readily implemented using a simulation-based, linear-time procedure. The testing procedure can be customized by the practitioner using knowledge of the underlying data domain. Unlike most existing test statistics, the proposed test statistic is distribution-free and its exact (non-asymptotic) sampling distribution is known in closed form. We establish consistency of the test against all alternatives by showing that the test statistic is distributed as a discrete uniform if and only if the samples were drawn from the candidate distribution. We illustrate its efficacy for assessing the sample quality of approximate sampling algorithms over combinatorially large spaces with intractable probabilities, including random partitions in Dirichlet process mixture models and random lattices in Ising models.},
}
@article{ackerman2019computability,
title = {On the computability of conditional probability},
author = {Ackerman, Nathanael L. and Freer, Cameron E. and Roy, Daniel M.},
journal = {Journal of the ACM},
volume = 66,
number = 3,
month = jun,
year = 2019,
pages = {23:1--23:40},
articleno = 23,
numpages = 40,
publisher = {ACM},
address = {New York, NY, USA},
url_doi = {http://doi.acm.org/10.1145/3321699},
url_paper = {https://arxiv.org/pdf/1005.3014.pdf},
keywords = {computable probability theory, conditional probability, real computation},
abstract = {As inductive inference and machine learning methods in computer science see continued success, researchers are aiming to describe ever more complex probabilistic models and inference algorithms. It is natural to ask whether there is a universal computational procedure for probabilistic inference. We investigate the computability of conditional probability, a fundamental notion in probability theory and a cornerstone ofBayesian statistics. We show that there are computable joint distributions with noncomputable conditional distributions, ruling out the prospect of general inference algorithms, even inefficient ones. Specifically, we construct a pair of computable random variables in the unit interval such that the conditional distribution of the first variable given the second encodes the halting problem. Nevertheless, probabilistic inference is possible in many common modeling settings, and we prove several results giving broadly applicable conditions under which conditional distributions are computable. In particular, conditional distributions become computable when measurements are corrupted by independent computable noise with a sufficiently smooth bounded density},
}
@inproceedings{blackwell2019,
title = {Usability of probabilistic programming languages},
author = {Blackwell, Alan F. and Kohn, Tobias and Erwig, Martin and Baydin, Atilim G. and Church, Luke and Geddes, James and Gordon, Andy and Gorinova, Maria and Gram-Hensen, Bradley and Lawrence, Neil and Mansinghka, Vikash K. and Paige, Brooks and Petricek, Tomas and Robinson, Diana and Sarkar, Advait and Strickson, Oliver},
booktitle = {PPIG 2019: 30th Annual Meeting of the Psychology of Programming Interest Group},
year = 2019,
url_paper = {https://web.engr.oregonstate.edu/~erwig/papers/UsabilityOfPPLs_PPIG19.pdf},
abstract = {This discussion paper presents a conversation between researchers having active interests in the usability of probabilistic programming languages (PPLs), but coming from a wide range of technical and research perspectives. Although PPL development is currently a vigorous and active research field, there has been very little attention to date to basic questions in the psychology of programming. Relevant issues include mental models associated with Bayesian probability, end-user applications of PPLs, the potential for data-first interaction styles, visualisation or explanation of model structure and solver behaviour, and many others}
}
@techreport{towner2018gen,
title = {Gen: A general-purpose probabilistic programming system with programmable inference},
author = {Cusumano-Towner, Marco F. and Saad, Feras A. and Lew, Alexander and Mansinghka, Vikash K.},
year = 2018,
number = {MIT-CSAIL-TR-2018-020},
institution = {Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology},
address = {Cambridge, Massachusetts},
month = nov,
url_link = {http://hdl.handle.net/1721.1/119255},
abstract = {Probabilistic modeling and inference are central to many fields. A key challenge for wider adoption of probabilistic programming languages is designing systems that are both flexible and performant. This paper introduces Gen, a new probabilistic programming system with novel language con- structs for modeling and for end-user customization and optimization of inference. Gen makes it practical to write probabilistic programs that solve problems from multiple fields. Gen programs can combine generative models written in Julia, neural networks written in TensorFlow, and custom inference algorithms based on an extensible library of Monte Carlo and numerical optimization techniques. This paper also presents techniques that enable Gen's combination of flexibility and performance: (i) the generative function interface, an abstraction for encapsulating probabilistic and/or differentiable computations; (ii) domain-specific languages with custom compilers that strike different flexibility/performance tradeoffs; (iii) combinators that encode common patterns of conditional independence and repeated compu- tation, enabling speedups from caching; and (iv) a standard inference library that supports custom proposal distributions also written as programs in Gen. This paper shows that Gen outperforms state-of-the-art probabilistic programming systems, sometimes by multiple orders of magnitude, on problems such as nonlinear state-space modeling, structure learning for real-world time series data, robust regression, and 3D body pose estimation from depth images.},
}
@inproceedings{mansinghka2018probabilistic,
title = {Probabilistic programming with programmable inference},
author = {Mansinghka, Vikash K. and Schaechtle, Ulrich and Handa, Shivam and Radul, Alexey and Chen, Yutian and Rinard, Martin},
booktitle = {PLDI 2018: Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
year = 2018,
publisher = {ACM},
pages = {603--616},
url_paper = {http://acm.mementodepot.org/pubs/proceedings/acmconferences_3192366/3192366/3192366.3192409/3192366.3192409.pdf},
url_link = {https://dl.acm.org/citation.cfm?id=3192409},
abstract = {We introduce inference metaprogramming for probabilistic programming languages, including new language constructs, a formalism, and the first demonstration of effectiveness in practice. Instead of relying on rigid black-box inference algorithms hard-coded into the language implementation as in previous probabilistic programming languages, inference metaprogramming enables developers to 1) dynamically decompose inference problems into subproblems, 2) apply inference tactics to subproblems, 3) alternate between incorporating new data and performing inference over existing data, and 4) explore multiple execution traces of the probabilistic program at once. Implemented tactics include gradient-based optimization, Markov chain Monte Carlo, variational inference, and sequental Monte Carlo techniques. Inference metaprogramming enables the concise expression of probabilistic models and inference algorithms across diverse fields, such as computer vision, data science, and robotics, within a single probabilistic programming language.},
}
@inproceedings{towner2018incremental,
title = {Incremental inference for probabilistic programs},
author = {Cusumano-Towner, Marco F. and Bichsel, Benjamin and Gehr, Timon and Vechev, Martin and Mansinghka, Vikash K.},
year = 2018,
booktitle = {PLDI 2018: Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
pages = {571--585},
publisher = {ACM},
url_paper = {https://files.sri.inf.ethz.ch/website/papers/pldi18_interemental_inference_for_probabilistic_programs.pdf},
url_link = {https://dl.acm.org/citation.cfm?id=3192399},
abstract = {We present a novel approach for approximate sampling in probabilistic programs based on incremental inference. The key idea is to adapt the samples for a program $P$ into samples for a program $Q$, thereby avoiding the expensive sampling computation for program $Q$. To enable incremental inference in probabilistic programming, our work: (i) introduces the concept of a trace translator which adapts samples from $P$ into samples of $Q$, (ii) phrases this translation approach in the context of sequential Monte Carlo (SMC), which gives theoretical guarantees that the adapted samples converge to the distribution induced by $Q$, and (iii) shows how to obtain a concrete trace translator by establishing a correspondence between the random choices of the two probabilistic programs. We implemented our approach in two different probabilistic programming systems and showed that, compared to methods that sample the program $Q$ from scratch, incremental inference can lead to orders of magnitude increase in efficiency, depending on how closely related $P$ and $Q$ are.},
}
@inproceedings{towner2018design,
title = {A design proposal for {Gen:} Probabilistic programming with fast custom inference via code generation},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {MAPL 2018: Workshop on Machine Learning and Programming Languages (co-located with PLDI 2018)},
year = 2018,
pages = {52--57},
url_link = {https://dl.acm.org/citation.cfm?id=3211350},
abstract = {Probabilistic programming languages have the potential to make probabilistic modeling and inference easier to use in practice, but only if inference is sufficiently fast and accurate for real applications. Thus far, this has only been possible for domain-specific languages that focus on a restricted class of models and inference algorithms. This paper proposes a design for a probabilistic programming language called Gen, embedded in Julia, that aims to be sufficiently expressive and performant for general-purpose use. The language provides constructs for automatically generating optimized implementations of custom inference tactics based on static analysis of the target probabilistic model. This paper informally describes a language design for Gen, and shows that Gen is more expressive than Stan, a widely used language for hierarchical Bayesian modeling. A first benchmark shows that a prototype implementation of Gen can be as fast as Stan, only ∼1.4x slower than a hand-coded sampler in Julia, and ∼7,500x faster than Venture, one of the only other probabilistic languages with support for custom inference.}
}
@inproceedings{saad2018trcrpm,
title = {Temporally-reweighted {Chinese} restaurant process mixtures for clustering, imputing, and forecasting multivariate time series},
author = {Saad, Feras A. and Mansinghka, Vikash K.},
booktitle = {AISTATS 2018: Proceedings of the 21st International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = 84,
pages = {755--764},
publisher = {PMLR},
year = 2018,
keywords = {probabilistic inference, multivariate time series, nonparametric Bayes, structure learning},
url_paper = {http://proceedings.mlr.press/v84/saad18a/saad18a.pdf},
url_supplement = {http://proceedings.mlr.press/v84/saad18a/saad18a-supp.pdf},
url_link = {http://proceedings.mlr.press/v84/saad18a.html},
url_code = {https://github.com/probcomp/trcrpm},
abstract = {This article proposes a Bayesian nonparametric method for forecasting, imputation, and clustering in sparsely observed, multivariate time series data. The method is appropriate for jointly modeling hundreds of time series with widely varying, non-stationary dynamics. Given a collection of N time series, the Bayesian model first partitions them into independent clusters using a Chinese restaurant process prior. Within a cluster, all time series are modeled jointly using a novel “temporally-reweighted” extension of the Chinese restaurant process mixture. Markov chain Monte Carlo techniques are used to obtain samples from the posterior distribution, which are then used to form predictive inferences. We apply the technique to challenging forecasting and imputation tasks using seasonal flu data from the US Center for Disease Control and Prevention, demonstrating superior forecasting accuracy and competitive imputation accuracy as compared to multiple widely used baselines. We further show that the model discovers interpretable clusters in datasets with hundreds of time series, using macroeconomic data from the Gapminder Foundation.},
}
@inproceedings{towner2018proposals,
title = {Using probabilistic programs as proposals},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {Workshop on Probabilistic Programming Languages, Semantics, and Systems (co-located with POPL 2018)},
year = 2018,
url_paper = {https://arxiv.org/pdf/1801.03612.pdf},
url_link = {https://popl18.sigplan.org/event/pps-2018-using-probabilistic-programs-as-proposals},
keywords = {probabilistic programming, amortized inference, randomized algorithms, neural networks, proposal distribution, importance sampling, metropolis-hastings, auxiliary variable},
abstract = {Monte Carlo inference has asymptotic guarantees, but can be slow when using generic proposals. Handcrafted proposals that rely on user knowledge about the posterior distribution can be efficient, but are difficult to derive and implement. This paper proposes to let users express their posterior knowledge in the form of proposal programs, which are samplers written in probabilistic programming languages. One strategy for writing good proposal programs is to combine domain-specific heuristic algorithms with neural network models. The heuristics identify high probability regions, and the neural networks model the posterior uncertainty around the outputs of the algorithm. Proposal programs can be used as proposal distributions in importance sampling and Metropolis-Hastings samplers without sacrificing asymptotic consistency, and can be optimized offline using inference compilation. Support for optimizing and using proposal programs is easily implemented in a sampling-based probabilistic programming runtime. The paper illustrates the proposed technique with a proposal program that combines RANSAC and neural networks to accelerate inference in a Bayesian linear regression with outliers model.},
}
@inproceedings{towner2017aide,
title = {{AIDE:} An algorithm for measuring the accuracy of probabilistic inference algorithms},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {NIPS 2017: Advances in Neural Information Processing Systems 30},
year = 2017,
pages = {3004--3014},
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms.pdf},
url_supplement = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms-supplemental.zip},
url_link = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms},
url_code = {https://github.com/probcomp/nips2017-aide-experiments},
keywords = {probabilistic inference, sequential monte carlo, variational inference, approximate inference, kullback-leibler},
abstract = {Approximate probabilistic inference algorithms are central to many fields. Examples include sequential Monte Carlo inference in robotics, variational inference in machine learning, and Markov chain Monte Carlo inference in statistics. A key problem faced by practitioners is measuring the accuracy of an approximate inference algorithm on a specific dataset. This paper introduces the auxiliary inference divergence estimator (AIDE), an algorithm for measuring the accuracy of approximate inference algorithms. AIDE is based on the observation that inference algorithms can be treated as probabilistic models and the random variables used within the inference algorithm can be viewed as auxiliary variables. This view leads to a new estimator for the symmetric KL divergence between the output distributions of two inference algorithms. The paper illustrates application of AIDE to algorithms for inference in regression, hidden Markov, and Dirichlet process mixture models. The experiments show that AIDE captures the qualitative behavior of a broad class of inference algorithms and can detect failure modes of inference algorithms that are missed by standard heuristics.},
}
@inproceedings{saad2017dependencies,
title = {Detecting dependencies in sparse, multivariate databases using probabilistic programming and non-parametric {Bayes}},
author = {Feras Saad and Vikash Mansinghka},
booktitle = {AISTATS 2017: Proceedings of the 20th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = 54,
pages = {632--641},
publisher = {PMLR},
year = 2017,
address = {Fort Lauderdale, Florida},
url_paper = {http://proceedings.mlr.press/v54/saad17a/saad17a.pdf},
url_supplement = {http://proceedings.mlr.press/v54/saad17a/saad17a-supp.pdf},
url_link = {http://proceedings.mlr.press/v54/saad17a.html},
abstract = {Datasets with hundreds of variables and many missing values are commonplace. In this setting, it is both statistically and computationally challenging to detect true predictive relationships between variables and also to suppress false positives. This paper proposes an approach that combines probabilistic programming, information theory, and non-parametric Bayes. It shows how to use Bayesian non-parametric modeling to (i) build an ensemble of joint probability models for all the variables; (ii) efficiently detect marginal independencies; and (iii) estimate the conditional mutual information between arbitrary subsets of variables, subject to a broad class of constraints. Users can access these capabilities using BayesDB, a probabilistic programming platform for probabilistic data analysis, by writing queries in a simple, SQL-like language. This paper demonstrates empirically that the method can (i) detect context-specific (in)dependencies on challenging synthetic problems and (ii) yield improved sensitivity and specificity over baselines from statistics and machine learning, on a real-world database of over 300 sparsely observed indicators of macroeconomic development and public health.},
keywords = {dependence detection, probabilistic programming, nonparametric Bayes},
}
@article{schaechtle2017timeseries,
title = {Time series structure discovery via probabilistic program synthesis},
author = {Schaechtle, Ulrich and Saad, Feras and Radul, Alexey and Mansinghka, Vikash K.},
year = 2017,
journal = {arXiv preprint},
volume = {arXiv:1611.07051},
url_paper = {https://arxiv.org/pdf/1611.07051.pdf},
url_link = {https://arxiv.org/abs/1611.07051},
keywords = {probabilistic programming, program synthesis, structure discovery},
abstract = {There is a widespread need for techniques that can discover structure from time series data. Recently introduced techniques such as Automatic Bayesian Covariance Discovery (ABCD) provide a way to find structure within a single time series by searching through a space of covariance kernels that is generated using a simple grammar. While ABCD can identify a broad class of temporal patterns, it is difficult to extend and can be brittle in practice. This paper shows how to extend ABCD by formulating it in terms of probabilistic program synthesis. The key technical ideas are to (i) represent models using abstract syntax trees for a domain-specific probabilistic language, and (ii) represent the time series model prior, likelihood, and search strategy using probabilistic programs in a sufficiently expressive language. The final probabilistic program is written in under 70 lines of probabilistic code in Venture. The paper demonstrates an application to time series clustering that involves a non-parametric extension to ABCD, experiments for interpolation and extrapolation on real-world econometric data, and improvements in accuracy over both non-parametric and standard regression baselines.},
}
@article{saad2017search,
title = {Probabilistic search for structured data via probabilistic programming and nonparametric {Bayes}},
author = {Saad, Feras and Casarsa, Leonardo and Mansinghka, Vikash K.},
year = 2017,
journal = {arXiv preprint},
volume = {arXiv:1704.01087},
url_paper = {https://arxiv.org/pdf/1704.01087.pdf},
url_link = {https://arxiv.org/abs/1704.01087},
keywords = {probabilistic search, nonparametric Bayes},
abstract = {Databases are widespread, yet extracting relevant data can be difficult. Without substantial domain knowledge, multivariate search queries often return sparse or uninformative results. This paper introduces an approach for searching structured data based on probabilistic programming and nonparametric Bayes. Users specify queries in a probabilistic language that combines standard SQL database search operators with an information theoretic ranking function called predictive relevance. Predictive relevance can be calculated by a fast sparse matrix algorithm based on posterior samples from CrossCat, a nonparametric Bayesian model for high-dimensional, heterogeneously-typed data tables. The result is a flexible search technique that applies to a broad class of information retrieval problems, which we integrate into BayesDB, a probabilistic programming platform for probabilistic data analysis. This paper demonstrates applications to databases of US colleges, global macroeconomic indicators of public health, and classic cars. We found that human evaluators often prefer the results from probabilistic search to results from a standard baseline.},
}
@article{towner2017agents,
title = {Probabilistic programs for inferring the goals of autonomous agents},
author = {Cusumano-Towner, Marco F. and Radul, Alexey and Wingate, David and Mansinghka, Vikash K.},
journal = {arXiv preprint},
volume = {arXiv:1704.04977},
year = {2017},
url_paper = {https://arxiv.org/pdf/1704.04977.pdf},
url_link = {https://arxiv.org/abs/1704.04977},
keywords = {probabilistic goal inference, probabilistic programming, probabilistic robotics},
abstract = {Intelligent systems sometimes need to infer the probable goals of people, cars, and robots, based on partial observations of their motion. This paper introduces a class of probabilistic programs for formulating and solving these problems. The formulation uses randomized path planning algorithms as the basis for probabilistic models of the process by which autonomous agents plan to achieve their goals. Because these path planning algorithms do not have tractable likelihood functions, new inference algorithms are needed. This paper proposes two Monte Carlo techniques for these "likelihood-free" models, one of which can use likelihood estimates from neural networks to accelerate inference. The paper demonstrates efficacy on three simple examples, each using under 50 lines of probabilistic code.},
}
@inproceedings{towner2017encapsulating,
title = {Encapsulating models and approximate inference programs in probabilistic modules},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {Workshop on Probabilistic Programming Semantics (co-located with POPL 2017)},
year = 2017,
url_paper = {https://arxiv.org/pdf/1612.04759.pdf},
url_link = {http://pps2017.soic.indiana.edu/2017/01/13/encapsulating-models-and-approximate-inference-programs-in-probabilistic-modules/},
abstract = {This paper introduces the probabilistic module interface, which allows encapsulation of complex probabilistic models with latent variables alongside custom stochastic approximate inference machinery, and provides a platform-agnostic abstraction barrier separating the model internals from the host probabilistic inference system. The interface can be seen as a stochastic generalization of a standard simulation and density interface for probabilistic primitives. We show that sound approximate inference algorithms can be constructed for networks of probabilistic modules, and we demonstrate that the interface can be implemented using learned stochastic inference networks and MCMC and SMC approximate inference programs.},
}
@article{saeedi2017variational,
title = {Variational particle approximations},
author = {Saeedi, Ardavan and Kulkarni, Tejas D. and Mansinghka, Vikash K. and Gershman, Samuel J.},
journal = {Journal of Machine Learning Research},
volume = {18},
number = {69},
pages = {1--29},
year = 2017,
url_paper = {http://jmlr.org/papers/volume18/15-615/15-615.pdf},
url_link = {http://jmlr.org/papers/v18/15-615.html},
abstract = {Approximate inference in high-dimensional, discrete probabilistic models is a central problem in computational statistics and machine learning. This paper describes discrete particle variational inference (DPVI), a new approach that combines key strengths of Monte Carlo, variational and search-based techniques. DPVI is based on a novel family of particle-based variational approximations that can be fit using simple, fast, deterministic search techniques. Like Monte Carlo, DPVI can handle multiple modes, and yields exact results in a well-defined limit. Like unstructured mean-field, DPVI is based on optimizing a lower bound on the partition function; when this quantity is not of intrinsic interest, it facilitates convergence assessment and debugging. Like both Monte Carlo and combinatorial search, DPVI can take advantage of factorization, sequential structure, and custom search operators. This paper defines DPVI particle-based approximation family and partition function lower bounds, along with the sequential DPVI and local DPVI algorithm templates for optimizing them. DPVI is illustrated and evaluated via experiments on lattice Markov Random Fields, nonparametric Bayesian mixtures and block-models, and parametric as well as non-parametric hidden Markov models. Results include applications to real-world spike-sorting and relational modeling problems, and show that DPVI can offer appealing time/accuracy trade-offs as compared to multiple alternatives.},
}
@inproceedings{saad2016probabilistic,
title = {A probabilistic programming approach to probabilistic data analysis},
author = {Saad, Feras and Mansinghka, Vikash K.},
booktitle = {NIPS 2016: Advances in Neural Information Processing Systems 29},
pages = {2011--2019},
year = 2016,
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/6060-a-probabilistic-programming-approach-to-probabilistic-data-analysis.pdf},
url_link = {https://papers.nips.cc/paper/6060-a-probabilistic-programming-approach-to-probabilistic-data-analysis},
abstract = {Probabilistic techniques are central to data analysis, but different approaches can be challenging to apply, combine, and compare. This paper introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include discriminative machine learning, hierarchical Bayesian models, multivariate kernel methods, clustering algorithms, and arbitrary probabilistic programs. We demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling definition language and structured query language. The practical value is illustrated in two ways. First, the paper describes an analysis on a database of Earth satellites, which identifies records that probably violate Kepler’s Third Law by composing causal probabilistic programs with nonparametric Bayes in 50 lines of probabilistic code. Second, it reports the lines of code and accuracy of CGPMs compared with baseline solutions from standard machine learning libraries.},
}
@inproceedings{schaechtle2016gp,
title = {Gaussian process structure learning via probabilistic inverse compilation},
author = {Schaechtle, Ulrich and Mansinghka, Vikash K.},
year = 2016,
booktitle = {Workshop on Interpretable Machine Learning in Complex Systems (co-located with NIPS 2016)},
url_paper = {https://arxiv.org/pdf/1611.07051v1.pdf},
}
@inproceedings{towner2016measuring,
title = {Measuring the non-asymptotic convergence of sequential {Monte} {Carlo} samplers using probabilistic programming},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
year = 2016,
booktitle = {Workshop on Approximate Inference (co-located with NIPS 2016)},
url_paper = {https://arxiv.org/pdf/1612.02161v1.pdf},
}
@inproceedings{curlette2016monitoring,
title = {Monitoring the errors of discriminative models with probabilistic programming},
author = {Curlette, Christina and Schaechtle, Ulrich and Mansinghka, Vikash K.},
booktitle = {Workshop on Reliable Machine Learning in the Wild (co-located with NIPS 2016)},
url_paper = {https://sites.google.com/site/wildml2016nips/CurlettePaper.pdf},
year = 2016,
}
@article{mansinghka2016crosscat,
title = {Crosscat: A fully {Bayesian}, nonparametric method for analyzing heterogeneous, high-dimensional data},
author = {Mansinghka, Vikash K. and Shafto, Patrick and Jonas, Eric and Petschulat, Cap and Gasner, Max and Tenenbaum, Joshua B.},
year = 2016,
journal = {Journal of Machine Learning Research},
volume = {17},
number = {138},
pages = {1-49},
url_paper = {http://jmlr.org/papers/volume17/11-392/11-392.pdf},
url_link = {http://jmlr.org/papers/v17/11-392.html},
abstract = {There is a widespread need for statistical methods that can analyze high-dimensional datasets without imposing restrictive or opaque modeling assumptions. This paper describes a domain-general data analysis method called CrossCat. CrossCat infers multiple non-overlapping views of the data, each consisting of a subset of the variables, and uses a separate nonparametric mixture to model each view. CrossCat is based on approximately Bayesian inference in a hierarchical, nonparametric model for data tables. This model consists of a Dirichlet process mixture over the columns of a data table in which each mixture component is itself an independent Dirichlet process mixture over the rows; the inner mixture components are simple parametric models whose form depends on the types of data in the table. CrossCat combines strengths of mixture modeling and Bayesian network structure learning. Like mixture modeling, CrossCat can model a broad class of distributions by positing latent variables, and produces representations that can be efficiently conditioned and sampled from for prediction. Like Bayesian networks, CrossCat represents the dependencies and independencies between variables, and thus remains accurate when there are multiple statistical signals. Inference is done via a scalable Gibbs sampling scheme; this paper shows that it works well in practice. This paper also includes empirical results on heterogeneous tabular data of up to 10 million cells, such as hospital cost and quality measures, voting records, unemployment rates, gene expression measurements, and images of handwritten digits. CrossCat infers structure that is consistent with accepted findings and common-sense knowledge in multiple domains and yields predictive accuracy competitive with generative, discriminative, and model-free alternatives.},
}
@article{saad2016analysis,
title = {Probabilistic data analysis with probabilistic programming},
author = {Saad, Feras and Mansinghka, Vikash K.},
year = 2016,
journal = {arXiv preprint},
volume = {arXiv:1608.05347},
url_paper = {https://arxiv.org/pdf/1608.05347.pdf},
url_link = {https://arxiv.org/abs/1608.05347},
abstract = {Probabilistic techniques are central to data analysis, but different approaches can be difficult to apply, combine, and compare. This paper introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include hierarchical Bayesian models, multivariate kernel methods, discriminative machine learning, clustering algorithms, dimensionality reduction, and arbitrary probabilistic programs. We also demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling language and a structured query language. The practical value is illustrated in two ways. First, CGPMs are used in an analysis that identifies satellite data records which probably violate Kepler’s Third Law, by composing causal probabilistic programs with non-parametric Bayes in under 50 lines of probabilistic code. Second, for several representative data analysis tasks, we report on lines of code and accuracy measurements of various CGPMs, plus comparisons with standard baseline solutions from Python and MATLAB libraries.},
}
@article{towner2016quantifying,
title = {Quantifying the probable approximation error of probabilistic inference programs},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
year = 2016,
journal = {arXiv preprint},
volume = {arXiv:1606.00068},
url_paper = {https://arxiv.org/pdf/1606.00068.pdf},
url_link = {https://arxiv.org/abs/1606.00068},
abstract = {This paper introduces a new technique for quantifying the approximation error of a broad class of probabilistic inference programs, including ones based on both variational and Monte Carlo approaches. The key idea is to derive a subjective bound on the symmetrized KL divergence between the distribution achieved by an approximate inference program and its true target distribution. The bound’s validity (and subjectivity) rests on the accuracy of two auxiliary probabilistic programs: (i) a “reference” inference program that defines a gold standard of accuracy and (ii) a “meta-inference” program that answers the question “what internal random choices did the original approximate inference program probably make given that it produced a particular result?” The paper includes empirical results on inference problems drawn from linear regression, Dirichlet process mixture modeling, HMMs, and Bayesian networks. The experiments show that the technique is robust to the quality of the reference inference program and that it can detect implementation bugs that are not apparent from predictive performance.},
}
@mastersthesis{saad2016thesis,
title = {Probabilistic data analysis with probabilistic programming},
author = {Saad, Feras A.},
year = 2016,
school = {Massachusetts Institute of Technology},
url_link = {https://dspace.mit.edu/handle/1721.1/113164},
abstract = {Probabilistic techniques are central to data analysis, but dierent approaches can be challenging to apply, combine, and compare. This thesis introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include hierarchical Bayesian models, multivariate kernel methods, discriminative machine learning, clustering algorithms, dimensionality reduction, and arbitrary probabilistic programs. We also demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling language and a structured query language. The practical value is illustrated in two ways. First, CGPMs are used in an analysis that identifies satellite data records which probably violate Kepler's Third Law, by composing causal probabilistic programs with non-parametric Bayes in under 50 lines of probabilistic code. Second, for several representative data analysis tasks, we report on lines of code and accuracy measurements of various CGPMs, plus comparisons with standard baseline solutions from Python and MATLAB libraries.},
note = {MIT Charles and Jennifer Johnson Computer Science Master of Engineering Thesis Award, First Place.}
}
@mastersthesis{lu2016thesis,
title = {Venture: An extensible platform for probabilistic meta-programming},
author = {Lu, Anthony},
year = 2016,
school = {Massachusetts Institute of Technology},
url_link = {https://dspace.mit.edu/handle/1721.1/113160},
abstract = {This thesis describes Venture, an extensible platform for probabilistic meta-programming. In Venture, probabilistic generative models, probability density functions, and probabilistic inference algorithms are all firstclass objects. Any Venture program that makes random choices can be treated as a probabilistic model defined over the space of possible executions of the program. Such probabilistic model programs can also be run while recording the random choices that they make. Modeling and inference in Venture involves two additional classes of probabilistic programs. The first, probability density meta-programs partially describe the input-output behavior of probabilistic model programs. The second, stochastic inference meta-programs identify probable executions of model programs given stochastic constraints, and typically use density metaprograms as guides. Unlike other probabilistic programming platforms, Venture allows model programs, density meta-programs, and inference meta-programs to be written as user-space code in a single probabilistic programming language. Venture is essentially a Lisp-like higher-order language augmented with two novel abstractions: (i) probabilistic execution traces, a first-class object that represents the sequence of random choices that a probabilistic program makes, and (ii) stochastic procedures, which encapsulate the probabilistic programs and meta-programs needed to allow simple probability distributions, user-space VentureScript programs, and foreign probabilistic programs to be treated uniformly as components of probabilistic computations. Venture also provides runtime support for stochastic regeneration of execution trace fragments that makes use of the programs and meta-programs of all stochastic procedures invoked during the execution of the original traced program. This thesis describes a new prototype implementation of Venture incorporating these ideas and illustrates the flexibility of Venture by giving concise user-space implementations of primitives and inference strategies that have been built in to Church as well as other probabilistic languages.},
}
@inproceedings{kulkarni2015picture,
title = {Picture: A probabilistic programming language for scene perception},
author = {Kulkarni, Tejas and Kohli, Pushmeet and Tenenbaum, Joshua B. and Mansinghka, Vikash K.},
booktitle = {CVPR 2015: IEEE Conference on Computer Vision and Pattern Recognition},
pages = {4390--4399},
year = 2015,
publisher = {IEEE},
url_paper = {https://mrkulk.github.io/www_cvpr15/1999.pdf},
url_supplement = {https://mrkulk.github.io/www_cvpr15/1999-supp.zip},
abstract = {Recent progress on probabilistic modeling and statistical learning, coupled with the availability of large training datasets, has led to remarkable progress in computer vision. Generative probabilistic models, or “analysis-by-synthesis” approaches, can capture rich scene structure but have been less widely applied than their discriminative counterparts, as they often require considerable problem-specific engineering in modeling and inference, and inference is typically seen as requiring slow, hypothesize-and-test Monte Carlo methods. Here we present Picture, a probabilistic programming language for scene understanding that allows researchers to express complex generative vision models, while automatically solving them using fast general-purpose inference machinery. Picture provides a stochastic scene language that can express generative models for arbitrary 2D/3D scenes, as well as a hierarchy of representation layers for comparing scene hypotheses with observed images by matching not simply pixels, but also more abstract features (e.g., contours, deep neural network activations). Inference can flexibly integrate advanced Monte Carlo strategies with fast bottomup data-driven methods. Thus both representations and inference strategies can build directly on progress in discriminatively trained systems to make generative vision more robust and efficient. We use Picture to write programs for 3D face analysis, 3D human pose estimation, and 3D object reconstruction – each competitive with specially engineered baselines.},
}
@inproceedings{demeent2015particle,
title = {Particle {Gibbs} with ancestor sampling for probabilistic programs},
author = {{van de Meent}, Jan-Willem and Yang, Hongseok and Mansinghka, Vikash K. and Wood, Frank},
year = 2015,
booktitle = {AISTATS 2015: Proceedings of the 18th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {38},
pages = {986--994},
publisher = {PMLR},
url_paper = {http://www.jmlr.org/proceedings/papers/v38/vandemeent15.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v38/vandemeent15.html},
abstract = {Particle Markov chain Monte Carlo techniques rank among current state-of-the-art methods for probabilistic program inference. A drawback of these techniques is that they rely on importance resampling, which results in degenerate particle trajectories and a low effective sample size for variables sampled early in a program. We here develop a formalism to adapt ancestor resampling, a technique that mitigates particle degeneracy, to the probabilistic programming setting. We present empirical results that demonstrate nontrivial performance gains.},
}
@inproceedings{huggins2015jump,
title = {{JUMP-means:} Small variance asymptotics for {M}arkov jump processes},
author = {Huggins, Jonathan and Narasimhan, Karthik and Saeedi, Aradavan and Mansinghka, Vikash K.},
booktitle = {ICML 2015: International Conference on Machine Learning},
series = {Proceedings of Machine Learning Research},
volume = {37},
year = 2015,
pages = {693--701},
url_paper = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15.pdf},
url_supplement = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15-supp.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15.html},
abstract = {Markov jump processes (MJPs) are used to model a wide range of phenomena from disease progression to RNA path folding. However, maximum likelihood estimation of parametric models leads to degenerate trajectories and inferential performance is poor in nonparametric models. We take a small-variance asymptotics (SVA) approach to overcome these limitations. We derive the small-variance asymptotics for parametric and nonparametric MJPs for both directly observed and hidden state models. In the parametric case we obtain a novel objective function which leads to non-degenerate trajectories. To derive the nonparametric version we introduce the gamma-gamma process, a novel extension to the gamma-exponential process. We propose algorithms for each of these formulations, which we call JUMP-means. Our experiments demonstrate that JUMP-means is competitive with or outperforms widely used MJP inference approaches in terms of both speed and reconstruction accuracy.},
}
@article{mansinghka2015bayesdb,
title = {{BayesDB:} A probabilistic programming system for querying the probable implications of data},
author = {Mansinghka, Vikash K. and Tibbetts, Richard and Baxter, Jay and Shafto, Pat and Eaves, Baxter},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1512.05006},
url_paper = {https://arxiv.org/pdf/1512.05006.pdf},
url_link = {https://arxiv.org/abs/1512.05006},
abstract = {Is it possible to make statistical inference broadly accessible to non-statisticians without sacrificing mathematical rigor or inference quality? This paper describes BayesDB, a probabilistic programming platform that aims to enable users to query the probable implications of their data as directly as SQL databases enable them to query the data itself. This paper focuses on four aspects of BayesDB: (i) BQL, an SQL-like query language for Bayesian data analysis, that answers queries by averaging over an implicit space of probabilistic models; (ii) techniques for implementing BQL using a broad class of multivariate probabilistic models; (iii) a semi-parametric Bayesian model-builder that auomatically builds ensembles of factorial mixture models to serve as baselines; and (iv) MML, a “meta-modeling” language for imposing qualitative constraints on the model-builder and combining baseline models with custom algorithmic and statistical models that can be implemented in external software. BayesDB is illustrated using three applications: cleaning and exploring a public database of Earth satellites; assessing the evidence for temporal dependence between macroeconomic indicators; and analyzing a salary survey.},
}
@article{schaechtle2015memoization,
title = {Probabilistic programming with {Gaussian} process memoization},
author = {Schaechtle, Ulrich and Zinberg, Ben and Radul, Alexey and Stathis, Kostas and Mansinghka, Vikash K.},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1512.05665},
url_paper = {https://arxiv.org/pdf/1512.05665v2.pdf},
url_link = {https://arxiv.org/abs/1512.05665v2},
abstract = {Gaussian Processes (GPs) are widely used tools in statistics, machine learning, robotics, computer vision, and scientific computation. However, despite their popularity, they can be difficult to apply; all but the simplest classification or regression applications require specification and inference over complex covariance functions that do not admit simple analytical posteriors. This paper shows how to embed Gaussian processes in any higherorder probabilistic programming language, using an idiom based on memoization, and demonstrates its utility by implementing and extending classic and state-of-the-art GP applications. The interface to Gaussian processes, called gpmem, takes an arbitrary real-valued computational process as input and returns a statistical emulator that automatically improve as the original process is invoked and its input-output behavior is recorded. The flexibility of gpmem is illustrated via three applications: (i) Robust GP regression with hierarchical hyper-parameter learning, (ii) discovering symbolic expressions from time-series data by fully Bayesian structure learning over kernels generated by a stochastic grammar, and (iii) a bandit formulation of Bayesian optimization with automatic inference and action selection. All applications share a single 50-line Python library and require fewer than 20 lines of probabilistic code each.},
}
@article{chen2015sublinear,
title = {Sublinear-time approximate {MCMC} transitions for probabilistic programs},
author = {Chen, Yutian and Mansinghka, Vikash K. and Ghahramani, Zoubin},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1411.1690},
url_paper = {https://arxiv.org/pdf/1411.1690.pdf},
url_link = {https://arxiv.org/abs/1411.1690},
abstract = {Probabilistic programming languages can simplify the development of machine learning techniques, but only if inference is sufficiently scalable. Unfortunately, Bayesian parameter estimation for highly coupled models such as regressions and state-space models still scales poorly; each MCMC transition takes linear time in the number of observations. This paper describes a sublinear-time algorithm for making Metropolis-Hastings (MH) updates to latent variables in probabilistic programs. The approach generalizes recently introduced approximate MH techniques: instead of subsampling data items assumed to be independent, it subsamples edges in a dynamically constructed graphical model. It thus applies to a broader class of problems and interoperates with other general-purpose inference techniques. Empirical results, including confirmation of sublinear per-transition scaling, are presented for Bayesian logistic regression, nonlinear classification via joint Dirichlet process mixtures, and parameter estimation for stochastic volatility models (with state estimation via particle MCMC). All three applications use the same implementation, and each requires under 20 lines of probabilistic code.},
}
@mastersthesis{zinberg2015thesis,
title = {Bayesian optimization as a probabilistic meta-program},
author = {Zinberg, Benjamin},
year = 2015,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/106374/967346485-MIT.pdf?sequence=1},
url_link = {https://dspace.mit.edu/handle/1721.1/106374},
abstract = {This thesis answers two questions: 1. How should probabilistic programming languages incorporate Gaussian processes? and 2. Is it possible to write a probabilistic meta-program for Bayesian optimization, a probabilistic meta-algorithm that can combine regression frameworks such as Gaussian processes with a broad class of parameter estimation and optimization techniques? We answer both questions affirmatively, presenting both an implementation and informal semantics for Gaussian process models in probabilistic programming systems, and a probabilistic meta-program for Bayesian optimization. The meta-program exposes modularity common to a wide range of Bayesian optimization methods in a way that is not apparent from their usual treatment in statistics.},
}
@inproceedings{wood2014anglican,
title = {A new approach to probabilistic programming inference},
author = {Wood, Frank and {van de Meent}, Jan-Willem and Mansinghka, Vikash K.},
booktitle = {AISTATS 2014: Proceedings of the 17th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {33},
year = 2014,
publisher = {PMLR},
pages = {1024--1032},
url_paper = {http://www.jmlr.org/proceedings/papers/v33/wood14.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v33/wood14.html},
abstract = {We introduce and demonstrate a new approach to inference in expressive probabilistic programming languages based on particle Markov chain Monte Carlo. Our approach is simple to implement and easy to parallelize. It applies to Turing-complete probabilistic programming languages and supports accurate inference in models that make use of complex control flow, including stochastic recursion. It also includes primitives from Bayesian nonparametric statistics. Our experiments show that this approach can be more efficient than previously introduced single-site Metropolis-Hastings methods.},
}
@inproceedings{saeedi2014automatic,
title = {Automatic inference for inverting software simulators via probabilistic programming},
author = {Saeedi,Ardavan and Firoiu, Vlad and Mansinghka, Vikash K.},
booktitle = {Workshop on Automatic Machine Learning (co-located with ICML 2014)},
year = 2014,
url_paper = {https://arxiv.org/pdf/1506.00308.pdf},
url_link = {https://arxiv.org/pdf/1506.00308.html},
abstract = {Models of complex systems are often formalized as sequential software simulators: computationally intensive programs that iteratively build up probable system configurations given parameters and initial conditions. These simulators enable modelers to capture effects that are difficult to characterize analytically or summarize statistically. However, in many realworld applications, these simulations need to be inverted to match the observed data. This typically requires the custom design, derivation and implementation of sophisticated inversion algorithms. Here we give a framework for inverting a broad class of complex software simulators via probabilistic programming and automatic inference, using under 20 lines of probabilistic code. Our approach is based on a formulation of inversion as approximate inference in a simple sequential probabilistic model. We implement four inference strategies, including Metropolis-Hastings, a sequentialized Metropolis-Hastings scheme, and a particle Markov chain Monte Carlo scheme, requiring 4 or fewer lines of probabilistic code each. We demonstrate our framework by applying it to invert a real geological software simulator from the oil and gas industry.},
}
@article{mansinghka2014venture,
title = {Venture: A higher-order probabilistic programming platform with programmable inference},
author = {Mansinghka, Vikash K. and Selsam, Daniel and Perov, Yura},
year = 2014,
journal = {arXiv preprint},
volume = {arXiv:1404.0099},
url_paper = {https://arxiv.org/pdf/1404.0099.pdf},
url_link = {https://arxiv.org/abs/1404.0099},
abstract = {We describe Venture, an interactive virtual machine for probabilistic programming that aims to be sufficiently expressive, extensible, and efficient for general-purpose use. Like Church, probabilistic models and inference problems in Venture are specified via a Turing-complete, higher-order probabilistic language descended from Lisp. Unlike Church, Venture also provides a compositional language for custom inference strategies, assembled from scalable implementations of several exact and approximate techniques. Venture is thus applicable to problems involving widely varying model families, dataset sizes and runtime/accuracy constraints. We also describe four key aspects of Venture’s implementation that build on ideas from probabilistic graphical models. First, we describe the stochastic procedure interface (SPI) that specifies and encapsulates primitive random variables, analogously to conditional probability tables in a Bayesian network. The SPI supports custom control flow, higher-order probabilistic procedures, partially exchangeable sequences and “likelihood-free” stochastic simulators, all with custom proposals. It also supports the integration of external models that dynamically create, destroy and perform inference over latent variables hidden from Venture. Second, we describe probabilistic execution traces (PETs), which represent execution histories of Venture programs. Like Bayesian networks, PETs capture conditional dependencies, but PETs also represent existential dependencies and exchangeable coupling. Third, we describe partitions of execution histories called scaffolds that can be efficiently constructed from PETs and that factor global inference problems into coherent sub-problems. Finally, we describe a family of stochastic regeneration algorithms for efficiently modifying PET fragments contained within scaffolds without visiting conditionally independent random choices. Stochastic regeneration insulates inference algorithms from the complexities introduced by changes in execution structure, with runtime that scales linearly in cases where previous approaches often scaled quadratically and were therefore impractical. We show how to use stochastic regeneration and the SPI to implement general-purpose inference strategies such as Metropolis-Hastings, Gibbs sampling, and blocked proposals based on hybrids with both particle Markov chain Monte Carlo and mean-field variational inference techniques.},
}
@article{mansinghka2014machines,
title = {Building fast {Bayesian} computing machines out of intentionally stochastic parts},
author = {Mansinghka, Vikash K. and Jonas, Eric},
year = 2014,
journal = {arXiv preprint},
volume = {arXiv:1402:4914},
url_paper = {http://probcomp.csail.mit.edu/wordpress/wp-content/uploads/2020/01/1402.4914_withSupplement_Jan20.pdf},
url_link = {http://probcomp.csail.mit.edu/wordpress/wp-content/uploads/2020/01/1402.4914_withSupplement_Jan20.pdf},
abstract = {The brain interprets ambiguous sensory information faster and more reliably than modern computers, using neurons that are slower and less reliable than logic gates. But Bayesian inference, which underpins many computational models of perception and cognition, appears computationally challenging even given modern transistor speeds and energy budgets. The computational principles and structures needed to narrow this gap are unknown. Here we show how to build fast Bayesian computing machines using intentionally stochastic, digital parts, narrowing this efficiency gap by multiple orders of magnitude. We find that by connecting stochastic digital components according to simple mathematical rules, one can build massively parallel, low precision circuits that solve Bayesian inference problems and are compatible with the Poisson firing statistics of cortical neurons. We evaluate circuits for depth and motion perception, perceptual learning and causal reasoning, each performing inference over 10,000+ latent variables in real time — a 1,000x speed advantage over commodity microprocessors. These results suggest a new role for randomness in the engineering and reverse-engineering of intelligent computation.},
}
@inproceedings{mansinghka2013gpgp,
title = {Approximate {Bayesian} image interpretation using generative probabilistic graphics programs},
author = {Mansinghka, Vikash K. and Kulkarni, Tejas and Perov, Yura and Tenenbaum, Joshua B.},
booktitle = {NIPS 2013: Advances in Neural Information Processing Systems 26},
year = 2013,
pages = {1520--1528},
publisher = {Curran Associates},
url_paper = {http://papers.nips.cc/paper/4881-approximate-bayesian-image-interpretation-using-generative-probabilistic-graphics-programs.pdf},
url_link = {http://papers.nips.cc/paper/4881-approximate-bayesian-image-interpretation-using-generative-probabilistic-graphics-programs},
abstract = {The idea of computer vision as the Bayesian inverse problem to computer graphics has a long history and an appealing elegance, but it has proved difficult to directly implement. Instead, most vision tasks are approached via complex bottom-up processing pipelines. Here we show that it is possible to write short, simple probabilistic graphics programs that define flexible generative models and to automatically invert them to interpret real-world images. Generative probabilistic graphics programs (GPGP) consist of a stochastic scene generator, a renderer based on graphics software, a stochastic likelihood model linking the renderer’s output and the data, and latent variables that adjust the fidelity of the renderer and the tolerance of the likelihood. Representations and algorithms from computer graphics are used as the deterministic backbone for highly approximate and stochastic generative models. This formulation combines probabilistic programming, computer graphics, and approximate Bayesian computation, and depends only on generalpurpose, automatic inference techniques. We describe two applications: reading sequences of degraded and adversarially obscured characters, and inferring 3D road models from vehicle-mounted camera images. Each of the probabilistic graphics programs we present relies on under 20 lines of probabilistic code, and yields accurate, approximately Bayesian inferences about real-world images.},
}
@inproceedings{deka2013power,
title = {Markov chain algorithms: A template for building future robust low power systems},
author = {Deka, Biplab and Birklykke, Alex and Duwe, Henry and Mansinghka, Vikash K. and Kumar, Rakesh},
booktitle = {ACSSC 2013: Proceedings of the 47th Annual Asilomar Conference on Signals, Systems, and Computers},
year = 2013,
publisher = {IEEE},
url_paper = {http://rakeshk.crhc.illinois.edu/asilomar13_cam.pdf},
abstract = {Although computational systems are looking towards post CMOS devices in the pursuit of lower power, the inherent unreliability of such devices makes it difficult to design robust systems without additional power overheads for guaranteeing robustness. As such, algorithmic structures with inherent ability to tolerate computational errors are of significant interest. We propose to cast applications as stochastic algorithms based on Markov chains as such algorithms are both sufficiently general and tolerant to transition errors. We show with four example applications - boolean satisfiability (SAT), sorting, LDPC decoding and clustering - how applications can be cast as Markov Chain algorithms. Using algorithmic fault injection techniques, we demonstrate the robustness of these implementations to transition errors with high error rates. Based on these results, we make a case for using Markov Chains as an algorithmic template for future robust low power systems.},
}
@article{sanborn2013intuitive,
title = {Reconciling intuitive physics and {Newtonian} mechanics for colliding objects},
author = {Sanborn, Adam and Mansinghka, Vikash K. and Griffiths, Thomas},
journal = {Psychological Review},
volume = {120},
number = {2},
pages = {411-437},
year = 2013,
url_paper = {https://cocosci.berkeley.edu/tom/papers/collisionPsychReview_inpress.pdf},
url_link = {https://www.ncbi.nlm.nih.gov/pubmed/23458084},
abstract = {People have strong intuitions about the influence objects exert upon one another when they collide. Because people’s judgments appear to deviate from Newtonian mechanics, psychologists have suggested that people depend on a variety of task-specific heuristics. This leaves open the question of how these heuristics could be chosen, and how to integrate them into a unified model that can explain human judgments across a wide range of physical reasoning tasks. We propose an alternative framework, in which people’s judgments are based on optimal statistical inference over a Newtonian physical model that incorporates sensory noise and intrinsic uncertainty about the physical properties of the objects being viewed. This “noisy Newton” framework can be applied to a multitude of judgments, with people’s answers determined by the uncertainty they have for physical variables and the constraints of Newtonian mechanics. We investigate a range of effects in mass judgments that have previously been taken as strong evidence for heuristic use and show that they are well explained by the interplay between Newtonian constraints and sensory uncertainty. We also consider an extended model that handles causality judgments, and obtain good quantitative agreement with human judgments across tasks that involve different judgment types with a single consistent set of parameters.},
}
@article{shafto2011crosscat,
title = {A probabilistic model of cross-categorization},
author = {Shafto, Patrick and Kemp, Charles and Mansinghka, Vikash K. and Tenenbaum, Joshua B.},
journal = {Cognition},
volume = {120},
number = 1,
pages = {1--25},
year = 2011,
url_paper = {http://www.psy.cmu.edu/~ckemp/papers/shaftokmt11_aprobabilisticmodelofcrosscategorization.pdf},
abstract = {Most natural domains can be represented in multiple ways: we can categorize foods in terms of their nutritional content or social role, animals in terms of their taxonomic groupings or their ecological niches, and musical instruments in terms of their taxonomic categories or social uses. Previous approaches to modeling human categorization have largely ignored the problem of cross-categorization, focusing on learning just a single system of categories that explains all of the features. Cross-categorization presents a difficult problem: how can we infer categories without first knowing which features the categories are meant to explain? We present a novel model that suggests that human cross-categorization is a result of joint inference about multiple systems of categories and the features that they explain. We also formalize two commonly proposed alternative explanations for cross-categorization behavior: a features-first and an objects-first approach. The features-first approach suggests that cross-categorization is a consequence of attentional processes, where features are selected by an attentional mechanism first and categories are derived second. The objects-first approach suggests that cross-categorization is a consequence of repeated, sequential attempts to explain features, where categories are derived first, then features that are poorly explained are recategorized. We present two sets of simulations and experiments testing the models’ predictions about human categorization. We find that an approach based on joint inference provides the best fit to human categorization behavior, and we suggest that a full account of human category learning will need to incorporate something akin to these capabilities.},
}
@inproceedings{freer2010computationally,
title = {When are probabilistic programs probably computationally tractable?},
author = {Freer, Cameron and Mansinghka, Vikash K. and Roy, Daniel},
booktitle = {Workshop on Monte Carlo Methods for Modern Applications (co-located with NIPS 2010)},
year = 2010,
url_paper = {http://danroy.org/papers/FreerManRoy-NIPSMC-2010.pdf},
abstract = {We study the computational complexity of Bayesian inference through the lens of simulating probabilistic programs. Our approach is grounded in new conceptual hypotheses about some intrinsic drivers of computational complexity, drawn from the experience of computational Bayesian statisticians. We sketch a research program to address two issues, via a combination of theory and experiments: (a) What conditions suffice for Bayesian inference by posterior simulation to be computationally efficient? (b) How should probabilistic programmers write their probabilistic programs so as to maximize their probable tractability? The research program we articulate, if successful, may help to explain the gap between the unreasonable effectiveness of some simple, widely-used sampling algorithms and the classical study of reductions to Bayes from hard problems of deduction, arithmetic, and optimization.},
}
@inproceedings{mansinghka2009systematic,
title = {Exact and approximate sampling by systematic stochastic search},
author = {Mansinghka, Vikash K. and Roy, Daniel and Jonas, Eric and Tenenbaum, Joshua B.},
booktitle = {AISTATS 2009: Proceedings of the 12th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {5},
pages = {400--407},
year = 2009,
publisher = {PMLR},
url_paper = {http://web.mit.edu/cocosci/Papers/sss_camera.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v5/mansinghka09a.html},
abstract = {We introduce adaptive sequential rejection sampling, an algorithm for generating exact samples from high-dimensional, discrete distributions, building on ideas from classical AI search. Just as systematic search algorithms like A* recursively build complete solutions from partial solutions, sequential rejection sampling recursively builds exact samples over high-dimensional spaces from exact samples over lower-dimensional subspaces. Our algorithm recovers widely-used particle filters as an approximate variant without adaptation, and a randomized version of depth first search with backtracking when applied to deterministic problems. In this paper, we present the mathematical and algorithmic underpinnings of our approach and measure its behavior on undirected and directed graphical models, obtaining exact and approximate samples in a range of situations.},
}
@inproceedings{sanborn2009intuitive,
title = {A {Bayesian} framework for modeling intuitive dynamics},
author = {Sanborn, Adam and Mansinghka, Vikash K. and Griffiths, Thomas},
booktitle = {COGSCI 2009: Proceedings of the 31st Annual Conference of the Cognitive Science Society},
year = 2009,
url_paper = {https://cocosci.berkeley.edu/tom/papers/collisions.pdf},
abstract = {People have strong intuitions about the masses of objects and the causal forces that they exert upon one another. These intuitions have been explored through a variety of tasks, in particular judging the relative masses of objects involved in collisions and evaluating whether one object caused another to move. We present a single framework for explaining two types of judgments that people make about the dynamics of objects, based on Bayesian inference. In this framework, we define a particular model of dynamics – essentially Newtonian physics plus Gaussian noise – which makes predictions about the trajectories of objects following collisions based on their masses. By applying Bayesian inference, it becomes possible to reason from trajectories back to masses, and to reason about whether one object caused another to move. We use this framework to predict human causality judgments using data collected from a mass judgment task.},
}
@inproceedings{mansinghka2009crosscat,
title = {Cross-categorization: A method for discovering multiple overlapping clusterings},
author = {Mansinghka, Vikash K. and Jonas, Eric and Petschulat, Cap and Cronin, Beau and Shafto, Pat and Tenenbaum, Joshua B.},
booktitle = {Workshop on Nonparametric Bayes (co-located with NIPS 2009)},
year = 2009,
url_paper = {http://web.mit.edu/vkm/www/crosscat.pdf},
abstract = {Model-based clustering techniques, including inference in Dirichlet process mixture models, have difficulty when different dimensions are best explained by very different clusterings. We introduce cross-categorization, an unsupervised learning technique that overcomes this basic limitation. Based on MCMC inference in a novel nonparametric Bayesian model, cross-categorization automatically discovers the number of independent nonparametric Bayesian models needed to explain the data, using a separate Dirichlet process mixture model for each group in an inferred partition of the dimensions. Unlike a DP mixture, our model is exchangeable over both the rows of a heterogeneous data array (the samples) and the columns (new dimensions), and can model any dataset as the number of samples and dimensions both go to infinity. We demonstrate the efficiency and robustness of our algorithm, including experiments on the full Dartmouth Health Atlas dataset without any preprocessing, showing that it finds veridical causal structure.},
}
@inproceedings{goodman2008church,
title = {Church: A universal language for generative models},
author = {Goodman, Noah and Mansinghka, Vikash K. and Roy, Daniel and Bonawitz, Keith and Tenenbaum, Joshua B.},
booktitle = {UAI 2008: Proceedings of the 24th Conference on Uncertainty in Artificial Intelligence},
pages = {220--229},
year = 2008,
publisher = {AUAI Press},
url_paper = {https://arxiv.org/pdf/1206.3255v2.pdf},
url_link = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1346&proceeding_id=24},
abstract = {Formal languages for probabilistic modeling enable re-use, modularity, and descriptive clarity, and can foster generic inference techniques. We introduce Church, a universal language for describing stochastic generative processes. Church is based on the Lisp model of lambda calculus, containing a pure Lisp as its deterministic subset. The semantics of Church is defined in terms of evaluation histories and conditional distributions on such histories. Church also includes a novel language construct, the stochastic memoizer, which enables simple description of many complex non-parametric models. We illustrate language features through several examples, including: a generalized Bayes net in which parameters cluster over trials, infinite PCFGs, planning by inference, and various non-parametric clustering models. Finally, we show how to implement query on any Church program, exactly and approximately, using Monte Carlo techniques.},
}
@inproceedings{roy2008stochastic,
title = {A stochastic programming perspective on nonparametric {Bayes}},
author = {Roy, Daniel and Mansinghka, Vikash K. and Goodman, Noah and Tenenbaum Joshua B.},
booktitle = {Workshop on Nonparametric Bayes (co-located with ICML 2008)},
year = 2008,
url_paper = {http://danroy.org/papers/RoyManGooTen-ICMLNPB-2008.pdf},
abstract = {We use Church, a Turing-universal language for stochastic generative processes and the probability distributions they induce, to study and extend several objects in nonparametric Bayesian statistics. We connect exchangeability and de Finetti measures with notions of purity and closures from functional programming. We exploit delayed evaluation to provide finite, machine-executable representations for various nonparametric Bayesian objects. We relate common uses of the Dirichlet process to a stochastic generalization of memoization, and use this abstraction to compactly describe and extend several nonparametric models. Finally, we briefly discuss issues of computability and inference.},
}
@techreport{mansinghka2008stochastic,
title = {Stochastic digital circuits for probabilistic inference},
author = {Mansinghka, Vikash K. and Jonas, Eric and Tenenbaum, Joshua B.},
year = 2008,
number = {MIT-CSAIL-TR-2008-069},
institution = {Computer Science and Artificial Intelligence Laboratory},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/43712/MIT-CSAIL-TR-2008-069.pdf},
url_link = {https://dspace.mit.edu/handle/1721.1/43712},
abstract = {We introduce combinational stochastic logic, an abstraction that generalizes deterministic digital circuit design (based on Boolean logic gates) to the probabilistic setting. We show how this logic can be combined with techniques from contemporary digital design to generate stateless and stateful circuits for exact and approximate sampling from a range of probability distributions. We focus on Markov chain Monte Carlo algorithms for Markov random fields, using massively parallel circuits. We implement these circuits on commodity reconfigurable logic and estimate the resulting performance in time, space and price. Using our approach, these simple and general algorithms could be affordably run for thousands of iterations on models with hundreds of thousands of variables in real time},
}
@phdthesis{mansinghka2009thesis,
title = {Natively probabilistic computation},
author = {Mansinghka, Vikash K.},
year = 2009,
school = {Massachusetts Institute of Technology},
url_paper = {http://web.mit.edu/vkm/www/vkm-dissertation.pdf},
url_link = {http://hdl.handle.net/1721.1/47892},
note = {Winner of the Geroge M. Sprowls dissertation award.},
}
@mastersthesis{mansinghka2009meng,
title = {Nonparametric {Bayesian} models for supervised and unsupervised learning},
author = {Mansinghka, Vikash K.},
year = 2009,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/53172/517976994-MIT.pdf?sequence=2},
url_link = {https://dspace.mit.edu/handle/1721.1/53172},
abstract = {I introduce two nonparametric Bayesian methods for solving problems of supervised and unsupervised learning. The first method simultaneously learns causal networks and causal theories from data. For example, given synthetic co-occurrence data from a simple causal model for the medical domain, it can learn relationships like "having a flu causes coughing", while also learning that observable quantities can be usefully grouped into categories like diseases and symptoms, and that diseases tend to cause symptoms, not the other way around. The second method is an online algorithm for learning a prototype-based model for categorial concepts, and can be used to solve problems of multiclass classification with missing features. I apply it to problems of categorizing newsgroup posts and recognizing handwritten digits. These approaches were inspired by a striking capacity of human learning, which shouldalso be a desideratum for any intelligent system: the ability to learn certain kinds of "simple" or "natural" structures very quickly, while still being able to learn arbitrary - and arbitrarily complex - structures given enough data. In each case, I show how nonparametric Bayesian modeling and inference based on stochastic simulation give us some of the tools we need to achieve this goal.},
}
@inproceedings{goodman2007learning,
title = {Learning grounded causal models},
author = {Goodman, Noah and Mansinghka, Vikash K. and Tenenbaum, Joshua B},
year = 2007,
booktitle = {COGSCI 2007: Proceedings of the 29th Annual Conference of the Cognitive Science Society},
pages = {305},
url_paper = {https://cocolab.stanford.edu/papers/GoodmanEtAl2007-Cogsci.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.422.2173},
note = {Winner of the Cognitive Science Society Computational Modeling Prize for Perception and Action.},
abstract = {We address the problem of learning grounded causal models: systems of concepts that are connected by causal relations and explicitly grounded in perception. We present a Bayesian framework for learning these models—both a causal Bayesian network structure over variables and the consequential region of each variable in perceptual space—from dynamic perceptual evidence. Using a novel experimental paradigm we show that humans are able to learn grounded causal models, and that the Bayesian model accounts well for human performance.},
}
@inproceedings{goldwater2007modeling,
title = {Modeling human performance in statistical word segmentation},
author = {Goldwater, Frank and Mansinghka, Vikash K. and Griffiths, Thomas and Tenenbaum, Joshua B.},
booktitle = {COGSCI 2007: Proceedings of the 29th Annual Conference of the Cognitive Science Society},
year = 2007,
pages = {281},
url_paper = {http://csjarchive.cogsci.rpi.edu/proceedings/2007/docs/p281.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.124.6889},
abstract = {What mechanisms support the ability of human infants, adults, and other primates to identify words from fluent speech using distributional regularities? In order to better characterize this ability, we collected data from adults in an artificial language segmentation task similar to Saffran, Newport, and Aslin (1996) in which the length of sentences was systematically varied between groups of participants. We then compared the fit of a variety of computational models— including simple statistical models of transitional probability and mutual information, a clustering model based on mutual information by Swingley (2005), PARSER (Perruchet & Vintner, 1998), and a Bayesian model. We found that while all models were able to successfully complete the task, fit to the human data varied considerably, with the Bayesian model achieving the highest correlation with our results.},
}
@inproceedings{mansinghka2007aclass,
title = {{AClass:} An online algorithm for generative classification},
author = {Mansinghka, Vikash K. and Roy, Daniel and Rifkin, Ryan and Tenenbaum, Joshua B.},
booktitle = {AISTATS 2007: Artificial Intelligence and Statistics 11},
volume = {2},
pages = {315--322},
year = 2007,
publisher = {PMLR},
url_paper = {http://www.jmlr.org/proceedings/papers/v2/mansinghka07a/mansinghka07a.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v2/mansinghka07a.html},
abstract = {We present AClass, a simple, online, parallelizable algorithm for supervised multiclass classification. AClass models each class conditional density as a Chinese restaurant process mixture, and performs approximate inference in this model using a sequential Monte Carlo scheme. AClass combines several strengths of previous approaches to classification that are not typically found in a single algorithm; it supports learning from missing data and yields sensibly regularized nonlinear decision boundaries while remaining computationally efficient. We compare AClass to several standard classification algorithms and show competitive performance.},
}
@inproceedings{mansinghka2006structured,
title = {Structured priors for structure learning},
author = {Mansinghka, Vikash K. and Kemp, Charles and Tenenbaum, Joshua B.},
year = 2006,
booktitle = {UAI 2006: Uncertainty in Artificial Intelligence 22},
pages = {324--331},
url_paper = {https://dslpitt.org/uai/papers/06/p324-mansinghka.pdf},
url_link = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1314&proceeding_id=22},
abstract = {Traditional approaches to Bayes net structure learning typically assume little regularity in graph structure other than sparseness. However, in many cases, we expect more systematicity: variables in real-world systems often group into classes that predict the kinds of probabilistic dependencies they participate in. Here we capture this form of prior knowledge in a hierarchical Bayesian framework, and exploit it to enable structure learning and type discovery from small datasets. Specifically, we present a nonparametric generative model for directed acyclic graphs as a prior for Bayes net structure learning. Our model assumes that variables come in one or more classes and that the prior probability of an edge existing between two variables is a function only of their classes. We derive an MCMC algorithm for simultaneous inference of the number of classes, the class assignments of variables, and the Bayes net structure over variables. For several realistic, sparse datasets, we show that the bias towards systematicity of connections provided by our model can yield more accurate learned networks than the traditional approach of using a uniform prior, and that the classes found by our model are appropriate.},
}
@inproceedings{roy2006learning,
title = {Learning annotated hierarchies from relational data},
author = {Roy, Daniel and Kemp, Charles and Mansinghka, Vikash K. and Tenenbaum, Joshua B.},
booktitle = {NIPS 2006: Advances in Neural Information Processing Systems 19},
pages = {1185--1192},
year = 2006,
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/3077-learning-annotated-hierarchies-from-relational-data.pdf},
url_link = {https://papers.nips.cc/paper/3077-learning-annotated-hierarchies-from-relational-data},
abstract = {The objects in many real-world domains can be organized into hierarchies, where each internal node picks out a category of objects. Given a collection of features and relations defined over a set of objects, an annotated hierarchy includes a specification of the categories that are most useful for describing each individual feature and relation. We define a generative model for annotated hierarchies and the features and relations that they describe, and develop a Markov chain Monte Carlo scheme for learning annotated hierarchies. We show that our model discovers interpretable structure in several real-world data sets.},
}
@inproceedings{shafto2006learning,
title = {Learning cross-cutting systems of categories},
author = {Shafto, Pat and Kemp, Charles and Mansinghka, Vikash K. and Gordon, Matthew and Tenenbaum, Joshua B.},
booktitle = {NIPS 2006: Proceedings of the 28th Annual Conference of the Cognitive Science Society},
pages = {2146--2151},
year = 2006,
url_paper = {http://csjarchive.cogsci.rpi.edu/Proceedings/2006/docs/p2146.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.183.9893},
abstract = {Most natural domains can be represented in multiple ways: animals may be thought of in terms of their taxonomic groupings or their ecological niches and foods may be thought of in terms of their nutritional content or social role. We present a computational framework that discovers multiple systems of categories given information about a domain of objects and their properties. Each system of object categories accounts for a distinct and coherent subset of the features. A first experiment shows that our CrossCat model predicts human learning in an artificial category learning task. A second experiment shows that the model discovers important structure in two real-world domains. Traditional models of categorization usually search for a single system of categories: we suggest that these models do not predict human performance in our task, and miss important structure in our real world examples.},
}
@inproceedings{goodman2006intuitive,
title = {Intuitive theories of mind: A rational approach to false belief},
author = {Goodman, Noah and Baker, Chris and Bonawitz, Elizabeth and Mansinghka, Vikash K. and Gopnik, Alison and Wellman, Henry and Schulz, Laura and Tenenbaum, Joshua B.},
booktitle = {COGSCI 2006: Proceedings of the 28th Annual Conference of the Cognitive Science Society},
pages = {1382--1387},
year = 2006,
url_paper = {http://web.mit.edu/cocosci/Papers/pos785-goodman.pdf},
url_link = {http://web.mit.edu/cocosci/Papers/pos785-goodman},
}