$\require{mathtools} % %%% GENERIC MATH %%% % % Environments \newcommand{\al}[1]{\begin{align}#1\end{align}} % need this for \tag{} to work \renewcommand{\r}{\mathrm} % % Greek \newcommand{\eps}{\epsilon} \newcommand{\veps}{\varepsilon} \newcommand{\Om}{\Omega} \newcommand{\om}{\omega} \newcommand{\Th}{\Theta} \let\fi\phi % because it looks like an f \let\phi\varphi % because it looks like a p % % Miscellaneous shortcuts % .. over and under \newcommand{\ss}[1]{_{\substack{#1}}} \newcommand{\ob}{\overbrace} \newcommand{\ub}{\underbrace} \newcommand{\ol}{\overline} \newcommand{\tld}{\widetilde} \newcommand{\HAT}{\widehat} \newcommand{\f}{\frac} \newcommand{\s}[2]{#1 /\mathopen{}#2} \newcommand{\rt}[1]{ {\sqrt{#1}}} % .. relations \newcommand{\sr}{\stackrel} \newcommand{\sse}{\subseteq} \newcommand{\ce}{\coloneqq} \newcommand{\ec}{\eqqcolon} \newcommand{\ap}{\approx} \newcommand{\ls}{\lesssim} \newcommand{\gs}{\gtrsim} % .. miscer \newcommand{\q}{\quad} \newcommand{\qq}{\qquad} \newcommand{\heart}{\heartsuit} % % Delimiters % (I needed to create my own because the MathJax version of \DeclarePairedDelimiter doesn't have \mathopen{} and that messes up the spacing) % .. one-part \newcommand{\p}[1]{\mathopen{}\left( #1 \right)} \newcommand{\b}[1]{\mathopen{}\left[ #1 \right]} \newcommand{\set}[1]{\mathopen{}\left\{ #1 \right\}} \newcommand{\abs}[1]{\mathopen{}\left\lvert #1 \right\rvert} \newcommand{\floor}[1]{\mathopen{}\left\lfloor #1 \right\rfloor} \newcommand{\ceil}[1]{\mathopen{}\left\lceil #1 \right\rceil} \newcommand{\inner}[1]{\mathopen{}\left\langle #1 \right\rangle} % .... (use phantom to force at least the standard height of double bars) \newcommand{\norm}[1]{\mathopen{}\left\lVert #1 \vphantom{f} \right\rVert} \newcommand{\frob}[1]{\norm{#1}_\mathrm{F}} %% .. two-part \newcommand{\incond}[2]{#1 \mathop{}\middle|\mathop{} #2} \newcommand{\cond}[2]{ {\left.\incond{#1}{#2}\right.}} \newcommand{\pco}[2]{\p{\incond{#1}{#2}}} \newcommand{\bco}[2]{\b{\incond{#1}{#2}}} \newcommand{\setco}[2]{\set{\incond{#1}{#2}}} \newcommand{\at}[2]{ {\left.#1\right|_{#2}}} \newcommand{\pat}[2]{\p{\at{#1}{#2}}} \newcommand{\bat}[2]{\b{\at{#1}{#2}}} % ..... (use phantom to force at least the standard height of double bar) \newcommand{\oldpara}[2]{#1\vphantom{f} \mathop{}\middle\|\mathop{} #2} %\newcommand{\para}[2]{#1\vphantom{f} \mathop{}\middle\|\mathop{} #2} \newcommand{\para}[2]{\mathchoice{\begin{matrix}#1\\\hdashline#2\end{matrix}}{\begin{smallmatrix}#1\\\hdashline#2\end{smallmatrix}}{\begin{smallmatrix}#1\\\hdashline#2\end{smallmatrix}}{\begin{smallmatrix}#1\\\hdashline#2\end{smallmatrix}}} \newcommand{\ppa}[2]{\p{\para{#1}{#2}}} \newcommand{\bpa}[2]{\b{\para{#1}{#2}}} %\newcommand{\bpaco}[4]{\bpa{\incond{#1}{#2}}{\incond{#3}{#4}}} \newcommand{\bpaco}[4]{\bpa{\cond{#1}{#2}}{\cond{#3}{#4}}} % % Levels of closeness \newcommand{\scirc}[1]{\sr{\circ}{#1}} \newcommand{\sdot}[1]{\sr{.}{#1}} \newcommand{\slog}[1]{\sr{\log}{#1}} \newcommand{\createClosenessLevels}[7]{ \newcommand{#2}{\mathrel{(#1)}} \newcommand{#3}{\mathrel{#1}} \newcommand{#4}{\mathrel{#1\!\!#1}} \newcommand{#5}{\mathrel{#1\!\!#1\!\!#1}} \newcommand{#6}{\mathrel{(\sdot{#1})}} \newcommand{#7}{\mathrel{(\slog{#1})}} } \let\lt\undefined \let\gt\undefined % .. vanilla versions (is it within a constant?) 
\newcommand{\ez}{\scirc=} \newcommand{\eq}{\simeq} \newcommand{\eqq}{\mathrel{\eq\!\!\eq}} \newcommand{\eqqq}{\mathrel{\eq\!\!\eq\!\!\eq}} \newcommand{\lez}{\scirc\le} \newcommand{\lq}{\preceq} \newcommand{\lqq}{\mathrel{\lq\!\!\lq}} \newcommand{\lqqq}{\mathrel{\lq\!\!\lq\!\!\lq}} \newcommand{\gez}{\scirc\ge} \newcommand{\gq}{\succeq} \newcommand{\gqq}{\mathrel{\gq\!\!\gq}} \newcommand{\gqqq}{\mathrel{\gq\!\!\gq\!\!\gq}} \newcommand{\lz}{\scirc<} \newcommand{\lt}{\prec} \newcommand{\ltt}{\mathrel{\lt\!\!\lt}} \newcommand{\lttt}{\mathrel{\lt\!\!\lt\!\!\lt}} \newcommand{\gz}{\scirc>} \newcommand{\gt}{\succ} \newcommand{\gtt}{\mathrel{\gt\!\!\gt}} \newcommand{\gttt}{\mathrel{\gt\!\!\gt\!\!\gt}} % .. dotted versions (is it equal in the limit?) \newcommand{\ed}{\sdot=} \newcommand{\eqd}{\sdot\eq} \newcommand{\eqqd}{\sdot\eqq} \newcommand{\eqqqd}{\sdot\eqqq} \newcommand{\led}{\sdot\le} \newcommand{\lqd}{\sdot\lq} \newcommand{\lqqd}{\sdot\lqq} \newcommand{\lqqqd}{\sdot\lqqq} \newcommand{\ged}{\sdot\ge} \newcommand{\gqd}{\sdot\gq} \newcommand{\gqqd}{\sdot\gqq} \newcommand{\gqqqd}{\sdot\gqqq} \newcommand{\ld}{\sdot<} \newcommand{\ltd}{\sdot\lt} \newcommand{\lttd}{\sdot\ltt} \newcommand{\ltttd}{\sdot\lttt} \newcommand{\gd}{\sdot>} \newcommand{\gtd}{\sdot\gt} \newcommand{\gttd}{\sdot\gtt} \newcommand{\gtttd}{\sdot\gttt} % .. log versions (is it equal up to log?) \newcommand{\elog}{\slog=} \newcommand{\eqlog}{\slog\eq} \newcommand{\eqqlog}{\slog\eqq} \newcommand{\eqqqlog}{\slog\eqqq} \newcommand{\lelog}{\slog\le} \newcommand{\lqlog}{\slog\lq} \newcommand{\lqqlog}{\slog\lqq} \newcommand{\lqqqlog}{\slog\lqqq} \newcommand{\gelog}{\slog\ge} \newcommand{\gqlog}{\slog\gq} \newcommand{\gqqlog}{\slog\gqq} \newcommand{\gqqqlog}{\slog\gqqq} \newcommand{\llog}{\slog<} \newcommand{\ltlog}{\slog\lt} \newcommand{\lttlog}{\slog\ltt} \newcommand{\ltttlog}{\slog\lttt} \newcommand{\glog}{\slog>} \newcommand{\gtlog}{\slog\gt} \newcommand{\gttlog}{\slog\gtt} \newcommand{\gtttlog}{\slog\gttt} % % Miscellaneous \newcommand{\LHS}{\mathrm{LHS}} \newcommand{\RHS}{\mathrm{RHS}} % .. operators \DeclareMathOperator{\poly}{poly} \DeclareMathOperator{\polylog}{polylog} \DeclareMathOperator{\quasipoly}{quasipoly} \DeclareMathOperator{\negl}{negl} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} % .. functions \DeclareMathOperator{\id}{id} \DeclareMathOperator{\sign}{sign} \DeclareMathOperator{\err}{err} \DeclareMathOperator{\ReLU}{ReLU} % .. analysis \let\d\undefined \newcommand{\d}{\operatorname{d}\mathopen{}} \newcommand{\df}[2]{ {\f{\d #1}{\d #2}}} \newcommand{\ds}[2]{ {\s{\d #1}{\d #2}}} \newcommand{\part}{\partial} \newcommand{\partf}[2]{\f{\part #1}{\part #2}} \newcommand{\parts}[2]{\s{\part #1}{\part #2}} \newcommand{\grad}[1]{\mathop{\nabla\!_{#1}}} % .. 
sets of numbers \newcommand{\N}{\mathbb{N}} \newcommand{\Z}{\mathbb{Z}} \newcommand{\R}{\mathbb{R}} \newcommand{\C}{\mathbb{C}} \newcommand{\F}{\mathbb{F}} % %%% SPECIALIZED MATH %%% % % Logic \renewcommand{\and}{\wedge} \newcommand{\AND}{\bigwedge} \newcommand{\or}{\vee} \newcommand{\OR}{\bigvee} \newcommand{\xor}{\oplus} \newcommand{\XOR}{\bigoplus} \newcommand{\union}{\cup} \newcommand{\inter}{\cap} \newcommand{\UNION}{\bigcup} \newcommand{\INTER}{\bigcap} \newcommand{\comp}{\overline} \newcommand{\true}{\r{true}} \newcommand{\false}{\r{false}} \newcommand{\tf}{\set{\true,\false}} \DeclareMathOperator{\One}{\mathbb{1}} \DeclareMathOperator{\1}{\mathbb{1}} % % Linear algebra \renewcommand{\span}{\mathrm{span}} \DeclareMathOperator{\rank}{rank} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\dom}{dom} \DeclareMathOperator{\Img}{Im} \newcommand{\transp}{\mathsf{T}} \renewcommand{\t}{^\transp} % ... named tensors \newcommand{\namedtensorstrut}{\vphantom{fg}} % milder than \mathstrut \newcommand{\name}[1]{\mathsf{\namedtensorstrut #1}} \newcommand{\nbin}[2]{\mathbin{\underset{\substack{#1}}{\namedtensorstrut #2}}} \newcommand{\ndot}[1]{\nbin{#1}{\odot}} \newcommand{\ncat}[1]{\nbin{#1}{\oplus}} \newcommand{\nsum}[1]{\sum\limits_{\substack{#1}}} \newcommand{\nfun}[2]{\mathop{\underset{\substack{#1}}{\namedtensorstrut\mathrm{#2}}}} \newcommand{\ndef}[2]{\newcommand{#1}{\name{#2}}} \newcommand{\nt}[1]{^{\transp(#1)}} % % Probability \newcommand{\Normal}{\mathcal{N}} \let\Pr\undefined \DeclareMathOperator*{\Pr}{Pr} \DeclareMathOperator*{\G}{\mathbb{G}} \DeclareMathOperator*{\Odds}{Od} \DeclareMathOperator*{\E}{E} \DeclareMathOperator*{\Var}{Var} \DeclareMathOperator*{\Cov}{Cov} \DeclareMathOperator*{\corr}{corr} \DeclareMathOperator*{\median}{median} \newcommand{\dTV}{d_{\mathrm{TV}}} \newcommand{\dHel}{d_{\mathrm{Hel}}} \newcommand{\dJS}{d_{\mathrm{JS}}} % ... information theory \let\H\undefined \DeclareMathOperator*{\H}{H} \DeclareMathOperator*{\I}{I} \DeclareMathOperator*{\D}{D} % %%% SPECIALIZED COMPUTER SCIENCE %%% % % Complexity classes % .. classical \newcommand{\Poly}{\mathsf{P}} \newcommand{\NP}{\mathsf{NP}} \newcommand{\PH}{\mathsf{PH}} \newcommand{\PSPACE}{\mathsf{PSPACE}} \renewcommand{\L}{\mathsf{L}} % .. probabilistic \newcommand{\formost}{\mathsf{Я}} \newcommand{\RP}{\mathsf{RP}} \newcommand{\BPP}{\mathsf{BPP}} \newcommand{\MA}{\mathsf{MA}} \newcommand{\AM}{\mathsf{AM}} \newcommand{\IP}{\mathsf{IP}} \newcommand{\RL}{\mathsf{RL}} % .. circuits \newcommand{\NC}{\mathsf{NC}} \newcommand{\AC}{\mathsf{AC}} \newcommand{\ACC}{\mathsf{ACC}} \newcommand{\TC}{\mathsf{TC}} \newcommand{\Ppoly}{\mathsf{P}/\poly} \newcommand{\Lpoly}{\mathsf{L}/\poly} % .. resources \newcommand{\TIME}{\mathsf{TIME}} \newcommand{\SPACE}{\mathsf{SPACE}} \newcommand{\TISP}{\mathsf{TISP}} \newcommand{\SIZE}{\mathsf{SIZE}} % .. 
keywords \newcommand{\co}{\mathsf{co}} \newcommand{\Prom}{\mathsf{Promise}} % % Boolean analysis \newcommand{\zo}{\set{0,1}} \newcommand{\pmo}{\set{\pm 1}} \newcommand{\zpmo}{\set{0,\pm 1}} \newcommand{\harpoon}{\!\upharpoonright\!} \newcommand{\rr}[2]{#1\harpoon_{#2}} \newcommand{\Fou}[1]{\widehat{#1}} \DeclareMathOperator{\Ind}{\mathrm{Ind}} \DeclareMathOperator{\Inf}{\mathrm{Inf}} \newcommand{\Der}[1]{\operatorname{D}_{#1}\mathopen{}} \newcommand{\Exp}[1]{\operatorname{E}_{#1}\mathopen{}} \DeclareMathOperator{\Stab}{\mathrm{Stab}} \DeclareMathOperator{\T}{T} \DeclareMathOperator{\sens}{\mathrm{s}} \DeclareMathOperator{\bsens}{\mathrm{bs}} \DeclareMathOperator{\fbsens}{\mathrm{fbs}} \DeclareMathOperator{\Cert}{\mathrm{C}} \DeclareMathOperator{\DT}{\mathrm{DT}} \DeclareMathOperator{\CDT}{\mathrm{CDT}} % canonical \DeclareMathOperator{\ECDT}{\mathrm{ECDT}} \DeclareMathOperator{\CDTv}{\mathrm{CDT_{vars}}} \DeclareMathOperator{\ECDTv}{\mathrm{ECDT_{vars}}} \DeclareMathOperator{\CDTt}{\mathrm{CDT_{terms}}} \DeclareMathOperator{\ECDTt}{\mathrm{ECDT_{terms}}} \DeclareMathOperator{\CDTw}{\mathrm{CDT_{weighted}}} \DeclareMathOperator{\ECDTw}{\mathrm{ECDT_{weighted}}} \DeclareMathOperator{\AvgDT}{\mathrm{AvgDT}} \DeclareMathOperator{\PDT}{\mathrm{PDT}} % partial decision tree \DeclareMathOperator{\DTsize}{\mathrm{DT_{size}}} \DeclareMathOperator{\W}{\mathbf{W}} % .. functions (small caps sadly doesn't work) \DeclareMathOperator{\Par}{\mathrm{Par}} \DeclareMathOperator{\Maj}{\mathrm{Maj}} \DeclareMathOperator{\HW}{\mathrm{HW}} \DeclareMathOperator{\Thr}{\mathrm{Thr}} \DeclareMathOperator{\Tribes}{\mathrm{Tribes}} \DeclareMathOperator{\RotTribes}{\mathrm{RotTribes}} \DeclareMathOperator{\CycleRun}{\mathrm{CycleRun}} \DeclareMathOperator{\SAT}{\mathrm{SAT}} \DeclareMathOperator{\UniqueSAT}{\mathrm{UniqueSAT}} % % Dynamic optimality \newcommand{\OPT}{\mathsf{OPT}} \newcommand{\Alt}{\mathsf{Alt}} \newcommand{\Funnel}{\mathsf{Funnel}} % % Alignment \DeclareMathOperator{\Amp}{\mathrm{Amp}} % %%% TYPESETTING %%% % % In text \renewcommand{\th}{^{\mathrm{th}}} \newcommand{\degree}{^\circ} % % Fonts % .. 
bold \newcommand{\BA}{\boldsymbol{A}} \newcommand{\BB}{\boldsymbol{B}} \newcommand{\BC}{\boldsymbol{C}} \newcommand{\BD}{\boldsymbol{D}} \newcommand{\BE}{\boldsymbol{E}} \newcommand{\BF}{\boldsymbol{F}} \newcommand{\BG}{\boldsymbol{G}} \newcommand{\BH}{\boldsymbol{H}} \newcommand{\BI}{\boldsymbol{I}} \newcommand{\BJ}{\boldsymbol{J}} \newcommand{\BK}{\boldsymbol{K}} \newcommand{\BL}{\boldsymbol{L}} \newcommand{\BM}{\boldsymbol{M}} \newcommand{\BN}{\boldsymbol{N}} \newcommand{\BO}{\boldsymbol{O}} \newcommand{\BP}{\boldsymbol{P}} \newcommand{\BQ}{\boldsymbol{Q}} \newcommand{\BR}{\boldsymbol{R}} \newcommand{\BS}{\boldsymbol{S}} \newcommand{\BT}{\boldsymbol{T}} \newcommand{\BU}{\boldsymbol{U}} \newcommand{\BV}{\boldsymbol{V}} \newcommand{\BW}{\boldsymbol{W}} \newcommand{\BX}{\boldsymbol{X}} \newcommand{\BY}{\boldsymbol{Y}} \newcommand{\BZ}{\boldsymbol{Z}} \newcommand{\Ba}{\boldsymbol{a}} \newcommand{\Bb}{\boldsymbol{b}} \newcommand{\Bc}{\boldsymbol{c}} \newcommand{\Bd}{\boldsymbol{d}} \newcommand{\Be}{\boldsymbol{e}} \newcommand{\Bf}{\boldsymbol{f}} \newcommand{\Bg}{\boldsymbol{g}} \newcommand{\Bh}{\boldsymbol{h}} \newcommand{\Bi}{\boldsymbol{i}} \newcommand{\Bj}{\boldsymbol{j}} \newcommand{\Bk}{\boldsymbol{k}} \newcommand{\Bp}{\boldsymbol{p}} \newcommand{\Bq}{\boldsymbol{q}} \newcommand{\Br}{\boldsymbol{r}} \newcommand{\Bs}{\boldsymbol{s}} \newcommand{\Bt}{\boldsymbol{t}} \newcommand{\Bu}{\boldsymbol{u}} \newcommand{\Bv}{\boldsymbol{v}} \newcommand{\Bw}{\boldsymbol{w}} \newcommand{\Bx}{\boldsymbol{x}} \newcommand{\By}{\boldsymbol{y}} \newcommand{\Bz}{\boldsymbol{z}} \newcommand{\Balpha}{\boldsymbol{\alpha}} \newcommand{\Bbeta}{\boldsymbol{\beta}} \newcommand{\Bgamma}{\boldsymbol{\gamma}} \newcommand{\Bdelta}{\boldsymbol{\delta}} \newcommand{\Beps}{\boldsymbol{\eps}} \newcommand{\Bveps}{\boldsymbol{\veps}} \newcommand{\Bzeta}{\boldsymbol{\zeta}} \newcommand{\Beta}{\boldsymbol{\eta}} \newcommand{\Btheta}{\boldsymbol{\theta}} \newcommand{\Biota}{\boldsymbol{\iota}} \newcommand{\Bkappa}{\boldsymbol{\kappa}} \newcommand{\Blambda}{\boldsymbol{\lambda}} \newcommand{\Bmu}{\boldsymbol{\mu}} \newcommand{\Bnu}{\boldsymbol{\nu}} \newcommand{\Bxi}{\boldsymbol{\xi}} \newcommand{\Bomicron}{\boldsymbol{\omicron}} \newcommand{\Bpi}{\boldsymbol{\pi}} \newcommand{\Brho}{\boldsymbol{\rho}} \newcommand{\Bsigma}{\boldsymbol{\sigma}} \newcommand{\Btau}{\boldsymbol{\tau}} \newcommand{\Bupsilon}{\boldsymbol{\upsilon}} \newcommand{\Bphi}{\boldsymbol{\phi}} \newcommand{\Bfi}{\boldsymbol{\fi}} \newcommand{\Bchi}{\boldsymbol{\chi}} \newcommand{\Bpsi}{\boldsymbol{\psi}} \newcommand{\Bomega}{\boldsymbol{\omega}} % .. calligraphic \newcommand{\CA}{\mathcal{A}} \newcommand{\CB}{\mathcal{B}} \newcommand{\CC}{\mathcal{C}} \newcommand{\CD}{\mathcal{D}} \newcommand{\CE}{\mathcal{E}} \newcommand{\CF}{\mathcal{F}} \newcommand{\CG}{\mathcal{G}} \newcommand{\CH}{\mathcal{H}} \newcommand{\CI}{\mathcal{I}} \newcommand{\CJ}{\mathcal{J}} \newcommand{\CK}{\mathcal{K}} \newcommand{\CL}{\mathcal{L}} \newcommand{\CM}{\mathcal{M}} \newcommand{\CN}{\mathcal{N}} \newcommand{\CO}{\mathcal{O}} \newcommand{\CP}{\mathcal{P}} \newcommand{\CQ}{\mathcal{Q}} \newcommand{\CR}{\mathcal{R}} \newcommand{\CS}{\mathcal{S}} \newcommand{\CT}{\mathcal{T}} \newcommand{\CU}{\mathcal{U}} \newcommand{\CV}{\mathcal{V}} \newcommand{\CW}{\mathcal{W}} \newcommand{\CX}{\mathcal{X}} \newcommand{\CY}{\mathcal{Y}} \newcommand{\CZ}{\mathcal{Z}} % .. 
typewriter \newcommand{\TA}{\mathtt{A}} \newcommand{\TB}{\mathtt{B}} \newcommand{\TC}{\mathtt{C}} \newcommand{\TD}{\mathtt{D}} \newcommand{\TE}{\mathtt{E}} \newcommand{\TF}{\mathtt{F}} \newcommand{\TG}{\mathtt{G}} \newcommand{\TH}{\mathtt{H}} \newcommand{\TI}{\mathtt{I}} \newcommand{\TJ}{\mathtt{J}} \newcommand{\TK}{\mathtt{K}} \newcommand{\TL}{\mathtt{L}} \newcommand{\TM}{\mathtt{M}} \newcommand{\TN}{\mathtt{N}} \newcommand{\TO}{\mathtt{O}} \newcommand{\TP}{\mathtt{P}} \newcommand{\TQ}{\mathtt{Q}} \newcommand{\TR}{\mathtt{R}} \newcommand{\TS}{\mathtt{S}} \newcommand{\TT}{\mathtt{T}} \newcommand{\TU}{\mathtt{U}} \newcommand{\TV}{\mathtt{V}} \newcommand{\TW}{\mathtt{W}} \newcommand{\TX}{\mathtt{X}} \newcommand{\TY}{\mathtt{Y}} \newcommand{\TZ}{\mathtt{Z}}$

Personal summary of A constant lower bound for the union-closed sets conjecture by Justin Gilmer.

Conjecture. If $\CF \sse 2^{[n]}, \CF \ne \set{\emptyset}$ is a family of sets such that for any $A,B \in \CF$, their union $A \union B$ is also in $\CF$, then there is some element $i \in [n]$ that is present in at least half the sets of $\CF$.

For example,

  • in $\CF = \set{\emptyset, \set{1}, \set{1,2}, \ldots, [n]}$, element $1$ is present in a $\frac{n}{n+1} \ge 1/2$ fraction of the sets;
  • in $\CF = 2^{[n]}$, each element $i \in [n]$ is present in exactly half the sets.
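To make the statement concrete, here is a small brute-force sanity check (my own, not from the paper); the helpers `union_closed` and `max_element_fraction` are ad hoc names:

```python
from itertools import combinations

def union_closed(family):
    """Is the union of any two sets in `family` again in `family`?"""
    fam = set(map(frozenset, family))
    return all(a | b in fam for a, b in combinations(fam, 2))

def max_element_fraction(family):
    """Largest fraction of sets in `family` containing any single element."""
    fam = set(map(frozenset, family))
    elements = set().union(*fam)
    return max(sum(i in s for s in fam) / len(fam) for i in elements)

family = [set(), {1}, {1, 2}, {1, 2, 3}]   # the first example above, with n = 3
assert union_closed(family)
print(max_element_fraction(family))        # 0.75 >= 1/2, as the conjecture predicts
```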

In the contrapositive, this says that if you have some family of sets $\CF$ such that each element $i \in [n]$ is present in less than half the sets, then there must be some $A,B \in \CF$ whose union $A \union B$ is outside of $\CF$. Gilmer shows this under the stronger assumption that each element is present in only $\le 1\%$ of the sets.

More precisely, he shows that if the sets $\BA,\BB$ are independent samples from the same distribution (in this case, uniform in $\CF$) such that $\Pr[i \in \BA] \le 1\%$ for all $i \in [n]$, then their union has more entropy than either of them:

\[ \H[\BA \union \BB] \ge (1+\Omega(1))\H[\BA]. \]

This does contradict union-closedness: if $\CF$ were union-closed, then $\BA \union \BB$ would also take values in $\CF$, so $\H[\BA \union \BB] \le \log\abs{\CF} = \H[\BA]$ (the uniform distribution maximizes entropy), which would force $\H[\BA] = 0$, i.e. $\abs{\CF} = 1$; the $1\%$ assumption then forces that single set to be empty, i.e. $\CF = \set{\emptyset}$.
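For a feel of what the theorem says, here is a toy illustration (mine, not from the paper): a family in which every element appears in exactly $1\%$ of the sets (the $100$ singletons, which is of course very far from union-closed), where the union of two independent uniform samples indeed has much more entropy.

```python
from math import log2

def H(dist):
    """Shannon entropy (in bits) of a dict {outcome: probability}."""
    return -sum(p * log2(p) for p in dist.values() if p > 0)

# The family of the 100 singletons {1}, ..., {100}: every element is in exactly 1% of the sets.
n = 100
A = {frozenset({i}): 1 / n for i in range(1, n + 1)}   # A uniform over the family
print(H(A))                                            # log2(100) ≈ 6.64

# Distribution of A ∪ B for A, B independent uniform samples from the family
union = {}
for a, pa in A.items():
    for b, pb in A.items():
        u = a | b
        union[u] = union.get(u, 0.0) + pa * pb
print(H(union))                                        # ≈ 12.30 ≈ 1.85 · H[A]
```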

Intuition

Independent case

For convenience, for any set $A \sse [n]$, let $A_i$ be the indicator that $i\in A$ (we’re interpreting $A$ as a point in the hypercube $\zo^n$).

Intuitively, if each element is in only $\le 1\%$ of the sets, this means that a typical $A \in \CF$ is very sparse and thus has relatively low information content, while a typical union $A \union B$ has up to twice as many elements, and thus up to twice as much entropy.

Example 1. Say $\BA$ (and $\BB$) is generated by including each element $i \in [n]$ independently with probability $p$. Then we have

\[ \H[\BA] = \H[\BA_1] + \cdots + \H[\BA_n] = n \cdot H(p) \]

where $H(p)$ is the binary entropy function. And on the other hand, every element is included in $\BA \union \BB$ independently with probability $1-(1-p)^2 = 2p-p^2$, so

\[ \H[\BA \union \BB] = \H[(\BA\union\BB)_1] + \cdots + \H[(\BA\union\BB)_n] = n \cdot H(2p-p^2). \]

When $p$ is small, we have $H(2p-p^2) \approx H(2p) \approx 2\cdot H(p)$, so

\[ \H[\BA \union \BB] \approx 2 \H[\BA], \]

which matches our intuition.
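Here is a quick numerical check of this approximation (my own sketch, not from the paper), using the binary entropy function:

```python
from math import log2

def H(p):
    """Binary entropy in bits."""
    return 0.0 if p in (0.0, 1.0) else -p * log2(p) - (1 - p) * log2(1 - p)

for p in [0.01, 0.001, 0.0001]:
    # per-coordinate ratio H[A ∪ B] / H[A] = H(2p - p^2) / H(p)
    print(p, H(2*p - p*p) / H(p))
# p = 0.01   -> ratio ≈ 1.74
# p = 0.001  -> ratio ≈ 1.82
# p = 0.0001 -> ratio ≈ 1.86
```

The ratio creeps towards $2$ as $p \to 0$, and is already comfortably bounded away from $1$ at $p = 1\%$.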

Non-independent case

When the “coordinates” of $\BA$ and $\BB$ are not independent (i.e. $\BA_i$ and $\BA_j$ might be correlated for $i \ne j$), then we can analyse the entropies $\H[\BA]$ and $\H[\BA \union \BB]$ coordinate-by-coordinate, but we need to take conditioning into account:

\[ \al{ \H[\BA] &= \sum_{i=1}^n \H\bco{\BA_i}{\BA_{<i}}\\ \H[\BA\union \BB] &= \sum_{i=1}^n \H\bco{(\BA\union\BB)_i}{(\BA\union\BB)_{<i}}, } \]

where $\BA_{<i}$ is shorthand for $\BA_1, \ldots, \BA_{i-1}$.
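As a sanity check on this bookkeeping, here is a standard chain-rule computation on a toy two-coordinate distribution (my own snippet, not from the paper):

```python
from math import log2

def H(dist):
    """Shannon entropy (in bits) of a dict {outcome: probability}."""
    return -sum(p * log2(p) for p in dist.values() if p > 0)

# a toy correlated distribution over {0,1}^2
joint = {(0, 0): 0.7, (0, 1): 0.05, (1, 0): 0.05, (1, 1): 0.2}

# H[A] computed directly ...
lhs = H(joint)

# ... and via the chain rule: H[A_1] + H[A_2 | A_1]
marg1 = {a1: sum(p for (b1, _), p in joint.items() if b1 == a1) for a1 in (0, 1)}
cond = sum(
    marg1[a1] * H({a2: joint[(a1, a2)] / marg1[a1] for a2 in (0, 1)})
    for a1 in (0, 1)
)
rhs = H(marg1) + cond

print(lhs, rhs)   # both ≈ 1.26
```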

We want to argue that “on average”, the mean of $(\BA \union \BB)_i$ will still be about twice the mean of $\BA_i$ (which is $\le 1\%$), and therefore even after conditioning on the previous coordinates, $(\BA \union \BB)_i$ will have about twice as much entropy as $\BA_i$.

One catch is that the coordinates $<i$ might actually influence the mean of the $i\th$ coordinate quite dramatically, as the following example shows.

Example 2. Say $\BA_1$ is a Bernoulli of mean $1\%$, and let $\BA_2,\ldots,\BA_n$ be Bernoullis of mean $.9$ if $\BA_1=1$, and deterministically $0$ otherwise. In this case, we would have

\[ \H\bco{\BA_2}{\BA_1} = 1\% \cdot H(.9) \]

and

\[ \H\bco{(\BA \union \BB)_2}{(\BA\union\BB)_1} \approx 2\% \cdot H(.9) \]

(indeed, the entropy of $(\BA \union \BB)_2$ is dominated by the case where exactly one of $\BA_1$ and $\BB_1$ was $1$, which happens with probability $\approx 2\%$). In this case, we still get

\[ \H\bco{(\BA \union \BB)_2}{(\BA\union\BB)_1} \approx 2 \H\bco{\BA_2}{\BA_1}, \]

but for a completely different reason: the mean of $(\BA \union \BB)_2$ in the relevant cases is not higher than that of $\BA_2$ (both are $.9$), but it takes that value twice as often ($\approx 2\%$ instead of $1\%$)!
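Here is a quick numerical verification of Example 2 (my own sketch), computing the two conditional entropies exactly by enumerating the first two coordinates of $\BA$ and $\BB$:

```python
from itertools import product
from math import log2

def H(p):
    """Binary entropy in bits."""
    return 0.0 if p in (0.0, 1.0) else -p * log2(p) - (1 - p) * log2(1 - p)

p1, p2 = 0.01, 0.9   # A_1 ~ Bern(1%); A_2 ~ Bern(0.9) if A_1 = 1, else A_2 = 0

# H[A_2 | A_1]: only the A_1 = 1 branch contributes any entropy
print(p1 * H(p2))                                  # = 1% * H(0.9) ≈ 0.0047

# Joint distribution of ((A ∪ B)_1, (A ∪ B)_2), enumerating the first two coordinates of A and B
joint = {(u1, u2): 0.0 for u1 in (0, 1) for u2 in (0, 1)}
for a1, b1, a2, b2 in product((0, 1), repeat=4):
    pr = (p1 if a1 else 1 - p1) * (p1 if b1 else 1 - p1)
    pr *= (p2 if a2 else 1 - p2) if a1 else (0.0 if a2 else 1.0)
    pr *= (p2 if b2 else 1 - p2) if b1 else (0.0 if b2 else 1.0)
    joint[(a1 | b1, a2 | b2)] += pr

# H[(A ∪ B)_2 | (A ∪ B)_1] = sum over u1 of Pr[u1] * H(Pr[u2 = 1 | u1])
cond_H = 0.0
for u1 in (0, 1):
    pr_u1 = joint[(u1, 0)] + joint[(u1, 1)]
    if pr_u1 > 0:
        cond_H += pr_u1 * H(joint[(u1, 1)] / pr_u1)

print(cond_H, 0.02 * H(p2))                        # ≈ 0.0093 vs ≈ 0.0094
```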

In general, if we want to show that

\[ \H\bco{(\BA\union\BB)_i}{(\BA\union\BB)_{<i}} \ge (1+\Omega(1))\H\bco{\BA_i}{\BA_{<i}}, \]

then the argument needs to care not only about the global means $\E[\BA_i]$ and $\E[(\BA\union\BB)_i]$, but also about the typical values that these means take when conditioning on the previous $i-1$ coordinates.

Intuitively, we’ll split into two cases, inspired by our two examples:

  • “Boosting” case (Example 1): When the conditional means of $\BA_i$ and $\BB_i$ are small (say $\le \frac1{10}$), this will make the conditional mean of $(\BA \union \BB)_i$ significantly bigger than either of them, thus making the entropy bigger.
  • “Hedging” case (Example 2): If the conditional mean of either $\BA_i$ or $\BB_i$ is big (say $> \frac1{10}$), then the conditional mean of $(\BA \union \BB)_i$ will also be big. It will not necessarily be bigger than the bigger of the two, but it will be big more often, because it gets “two chances”.

Proof

By the chain rule, we have

\[ \al{ \H[\BA] &= \sum_{i=1}^n \H\bco{\BA_i}{\BA_{<i}}\\ \H[\BA\union \BB] &= \sum_{i=1}^n \H\bco{(\BA\union\BB)_i}{(\BA\union\BB)_{<i}}. } \]

We want to show that $\H[\BA\union\BB] \ge (1+\Omega(1))\H[\BA]$ term by term. One key observation is that

\[ \H\bco{(\BA\union\BB)_i}{(\BA\union\BB)_{<i}} \geq \H\bco{(\BA\union\BB)_i}{\BA_{<i}, \BB_{<i}}. \]

Indeed, knowing $\BA_{<i}$ and $\BB_{<i}$ is more information than just knowing their union $(\BA \union \BB)_{<i}$, and conditioning on more information can only decrease entropy. So all we need to show is that

\[ \H\bco{(\BA\union\BB)_i}{\BA_{<i}, \BB_{<i}} \stackrel?\ge (1+\Omega(1)) \H\bco{\BA_i}{\BA_{<i}}.\tag{1} \]

And this makes things much simpler to reason about: now, $\BA_i$ and $\BB_i$ are independent even after the conditioning, so we’ll be able to compute the conditional mean of $(\BA \union \BB)_i$ as a function of those of $\BA_i$ and $\BB_i$.

Formally, let

\[ \al{ \BP_i &\ce \E_{\BA' \sim \BA}\bco{\BA'_i}{\BA'_{<i} = \BA_{<i}}\\ \BQ_i &\ce \E_{\BB' \sim \BB}\bco{\BB'_i}{\BB'_{<i} = \BB_{<i}}\\ \BR_i &\ce \E_{(\BA',\BB') \sim (\BA,\BB)}\bco{(\BA' \union \BB')_i}{\BA'_{<i} = \BA_{<i},\BB'_{<i} = \BB_{<i}} } \]

be the random variables that give the conditional means of $\BA_i$, $\BB_i$ and $(\BA \union \BB)_i$ given the previous coordinates (note that $\BP_i$ and $\BQ_i$ have the same distribution). Conditioned on those previous coordinates, each of these bits is just a Bernoulli with the corresponding mean, so its conditional entropy is the binary entropy of that mean, and we can rewrite equation (1) as

\[ \E[H(\BR_i)] \stackrel?\ge (1+\Omega(1))\E[H(\BP_i)]. \]

And by independence, we have $\BR_i = 1-(1-\BP_i)(1-\BQ_i)$, so we can further rewrite it as

\[ \E[H(\BP_i+\BQ_i - \BP_i\BQ_i)] \stackrel?\ge (1+\Omega(1))\E[H(\BP_i)]. \]

The main technical lemma shows that the above inequality holds for any pair of iid random variables with low mean:

Lemma. Let $\BP$ and $\BQ$ be two iid¹ random variables over $[0,1]$ such that $\E[\BP] \le 1\%$. Then

\[ \E[H(\BP+\BQ - \BP\BQ)] \ge (1+\Omega(1))\E[H(\BP)]. \]
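Before the proof, here is a quick numerical check of the lemma on a few toy distributions with mean exactly $1\%$ (my own sanity check, not a proof; `ratio` is an ad hoc helper):

```python
from math import log2

def H(p):
    """Binary entropy in bits."""
    return 0.0 if p in (0.0, 1.0) else -p * log2(p) - (1 - p) * log2(1 - p)

def ratio(dist):
    """E[H(P + Q - PQ)] / E[H(P)] for P, Q iid with distribution `dist` = {value: prob}."""
    numer = sum(pp * pq * H(p + q - p * q) for p, pp in dist.items() for q, pq in dist.items())
    denom = sum(pp * H(p) for p, pp in dist.items())
    return numer / denom

examples = [
    {0.01: 1.0},               # constant, as in Example 1
    {0.9: 1/90, 0.0: 89/90},   # Example-2-like: rarely a large conditional mean
    {0.1: 0.1, 0.0: 0.9},      # something in between
]
for dist in examples:
    print(ratio(dist))         # ≈ 1.74, 1.98, 1.95: all bounded away from 1
```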

Proof of technical lemma

Let’s split these expectations based on whether $\BP \le \frac1{10}$ and whether $\BQ \le \frac1{10}$. Note that $\Pr[\BP \le \frac1{10}] = \Pr[\BQ \le \frac1{10}] \ge \frac{9}{10}$ by Markov’s inequality. We have

\[ \E[H(\BP)] = \E\b{H(\BP)\cdot\1\b{\BP\le \frac1{10}}} + \E\b{H(\BP)\cdot\1\b{\BP> \frac1{10}}} \]

and

\[ \al{ &\E[H(\BP+\BQ-\BP\BQ)] \\ &\ge \ub{\E\b{H(\BP+\BQ-\BP\BQ) \cdot \1\b{\BP,\BQ \le \frac{1}{10}}}}_\text{``boosting'' case} + \ub{2\E\b{H(\BP+\BQ-\BP\BQ) \cdot \1\b{\BQ \le \frac{1}{10} < \BP}}}_\text{``hedging'' case} } \]

where we used the fact that $\BP,\BQ$ are iid to group $\BQ \le \frac{1}{10} < \BP$ and $\BP \le \frac{1}{10} < \BQ$ together, and we dropped the term corresponding to $\BP,\BQ > \frac{1}{10}$. We’ll show the inequality term by term.

(In what follows, we basically treat $\frac{1}{10}$ as an arbitrarily small constant.)

“Boosting” case (sketch)

For any fixed $p,q \le \frac{1}{10}$, we have

\[ H(p+q-pq) \approx H(p+q) \geq (1+\Omega(1))\frac{H(p)+H(q)}{2}, \]

so

\[ \al{ &\E\b{H(\BP+\BQ-\BP\BQ) \cdot \1\b{\BP,\BQ \le \frac{1}{10}}}\\ &\qquad\ge (1+\Omega(1))\E\b{\frac{H(\BP)+H(\BQ)}{2}\1\b{\BP,\BQ \le \frac{1}{10}}}\\ &\qquad= (1+\Omega(1))\E\b{H(\BP)\cdot\1\b{\BP\le \frac1{10}}}\Pr\b{\BQ \le \frac{1}{10}}\tag{$\BP,\BQ$ iid}\\ &\qquad\approx (1+\Omega(1))\E\b{H(\BP)\cdot\1\b{\BP\le \frac1{10}}} \tag{$\Pr[\BQ \le 1/10] \ge 9/10$} } \]

(because $\BP,\BQ$ are iid).
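The pointwise inequality $H(p+q-pq) \geq (1+\Omega(1))\frac{H(p)+H(q)}{2}$ for $p,q \le \frac{1}{10}$ can be sanity-checked numerically on a grid (my own check, not a proof; the worst case appears to sit at the corner $p=q=\frac1{10}$):

```python
from math import log2

def H(p):
    """Binary entropy in bits."""
    return 0.0 if p in (0.0, 1.0) else -p * log2(p) - (1 - p) * log2(1 - p)

grid = [i / 2000 for i in range(1, 201)]   # p, q ranging over (0, 0.1]
worst = min(
    H(p + q - p * q) / ((H(p) + H(q)) / 2)
    for p in grid for q in grid
)
print(worst)   # ≈ 1.50, attained near p = q = 0.1
```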

“Hedging” case (sketch)

For any fixed $p > \frac{1}{10}$ and any fixed $q \le \frac{1}{10}$, we have

\[ H(p+q-pq) \approx H(p) \]

(indeed, $p+q-pq = (1-q)\,p + q \cdot 1$, so by concavity of $H$ we get $H(p+q-pq) \ge (1-q)H(p) \ge \frac{9}{10}H(p)$, which is the direction we need), so

\[ \al{ &2\E\b{H(\BP+\BQ-\BP\BQ) \cdot \1\b{\BQ \le \frac{1}{10} < \BP}}\\ &\qquad\approx 2\E\b{H(\BP) \cdot \1\b{\BQ \le \frac{1}{10} < \BP}}\\ &\qquad= 2\E\b{H(\BP) \cdot \1\b{\BP >\frac{1}{10}}}\Pr\b{\BQ \le\frac{1}{10}}\tag{$\BP,\BQ$ iid}\\ &\qquad\ge (1+\Omega(1)) \E\b{H(\BP)\cdot\1\b{\BP> \frac1{10}}}.\tag{$\textstyle\Pr[\BQ \le \frac{1}{10}] \ge \frac{9}{10}$} } \]
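Here, too, a quick grid check of the concavity bound $H(p+q-pq) \ge (1-q)H(p)$ in the relevant range (my own check, not from the paper):

```python
from math import log2

def H(p):
    """Binary entropy in bits."""
    return 0.0 if p in (0.0, 1.0) else -p * log2(p) - (1 - p) * log2(1 - p)

grid = [i / 1000 for i in range(1, 1000)]
worst = min(
    H(p + q - p * q) / H(p)
    for p in grid if p > 0.1
    for q in grid if q <= 0.1
)
print(worst)   # ≈ 0.91 on this grid; the concavity bound guarantees ≥ (1 - q) ≥ 0.9 in general
```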
  1. The assumption that they are identically distributed is not really essential; it basically only ensures that $\E[\BP] \approx \E[\BQ]$. My guess is that you could generalize it to something like $\geq \p{1+\Omega\p{\frac{\E[\BQ]}{\E[\BP]}}}\E[H(\BP)]$ when $\E[\BQ] \le \E[\BP]$, though I could be wrong on the details.