l2hmc Example: 2D U(1)
This notebook will (attempt) to walk through the steps needed to successfully instantiate and “run” an experiment.
For this example, we wish to train the L2HMC sampler for the 2D U(1) lattice gauge model with Wilson action:
S_{\beta}(n) = \beta \sum_{n}\sum_{\mu<\nu}\mathrm{Re}\left[1 - U_{\mu\nu}(n) \right]
This consists of the following steps:
Build an
Experimentby parsing our configuration objectTrain our model using the
Experiment.train()methodEvaluate our trained model
Experiment.evaluate(job_type='eval')Compare our trained models’ performance against generic HMC
Experiment.evaluate(job_type='hmc')
Evaluating Performance
We measure the performance of our model by comparing the tunneling rate \delta Q of our trained sampler to that of generic HMC.
Explicitly, the tunneling rate is given by:
\delta Q = \frac{1}{N_{\mathrm{chains}}}\sum_{\mathrm{chains}} \left|Q_{i+1} - Q_{i}\right|
where the difference is between subsequent states in a chain, and the sum is over all N chains (each being ran in parallel, independently).
Since our goal is to generate independent configurations, the more our sampler tunnels between different topological sectors (tunneling rate), the more efficient our sampler.
Imports / Setup
! nvidia-smi | tail --lines -7import os
devices = os.environ.get('CUDA_VISIBLE_DEVICES', None)
print(devices)
!getconf _NPROCESSORS_ONLN # get number of availble CPUsos.environ['TORCH_CPP_LOG_LEVEL'] = 'ERROR'
os.environ['AUTOGRAPH_VERBOSITY'] = '10'
!echo $CUDA_VISIBLE_DEVICESimport lovely_tensors as lt
lt.monkey_patch()
lt.set_config(color=False)# automatically detect and reload local changes to modules
%load_ext autoreload
%autoreload 2
# automatically detect and reload local changes to modules
%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg', 'retina')import os
import warnings
os.environ['COLORTERM'] = 'truecolor'
warnings.filterwarnings('ignore')
# --------------------------------------
# BE SURE TO GRAB A FRESH GPU !
os.environ['CUDA_VISIBLE_DEVICES'] = '5'
!echo $CUDA_VISIBLE_DEVICES
# --------------------------------------import yaml
import logging
from l2hmc.configs import CONF_DIR
rlog_yaml = CONF_DIR.joinpath('hydra', 'job_logging', 'rich_jupyter.yaml')
with rlog_yaml.open('r') as stream:
logconf = dict(yaml.safe_load(stream))
logging.config.dictConfig(logconf)
log = logging.getLogger()
log.setLevel('INFO')import torch
import opinionated
import seaborn as sns
import numpy as np
import lovely_tensors as lt
import matplotlib.pyplot as plt
import l2hmc.group.su3.pytorch.group as g
from pathlib import Path
from typing import Optional
from ezpz import setup_torch
#from l2hmc.utils.dist import setup_torch_distributed
from l2hmc.common import grab_tensor, print_dict
from l2hmc.configs import dict_to_list_of_overrides, get_experiment
from l2hmc.experiment.pytorch.experiment import Experiment, evaluate # noqa # noqa
from l2hmc.utils.plot_helpers import set_plot_style
from l2hmc.utils.history import BaseHistory
from l2hmc.utils.plot_helpers import ( # noqa
set_plot_style,
plot_scalar,
plot_chains,
plot_leapfrogs
)
os.environ['COLORTERM'] = 'truecolor'
PORT = np.random.randint(5000, 6000)
#SEED = np.random.randint(0, 2 ** 16)
SEED = 4351
log.critical(f'{SEED=}')
log.info(f'{PORT=}')
os.environ['MASTER_PORT'] = str(PORT)
#_ = setup_torch_distributed(backend='DDP', )
_ = setup_torch(backend='DDP', seed=SEED, port=PORT)
_ = (
torch.set_default_device('cuda')
if torch.cuda.is_available() else None
)
#torch.set_default_dtype(torch.bfloat16)
#_ = (
# torch.set_autocast_gpu_dtype(torch.bfloat16)
# if torch.cuda.is_available() else None
#)
set_plot_style()
plt.style.use(opinionated.STYLES['opinionated_min'])
sns.set_context('notebook', font_scale=1.25)import l2hmc
log.info(f'{l2hmc.__file__=}')Initialize and Build Experiment objects:
- The
l2hmc.mainmodule provides a functionbuild_experiment:
def build_experiment(overrides: list[str]) -> tfExperiment | ptExperiment:
...which will:
- Load the default options from
conf/config.yaml - Override the default options with any values provided in
overrides - Parse these options and build an
ExperimentConfigwhich uniquely defines an experiment - Instantiate / return an
Experimentfrom theExperimentConfig. Depending onframework=pytorch|tensorflow:framework=pytorch->l2hmc.experiment.pytorch.Experimentframework=tensorflow->l2hmc.experiment.tensorflow.Experiment
>>> train_output = experiment.train()
>>> eval_output = experiment.evaluate(job_type='eval')
>>> hmc_output = experiment.evaluate(job_type='hmc')Overriding Defaults
Specifics about the training / evaluation / hmc runs can be flexibly overridden by passing arguments to the training / evaluation / hmc runs, respectively
import numpy as np
# ---- NOTE -----------------------------
# These don't NEED to be specified,
# they're just here for completeness.
# the defaults are in `conf/config.yaml`
# ----------------------------------------
DEFAULTS = {
'seed': 76043,
'precision': 'fp16',
'init_aim': False,
'init_wandb': False,
'use_wandb': False,
'restore': False,
'save': False,
'use_tb': False,
'dynamics': {
'nleapfrog': 4,
'nchains': 2048,
'eps': 0.05,
},
'conv': 'none',
'steps': {
'log': 20,
'print': 200,
'nepoch': 5000,
'nera': 1,
},
'annealing_schedule': {
'beta_init': 4.0,
'beta_final': 4.0,
},
}
outputs = {
'pytorch': {
'train': {},
'eval': {},
'hmc': {},
},
'tensorflow': {
'train': {},
'eval': {},
'hmc': {},
},
}from l2hmc.configs import dict_to_list_of_overrides
OVERRIDES = dict_to_list_of_overrides(DEFAULTS)
log.info('\n'.join(OVERRIDES))from l2hmc.__main__ import build_experiment
# Build PyTorch Experiment
ptExpU1 = build_experiment(
overrides=[
*OVERRIDES,
'framework=pytorch',
'backend=DDP',
]
)# Build TensorFlow Experiment
import tensorflow as tf
tf.keras.mixed_precision.set_global_policy('mixed_float16')
tfExpU1 = build_experiment(
overrides=[
*OVERRIDES,
'framework=tensorflow',
'backend=horovod',
]
)PyTorch
import time
from l2hmc.utils.history import BaseHistory, summarize_dict
import l2hmc.utils.live_plots as plotter
plt.rcParams['xaxis.labellocation'] = 'center'
plt.rcParams['yaxis.labellocation'] = 'center'
beta = 4.0
state = ptExpU1.trainer.dynamics.random_state(beta)state.x.deviceTraining
outputs['pytorch']['train'] = ptExpU1.trainer.train(
nera=1,
nepoch=5000,
beta=4.0,
# beta=[4.0, 4.25, 4.5, 4.75, 5.0],
)# dset_train = ptExpU1.trainer.histories['train'].plot_all(num_chains=128)
dset_train_pt = ptExpU1.save_dataset(job_type='train', nchains=32)Inference
Evaluation
outputs['pytorch']['eval'] = ptExpU1.trainer.eval(
job_type='eval',
nprint=500,
nchains=128,
eval_steps=2000,
)
dset_eval_pt = ptExpU1.save_dataset(job_type='eval', nchains=32)
# dset_eval_pt = ptExpU1.trainer.histories['eval'].plot_all()HMC
outputs['pytorch']['hmc'] = ptExpU1.trainer.eval(
job_type='hmc',
nprint=500,
nchains=128,
eval_steps=2000,
)
dset_hmc_pt = ptExpU1.save_dataset(job_type='hmc', nchains=32)
# dset_hmc_pt = ptExpU1.trainer.histories['hmc'].plot_all()TensorFlow
Train
outputs['tensorflow']['train'] = tfExpU1.trainer.train(
nera=1,
nepoch=5000,
beta=4.0,
# beta=[4.0, 4.25, 4.5, 4.75, 5.0],
)
# dset_train_tf = tfExpU1.trainer.histories['train'].plot_all()
_ = tfExpU1.save_dataset(job_type='train', nchains=32)Inference
Evaluate
outputs['tensorflow']['eval'] = tfExpU1.trainer.eval(
job_type='eval',
nprint=500,
nchains=128,
eval_steps=2000,
)
# dset_eval_tf = tfExpU1.trainer.histories['eval'].plot_all()
_ = tfExpU1.save_dataset(job_type='eval', nchains=32)HMC
outputs['tensorflow']['hmc'] = tfExpU1.trainer.eval(
job_type='hmc',
nprint=500,
nchains=128,
eval_steps=2000,
)
_ = tfExpU1.save_dataset(job_type='hmc', nchains=32)Model Performance
Our goal is improving the efficiency of our MCMC sampler.
In particular, we are interested in generating independent save_datasetrations which we can then use to calculate expectation values of physical observables.
For our purposes, we are interested in obtaining lattice configurations from distinct topological charge sectors, as characterized by a configurations topological charge, Q.
HMC is known to suffer from critical slowing down, a phenomenon in which our configurations remains stuck in some local topological charge sector and fails to produce distinct configurations.
In particular, it is known that the integrated autocorrelation time of the topological charge \tau grows exponentially with decreasing lattice spacing (i.e. continuum limit), making this theory especially problematic.
Because of this, we can assess our models’ performance by looking at the tunneling rate, i.e. the rate at which our sampler jumps between these different charge sectors.
We can write this quantity as:
\delta Q = |Q^{(i)} - Q^{(i-1)}|
where we look at the difference in the topological charge between sequential configurations.
Note: The efficiency of our sampler is directly proportional to the tunneling rate, which is inversely proportional to the integrated autocorrelation time \tau, i.e.
\text{Efficiency} \propto \delta Q \propto \frac{1}{\tau}
Explicitly, this means that the more efficient the model \longrightarrow
- the larger tunneling rate
- the smaller integrated autocorrelation time for Q
import xarray as xr
def get_thermalized_configs(
x: np.ndarray | xr.DataArray,
drop: int = 5
) -> np.ndarray | xr.DataArray:
"""Drop the first `drop` states across all chains.
x.shape = [draws, chains]
"""
if isinstance(x, np.ndarray):
return np.sort(x)[..., :-drop]
if isinstance(x, xr.DataArray):
return x.sortby(
['chain', 'draw'],
ascending=False
)[..., :-drop]
raise TypeErrorComparisons
We can measure our models’ performance explicitly by looking at the average tunneling rate, \delta Q_{\mathbb{Z}}, for our trained model and comparing it against generic HMC.
Recall,
\delta Q_{\mathbb{Z}} := \big|Q^{(i+1)}_{\mathbb{Z}} - Q^{(i)}_{\mathbb{Z}}\big|
where a higher value of \delta Q_{\mathbb{Z}} corresponds to better tunneling of the topological charge, Q_{\mathbb{Z}}.
Note that we can get a concise representation of the data from different parts of our run via:
Note that the data from each of the different parts of our experiment (i.e. train, eval, and hmc) are stored as a dict, e.g.
>>> list(ptExpU1.trainer.histories.keys())
['train', 'eval', 'hmc']
>>> train_history = ptExpU1.trainer.histories['train']
>>> train_dset = train_history.get_dataset()
>>> assert isinstance(train_history, l2hmc.utils.history.BaseHistory)
>>> assert isinstance(train_dset, xarray.Dataset)(see below, for example)
We aggregate the data into the dsets dict below, grouped by:
- Framework (
pytorch/tensorflow) - Job type (
train,eval,hmc)
import logging
log = logging.getLogger(__name__)
dsets = {}
fws = ['pt', 'tf']
modes = ['train', 'eval', 'hmc']
for fw in fws:
dsets[fw] = {}
for mode in modes:
hist = None
if fw == 'pt':
hist = ptExpU1.trainer.histories.get(mode, None)
elif fw == 'tf':
hist = tfExpU1.trainer.histories.get(mode, None)
if hist is not None:
log.info(f'Getting dataset for {fw}: {mode}')
dsets[fw][mode] = hist.get_dataset()dset_eval_pt = ptExpU1.trainer.histories['eval'].get_dataset()
dset_hmc_pt = ptExpU1.trainer.histories['hmc'].get_dataset()import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = False
import matplotlib.pyplot as plt
from l2hmc.utils.plot_helpers import COLORS
# ---------------------------------------------
# ---- DROP FIRST 20% FOR THERMALIZATION ------
# ---------------------------------------------
KEEP = int(0.8 * len(dset_eval_pt.draw))
dqpte = get_thermalized_configs(dset_eval_pt['dQint'].astype('int'))
dqpth = get_thermalized_configs(dset_hmc_pt['dQint'].astype('int'))
#dqpte = get_thermalized_configs(dsets['pt']['eval']['dQint'].astype('int'))
#dqpth = get_thermalized_configs(dsets['pt']['hmc']['dQint'].astype('int'))dqtfe = get_thermalized_configs(dsets['tf']['eval']['dQint'].astype('int'))
dqtfh = get_thermalized_configs(dsets['tf']['hmc']['dQint'].astype('int'))dqpte_tot = dqpte.sum().values.item()
dqpth_tot = dqpth.sum().values.item()log.info(f'{dqpte_tot=}')
log.info(f'{dqpth_tot=}')with plt.rc_context(
{
'text.usetex': False,
#'font.family': 'sans-serif',
#'font.sans-serif': 'IBM Plex Sans',
#'mathtext.fontset': 'dejavusans',
}
):
fig, ax = plt.subplots(figsize=(8, 3))
_ = sns.distplot(
dqpth.sum('chain'),
kde=False,
color=COLORS['blue'],
hist_kws={'edgecolor': 'none'},
label=fr'HMC, $ \sum \delta Q =$ {dqpth_tot}',
ax=ax
)
_ = sns.distplot(
dqpte.sum('chain'),
kde=False,
color=COLORS['red'],
hist_kws={'edgecolor': 'none'},
label=fr'Trained, $\sum\delta Q =$ {dqpte_tot}',
ax=ax
)
_ = ax.set_xlabel(
#f'# tunneling events / {dqpte.shape[-1]} configurations'
r"$\delta Q$",
#fontname="IBM Plex Sans",
#textcolor='#838383',
)
_ = ax.grid(alpha=0.0)
_ = ax.legend(loc='best', frameon=False)
plt.legend()PyTorch (red) vs HMC (blue).dqtfe_tot = dqtfe.sum().values.item()
dqtfh_tot = dqtfh.sum().values.item()
fig, ax = plt.subplots(figsize=(8, 3))
_ = sns.distplot(
dqtfh.sum('chain'),
kde=False,
color=COLORS['blue'],
hist_kws={'edgecolor': 'none'},
label=fr'HMC, $ \sum \delta Q =$ {dqtfh_tot}',
ax=ax
)
_ = sns.distplot(
dqtfe.sum('chain'),
kde=False,
color=COLORS['red'],
hist_kws={'edgecolor': 'none'},
label=fr'Trained, $\sum\delta Q =$ {dqtfe_tot}',
ax=ax
)
_ = ax.set_xlabel(
#f'# tunneling events / {dqtfe.shape[-1]} configurations'
r"$\delta Q$",
#fontname="IBM Plex Sans",
#textcolor='#838383',
)
_ = ax.grid(alpha=0.0)
_ = ax.legend(loc='best', frameon=False)
_ = plt.legend()TensorFlow (red) vs HMC (blue).fig, ax = plt.subplots(figsize=(16, 3), ncols=2)
_ = sns.distplot(
dqpte.sum('chain'),
kde=False,
color=COLORS['blue'],
hist_kws={'edgecolor': 'none'},
label='Eval',
ax=ax[0]
)
_ = sns.distplot(
dqpth.sum('chain'),
kde=False,
color=COLORS['red'],
hist_kws={'edgecolor': 'none'},
label='HMC',
ax=ax[0]
)
_ = ax[0].set_title('PyTorch')
_ = ax[0].set_xlabel(
f'# tunneling events / {dqpte.shape[-1]} configurations'
)
_ = ax[0].legend(loc='best', frameon=False)
plt.legend()
_ = sns.distplot(
dqtfe.sum('chain'),
kde=False,
color=COLORS['blue'],
hist_kws={'edgecolor': 'none'},
label='Eval',
ax=ax[1]
)
_ = sns.distplot(
dqtfh.sum('chain'),
kde=False,
color=COLORS['red'],
label='HMC',
ax=ax[1],
hist_kws={'edgecolor': 'none'},
)
_ = ax[1].set_title('TensorFlow')
_ = ax[1].set_xlabel(
r"""$\sum_{i=0} \left|\delta Q_{i}\right|$""",
#fontsize='large',
#f'# tunneling events / {dqpte.shape[-1]} configurations'
)
_ = ax[1].legend(loc='best', frameon=False)TensorFlow Results
sns.set_context('notebook')
ndraws = len(dsets['tf']['eval']['dQint'].draw)
drop = int(0.1 * ndraws)
keep = int(0.9 * ndraws)
dqe = dsets['tf']['eval']['dQint'][:, -90:]
dqh = dsets['tf']['hmc']['dQint'][:, -90:]
etot = dqe.astype(int).sum()
htot = dqh.astype(int).sum()
fsize = plt.rcParams['figure.figsize']
#figsize = (2.5 * fsize[0], fsize[1])
fig, ax = plt.subplots(figsize=figsize, ncols=2)
_ = dqe.astype(int).plot(ax=ax[0])
_ = dqh.astype(int).plot(ax=ax[1])
_ = ax[0].set_title(f'Eval, total: {etot.values}');
_ = ax[1].set_title(f'HMC, total: {htot.values}');
_ = fig.suptitle(fr'TensorFlow Improvement: {100*(etot / htot):3.0f}%')
dqe_tot = dqe.astype(int).sum().T.values.sum()
dqh_tot = dqh.astype(int).sum().T.values.sum()
dqeh_ratio = dqe_tot / dqh_tot
log.info(f"TensorFlow, EVAL\n {dqe.astype(int).sum('chain').T=}")
log.info(f"Eval: {dqe.astype(int).sum().T.values.sum()=}")
log.info(f"TensorFlow, HMC\n {dqh.astype(int).sum('chain').T=}")
log.info(f"HMC: {dqh.astype(int).sum().T.values.sum()=}")
log.critical(f"dQ_eval / dQ_hmc: {dqeh_ratio:.4f}")PyTorch Results
sns.set_context('notebook', font_scale=1.25)
ndraws = len(dsets['pt']['eval']['dQint'].draw)
drop = int(0.1 * ndraws)
keep = int(0.9 * ndraws)
dqe = dsets['pt']['eval']['dQint'][:, -90:]
dqh = dsets['pt']['hmc']['dQint'][:, -90:]
etot = dqe.astype(int).sum()
htot = dqh.astype(int).sum()
fsize = plt.rcParams['figure.figsize']
figsize = (2.5 * fsize[0], 0.8 * fsize[1])
fig, ax = plt.subplots(figsize=figsize, ncols=2)
_ = dqe.astype(int).plot(ax=ax[0])
_ = dqh.astype(int).plot(ax=ax[1])
_ = ax[0].set_title(f'Eval, total: {etot.values}');
_ = ax[1].set_title(f'HMC, total: {htot.values}');
#_ = fig.suptitle(fr'PyTorch Improvement: {100*(etot / htot):3.0f}%')dqe_tot = dqe.astype(int).sum().T.values.sum()
dqh_tot = dqh.astype(int).sum().T.values.sum()
dqeh_ratio = dqe_tot / dqh_tot
log.info(f"PyTorch, EVAL\n {dqe.astype(int).sum('chain').T=}")
log.info(f"Eval: {dqe.astype(int).sum().T.values.sum()=}")
log.info(f"TensorFlow, HMC\n {dqh.astype(int).sum('chain').T=}")
log.info(f"HMC: {dqh.astype(int).sum().T.values.sum()=}")
log.critical(f"dQ_eval / dQ_hmc: {dqeh_ratio:.4f}")Comparisons
import matplotlib.pyplot as plt
from l2hmc.utils.plot_helpers import set_plot_style, COLORS
import seaborn as sns
set_plot_style()
plt.rcParams['axes.linewidth'] = 2.0
sns.set_context('notebook', font_scale=1.25)
figsize = plt.rcParamsDefault['figure.figsize']
plt.rcParams['figure.dpi'] = plt.rcParamsDefault['figure.dpi']
for idx in range(4):
fig, (ax, ax1) = plt.subplots(
ncols=2,
#nrows=4,
figsize=(3. * figsize[0], figsize[1]),
)
_ = ax.plot(
dsets['pt']['eval'].intQ[idx] + 5, # .dQint.mean('chain')[100:],
color=COLORS['red'],
ls=':',
label='Trained',
lw=1.5,
);
_ = ax.plot(
dsets['pt']['hmc'].intQ[idx] - 5, # .dQint.mean('chain')[100:],
ls='-',
label='HMC',
color='#666666',
zorder=5,
lw=2.0,
);
_ = ax1.plot(
dsets['tf']['eval'].intQ[idx] + 5, # .dQint.mean('chain')[-100:],
color=COLORS['blue'],
ls=':',
label='Trained',
lw=1.5,
);
_ = ax1.plot(
dsets['tf']['hmc'].intQ[idx] - 5, # .dQint.mean('chain')[-100:],
color='#666666',
ls='-',
label='HMC',
zorder=5,
lw=2.0,
);
_ = ax.set_title('PyTorch')
_ = ax1.set_title('TensorFlow')
#_ = ax1.set_ylim(ax.get_ylim())
_ = ax.grid(True, alpha=0.2)
_ = ax1.grid(True, alpha=0.2)
_ = ax.set_xlabel('MD Step')
_ = ax1.set_xlabel('MD Step')
_ = ax.set_ylabel('dQint'
)
_ = ax.legend(loc='best', ncol=2, labelcolor='#939393')
_ = ax1.legend(loc='best', ncol=2, labelcolor='#939393')Citation
@online{foreman2023,
author = {Foreman, Sam},
title = {L2hmc-Qcd},
date = {2023-12-14},
url = {https://saforem2.github.io/l2hmc-qcd},
langid = {en}
}
