l2hmc
Example: 2D U(1)
This notebook will (attempt) to walk through the steps needed to successfully instantiate and “run” an experiment.
For this example, we wish to train the L2HMC sampler for the 2D U(1) lattice gauge model with Wilson action:
S_{\beta}(n) = \beta \sum_{n}\sum_{\mu<\nu}\mathrm{Re}\left[1 - U_{\mu\nu}(n) \right]
This consists of the following steps:
Build an
Experiment
by parsing our configuration objectTrain our model using the
Experiment.train()
methodEvaluate our trained model
Experiment.evaluate(job_type='eval')
Compare our trained models’ performance against generic HMC
Experiment.evaluate(job_type='hmc')
Evaluating Performance
We measure the performance of our model by comparing the tunneling rate \delta Q of our trained sampler to that of generic HMC.
Explicitly, the tunneling rate is given by:
\delta Q = \frac{1}{N_{\mathrm{chains}}}\sum_{\mathrm{chains}} \left|Q_{i+1} - Q_{i}\right|
where the difference is between subsequent states in a chain, and the sum is over all N chains (each being ran in parallel, independently).
Since our goal is to generate independent configurations, the more our sampler tunnels between different topological sectors (tunneling rate), the more efficient our sampler.
Imports / Setup
! nvidia-smi | tail --lines -7
import os
= os.environ.get('CUDA_VISIBLE_DEVICES', None)
devices print(devices)
!getconf _NPROCESSORS_ONLN # get number of availble CPUs
'TORCH_CPP_LOG_LEVEL'] = 'ERROR'
os.environ['AUTOGRAPH_VERBOSITY'] = '10'
os.environ[!echo $CUDA_VISIBLE_DEVICES
import lovely_tensors as lt
lt.monkey_patch()=False) lt.set_config(color
# automatically detect and reload local changes to modules
%load_ext autoreload
%autoreload 2
# automatically detect and reload local changes to modules
%matplotlib inline
import matplotlib_inline
'svg', 'retina') matplotlib_inline.backend_inline.set_matplotlib_formats(
import os
import warnings
'COLORTERM'] = 'truecolor'
os.environ['ignore')
warnings.filterwarnings(# --------------------------------------
# BE SURE TO GRAB A FRESH GPU !
'CUDA_VISIBLE_DEVICES'] = '5'
os.environ[!echo $CUDA_VISIBLE_DEVICES
# --------------------------------------
import yaml
import logging
from l2hmc.configs import CONF_DIR
= CONF_DIR.joinpath('hydra', 'job_logging', 'rich_jupyter.yaml')
rlog_yaml with rlog_yaml.open('r') as stream:
= dict(yaml.safe_load(stream))
logconf
logging.config.dictConfig(logconf)= logging.getLogger()
log 'INFO') log.setLevel(
import torch
import opinionated
import seaborn as sns
import numpy as np
import lovely_tensors as lt
import matplotlib.pyplot as plt
import l2hmc.group.su3.pytorch.group as g
from pathlib import Path
from typing import Optional
from ezpz import setup_torch
#from l2hmc.utils.dist import setup_torch_distributed
from l2hmc.common import grab_tensor, print_dict
from l2hmc.configs import dict_to_list_of_overrides, get_experiment
from l2hmc.experiment.pytorch.experiment import Experiment, evaluate # noqa # noqa
from l2hmc.utils.plot_helpers import set_plot_style
from l2hmc.utils.history import BaseHistory
from l2hmc.utils.plot_helpers import ( # noqa
set_plot_style,
plot_scalar,
plot_chains,
plot_leapfrogs
)
'COLORTERM'] = 'truecolor'
os.environ[= np.random.randint(5000, 6000)
PORT #SEED = np.random.randint(0, 2 ** 16)
= 4351
SEED f'{SEED=}')
log.critical(f'{PORT=}')
log.info('MASTER_PORT'] = str(PORT)
os.environ[
#_ = setup_torch_distributed(backend='DDP', )
= setup_torch(backend='DDP', seed=SEED, port=PORT)
_
= (
_ 'cuda')
torch.set_default_device(if torch.cuda.is_available() else None
)#torch.set_default_dtype(torch.bfloat16)
#_ = (
# torch.set_autocast_gpu_dtype(torch.bfloat16)
# if torch.cuda.is_available() else None
#)
set_plot_style()'opinionated_min'])
plt.style.use(opinionated.STYLES['notebook', font_scale=1.25) sns.set_context(
import l2hmc
f'{l2hmc.__file__=}') log.info(
Initialize and Build Experiment
objects:
- The
l2hmc.main
module provides a functionbuild_experiment
:
def build_experiment(overrides: list[str]) -> tfExperiment | ptExperiment:
...
which will:
- Load the default options from
conf/config.yaml
- Override the default options with any values provided in
overrides
- Parse these options and build an
ExperimentConfig
which uniquely defines an experiment - Instantiate / return an
Experiment
from theExperimentConfig
. Depending onframework=pytorch|tensorflow
:framework=pytorch
->l2hmc.experiment.pytorch.Experiment
framework=tensorflow
->l2hmc.experiment.tensorflow.Experiment
>>> train_output = experiment.train()
>>> eval_output = experiment.evaluate(job_type='eval')
>>> hmc_output = experiment.evaluate(job_type='hmc')
Overriding Defaults
Specifics about the training / evaluation / hmc runs can be flexibly overridden by passing arguments to the training / evaluation / hmc runs, respectively
import numpy as np
# ---- NOTE -----------------------------
# These don't NEED to be specified,
# they're just here for completeness.
# the defaults are in `conf/config.yaml`
# ----------------------------------------
= {
DEFAULTS 'seed': 76043,
'precision': 'fp16',
'init_aim': False,
'init_wandb': False,
'use_wandb': False,
'restore': False,
'save': False,
'use_tb': False,
'dynamics': {
'nleapfrog': 4,
'nchains': 2048,
'eps': 0.05,
},'conv': 'none',
'steps': {
'log': 20,
'print': 200,
'nepoch': 5000,
'nera': 1,
},'annealing_schedule': {
'beta_init': 4.0,
'beta_final': 4.0,
},
}
= {
outputs 'pytorch': {
'train': {},
'eval': {},
'hmc': {},
},'tensorflow': {
'train': {},
'eval': {},
'hmc': {},
}, }
from l2hmc.configs import dict_to_list_of_overrides
= dict_to_list_of_overrides(DEFAULTS)
OVERRIDES '\n'.join(OVERRIDES)) log.info(
from l2hmc.__main__ import build_experiment
# Build PyTorch Experiment
= build_experiment(
ptExpU1 =[
overrides*OVERRIDES,
'framework=pytorch',
'backend=DDP',
] )
# Build TensorFlow Experiment
import tensorflow as tf
'mixed_float16')
tf.keras.mixed_precision.set_global_policy(
= build_experiment(
tfExpU1 =[
overrides*OVERRIDES,
'framework=tensorflow',
'backend=horovod',
] )
PyTorch
import time
from l2hmc.utils.history import BaseHistory, summarize_dict
import l2hmc.utils.live_plots as plotter
'xaxis.labellocation'] = 'center'
plt.rcParams['yaxis.labellocation'] = 'center'
plt.rcParams[
= 4.0
beta = ptExpU1.trainer.dynamics.random_state(beta) state
state.x.device
Training
'pytorch']['train'] = ptExpU1.trainer.train(
outputs[=1,
nera=5000,
nepoch=4.0,
beta# beta=[4.0, 4.25, 4.5, 4.75, 5.0],
)
# dset_train = ptExpU1.trainer.histories['train'].plot_all(num_chains=128)
= ptExpU1.save_dataset(job_type='train', nchains=32) dset_train_pt
Inference
Evaluation
'pytorch']['eval'] = ptExpU1.trainer.eval(
outputs[='eval',
job_type=500,
nprint=128,
nchains=2000,
eval_steps
)= ptExpU1.save_dataset(job_type='eval', nchains=32)
dset_eval_pt # dset_eval_pt = ptExpU1.trainer.histories['eval'].plot_all()
HMC
'pytorch']['hmc'] = ptExpU1.trainer.eval(
outputs[='hmc',
job_type=500,
nprint=128,
nchains=2000,
eval_steps
)= ptExpU1.save_dataset(job_type='hmc', nchains=32)
dset_hmc_pt # dset_hmc_pt = ptExpU1.trainer.histories['hmc'].plot_all()
TensorFlow
Train
'tensorflow']['train'] = tfExpU1.trainer.train(
outputs[=1,
nera=5000,
nepoch=4.0,
beta# beta=[4.0, 4.25, 4.5, 4.75, 5.0],
)# dset_train_tf = tfExpU1.trainer.histories['train'].plot_all()
= tfExpU1.save_dataset(job_type='train', nchains=32) _
Inference
Evaluate
'tensorflow']['eval'] = tfExpU1.trainer.eval(
outputs[='eval',
job_type=500,
nprint=128,
nchains=2000,
eval_steps
)# dset_eval_tf = tfExpU1.trainer.histories['eval'].plot_all()
= tfExpU1.save_dataset(job_type='eval', nchains=32) _
HMC
'tensorflow']['hmc'] = tfExpU1.trainer.eval(
outputs[='hmc',
job_type=500,
nprint=128,
nchains=2000,
eval_steps
)= tfExpU1.save_dataset(job_type='hmc', nchains=32) _
Model Performance
Our goal is improving the efficiency of our MCMC sampler.
In particular, we are interested in generating independent save_datasetrations which we can then use to calculate expectation values of physical observables.
For our purposes, we are interested in obtaining lattice configurations from distinct topological charge sectors, as characterized by a configurations topological charge, Q.
HMC is known to suffer from critical slowing down, a phenomenon in which our configurations remains stuck in some local topological charge sector and fails to produce distinct configurations.
In particular, it is known that the integrated autocorrelation time of the topological charge \tau grows exponentially with decreasing lattice spacing (i.e. continuum limit), making this theory especially problematic.
Because of this, we can assess our models’ performance by looking at the tunneling rate, i.e. the rate at which our sampler jumps between these different charge sectors.
We can write this quantity as:
\delta Q = |Q^{(i)} - Q^{(i-1)}|
where we look at the difference in the topological charge between sequential configurations.
Note: The efficiency of our sampler is directly proportional to the tunneling rate, which is inversely proportional to the integrated autocorrelation time \tau, i.e.
\text{Efficiency} \propto \delta Q \propto \frac{1}{\tau}
Explicitly, this means that the more efficient the model \longrightarrow
- the larger tunneling rate
- the smaller integrated autocorrelation time for Q
import xarray as xr
def get_thermalized_configs(
| xr.DataArray,
x: np.ndarray int = 5
drop: -> np.ndarray | xr.DataArray:
) """Drop the first `drop` states across all chains.
x.shape = [draws, chains]
"""
if isinstance(x, np.ndarray):
return np.sort(x)[..., :-drop]
if isinstance(x, xr.DataArray):
return x.sortby(
'chain', 'draw'],
[=False
ascending-drop]
)[..., :raise TypeError
Comparisons
We can measure our models’ performance explicitly by looking at the average tunneling rate, \delta Q_{\mathbb{Z}}, for our trained model and comparing it against generic HMC.
Recall,
\delta Q_{\mathbb{Z}} := \big|Q^{(i+1)}_{\mathbb{Z}} - Q^{(i)}_{\mathbb{Z}}\big|
where a higher value of \delta Q_{\mathbb{Z}} corresponds to better tunneling of the topological charge, Q_{\mathbb{Z}}.
Note that we can get a concise representation of the data from different parts of our run via:
Note that the data from each of the different parts of our experiment (i.e. train
, eval
, and hmc
) are stored as a dict, e.g.
>>> list(ptExpU1.trainer.histories.keys())
'train', 'eval', 'hmc']
[>>> train_history = ptExpU1.trainer.histories['train']
>>> train_dset = train_history.get_dataset()
>>> assert isinstance(train_history, l2hmc.utils.history.BaseHistory)
>>> assert isinstance(train_dset, xarray.Dataset)
(see below, for example)
We aggregate the data into the dsets
dict below, grouped by:
- Framework (
pytorch
/tensorflow
) - Job type (
train
,eval
,hmc
)
import logging
= logging.getLogger(__name__)
log = {}
dsets = ['pt', 'tf']
fws = ['train', 'eval', 'hmc']
modes for fw in fws:
= {}
dsets[fw] for mode in modes:
= None
hist if fw == 'pt':
= ptExpU1.trainer.histories.get(mode, None)
hist elif fw == 'tf':
= tfExpU1.trainer.histories.get(mode, None)
hist if hist is not None:
f'Getting dataset for {fw}: {mode}')
log.info(= hist.get_dataset() dsets[fw][mode]
= ptExpU1.trainer.histories['eval'].get_dataset()
dset_eval_pt = ptExpU1.trainer.histories['hmc'].get_dataset() dset_hmc_pt
import numpy as np
import matplotlib.pyplot as plt
'text.usetex'] = False
plt.rcParams[import matplotlib.pyplot as plt
from l2hmc.utils.plot_helpers import COLORS
# ---------------------------------------------
# ---- DROP FIRST 20% FOR THERMALIZATION ------
# ---------------------------------------------
= int(0.8 * len(dset_eval_pt.draw))
KEEP = get_thermalized_configs(dset_eval_pt['dQint'].astype('int'))
dqpte = get_thermalized_configs(dset_hmc_pt['dQint'].astype('int'))
dqpth #dqpte = get_thermalized_configs(dsets['pt']['eval']['dQint'].astype('int'))
#dqpth = get_thermalized_configs(dsets['pt']['hmc']['dQint'].astype('int'))
= get_thermalized_configs(dsets['tf']['eval']['dQint'].astype('int'))
dqtfe = get_thermalized_configs(dsets['tf']['hmc']['dQint'].astype('int')) dqtfh
= dqpte.sum().values.item()
dqpte_tot = dqpth.sum().values.item() dqpth_tot
f'{dqpte_tot=}')
log.info(f'{dqpth_tot=}') log.info(
with plt.rc_context(
{'text.usetex': False,
#'font.family': 'sans-serif',
#'font.sans-serif': 'IBM Plex Sans',
#'mathtext.fontset': 'dejavusans',
}
):= plt.subplots(figsize=(8, 3))
fig, ax = sns.distplot(
_ sum('chain'),
dqpth.=False,
kde=COLORS['blue'],
color={'edgecolor': 'none'},
hist_kws=fr'HMC, $ \sum \delta Q =$ {dqpth_tot}',
label=ax
ax
)= sns.distplot(
_ sum('chain'),
dqpte.=False,
kde=COLORS['red'],
color={'edgecolor': 'none'},
hist_kws=fr'Trained, $\sum\delta Q =$ {dqpte_tot}',
label=ax
ax
)= ax.set_xlabel(
_ #f'# tunneling events / {dqpte.shape[-1]} configurations'
r"$\delta Q$",
#fontname="IBM Plex Sans",
#textcolor='#838383',
)= ax.grid(alpha=0.0)
_ = ax.legend(loc='best', frameon=False)
_ plt.legend()
= dqtfe.sum().values.item()
dqtfe_tot = dqtfh.sum().values.item()
dqtfh_tot
= plt.subplots(figsize=(8, 3))
fig, ax = sns.distplot(
_ sum('chain'),
dqtfh.=False,
kde=COLORS['blue'],
color={'edgecolor': 'none'},
hist_kws=fr'HMC, $ \sum \delta Q =$ {dqtfh_tot}',
label=ax
ax
)= sns.distplot(
_ sum('chain'),
dqtfe.=False,
kde=COLORS['red'],
color={'edgecolor': 'none'},
hist_kws=fr'Trained, $\sum\delta Q =$ {dqtfe_tot}',
label=ax
ax
)= ax.set_xlabel(
_ #f'# tunneling events / {dqtfe.shape[-1]} configurations'
r"$\delta Q$",
#fontname="IBM Plex Sans",
#textcolor='#838383',
)= ax.grid(alpha=0.0)
_ = ax.legend(loc='best', frameon=False)
_ = plt.legend() _
= plt.subplots(figsize=(16, 3), ncols=2)
fig, ax = sns.distplot(
_ sum('chain'),
dqpte.=False,
kde=COLORS['blue'],
color={'edgecolor': 'none'},
hist_kws='Eval',
label=ax[0]
ax
)= sns.distplot(
_ sum('chain'),
dqpth.=False,
kde=COLORS['red'],
color={'edgecolor': 'none'},
hist_kws='HMC',
label=ax[0]
ax
)
= ax[0].set_title('PyTorch')
_ = ax[0].set_xlabel(
_ f'# tunneling events / {dqpte.shape[-1]} configurations'
)= ax[0].legend(loc='best', frameon=False)
_
plt.legend()
= sns.distplot(
_ sum('chain'),
dqtfe.=False,
kde=COLORS['blue'],
color={'edgecolor': 'none'},
hist_kws='Eval',
label=ax[1]
ax
)= sns.distplot(
_ sum('chain'),
dqtfh.=False,
kde=COLORS['red'],
color='HMC',
label=ax[1],
ax={'edgecolor': 'none'},
hist_kws
)
= ax[1].set_title('TensorFlow')
_ = ax[1].set_xlabel(
_ r"""$\sum_{i=0} \left|\delta Q_{i}\right|$""",
#fontsize='large',
#f'# tunneling events / {dqpte.shape[-1]} configurations'
)= ax[1].legend(loc='best', frameon=False) _
TensorFlow Results
'notebook')
sns.set_context(= len(dsets['tf']['eval']['dQint'].draw)
ndraws = int(0.1 * ndraws)
drop = int(0.9 * ndraws)
keep
= dsets['tf']['eval']['dQint'][:, -90:]
dqe = dsets['tf']['hmc']['dQint'][:, -90:]
dqh
= dqe.astype(int).sum()
etot = dqh.astype(int).sum()
htot
= plt.rcParams['figure.figsize']
fsize #figsize = (2.5 * fsize[0], fsize[1])
= plt.subplots(figsize=figsize, ncols=2)
fig, ax = dqe.astype(int).plot(ax=ax[0])
_ = dqh.astype(int).plot(ax=ax[1])
_ = ax[0].set_title(f'Eval, total: {etot.values}');
_ = ax[1].set_title(f'HMC, total: {htot.values}');
_ = fig.suptitle(fr'TensorFlow Improvement: {100*(etot / htot):3.0f}%')
_
= dqe.astype(int).sum().T.values.sum()
dqe_tot = dqh.astype(int).sum().T.values.sum()
dqh_tot = dqe_tot / dqh_tot
dqeh_ratio
f"TensorFlow, EVAL\n {dqe.astype(int).sum('chain').T=}")
log.info(f"Eval: {dqe.astype(int).sum().T.values.sum()=}")
log.info(f"TensorFlow, HMC\n {dqh.astype(int).sum('chain').T=}")
log.info(f"HMC: {dqh.astype(int).sum().T.values.sum()=}")
log.info(f"dQ_eval / dQ_hmc: {dqeh_ratio:.4f}") log.critical(
PyTorch Results
'notebook', font_scale=1.25)
sns.set_context(
= len(dsets['pt']['eval']['dQint'].draw)
ndraws = int(0.1 * ndraws)
drop = int(0.9 * ndraws)
keep
= dsets['pt']['eval']['dQint'][:, -90:]
dqe = dsets['pt']['hmc']['dQint'][:, -90:]
dqh
= dqe.astype(int).sum()
etot = dqh.astype(int).sum()
htot
= plt.rcParams['figure.figsize']
fsize = (2.5 * fsize[0], 0.8 * fsize[1])
figsize = plt.subplots(figsize=figsize, ncols=2)
fig, ax = dqe.astype(int).plot(ax=ax[0])
_ = dqh.astype(int).plot(ax=ax[1])
_ = ax[0].set_title(f'Eval, total: {etot.values}');
_ = ax[1].set_title(f'HMC, total: {htot.values}');
_ #_ = fig.suptitle(fr'PyTorch Improvement: {100*(etot / htot):3.0f}%')
= dqe.astype(int).sum().T.values.sum()
dqe_tot = dqh.astype(int).sum().T.values.sum()
dqh_tot = dqe_tot / dqh_tot
dqeh_ratio
f"PyTorch, EVAL\n {dqe.astype(int).sum('chain').T=}")
log.info(f"Eval: {dqe.astype(int).sum().T.values.sum()=}")
log.info(f"TensorFlow, HMC\n {dqh.astype(int).sum('chain').T=}")
log.info(f"HMC: {dqh.astype(int).sum().T.values.sum()=}")
log.info(f"dQ_eval / dQ_hmc: {dqeh_ratio:.4f}") log.critical(
Comparisons
import matplotlib.pyplot as plt
from l2hmc.utils.plot_helpers import set_plot_style, COLORS
import seaborn as sns
set_plot_style()'axes.linewidth'] = 2.0
plt.rcParams['notebook', font_scale=1.25)
sns.set_context(= plt.rcParamsDefault['figure.figsize']
figsize 'figure.dpi'] = plt.rcParamsDefault['figure.dpi']
plt.rcParams[
for idx in range(4):
= plt.subplots(
fig, (ax, ax1) =2,
ncols#nrows=4,
=(3. * figsize[0], figsize[1]),
figsize
)= ax.plot(
_ 'pt']['eval'].intQ[idx] + 5, # .dQint.mean('chain')[100:],
dsets[=COLORS['red'],
color=':',
ls='Trained',
label=1.5,
lw;
)
= ax.plot(
_ 'pt']['hmc'].intQ[idx] - 5, # .dQint.mean('chain')[100:],
dsets[='-',
ls='HMC',
label='#666666',
color=5,
zorder=2.0,
lw;
)
= ax1.plot(
_ 'tf']['eval'].intQ[idx] + 5, # .dQint.mean('chain')[-100:],
dsets[=COLORS['blue'],
color=':',
ls='Trained',
label=1.5,
lw
;
)= ax1.plot(
_ 'tf']['hmc'].intQ[idx] - 5, # .dQint.mean('chain')[-100:],
dsets[='#666666',
color='-',
ls='HMC',
label=5,
zorder=2.0,
lw;
)= ax.set_title('PyTorch')
_ = ax1.set_title('TensorFlow')
_ #_ = ax1.set_ylim(ax.get_ylim())
= ax.grid(True, alpha=0.2)
_ = ax1.grid(True, alpha=0.2)
_ = ax.set_xlabel('MD Step')
_ = ax1.set_xlabel('MD Step')
_ = ax.set_ylabel('dQint'
_
)= ax.legend(loc='best', ncol=2, labelcolor='#939393')
_ = ax1.legend(loc='best', ncol=2, labelcolor='#939393') _
Citation
@online{foreman2023,
author = {Foreman, Sam},
title = {L2hmc-Qcd},
date = {2023-12-14},
url = {https://saforem2.github.io/l2hmc-qcd},
langid = {en}
}