# Launch train.py from the liquid_lfm checkout under the debugpy launcher,
# using the python interpreter of the "fp8" conda environment.
#
# Everything after the train.py path is a key=value override handed to
# train.py unchanged.
#
# `cd ... &&` (instead of `cd ... ;`) ensures the launch is skipped when the
# directory change fails, rather than running from an unexpected directory.
#
# NOTE(review): 40533 is the argument given to the debugpy launcher; presumably
#   the port of one specific debug-adapter session, so this exact command may
#   only work while that session is listening -- confirm before reusing.
# NOTE(review): distributed.model_parallel is passed twice below (=node, then
#   =1). Which value takes effect depends on how train.py parses overrides --
#   confirm the intended value and drop the other.
# NOTE(review): train.num_epochs and train.total_epochs are both set to 3;
#   verify whether both keys are actually required.
cd /home/harold/liquid/liquid_lfm && /usr/bin/env /home/harold/miniconda3/envs/fp8/bin/python \
  /home/harold/.cursor-server/extensions/ms-python.debugpy-2024.8.0-linux-x64/bundled/libs/debugpy/adapter/../../debugpy/launcher 40533 -- \
  /home/harold/liquid/liquid_lfm/train.py \
  model=hybrid/liquid_hybrid_125M_fp8 \
  task=125M_themix_pretraining \
  pipeline=fsdp \
  fsdp.mixed_precision=bf16-mixed \
  monitor.log_interval=200 \
  train.valid_steps=500 \
  train.steps_per_epoch=2000 \
  train.num_epochs=3 \
  train.total_epochs=3 \
  distributed.model_parallel=node \
  monitor.loss_threshold=20 \
  train.run_speedtest=True \
  distributed.model_parallel=1 \
  quantization.enable_fp8_autocast=True