# Launch train.py from the liquid_lfm checkout through the debugpy launcher,
# using the Python interpreter of the "fp8" conda environment.
#
# The `cd` is chained with `&&` so that the launcher is never started from an
# unexpected working directory when the checkout path is missing.
#
# Everything after train.py is a list of key=value overrides handed to the
# training script unchanged (they look like Hydra-style overrides; confirm
# against train.py).
#
# NOTE(review): 40533 is passed straight to the debugpy launcher; it is
# presumably the adapter port of one specific debug session, so confirm it is
# still valid before re-running this command.
#
# NOTE(review): distributed.model_parallel is given twice below ("node", then
# "1"). Both occurrences are kept, in their original order, so the argument
# vector is unchanged; which value takes effect (or whether the duplicate is
# rejected) depends on how train.py parses overrides. Decide which value is
# intended and delete the other.
cd /home/harold/liquid/liquid_lfm && \
/usr/bin/env /home/harold/miniconda3/envs/fp8/bin/python /home/harold/.cursor-server/extensions/ms-python.debugpy-2024.8.0-linux-x64/bundled/libs/debugpy/adapter/../../debugpy/launcher 40533 -- /home/harold/liquid/liquid_lfm/train.py \
    model=hybrid/liquid_hybrid_125M_fp8 \
    task=125M_themix_pretraining \
    pipeline=fsdp \
    fsdp.mixed_precision=bf16-mixed \
    monitor.log_interval=200 \
    train.valid_steps=500 \
    train.steps_per_epoch=2000 \
    train.num_epochs=3 \
    train.total_epochs=3 \
    distributed.model_parallel=node \
    monitor.loss_threshold=20 \
    train.run_speedtest=True \
    distributed.model_parallel=1 \
    quantization.enable_fp8_autocast=True