Skip to content
Snippets Groups Projects
Commit 1e7fc585 authored by Piyush Raikwar's avatar Piyush Raikwar
Browse files

add adaptation ablation

parent a0e7a9d5
Branches
No related tags found
No related merge requests found
experiment:
project_name: calodit_consistency_distillation
run_name: cd_1_allegro_adapt
output_dir: ${oc.env:RESULTS_DIR}/experiments
project_name: cd_allegro_adapt
run_name: steps_10K_data_vary
output_dir: ./experiments
seed: 42
use_wandb: true
accelerator:
......@@ -11,12 +11,13 @@ data:
train:
files:
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part1.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5']
use_cond_info: true
need_geo_condn: true
train_on: ["Par04", "SciPb", "ODD", "CLD"]
max_num_showers: 1000
valid:
files:
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5']
......@@ -50,7 +51,7 @@ model:
num_heads: 6
mlp_ratio: 4
diffusion_class: src.diffusion.EDM
model_path: /afs/cern.ch/user/p/praikwar/public/Par04Models/edm_multi_final_model.pt
model_path: ./experiments/edm_allegro_scratch/steps_10K_data_1K/final_model.pt
student:
architecture_class: src.models.CaloDiT
architecture_args:
......@@ -64,7 +65,7 @@ model:
num_heads: 6
mlp_ratio: 4
diffusion_class: src.diffusion.ConsistencyModel
model_path: /afs/cern.ch/user/p/praikwar/public/Par04Models/cd_multi_final_model.pt
model_path: ./pretrained/cd_multi_final_model.pt
distill:
target_ema_decay: 0.95
discretization_steps: 32
......@@ -72,9 +73,9 @@ distill:
solver: heun
init_student_from_teacher: false
train:
learning_rate: 0.0005
per_device_batch_size: 256
max_steps: 100000
learning_rate: 0.0001
per_device_batch_size: 128
max_steps: 10000
gradient_accumulation_steps: 1
max_grad_norm: 1.0
optimizer_class: torch.optim.AdamW
......@@ -85,9 +86,7 @@ train:
lr_scheduler_class: src.lr_schedulers.get_lr_scheduler
lr_scheduler_args:
lr_scheduler_type: wsd
num_warmup_steps: 2000
num_stable_steps: 80000
num_decay_steps: 18000
num_max_steps: 10000
decay_type: sqrt
ema_scheduler_class: src.models.ema.InverseDecayEMA
ema_scheduler_args:
......@@ -97,18 +96,19 @@ train:
steps: 1
discretization_steps: 32
valid_strategy: steps
valid_steps: 10000
valid_steps: 9999
logging_strategy: steps
logging_steps: 100
save_strategy: steps
save_steps: 10000
save_steps: 9999
save_best_and_last_only: false
load_best_model_at_end: false
test_strategy: steps
test_steps: 10000
test_steps: 9999
test_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 5, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_5GeV_phi0.0_theta1.57_edm4hep_13245646.0.h5']
- ['ALLEGRO', 50, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_50GeV_phi0.0_theta1.57_edm4hep_9261224.0.h5']
# - ['ALLEGRO', 500, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_500GeV_phi0.0_theta1.57_edm4hep_9261225.0.h5']
need_geo_condn: true
train_on: ["Par04", "SciPb", "ODD", "CLD"]
\ No newline at end of file
experiment:
project_name: calodit_consistency_distillation
run_name: cd_1_allegro_scratch
output_dir: ${oc.env:RESULTS_DIR}/experiments
project_name: cd_allegro_scratch
run_name: steps_10K_data_vary
output_dir: ./experiments
seed: 42
use_wandb: true
accelerator:
......@@ -11,10 +11,11 @@ data:
train:
files:
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part1.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5'
use_cond_info: true
max_num_showers: 1000
valid:
files:
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5'
......@@ -46,7 +47,7 @@ model:
num_heads: 6
mlp_ratio: 4
diffusion_class: src.diffusion.EDM
model_path: chkpt/experiments/calodit_edm/par04_test_edm_allegro_scratch/2024-12-09_17-28-59/final_model.pt
model_path: ./experiments/edm_allegro_scratch/steps_10K_data_1K/final_model.pt
student:
architecture_class: src.models.CaloDiT
architecture_args:
......@@ -60,16 +61,16 @@ model:
num_heads: 6
mlp_ratio: 4
diffusion_class: src.diffusion.ConsistencyModel
model_path: chkpt/experiments/calodit_edm/par04_test_edm_allegro_scratch/2024-12-09_17-28-59/final_model.pt
distill:
target_ema_decay: 0.95
discretization_steps: 32
metric_function: l2
solver: heun
init_student_from_teacher: true
train:
learning_rate: 0.001
per_device_batch_size: 256
max_steps: 200000
learning_rate: 0.0005
per_device_batch_size: 128
max_steps: 10000
gradient_accumulation_steps: 1
max_grad_norm: 1.0
optimizer_class: torch.optim.AdamW
......@@ -80,9 +81,7 @@ train:
lr_scheduler_class: src.lr_schedulers.get_lr_scheduler
lr_scheduler_args:
lr_scheduler_type: wsd
num_warmup_steps: 5000
num_stable_steps: 170000
num_decay_steps: 25000
num_max_steps: 10000
decay_type: sqrt
ema_scheduler_class: src.models.ema.InverseDecayEMA
ema_scheduler_args:
......@@ -92,16 +91,17 @@ train:
steps: 1
discretization_steps: 32
valid_strategy: steps
valid_steps: 10000
valid_steps: 9999
logging_strategy: steps
logging_steps: 100
save_strategy: steps
save_steps: 25000
save_steps: 9999
save_best_and_last_only: false
load_best_model_at_end: false
test_strategy: steps
test_steps: 10000
test_steps: 9999
test_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 5, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_5GeV_phi0.0_theta1.57_edm4hep_13245646.0.h5']
- ['ALLEGRO', 50, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_50GeV_phi0.0_theta1.57_edm4hep_9261224.0.h5']
\ No newline at end of file
- ['ALLEGRO', 50, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_50GeV_phi0.0_theta1.57_edm4hep_9261224.0.h5']
# - ['ALLEGRO', 500, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_500GeV_phi0.0_theta1.57_edm4hep_9261225.0.h5']
experiment:
project_name: calodit_edm
run_name: par04_test_edm_allegro_adapt
output_dir: ${oc.env:RESULTS_DIR}/experiments
project_name: edm_allegro_adapt
run_name: steps_10K_data_vary
output_dir: ./experiments
seed: 42
use_wandb: true
accelerator:
......@@ -11,12 +11,13 @@ data:
train:
files:
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part1.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5']
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5']
# - ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5']
use_cond_info: true
need_geo_condn: true
train_on: ["Par04", "SciPb", "ODD", "CLD"]
max_num_showers: 1000
valid:
files:
- ['ALLEGRO', '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5']
......@@ -56,9 +57,9 @@ model:
rho: 7
P_mean: -1.2
P_std: 1.2
model_path: /afs/cern.ch/user/p/praikwar/public/Par04Models/edm_multi_final_model.pt
model_path: ./pretrained/edm_multi_final_model.pt
train:
learning_rate: 0.001
learning_rate: 0.0001
per_device_batch_size: 128
max_steps: 10000
gradient_accumulation_steps: 1
......@@ -71,9 +72,7 @@ train:
lr_scheduler_class: src.lr_schedulers.get_lr_scheduler
lr_scheduler_args:
lr_scheduler_type: wsd
num_warmup_steps: 200
num_stable_steps: 8000
num_decay_steps: 1800
num_max_steps: 10000
decay_type: sqrt
ema_scheduler_class: src.models.ema.InverseDecayEMA
ema_scheduler_args:
......@@ -88,15 +87,15 @@ train:
s_tmax: .inf
s_noise: 1.0
valid_strategy: steps
valid_steps: 1000
valid_steps: 9999
logging_strategy: steps
logging_steps: 100
save_strategy: steps
save_steps: 1000
save_steps: 9999
save_best_and_last_only: false
load_best_model_at_end: false
test_strategy: steps
test_steps: 1000
test_steps: 9999
test_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 5, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_5GeV_phi0.0_theta1.57_edm4hep_13245646.0.h5']
......
experiment:
project_name: calodit_edm
run_name: par04_test_edm_allegro_scratch
output_dir: ${oc.env:RESULTS_DIR}/experiments
project_name: edm_allegro_scratch
run_name: steps_10K_data_vary
output_dir: ./experiments
seed: 42
use_wandb: true
accelerator:
......@@ -11,10 +11,11 @@ data:
train:
files:
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part1.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5'
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part2.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part3.h5'
# - '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part4.h5'
use_cond_info: true
max_num_showers: 1000
valid:
files:
- '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5'
......@@ -55,7 +56,7 @@ model:
train:
learning_rate: 0.001
per_device_batch_size: 128
max_steps: 100000
max_steps: 10000
gradient_accumulation_steps: 1
max_grad_norm: 1.0
optimizer_class: torch.optim.AdamW
......@@ -66,9 +67,7 @@ train:
lr_scheduler_class: src.lr_schedulers.get_lr_scheduler
lr_scheduler_args:
lr_scheduler_type: wsd
num_warmup_steps: 5000
num_stable_steps: 80000
num_decay_steps: 15000
num_max_steps: 10000
decay_type: sqrt
ema_scheduler_class: src.models.ema.InverseDecayEMA
ema_scheduler_args:
......@@ -83,15 +82,15 @@ train:
s_tmax: .inf
s_noise: 1.0
valid_strategy: steps
valid_steps: 10000
valid_steps: 9999
logging_strategy: steps
logging_steps: 100
save_strategy: steps
save_steps: 10000
save_steps: 9999
save_best_and_last_only: false
load_best_model_at_end: false
test_strategy: steps
test_steps: 10000
test_steps: 9999
test_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 5, 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_discrete_noTracker/ddsim_mesh_FCCeeALLEGRO_gamma_1000events_5GeV_phi0.0_theta1.57_edm4hep_13245646.0.h5']
......
accelerator:
cpu: False # whether or not to force the script to execute on CPU
mixed_precision: "no" # choose from 'no', 'fp16', 'bf16' or 'fp8'
model:
merge_config: false
model_path: null
architecture_class: src.models.CaloDiT
diffusion_class: src.diffusion.ConsistencyModel
architecture_args:
input_size: [9, 16, 45]
patch_size: [3, 2, 3]
conditions_size: [1, 2, 1, 5]
in_channels: 1
out_channels: 1
emb_dim: 384
num_layers: 6
num_heads: 6
mlp_ratio: 4
diffusion_args:
sigma_min: 0.002
sigma_max: 80
sigma_data: 0.5
rho: 7
P_mean: -1.2
P_std: 1.2
validate:
need_geo_condn: true
train_on: ["Par04", "SciPb", "ODD", "CLD"]
output_dir: ${oc.env:RESULTS_DIR}/simulations
batch_size: 128
simulation_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 'all', 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5']
sampling:
steps: 1
discretization_steps: 32
preprocessing:
steps:
- class_name: src.data.preprocessing.CutNoise
init_args:
noise_level: 1.515e-05
both_directions: false
- class_name: src.data.preprocessing.LogTransform
init_args:
eps: 1.0e-06
- class_name: src.data.preprocessing.Standarize
init_args:
mean: -10.766
std: 3.5773
\ No newline at end of file
accelerator:
cpu: False # whether or not to force the script to execute on CPU
mixed_precision: "no" # choose from 'no', 'fp16', 'bf16' or 'fp8'
model:
merge_config: false
model_path: null
architecture_class: src.models.CaloDiT
diffusion_class: src.diffusion.ConsistencyModel
architecture_args:
input_size: [9, 16, 45]
patch_size: [3, 2, 3]
conditions_size: [1, 2, 1]
in_channels: 1
out_channels: 1
emb_dim: 384
num_layers: 6
num_heads: 6
mlp_ratio: 4
diffusion_args:
sigma_min: 0.002
sigma_max: 80
sigma_data: 0.5
rho: 7
P_mean: -1.2
P_std: 1.2
validate:
need_geo_condn: false
train_on: null
output_dir: ${oc.env:RESULTS_DIR}/simulations
batch_size: 128
simulation_conditions:
# - [geometry, energy, phi, theta, file]
- ['ALLEGRO', 'all', 0.0, 1.57, '/eos/geant4/fastSim/ddfastsim/FCCeeALLEGRO/dataset2_1GeV100GeVFlat_theta0p87to2p27_phiFull/ddsim_mesh_FCCeeALLEGRO_gamma_100kevents_1GeV100GeV_GPSFlat_edm4hep_13245648_part10.h5']
sampling:
steps: 1
discretization_steps: 32
preprocessing:
steps:
- class_name: src.data.preprocessing.CutNoise
init_args:
noise_level: 1.515e-05
both_directions: false
- class_name: src.data.preprocessing.LogTransform
init_args:
eps: 1.0e-06
- class_name: src.data.preprocessing.Standarize
init_args:
mean: -10.766
std: 3.5773
\ No newline at end of file
#!/bin/bash
# Ablation sweep: for every (training-steps, dataset-size) pair, train an EDM
# model from scratch (GPU 0) and an adapted EDM model (GPU 1) in parallel,
# then distill each teacher into a consistency model on the same GPU split.

# Convert a size string like "1K" to its numeric value (e.g. 1000).
# Values without a K suffix pass through unchanged.
to_number() {
    local value=$1
    if [[ $value == *K ]]; then
        echo $(( ${value%K} * 1000 ))
    else
        echo "$value"
    fi
}

declare -a steps=("1K" "2K" "5K" "10K" "20K")
declare -a data=("1K" "5K" "25K" "100K")

for step in "${steps[@]}"; do
    maxsteps=$(to_number "$step")
    for d in "${data[@]}"; do
        maxdata=$(to_number "$d")
        echo "Running with step: $step, data: $d, maxsteps: $maxsteps, maxdata: $maxdata"

        # ----- Teacher training: scratch on GPU 0 (background), adapt on GPU 1 -----
        CUDA_VISIBLE_DEVICES=0 python scripts/train.py configs/train/edm_allegro_scratch.yaml \
            experiment.run_name="steps_${step}_data_${d}" \
            data.train.max_num_showers="$maxdata" \
            train.max_steps="$maxsteps" \
            train.lr_scheduler_args.num_max_steps="$maxsteps" \
            train.test_steps=$((maxsteps - 1)) &
        CUDA_VISIBLE_DEVICES=1 python scripts/train.py configs/train/edm_allegro_adapt.yaml \
            experiment.run_name="steps_${step}_data_${d}" \
            data.train.max_num_showers="$maxdata" \
            train.max_steps="$maxsteps" \
            train.lr_scheduler_args.num_max_steps="$maxsteps" \
            train.test_steps=$((maxsteps - 1))
        # Both teacher trainings must finish before distillation can load
        # their final checkpoints.
        wait

        # ----- Distillation: scratch on GPU 0 (background), adapt on GPU 1 -----
        CUDA_VISIBLE_DEVICES=0 python scripts/distill.py configs/distill/cd_allegro_scratch.yaml \
            experiment.run_name="steps_${step}_data_${d}" \
            data.train.max_num_showers="$maxdata" \
            train.max_steps="$maxsteps" \
            train.lr_scheduler_args.num_max_steps="$maxsteps" \
            train.test_steps=$((maxsteps - 1)) \
            model.teacher.model_path="./experiments/edm_allegro_scratch/steps_${step}_data_${d}/final_model.pt" &
        CUDA_VISIBLE_DEVICES=1 python scripts/distill.py configs/distill/cd_allegro_adapt.yaml \
            experiment.run_name="steps_${step}_data_${d}" \
            data.train.max_num_showers="$maxdata" \
            train.max_steps="$maxsteps" \
            train.lr_scheduler_args.num_max_steps="$maxsteps" \
            train.test_steps=$((maxsteps - 1)) \
            model.teacher.model_path="./experiments/edm_allegro_adapt/steps_${step}_data_${d}/final_model.pt"
        # BUGFIX: the original script did not wait here, so the backgrounded
        # GPU-0 scratch distillation could still be running when the next
        # iteration launched its GPU-0 training job.
        wait
    done
done
......@@ -33,7 +33,7 @@ def main(cfg: DictConfig):
model = accelerator.prepare(model)
# model.summarize()
preprocessor = CaloShowerPreprocessor(**model.config.preprocessing)
preprocessor = CaloShowerPreprocessor(**cfg.preprocessing)
batch_size = int(cfg.validate.batch_size)
for geometry, energy, phi, theta, fullsim_path in cfg.validate.simulation_conditions:
......@@ -45,7 +45,7 @@ def main(cfg: DictConfig):
file_struc = [[geometry, fullsim_path],]
else:
file_struc = [fullsim_path,]
dataset = CaloShowerDataset(files=file_struc, need_geo_condn=cfg.validate.need_geo_condn, train_on=cfg.validate.train_on)
dataset = CaloShowerDataset(files=file_struc, need_geo_condn=cfg.validate.need_geo_condn, train_on=cfg.validate.train_on, max_num_showers=50000)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
dataloader = accelerator.prepare(dataloader)
......@@ -57,8 +57,12 @@ def main(cfg: DictConfig):
generated_events_list = []
orginal_events_list = []
orginal_energy_list = []
for sample in tqdm(dataloader):
showers, conditions = sample
org_energy = conditions[0]
orginal_energy_list.append(org_energy.cpu().numpy())
_, conditions = preprocessor.transform(conditions=conditions)
showers = cut_below_noise_level(showers, noise_level=preprocessor.shower_preprocessor.noise_level)
......@@ -70,6 +74,7 @@ def main(cfg: DictConfig):
original_events = np.concatenate(orginal_events_list)
generated_events = np.concatenate(generated_events_list)
original_energy = np.concatenate(orginal_energy_list)
output_path = (
output_dir / f"generated_{num_samples}events_Geo_{geometry}_E_{energy}GeV_Phi_{phi}_Theta_{theta}.h5"
......@@ -79,10 +84,10 @@ def main(cfg: DictConfig):
logger.warning(f"File {output_path} already exists, overwriting")
output_path.unlink()
save_showers(generated_events, energy, phi, theta, output_path)
save_showers(generated_events, original_energy, phi, theta, output_path)
logger.info(f"Saved generated events to {output_path}")
compare_observables(original_events, generated_events, output_dir, geometry, energy, phi, theta)
# compare_observables(original_events, generated_events, output_dir, geometry, energy, phi, theta)
logger.info("Validation completed.")
......
......@@ -36,7 +36,7 @@ class DiffusionModelBase(nn.Module):
logger.info(f"Saved model to {save_path}")
def load_state(self, load_path: Union[str, Path]) -> None:
state = torch.load(load_path, map_location="cpu")
state = torch.load(load_path, map_location="cpu", weights_only=False)
self.model.load_state_dict(state["model"])
self.config = state["config"]
logger.info(f"Loaded model from {load_path}")
......@@ -57,10 +57,11 @@ def create_model_from_config(config) -> DiffusionModelBase:
diffusion_args = config.get("diffusion_args", {})
model_path = config.get("model_path")
merge_config = config.get("merge_config", True)
if model_path is not None:
if merge_config and model_path is not None:
logger.info(f"Loading model from {model_path}")
state = torch.load(model_path, map_location="cpu")
state = torch.load(model_path, map_location="cpu", weights_only=False)
saved_model_config = state["config"]["model"]
saved_architecture_args = saved_model_config["architecture_args"]
......
......@@ -13,10 +13,11 @@ from transformers.optimization import (
def get_wsd_schedule(
optimizer: Optimizer,
num_warmup_steps: int,
num_stable_steps: int,
num_decay_steps: int,
min_lr_ratio: float = 0,
num_warmup_steps: int = None,
num_stable_steps: int = None,
num_decay_steps: int = None,
num_max_steps: int = None,
min_lr_ratio: float = 1e-6,
last_epoch: int = -1,
decay_type: str = "cosine",
):
......@@ -41,6 +42,10 @@ def get_wsd_schedule(
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
if num_max_steps:
num_warmup_steps = 0.1 * num_max_steps
num_stable_steps = 0.5 * num_max_steps
num_decay_steps = num_max_steps - num_warmup_steps - num_stable_steps
def _get_wsd_scheduler_lambda(
current_step: int,
......
......@@ -211,7 +211,7 @@ class DiffusionTrainer(object):
if exists(run_name):
self.output_dir = self.output_dir / run_name
self.output_dir = self.output_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# self.output_dir = self.output_dir / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
self.output_dir.mkdir(exist_ok=True, parents=True)
# logging
......@@ -364,10 +364,10 @@ class DiffusionTrainer(object):
self.writer.add_scalar("Train/Epoch Loss", train_epoch_loss, global_step=self.state.step)
self.writer.add_scalar("Epoch", self.state.epoch, global_step=self.state.step)
if self.state.step % self.valid_steps == 0:
valid_loss = self.validate()
# if self.state.step % self.valid_steps == 0:
# valid_loss = self.validate()
if self.state.step % self.test_steps == 0:
if self.state.step!=0 and self.state.step % self.test_steps == 0:
self.test()
self._anneal_learning_rate(valid_loss)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment