Submission script content:
#!/bin/bash
#SBATCH --account=suncat:normal
#SBATCH --partition=milano
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=12
#SBATCH --cpus-per-task=10
#SBATCH --mem=480G
#SBATCH --time=1-01:00:00
#SBATCH --job-name=mrcc_mpi_job
#SBATCH --output=slurm-%j.output
#SBATCH --error=slurm-%j.error
#SBATCH --export=NONE   # <-- BEST PRACTICE: Start with a clean environment

# set -eo pipefail  # Good practice; leave it commented out if enabling it makes the script abort unexpectedly

echo "Submission script content:"
cat $0
echo "---------------------------"
echo "Job ID: $SLURM_JOB_ID"
echo "Running on node(s): $SLURM_JOB_NODELIST"

# --- 1. Set up local scratch directory ---
startfolder=$(pwd)
export LOCAL_SCRATCH=/lscratch/$USER/$SLURM_JOB_ID
mkdir -p $LOCAL_SCRATCH
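# Fail fast if the node-local scratch could not be created (for example if
# /lscratch is not mounted on this node; the mount point is site-specific).
if [ ! -d "$LOCAL_SCRATCH" ]; then
  echo "ERROR: no local scratch directory at $LOCAL_SCRATCH" >&2
  exit 1
fi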

print_usage() {
  echo "==== DISK USAGE ===="
  df -h $LOCAL_SCRATCH
  echo
  echo "==== RAM USAGE (Peak, All Users) ===="
  # This prints the top 10 memory-using processes, you can adjust as needed.
  # If you want a summary, uncomment the next line instead.
  free -h
  ps -eo pid,user,%mem,rss,vsize,cmd --sort=-%mem | head -n 20
  echo
  echo "==== SLURM Resource Usage (if available) ===="
  if [ -n "$SLURM_JOB_ID" ]; then
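    # Note: sacct normally reports MaxRSS/MaxVMSize only after a step has
    # completed; while the job is still RUNNING (as it is when the EXIT trap
    # below fires) these columns come back empty. For live numbers on a
    # running step one can query sstat instead, e.g.
    #   sstat -j "${SLURM_JOB_ID}.batch" --format=JobID,MaxRSS,MaxVMSize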
    sacct -j "$SLURM_JOB_ID" --format=JobID,Elapsed,MaxRSS,MaxVMSize,State
  fi
}

copy_results() {
  echo "Copying results back to submission directory..."
  # Create a unique results folder to avoid overwriting
  mkdir -p "$startfolder/results_$SLURM_JOB_ID"
  rsync -av --ignore-missing-args $LOCAL_SCRATCH/* "$startfolder/results_$SLURM_JOB_ID/" 2>/dev/null || true
  echo "Results copied."
  print_usage
}
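# Always copy results back, whether the run finishes normally or dies partway
# through. Caveat: a bash EXIT trap does not run if the shell is killed by an
# untrapped signal (SIGKILL, or the SIGTERM Slurm sends at the time limit), so
# request a --time generous enough to leave room for the copy-back itself.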
trap copy_results EXIT


# --- 2. Set up the environment FROM SCRATCH ---

# Because of --export=NONE, we must redefine the PATH
export MRCC=/fs/ddn/sdf/group/suncat/rfanta/software/mrcc_25.1.1/install
export PATH=$MRCC:$PATH
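
# Optional sanity check: confirm the MRCC driver is actually on PATH before
# burning queue time (this assumes the binaries sit directly in $MRCC, as the
# PATH line above implies).
command -v dmrcc >/dev/null 2>&1 || { echo "ERROR: dmrcc not found under $MRCC" >&2; exit 1; }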

# Source the Intel oneAPI environment (nothing was inherited from the login shell because of --export=NONE)
source /sdf/group/suncat/sw/intel/env.bash

# Set MPI troubleshooting flags from the manual for cluster runs
export I_MPI_HYDRA_BOOTSTRAP=ssh
export I_MPI_OFI_PROVIDER=tcp # tcp for stability; 'verbs' can be tried later for performance
unset I_MPI_PMI_LIBRARY

# Set number of threads
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export MKL_NUM_THREADS=$SLURM_CPUS_PER_TASK
export OMP_PLACES=cores
export OMP_PROC_BIND=close # 'close' is often better for MPI+OpenMP
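
# Layout note: the header reserves 12 tasks x 10 CPUs = 120 cores on one node,
# and each task is capped at 10 OpenMP/MKL threads pinned to neighbouring
# cores. dmrcc is launched directly below (no srun/mpirun): MRCC's own driver
# starts the MPI ranks, so the rank count comes from the MINP input (mpitasks
# keyword) rather than from Slurm; verify this against the manual for your
# MRCC version.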


# --- 3. Run the job in the local scratch ---

# Copy the input file to the local scratch
cp $startfolder/MINP $LOCAL_SCRATCH/
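# dmrcc expects its input to be named MINP in the working directory; abort if
# the copy did not succeed (e.g. the job was submitted from the wrong folder).
if [ ! -f "$LOCAL_SCRATCH/MINP" ]; then
  echo "ERROR: MINP not found in $startfolder" >&2
  exit 1
fi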

cd $LOCAL_SCRATCH

echo "Start time: $(date)"

# Run the job
dmrcc > MINP.log 2>&1
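# A useful extension (not active in this run): capture dmrcc's exit status,
# e.g. status=$?; echo "dmrcc exit status: $status", so that a failed run is
# immediately visible in the Slurm log. MINP.log holds the MRCC output itself.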

echo "Finish time: $(date)"


---------------------------
Job ID: 8449818
Running on node(s): sdfmilan053
 
:: initializing oneAPI environment ...
   slurm_script: BASH_VERSION = 4.4.20(1)-release
   args: Using "$@" for setvars.sh arguments: 
:: advisor -- latest
:: ccl -- latest
:: compiler -- latest
:: dal -- latest
:: debugger -- latest
:: dev-utilities -- latest
:: dnnl -- latest
:: dpcpp-ct -- latest
:: dpl -- latest
:: inspector -- latest
:: ipp -- latest
:: ippcp -- latest
:: itac -- latest
:: mkl -- latest
:: mpi -- latest
:: tbb -- latest
:: vtune -- latest
:: oneAPI environment initialized ::
 
Start time: Thu Jul 31 08:40:16 PDT 2025
Finish time: Thu Jul 31 08:52:08 PDT 2025
Copying results back to submission directory...
sending incremental file list
COORD.xyz
DAO
DFINT
DFINV
DFTGRID
EXIT
FOCK
FOCK.OD
KEYWD
MINP
MINP.log
MOCOEF
MOCOEF.LOC
MOCOEF.QRO
MOLDEN
MOLDEN.QRO
MOLDEN.perm
OCCUP
OEINT
OSVFILE
PRINT
S12MATold
SCFDENSITIES
SCFDENSITIES.C.cc-pvtz-min.0
SCFDENSITIES.Fe.cc-pvtz-min.0
SCFDENSITIES.H.cc-pvtz-min.0
SCFDENSITIES.N.cc-pvtz-min.0
SCFDENSITIES.O.cc-pvtz-min.0
SCHOL
SROOT
SYMTRA
TEDAT
VARS
dmrcc.id
files
fort.101
fort.11
fort.55
idfile.    59
iface
localcc.restart
mrcc.out.1
mrcc.out.10
mrcc.out.11
mrcc.out.2
mrcc.out.3
mrcc.out.4
mrcc.out.5
mrcc.out.6
mrcc.out.7
mrcc.out.8
mrcc.out.9
mulliken-overlaps
pids
mpi_output/
mrccdir.0/
mrccdir.0/DFINVLOCb
mrccdir.0/MOCOEF.CAN
mrccdir.0/S_MO
mrccdir.0/rMO2cMO
mrccdir.1/
mrccdir.1/DFINVLOCb
mrccdir.1/FOCK.OD
mrccdir.1/MOCOEF.CAN
mrccdir.1/S_MO
mrccdir.1/rMO2cMO
mrccdir.10/
mrccdir.10/DFINVLOCb
mrccdir.10/FOCK.OD
mrccdir.10/MOCOEF.CAN
mrccdir.10/S_MO
mrccdir.10/rMO2cMO
mrccdir.11/
mrccdir.11/DFINVLOCb
mrccdir.11/FOCK.OD
mrccdir.11/MOCOEF.CAN
mrccdir.11/S_MO
mrccdir.11/rMO2cMO
mrccdir.2/
mrccdir.2/DFINVLOCb
mrccdir.2/FOCK.OD
mrccdir.2/MOCOEF.CAN
mrccdir.2/S_MO
mrccdir.2/rMO2cMO
mrccdir.3/
mrccdir.3/DFINVLOCb
mrccdir.3/FOCK.OD
mrccdir.3/MOCOEF.CAN
mrccdir.3/S_MO
mrccdir.3/rMO2cMO
mrccdir.4/
mrccdir.4/DFINVLOCb
mrccdir.4/FOCK.OD
mrccdir.4/MOCOEF.CAN
mrccdir.4/S_MO
mrccdir.4/rMO2cMO
mrccdir.5/
mrccdir.5/DFINVLOCb
mrccdir.5/FOCK.OD
mrccdir.5/MOCOEF.CAN
mrccdir.5/S_MO
mrccdir.5/rMO2cMO
mrccdir.6/
mrccdir.6/DFINVLOCb
mrccdir.6/FOCK.OD
mrccdir.6/MOCOEF.CAN
mrccdir.6/S_MO
mrccdir.6/rMO2cMO
mrccdir.7/
mrccdir.7/DFINVLOCb
mrccdir.7/FOCK.OD
mrccdir.7/MOCOEF.CAN
mrccdir.7/S_MO
mrccdir.7/rMO2cMO
mrccdir.8/
mrccdir.8/DFINVLOCb
mrccdir.8/FOCK.OD
mrccdir.8/MOCOEF.CAN
mrccdir.8/S_MO
mrccdir.8/rMO2cMO
mrccdir.9/
mrccdir.9/DFINVLOCb
mrccdir.9/FOCK.OD
mrccdir.9/MOCOEF.CAN
mrccdir.9/S_MO
mrccdir.9/rMO2cMO

sent 1,238,308,334 bytes  received 2,267 bytes  275,180,133.56 bytes/sec
total size is 1,237,998,841  speedup is 1.00
Results copied.
==== DISK USAGE ====
Filesystem      Size  Used Avail Use% Mounted on
/dev/nvme2n1p1  6.0T   45G  6.0T   1% /lscratch

==== RAM USAGE (Current, All Users) ====
              total        used        free      shared  buff/cache   available
Mem:          503Gi        66Gi       101Gi       7.9Gi       335Gi       424Gi
Swap:         8.0Gi       4.3Gi       3.7Gi
    PID USER     %MEM   RSS    VSZ CMD
  14204 root      0.3 2048352 9775468 /weka/wekanode --slot 0 --container-name sdfdata
  10088 root      0.3 2021132 9773828 /weka/wekanode --slot 0 --container-name sdfscratch
   7810 root      0.3 2019080 9773896 /weka/wekanode --slot 0 --container-name drpsrcf
   9252 root      0.3 1989812 9743556 /weka/wekanode --slot 0 --container-name sdfhome
  14341 root      0.2 1583164 8469444 /weka/wekanode --slot 2 --container-name sdfdata
  14339 root      0.2 1582292 8469444 /weka/wekanode --slot 1 --container-name sdfdata
 802554 root      0.2 1582112 8469448 /weka/wekanode --slot 1 --container-name sdfhome
  10270 root      0.2 1580312 8469444 /weka/wekanode --slot 2 --container-name sdfscratch
   8095 root      0.2 1580176 8469444 /weka/wekanode --slot 2 --container-name drpsrcf
  10271 root      0.2 1579860 8469444 /weka/wekanode --slot 1 --container-name sdfscratch
   8096 root      0.2 1579688 8469444 /weka/wekanode --slot 1 --container-name drpsrcf
2096674 root      0.1 582872 888208 /weka/dumper/trace-dumper --histogram-interval 60
 371650 root      0.1 571656 887876 /weka/dumper/trace-dumper --histogram-interval 60
2248467 root      0.1 547924 859404 /weka/dumper/trace-dumper --histogram-interval 60
2261432 root      0.0 461848 766452 /weka/dumper/trace-dumper --histogram-interval 60
   2365 root      0.0 204244 360772 /usr/lib/systemd/systemd-journald
   3294 root      0.0 177116 765336 /usr/libexec/platform-python -Es /usr/sbin/tuned -l -P
 963604 cvmfs     0.0 116076 26106940 /usr/bin/cvmfs2 -o rw,system_mount,fsname=cvmfs2,allow_other,grab_mountpoint,uid=11855,gid=2505 sw.lsst.eu /cvmfs/sw.lsst.eu
   3746 root      0.0 93108 5417216 /usr/bin/weka --agent

==== SLURM Resource Usage (if available) ====
JobID           Elapsed     MaxRSS  MaxVMSize      State 
------------ ---------- ---------- ---------- ---------- 
8449818        00:12:23                          RUNNING 
8449818.bat+   00:12:23                          RUNNING 
8449818.ext+   00:12:23                          RUNNING 
