#!/usr/bin/tclsh
# Author: Jeff Comer <jeffcomer at gmail>

if {[llength $argv] < 4} {
    puts "Usage: tclsh abfCheckRunsError.tcl anchor0 anchor1 colvarHistGrad0 \[colvarHistGrad1\] outName"
    exit
}

# Input:
set anchor0 [lindex $argv 0]
set anchor1 [lindex $argv 1]
set colvarHistGradList [lrange $argv 2 end-1]
set outName [lindex $argv end]

proc trimExtension {name} {
    set ind [string last "." $name]
    return [string range $name 0 [expr {$ind-1}]]
}

proc writeData {fileName data} {
    set out [open $fileName w]
    foreach d $data {
	puts $out $d
    }
    close $out
}

proc getNumFrames {colvarHistFileList} {
    # We assume that the frames have a header of two comment lines.
    set count 0

    foreach colvarHistFile $colvarHistFileList {
	set in [open $colvarHistFile r]
	while {[gets $in lin] >= 0} {
	    if {[string match "#*" $lin]} {
		# Keep track of how many headers we've seen.
		# Get the next comment line as well so it doesn't get counted.
		set lin1 [gets $in lin]
		# We are on the next frame.
		incr count
	    }
	}
	close $in
    }
    
    return $count
}

proc getFrame {colvarHistFileList frame} {
    set count 0
    set data {}
    set found 0
    
    puts ""
    puts "Frame $frame from $colvarHistFileList"
    
    foreach colvarHistFile $colvarHistFileList {
	set in [open $colvarHistFile r]
	set writing 0
	while {[gets $in lin] >= 0} {
	    if {$writing} {
		if {[string match "#*" $lin]} {
		    # We have reached the end of this frame.
		    set writing 0
		    set found 1
		    break
		} else {
		    # Add to the data.
		    if {[string length [string trim $lin]] > 0} {
			#puts $lin
			lappend data [concat $lin]
		    }
		}
	    } elseif {[string match "#*" $lin]} {
		# Keep track of how many headers we've seen.

		# Are we at the requested frame.
		if {$frame >= 0 && $frame == $count} {
		    puts "Found frame $frame." 
		    # This is the frame that we want.
		    set writing 1
		}
		# Get the next comment line as well so it doesn't get counted.
		set lin1 [gets $in lin]
		# We are on the next frame.
		incr count
	    }
	}
	close $in
	if { $found } { break }
    }

    return $data
}

proc meanData {data x0 x1} {
    set n 0
    set sum 0.0
    foreach d $data {
	foreach {x y} $d { break }
	if {$x < $x0 || $x >= $x1} { continue }

	set sum [expr {$sum + $y}]
	incr n
    }

    if {$n == 0} { return 0.0 }
    return [expr {$sum/$n}]
}

proc shiftData {data shift} {
    set ret {}
    foreach d $data {
	foreach {x y} $d { break }
	lappend ret [list $x [expr {$y+$shift}]]
    }
    return $ret
}

proc integrateGrad {gradList anchor0 anchor1} {
    set binDel [expr {[lindex $gradList 1 0]-[lindex $gradList 0 0]}]

    set pmfList {}
    set sum 0.0
    set z0 [expr {[lindex $gradList 0 0]-0.5*$binDel}] 
    lappend pmfList [list $z0 $sum]
    foreach item $gradList {
	foreach {z gradMean} $item { break }
	set sum [expr {$sum + $gradMean*$binDel}]
	set z0 [expr {$z + 0.5*$binDel}]

	lappend pmfList [list $z0 $sum]
    }

    set anchorLevel [meanData $pmfList $anchor0 $anchor1]
    return [shiftData $pmfList [expr {-$anchorLevel}]]
}

# Make a list of count files.
set colvarHistCountList {}
foreach colvarHistGrad $colvarHistGradList {
    lappend colvarHistCountList "[trimExtension $colvarHistGrad].count"
}

set frames [getNumFrames $colvarHistGradList]
set frames0 [getNumFrames $colvarHistCountList]
if {$frames0 < $frames} {
    # Choose the lowest number of frames if one of the files is incomplete.
    set frames $frames0
}

set endFrame [expr {$frames-1}]
puts "frames $frames"

# Get the gradient and count at the beginning, middle, and end of the simulation.
set midFrame [expr {$frames/2}]
set endGradList [getFrame $colvarHistGradList $endFrame]
set midGradList [getFrame $colvarHistGradList $midFrame]
set begGradList [getFrame $colvarHistGradList 0]
set endCountList [getFrame $colvarHistCountList $endFrame]
set midCountList [getFrame $colvarHistCountList $midFrame]
set begCountList [getFrame $colvarHistCountList 0]

# Calculate the gradient for the first half, by removing
# any contribution from the beginning.
# In most cases, the simulation should start from scratch
# so that the count at the beginning is zero and there is no
# contribution to subtract!
set firstCountList {}
set firstGradList {}
foreach midGrad $midGradList midCount $midCountList begGrad $begGradList begCount $begCountList {
    set x [lindex $midCount 0]
    set countF [lindex $midCount 1]
    set count0 [lindex $begCount 1]
    set gradF [lindex $midGrad 1]
    set grad0 [lindex $begGrad 1]

    set count1 [expr {$countF - $count0}]
    lappend firstCountList [list $x $count1]

    if {$count1 == 0} {
	set grad1 0.0
    } else {
	set grad1 [expr {($countF*$gradF - $count0*$grad0)/$count1}]
    }
    lappend firstGradList [list $x $grad1]
}

# Calculate the gradient for the second half, by removing
# any contribution from first half.
set secondCountList {}
set secondGradList {}
foreach endGrad $endGradList endCount $endCountList midGrad $midGradList midCount $midCountList  {
    set x [lindex $endCount 0]
    set countF [lindex $endCount 1]
    set count0 [lindex $midCount 1]
    set gradF [lindex $endGrad 1]
    set grad0 [lindex $midGrad 1]

    set count1 [expr {$countF - $count0}]
    lappend secondCountList [list $x $count1]

    if {$count1 == 0} {
	set grad1 0.0
    } else {
	set grad1 [expr {($countF*$gradF - $count0*$grad0)/$count1}]
    }
    lappend secondGradList [list $x $grad1]
}

# Write the gradients and counts for the first (.0) and second halves (.1).
writeData $outName.0.count $firstCountList
writeData $outName.0.grad $firstGradList
writeData $outName.1.count $secondCountList
writeData $outName.1.grad $secondGradList

# Integrate the gradients to obtain the PMFs.
set firstPmf [integrateGrad $firstGradList $anchor0 $anchor1]
writeData $outName.0.pmf $firstPmf
set secondPmf [integrateGrad $secondGradList $anchor0 $anchor1]
writeData $outName.1.pmf $secondPmf

# Calculate the difference between the first and second half gradients.
# This yields the uncertainty in the gradient.
set gradErr {}
foreach item0 $firstGradList item1 $secondGradList itemEnd $endGradList {
    set g0 [lindex $item0 1]
    set g1 [lindex $item1 1]
    set x [lindex $item0 0]
    set gEnd [lindex $itemEnd 1]
    
    lappend gradErr [list $x $gEnd [expr {abs($g1-$g0)}]]
}
writeData $outName.err.grad $gradErr

# Find the grid point nearest the anchor.
set anchorPos [expr {0.5*($anchor0+$anchor1)}]
set anchorDist [expr {abs([lindex $gradErr 0 0]-$anchorPos)}]
set anchorInd 0
for {set i 1} {$i < [llength $gradErr]} {incr i} {
    set dist [expr {abs([lindex $gradErr $i 0]-$anchorPos)}]
    if {$dist < $anchorDist} {
	set anchorInd $i
	set anchorDist $dist
    }
}
puts "Anchor: ideal $anchorPos actual [lindex $gradErr $anchorInd 0] index $anchorInd"

# Integrate over the gradient uncertainties to obtain the PMF uncertainties.
set errPmf {}
set dx [expr {[lindex $gradErr 1 0]-[lindex $gradErr 0 0]}]
for {set i 0} {$i < [llength $gradErr]} {incr i} {
    # For each node, sum from the anchor point.
    # Assume uncertainties for the gradient are independent
    # (square root of sum of squares)
    set dj [expr {($anchorInd<=$i)?1:-1}]
    set sumSq [expr {[lindex $gradErr $i 2]*[lindex $gradErr $i 2]}]
    for {set j $anchorInd} {$j != $i} {incr j $dj} {
	set sumSq [expr {$sumSq + [lindex $gradErr $j 2]*[lindex $gradErr $j 2]}]
    }
    lappend errPmf [expr {$dx*sqrt($sumSq)}]
    if {$i == $anchorInd} {
	lappend errPmf [expr {$dx*sqrt($sumSq)}]
    }
}

# Make a PMF containing the estimated uncertainty.
set pmfErr0 [integrateGrad $gradErr $anchor0 $anchor1]
set pmfErr {}
foreach item $pmfErr0 err $errPmf {
    lappend pmfErr [list [lindex $item 0] [lindex $item 1] $err]
}
# Write the final PMF.
writeData $outName.err.pmf $pmfErr

# For completeness, write the final counts.
writeData $outName.err.count $endCountList
