#!/usr/bin/perl -w
# Merge code and templates into one unified script.

use strict;


# {{{ encode.pl

# Helper for encode().
#
# Takes a reference to the pending space-run counter.  If the run is
# non-empty, returns it as a single uppercase letter ("A" for a run of 1,
# "B" for 2, and so on) and resets the counter to zero through the
# reference.  Returns the empty string when nothing is pending.
sub flush_space($)
{
   my ($pending) = @_;

   return "" unless( $$pending > 0 );

   my $letter = chr(ord("A") - 1 + $$pending);
   $$pending = 0;
   return $letter;
}

# Helper for encode().
#
# Takes a reference to the pending non-space-run counter.  If the run is
# non-empty, returns it as a single lowercase letter ("a" for a run of 1,
# "b" for 2, and so on) and resets the counter to zero through the
# reference.  Returns the empty string when nothing is pending.
sub flush_char($)
{
   my ($pending) = @_;

   return "" unless( $$pending > 0 );

   my $letter = chr(ord("a") - 1 + $$pending);
   $$pending = 0;
   return $letter;
}

# Encode text to template.
#
# Walks the input byte by byte and run-length encodes it:
#   - a run of spaces becomes one uppercase letter ("A" = 1 .. "Z" = 26),
#   - a run of any other bytes becomes one lowercase letter
#     ("a" = 1 .. "z" = 26),
#   - each newline becomes "!".
# A run is emitted as soon as it reaches 26, so longer runs turn into
# several letters.  Only the shape of the text survives; the actual
# non-space bytes are not recorded.
sub encode($)
{
   my ($input) = @_;

   my ($space_byte, $newline_byte) = (ord(" "), ord("\n"));

   my $encoded = "";
   my $spaces = 0;
   my $chars = 0;
   foreach my $byte (unpack "C*", $input)
   {
      if( $byte == $newline_byte )
      {
         # End of line: emit whichever run is still pending, then the
         # line marker.
         $encoded .= flush_space(\$spaces) . flush_char(\$chars) . "!";
         next;
      }

      if( $byte == $space_byte )
      {
         # A space ends any run of non-space bytes.
         $encoded .= flush_char(\$chars);
         $encoded .= flush_space(\$spaces) if( ++$spaces >= 26 );
         next;
      }

      # Any other byte ends a run of spaces.
      $encoded .= flush_space(\$spaces);
      $encoded .= flush_char(\$chars) if( ++$chars >= 26 );
   }

   # Emit whatever run is left when the input lacks a final newline.
   return $encoded . flush_space(\$spaces) . flush_char(\$chars);
}

# }}}

# {{{ expand.pl

# Expand template to desired number of bytes.
#
# Each byte of $text is treated as carrying the value ord(byte) % 32, so
# "a"/"A" are worth 1 and "z"/"Z" are worth 26.  While the text is shorter
# than $size, one byte worth more than 1 is picked at random and replaced
# by two bytes of the same case whose values add up to the original, which
# lengthens the text by exactly one byte.
#
# Returns the expanded text; the text is returned unchanged when it is
# already at least $size bytes long.  Dies when the text is still too short
# and no byte can be split any further.
#
# NOTE(review): presumably $text is always the output of encode(); bytes
# other than letters and "!" are not given any special treatment here.
sub expand($$)
{
   my ($text, $size) = @_;

   # Split bytes randomly until text is of desired length.
   while( length($text) < $size )
   {
      # Collect candidate indices for where to split.  Each index gets
      # a count proportional to the number of times it can be split,
      # such that we are weighted toward splitting higher value bytes.
      my @offsets = ();
      foreach my $i (0 .. length($text) - 1)
      {
         my $c = substr($text, $i, 1);
         next if( index("!aA", $c) >= 0 );

         # A byte worth N can be split N - 1 more times.
         my $splits = (ord($c) % 32) - 1;
         push @offsets, ($i) x $splits if( $splits > 0 );
      }
      unless( scalar @offsets )
      {
         die "Desired size is $size bytes, but we ran out of places to split " .
             "after reaching " . length($text) . " bytes\n";
      }

      # Split at random weighted offset.
      my $i = $offsets[int(rand(scalar @offsets))];
      my $byte = ord(substr($text, $i, 1));
      my $base = $byte >= ord("a") ? ord("a") - 1 : ord("A") - 1;

      # Pick the two halves.  They are deliberately not named $a and $b,
      # which are reserved for sort() comparison blocks.
      $byte %= 32;
      $byte > 1 or die;
      my $first = int(rand($byte - 1)) + 1;
      my $second = $byte - $first;
      $first > 0 or die;
      $second > 0 or die;
      substr($text, $i, 1, chr($base + $first) . chr($base + $second));
   }
   return $text;
}

# }}}

# Load file to string.
#
# Returns the entire contents of $filename as a single string (the empty
# string for an empty file).  Dies with a message naming the file and the
# system error when the file cannot be opened.
sub load_file($)
{
   my ($filename) = @_;

   # Three-argument open takes the name literally, so surrounding
   # whitespace or characters such as ">" and "|" in the name are never
   # interpreted as part of the open mode.
   open my $file, "<", $filename
      or die "Cannot open $filename: $!\n";
   my $text = join '', <$file>;
   close $file;
   return $text;
}

# Count number of non-whitespace characters.
#
# Every character that the regex class \S matches counts once; spaces,
# tabs, newlines and other \s characters are ignored.
sub char_count($)
{
   my ($text) = @_;

   # Assigning the match list to an empty list yields the match count.
   my $visible = () = $text =~ /\S/g;
   return $visible;
}


# Load inputs.
#
# Usage: <code.pl> <size_bias> <template1.txt> <template2.txt>
#
# The code file must begin with
#    $p = ""; $q = "";
# (whitespace around "=" and after the first ";" is flexible).  Each
# template is encoded, expanded, and written into one of those two empty
# strings, so that the number of non-whitespace bytes printed equals the
# number of non-whitespace bytes in template 1.  The size bias moves that
# many bytes of padding from the second template's string to the first.
unless( scalar(@ARGV) == 4 )
{
   die "$0 <code.pl> <size_bias> <template1.txt> <template2.txt>\n";
}
my ($code_file, $bias, $template1_file, $template2_file) = @ARGV;

# A fractional or non-numeric bias would produce sizes that expand()
# cannot hit exactly, so reject it before doing any work.
$bias =~ /\A[-+]?\d+\z/
   or die "Size bias must be an integer, got \"$bias\"\n";

my $code = load_file($code_file);
my $template1 = load_file($template1_file);
my $template2 = load_file($template2_file);
if( char_count($template1) != char_count($template2) )
{
   die "Number of non-whitespace bytes in templates do not match:\n" .
       "$template1_file = " . char_count($template1) . "\n" .
       "$template2_file = " . char_count($template2) . "\n";
}

$code =~ /^(\$p\s*=\s*")(";\s*\$q\s*=\s*")(";.*)$/s
   or die "Unexpected code format\n";
my ($c1, $c2, $c3) = ($1, $2, $3);

# Expand templates.
my $encoded1 = encode($template1);
my $encoded2 = encode($template2);
my $target_size = char_count($template1);
my $code_size = char_count($code);
my $template1_size = char_count($encoded1);
my $template2_size = char_count($encoded2);
my $expand_size = $target_size -
                  ($template1_size + $template2_size + $code_size);
if( $expand_size < 0 )
{
   die "Templates are too small: code ($code_size) plus encoded templates " .
       "($template1_size + $template2_size) exceed the target of " .
       "$target_size bytes\n";
}

# Split the padding between the two templates, shifted by the bias.
my $half = int($expand_size / 2);
my $pad1 = $half + $bias;
my $pad2 = $expand_size - $pad1;
if( $pad1 < 0 || $pad2 < 0 )
{
   die "Size bias $bias is out of range; it must be between " .
       (-$half) . " and " . ($expand_size - $half) . "\n";
}

print STDERR
      "Target = $target_size\n",
      "Template 1 = $template1_size + $pad1\n",
      "Template 2 = $template2_size + $pad2\n",
      "Code = $code_size\n";
print $c1,
      expand($encoded1, $template1_size + $pad1),
      $c2,
      expand($encoded2, $template2_size + $pad2),
      $c3;