#!/usr/bin/perl -w
# Pad run-length encoded data to desired length.
#
# Usage: ./expand.pl <size> < input.txt > output.txt
#
# Everything except '!' and ASCII letters is stripped from the input.  Each
# letter carries a value equal to its position in the alphabet (a/A = 1 ..
# z/Z = 26).  Until the data is <size> characters long, a randomly chosen
# letter of value v >= 2 is replaced by two letters of the same case whose
# values add up to v.  '!', 'a' and 'A' are never split.  The result is
# printed to STDOUT followed by a newline.
#
# Dies if the input is already longer than <size>, or if no splittable
# letter is left before <size> is reached.

use strict;
use warnings;

# \A...\z rather than ^...$ so that an argument with a trailing newline is
# rejected too.
if ( !@ARGV || $ARGV[0] !~ /\A\d+\z/ ) {
    die "Usage: $0 <size> < input.txt > output.txt\n";
}
my $size = shift @ARGV;

# Load input (any remaining arguments are read as files, otherwise STDIN)
# and keep only the characters that belong to the encoding.
my $input = join '', <>;
$input =~ tr/!a-zA-Z//cd;

if ( length($input) > $size ) {
    die "Desired output size is $size bytes, but input size is already "
        . length($input)
        . " bytes.\n";
}

# Split bytes randomly until input is of desired length.
while ( length($input) < $size ) {

    # Weight of each position = number of distinct ways its letter can be
    # split, i.e. value - 1.  '!', 'a' and 'A' all have (ord % 32) == 1 and
    # therefore weight 0, so they are never chosen.  Higher value letters
    # are proportionally more likely to be picked.
    my @weights = map { ( ord($_) % 32 ) - 1 } split //, $input;

    my $total = 0;
    $total += $_ for @weights;

    if ( $total == 0 ) {
        die "Desired size is $size bytes, but we ran out of places to split "
            . "after reaching "
            . length($input)
            . " bytes\n";
    }

    # Pick a weighted random position.  Walking the cumulative weights
    # selects the same index that indexing a list holding each position
    # repeated "weight" times would, without building that list.
    my $pick = int( rand($total) );
    my $pos  = 0;
    while ( $pick >= $weights[$pos] ) {
        $pick -= $weights[$pos];
        $pos++;
    }

    # Decode the chosen letter: $base is the code point just below 'a' or
    # 'A', so chr($base + n) is the n-th letter of the same case.
    my $code  = ord( substr( $input, $pos, 1 ) );
    my $base  = $code >= ord('a') ? ord('a') - 1 : ord('A') - 1;
    my $value = $code % 32;
    $value > 1 or die "internal error: picked unsplittable byte at offset $pos";

    # Choose the two parts (not named $a/$b: those are sort's variables).
    # $first is uniform in 1 .. $value - 1, so both parts are at least 1.
    my $first  = int( rand( $value - 1 ) ) + 1;
    my $second = $value - $first;
    ( $first > 0 && $second > 0 )
        or die "internal error: bad split $first + $second of value $value";

    # Replace the single letter with its two parts.
    substr( $input, $pos, 1, chr( $base + $first ) . chr( $base + $second ) );
}

print $input, "\n";