#!/usr/bin/perl -w use strict; # Check if code is a combining character. # # This list is not at all complete, but good enough for my purpose. sub IsCombiningCharacter($) { my ($code) = @_; return ($code >= 0x0300 && $code <= 0x036f) || ($code >= 0x1ab0 && $code <= 0x1aff) || ($code >= 0x1dc0 && $code <= 0x1dff) || ($code >= 0x20d0 && $code <= 0x20ff) || ($code >= 0x302a && $code <= 0x302f) || ($code >= 0x3099 && $code <= 0x309a) || ($code >= 0xfe20 && $code <= 0xfe2f); } # Check if code is a full width character. # # Character width determination came from here: # https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c # # Plus 0x1f000 .. 0x20000 to account for emojis sub IsFullWidth($) { my ($code) = @_; return ($code >= 0x1100 && $code <= 0x115f) || ($code >= 0x2329 && $code <= 0x232a) || ($code >= 0x2e80 && $code <= 0xa4cf && $code != 0x303f) || ($code >= 0xac00 && $code <= 0xd7a3) || ($code >= 0xf900 && $code <= 0xfaff) || ($code >= 0xfe10 && $code <= 0xfe19) || ($code >= 0xfe30 && $code <= 0xfe6f) || ($code >= 0xff00 && $code <= 0xff60) || ($code >= 0xffe0 && $code <= 0xffe6) || ($code >= 0x1f000 && $code <= 0x1fa95) || ($code >= 0x20000 && $code <= 0x2fffd) || ($code >= 0x30000 && $code <= 0x3fffd); } # Get next UTF-8 character from string sub ReadChar($$) { my ($text, $offset) = @_; my $c = ord(substr($text, $offset, 1)); if( ($c & 0xe0) == 0xc0 ) { return substr($text, $offset, 2); } if( ($c & 0xf0) == 0xe0 ) { return substr($text, $offset, 3); } if( ($c & 0xf8) == 0xf0 ) { return substr($text, $offset, 4); } return substr($text, $offset, 1); } # Decode UTF-8 bytes to code point sub DecodeChar($) { my ($bytes) = @_; my @c = unpack 'C*', $bytes; if( ($c[0] & 0xe0) == 0xc0 ) { return (($c[0] & 0x7f) << 6) | ($c[1] & 0x3f); } if( ($c[0] & 0xf0) == 0xe0 ) { return (($c[0] & 0x0f) << 12) | (($c[1] & 0x3f) << 6) | ($c[2] & 0x3f); } if( ($c[0] & 0xf8) == 0xf0 ) { return (($c[0] & 0x07) << 18) | (($c[1] & 0x3f) << 12) | (($c[2] & 0x3f) << 6) | ($c[3] & 0x3f); } return $c[0]; } # Load input into a list of (row, column, character) tuples sub LoadInput($) { my ($filename) = @_; # Load input as lines open my $infile, "<$filename" or die $!; my @lines = <$infile>; close $infile; chomp foreach @lines; # Convert input into (row, column, character) tuples my @chars = (); my $y = 0; foreach my $line (@lines) { my $x = 0; for(my $i = 0; $i < length($line);) { my $c = ReadChar($line, $i); $i += length($c); my $code = DecodeChar($c); if( IsCombiningCharacter($code) ) { # Found a combining character, append to previous character # instead of starting a new character. if( (scalar @chars) == 0 ) { die "Found orphaned combining character at line " . ($y + 1) . " offset $i\n"; } $chars[$#chars][2] .= $c; } else { # Ignore whitespaces if( $c eq " " ) { $x++; next; } if( $c eq "\t" ) { $x += 8; $x -= $x % 8; next; } next if ord($c) < 32; # Start a new character push @chars, [$y, $x, $c]; # Advance cursor position. $x += IsFullWidth($code) ? 2 : 1; } } $y++; } return @chars; } # Fisher-yates shuffle sub Shuffle($) { my ($array) = @_; if( (scalar @$array) > 0 ) { for(my $i = scalar @$array; --$i;) { my $j = int(rand($i + 1)); @$array[$i, $j] = @$array[$j, $i]; } } } # Load input if( $#ARGV < 1 ) { die "$0 [output...]\n"; } my @chars = LoadInput($ARGV[0]); my $height = 0; if( scalar @chars ) { $height = $chars[$#chars][0] + 1; } Shuffle(\@chars); # Output layers my $char_count = scalar @chars; my $layer_count = $#ARGV; my $layer_index = 1; my $row = 0; my $column = 0; my $outfile = undef; for(my $i = 0; $i < $char_count; $i++) { unless( defined $outfile ) { open $outfile, ">$ARGV[$layer_index]" or die $!; } if( $i == 0 ) { # Header for first layer: make sure there is enough vertical space. print $outfile ("\n" x $height); $row = $height; } elsif( $i >= $layer_index * $char_count / $layer_count ) { # Footer for previous layer: make sure cursor ends at bottom row. if( $row + 1 < $height ) { print $outfile "\e[", ($height - $row - 1), "B"; } print $outfile "\n"; $row = $height; $column = 0; close $outfile; $layer_index++; open $outfile, ">$ARGV[$layer_index]" or die $!; } my @c = @{$chars[$i]}; if( $row < $c[0] ) { print $outfile "\e[", $c[0] - $row, "B"; } elsif( $row > $c[0] ) { print $outfile "\e[", $row - $c[0], "A"; } if( $column < $c[1] ) { print $outfile "\e[", $c[1] - $column, "C"; } elsif( $column > $c[1] ) { print $outfile "\e[", $column - $c[1], "D"; } print $outfile $c[2]; $row = $c[0]; $column = $c[1] + 1; if( IsFullWidth(DecodeChar($c[2])) ) { $column++; } } # Footer for final layer. if( defined($outfile) ) { if( $row + 1 < $height ) { print $outfile "\e[", ($height - $row - 1), "B"; } print $outfile "\n"; close $outfile; }