#!/usr/bin/perl -w
#
# Slow printer: copies its input (files named on the command line, or
# STDIN) to STDOUT one visible character at a time, pausing after each one.
# Leading ANSI escape sequences and trailing combining characters are
# emitted together with the character they belong to, so the terminal never
# receives a half-finished glyph or control sequence.
#
# Input is handled as raw bytes that are assumed to be UTF-8 encoded.

use strict;
use warnings;

# Pause after every printed character, in microseconds (100 ms).
use constant DELAY_MICROSECONDS => 100 * 1000;

use Time::HiRes 'usleep';

# Check if code is a combining character.
#
# This list is not at all complete, but good enough for my purpose.
#
#   $code   - numeric Unicode code point
#   returns - true if $code lies in one of the listed combining ranges
sub IsCombiningCharacter($) {
    my ($code) = @_;

    return ($code >= 0x0300 && $code <= 0x036f)    # Combining Diacritical Marks
        || ($code >= 0x1ab0 && $code <= 0x1aff)    # ... Extended
        || ($code >= 0x1dc0 && $code <= 0x1dff)    # ... Supplement
        || ($code >= 0x20d0 && $code <= 0x20ff)    # ... for Symbols
        || ($code >= 0x302a && $code <= 0x302f)    # CJK tone marks
        || ($code >= 0x3099 && $code <= 0x309a)    # Kana voiced sound marks
        || ($code >= 0xfe20 && $code <= 0xfe2f);   # Combining Half Marks
}

# Get next UTF-8 character from string.
#
#   $text   - byte string
#   returns - the leading bytes of $text that make up one UTF-8 sequence,
#             with the length chosen from the lead byte alone (continuation
#             bytes are not validated).  Returns fewer bytes if $text is
#             shorter than the announced length, and "" for empty input.
sub ReadChar($) {
    my ($text) = @_;

    my $lead = ord(substr($text, 0, 1));

    if ( ($lead & 0xe0) == 0xc0 ) {
        return substr($text, 0, 2);
    }
    if ( ($lead & 0xf0) == 0xe0 ) {
        return substr($text, 0, 3);
    }
    if ( ($lead & 0xf8) == 0xf0 ) {
        return substr($text, 0, 4);
    }

    return substr($text, 0, 1);
}

# Decode UTF-8 bytes to code point.
#
#   $bytes  - the bytes of one UTF-8 sequence, as returned by ReadChar
#   returns - the decoded code point.  If the sequence is shorter than its
#             lead byte announces, or the lead byte is not a multi-byte
#             lead, the numeric value of the first byte is returned
#             (undef for empty input).
sub DecodeChar($) {
    my ($bytes) = @_;

    my @c    = unpack 'C*', $bytes;
    my $lead = $c[0];

    # Empty or single-byte input: nothing to combine.  This also keeps a
    # truncated lead byte from being decoded against missing bytes.
    return $lead if @c < 2;

    if ( ($lead & 0xe0) == 0xc0 ) {
        # 110xxxxx 10xxxxxx: only the low five bits of the lead byte are
        # payload.  A wider mask would leak a marker bit into the result.
        return (($lead & 0x1f) << 6) | ($c[1] & 0x3f);
    }
    if ( ($lead & 0xf0) == 0xe0 && @c >= 3 ) {
        return (($lead & 0x0f) << 12)
             | (($c[1] & 0x3f) << 6)
             |  ($c[2] & 0x3f);
    }
    if ( ($lead & 0xf8) == 0xf0 && @c >= 4 ) {
        return (($lead & 0x07) << 18)
             | (($c[1] & 0x3f) << 12)
             | (($c[2] & 0x3f) << 6)
             |  ($c[3] & 0x3f);
    }

    return $lead;
}

# Read every input line and print it one character group at a time.
sub main {
    while ( my $line = <> ) {
        while ( $line ne "" ) {
            # Consume escape sequences
            my $esc = "";
            if ( $line =~ /^((?:\x1b\[[?]?\d+(?:;\d+){0,2}[ABCDEFGHJKSTifhlmnsu])*)/ ) {
                $esc  = $1;
                $line = substr($line, length($esc));
            }

            # Read UTF-8 bytes of the base character
            my $char = ReadChar($line);
            $line = substr($line, length($char));

            # Append any combining characters that follow it, so that the
            # whole glyph is written in one go.
            while ( (my $next_char = ReadChar($line)) ne "" ) {
                last if !IsCombiningCharacter(DecodeChar($next_char));
                $char .= $next_char;
                $line = substr($line, length($next_char));
            }

            # Output characters slowly
            print $esc, $char;
            usleep(DELAY_MICROSECONDS);
        }
    }

    return;
}

# Force flush after each write
$| = 1;

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without blocking on input.
main() unless caller;

1;