#!/bin/bash
#
# Round-trip tests for a text disassembler/assembler command.
#
# Usage: $0 <command>
#
# <command> is run as ./<command> from the current directory, which must
# also contain the test_*.txt fixtures and test_md5.txt.
#
# NOTE(review): pipefail is not enabled, so each pipeline is judged by its
# last stage only. Turning it on needs care: some later checks end in
# `grep -q`, which can close the pipe before upstream stages finish.
set -eu

# Print a failure message to stderr and abort the test run.
# Arguments: the message to print.
# Returns:   never; exits the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if (( $# != 1 )); then
  die "Usage: $0 <command>"
fi

CMD="./$1"

# Verify checksums of test input files first.
md5sum -c --quiet test_md5.txt || die "$LINENO: Need to recompute checksums"

# Try empty file. Note that any file containing only whitespaces
# would be treated exactly like an empty file.
"$CMD" test_empty.txt \
  | diff - test_empty.txt \
  || die "$LINENO: Failed to disassemble empty file"
"$CMD" < test_empty.txt \
  | diff - test_empty.txt \
  || die "$LINENO: Failed to assemble empty file"

# Try various small files.
for i in test_small_*.txt; do
  "$CMD" "$i" \
    | "$CMD" \
    | diff - "$i" \
    || die "$LINENO: Failed to process $i"
done

# Test various small sizes.
SEQ_INPUT=$(mktemp)
# Remove the scratch file even if one of the checks below aborts the run.
trap 'rm -f "$SEQ_INPUT"' EXIT

# One number per line, with and without a leading space.
for i in {1..32}; do
  for pattern in '%.0f' ' %.0f'; do
    seq -f "$pattern" "$i" > "$SEQ_INPUT"
    "$CMD" "$SEQ_INPUT" \
      | "$CMD" \
      | diff - "$SEQ_INPUT" \
      || die "$LINENO: Failed to process multi-line generated sequence $i"
  done
done

# Same numbers joined with an empty separator, i.e. all on a single line.
for i in {1..32}; do
  for pattern in '%.0f' ' %.0f'; do
    seq -f "$pattern" -s "" "$i" > "$SEQ_INPUT"
    "$CMD" "$SEQ_INPUT" \
      | "$CMD" \
      | diff - "$SEQ_INPUT" \
      || die "$LINENO: Failed to process single-line generated sequence $i"
  done
done

rm -f "$SEQ_INPUT"
trap - EXIT

# Test plain ASCII input. Note that this input contains a tab character,
# so we are explicitly ignoring whitespaces in comparing output.
"$CMD" test_simple.txt \
  | "$CMD" \
  | diff -w - test_simple.txt \
  || die "$LINENO: Failed to process ASCII input"

# We do want to make sure that tabs are expanded to the right number of
# spaces, so test for that specifically.
"$CMD" < test_tab.txt \
  | diff - test_tab_expanded.txt \
  || die "$LINENO: Failed to assemble file with tabs"
"$CMD" test_tab.txt \
  | "$CMD" \
  | diff - test_tab_expanded.txt \
  || die "$LINENO: Failed to process file with tabs"

# Test UTF-8 input.
"$CMD" test_utf8.txt \
  | "$CMD" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to process UTF8 input"

# Verify that carriage returns are dropped.
"$CMD" test_crlf.txt \
  | "$CMD" \
  | cmp - test_lf.txt \
  || die "$LINENO: Failed to process file with CR-LF sequences"

# Check assembly with handwritten escape sequences, including some
# out of bounds positioning.
"$CMD" < test_assembly.txt \
  | diff - test_assembly_flattened.txt \
  || die "$LINENO: Failed to assemble text with out of bounds sequence"

# Try overwriting characters.
"$CMD" < test_overwrite.txt \
  | diff - test_overwrite_flattened.txt \
  || die "$LINENO: Failed to assemble text with overlapping characters"

# Verify that output is sufficiently scrambled to defeat grep.
# perl exits non-zero as soon as the plain text shows up in the output.
"$CMD" test_defeat_grep.txt \
  | perl -ne 'die if /This text should not be found/' \
  || die "$LINENO: Failed to disassemble file"

# Verify that scrambled output can be re-shuffled directly.
"$CMD" test_utf8.txt \
  | "$CMD" - \
  | "$CMD" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to process with one reshuffle"
"$CMD" test_utf8.txt \
  | "$CMD" - \
  | "$CMD" - \
  | "$CMD" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to process with two reshuffle"

# Verify that reshuffling the output produces different results.
SHUFFLE_ONCE=$(mktemp)
SHUFFLE_TWICE=$(mktemp)
# Remove both scratch files even if a check below aborts the run.
trap 'rm -f "$SHUFFLE_ONCE" "$SHUFFLE_TWICE"' EXIT
"$CMD" test_utf8.txt > "$SHUFFLE_ONCE" \
  || die "$LINENO: Failed to disassemble input for reshuffle comparison"
"$CMD" "$SHUFFLE_ONCE" > "$SHUFFLE_TWICE" \
  || die "$LINENO: Failed to reshuffle disassembled output"
# Identical files mean the second pass did not reshuffle anything.
if diff -q "$SHUFFLE_ONCE" "$SHUFFLE_TWICE" > /dev/null; then
  die "$LINENO: Reshuffling did not produce different output"
fi
rm -f "$SHUFFLE_ONCE" "$SHUFFLE_TWICE"
trap - EXIT

# Original string should be recoverable after reassembly.
"$CMD" test_defeat_grep.txt \
  | "$CMD" \
  | grep -q "This text should not be found" \
  || die "$LINENO: Failed to reassemble file"

# Disassembly should work with pipe input.
# (cat is intentional here: the point is to feed the command from a pipe.)
cat test_defeat_grep.txt \
  | "$CMD" - \
  | perl -ne 'die if /This text should not be found/' \
  || die "$LINENO: Failed to disassemble input pipe"

# Reassemble what was disassembled via pipe.
cat test_defeat_grep.txt \
  | "$CMD" - \
  | "$CMD" \
  | grep -q "This text should not be found" \
  || die "$LINENO: Failed to reassemble input pipe"

# Scratch files that receive the split layers. Both are allocated up front
# so that a single EXIT handler can remove them however the script ends.
# The handler stays installed because later tests reuse these paths.
LAYER1=$(mktemp)
LAYER2=$(mktemp)
trap 'rm -f "$LAYER1" "$LAYER2"' EXIT

# Try splitting input to a file, as opposed to stdout.
"$CMD" test_utf8.txt "$LAYER1" \
  || die "$LINENO: Failed to split to 1 layer"
[[ -s "$LAYER1" ]] || die "$LINENO: Output file is empty"
"$CMD" < "$LAYER1" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to reassemble from file"

# Try splitting input to 2 layers.
"$CMD" test_utf8.txt "$LAYER1" "$LAYER2" \
  || die "$LINENO: Failed to split to 2 layers"
[[ -s "$LAYER1" ]] || die "$LINENO: Output layer 1 of 2 is empty"
[[ -s "$LAYER2" ]] || die "$LINENO: Output layer 2 of 2 is empty"

# A single layer on its own must NOT reproduce the original text.
if "$CMD" < "$LAYER1" | diff -q - test_utf8.txt > /dev/null; then
  die "$LINENO: Unexpected reassembly success using only layer 1 of 2"
fi
if "$CMD" < "$LAYER2" | diff -q - test_utf8.txt > /dev/null; then
  die "$LINENO: Unexpected reassembly success using only layer 2 of 2"
fi

# Both layers together must reproduce it exactly.
cat "$LAYER1" "$LAYER2" \
  | "$CMD" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to reassemble using 2 layers"
rm -f "$LAYER1" "$LAYER2"

# Try splitting input to 3 layers.
LAYER3=$(mktemp)
# Install (or replace) the EXIT handler so that all three layer files are
# removed however the script ends. The randomized tests below recreate
# these files after the explicit rm, so cleanup is needed on success too.
trap 'rm -f "$LAYER1" "$LAYER2" "$LAYER3"' EXIT

"$CMD" test_utf8.txt "$LAYER1" "$LAYER2" "$LAYER3" \
  || die "$LINENO: Failed to split to 3 layers"
[[ -s "$LAYER1" ]] || die "$LINENO: Output layer 1 of 3 is empty"
[[ -s "$LAYER2" ]] || die "$LINENO: Output layer 2 of 3 is empty"
[[ -s "$LAYER3" ]] || die "$LINENO: Output layer 3 of 3 is empty"

# A single layer on its own must NOT reproduce the original text.
if "$CMD" < "$LAYER1" | diff -q - test_utf8.txt > /dev/null; then
  die "$LINENO: Unexpected reassembly success using only layer 1 of 3"
fi
if "$CMD" < "$LAYER2" | diff -q - test_utf8.txt > /dev/null; then
  die "$LINENO: Unexpected reassembly success using only layer 2 of 3"
fi
if "$CMD" < "$LAYER3" | diff -q - test_utf8.txt > /dev/null; then
  die "$LINENO: Unexpected reassembly success using only layer 3 of 3"
fi

# All three layers together must reproduce it exactly.
cat "$LAYER1" "$LAYER2" "$LAYER3" \
  | "$CMD" \
  | diff - test_utf8.txt \
  || die "$LINENO: Failed to reassemble using 3 layers"
rm -f "$LAYER1" "$LAYER2" "$LAYER3"

# Test the splitter with randomized input.
ITERS=10
for generator in generate_dense.pl generate_sparse.pl; do
  for ((i = 1; i <= ITERS; i++)); do
    # LAYER1 is reused here as scratch space for the generated input.
    perl "$generator" > "$LAYER1" \
      || die "$LINENO: Failed to run $generator at iteration $i of $ITERS"
    "$CMD" "$LAYER1" \
      | "$CMD" \
      | diff --ignore-trailing-space - "$LAYER1" \
      || die "$LINENO: Failed to split at $generator iteration $i of $ITERS"
  done
done

# Test the assembler with randomized input.
for ((i = 1; i <= ITERS; i++)); do
  perl split.pl test_utf8.txt - \
    | "$CMD" \
    | diff - test_utf8.txt \
    || die "$LINENO: Failed to reassemble at iteration $i of $ITERS"

  # Each partial layer only has to be accepted without error; its output
  # is discarded.
  perl split.pl test_utf8.txt "$LAYER1" "$LAYER2" "$LAYER3" \
    || die "$LINENO: Failed to split to 3 layers at iteration $i of $ITERS"
  "$CMD" < "$LAYER1" > /dev/null \
    || die "$LINENO: Failed to process layer 1 of 3 at iteration $i of $ITERS"
  "$CMD" < "$LAYER2" > /dev/null \
    || die "$LINENO: Failed to process layer 2 of 3 at iteration $i of $ITERS"
  "$CMD" < "$LAYER3" > /dev/null \
    || die "$LINENO: Failed to process layer 3 of 3 at iteration $i of $ITERS"
done

# Verify that we haven't corrupted any test input files during this test.
md5sum -c --quiet test_md5.txt || die "$LINENO: Test data corrupted"

# All good
exit 0