3. Defense: Perl
#!/usr/bin/env perl
#
# test the UTF8-ness of a file
use strict;
use warnings;
use Search::Tools::UTF8;
use Search::Tools;
# At least one file name must be supplied on the command line.
die "usage: $0 file\n" unless @ARGV;

# Each probe pairs the printf format of one report line with a closure that
# runs the corresponding Search::Tools::UTF8 check on its first argument.
my @probes = (
    [ "is_flagged_utf8=%d\n",     sub { is_flagged_utf8( $_[0] ) } ],
    [ "is_valid_utf8=%d\n",       sub { is_valid_utf8( $_[0] ) } ],
    [ "is_perl_utf8_string=%d\n", sub { is_perl_utf8_string( $_[0] ) } ],
    [ "is_sane_utf8=%d\n",        sub { is_sane_utf8( $_[0], 1 ) } ],
);

# Print one line per probe, in table order, for the buffer passed as the
# first argument. The buffer is read through $_[0] so it is never copied.
my $report = sub {
    for my $probe (@probes) {
        my ( $format, $check ) = @{$probe};
        printf( $format, $check->( $_[0] ) );
    }
};

# For every file: report on the raw contents, convert them with to_utf8(),
# then report again on the converted contents.
for my $path (@ARGV) {
    my $content = Search::Tools->slurp($path);
    $report->($content);
    print " ... running through to_utf8(), testing again.\n";
    $content = to_utf8($content);
    $report->($content);
}
4. Defense: Perl (cont.)
#!/usr/bin/env perl
#
# print chart of chars and corresponding hexdump vals
# just latin1 by default
# otherwise, specify start/stop numerals at cmd line
#
use strict;
use warnings;
# Number of "decimal char hex" cells printed on each output row.
my $NUM_COLS = 3;

# The chart contains code points above 127, so STDOUT gets the :utf8 layer.
binmode STDOUT, ':utf8';

# Bounds of the second range come from the command line. A plain
# "shift @ARGV || 161" would discard an explicit 0 (it is false in Perl) and
# would hand non-numeric text to the range operator, so each argument is
# parsed and validated by codepoint_arg() instead.
my $start = codepoint_arg( shift @ARGV, 161 );
my $stop  = codepoint_arg( shift @ARGV, 255 );

print ' ';
my $filled = 0;    # cells already printed on the current row

# Always chart 33..126 first, then the requested (or default) range.
for my $codepoint ( 33 .. 126, $start .. $stop ) {
    printf( "%05d %c 0x%05x ", $codepoint, $codepoint, $codepoint );
    if ( ++$filled == $NUM_COLS ) {
        print "\n ";
        $filled = 0;
    }
}
print "\n";

# codepoint_arg( $arg, $default )
#
# Turn one command-line argument into a non-negative integer code point.
#
#   $arg     - raw argument; may be undef when none was given
#   $default - value returned when $arg is undef or the empty string
#
# Accepts unsigned decimal ("9786", leading zeros are still decimal) and
# 0x-prefixed hexadecimal ("0x263A"). Returns the numeric value, including
# an explicit 0. Dies with a usage message for anything else.
sub codepoint_arg {
    my ( $arg, $default ) = @_;

    return $default if !defined $arg || $arg eq '';
    return hex $arg if $arg =~ m{\A 0 [xX] [0-9a-fA-F]+ \z}x;
    return $arg + 0 if $arg =~ m{\A [0-9]+ \z}x;

    die "usage: $0 [start [stop]]  (decimal or 0x-prefixed hex code points)\n";
}