#!/usr/bin/perl
#
# For every state, find the two most populous "Metropolitan Division"
# entries in a census CSV file, print a table comparing them, and draw a
# log-log scatter plot of (largest, second-largest) population.
#
# Inputs, read from the current directory:
#   capital  one state per line: name, abbreviation, capital
#            (split on whitespace into at most three fields)
#   CENSUS   CSV with CRLF record endings; fields 3, 4 and 5 hold the
#            location ("City, ST-ST-..."), the area type and the population
#
# Outputs:
#   STDOUT           the comparison table
#   gnuplot.dat      scatter data, one "x y # ST" line per state
#   gnuplot.gpl      gnuplot commands that plot gnuplot.dat
#   scatterplot.pbm  produced by gnuplot
#   scatterplot.jpg  produced by cjpeg from the .pbm

use strict;
use warnings;

# Read the state list and return a hash ref mapping abbreviation => name.
# Dies with the OS error if the file cannot be opened.
sub load_state_names {
    my ($path) = @_;

    open my $fh, "<", $path or die $!;
    my %name_of;
    while (my $line = <$fh>) {
        chomp $line;
        # Limit of 3 keeps everything after the abbreviation in one field.
        my ($state, $ab) = split /\s+/, $line, 3;
        next unless defined $ab;    # blank or malformed line
        $name_of{$ab} = $state;
    }
    close $fh;

    return \%name_of;
}

# Read the census file and return a hash ref mapping a state abbreviation
# to an array ref of [city, population] pairs.
#
# Only "Metropolitan Division" rows with a positive population are kept.
# A location that lists several states is filed under the first one.
# Rows whose state is not in $name_of are skipped, with one warning per
# distinct abbreviation.  Dies on an unopenable file or unparseable record.
sub load_cities {
    my ($path, $name_of) = @_;

    # Loaded on demand so the pure helpers below remain usable on a system
    # without Text::CSV installed.
    require Text::CSV;
    my $csv = Text::CSV->new;

    open my $fh, "<", $path or die $!;

    # Records end in CRLF.  "local" keeps the change from leaking into any
    # other read in the program.
    local $/ = "\r\n";

    my (%cities, %seen);
    while (my $record = <$fh>) {
        chomp $record;
        # Flatten any bare newline left inside a record (presumably inside
        # quoted fields) before handing the record to the parser.
        $record =~ tr/\n/ /;
        die "line $." unless $csv->parse($record);

        my ($loc, $type, $pop) = ($csv->fields)[2, 3, 4];
        next unless defined $type && $type eq "Metropolitan Division";

        $pop = "" unless defined $pop;
        $pop =~ tr/0-9//cd;         # drop thousands separators and the like
        next unless length($pop) && $pop > 0;

        $loc = "" unless defined $loc;
        my ($city, $statelist) = split /,\s+/, $loc;
        my ($state) = split /-/, (defined $statelist ? $statelist : "");
        $state = "" unless defined $state;

        unless ($name_of->{$state}) {
            warn "Unknown state abbreviation '$state'" unless $seen{$state}++;
            next;
        }
        push @{ $cities{$state} }, [$city, $pop];
    }
    close $fh;

    return \%cities;
}

# Print the comparison table to STDOUT, largest quotient first; states with
# only one city come last.  Ties are broken by abbreviation so the output
# is reproducible.
#
#   $name_of  abbreviation => state name
#   $top_two  abbreviation => [ [city, pop], [city, pop] or undef ]
#   $quot     abbreviation => largest/second-largest population, present
#             only for states that have a second city
#
# NOTE(review): the strings below are reproduced as found.  They look like
# the remains of an HTML table whose tags were stripped, and the original
# also cycled a per-row colour between "#cccccc" and "pink" that never
# reached the output.  Restore the markup from the original source if it
# is available.  The dash in the single-city row is a literal multi-byte
# character; the file deliberately does not "use utf8" so that it is
# written out unchanged.
sub print_table {
    my ($name_of, $top_two, $quot) = @_;

    print "\n";
    print "\nState Largest city and\nits Population\nSecond-largest city\n"
        . "and its population\nQuotient ";

    my @order = sort {
        my $qa = $quot->{$a} || 0;
        my $qb = $quot->{$b} || 0;
        $qb <=> $qa or $a cmp $b;
    } keys %$top_two;

    for my $ab (@order) {
        my ($first, $second) = @{ $top_two->{$ab} };
        my ($c1, $p1) = @$first;

        if (exists $quot->{$ab}) {
            my ($c2, $p2) = @$second;
            # The quotient is formatted in one step: printing int() and the
            # rounded fraction separately showed 2.999 as "2.00".
            printf "\n%12s %24s %8s %24s %8s %9.2f\n",
                $name_of->{$ab}, $c1, comma_int($p1),
                $c2, comma_int($p2), $quot->{$ab};
        }
        else {
            printf "\n%12s %24s %8s —\n",
                $name_of->{$ab}, $c1, comma_int($p1);
        }
    }

    print "\n\n\n";
    return;
}

# Write gnuplot.dat and gnuplot.gpl for the scatter plot.
# $scatter is an array ref of [abbreviation, x, y] triples.
# Dies if either file cannot be opened or fully written.
sub write_gnuplot_files {
    my ($scatter) = @_;

    open my $dat, ">", "gnuplot.dat" or die $!;
    for my $point (@$scatter) {
        my ($ab, $x, $y) = @$point;
        print {$dat} "$x $y # $ab\n";
    }
    close $dat or die $!;

    open my $gpl, ">", "gnuplot.gpl" or die $!;
    # gnuplot needs one command per line.
    print {$gpl} qq{set output "scatterplot.pbm"\n},
                 qq{set terminal pbm color\n},
                 qq{set nokey\n};
    for my $point (@$scatter) {
        my ($ab, $x, $y) = @$point;
        print {$gpl} qq{set label " $ab" at $x, $y\n};
    }
    print {$gpl} qq{plot "gnuplot.dat" with points\n};
    close $gpl or die $!;

    return;
}

# Return the integer part of the argument with a comma between each group
# of three digits, e.g. 1234567.8 -> "1,234,567".  A leading minus sign is
# kept in front of the first group.
sub comma_int {
    my $n = int(shift());
    1 while $n =~ s/^(-?\d+)(\d{3})/$1,$2/;
    return $n;
}

# Return the fractional part of the argument rounded to two places, as a
# string starting with the decimal point, e.g. 2.5 -> ".50".
# Because of the rounding, a fraction of .995 or more comes back as ".00";
# do not glue the result onto int() of the same number.
sub frac {
    my $n = shift;
    $n -= int($n);
    $n = sprintf("%.2f", $n);
    $n =~ s/^.*\././;
    return $n;
}

# Base-10 logarithm of a positive number.
sub log10 {
    my ($x) = @_;
    return log($x) / log(10);
}

# Load the data, print the table, write the plot files and render the plot.
sub main {
    my $name_of = load_state_names("capital");
    my $cities  = load_cities("CENSUS", $name_of);

    my (%top_two, %quot, @scatter);
    for my $ab (sort keys %$cities) {
        my ($first, $second) =
            sort { $b->[1] <=> $a->[1] } @{ $cities->{$ab} };
        $top_two{$ab} = [$first, $second];

        # A state with a single city has no quotient and no plot point.
        next unless $second && $second->[1] > 0;

        my ($p1, $p2) = ($first->[1], $second->[1]);
        $quot{$ab} = $p1 / $p2;
        push @scatter, [$ab, log10($p1), log10($p2)];
    }

    print_table($name_of, \%top_two, \%quot);
    write_gnuplot_files(\@scatter);

    # Fixed command line with no interpolated input.  Rendering is
    # best-effort, so a failure is reported but does not abort the run.
    my $status = system(
        "gnuplot < gnuplot.gpl && cjpeg -q 95 scatterplot.pbm > scatterplot.jpg"
    );
    warn "scatter plot rendering failed (exit status $status)\n"
        if $status != 0;

    return;
}

# Run only when executed as a script, so the helpers can be loaded with
# require/do without touching the input files.
main() unless caller;

1;