|
~\Desktop\prem2005\build-import.matches.pl.html
#! /perl/bin/perl
### Import matches into database
use strict;
use DOM_DB;
use DOM_FOOT;
$|=1;
my $dbh=&db_connect('premiership');
# build master all 12 seasons all divisions
open (FILEINDEX,"fileindex.txt");
my @findex=<FILEINDEX>;
close FILEINDEX;
# purge table for rebuild
my $sql="DELETE FROM results"; &exec_sql($sql);
$sql="ALTER TABLE results AUTO_INCREMENT = 1"; &exec_sql($sql);
$sql='DELETE FROM seasonrank'; &exec_sql($sql);
my (%ct,%tt, %tr); # being competition_tag, team_tag,Rate table
# long variable names clutter the code.
my ($id,$tag,$team,$rate);
$sql='SELECT id, tag FROM competitions';
my $csr=&select_sql($sql);
while (($id, $tag)=$csr->fetchrow) {
$ct{$tag}=$id;
}
$csr->finish;
$sql='SELECT id,name,start_rate FROM teams';
$csr=&select_sql($sql);
while (($id,$team,$rate)=$csr->fetchrow) {
$tt{$team}=$id;
$tr{$id}=$rate;
}
$csr->finish;
# you must integrate the synonyms as shadow teams
$sql='SELECT id,alt_name FROM teams where alt_name is not null';
$csr=&select_sql($sql);
while (($id, $team)=$csr->fetchrow) {
$tt{$team}=$id;
}
$csr->finish;
my $file;
my $fc=@findex;
print "$fc:";
foreach $file (@findex) {
chomp $file;
open (FIND,$file);
my @filec=<FIND>;
close FIND;
print '.'; # to reassure the operator
chomp $filec[0];
$filec[0]="$filec[0], ";
my @fields=split(/\,/,$filec[0]);
my $fc=0;
# flush the key hash so we're not using out of date headers
my %fhn=();
my $fh;
# Explanation for the fhn and fs notations used.
#
# The data as read is in CSV format, but the quality varies, some files
# having more information than others, and thus the field placement
# altering somewhat. There is a header entry though, and the header
# codes are consistent.
#
# So... we put the headers in a hash, with their position as the data,
# which later gets used as a subscript to the data array. Easy!
foreach $fh (@fields) {
$fhn{$fh}=$fc;
$fc++;
}
GAME: foreach $fh (@filec) {
my ($a,$b,$c,$d,$e,$f,$g,$h);
$fh="$fh, ";
my @fs=split(/\,/,$fh);
if (not defined $fs[0]) {next GAME;}
if ($fs[0]eq'Div'|| $fs[0]eq'') { next GAME; }
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG
$a=$fs[$fhn{'Div'}];
$b=$fs[$fhn{'Date'}];
$b=~s/(\d\d)\/(\d\d)\/(\d\d)/$3\/$2\/$1/;
$c=$fs[$fhn{'HomeTeam'}];
$d=$fs[$fhn{'AwayTeam'}];
$e=$fs[$fhn{'FTHG'}];
$f=$fs[$fhn{'FTAG'}];
# $g=$fs[$fhn{'WHH'}];
# $h=$fs[$fhn{'WHA'}];
if (not defined $fhn{'WHH'} ) { $g=0;}
else { $g=$fs[$fhn{'WHH'}]; }
if (not defined $fhn{'WHA'} ) { $h=0;}
else { $h=$fs[$fhn{'WHA'}]; }
# slapped in from the re-rating code because
# UPDATE takes so long to run...
my ($ph,$pv,$gd,$ch,$cv,$ah,$av);
($ph,$pv)=&assess($tr{$tt{$c}},$tr{$tt{$d}});
#$ph=1/(1+10**(($tr{$tt{$d}}-$tr{$tt{$c}})/400));
# $pv=1/(1+10**(($tr{$tt{$c}}-$tr{$tt{$d}})/400));
# It could be all 1 big if, but this looks better
if ($e > $f ) { $ah=1; $av=0; }
if ($e < $f ) { $ah=0; $av=1; }
if ($e == $f ) { $ah=0.5;$av=0.5;}
#print "$hs - $vs : $ph - $pv\n";
$gd=abs($e-$f); if ($gd==0) { $gd=1; }
$ch=int(($ah-$ph) * 16 * $gd);
$cv=int(($av-$pv) * 16 * $gd);
# we're choosing not to have original data in here atm
# as the BLOBS are too bulky
$sql = q{
INSERT INTO results
(home,visitor,hscore,vscore,
date,competition,wh_home,wh_away,
hrank_new,vrank_new,hrank_old,vrank_old)
VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
};
&exec_sql($sql,$tt{$c},$tt{$d},$e,$f,
$b,$ct{$a},$g,$h,
$tr{$tt{$c}}+$ch,$tr{$tt{$d}}+$cv,$tr{$tt{$c}},$tr{$tt{$d}});
$tr{$tt{$c}}+=$ch; $tr{$tt{$d}}+=$cv;
}
# write end-of-season rankings to holding table
$sql = 'SELECT id FROM seasons WHERE start < ? and end > ?';
print "$b";
$csr=&select_sql($sql,$b,$b);
my ($season)=$csr->fetchrow;
$csr->finish;
print "$season - $b";
foreach $team ( keys %tt ) {
$sql= q{INSERT INTO seasonrank
(team, season, rank)
VALUES (?,?,?)
};
&exec_sql($sql, $tt{$team},1,$tr{$tt{$team}});
}
}
foreach $team ( keys %tt ) {
$sql = 'UPDATE teams SET rate = ? WHERE id =?';
&exec_sql($sql,$tr{$tt{$team}}, $tt{$team});
}
|