Dominic J. Thoreau - build-import.matches.pl

Dominic J. Thoreau
build-import.matches.pl
Home page

Technical Articles

Family

News

Sounds

Blog

Art

How to contact me

Photography

How I build this site

Football prediction system

build-import.matches.pl
Perplex City
~\Desktop\prem2005\build-import.matches.pl.html
#! /perl/bin/perl

### Import matches into database
use strict;
use DOM_DB;
use DOM_FOOT;
$|=1;

my $dbh=&db_connect('premiership');

# build master all 12 seasons all divisions
open (FILEINDEX,"fileindex.txt");
my @findex=<FILEINDEX>;
close FILEINDEX;

# purge table for rebuild
my $sql="DELETE FROM results";                  &exec_sql($sql);
$sql="ALTER TABLE results AUTO_INCREMENT = 1";  &exec_sql($sql);
$sql='DELETE FROM seasonrank';                  &exec_sql($sql);

my (%ct,%tt, %tr); # being competition_tag, team_tag,Rate table
# long variable names clutter the code. 
my ($id,$tag,$team,$rate);
$sql='SELECT id, tag FROM competitions';
my $csr=&select_sql($sql);
while (($id, $tag)=$csr->fetchrow) {
        $ct{$tag}=$id;
}
$csr->finish;

$sql='SELECT id,name,start_rate FROM teams';
$csr=&select_sql($sql);
while (($id,$team,$rate)=$csr->fetchrow) {
        $tt{$team}=$id;
        $tr{$id}=$rate;
}
$csr->finish;

# you must integrate the synonyms as shadow teams
$sql='SELECT id,alt_name FROM teams where alt_name is not null';
$csr=&select_sql($sql);
while (($id, $team)=$csr->fetchrow) {
        $tt{$team}=$id;
}
$csr->finish;


my $file;
my $fc=@findex;
print "$fc:";
foreach $file (@findex) {
        chomp $file;
        open (FIND,$file);
        my @filec=<FIND>;
        close FIND;
        print '.'; # to reassure the operator

        chomp $filec[0];
        $filec[0]="$filec[0], ";
        my @fields=split(/\,/,$filec[0]);
        my $fc=0;

        # flush the key hash so we're not using out of date headers
        my %fhn=();
        my $fh;

        # Explanation for the fhn and fs notations used.
        #
        # The data as read is in CSV format, but the quality varies, some files
        # having more information than others, and thus the field placement 
        # altering somewhat. There is a header entry though, and the header 
        # codes are consistent.
        #
        # So... we put the headers in a hash, with their position as the data,
        # which later gets used as a subscript to the data array. Easy!
        foreach $fh (@fields) {
                $fhn{$fh}=$fc;
                $fc++;
        }
        GAME: foreach $fh (@filec) {
                my ($a,$b,$c,$d,$e,$f,$g,$h);
                $fh="$fh,  ";
                my @fs=split(/\,/,$fh);
                if (not defined $fs[0]) {next GAME;}
                if ($fs[0]eq'Div'|| $fs[0]eq'') { next GAME; }
                # Div,Date,HomeTeam,AwayTeam,FTHG,FTAG
                $a=$fs[$fhn{'Div'}];
                $b=$fs[$fhn{'Date'}];
                $b=~s/(\d\d)\/(\d\d)\/(\d\d)/$3\/$2\/$1/;
                $c=$fs[$fhn{'HomeTeam'}];
                $d=$fs[$fhn{'AwayTeam'}];
                $e=$fs[$fhn{'FTHG'}];
                $f=$fs[$fhn{'FTAG'}];
#               $g=$fs[$fhn{'WHH'}];
#               $h=$fs[$fhn{'WHA'}];
                if (not defined $fhn{'WHH'} ) { $g=0;}
                        else { $g=$fs[$fhn{'WHH'}]; }
                if (not defined $fhn{'WHA'} ) { $h=0;}
                        else { $h=$fs[$fhn{'WHA'}]; }

                # slapped in from the re-rating code because 
                #  UPDATE takes so long to run...

                my ($ph,$pv,$gd,$ch,$cv,$ah,$av);
                ($ph,$pv)=&assess($tr{$tt{$c}},$tr{$tt{$d}});
                        #$ph=1/(1+10**(($tr{$tt{$d}}-$tr{$tt{$c}})/400));
#               $pv=1/(1+10**(($tr{$tt{$c}}-$tr{$tt{$d}})/400));

                # It could be all 1 big if, but this looks better
                if ($e > $f )   { $ah=1; $av=0; }
                if ($e < $f )   { $ah=0; $av=1; }
                if ($e == $f )  { $ah=0.5;$av=0.5;}

                #print "$hs - $vs : $ph - $pv\n";

                $gd=abs($e-$f); if ($gd==0) { $gd=1; }
                $ch=int(($ah-$ph) * 16 * $gd);
                $cv=int(($av-$pv) * 16 * $gd);


                # we're choosing not to have original data in here atm
                #   as the BLOBS are too bulky
                $sql = q{
                INSERT INTO results
                (home,visitor,hscore,vscore,
                date,competition,wh_home,wh_away,
                hrank_new,vrank_new,hrank_old,vrank_old)
                VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
                };
                &exec_sql($sql,$tt{$c},$tt{$d},$e,$f,
                        $b,$ct{$a},$g,$h,
                $tr{$tt{$c}}+$ch,$tr{$tt{$d}}+$cv,$tr{$tt{$c}},$tr{$tt{$d}});
                $tr{$tt{$c}}+=$ch; $tr{$tt{$d}}+=$cv;
        }
        # write end-of-season rankings to holding table
        $sql = 'SELECT id FROM seasons WHERE start < ? and end > ?';
        print "$b";
        $csr=&select_sql($sql,$b,$b);
        my ($season)=$csr->fetchrow;
        $csr->finish;
        print "$season - $b";
        foreach $team ( keys %tt ) {
                $sql= q{INSERT INTO seasonrank
                (team, season, rank)
                VALUES (?,?,?)
                };
                &exec_sql($sql, $tt{$team},1,$tr{$tt{$team}});
        }
}
foreach $team ( keys %tt ) {
        $sql = 'UPDATE teams SET rate = ? WHERE id =?';
        &exec_sql($sql,$tr{$tt{$team}}, $tt{$team});
}
© 2004 Dominic J. Thoreau - this is http://www.thoreau-online.net/build-import.matches.pl.html
Updated and uploaded Fri Dec 29 11:45:07 2006