Code.
Here is the Python code that I used to generate the random numbers for my Zobrist hashes. The first time I did this, I used Perl and it generated a set of numbers that were not quite random. For whatever reason all the numbers it generated were even. This Python code is quite reliable.
import random
# Emit the random 64-bit keys used for Zobrist hashing, one per line.
# Output order: 768 piece keys (2 x 6 x 64), 4 castle flags, 8 en passant
# files and 1 side-to-move key -- 781 numbers in total.
# Each key is drawn uniformly from 0 .. 2**64 - 1 (18446744073709551615).
# print(...) with a single argument behaves the same on Python 2 and 3.

# kind of piece
for i in range(2):
    for j in range(6):
        for k in range(64):
            print(random.randint(0, 18446744073709551615))
# castle flags
for i in range(4):
    print(random.randint(0, 18446744073709551615))
# en passant file
for i in range(8):
    print(random.randint(0, 18446744073709551615))
# side to move
for i in range(1):
    print(random.randint(0, 18446744073709551615))
Convert PGN files to winning game lines.
It takes 1 command line argument, which is the name of a single PGN file. If this is omitted it looks for all PGN files in the current directory. For each PGN file two files will be created with .black_wins and .white_wins appended to the original filename. These files store the winning lines. If the script crashes, delete the last *_wins files it created and restart the script. It will not overwrite existing _wins files.
#!/usr/bin/perl
# Extract opening lines from the decisive games in PGN files.
#
# Usage: <script> [file.pgn]
#   With an argument only that file (looked up in the current directory) is
#   processed; without one every *.pgn file in the current directory is.
#
# For each PGN file two files are written, "<file>.black_wins" and
# "<file>.white_wins", one game per line.  A line holds the moves in
# coordinate form ("e2e4 e7e5 ...", a promotion gets "=" plus the promotion
# piece appended), cut off after 24 plies for white wins and 23 plies for
# black wins.  Shorter games are kept only if the final position is mate.
# A PGN file is skipped when either of its output files already exists.
use Chess::PGN::Parse;
use English qw( -no_match_vars );
use Chess::Rep;
use strict;
use warnings;

my $gtot = 0;    # games with a result other than '*' seen so far, all files

opendir(my $dir, ".") or die "can't read current directory: $!\n";
FILE: while (defined(my $file = readdir($dir)))
{
    next FILE if ($file !~ /\.pgn$/i);
    next FILE if ($ARGV[0] && ($file ne $ARGV[0]));
    if (-e "$file.black_wins")
    {
        print "$file already done\n";
        next FILE;
    }
    if (-e "$file.white_wins")
    {
        print "$file already done WHITE\n";
        next FILE;
    }

    # Create the parser before the output files: if the PGN cannot be opened
    # no empty *_wins files are left behind to be mistaken for finished work.
    my $pgn = Chess::PGN::Parse->new($file) or die "can't open $file\n";
    open(my $bws, '>', "$file.black_wins") or die "can't write $file.black_wins: $!\n";
    open(my $wws, '>', "$file.white_wins") or die "can't write $file.white_wins: $!\n";

    GAME: while ($pgn->read_game())
    {
        print "$file, $gtot games\n";
        $pgn->parse_game();
        my $gr = $pgn->result();
        $gr = '' unless (defined $gr);
        next GAME if ($gr eq '*');
        $gtot++;

        # 1 = white won, 2 = black won, 0 = anything else (skipped)
        my $game = 0;
        if ($gr =~ /^1\-/)
        {
            $game = 1;
        }
        elsif ($gr =~ /^0\-/)
        {
            $game = 2;
        }
        next GAME if ($game == 0);

        # One ply fewer for black wins, so the stored line always has the
        # same parity relative to the winner.
        my $max_moves = ($game == 2) ? 23 : 24;
        my $pos = Chess::Rep->new;
        my $mv_cnt = 0;
        my $line = '';
        foreach my $move (@{ $pgn->moves() || [] })
        {
            $mv_cnt++;
            my %r = %{$pos->go_move($move)};
            my $from = lc(substr($r{from}, 0, 2));
            my $to = lc(substr($r{to}, 0, 2));
            my $promote = $r{promote} ? '=' . $r{promote} : '';
            $line .= "$from$to$promote ";
            last if ($mv_cnt == $max_moves);
        }

        # The game ended before the cut-off: keep it only if it ended in
        # mate (stalemate or any kind of resignation is dropped).
        if ($mv_cnt != $max_moves)
        {
            next GAME unless ($pos->status->{mate});
        }

        print { $game == 1 ? $wws : $bws } "$line\n";
    }

    close($bws) or die "can't finish $file.black_wins: $!\n";
    close($wws) or die "can't finish $file.white_wins: $!\n";
}
closedir($dir);
In case you want it, here is a script to extract all the lines from PGNs that ended in a draw. You can use the board evaluation function to filter out bad positions. But these games all ended in a draw, so only use these if you want a big opening book.
#!/usr/bin/perl
# Extract opening lines from the drawn games in PGN files.
#
# Usage: <script> [file.pgn]
#   With an argument only that file (looked up in the current directory) is
#   processed; without one every *.pgn file in the current directory is.
#
# For each PGN file a "<file>.draws" file is written, one game per line, in
# coordinate form ("e2e4 e7e5 ...", a promotion gets "=" plus the promotion
# piece appended), cut off after 24 plies.  A PGN file is skipped when its
# .draws file already exists.
use Chess::PGN::Parse;
use English qw( -no_match_vars );
use Chess::Rep;
use strict;
use warnings;

my $gtot = 0;    # games with a result other than '*' seen so far, all files

opendir(my $dir, ".") or die "can't read current directory: $!\n";
FILE: while (defined(my $file = readdir($dir)))
{
    next FILE if ($file !~ /\.pgn$/i);
    next FILE if ($ARGV[0] && ($file ne $ARGV[0]));
    if (-e "$file.draws")
    {
        print "$file already done\n";
        next FILE;
    }

    # Create the parser before the output file: if the PGN cannot be opened
    # no empty .draws file is left behind to be mistaken for finished work.
    my $pgn = Chess::PGN::Parse->new($file) or die "can't open $file\n";
    open(my $ds, '>', "$file.draws") or die "can't write $file.draws: $!\n";

    GAME: while ($pgn->read_game())
    {
        print "$file, $gtot games\n";
        $pgn->parse_game();
        my $gr = $pgn->result();
        $gr = '' unless (defined $gr);
        next GAME if ($gr eq '*');
        $gtot++;
        next GAME unless ($gr =~ /^1\/2\-1\/2/);    # draws only

        my $max_moves = 24;
        my $pos = Chess::Rep->new;
        my $mv_cnt = 0;
        my $line = '';
        foreach my $move (@{ $pgn->moves() || [] })
        {
            $mv_cnt++;
            my %r = %{$pos->go_move($move)};
            my $from = lc(substr($r{from}, 0, 2));
            my $to = lc(substr($r{to}, 0, 2));
            my $promote = $r{promote} ? '=' . $r{promote} : '';
            $line .= "$from$to$promote ";
            last if ($mv_cnt == $max_moves);
        }

        # Games that ended before the cut-off are dropped unless the final
        # position is mate (stalemate or any kind of resignation).
        if ($mv_cnt != $max_moves)
        {
            next GAME unless ($pos->status->{mate});
        }

        print {$ds} "$line\n";
    }

    close($ds) or die "can't finish $file.draws: $!\n";
}
closedir($dir);
Create scored versions of opening lines
Like the script above, it takes 1 command line argument to specify the single file it should process. Include the .black_wins or .white_wins extension. If omitted, the entire folder will be processed. If the .scored file already exists it will be skipped. For each file, a new file with .scored appended will be created. The first number on each line of the scored file is the value assigned by Gaviota's evaluation function. See the $command assignment (around line 13) to set the path or change to a different engine.
#!/usr/bin/perl
# Score opening lines with a chess engine (Gaviota).
#
# Usage: <script> [name]
#   Processes every *.black_wins, *.white_wins and *.draws file in the
#   current directory whose name contains <name> (all of them when no
#   argument is given) and that has no "<file>.scored" yet.
#
# For each input line the moves are replayed, the final position is sent to
# the engine, and "<score> <original line>" is written to "<file>.scored".
# A final position that is mate gets the fixed score 32767 without
# consulting the engine.
use Expect;
use Chess::Rep;
use strict;
use warnings;
$Expect::Log_Stdout = 0;

# Path of the engine binary; edit this to use another location or engine.
my $command = 'gaviota-0.84-linux/gaviota-linux64-0.84';
my $exp = Expect->new();
$exp->raw_pty(1);
$exp->spawn($command) or die "Cannot spawn $command: $!\n";

# Wait up to one second for the text the engine prints on start-up.
my $idx = $exp->expect(1, 'useful commands');
unless ($idx)
{
    print "Gaviota failed to start\n";
    exit 1;
}

my $cntr = 1;    # running number of the file being processed
opendir(my $dir, ".") or die "can't read current directory: $!\n";
FILE: while (defined(my $file = readdir($dir)))
{
    next FILE unless ($file =~ /\.(?:black_wins|white_wins|draws)$/);
    # \Q..\E: the argument is a file name, not a regular expression.
    next FILE if ($ARGV[0] && ($file !~ /\Q$ARGV[0]\E/));
    next FILE if (-e "$file.scored");
    print "$cntr, $file\n";

    open(my $in, '<', $file) or die "can't read $file: $!\n";
    open(my $out, '>', "$file.scored") or die "can't write $file.scored: $!\n";

    # 1 = black won, 2 = white won, 0 = draw
    my $winner = 0;
    if ($file =~ /\.black_wins$/)
    {
        $winner = 1;
    }
    elsif ($file =~ /\.white_wins$/)
    {
        $winner = 2;
    }

    while (my $line = <$in>)
    {
        chomp($line);
        my $pos = Chess::Rep->new();
        foreach my $m (split(/ /, $line))
        {
            $pos->go_move($m);
        }
        my $mate = $pos->status->{mate};

        # Sanity check on whose turn it is at the end of a line that did not
        # end in mate: 128 is fatal for black wins, 0 for white wins.
        unless ($mate)
        {
            die "Wrong turn at end of game"
                if (($winner == 1) && ($pos->to_move() eq '128'));
            die "Wrong turn at tend of game"
                if (($winner == 2) && ($pos->to_move() eq '0'));
        }

        my $score;
        if ($mate)
        {
            $score = 32767;
        }
        else
        {
            my $fen = $pos->get_fen();
            $exp->clear_accum();
            $exp->send("setboard $fen\n");
            $exp->send("score\n");
            # Wait up to two seconds for the '===>' marker; the score is the
            # first parenthesised text following it.
            my (undef, $error, undef, undef, $after_match) = $exp->expect(2, '===>');
            die "chess engine didnt return a score $fen | $line | $file " if ($error);
            ($score) = ($after_match =~ /\((.*?)\)/) if (defined $after_match);
            unless (defined $score)
            {
                # Same output as before (an empty score), but no longer silent.
                warn "no score found in engine reply for: $line\n";
                $score = '';
            }
        }
        print {$out} $score . " " . $line . "\n";
    }

    close($in);
    close($out) or die "can't finish $file.scored: $!\n";
    $cntr++;
}
closedir($dir);
Collect opening lines and filter based on a score threshold
Takes a numeric parameter as the threshold for filtering out bad lines. All lines that score below this amount will be ignored. All *.scored files are parsed and added to openingbook.raw
#!/usr/bin/perl
# Collect all *.scored files of the current directory into openingbook.raw.
#
# Usage: <script> [threshold]
#   threshold is numeric and defaults to 0.
#
# Every input line is "<score> <moves>".  Output lines are the input line
# prefixed with "w ", "b " or "d ":
#   - lines from *white_wins* / *black_wins* files whose score is above the
#     threshold are written as "w" / "b";
#   - every other line (draw files, and decisive lines that did not pass the
#     test above) is written as "d" when abs(score) is above the threshold,
#     otherwise it is discarded.
# NOTE(review): the second rule means a decisive line with a strongly
# negative score ends up in the book as "d" -- confirm that is intended.
use strict;
use warnings;

my $threshold = defined($ARGV[0]) ? $ARGV[0] + 0 : 0;
my $draw_threshold = $threshold;

open(my $book, '>', "openingbook.raw") or die "can't write openingbook.raw: $!\n";
opendir(my $dir, ".") or die "can't read current directory: $!\n";

my $ltot = 0;        # lines read
my $lkeep = 0;       # lines written to the book
my $ldiscard = 0;    # well-formed lines rejected by the threshold

while (defined(my $file = readdir($dir)))
{
    next unless ($file =~ /\.scored$/);
    my $winner = 0; # draw =0 , black=1, white=2
    $winner = 1 if ($file =~ /black_wins/);
    $winner = 2 if ($file =~ /white_wins/);

    open(my $in, '<', $file) or die "can't read $file: $!\n";
    while (my $line = <$in>)
    {
        $ltot++;
        my ($d) = ($line =~ /^([+-]?\d+) .*$/);
        unless (defined $d)
        {
            # no leading integer score (e.g. the engine gave no number)
            print "wtf : $line";
            next;
        }

        if (($winner > 0) && ($d > $threshold))
        {
            $lkeep++;
            print {$book} (($winner == 1) ? "b " : "w ") . $line;
        }
        elsif (abs($d) > $draw_threshold)
        {
            $lkeep++;
            print {$book} "d " . $line;
        }
        else
        {
            $ldiscard++;
        }
    }
    close($in);
}
closedir($dir);
close($book) or die "can't finish openingbook.raw: $!\n";
print "Total: $ltot Kept: $lkeep Discarded: $ldiscard\n";
Build the zobrist hash and move lists
This creates two files, 'hashes' and 'moves', that are in sorted order. These will be parsed by the C# tokenizer to create the compressed book and block indexes.
#!/usr/bin/perl
# Build the sorted 'hashes' and 'moves' files from a raw opening book.
#
# Usage: <script> <raw book>
#
# Input line format: "<winner> <score> <move> <move> ...", <winner> being
# 'w', 'b' or 'd'.  The Zobrist keys are read from the file 'zob', one
# number per line.  Every position reached is hashed (see zob()) and the
# move played from it is stored under that hash in a tied MLDBM database
# "<raw book>.data.custom".  Afterwards the database is written out in
# ascending hash order: line N of 'hashes' is the position hash belonging
# to the packed move on line N of 'moves'.
use Chess::Rep;
use MLDBM qw(DB_File Storable);
use Fcntl;
use strict;

my $fn = $ARGV[0];
die "You must specify a raw book to process" unless ($fn);

# promotion piece letter -> piece kind number
my %pth = (K => 0, Q => 1, R => 2, B => 3, N => 4, P => 5);

# Chess::Rep piece code -> [colour index, piece kind]; used by zob()
my %ph = (
    1   => [1,5],
    2   => [1,4],
    4   => [1,0],
    8   => [1,3],
    16  => [1,2],
    32  => [1,1],
    129 => [0,5],
    130 => [0,4],
    132 => [0,0],
    136 => [0,3],
    144 => [0,2],
    160 => [0,1],
);

# Zobrist key table: line number in 'zob' (from 0) -> key; used by zob()
my %ah;
open(my $zob_fh, '<', "zob") or die "can't read zob: $!";
my @lines = <$zob_fh>;
close($zob_fh);
chomp(@lines);
@ah{0 .. $#lines} = @lines;

# file letter -> column number; also used by zob()
my %rh = (a => 0, b => 1, c => 2, d => 3, e => 4, f => 5, g => 6, h => 7);

open(my $book_fh, '<', $fn) or die "can't read $fn: $!";
my %hashes;
unlink $fn.".data.custom";
my $dbm = tie %hashes, 'MLDBM', $fn.".data.custom", O_CREAT|O_RDWR, 0640 or die $!;
#goto readyup;
my $linen = 0;
while (my $line = <$book_fh>)
{
    $linen++;
    print "Line $linen\n";
    my $pos = Chess::Rep->new();
    chomp($line);
    my @moves = split(/ /, $line);
    my $winner = shift(@moves);
    my $score = shift(@moves);
    foreach my $m (@moves)
    {
        my $z = zob($pos);    # hash of the position BEFORE the move
        my %r = %{$pos->go_move($m)};

        # Skip the move when the side that just played it lost the game
        # (to_move is compared after the move: '0' for 'b' lines, '128'
        # for 'w' lines).  Lines marked 'd' contribute every move.
        if ($pos->to_move() eq '0')
        {
            next if ($winner eq 'b');
        }
        if ($pos->to_move() eq '128')
        {
            next if ($winner eq 'w');
        }

        my $from_col = $rh{lc(substr($r{from}, 0, 1))};
        my $from_row = substr($r{from}, 1, 1);
        my $to_col = $rh{lc(substr($r{to}, 0, 1))};
        my $to_row = substr($r{to}, 1, 1);
        # square index 0..63, rank 8 first
        my $from_idx = ((7-($from_row-1)) * 8) + $from_col;
        my $to_idx = ((7-($to_row-1)) * 8) + $to_col;
        my $promote = $pth{$r{promote}};    # only reported in the error below
        my $move_byte = $from_idx | ($to_idx << 6); # we assume always promote to queen to keep it small, score goes here instead | ($promote << 12);
        die "Move_byte was too big: $move_byte from_idx: $from_idx to_idx: $to_idx promote: $promote" if ($move_byte > 32767);

        # Fetch, extend and store the whole list again rather than pushing
        # onto the value held by the tied hash.
        if (ref($hashes{$z}) ne 'ARRAY')
        {
            $hashes{$z} = [[$move_byte,$score]];
        }
        else
        {
            my @entries = @{$hashes{$z}};
            push @entries, [$move_byte,$score];
            $hashes{$z} = \@entries;
        }
    }
}
close($book_fh);

readyup:
print "About to sort hashes, go make a coffee\n";
open(my $fhash, '>', "hashes") or die "can't write hashes: $!";
open(my $fmove, '>', "moves") or die "can't write moves: $!";
my @hk = keys %hashes;
foreach my $k (sort num_sort (@hk))
{
    # ascending by score, so lower scores receive the larger rank values
    my @mvs = sort move_sort @{$hashes{$k}};
    my %us;
    $us{$_->[1]} = 1 foreach (@mvs);
    my $umvs = scalar(keys %us);    # number of distinct scores for this hash
    my $cur_rank = 1;
    my $cur_score = $mvs[0]->[1];
    my %seen_move;
    foreach my $m (@mvs)
    {
        my $mp = $m->[0];
        next if ($seen_move{$mp}++);    # only the first entry of each move
        my $sp = $m->[1];
        if ($cur_score != $sp)
        {
            $cur_rank++;
            $cur_score = $sp;
        }
        my $rank = 15 - (($cur_rank / $umvs) * 15);
        # move in the low 12 bits, rank (truncated to an integer) above them
        my $ns = $mp | ($rank << 12);
        # NOTE(review): with three or more distinct scores the first rank is
        # already 10, and 10 << 12 exceeds 32767, so this check fires --
        # confirm whether the intended limit is 65535.
        die "Move_byte with score was too big: $ns mp: $mp rank: $rank " if ($ns > 32767);
        print {$fhash} $k . "\n";
        print {$fmove} $ns . "\n";
    }
}
close($fhash) or die "can't finish hashes: $!";
close($fmove) or die "can't finish moves: $!";

# sort comparator: numeric, ascending
sub num_sort
{
    return ($a + 0) <=> ($b + 0);
}

# sort comparator: [move, score] pairs, ascending by score
sub move_sort
{
    return $a->[1] <=> $b->[1];
}
# Compute the Zobrist hash of a position.
#
# Takes the Chess::Rep object to hash and returns the XOR of the key-table
# entries (%ah) selected by:
#   - every occupied square: slot (colour * 6 + kind) * 64 + square, with
#     colour and kind looked up in %ph;
#   - slots 768..771 for the four can_castle() answers;
#   - slot 772 + file when the position has an en passant square;
#   - slot 780 when to_move() is '128'.
sub zob
{
    my ($board) = @_;
    my $hash = 0;

    foreach my $square (0 .. 63)
    {
        my $col = int($square / 8);
        my $row = $square % 8;
        my $piece = $board->get_piece_at(7 - $col, $row);
        my $colour = $ph{$piece}[0];
        my $kind = $ph{$piece}[1];
        # a piece code without an entry in %ph marks an empty square
        next unless (($colour ne '') || ($kind ne ''));
        my $slot = (($colour*6+$kind) * 64 + $square);
        $hash ^= $ah{$slot};
    }

    # [colour argument, side argument, key slot] for each castle flag
    my @castle_slots = ([1, 0, 768], [1, 1, 769], [0, 0, 770], [0, 1, 771]);
    foreach my $entry (@castle_slots)
    {
        my ($colour, $side, $slot) = @{$entry};
        $hash ^= $ah{$slot} if ($board->can_castle($colour, $side));
    }

    if ($board->{enpa} ne '0')
    {
        my $file = $rh{lc(substr($board->ep_square(), 0, 1))};
        $hash ^= $ah{772 + $file};
    }

    $hash ^= $ah{780} if ($board->to_move() eq '128');

    return $hash;
}
Build the Polyglot book
This Perl script will convert the hash and moves files into a polyglot compatible book that can be used with other chess engines or a Chess GUI like WinBoard.
#!/usr/bin/perl
# Build a Polyglot-format opening book, "pwned.polyglot".
#
# Usage: <script> <raw book>
#
# Input line format: "<winner> <score> <move> <move> ...", <winner> being
# 'w', 'b' or 'd'.  The key table is read from 'polyglot.zob', one number
# per line.  Positions and the moves played from them are collected in a
# tied MLDBM database "<raw book>.data".  If that database already holds
# entries it is reused as it is and the raw book is not read again; delete
# the .data file to force a rebuild.
#
# Each book entry is 16 bytes, all big-endian: 8 bytes position key,
# 2 bytes move, 2 bytes weight (the line's score, 0 replaced by 1) and
# 4 zero bytes.  Entries are written in ascending key order.
use Chess::Rep;
use MLDBM qw(DB_File Storable);
use Fcntl;
use strict;

my $fn = $ARGV[0];
die "You must specify a raw book to process" unless ($fn);

# {colour index}{piece kind} -> piece number of the key table; used by zob()
my %kind_of_piece;
$kind_of_piece{1}{5} = 0; #black pawn
$kind_of_piece{0}{5} = 1; #white pawn
$kind_of_piece{1}{4} = 2; #black knight
$kind_of_piece{0}{4} = 3; #white knight
$kind_of_piece{1}{3} = 4; #black bishop
$kind_of_piece{0}{3} = 5; #white bishop
$kind_of_piece{1}{2} = 6; #black rook
$kind_of_piece{0}{2} = 7; #white rook
$kind_of_piece{1}{1} = 8; #black queen
$kind_of_piece{0}{1} = 9; #white queen
$kind_of_piece{1}{0} = 10; #black king
$kind_of_piece{0}{0} = 11; #white king

# promotion piece letter -> piece kind number
my %pth = (K => 0, Q => 1, R => 2, B => 3, N => 4, P => 5);

# Chess::Rep piece code -> [colour index, piece kind]; used by zob()
my %ph = (
    1   => [1,5],
    2   => [1,4],
    4   => [1,0],
    8   => [1,3],
    16  => [1,2],
    32  => [1,1],
    129 => [0,5],
    130 => [0,4],
    132 => [0,0],
    136 => [0,3],
    144 => [0,2],
    160 => [0,1],
);

# key table: line number in 'polyglot.zob' (from 0) -> key; used by zob()
my %ah;
open(my $zob_fh, '<', "polyglot.zob") or die "can't read polyglot.zob: $!";
my @lines = <$zob_fh>;
close($zob_fh);
chomp(@lines);
@ah{0 .. $#lines} = @lines;

# file letter -> column number; also used by zob()
my %rh = (a => 0, b => 1, c => 2, d => 3, e => 4, f => 5, g => 6, h => 7);

my %hashes;
#unlink $fn.".data";
my $dbm = tie %hashes, 'MLDBM', $fn.".data", O_CREAT|O_RDWR, 0640 or die $!;

# Previously an unconditional "goto readyup" skipped the parsing below, so a
# first run (empty database) produced an empty book.  Parse whenever the
# database has no entries; keep reusing a populated one.
my $first_key = each %hashes;
keys %hashes;    # reset the iterator advanced by each()
if (defined $first_key)
{
    print "Reusing existing $fn.data (delete it to rebuild from $fn)\n";
}
else
{
    open(my $book_fh, '<', $fn) or die "can't read $fn: $!";
    my $linen = 0;
    while (my $line = <$book_fh>)
    {
        $linen++;
        print "Line $linen\n";
        my $pos = Chess::Rep->new();
        chomp($line);
        my @moves = split(/ /, $line);
        my $winner = shift(@moves);
        my $score = shift(@moves);
        foreach my $m (@moves)
        {
            my $z = zob($pos);    # key of the position BEFORE the move
            #print "Z:" . sprintf("%x", $z) . "\n";
            my %r = %{$pos->go_move($m)};

            # Skip the move when the side that just played it lost the game
            # (to_move is compared after the move: '0' for 'b' lines, '128'
            # for 'w' lines).  Lines marked 'd' contribute every move.
            if ($pos->to_move() eq '0')
            {
                next if ($winner eq 'b');
            }
            if ($pos->to_move() eq '128')
            {
                next if ($winner eq 'w');
            }

            # account for strange castle move square numbers
            $r{to} = 'h1' if (($r{from} eq 'e1') && ($r{to} eq 'g1') && ($r{piece} eq '132')); #white king side
            $r{to} = 'a1' if (($r{from} eq 'e1') && ($r{to} eq 'c1') && ($r{piece} eq '132')); #white queen side
            $r{to} = 'h8' if (($r{from} eq 'e8') && ($r{to} eq 'g8') && ($r{piece} eq '4')); #black king side
            $r{to} = 'a8' if (($r{from} eq 'e8') && ($r{to} eq 'c8') && ($r{piece} eq '4')); #black queen side

            my $from_col = $rh{lc(substr($r{from}, 0, 1))};
            my $from_row = int(substr($r{from}, 1, 1)) - 1;
            my $to_col = $rh{lc(substr($r{to}, 0, 1))};
            my $to_row = int(substr($r{to}, 1, 1)) - 1;
            # every promotion is stored with promotion value 4
            my $promote = $r{promote} ? 4 : 0;
            # 3 bits each: to file, to row, from file, from row, promotion
            my $move_byte = $to_col | ($to_row << 3) | ($from_col << 6) | ($from_row << 9) | ($promote << 12);
            die "Move_byte was too big: $move_byte promote: $promote" if ($move_byte > 32767);

            # Fetch, extend and store the whole list again rather than
            # pushing onto the value held by the tied hash.
            if (ref($hashes{$z}) ne 'ARRAY')
            {
                $hashes{$z} = [[$move_byte,$score]];
            }
            else
            {
                my @entries = @{$hashes{$z}};
                push @entries, [$move_byte,$score];
                $hashes{$z} = \@entries;
            }
        }
    }
    close($book_fh);
}

readyup:
print "About to sort hashes, go make a coffee\n";
open(my $fhash, '>:raw', "pwned.polyglot") or die "can't write pwned.polyglot: $!";
binmode($fhash);
my @hk = keys %hashes;
foreach my $k (sort num_sort (@hk))
{
    my @mvs = sort move_sort @{$hashes{$k}};    # ascending by score
    my %us;
    $us{$_->[1]} = 1 foreach (@mvs);
    my $umvs = scalar(keys %us);    # number of distinct scores for this key
    my $cur_rank = 1;
    my $cur_score = $mvs[0]->[1];
    my %seen_move;
    foreach my $m (@mvs)
    {
        my $mp = $m->[0];
        next if ($seen_move{$mp}++);    # only the first entry of each move
        my $sp = $m->[1];
        if ($cur_score != $sp)
        {
            $cur_rank++;
            $cur_score = $sp;
        }
        # the rank is not stored in this format; it only appears in the
        # error message below
        my $rank = 15 - (($cur_rank / $umvs) * 15);
        my $ns = $mp;
        die "Move_byte with score was too big: $ns mp: $mp rank: $rank " if ($ns > 32767);
        $sp = 1 if ($sp == 0);

        # most significant byte first for every field
        my $o = pack('C16',
            (map { ($k  >> (8 * (7 - $_))) & 255 } 0 .. 7),    # key
            (map { ($mp >> (8 * (1 - $_))) & 255 } 0 .. 1),    # move
            (map { ($sp >> (8 * (1 - $_))) & 255 } 0 .. 1),    # weight
            (0) x 4,                                           # unused
        );
        print {$fhash} $o;
    }
}
close($fhash) or die "can't finish pwned.polyglot: $!";

# sort comparator: numeric, ascending
sub num_sort
{
    return ($a + 0) <=> ($b + 0);
}

# sort comparator: [move, score] pairs, ascending by score
sub move_sort
{
    return $a->[1] <=> $b->[1];
}
# Compute the book key of a position from the 'polyglot.zob' key table.
#
# Takes the Chess::Rep object to hash and returns the XOR of the key-table
# entries (%ah) selected by:
#   - every occupied square: slot 64 * piece number + 8 * row + col, the
#     piece number coming from %kind_of_piece via %ph;
#   - slots 768..771 for the four can_castle() answers;
#   - slot 772 + file when there is an en passant square AND one of the
#     squares beside that file (row 3 when to_move() is '0', row 4
#     otherwise) holds piece code 1 resp. 129;
#   - slot 780 when to_move() is '128'.
sub zob
{
    my ($board) = @_;
    my $key = 0;

    foreach my $square (0 .. 63)
    {
        my $row = int($square / 8);
        my $col = $square % 8;
        my $piece = $board->get_piece_at($row, $col);
        my $colour = $ph{$piece}[0];
        my $kind = $ph{$piece}[1];
        # a piece code without an entry in %ph marks an empty square
        next unless (($colour ne '') || ($kind ne ''));
        my $piece_no = $kind_of_piece{$colour}{$kind};
        my $slot = 64 * $piece_no + 8 * $row + $col;
        $key ^= $ah{$slot};
    }

    # [colour argument, side argument, key slot] for each castle flag
    my @castle_slots = ([128, 0, 768], [128, 1, 769], [0, 0, 770], [0, 1, 771]);
    foreach my $entry (@castle_slots)
    {
        my ($colour, $side, $slot) = @{$entry};
        $key ^= $ah{$slot} if ($board->can_castle($colour, $side));
    }

    if ($board->{enpa} ne '0')
    {
        my $file = $rh{lc(substr($board->ep_square(), 0, 1))};
        # row and piece code to look for next to the en passant file
        my ($row, $pawn) = ($board->to_move() eq '0') ? (3, '1') : (4, '129');
        my $beside = (($file > 0) && ($board->get_piece_at($row, $file-1) eq $pawn))
                  || (($file < 7) && ($board->get_piece_at($row, $file+1) eq $pawn));
        $key ^= $ah{772 + $file} if ($beside);
    }

    $key ^= $ah{780} if ($board->to_move() eq '128');

    return $key;
}
Compress the data
This needs to be done in c#. The code for the project used to tokenize the opening book is available here. It includes the data files generated by the above process in case you want to use the same games and parameters for building the book.