From: Eric Bollengier
Date: Wed, 7 Apr 2010 08:52:53 +0000 (+0200)
Subject: Update bench procedure
X-Git-Tag: Release-5.2.1~126
X-Git-Url: https://git.sur5r.net/?p=bacula%2Fdocs;a=commitdiff_plain;h=cf55541bc34a70068a3ea7c07a01a846c3148274

Update bench procedure
---

diff --git a/docs/techlogs/accurate-test.ods b/docs/techlogs/accurate-test.ods
index 57498101..86439c28 100644
Binary files a/docs/techlogs/accurate-test.ods and b/docs/techlogs/accurate-test.ods differ
diff --git a/docs/techlogs/accurate-test.pl b/docs/techlogs/accurate-test.pl
new file mode 100755
index 00000000..61ee1fe4
--- /dev/null
+++ b/docs/techlogs/accurate-test.pl
@@ -0,0 +1,268 @@
+#!/usr/bin/perl -w
+use strict;
+
+# this test needs 25GB of free space for the database
+# you need the pwgen package to fill the Path and Filename tables
+
+# start with:
+# filename=1 ./accurate-test.pl &
+# path=1 ./accurate-test.pl &
+# file=1 ./accurate-test.pl &
+
+# You can add more files with
+# start=400 end=600 file=1 ./accurate-test.pl
+
+# then test with
+# ./accurate-test.pl
+
+# you can run optimize with
+# filename=1 optimize=1 ./accurate-test.pl
+
+# you can get mysql profiling info with
+# profile=1 ./accurate-test.pl
+
+# to get only the measurement values (they are printed to STDERR)
+# ./accurate-test.pl > /dev/null
+
+# to use something other than mysql on regress
+# dbi="DBI:Pg:database=regress" ./accurate-test.pl
+
+use DBI;
+my $dbi = $ENV{dbi} || "DBI:mysql:database=regress";
+# MySQL wants "ANALYZE TABLE", PostgreSQL plain "ANALYZE"
+my $table = ($dbi =~ /mysql/i) ? 'TABLE' : '';
+my $dbh = DBI->connect($dbi, "", "");
+die "Can't connect to your database:\n$DBI::errstr\n"
+    unless ($dbh);
+
+$dbh->{FetchHashKeyName} = 'NAME_lc';
+
+if ($ENV{filename}) {
+    my $t = time;
+    my $j = 0;
+    $dbh->begin_work();
+    print "Fill Filename table\n";
+    open(FILE, "pwgen -1 11 12000000|") or die "Please, install pwgen";
+    my $sth = $dbh->prepare("INSERT INTO Filename (Name) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
+    while (!eof(FILE))
+    {
+        my @val;
+        for my $i (1..10) {
+            my $l = <FILE>;
+            chop($l);
+            push @val, "$l$j";  # suffix with a counter to keep names unique
+            $j++;
+        }
+        if (scalar(@val) == 10) {
+            $sth->execute(@val) or die $DBI::errstr;
+        }
+    }
+    close(FILE);
+    $dbh->commit();
+    print "Inserting to Filename ", 12000000/(time - $t || 1), " f/sec\n";
+    $dbh->do("OPTIMIZE TABLE Filename") if $ENV{optimize};
+    $dbh->do("ANALYZE $table Filename");
+}
+if ($ENV{path}) {
+    my $j = 0;
+    my $t = time;
+    $dbh->begin_work();
+    print "Fill Path table\n";
+    open(DIR, "pwgen -1 8 4000000|") or die "Please, install pwgen";
+    my $sth = $dbh->prepare("INSERT INTO Path (Path) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
+    while (!eof(DIR))
+    {
+        my @val;
+        for my $i (1..10) {
+            my $l = <DIR>;
+            chop($l);
+            push @val, "$l$j";
+            $j++;
+        }
+        if (scalar(@val) == 10) {
+            $sth->execute(@val) or die "@val $DBI::errstr";
+        }
+    }
+    close(DIR);
+    $dbh->commit();
+    print "Inserting to Path ", 4000000/(time - $t || 1), " p/sec\n";
+    $dbh->do("OPTIMIZE TABLE Path") if $ENV{optimize};
+    $dbh->do("ANALYZE $table Path");
+}
+
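+# A minimal sanity check after the fill steps -- a sketch only, assuming a
+# hypothetical check=1 switch that is not part of the original procedure:
+#
+#   if ($ENV{check}) {
+#       for my $tab (qw/Filename Path/) {
+#           my $c = $dbh->selectrow_arrayref("SELECT COUNT(*) FROM $tab");
+#           print "$tab has $c->[0] rows\n";
+#       }
+#   }
+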
$dbh->prepare("INSERT INTO File (JobId, FileIndex, PathId, FilenameId,LStat, MD5) VALUES ($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat','')"); + my $fid = $jobid*10; + my $pid = $jobid*10; + for my $nb (1..$nb) { + $fid = ($fid + 1111) % 12000000; + $pid = ($pid + 11) % 3000000; + $sth->execute($pid, $fid, + $pid, $fid+1, + $pid, $fid+2, + $pid, $fid+3, + $pid, $fid+4, + $pid, $fid+5, + $pid, $fid+6, + $pid, $fid+7, + $pid, $fid+8, + $pid, $fid+9 + ) or die DBI::errstr; + } + print "JobId=$jobid files=$nb\n"; + $dbh->commit(); + print "Insert takes ", scalar(time) - $t, + "secs for ", $nb*10, " records\n"; + $jobid++; + } + + $dbh->do("OPTIMIZE TABLE File,Job") if $ENV{optimize}; + $dbh->do("ANALYZE $table Job"); + $dbh->do("ANALYZE $table File"); +} + + +if (!($ENV{file} or $ENV{path} or $ENV{filename}) or $ENV{testsql}) +{ + do_bench("13,23", 400000); + do_bench("17,37", 1000000); + do_bench("10,13", 2200000); + do_bench("20,27", 2500000); + do_bench("50,60", 4000000); + do_bench("70,80,82,92,103", 4600000); + do_bench("30,40,17,23,34", 4900000); + do_bench("110,120,130", 6000000); + print_mysql_profiles() if $ENV{profile}; +} + +sub do_bench +{ + my ($jobid, $nb) = @_; + + if ($dbi =~ /mysql/i) { + $dbh->do("set profiling=1") if $ENV{profile}; + foreach my $i (1..4) { + print "Doing Accurate query...\n"; + my $begin = time; + my $ret = $dbh->selectrow_arrayref("SELECT count(1) from ( +SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5 +FROM ( + SELECT FileId, Job.JobId AS JobId, FileIndex, File.PathId AS PathId, + File.FilenameId AS FilenameId, LStat, MD5 + FROM Job, File, + ( SELECT MAX(JobTDate) AS JobTDate, PathId, FilenameId + FROM + ( SELECT JobTDate, PathId, FilenameId + FROM File JOIN Job USING (JobId) WHERE File.JobId IN ($jobid) + UNION ALL + SELECT JobTDate, PathId, FilenameId + FROM BaseFiles JOIN File USING (FileId) JOIN Job ON (BaseJobId = Job.JobId) + WHERE BaseFiles.JobId IN ($jobid) + ) AS tmp GROUP BY PathId, FilenameId + ) AS T1 + WHERE (Job.JobId IN ( SELECT DISTINCT BaseJobId FROM BaseFiles WHERE JobId IN ($jobid)) OR Job.JobId IN ($jobid)) + AND T1.JobTDate = Job.JobTDate + AND Job.JobId = File.JobId + AND T1.PathId = File.PathId + AND T1.FilenameId = File.FilenameId + ) AS Temp + JOIN Filename ON (Filename.FilenameId = Temp.FilenameId) + JOIN Path ON (Path.PathId = Temp.PathId) +WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC +) as a") or die "Can't do query $DBI::errstr"; + + print "Big select takes ", scalar(time) - $begin, "secs for $ret->[0] records\n"; + print STDERR + "new|$ret->[0]|$nb|", (scalar(time) - $begin),"\n"; + } + if (0) { + foreach my $i (1..4) { + print "Doing Old Accurate query...\n"; + my $begin = time; + my $ret = $dbh->selectrow_arrayref("SELECT count(1) from ( + SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, File.LStat + FROM ( + SELECT max(FileId) as FileId, PathId, FilenameId + FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN ($jobid)) AS F + GROUP BY PathId, FilenameId + ) AS Temp + JOIN Filename ON (Filename.FilenameId = Temp.FilenameId) + JOIN Path ON (Path.PathId = Temp.PathId) + JOIN File ON (File.FileId = Temp.FileId) +WHERE File.FileIndex > 0 ORDER BY JobId, FileIndex ASC +) as a") or die "Can't do query $DBI::errstr"; + + print "Big select takes ", 
+        # disabled: previous version of the accurate query, kept for comparison
+        if (0) {
+            foreach my $i (1..4) {
+                print "Doing Old Accurate query...\n";
+                my $begin = time;
+                my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
+  SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, File.LStat
+    FROM (
+       SELECT max(FileId) as FileId, PathId, FilenameId
+         FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN ($jobid)) AS F
+        GROUP BY PathId, FilenameId
+       ) AS Temp
+    JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
+    JOIN Path ON (Path.PathId = Temp.PathId)
+    JOIN File ON (File.FileId = Temp.FileId)
+   WHERE File.FileIndex > 0 ORDER BY JobId, FileIndex ASC
+) as a") or die "Can't do query $DBI::errstr";
+
+                print "Big select takes ", scalar(time) - $begin,
+                      " secs for $ret->[0] records\n";
+                print STDERR
+                    "old|$ret->[0]|$nb|", (scalar(time) - $begin), "\n";
+            }
+        }
+    } else {
+        foreach my $i (1..4) {
+            print "Doing Accurate query...\n";
+            my $begin = time;
+            my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
+SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
+FROM (
+    SELECT DISTINCT ON (FilenameId, PathId) StartTime, JobId, FileId,
+           FileIndex, PathId, FilenameId, LStat, MD5
+      FROM File JOIN Job USING (JobId)
+     WHERE JobId IN ($jobid)
+     ORDER BY FilenameId, PathId, StartTime DESC
+) AS Temp
+JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
+JOIN Path ON (Path.PathId = Temp.PathId)
+WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
+) as a") or die "Can't do query $DBI::errstr";
+
+            print "Big select takes ", scalar(time) - $begin,
+                  " secs for $ret->[0] records\n";
+            print STDERR
+                "new|$ret->[0]|$nb|", (scalar(time) - $begin), "\n";
+        }
+    }
+}
+
+use Data::Dumper;
+sub print_mysql_profiles
+{
+    if ($dbi =~ /mysql/i) {
+        my $all = $dbh->selectall_arrayref("show profiles");
+        open(FP, ">>log") or die "Can't open log: $!";
+        print FP Data::Dumper::Dumper($all);
+
+        # use the Query_ID reported by "show profiles" (it starts at 1)
+        foreach my $row (@$all) {
+            my $qid = $row->[0];
+            my $r = $dbh->selectall_arrayref("show profile cpu for query $qid");
+            foreach my $j (@$r) {
+                print FP sprintf("%6i | %20s | %8s | %8s | %8s\n", $qid, @$j);
+            }
+            print FP "\n";
+        }
+
+        close(FP);
+    }
+}
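+
+# The benchmark lines printed to STDERR have the form "new|records|nb|secs"
+# ("old|..." for the disabled query). A minimal way to turn a saved run into
+# records/sec -- a sketch, assuming the output was collected with
+# "./accurate-test.pl 2>bench.log >/dev/null":
+#
+#   perl -F'\|' -lane 'print "$F[2] files: ", $F[1]/($F[3]||1), " rec/s"' bench.log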