4 # this test needs 25GB of free space for the database
5 # you need pwgen package to fill path and filename tables
8 # filename=1 ./fill_table.pl &
9 # path=1 ./fill_table.pl &
10 # file=1 ./fill_table.pl &
12 # You can add more files with
13 # start=400 end=600 file=1 ./fill_table.pl
18 # you can run optimize with
19 # filename=1 optimize=1 ./fill_table.pl
21 # you can get mysql profiling info with
22 # profile=1 ./fill_table.pl
25 # ./fill_table.pl > /dev/null
27 # to use something else than mysql on regress
28 # dbi="DBI:Pg:database=regress" ./fill_table.pl
31 my $dbi = $ENV{dbi} || "DBI:mysql:database=regress";
32 my $table = ($dbi =~ /mysql/i)?'TABLE':'';
33 my $dbh = DBI->connect($dbi, "", "");
34 die "Can't connect to your database:\n$DBI::errstr\n"
37 $dbh->{FetchHashKeyName} = 'NAME_lc';
43 print "Fill Filename table\n";
44 open(FILE, "pwgen -1 11 12000000|") or die "Please, install pwgen";
45 my $sth = $dbh->prepare("INSERT INTO Filename (Name) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
55 if (scalar(@val) == 10) {
56 $sth->execute(@val) or die;
61 print "Inserting to Filename ", 12000000/(time - $t), "f/sec\n";
62 $dbh->do("OPTIMIZE TABLE Filename") if $ENV{optimize};
63 $dbh->do("ANALYZE $table Filename");
69 print "Fill Path table\n";
70 open(DIR, "pwgen -1 8 4000000|") or die "Please, install pwgen";
71 my $sth = $dbh->prepare("INSERT INTO Path (Path) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
81 if (scalar(@val) == 10) {
82 $sth->execute(@val) or die "@val DBI::errstr";
87 print "Inserting to Path ", 4000000/(time - $t), "p/sec\n";
88 $dbh->do("OPTIMIZE TABLE Path") if $ENV{optimize};
89 $dbh->do("ANALYZE $table Path");
93 # jobs 10, 20, 30, 40..400 are full of 2M files
94 # jobs 1..5 are incr of 200,000 files
95 # jobs 6..9 are diff of 500,000 files
96 my $jobid=$ENV{start} || 1;
97 my $end = $ENV{end} || 400;
100 while ($jobid < $end) {
101 if (($jobid % 10) == 0) {
104 } elsif (($jobid % 10) < 6) {
115 INSERT INTO Job (Job, Name, JobFiles, Type, Level, JobStatus, JobTDate)
116 VALUES ('AJob$jobid', 'Ajob$jobid', $nb*10, 'B', '$type', 'T', '$t')");
117 my $sth = $dbh->prepare("INSERT INTO File (JobId, FileIndex, PathId, FilenameId,LStat, MD5) VALUES ($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat','')");
120 for my $nb (1..$nb) {
121 $fid = ($fid + 1111) % 12000000;
122 $pid = ($pid + 11) % 3000000;
123 $sth->execute($pid, $fid,
133 ) or die DBI::errstr;
135 print "JobId=$jobid files=", $nb*10, "\n";
137 print "Insert takes ", scalar(time) - $t,
138 "secs for ", $nb*10, " records\n";
142 $dbh->do("OPTIMIZE TABLE File,Job") if $ENV{optimize};
143 $dbh->do("ANALYZE $table Job");
144 $dbh->do("ANALYZE $table File");
148 if (!($ENV{file} or $ENV{path} or $ENV{filename}) or $ENV{testsql})
150 do_bench("13,23", 400000);
151 do_bench("17,37", 1000000);
152 do_bench("10,13", 2200000);
153 do_bench("20,27", 2500000);
154 do_bench("50,60", 4000000);
155 do_bench("70,80,82,92,103", 4600000);
156 do_bench("30,40,17,23,34", 4900000);
157 do_bench("110,120,130", 6000000);
158 print_mysql_profiles() if $ENV{profile};
163 my ($jobid, $nb) = @_;
165 if ($dbi =~ /mysql/i) {
166 $dbh->do("set profiling=1") if $ENV{profile};
167 foreach my $i (1..4) {
168 print "Doing Accurate query...\n";
170 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
171 SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
173 SELECT FileId, Job.JobId AS JobId, FileIndex, File.PathId AS PathId,
174 File.FilenameId AS FilenameId, LStat, MD5
176 ( SELECT MAX(JobTDate) AS JobTDate, PathId, FilenameId
178 ( SELECT JobTDate, PathId, FilenameId
179 FROM File JOIN Job USING (JobId) WHERE File.JobId IN ($jobid)
181 SELECT JobTDate, PathId, FilenameId
182 FROM BaseFiles JOIN File USING (FileId) JOIN Job ON (BaseJobId = Job.JobId)
183 WHERE BaseFiles.JobId IN ($jobid)
184 ) AS tmp GROUP BY PathId, FilenameId
186 WHERE (Job.JobId IN ( SELECT DISTINCT BaseJobId FROM BaseFiles WHERE JobId IN ($jobid)) OR Job.JobId IN ($jobid))
187 AND T1.JobTDate = Job.JobTDate
188 AND Job.JobId = File.JobId
189 AND T1.PathId = File.PathId
190 AND T1.FilenameId = File.FilenameId
192 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
193 JOIN Path ON (Path.PathId = Temp.PathId)
194 WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
195 ) as a") or die "Can't do query $DBI::errstr";
197 print "Big select takes ", scalar(time) - $begin, "secs for $ret->[0] records\n";
199 "new|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
200 print "Doing Accurate query...\n";
202 foreach my $i (1..4) {
204 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
205 SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
207 SELECT FileId, Job.JobId AS JobId, FileIndex, File.PathId AS PathId,
208 File.FilenameId AS FilenameId, LStat, MD5
210 ( SELECT MAX(JobTDate) AS JobTDate, PathId, FilenameId
211 FROM File JOIN Job USING (JobId) WHERE File.JobId IN ($jobid)
212 GROUP BY PathId, FilenameId
214 WHERE Job.JobId IN ($jobid)
215 AND T1.JobTDate = Job.JobTDate
216 AND Job.JobId = File.JobId
217 AND T1.PathId = File.PathId
218 AND T1.FilenameId = File.FilenameId
220 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
221 JOIN Path ON (Path.PathId = Temp.PathId)
222 WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
223 ) as a") or die "Can't do query $DBI::errstr";
225 print "Big select without base takes ", scalar(time) - $begin, "secs for $ret->[0] records\n";
227 "base|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
230 foreach my $i (1..4) {
231 print "Doing Old Accurate query...\n";
233 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
234 SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, File.LStat
236 SELECT max(FileId) as FileId, PathId, FilenameId
237 FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN ($jobid)) AS F
238 GROUP BY PathId, FilenameId
240 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
241 JOIN Path ON (Path.PathId = Temp.PathId)
242 JOIN File ON (File.FileId = Temp.FileId)
243 WHERE File.FileIndex > 0 ORDER BY JobId, FileIndex ASC
244 ) as a") or die "Can't do query $DBI::errstr";
246 print "Big select takes ", scalar(time) - $begin,
247 "secs for $ret->[0] records\n";
249 "old|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
253 foreach my $i (1..4) {
254 print "Doing Accurate query...\n";
256 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
257 SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
259 SELECT DISTINCT ON (FilenameId, PathId) StartTime, JobId, FileId,
260 FileIndex, PathId, FilenameId, LStat, MD5
261 FROM File JOIN Job USING (JobId)
262 WHERE JobId IN ($jobid)
263 ORDER BY FilenameId, PathId, StartTime DESC
265 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
266 JOIN Path ON (Path.PathId = Temp.PathId)
267 WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
268 ) as a") or die "Can't do query $DBI::errstr";
270 print "Big select takes ", scalar(time) - $begin,
271 "secs for $ret->[0] records\n";
273 "new|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
279 sub print_mysql_profiles
281 if ($dbi =~ /mysql/i) {
282 my $all = $dbh->selectall_arrayref("show profiles");
284 print FP Data::Dumper::Dumper($all);
286 for(my $i=0; $i < (scalar(@$all)); $i++) {
287 my $r = $dbh->selectall_arrayref("show profile cpu for query $i");
288 foreach my $j (@$r) {
289 print FP sprintf("%6i | %20s | %8s | %3i | %3i\n", $i, @$j);