4 # this test needs 25GB of free space for the database
5 # you need pwgen package to fill path and filename tables
8 # filename=1 ./fill_table.pl &
9 # path=1 ./fill_table.pl &
10 # file=1 ./fill_table.pl &
12 # You can add more files with
13 # start=400 end=600 file=1 ./fill_table.pl
18 # you can run optimize with
19 # filename=1 optimize=1 ./fill_table.pl
21 # you can get mysql profiling info with
22 # profile=1 ./fill_table.pl
25 # ./fill_table.pl > /dev/null
27 # to use something else than mysql on regress
28 # dbi="DBI:Pg:database=regress" ./fill_table.pl
31 my $dbi = $ENV{dbi} || "DBI:mysql:database=regress";
32 my $table = ($dbi =~ /mysql/i)?'TABLE':'';
33 my $dbh = DBI->connect($dbi, "", "");
34 die "Can't connect to your database:\n$DBI::errstr\n"
37 $dbh->{FetchHashKeyName} = 'NAME_lc';
43 print "Fill Filename table\n";
44 open(FILE, "pwgen -1 11 12000000|") or die "Please, install pwgen";
45 my $sth = $dbh->prepare("INSERT INTO Filename (Name) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
55 if (scalar(@val) == 10) {
56 $sth->execute(@val) or die;
61 print "Inserting to Filename ", 12000000/(time - $t), "f/sec\n";
62 $dbh->do("OPTIMIZE TABLE Filename") if $ENV{optimize};
63 $dbh->do("ANALYZE $table Filename");
69 print "Fill Path table\n";
70 open(DIR, "pwgen -1 8 4000000|") or die "Please, install pwgen";
71 my $sth = $dbh->prepare("INSERT INTO Path (Path) VALUES (?),(?),(?),(?),(?),(?),(?),(?),(?),(?)");
81 if (scalar(@val) == 10) {
82 $sth->execute(@val) or die "@val DBI::errstr";
87 print "Inserting to Path ", 4000000/(time - $t), "p/sec\n";
88 $dbh->do("OPTIMIZE TABLE Path") if $ENV{optimize};
89 $dbh->do("ANALYZE $table Path");
93 # jobs 10, 20, 30, 40..400 are full of 2M files
94 # jobs 1..5 are incr of 200,000 files
95 # jobs 6..9 are diff of 500,000 files
96 my $jobid=$ENV{start} || 1;
97 my $end = $ENV{end} || 400;
100 while ($jobid < $end) {
101 if (($jobid % 10) == 0) {
104 } elsif (($jobid % 10) < 6) {
115 INSERT INTO Job (Job, Name, JobFiles, Type, Level, JobStatus, JobTDate)
116 VALUES ('AJob$jobid', 'Ajob$jobid', $nb*10, 'B', '$type', 'T', '$t')");
117 my $sth = $dbh->prepare("INSERT INTO File (JobId, FileIndex, PathId, FilenameId,LStat, MD5) VALUES ($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''),($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat', ''), ($jobid,1,?,?,'lstat','')");
120 for my $nb (1..$nb) {
121 $fid = ($fid + 1111) % 12000000;
122 $pid = ($pid + 11) % 3000000;
123 $sth->execute($pid, $fid,
133 ) or die DBI::errstr;
135 print "JobId=$jobid files=$nb\n";
137 print "Insert takes ", scalar(time) - $t,
138 "secs for ", $nb*10, " records\n";
142 $dbh->do("OPTIMIZE TABLE File,Job") if $ENV{optimize};
143 $dbh->do("ANALYZE $table Job");
144 $dbh->do("ANALYZE $table File");
148 if (!($ENV{file} or $ENV{path} or $ENV{filename}) or $ENV{testsql})
150 do_bench("13,23", 400000);
151 do_bench("17,37", 1000000);
152 do_bench("10,13", 2200000);
153 do_bench("20,27", 2500000);
154 do_bench("50,60", 4000000);
155 do_bench("70,80,82,92,103", 4600000);
156 do_bench("30,40,17,23,34", 4900000);
157 do_bench("110,120,130", 6000000);
158 print_mysql_profiles() if $ENV{profile};
163 my ($jobid, $nb) = @_;
165 if ($dbi =~ /mysql/i) {
166 $dbh->do("set profiling=1") if $ENV{profile};
167 foreach my $i (1..4) {
168 print "Doing Accurate query...\n";
170 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
171 SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
173 SELECT FileId, Job.JobId AS JobId, FileIndex, File.PathId AS PathId,
174 File.FilenameId AS FilenameId, LStat, MD5
176 ( SELECT MAX(JobTDate) AS JobTDate, PathId, FilenameId
178 ( SELECT JobTDate, PathId, FilenameId
179 FROM File JOIN Job USING (JobId) WHERE File.JobId IN ($jobid)
181 SELECT JobTDate, PathId, FilenameId
182 FROM BaseFiles JOIN File USING (FileId) JOIN Job ON (BaseJobId = Job.JobId)
183 WHERE BaseFiles.JobId IN ($jobid)
184 ) AS tmp GROUP BY PathId, FilenameId
186 WHERE (Job.JobId IN ( SELECT DISTINCT BaseJobId FROM BaseFiles WHERE JobId IN ($jobid)) OR Job.JobId IN ($jobid))
187 AND T1.JobTDate = Job.JobTDate
188 AND Job.JobId = File.JobId
189 AND T1.PathId = File.PathId
190 AND T1.FilenameId = File.FilenameId
192 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
193 JOIN Path ON (Path.PathId = Temp.PathId)
194 WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
195 ) as a") or die "Can't do query $DBI::errstr";
197 print "Big select takes ", scalar(time) - $begin, "secs for $ret->[0] records\n";
199 "new|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
202 foreach my $i (1..4) {
203 print "Doing Old Accurate query...\n";
205 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
206 SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, File.LStat
208 SELECT max(FileId) as FileId, PathId, FilenameId
209 FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN ($jobid)) AS F
210 GROUP BY PathId, FilenameId
212 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
213 JOIN Path ON (Path.PathId = Temp.PathId)
214 JOIN File ON (File.FileId = Temp.FileId)
215 WHERE File.FileIndex > 0 ORDER BY JobId, FileIndex ASC
216 ) as a") or die "Can't do query $DBI::errstr";
218 print "Big select takes ", scalar(time) - $begin,
219 "secs for $ret->[0] records\n";
221 "old|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
225 foreach my $i (1..4) {
226 print "Doing Accurate query...\n";
228 my $ret = $dbh->selectrow_arrayref("SELECT count(1) from (
229 SELECT Path.Path, Filename.Name, Temp.FileIndex, Temp.JobId, LStat, MD5
231 SELECT DISTINCT ON (FilenameId, PathId) StartTime, JobId, FileId,
232 FileIndex, PathId, FilenameId, LStat, MD5
233 FROM File JOIN Job USING (JobId)
234 WHERE JobId IN ($jobid)
235 ORDER BY FilenameId, PathId, StartTime DESC
237 JOIN Filename ON (Filename.FilenameId = Temp.FilenameId)
238 JOIN Path ON (Path.PathId = Temp.PathId)
239 WHERE FileIndex > 0 ORDER BY Temp.JobId, FileIndex ASC
240 ) as a") or die "Can't do query $DBI::errstr";
242 print "Big select takes ", scalar(time) - $begin,
243 "secs for $ret->[0] records\n";
245 "new|$ret->[0]|$nb|", (scalar(time) - $begin),"\n";
251 sub print_mysql_profiles
253 if ($dbi =~ /mysql/i) {
254 my $all = $dbh->selectall_arrayref("show profiles");
256 print FP Data::Dumper::Dumper($all);
258 for(my $i=0; $i < (scalar(@$all)); $i++) {
259 my $r = $dbh->selectall_arrayref("show profile cpu for query $i");
260 foreach my $j (@$r) {
261 print FP sprintf("%6i | %20s | %8s | %3i | %3i\n", $i, @$j);