SQL Puzzle
Dear lazyweb,
I want to mine a code repository for data to map past bugs to sourcecode files.
I have written a small PHP script (the initial version of the script can be found here) to import the relevant data from a Subversion repository into the following tables of a relational database:
I want to mine a code repository for data to map past bugs to sourcecode files.
I have written a small PHP script (the initial version of the script can be found here) to import the relevant data from a Subversion repository into the following tables of a relational database:
What I need now is two queries to ask the database forbugs changes paths -------- -------- ------- bug_id path_id <--> path_id revision <--> revision path
- paths that are most commonly changed during bugfix commits and
- paths that are commonly changed together
03/12/2007 at 21:16 Permalink
SELECT path, COUNT(*) AS cnt
FROM paths
JOIN changes USING (path_id)
JOIN bugs USING (revision)
GROUP BY revision
ORDER BY cnt DESC
Paths that are commonly changed together:
SELECT p1.path AS path1, p2.path AS path2, COUNT(*) AS cnt
FROM paths AS p1
JOIN changes AS c1 ON (p1.path_id = c1.path_id)
JOIN changes AS c2 ON (c1.revision = c2.revision AND c1.path_id < c2.path_id)
JOIN paths AS p2 ON (p2.path_id = c2.path_id)
GROUP BY path1, path2
ORDER BY cnt DESC
Reply
04/12/2007 at 09:01 Permalink
Reply
03/12/2007 at 21:34 Permalink
-- paths that are most commonly changed
-- during bugfix commits
select path, count(*)
from paths
group by path
order by 2 desc
;
-- paths that are commonly changed together
select p1.path, p2.path, count(*)
from changes c1
join changes c2 on c1.revision = c2.revision
and c1.path_id != c2.path_id
join paths p1 on c1.path_id = p1.path_id
join paths p2 on c2.path_id = p2.path_id
group by p1.path, p2.path
order by 3 desc
;
Reply
03/12/2007 at 21:47 Permalink
Reply
04/12/2007 at 09:07 Permalink
Reply
04/12/2007 at 19:07 Permalink
Reply
04/12/2007 at 19:17 Permalink
Reply
03/12/2007 at 23:42 Permalink
SELECT tmp.bug_count, p.*
FROM (
SELECT c.path_id, COUNT(*) AS bug_count
FROM changes c
JOIN bugs b USING (revision)
GROUP BY c.path_id
ORDER BY bug_count DESC
LIMIT 10
) AS tmp
JOIN paths p USING (path_id)
SELECT cnt, p1.path AS path1, p2.path AS path2
FROM (
SELECT c1.path_id AS id1, c2.path_id AS id2, COUNT(*) AS cnt
FROM changes c1
JOIN changes c2 ON c2.revision = c1.revision
AND c2.path_id > c1.path_id
GROUP BY c1.path_id, c2.path_id
ORDER BY cnt DESC
LIMIT 10
) AS tmp
JOIN paths p1 ON p1.path_id = tmp.path_id
JOIN paths p2 ON p2.path_id = tmp.path_id
Reply