Follow up: Vacuum Full progress

Ok, to follow up my recent post I created a small example of how the progress indication could be done. The example is very simple and is only a proof of concept, nothing more. There is no support for multiple file nodes, only relation and index is considered, etc.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import psycopg2
import os
import time
from threading import Thread
        

class Vacuum(Thread):
    def __init__(self, dsn, name):
        super(Vacuum, self).__init__()
        self.connection = psycopg2.connect(dsn)
        self.connection.set_isolation_level(0)
        self.name = name
    def run(self):
        cursor = self.connection.cursor()
        cursor.execute('vacuum full %s' % (self.name))
            
            
class Progress(Thread):
    def __init__(self, dsn, name, pgdata):
        super(Progress, self).__init__()
        self.connection = psycopg2.connect(dsn)
        self.table = name
        self.objects = {}
        cursor = self.connection.cursor()
        cursor.execute("select oid from pg_database where datname=current_database();")   
        db_oid = cursor.fetchone()[0]
        self.path = '%s/base/%s' % (pgdata, str(db_oid))
        cursor.execute("select oid from pg_class where relname='%s'" % (name))
        oid = cursor.fetchone()[0]
        epoch = os.path.getmtime(self.path + '/' + str(oid))
        orig = time.strftime('%H:%M', time.localtime(epoch))
        self.objects[name] = {'filenode': oid, 'epoch': epoch,    'original_time': orig}
        index_query = "select c2.relname, c2.relfilenode FROM pg_catalog.pg_class c, pg_catalog.  pg_class c2, pg_catalog.pg_index i WHERE c.oid=%s::integer AND c.oid = i.indrelid AND i.indexrelid = c2.oid;"
        cursor.execute(index_query % (oid))
        for row in cursor.fetchall():
            name, oid = row[0], row[1]
            epoch = os.path.getmtime(self.path + '/' + str(oid))
            orig = time.strftime('%H:%M', time.localtime(epoch))
            self.objects[name] = {'filenode': oid, 'epoch':       epoch, 'original_time': orig}

    def _check(self):
        done = 0
        for name, attrs in self.objects.items():
            epoch = os.path.getmtime(self.path + '/' + str(attrs['filenode']))
            if epoch > attrs['epoch']:
                self.objects[name]['done_epoch'] = epoch
                done += 1
        return done

    def run(self):
        done = 0
        print "done: %s of %s" % (done, len(self.      objects))
        cursor = self.connection.cursor()
        while len(self.objects) > done:
            tmp, done = done, self._check()
            if tmp != done:
                print "done: %s of %s" % (done,        len(self.objects))
            time.sleep(1)



if __name__ == '__main__':
    import sys
    dsn = 'dbname=testdb user=testuser password=secret host=127.0.0.1'
    vacuum = Vacuum(dsn, 'db_category')
    progress = Progress(dsn, 'db_category', '/path/to/my/pgdata')
    vacuum.start()
    progress.start()
    vacuum.join()
    progress.join()

# vi: set fileencoding=utf-8 :

To do something serious about this the implementation would have to understand which objects have file nodes, support multiple file nodes, time each run and try to estimate progress, etc.

I would also need to understand exactly how VACUUM FULL behaves, when files are written for example.


Kommentarer
Postat av: dbr

lol

2009-02-04 @ 14:01:48

Kommentera inlägget här:

Namn:
Kom ihåg mig?

E-postadress: (publiceras ej)

URL/Bloggadress:

Kommentar:

Trackback
RSS 2.0