Changeset created on Tue Jan 12 12:03:15 CET 2010 by Seek You Too Description: Improved logging and exception handling for deletions. Exceptions raised while performing a DeleteIds run are now propagated instead of being hidden. The actions using the DeleteIds component will now only state they are finished if no exceptions were raised. The DeleteIds component will now also write log information to the generalHarvestLog and the events log of the repository. This makes the deleteids.log deprecated. Baseline version: meresco-harvester/tags/version_5.2.3.1 diff --unidirectional-new-file --exclude=.svn --exclude='*.pyc' --exclude=applied --recursive --unified meresco-harvester-5.2.3.1/merescoharvester/harvester/deleteids.py meresco-harvester-trunk/merescoharvester/harvester/deleteids.py --- meresco-harvester-5.2.3.1/merescoharvester/harvester/deleteids.py 2010-01-12 11:57:51.000000000 +0100 +++ meresco-harvester-trunk/merescoharvester/harvester/deleteids.py 2010-01-12 10:16:34.000000000 +0100 @@ -11,6 +11,7 @@ # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. 
# http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2010 Seek You Too (CQ2) http://www.cq2.nl # # This file is part of "Meresco Harvester" # @@ -33,12 +34,14 @@ from virtualuploader import UploaderException from eventlogger import NilEventLogger, EventLogger from harvesterlog import idfilename, HarvesterLog +from eventlogger import CompositeLogger, NilEventLogger from string import strip from cq2utils_old import binderytools from cq2utils_old import wrappers import sys, os from sets import Set from mapping import Upload +from traceback import format_exception def readIds(filename): ids = Set() @@ -61,56 +64,59 @@ class DeleteIds: - def __init__(self, repository, stateDir, logDir): + def __init__(self, repository, stateDir, logDir, generalHarvestLog=NilEventLogger()): self._stateDir = stateDir self._logDir = logDir - self.repository = repository - self.logger = EventLogger(os.path.join(self._logDir, 'deleteids.log')) - self.filename = idfilename(self._stateDir, self.repository.id) - self.markLogger = True + self._repository = repository + self._logger = HarvesterLog(stateDir, logDir, repository.id) + self._eventLogger = CompositeLogger([ + (['*'], EventLogger(os.path.join(self._logDir, 'deleteids.log'))), + (['*'], self._logger.eventLogger()), + (['ERROR', 'INFO', 'WARN'], generalHarvestLog), + ]) + self._filename = idfilename(self._stateDir, self._repository.id) + self._markLogger = True def ids(self): - return readIds(self.filename) + return readIds(self._filename) - def delete(self, trials = 3): - uploader = self.repository.createUploader(self.logger) + def delete(self): + uploader = self._repository.createUploader(self._eventLogger) uploader.start() try: - trials = min(10, max(1, trials)) - for i in range(trials): - remaining = self._delete(uploader) - if not remaining: - break + self._delete(uploader) finally: uploader.stop() def deleteFile(self, filename): - self.filename = filename - self.markLogger 
= False + self._filename = filename + self._markLogger = False self.delete() def _delete(self, uploader): ids = self.ids() done = Set() - exceptions = [] try: for id in ids: try: anUpload = Upload() anUpload.id = id - anUpload.repository = self.repository + anUpload.repository = self._repository uploader.delete(anUpload) done.add(id) - except UploaderException, e: - exceptions.append((id,e)) + except: + xtype,xval,xtb = sys.exc_info() + errorMessage = '|'.join(map(str.strip,format_exception(xtype,xval,xtb))) + self._eventLogger.error(errorMessage, id=id) + raise return ids - done finally: self._finish(ids - done) def _finish(self, remainingIDs): - writeIds(self.filename, remainingIDs) - if self.markLogger and not remainingIDs: - logger = HarvesterLog(self._stateDir, self._logDir, self.repository.id) + writeIds(self._filename, remainingIDs) + if self._markLogger and not remainingIDs: + logger = HarvesterLog(self._stateDir, self._logDir, self._repository.id) try: logger.markDeleted() finally: diff --unidirectional-new-file --exclude=.svn --exclude='*.pyc' --exclude=applied --recursive --unified meresco-harvester-5.2.3.1/merescoharvester/harvester/repository.py meresco-harvester-trunk/merescoharvester/harvester/repository.py --- meresco-harvester-5.2.3.1/merescoharvester/harvester/repository.py 2010-01-12 11:57:51.000000000 +0100 +++ meresco-harvester-trunk/merescoharvester/harvester/repository.py 2010-01-12 11:50:05.000000000 +0100 @@ -11,6 +11,7 @@ # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. 
# http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2010 Seek You Too (CQ2) http://www.cq2.nl # # This file is part of "Meresco Harvester" # @@ -36,7 +37,9 @@ from harvester import Harvester, HARVESTED, NOTHING_TO_DO from deleteids import DeleteIds, readIds, writeIds from saharaobject import SaharaObject -import os.path, shutil +from shutil import move +from os.path import isfile, join +from os import remove from eventlogger import NilEventLogger from virtualuploader import UploaderFactory from timeslot import Timeslot @@ -85,21 +88,24 @@ class DeleteIdsAction(Action): def do(self): - d = DeleteIds(self._repository, self._stateDir, self._logDir) + if self._repository.shopClosed(): + return False, 'Not deleting outside timeslots.', False + + d = DeleteIds(self._repository, self._stateDir, self._logDir, generalHarvestLog=self._generalHarvestLog) d.delete() return True, 'Deleted', False class SmoothAction(Action): def __init__(self, repository, stateDir, logDir, generalHarvestLog): Action.__init__(self, repository, stateDir, logDir, generalHarvestLog) - self.filename = os.path.join(self._stateDir, self._repository.key + '.ids') + self.filename = join(self._stateDir, self._repository.key + '.ids') self.oldfilename = self.filename + ".old" def do(self): if self._repository.shopClosed(): return False, 'Not smoothharvesting outside timeslots.', False - if not os.path.isfile(self.oldfilename): + if not isfile(self.oldfilename): result, hasResumptionToken = self._smoothinit(), True else: result, hasResumptionToken = self._harvest() @@ -109,8 +115,8 @@ return result == DONE, 'Smooth reharvest: ' + result, hasResumptionToken def _smoothinit(self): - if os.path.isfile(self.filename): - shutil.move(self.filename, self.oldfilename) + if isfile(self.filename): + move(self.filename, self.oldfilename) else: open(self.oldfilename, 'w').close() open(self.filename, 'w').close() @@ -123,13 +129,15 @@ def _finish(self): 
deletefilename = self.filename + '.delete' - writeIds(deletefilename, readIds(self.oldfilename) - readIds(self.filename)) + if not isfile(deletefilename): + writeIds(deletefilename, readIds(self.oldfilename) - readIds(self.filename)) self._delete(deletefilename) - os.remove(self.oldfilename) + remove(self.oldfilename) + remove(deletefilename) return DONE def _delete(self, filename): - d = DeleteIds(self._repository, self._stateDir, self._logDir) + d = DeleteIds(self._repository, self._stateDir, self._logDir, generalHarvestLog=self._generalHarvestLog) d.deleteFile(filename) def _harvest(self): diff --unidirectional-new-file --exclude=.svn --exclude='*.pyc' --exclude=applied --recursive --unified meresco-harvester-5.2.3.1/README meresco-harvester-trunk/README --- meresco-harvester-5.2.3.1/README 2010-01-12 11:57:52.000000000 +0100 +++ meresco-harvester-trunk/README 2010-01-12 11:59:54.000000000 +0100 @@ -1,4 +1,4 @@ -"Meresco Harvester" is the OAI-Harvester from the Meresco Suite 2005-2008 +"Meresco Harvester" is the OAI-Harvester from the Meresco Suite 2005-2010 http://www.meresco.org @@ -116,4 +116,4 @@ 3. Further Reading ------------------ For more information take a look at http://meresco.org or at the - documentation in the doc directory. \ No newline at end of file + documentation in the doc directory. diff --unidirectional-new-file --exclude=.svn --exclude='*.pyc' --exclude=applied --recursive --unified meresco-harvester-5.2.3.1/test/deleteidstest.py meresco-harvester-trunk/test/deleteidstest.py --- meresco-harvester-5.2.3.1/test/deleteidstest.py 2010-01-12 11:57:51.000000000 +0100 +++ meresco-harvester-trunk/test/deleteidstest.py 2010-01-11 16:54:45.000000000 +0100 @@ -11,6 +11,7 @@ # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. 
# http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2010 Seek You Too (CQ2) http://www.cq2.nl # # This file is part of "Meresco Harvester" # @@ -30,7 +31,7 @@ # ## end license ## -import unittest, os, shutil +from cq2utils_old import CallTrace, CQ2TestCase from merescoharvester.harvester import harvesterlog from merescoharvester.harvester.deleteids import DeleteIds, readIds from sets import Set @@ -38,35 +39,36 @@ from merescoharvester.harvester import deleteids from tempfile import mkdtemp from shutil import rmtree +from os.path import join, isfile +from os import makedirs -class DeleteIdsTest(unittest.TestCase): + +class DeleteIdsTest(CQ2TestCase): def setUp(self): - self.stateDir = mkdtemp() - self.logDir = mkdtemp() + CQ2TestCase.setUp(self) + self.stateDir = join(self.tempdir, 'state') + makedirs(self.stateDir) + self.logDir = join(self.tempdir, 'log') - def tearDown(self): - rmtree(self.stateDir) - rmtree(self.logDir) - - def testDeleteWithOneFailure(self): + def testDeleteWithFailure(self): repository = MockRepositoryAndUploader() idfile = file(harvesterlog.idfilename(self.stateDir, repository.id), 'w') - idfile.write('mock:1\nmock:2\n\n\t\nmock:3\nmock:2\nmock:2\n') + idfile.write('mock:1\nmock:2\n\n\t\nmock:raises:server:crash\nmock:2\nmock:2\n') idfile.close() self.createStatsFile(repository) dt = DeleteIds(repository, self.stateDir, self.logDir) - self.assertEquals(Set(['mock:1','mock:2','mock:3']),dt.ids()) - dt.delete(trials=1) - dlogfile = os.path.join(self.logDir,'deleteids.log') - self.assert_(os.path.isfile(dlogfile)) - dlog = open(dlogfile) - s = Set(map(lambda l:l.split('\t')[2],dlog)) - self.assertEquals(Set(['[mock:1]','[mock:2]','[mock:3]']),s) - dlog.close() + self.assertEquals(Set(['mock:1','mock:2','mock:raises:server:crash']),dt.ids()) + try: + dt.delete() + self.fail() + except UploaderException, e: + self.assertTrue('crashed' in str(e)) + dlogfile = 
join(self.logDir,'deleteids.log') + self.assertTrue(isfile(dlogfile)) + dlog = open(dlogfile).read() + self.assertTrue('[mock:raises:server:crash]' in dlog, dlog) dt = DeleteIds(repository, self.stateDir, self.logDir) - self.assertEquals(Set(['mock:3']),dt.ids()) - logger = harvesterlog.HarvesterLog(self.stateDir, self.logDir, repository.id) - self.assert_(logger.from_) + self.assertTrue('mock:raises:server:crash' in dt.ids(), dt.ids()) def testDelete(self): repository = MockRepositoryAndUploader() @@ -76,7 +78,7 @@ self.createStatsFile(repository) dt = DeleteIds(repository, self.stateDir, self.logDir) self.assertEquals(5, len(dt.ids())) - dt.delete(trials=1) + dt.delete() dt = DeleteIds(repository, self.stateDir, self.logDir) self.assertEquals(0, len(dt.ids())) logger = harvesterlog.HarvesterLog(self.stateDir, self.logDir, repository.id) @@ -99,7 +101,7 @@ def testDeleteOtherFilename(self): repository = MockRepositoryAndUploader() - filename = os.path.join(self.stateDir, 'delete.ids.in.this.file') + filename = join(self.stateDir, 'delete.ids.in.this.file') idfile = file(filename, 'w') idfile.write('mock:5\nmock:6\nmock:7\nmock:8\nmock:9\n') idfile.close() @@ -110,41 +112,23 @@ self.assertEquals(Set(['mock:5','mock:6','mock:7','mock:8','mock:9']),repository.deleted_ids) logger = harvesterlog.HarvesterLog(self.stateDir, self.logDir, repository.id) - #self.assert_(not logger.from_) - def testDeleteWithCtrlC(self): repository = MockRepositoryAndUploader() idfile = file(harvesterlog.idfilename(self.stateDir, repository.id), 'w') - idfile.write('mock:11\nmock:12\n\n\t\nmock:13\nmock:14\nmock:15\n') + idfile.write('mock:b\n\n\t\nmock:raises:system:exit\nmock:14\nmock:15\n') idfile.close() self.createStatsFile(repository) dt = DeleteIds(repository, self.stateDir, self.logDir) - self.assertEquals(5, len(dt.ids())) + self.assertEquals(4, len(dt.ids())) try: - dt.delete(trials=1) + dt.delete() self.fail() except SystemExit, e: pass dt = DeleteIds(repository, self.stateDir, 
self.logDir) self.assertEquals(3, len(dt.ids())) - def testTrials(self): - repository = MockRepositoryAndUploader() - idfile = file(harvesterlog.idfilename(self.stateDir, repository.id), 'w') - idfile.write('mock:21\nmock:22\nmock:23\nmock:24\nmock:25\n') - idfile.close() - dt = DeleteIds(repository, self.stateDir, self.logDir) - self.assertEquals(5, len(dt.ids())) - dt.delete(trials=1) - dt = DeleteIds(repository, self.stateDir, self.logDir) - self.assertEquals(1, len(dt.ids())) - self.assertEquals(1, repository.deleteMock24Count) - dt.delete() - self.assertEquals(0, len(dt.ids())) - self.assertEquals(3, repository.deleteMock24Count) - - def createStatsFile(self,repository): logger = harvesterlog.HarvesterLog(self.stateDir, self.logDir, repository.id) logger.startRepository('A beautiful name') @@ -170,13 +154,9 @@ id = anUpload.id self.uploads.append(anUpload) self.logger.logLine('UPLOADER','START deleting',id=id) - if id == 'mock:3': + if id == 'mock:raises:server:crash': raise UploaderException(uploadId=id, message='Sorry, but the vm has crashed.') - if id == 'mock:13': + if id == 'mock:raises:system:exit': raise SystemExit() - if id == 'mock:24': - self.deleteMock24Count += 1 - if self.deleteMock24Count < 3: - raise UploaderException(uploadId=id, message='Sorry, but cannot delete mock24') self.deleted_ids.add(id) self.logger.logLine('UPLOADER','END deleting',id=id) diff --unidirectional-new-file --exclude=.svn --exclude='*.pyc' --exclude=applied --recursive --unified meresco-harvester-5.2.3.1/test/smoothactiontest.py meresco-harvester-trunk/test/smoothactiontest.py --- meresco-harvester-5.2.3.1/test/smoothactiontest.py 2010-01-12 11:57:51.000000000 +0100 +++ meresco-harvester-trunk/test/smoothactiontest.py 2010-01-12 10:17:33.000000000 +0100 @@ -11,6 +11,7 @@ # Copyright (C) 2007-2009 Stichting Kennisnet Ict op school. 
# http://www.kennisnetictopschool.nl # Copyright (C) 2009 Tilburg University http://www.uvt.nl +# Copyright (C) 2010 Seek You Too (CQ2) http://www.cq2.nl # # This file is part of "Meresco Harvester" # @@ -142,7 +143,7 @@ def testSmooth_Delete(self): class MockDelete: usedrep, usedStateDir, usedLogDir, filename = None, None, None, None - def __init__(self, rep, stateDir, logDir): + def __init__(self, rep, stateDir, logDir, **kwargs): MockDelete.usedrep = rep MockDelete.usedStateDir = stateDir MockDelete.usedLogDir = logDir