Skip to content

Commit

Permalink
Add new can_archive column and functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Hydriz committed Jul 11, 2015
1 parent 80006d2 commit 44aaa07
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 27 deletions.
43 changes: 37 additions & 6 deletions balchivist/sqldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,27 @@ def execute(self, query, params=()):
else:
return result

def getAllDumps(self, wikidb, progress="all"):
def getAllDumps(self, wikidb, progress="all", can_archive="all"):
"""
This function is used to get all dumps of a specific wiki.
- progress (string): Dumps with this progress will be returned, "all"
for all progress statuses.
- can_archive (string): Dumps with this can_archive status will be
returned, "all" for all can_archive statuses.
Returns: Dict with all dumps of a wiki.
"""
dumps = []
conds = ""
if progress == "all":
conds = ""
pass
else:
conds = ' AND progress="%s"' % (progress)
conds += ' AND progress="%s"' % (progress)
if can_archive == "all":
pass
else:
conds += ' AND can_archive="%s"' % (can_archive)
query = [
'SELECT', 'dumpdate',
'FROM', self.dbtable,
Expand Down Expand Up @@ -122,6 +129,30 @@ def claimItem(self, params):
except:
return False

def markCanArchive(self, params):
    """
    This function is used to update the status of whether a dump can be
    archived, by setting can_archive to 1 on the matching row.

    - params (dict): Information about the item with the keys "type",
    "subject" and "dumpdate".

    Returns: True if update is successful, False if an error occurred
    while executing the query.
    """
    conv = BALConverter()
    # The dump date is converted (archivedate=True) before being matched
    # against the dumpdate column. Note that this conversion and the key
    # lookups happen before the try block, so a missing key in params
    # raises instead of returning False.
    arcdate = conv.getDateFromWiki(params['dumpdate'], archivedate=True)
    conds = (params['type'], params['subject'], arcdate)
    query = [
        'UPDATE', self.dbtable,
        'SET', 'can_archive=1',
        'WHERE', 'type=%s AND subject=%s AND dumpdate=%s;'
    ]
    try:
        # Values are passed separately from the query text so that
        # self.execute can bind them to the %s placeholders.
        self.execute(' '.join(query), conds)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are not silently turned into
        # a False return value.
        return False
    return True

def markArchived(self, params):
"""
This function is used to mark an item as archived after doing so.
Expand Down Expand Up @@ -253,9 +284,9 @@ def addNewItem(self, params):
)
query = [
'INSERT INTO', self.dbtable,
'(type, subject, dumpdate, progress, claimed_by, is_archived,',
'is_checked, comments)',
'VALUES', '(%s, %s, %s, %s, NULL, 0, 0, NULL);'
'(type, subject, dumpdate, progress, claimed_by, can_archive,',
'is_archived, is_checked, comments)',
'VALUES', '(%s, %s, %s, %s, NULL, 0, 0, 0, NULL);'
]
try:
self.execute(' '.join(query), conds)
Expand Down
72 changes: 51 additions & 21 deletions dumps.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,31 +188,18 @@ def getAllDumps(self, wikidb):
dumps.append(i.group('dump'))
return sorted(dumps)

def archive(self, wikidb, dumpdate, dumpdir=None, resume=False):
def checkDumpDir(self, dumpdir, wikidb, dumpdate):
"""
This function is for doing the actual archiving process.
This function is used to check if the given dump directory is complete.
- dumpdir (string): The path to the dump directory.
- wikidb (string): The wiki database to check.
- dumpdate (string): The date of the dump in %Y%m%d format.
- dumpdir (string): The path to the dump directory.
- resume (boolean): Whether or not to resume archiving an item.
Returns: True if process is successful, False if otherwise.
Returns: True if dump directory is complete, False if otherwise.
"""
converter = balchivist.BALConverter()
wikiname = converter.getNameFromDB(wikidb)
sitename = converter.getNameFromDB(wikidb, pretext=True)
langname = converter.getNameFromDB(wikidb, format='language')
project = converter.getNameFromDB(wikidb, format='project')
datename = converter.getDateFromWiki(dumpdate)
arcdate = converter.getDateFromWiki(dumpdate, archivedate=True)

if dumpdir is None:
dumps = "%s/%s/%s" % (self.config.get('dumpdir'), wikidb, dumpdate)
else:
dumps = dumpdir
if os.path.exists(dumps):
files = os.listdir(dumps)
if os.path.exists(dumpdir):
files = os.listdir(dumpdir)
else:
# The dump directory does not exist.
# Exit the rest of the function and leave it to another day.
Expand All @@ -234,6 +221,35 @@ def archive(self, wikidb, dumpdate, dumpdir=None, resume=False):
else:
pass
return False

def archive(self, wikidb, dumpdate, dumpdir=None, resume=False):
"""
This function is for doing the actual archiving process.
- wikidb (string): The wiki database to check.
- dumpdate (string): The date of the dump in %Y%m%d format.
- dumpdir (string): The path to the dump directory.
- resume (boolean): Whether or not to resume archiving an item.
Returns: True if process is successful, False if otherwise.
"""
converter = balchivist.BALConverter()
wikiname = converter.getNameFromDB(wikidb)
sitename = converter.getNameFromDB(wikidb, pretext=True)
langname = converter.getNameFromDB(wikidb, format='language')
project = converter.getNameFromDB(wikidb, format='project')
datename = converter.getDateFromWiki(dumpdate)
arcdate = converter.getDateFromWiki(dumpdate, archivedate=True)

if dumpdir is None:
dumps = "%s/%s/%s" % (self.config.get('dumpdir'), wikidb, dumpdate)
else:
dumps = dumpdir
if self.checkDumpDir(dumps, wikidb, dumpdate):
pass
else:
# The dump directory is not suitable to be used, exit the function
return False
count = 0
iaitem = balchivist.BALArchiver('%s-%s' % (wikidb, dumpdate))
if resume:
Expand Down Expand Up @@ -342,10 +358,11 @@ def update(self):
for private in privatedb:
alldb.remove(private)
for db in alldb:
# First check if all new dumps are registered
dumps = self.getAllDumps(db)
stored = self.sqldb.getAllDumps(db)
inprogress = self.sqldb.getAllDumps(db, progress="progress")
cannotarc = self.sqldb.getAllDumps(db, can_archive=0)
# Step 1: Check if all new dumps are registered
for dump in dumps:
if dump in stored:
self.printv("Dump of %s on %s already in the database, "
Expand All @@ -362,7 +379,7 @@ def update(self):
'progress': progress
}
self.sqldb.addNewItem(params=params)
# Then we check if the status of dumps in progress have changed
# Step 2: Check if the status of dumps in progress has changed
for dump in inprogress:
progress = self.getDumpProgress(db, dump)
if progress != 'progress':
Expand All @@ -376,6 +393,19 @@ def update(self):
self.sqldb.updateProgress(params=params)
else:
continue
# Step 3: Check if the dump is available for archiving
for dump in cannotarc:
dumpdir = "%s/%s/%s" % (self.config.get('dumpdir'), db, dump)
if self.checkDumpDir(dumps, wikidb, dumpdate):
# The dump is now suitable to be archived
params = {
'type': 'main',
'subject': db,
'dumpdate': arcdate
}
self.sqldb.markCanArchive(params=params)
else:
continue
return True

if __name__ == '__main__':
Expand Down
1 change: 1 addition & 0 deletions dumps.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CREATE TABLE archive (
dumpdate DATE,
progress VARCHAR(255),
claimed_by VARCHAR(255),
can_archive INT,
is_archived INT,
is_checked INT,
comments VARCHAR(255)
Expand Down

0 comments on commit 44aaa07

Please sign in to comment.