@@ -69,7 +69,6 @@ class Database:
6969
7070 def connect (self , connector_impl = mysql .connector ):
7171 """Establish a connection to the database."""
72-
7372 u , p = secrets .db .epi
7473 self ._connector_impl = connector_impl
7574 self ._connection = self ._connector_impl .connect (
@@ -247,6 +246,104 @@ def insert_or_update_batch(self, cc_rows, batch_size=2**20, commit_partial=False
247246 self ._cursor .execute (drop_tmp_table_sql )
248247 return total
249248
249+ def delete_batch (self , cc_deletions ):
250+ """
251+ Remove rows specified by a csv file or list of tuples.
252+
253+ If cc_deletions is a filename, the file should include a header row and use the following field order:
254+ - geo_id
255+ - value (ignored)
256+ - stderr (ignored)
257+ - sample_size (ignored)
258+ - issue (YYYYMMDD format)
259+ - time_value (YYYYMMDD format)
260+ - geo_type
261+ - signal
262+ - source
263+
264+ If cc_deletions is a list of tuples, the tuples should use the following field order (=same as above, plus time_type):
265+ - geo_id
266+ - value (ignored)
267+ - stderr (ignored)
268+ - sample_size (ignored)
269+ - issue (YYYYMMDD format)
270+ - time_value (YYYYMMDD format)
271+ - geo_type
272+ - signal
273+ - source
274+ - time_type
275+ """
276+ tmp_table_name = "tmp_delete_table"
277+ create_tmp_table_sql = f'''
278+ CREATE OR REPLACE TABLE { tmp_table_name } LIKE covidcast;
279+ '''
280+
281+ amend_tmp_table_sql = f'''
282+ ALTER TABLE { tmp_table_name } ADD COLUMN covidcast_id bigint unsigned;
283+ '''
284+
285+ load_tmp_table_infile_sql = f'''
286+ LOAD DATA INFILE "{ cc_deletions } "
287+ INTO TABLE { tmp_table_name }
288+ FIELDS TERMINATED BY ","
289+ IGNORE 1 LINES
290+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`)
291+ SET time_type="day";
292+ '''
293+
294+ load_tmp_table_insert_sql = f'''
295+ INSERT INTO { tmp_table_name }
296+ (`geo_value`, `value`, `stderr`, `sample_size`, `issue`, `time_value`, `geo_type`, `signal`, `source`, `time_type`,
297+ `value_updated_timestamp`, `direction_updated_timestamp`, `lag`, `direction`, `is_latest_issue`)
298+ VALUES
299+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
300+ 0, 0, 0, 0, 0)
301+ '''
302+
303+ add_id_sql = f'''
304+ UPDATE { tmp_table_name } d INNER JOIN covidcast c USING
305+ (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
306+ SET d.covidcast_id=c.id, d.is_latest_issue=c.is_latest_issue;
307+ '''
308+
309+ delete_sql = f'''
310+ DELETE c FROM { tmp_table_name } d INNER JOIN covidcast c WHERE d.covidcast_id=c.id;
311+ '''
312+
313+ fix_latest_issue_sql = f'''
314+ UPDATE
315+ (SELECT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, MAX(`issue`) AS `issue`
316+ FROM
317+ (SELECT DISTINCT `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
318+ FROM { tmp_table_name } WHERE `is_latest_issue`=1) AS was_latest
319+ LEFT JOIN covidcast c
320+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`)
321+ GROUP BY `source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`
322+ ) AS TMP
323+ LEFT JOIN `covidcast`
324+ USING (`source`, `signal`, `time_type`, `geo_type`, `time_value`, `geo_value`, `issue`)
325+ SET `covidcast`.`is_latest_issue`=1;
326+ '''
327+
328+ drop_tmp_table_sql = f'DROP TABLE { tmp_table_name } '
329+ try :
330+ self ._cursor .execute (create_tmp_table_sql )
331+ self ._cursor .execute (amend_tmp_table_sql )
332+ if isinstance (cc_deletions , str ):
333+ self ._cursor .execute (load_tmp_table_infile_sql )
334+ elif isinstance (cc_deletions , list ):
335+ self ._cursor .executemany (load_tmp_table_insert_sql , cc_deletions )
336+ else :
337+ raise Exception (f"Bad deletions argument: need a filename or a list of tuples; got a { type (cc_deletions )} " )
338+ self ._cursor .execute (add_id_sql )
339+ self ._cursor .execute (delete_sql )
340+ self ._cursor .execute (fix_latest_issue_sql )
341+ self ._connection .commit ()
342+ except Exception as e :
343+ raise e
344+ finally :
345+ self ._cursor .execute (drop_tmp_table_sql )
346+
250347 def compute_covidcast_meta (self , table_name = 'covidcast' , use_index = True ):
251348 """Compute and return metadata on all non-WIP COVIDcast signals."""
252349 logger = get_structured_logger ("compute_covidcast_meta" )
0 commit comments