Skip to content

Commit 84019c3

Browse files
committed
⚡️ Make 1000 rows chunks for tables
1 parent 1e75341 commit 84019c3

File tree

3 files changed

+11
-6
lines changed

3 files changed

+11
-6
lines changed

ingestors/support/table.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ class TableSupport(EncodingSupport, TempFileSupport):
1818
def emit_row_dicts(self, table, rows, headers=None):
1919
csv_path = self.make_work_file(table.id)
2020
row_count = 0
21+
fragment_rows = []
2122
with open(csv_path, "w", encoding=self.DEFAULT_ENCODING) as fp:
2223
csv_writer = csv.writer(fp, dialect="unix")
2324
for row in rows:
@@ -28,11 +29,15 @@ def emit_row_dicts(self, table, rows, headers=None):
2829
if length == 0:
2930
continue
3031
csv_writer.writerow(values)
31-
self.manager.emit_text_fragment(table, values, row_count)
32+
fragment_rows.append(",".join([v or "" for v in values]))
3233
row_count += 1
3334
if row_count > 0 and row_count % 1000 == 0:
3435
log.info("Table emit [%s]: %s...", table, row_count)
36+
fragment_rows = []
37+
self.manager.emit_text_fragment(table, fragment_rows, row_count)
3538
if row_count > 0:
39+
if len(fragment_rows):
40+
self.manager.emit_text_fragment(table, fragment_rows, row_count)
3641
csv_hash = self.manager.store(csv_path, mime_type=CSV)
3742
table.set("csvHash", csv_hash)
3843
table.set("rowCount", row_count + 1)

tests/test_dbf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
from .support import TestCase
2+
from tests.support import TestCase
33

44

55
class DBFIngestorTest(TestCase):
@@ -13,5 +13,5 @@ def test_simple_dbf(self):
1313
self.assertEqual(entity.schema.name, "Table")
1414
self.assertTrue(entity.has("csvHash"))
1515
self.assertEqual(int(entity.first("rowCount")), 9)
16-
self.assertIn("Azad Kashmir", table.get("indexText"))
17-
self.assertIn("Pakistan", table.get("indexText"))
16+
self.assertIn("Azad Kashmir", "".join(table.get("indexText")))
17+
self.assertIn("Pakistan", "".join(table.get("indexText")))

tests/test_tabular.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
2-
from .support import TestCase
32
from ingestors.exc import ENCRYPTED_MSG
3+
from tests.support import TestCase
44

55

66
class TabularIngestorTest(TestCase):
@@ -16,7 +16,7 @@ def test_simple_xlsx(self):
1616
table = [t for t in tables if "1" in t.first("title")][0]
1717
self.assertTrue(table.has("csvHash"))
1818
self.assertEqual(int(table.first("rowCount")), 3)
19-
self.assertIn("Mihai Viteazul", table.get("indexText"))
19+
self.assertIn("Mihai Viteazul", "".join(table.get("indexText")))
2020

2121
def test_unicode_xls(self):
2222
fixture_path, entity = self.fixture("rom.xls")

0 commit comments

Comments
 (0)