Skip to content

Commit 9481067

Browse files
committed
finally support encoding for csv output
1 parent 1dfcf81 commit 9481067

File tree

2 files changed

+65
-16
lines changed

2 files changed

+65
-16
lines changed

pyexcel_io/fileformat/_csv.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,36 @@ def __next__(self):
5353
return next(self.reader).encode('utf-8')
5454

5555

56+
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Each row is first rendered by ``csv.writer`` into an in-memory queue,
    then re-encoded with an incremental encoder and written to ``f``.
    ``f`` therefore receives encoded bytes.

    :param f: writable stream that receives the encoded CSV data
    :param encoding: name of the target codec, "utf-8" by default
    :param kwds: extra keyword arguments handed on to ``csv.writer``
    """

    # Where ``str`` is ``bytes`` the csv module works on byte strings,
    # so cells must be UTF-8 encoded before they are handed to it.
    _csv_wants_bytes = str is bytes

    def __init__(self, f, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = StringIO()
        self.writer = csv.writer(self.queue, **kwds)
        self.stream = f
        # One incremental encoder is shared by all rows, so codecs that
        # emit a byte order mark (e.g. utf-16) emit it only once.
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """
        Write one row to the target stream.

        ``None`` cells are written as empty fields rather than as the
        text "None"; every other cell is converted with ``text_type``.
        """
        cells = []
        for s in row:
            text = u"" if s is None else text_type(s)
            if self._csv_wants_bytes:
                text = text.encode("utf-8")
            cells.append(text)
        self.writer.writerow(cells)
        # Fetch the rendered row from the queue ...
        data = self.queue.getvalue()
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue; rewind first because truncate() alone does not
        # move the position on io-style buffers
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row in ``rows`` through :meth:`writerow`."""
        for row in rows:
            self.writerow(row)
84+
85+
5686
class CSVSheetReader(SheetReader):
5787
def __init__(self, sheet, encoding="utf-8",
5888
auto_detect_float=True, auto_detect_int=True,
@@ -149,12 +179,7 @@ def write_row(self, array):
149179
"""
150180
write a row into the file
151181
"""
152-
if PY2:
153-
self.writer.writerow(
154-
[text_type(s if s is not None else '').encode(self.encoding)
155-
for s in array])
156-
else:
157-
self.writer.writerow(array)
182+
self.writer.writerow(array)
158183

159184

160185
class CSVFileWriter(CSVSheetWriter):
@@ -175,9 +200,12 @@ def set_sheet_name(self, name):
175200
file_name = self.native_book
176201
if PY2:
177202
self.f = open(file_name, "wb")
203+
self.writer = UnicodeWriter(self.f, encoding=self.encoding,
204+
**self.keywords)
178205
else:
179-
self.f = open(file_name, "w", newline="")
180-
self.writer = csv.writer(self.f, **self.keywords)
206+
self.f = open(file_name, "w", newline="",
207+
encoding=self.encoding)
208+
self.writer = csv.writer(self.f, **self.keywords)
181209

182210

183211
class CSVMemoryWriter(CSVSheetWriter):
@@ -194,8 +222,13 @@ def __init__(self, filename, name,
194222
self.line_terminator))
195223

196224
def set_sheet_name(self, name):
197-
self.f = self.native_book
198-
self.writer = csv.writer(self.f, **self.keywords)
225+
if PY2:
226+
self.f = self.native_book
227+
self.writer = UnicodeWriter(self.f, encoding=self.encoding,
228+
**self.keywords)
229+
else:
230+
self.f = self.native_book
231+
self.writer = csv.writer(self.f, **self.keywords)
199232

200233
def close(self):
201234
if self.single_sheet_in_book:

tests/test_csv_book.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
CSVFileWriter,
1515
CSVMemoryWriter
1616
)
17-
from pyexcel_io._compact import BytesIO, PY2
17+
from pyexcel_io._compact import BytesIO, PY2, StringIO
1818

1919

2020
class TestReaders(TestCase):
@@ -154,15 +154,18 @@ def test_utf16_encoding():
154154
test_file = "test-utf16-encoding.csv"
155155
writer = CSVFileWriter(
156156
test_file, None,
157-
encoding="utf-16")
157+
encoding="utf-16", lineterminator="\n")
158158
writer.write_array(content)
159159
writer.close()
160-
with open(test_file, "r") as f:
161-
actual = f.read()
162-
print(actual)
160+
with open(test_file, "rb") as f:
161+
actual = f.read().decode('utf-16')
162+
if PY2:
163+
actual = actual.encode('utf-8')
164+
eq_(actual, 'Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n')
165+
os.unlink(test_file)
163166

164167

165-
def test_utf16_memory_encoding():
168+
def test_utf16_memory_decoding():
166169
test_content = u'Äkkilähdöt,Matkakirjoituksia,Matkatoimistot'
167170
test_content = BytesIO(test_content.encode('utf-16'))
168171
reader = CSVinMemoryReader(
@@ -173,3 +176,16 @@ def test_utf16_memory_encoding():
173176
content[0] = [s.encode('utf-8') for s in content[0]]
174177
expected = [['Äkkilähdöt', 'Matkakirjoituksia', 'Matkatoimistot']]
175178
eq_(content, expected)
179+
180+
181+
def test_utf16_memory_encoding():
    """Write one non-ASCII row through CSVMemoryWriter and check the CSV."""
    rows = [[u'Äkkilähdöt', u'Matkakirjoituksia', u'Matkatoimistot']]
    expected = u'Äkkilähdöt,Matkakirjoituksia,Matkatoimistot\n'
    buffer = StringIO()
    memory_writer = CSVMemoryWriter(
        buffer, None, lineterminator="\n", single_sheet_in_book=True,
        encoding="utf-16")
    memory_writer.write_array(rows)
    # Under PY2 the buffer content is decoded from utf-16 before comparing.
    written = buffer.getvalue().decode('utf-16') if PY2 else buffer.getvalue()
    eq_(written, expected)

0 commit comments

Comments
 (0)