Skip to content

Commit 8c520aa

Browse files
committed
fix alignment in readSourceFileToEndAlloc
Previously, it would return an align(2) slice as provided by toOwnedSliceSentinel, which was problematic because the caller owns the returned memory and would mistakenly free it as an align(1) slice. The array list was originally changed to be align(2) to avoid a reallocation when converting UTF-16LE to UTF-8; this commit changes it back to align(1) and instead adds a function to std.unicode that converts unaligned UTF-16LE code units.
1 parent 14bb533 commit 8c520aa

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

lib/std/unicode.zig

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ pub const Utf16LeIterator = struct {
477477
bytes: []const u8,
478478
i: usize,
479479

480-
pub fn init(s: []const u16) Utf16LeIterator {
480+
pub fn init(s: []align(1) const u16) Utf16LeIterator {
481481
return Utf16LeIterator{
482482
.bytes = mem.sliceAsBytes(s),
483483
.i = 0,
@@ -917,7 +917,8 @@ test fmtUtf8 {
917917

918918
fn utf16LeToUtf8ArrayListImpl(
919919
result: *std.ArrayList(u8),
920-
utf16le: []const u16,
920+
comptime alignment: std.mem.Alignment,
921+
utf16le: []align(alignment.toByteUnits()) const u16,
921922
comptime surrogates: Surrogates,
922923
) (switch (surrogates) {
923924
.cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
@@ -969,7 +970,7 @@ pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
969970

970971
pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
971972
try result.ensureUnusedCapacity(utf16le.len);
972-
return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
973+
return utf16LeToUtf8ArrayListImpl(result, .of(u16), utf16le, .cannot_encode_surrogate_half);
973974
}
974975

975976
/// Caller must free returned memory.
@@ -978,17 +979,26 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L
978979
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
979980
errdefer result.deinit();
980981

981-
try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
982+
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), utf16le, .cannot_encode_surrogate_half);
982983
return result.toOwnedSlice();
983984
}
984985

985986
/// Caller must free returned memory.
986987
pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
988+
return alignedUtf16LeToUtf8AllocZ(allocator, .of(u16), utf16le);
989+
}
990+
991+
/// Caller must free returned memory.
992+
pub fn alignedUtf16LeToUtf8AllocZ(
993+
allocator: mem.Allocator,
994+
comptime alignment: mem.Alignment,
995+
utf16le: []align(alignment.toByteUnits()) const u16,
996+
) Utf16LeToUtf8AllocError![:0]u8 {
987997
// optimistically guess that it will all be ascii (and allocate space for the null terminator)
988998
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
989999
errdefer result.deinit();
9901000

991-
try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
1001+
try utf16LeToUtf8ArrayListImpl(&result, alignment, utf16le, .cannot_encode_surrogate_half);
9921002
return result.toOwnedSliceSentinel(0);
9931003
}
9941004

@@ -1752,7 +1762,7 @@ pub const Wtf8Iterator = struct {
17521762

17531763
pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
17541764
try result.ensureUnusedCapacity(utf16le.len);
1755-
return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
1765+
return utf16LeToUtf8ArrayListImpl(result, .of(u16), utf16le, .can_encode_surrogate_half);
17561766
}
17571767

17581768
/// Caller must free returned memory.
@@ -1761,7 +1771,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al
17611771
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
17621772
errdefer result.deinit();
17631773

1764-
try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
1774+
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), wtf16le, .can_encode_surrogate_half);
17651775
return result.toOwnedSlice();
17661776
}
17671777

@@ -1771,7 +1781,7 @@ pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.A
17711781
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
17721782
errdefer result.deinit();
17731783

1774-
try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
1784+
try utf16LeToUtf8ArrayListImpl(&result, .of(u16), wtf16le, .can_encode_surrogate_half);
17751785
return result.toOwnedSliceSentinel(0);
17761786
}
17771787

@@ -1979,7 +1989,7 @@ pub const Wtf16LeIterator = struct {
19791989
bytes: []const u8,
19801990
i: usize,
19811991

1982-
pub fn init(s: []const u16) Wtf16LeIterator {
1992+
pub fn init(s: []align(1) const u16) Wtf16LeIterator {
19831993
return Wtf16LeIterator{
19841994
.bytes = mem.sliceAsBytes(s),
19851995
.i = 0,

lib/std/zig.zig

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ test isUnderscore {
534534
}
535535

536536
pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader) ![:0]u8 {
537-
var buffer: std.ArrayListAlignedUnmanaged(u8, .@"2") = .empty;
537+
var buffer: std.ArrayListUnmanaged(u8) = .empty;
538538
defer buffer.deinit(gpa);
539539

540540
if (file_reader.getSize()) |size| {
@@ -543,7 +543,7 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader
543543
try buffer.ensureTotalCapacityPrecise(gpa, casted_size + 1);
544544
} else |_| {}
545545

546-
try file_reader.interface.appendRemaining(gpa, .@"2", &buffer, .limited(max_src_size));
546+
try file_reader.interface.appendRemaining(gpa, null, &buffer, .limited(max_src_size));
547547

548548
// Detect unsupported file types with their Byte Order Mark
549549
const unsupported_boms = [_][]const u8{
@@ -560,7 +560,7 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, file_reader: *std.fs.File.Reader
560560
// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
561561
if (std.mem.startsWith(u8, buffer.items, "\xff\xfe")) {
562562
if (buffer.items.len % 2 != 0) return error.InvalidEncoding;
563-
return std.unicode.utf16LeToUtf8AllocZ(gpa, @ptrCast(buffer.items)) catch |err| switch (err) {
563+
return std.unicode.alignedUtf16LeToUtf8AllocZ(gpa, .@"1", @ptrCast(buffer.items)) catch |err| switch (err) {
564564
error.DanglingSurrogateHalf => error.UnsupportedEncoding,
565565
error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
566566
error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,

0 commit comments

Comments
 (0)