Skip to content

Commit b56b325

Browse files
authored
Enable internal compression in pmtiles (#811)
* Enable internal compression in pmtiles
* Use buffer instead of little endian stream when deserializing headers
1 parent f4b2820 commit b56b325

File tree

3 files changed

+143
-124
lines changed

3 files changed

+143
-124
lines changed

baremaps-core/src/main/java/org/apache/baremaps/tilestore/pmtiles/PMTiles.java

Lines changed: 121 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717

1818
package org.apache.baremaps.tilestore.pmtiles;
1919

20-
import com.google.common.io.LittleEndianDataInputStream;
21-
import com.google.common.io.LittleEndianDataOutputStream;
2220
import com.google.common.math.LongMath;
2321
import java.io.ByteArrayOutputStream;
2422
import java.io.IOException;
23+
import java.io.InputStream;
24+
import java.io.OutputStream;
25+
import java.nio.ByteBuffer;
26+
import java.nio.ByteOrder;
2527
import java.util.ArrayList;
2628
import java.util.List;
2729

@@ -31,72 +33,72 @@ public static long toNum(long low, long high) {
3133
return high * 0x100000000L + low;
3234
}
3335

34-
public static long readVarIntRemainder(LittleEndianDataInputStream input, long l)
36+
public static long readVarIntRemainder(InputStream input, long l)
3537
throws IOException {
3638
long h, b;
37-
b = input.readByte() & 0xff;
39+
b = input.read() & 0xff;
3840
h = (b & 0x70) >> 4;
3941
if (b < 0x80) {
4042
return toNum(l, h);
4143
}
42-
b = input.readByte() & 0xff;
44+
b = input.read() & 0xff;
4345
h |= (b & 0x7f) << 3;
4446
if (b < 0x80) {
4547
return toNum(l, h);
4648
}
47-
b = input.readByte() & 0xff;
49+
b = input.read() & 0xff;
4850
h |= (b & 0x7f) << 10;
4951
if (b < 0x80) {
5052
return toNum(l, h);
5153
}
52-
b = input.readByte() & 0xff;
54+
b = input.read() & 0xff;
5355
h |= (b & 0x7f) << 17;
5456
if (b < 0x80) {
5557
return toNum(l, h);
5658
}
57-
b = input.readByte() & 0xff;
59+
b = input.read() & 0xff;
5860
h |= (b & 0x7f) << 24;
5961
if (b < 0x80) {
6062
return toNum(l, h);
6163
}
62-
b = input.readByte() & 0xff;
64+
b = input.read() & 0xff;
6365
h |= (b & 0x01) << 31;
6466
if (b < 0x80) {
6567
return toNum(l, h);
6668
}
6769
throw new RuntimeException("Expected varint not more than 10 bytes");
6870
}
6971

70-
public static int writeVarInt(LittleEndianDataOutputStream output, long value)
72+
public static int writeVarInt(OutputStream output, long value)
7173
throws IOException {
7274
int n = 1;
7375
while (value >= 0x80) {
74-
output.writeByte((byte) (value | 0x80));
76+
output.write((byte) (value | 0x80));
7577
value >>>= 7;
7678
n++;
7779
}
78-
output.writeByte((byte) value);
80+
output.write((byte) value);
7981
return n;
8082
}
8183

82-
public static long readVarInt(LittleEndianDataInputStream input) throws IOException {
84+
public static long readVarInt(InputStream input) throws IOException {
8385
long val, b;
84-
b = input.readByte() & 0xff;
86+
b = input.read() & 0xff;
8587
val = b & 0x7f;
8688
if (b < 0x80) {
8789
return val;
8890
}
89-
b = input.readByte() & 0xff;
91+
b = input.read() & 0xff;
9092
val |= (b & 0x7f) << 7;
9193
if (b < 0x80) {
9294
return val;
9395
}
94-
b = input.readByte() & 0xff;
96+
b = input.read() & 0xff;
9597
val |= (b & 0x7f) << 14;
9698
if (b < 0x80) {
9799
return val;
98100
}
99-
b = input.readByte() & 0xff;
101+
b = input.read() & 0xff;
100102
val |= (b & 0x7f) << 21;
101103
if (b < 0x80) {
102104
return val;
@@ -179,74 +181,83 @@ public static long[] tileIdToZxy(long i) {
179181

180182
private static final int HEADER_SIZE_BYTES = 127;
181183

182-
public static Header deserializeHeader(LittleEndianDataInputStream input) throws IOException {
183-
input.skipBytes(7);
184+
public static Header deserializeHeader(InputStream input) throws IOException {
185+
byte[] bytes = new byte[HEADER_SIZE_BYTES];
186+
var num = input.read(bytes);
187+
if (num != HEADER_SIZE_BYTES) {
188+
throw new IOException("Invalid header size");
189+
}
190+
var buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
191+
buffer.position(7);
184192
return new Header(
185-
input.readByte(),
186-
input.readLong(),
187-
input.readLong(),
188-
input.readLong(),
189-
input.readLong(),
190-
input.readLong(),
191-
input.readLong(),
192-
input.readLong(),
193-
input.readLong(),
194-
input.readLong(),
195-
input.readLong(),
196-
input.readLong(),
197-
input.readByte() == 1,
198-
Compression.values()[input.readByte()],
199-
Compression.values()[input.readByte()],
200-
TileType.values()[input.readByte()],
201-
input.readByte(),
202-
input.readByte(),
203-
(double) input.readInt() / 10000000,
204-
(double) input.readInt() / 10000000,
205-
(double) input.readInt() / 10000000,
206-
(double) input.readInt() / 10000000,
207-
input.readByte(),
208-
(double) input.readInt() / 10000000,
209-
(double) input.readInt() / 10000000);
193+
buffer.get(),
194+
buffer.getLong(),
195+
buffer.getLong(),
196+
buffer.getLong(),
197+
buffer.getLong(),
198+
buffer.getLong(),
199+
buffer.getLong(),
200+
buffer.getLong(),
201+
buffer.getLong(),
202+
buffer.getLong(),
203+
buffer.getLong(),
204+
buffer.getLong(),
205+
buffer.get() == 1,
206+
Compression.values()[buffer.get()],
207+
Compression.values()[buffer.get()],
208+
TileType.values()[buffer.get()],
209+
buffer.get(),
210+
buffer.get(),
211+
(double) buffer.getInt() / 10000000,
212+
(double) buffer.getInt() / 10000000,
213+
(double) buffer.getInt() / 10000000,
214+
(double) buffer.getInt() / 10000000,
215+
buffer.get(),
216+
(double) buffer.getInt() / 10000000,
217+
(double) buffer.getInt() / 10000000);
210218
}
211219

212-
public static void serializeHeader(LittleEndianDataOutputStream output, Header header)
213-
throws IOException {
214-
output.writeByte((byte) 0x50);
215-
output.writeByte((byte) 0x4D);
216-
output.writeByte((byte) 0x54);
217-
output.writeByte((byte) 0x69);
218-
output.writeByte((byte) 0x6C);
219-
output.writeByte((byte) 0x65);
220-
output.writeByte((byte) 0x73);
221-
output.writeByte((byte) header.getSpecVersion());
222-
output.writeLong(header.getRootDirectoryOffset());
223-
output.writeLong(header.getRootDirectoryLength());
224-
output.writeLong(header.getJsonMetadataOffset());
225-
output.writeLong(header.getJsonMetadataLength());
226-
output.writeLong(header.getLeafDirectoryOffset());
227-
output.writeLong(header.getLeafDirectoryLength());
228-
output.writeLong(header.getTileDataOffset());
229-
output.writeLong(header.getTileDataLength());
230-
output.writeLong(header.getNumAddressedTiles());
231-
output.writeLong(header.getNumTileEntries());
232-
output.writeLong(header.getNumTileContents());
233-
output.writeByte((byte) (header.isClustered() ? 1 : 0));
234-
output.writeByte((byte) header.getInternalCompression().ordinal());
235-
output.writeByte((byte) header.getTileCompression().ordinal());
236-
output.writeByte((byte) header.getTileType().ordinal());
237-
output.writeByte((byte) header.getMinZoom());
238-
output.writeByte((byte) header.getMaxZoom());
239-
output.writeInt((int) (header.getMinLon() * 10000000));
240-
output.writeInt((int) (header.getMinLat() * 10000000));
241-
output.writeInt((int) (header.getMaxLon() * 10000000));
242-
output.writeInt((int) (header.getMaxLat() * 10000000));
243-
output.writeByte((byte) header.getCenterZoom());
244-
output.writeInt((int) (header.getCenterLon() * 10000000));
245-
output.writeInt((int) (header.getCenterLat() * 10000000));
220+
public static byte[] serializeHeader(Header header) {
221+
var buffer = ByteBuffer.allocate(HEADER_SIZE_BYTES).order(ByteOrder.LITTLE_ENDIAN);
222+
buffer.put((byte) 0x50);
223+
buffer.put((byte) 0x4D);
224+
buffer.put((byte) 0x54);
225+
buffer.put((byte) 0x69);
226+
buffer.put((byte) 0x6C);
227+
buffer.put((byte) 0x65);
228+
buffer.put((byte) 0x73);
229+
buffer.put((byte) header.getSpecVersion());
230+
buffer.putLong(header.getRootDirectoryOffset());
231+
buffer.putLong(header.getRootDirectoryLength());
232+
buffer.putLong(header.getJsonMetadataOffset());
233+
buffer.putLong(header.getJsonMetadataLength());
234+
buffer.putLong(header.getLeafDirectoryOffset());
235+
buffer.putLong(header.getLeafDirectoryLength());
236+
buffer.putLong(header.getTileDataOffset());
237+
buffer.putLong(header.getTileDataLength());
238+
buffer.putLong(header.getNumAddressedTiles());
239+
buffer.putLong(header.getNumTileEntries());
240+
buffer.putLong(header.getNumTileContents());
241+
buffer.put((byte) (header.isClustered() ? 1 : 0));
242+
buffer.put((byte) header.getInternalCompression().ordinal());
243+
buffer.put((byte) header.getTileCompression().ordinal());
244+
buffer.put((byte) header.getTileType().ordinal());
245+
buffer.put((byte) header.getMinZoom());
246+
buffer.put((byte) header.getMaxZoom());
247+
buffer.putInt((int) (header.getMinLon() * 10000000));
248+
buffer.putInt((int) (header.getMinLat() * 10000000));
249+
buffer.putInt((int) (header.getMaxLon() * 10000000));
250+
buffer.putInt((int) (header.getMaxLat() * 10000000));
251+
buffer.put((byte) header.getCenterZoom());
252+
buffer.putInt((int) (header.getCenterLon() * 10000000));
253+
buffer.putInt((int) (header.getCenterLat() * 10000000));
254+
buffer.flip();
255+
return buffer.array();
246256
}
247257

248-
public static void serializeEntries(LittleEndianDataOutputStream output, List<Entry> entries)
258+
public static void serializeEntries(OutputStream output, List<Entry> entries)
249259
throws IOException {
260+
var buffer = ByteBuffer.allocate(entries.size() * 48);
250261
writeVarInt(output, entries.size());
251262
long lastId = 0;
252263
for (Entry entry : entries) {
@@ -268,9 +279,11 @@ public static void serializeEntries(LittleEndianDataOutputStream output, List<En
268279
writeVarInt(output, entry.getOffset() + 1);
269280
}
270281
}
282+
buffer.flip();
283+
output.write(buffer.array(), 0, buffer.limit());
271284
}
272285

273-
public static List<Entry> deserializeEntries(LittleEndianDataInputStream buffer)
286+
public static List<Entry> deserializeEntries(InputStream buffer)
274287
throws IOException {
275288
long numEntries = readVarInt(buffer);
276289
List<Entry> entries = new ArrayList<>((int) numEntries);
@@ -329,60 +342,62 @@ public static Entry findTile(List<Entry> entries, long tileId) {
329342
return null;
330343
}
331344

332-
public static Directories buildRootLeaves(List<Entry> entries, int leafSize) throws IOException {
345+
public static Directories buildRootLeaves(List<Entry> entries, int leafSize,
346+
Compression compression) throws IOException {
333347
var rootEntries = new ArrayList<Entry>();
334348
var numLeaves = 0;
335349
byte[] leavesBytes;
336350
byte[] rootBytes;
337351

338-
try (var leavesOutput = new ByteArrayOutputStream();
339-
var leavesDataOutput = new LittleEndianDataOutputStream(leavesOutput)) {
352+
try (var leavesOutput = new ByteArrayOutputStream()) {
340353
for (var i = 0; i < entries.size(); i += leafSize) {
341354
numLeaves++;
342355
var end = i + leafSize;
343356
if (i + leafSize > entries.size()) {
344357
end = entries.size();
345358
}
346-
347359
var offset = leavesOutput.size();
348-
serializeEntries(leavesDataOutput, entries.subList(i, end));
349-
var length = leavesOutput.size();
350-
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
360+
try (var leafOutput = new ByteArrayOutputStream()) {
361+
try (var compressedLeafOutput = compression.compress(leafOutput)) {
362+
serializeEntries(compressedLeafOutput, entries.subList(i, end));
363+
}
364+
var length = leafOutput.size();
365+
rootEntries.add(new Entry(entries.get(i).getTileId(), offset, length, 0));
366+
leavesOutput.write(leafOutput.toByteArray());
367+
}
351368
}
352-
353369
leavesBytes = leavesOutput.toByteArray();
354370
}
355371

356-
try (var rootOutput = new ByteArrayOutputStream();
357-
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
358-
serializeEntries(rootDataOutput, rootEntries);
372+
try (var rootOutput = new ByteArrayOutputStream()) {
373+
try (var compressedRootOutput = compression.compress(rootOutput)) {
374+
serializeEntries(compressedRootOutput, rootEntries);
375+
}
359376
rootBytes = rootOutput.toByteArray();
360377
}
361378

362379
return new Directories(rootBytes, leavesBytes, numLeaves);
363380
}
364381

365-
public static Directories optimizeDirectories(List<Entry> entries, int targetRootLenght)
382+
public static Directories optimizeDirectories(List<Entry> entries, int targetRootLength,
383+
Compression compression)
366384
throws IOException {
367385
if (entries.size() < 16384) {
368-
byte[] rootBytes;
369-
try (var rootOutput = new ByteArrayOutputStream();
370-
var rootDataOutput = new LittleEndianDataOutputStream(rootOutput)) {
371-
serializeEntries(rootDataOutput, entries);
372-
rootBytes = rootOutput.toByteArray();
373-
}
374-
if (rootBytes.length <= targetRootLenght) {
375-
return new Directories(rootBytes, new byte[] {}, 0);
386+
try (var rootOutput = new ByteArrayOutputStream()) {
387+
try (var compressedOutput = compression.compress(rootOutput)) {
388+
serializeEntries(compressedOutput, entries);
389+
}
390+
byte[] rootBytes = rootOutput.toByteArray();
391+
if (rootBytes.length <= targetRootLength) {
392+
return new Directories(rootBytes, new byte[] {}, 0);
393+
}
376394
}
377395
}
378396

379-
double leafSize = (double) entries.size() / 3500;
380-
if (leafSize < 4096) {
381-
leafSize = 4096;
382-
}
397+
double leafSize = Math.max((double) entries.size() / 3500, 4096);
383398
for (;;) {
384-
var directories = buildRootLeaves(entries, (int) leafSize);
385-
if (directories.getRoot().length <= targetRootLenght) {
399+
var directories = buildRootLeaves(entries, (int) leafSize, compression);
400+
if (directories.getRoot().length <= targetRootLength) {
386401
return directories;
387402
}
388403
leafSize = leafSize * 1.2;

0 commit comments

Comments
 (0)