Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6d71a5b
Work in progress - Initial support for SIMD in the java module.
samyron Jul 4, 2025
55aee21
Updated the vectorized scanner to more closely match the C implementa…
samyron Jul 5, 2025
8e42c0f
Cleanups.
samyron Jul 5, 2025
700826b
WIP
samyron Jul 7, 2025
51264df
Skip the vectorized code if it cannot be compiled.
samyron Jul 7, 2025
53a5a88
Added a system property to enable vectorized scanning and fix a bug a…
samyron Jul 7, 2025
3e89dd7
Cleanups.
samyron Jul 7, 2025
22d2c76
Add a BasicScanner that doesn't use a lookup table.
samyron Jul 10, 2025
4895a35
Fixed a bug parsing UTF8 encoded strings. Additionally some refactoring.
samyron Jul 15, 2025
15c7187
Merge branch 'master' into sm/java-vector-simd
samyron Jul 15, 2025
3222610
Move the vectorized escape scanning around the escape logic to allow …
samyron Jul 28, 2025
ecdb48f
Merge branch 'master' into sm/java-vector-simd
samyron Sep 2, 2025
0f6f9eb
Updates.
samyron Sep 5, 2025
477b947
Removed unnecessary imports.
samyron Sep 5, 2025
ba28520
More cleanups.
samyron Sep 5, 2025
d75173a
Removed unnecessary changes from the SWARBasicStringEncoder.
samyron Sep 5, 2025
e421b61
Removed commented out code.
samyron Sep 9, 2025
4f39225
Merge branch 'master' into sm/java-vector-simd
samyron Sep 19, 2025
d6da4c7
Merge branch 'master' into sm/java-vector-simd
samyron Sep 22, 2025
de544e0
fix the ByteBuffer in the VectorizedStringEncoder. Also commented out…
samyron Sep 22, 2025
3990e3a
fix ptrBytes offsets in vectorized code.
samyron Sep 22, 2025
883ea5a
Merge branch 'master' into sm/java-vector-simd
headius Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ end
JAVA_DIR = "java/src/json/ext"
JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl"
JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java"
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"]
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java")
JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"]
JAVA_CLASSES = []
JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar")
JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar")
Expand Down Expand Up @@ -142,8 +143,8 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'

JRUBY_JAR = File.join(CONFIG["libdir"], "jruby.jar")
if File.exist?(JRUBY_JAR)
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
JAVA_SOURCES.each do |src|
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
obj = src.sub(/\.java\Z/, '.class')
file obj => src do
if File.exist?(File.join(ENV['JAVA_HOME'], "lib", "modules"))
Expand All @@ -154,6 +155,20 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
end
JAVA_CLASSES << obj
end

# Register one file task per Vector API source. javac is run with a block so
# that a failed compile only reports that vector support is disabled instead of
# aborting the build.
JAVA_VEC_SOURCES.each do |java_file|
  class_file = java_file.sub(/\.java\Z/, '.class')
  file class_file => java_file do
    javac_cmd = [
      'javac', '--add-modules', 'jdk.incubator.vector',
      '-classpath', classpath,
      '--release', '16',
      java_file
    ]
    sh(*javac_cmd) do |ok, _status|
      state = ok ? 'enabled' : 'disabled'
      puts "*** 'jdk.incubator.vector' support #{state} ***"
    end
  end
  JAVA_CLASSES << class_file
end
else
warn "WARNING: Cannot find jruby in path => Cannot build jruby extension!"
end
Expand Down Expand Up @@ -199,11 +214,13 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
generator_classes = FileList[
"json/ext/*ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/EscapeScanner*.class",
"json/ext/Generator*.class",
"json/ext/RuntimeInfo*.class",
"json/ext/*StringEncoder*.class",
"json/ext/Utils*.class"
]
puts "Creating generator jar with classes: #{generator_classes.join(', ')}"
sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
mv File.basename(JRUBY_GENERATOR_JAR), File.dirname(JRUBY_GENERATOR_JAR)
end
Expand Down
4 changes: 2 additions & 2 deletions java/src/json/ext/SWARBasicStringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void encode(ByteList src) throws IOException {
}
}

private boolean skipChunk(long x) {
boolean skipChunk(long x) {
long is_ascii = 0x8080808080808080L & ~x;
long xor2 = x ^ 0x0202020202020202L;
long lt32_or_eq34 = xor2 - 0x2121212121212121L;
Expand All @@ -80,7 +80,7 @@ private boolean skipChunk(long x) {
return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
}

private boolean skipChunk(int x) {
boolean skipChunk(int x) {
int is_ascii = 0x80808080 & ~x;
int xor2 = x ^ 0x02020202;
int lt32_or_eq34 = xor2 - 0x21212121;
Expand Down
39 changes: 38 additions & 1 deletion java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.StandardCharsets;

import org.jcodings.Encoding;
Expand Down Expand Up @@ -114,11 +116,39 @@ class StringEncoder extends ByteListTranscoder {

protected final byte[] escapeTable;

// Fully-qualified name of the vectorized encoder; the static initializer loads
// this class reflectively by name.
private static final String VECTORIZED_STRING_ENCODER_CLASS = "json.ext.VectorizedStringEncoder";
// System property that opts in to the vectorized encoder ("true" or "1",
// compared case-insensitively by the static initializer).
private static final String USE_VECTORIZED_BASIC_ENCODER_PROP = "jruby.json.useVectorizedBasicEncoder";
// The vectorized encoder is off unless the property above is set.
private static final String USE_VECTORIZED_BASIC_ENCODER_DEFAULT = "false";
// True only when the property is enabled AND the vectorized class could be
// loaded and instantiated.
private static final boolean USE_VECTORIZED_BASIC_ENCODER;
// Prototype instance of the vectorized encoder, or null when it is disabled or
// failed to load; createBasicEncoder() hands out clones of it.
private static final StringEncoder VECTORIZED_SCANNER;

// System property controlling whether the SWAR basic encoder is used.
private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
// The SWAR encoder is on unless the property above says otherwise.
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
// Parsed value of the SWAR property (Boolean.parseBoolean in the static initializer).
private static final boolean USE_BASIC_SWAR_ENCODER;

static {
String enableVectorizedScanner = System.getProperty(USE_VECTORIZED_BASIC_ENCODER_PROP, USE_VECTORIZED_BASIC_ENCODER_DEFAULT);
if ("true".equalsIgnoreCase(enableVectorizedScanner) || "1".equalsIgnoreCase(enableVectorizedScanner)) {
StringEncoder scanner;
try {
Class<?> vectorizedStringEncoderClass = StringEncoder.class.getClassLoader().loadClass(VECTORIZED_STRING_ENCODER_CLASS);
Constructor<?> vectorizedStringEncoderConstructor = vectorizedStringEncoderClass.getDeclaredConstructor();
scanner = (StringEncoder) vectorizedStringEncoderConstructor.newInstance();
// System.out.println(scanner.getClass().getName() + " loaded successfully.");
} catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
// Fallback to the StringEncoder if we cannot load the VectorizedStringEncoder.
// System.err.println("Failed to load VectorizedStringEncoder, falling back to StringEncoder:");
// e.printStackTrace();
scanner = null;
}
VECTORIZED_SCANNER = scanner;
USE_VECTORIZED_BASIC_ENCODER = scanner != null;
} else {
System.err.println("VectorizedStringEncoder disabled.");
VECTORIZED_SCANNER = null;
USE_VECTORIZED_BASIC_ENCODER = false;
}

USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
// XXX Is there a logger we can use here?
Expand Down Expand Up @@ -149,8 +179,15 @@ class StringEncoder extends ByteListTranscoder {
this.escapeTable = escapeTable;
}

/**
 * Creates a new encoder that uses the same escape table as this one.
 * The copy is built through the constructor, not {@code super.clone()}.
 *
 * @return a new {@code StringEncoder} constructed with this instance's escape table
 */
@Override
public StringEncoder clone() {
    final StringEncoder copy = new StringEncoder(escapeTable);
    return copy;
}

static StringEncoder createBasicEncoder() {
if (USE_BASIC_SWAR_ENCODER) {
if (USE_VECTORIZED_BASIC_ENCODER) {
return (StringEncoder) VECTORIZED_SCANNER.clone();
} else if (USE_BASIC_SWAR_ENCODER) {
return new SWARBasicStringEncoder();
} else {
return new StringEncoder(false);
Expand Down
104 changes: 104 additions & 0 deletions java/src/json/ext/VectorizedStringEncoder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package json.ext;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.jruby.util.ByteList;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

/**
 * Basic string encoder that looks for bytes needing a JSON escape one SIMD
 * vector at a time, using the incubating {@code jdk.incubator.vector} API.
 *
 * <p>A byte needs escaping when it is a control character (&lt; 0x20), a double
 * quote or a backslash. Bytes with the high bit set are passed through
 * untouched. Whatever is left after the last full vector is handled with the
 * inherited SWAR {@code skipChunk} checks and finally a byte-by-byte loop.
 */
class VectorizedStringEncoder extends SWARBasicStringEncoder {
    private static final VectorSpecies<Byte> SP = ByteVector.SPECIES_PREFERRED;
    private static final ByteVector ZERO = ByteVector.zero(SP);
    private static final ByteVector TWO = ByteVector.broadcast(SP, 2);
    private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33);
    private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\');

    /**
     * @return a fresh, independent {@code VectorizedStringEncoder}
     */
    @Override
    public StringEncoder clone() {
        return new VectorizedStringEncoder();
    }

    /**
     * Encodes {@code src}, escaping every byte that requires it.
     *
     * @param src the bytes to encode; only the window
     *            {@code [begin(), begin() + realSize())} of its backing array is read
     * @throws IOException if writing the encoded output fails
     */
    @Override
    void encode(ByteList src) throws IOException {
        byte[] ptrBytes = src.unsafeBytes();
        int ptr = src.begin();
        int len = src.realSize();
        // beg and pos are offsets RELATIVE to ptr: every array access below uses
        // ptr + pos and every bound check compares against len. Both therefore
        // start at 0, even when the ByteList is a view into a larger array.
        int beg = 0;
        int pos = 0;

        while (pos + SP.length() <= len) {
            ByteVector chunk = ByteVector.fromArray(SP, ptrBytes, ptr + pos);
            // Java bytes are signed, so bytes with the high bit set compare as
            // negative and would wrongly satisfy the "< 33" test; mask them out.
            VectorMask<Byte> negative = chunk.lt(ZERO);
            // XOR with 2 maps '"' (34) to 32 and keeps every byte below 32 below 32,
            // so a single "< 33" comparison catches control characters and quotes.
            VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative);
            VectorMask<Byte> needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote);
            if (needsEscape.anyTrue()) {
                int chunkStart = pos;
                long mask = needsEscape.toLong();

                // Compare against zero rather than "> 0": with a 64-lane species
                // lane 63 lands in the sign bit and makes the mask negative.
                while (mask != 0) {
                    int index = Long.numberOfTrailingZeros(mask);
                    mask &= (mask - 1); // clear the lowest set bit
                    pos = chunkStart + index;
                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);

                    beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                    escapeAscii(ch, aux, HEX);
                }

                // Skip over any remaining characters in the current chunk
                pos = chunkStart + SP.length();
                continue;
            }

            pos += SP.length();
        }

        // The wrapped buffer keeps absolute indexing into ptrBytes, so absolute
        // reads use ptr + pos, and the limit (ptr + len) guards against overruns.
        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
        if (pos + 8 <= len) {
            long x = bb.getLong(ptr + pos);
            if (skipChunk(x)) {
                pos += 8;
            } else {
                // pos is relative to ptr, so the end of this chunk is too.
                int chunkEnd = pos + 8;
                while (pos < chunkEnd) {
                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
                    int ch_len = ESCAPE_TABLE[ch];
                    if (ch_len > 0) {
                        beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                        escapeAscii(ch, aux, HEX);
                    } else {
                        pos++;
                    }
                }
            }
        }

        if (pos + 4 <= len) {
            int x = bb.getInt(ptr + pos);
            if (skipChunk(x)) {
                pos += 4;
            }
            // Otherwise fall through: the byte loop below handles these bytes.
        }

        while (pos < len) {
            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
            int ch_len = ESCAPE_TABLE[ch];
            if (ch_len > 0) {
                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                escapeAscii(ch, aux, HEX);
            } else {
                pos++;
            }
        }

        // Emit whatever follows the last escaped byte (or the whole input).
        if (beg < len) {
            append(ptrBytes, ptr + beg, len - beg);
        }
    }
}
4 changes: 4 additions & 0 deletions test/json/json_encoding_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ def test_generate_shared_string
assert_equal '"234567890"', JSON.dump(s[2..-1])
s = '01234567890123456789"a"b"c"d"e"f"g"h'
assert_equal '"\"a\"b\"c\"d\"e\"f\"g\""', JSON.dump(s[20, 15])
s = "0123456789001234567890012345678900123456789001234567890"
assert_equal '"23456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
s = "0123456789001234567890012345678900123456789001234567890"
assert_equal '"567890012345678900123456789001234567890012345678"', JSON.dump(s[5..-3])
end

def test_unicode
Expand Down