Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions api/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ plugins {
}

dependencies {
implementation(platform(libs.jackson.bom))
implementation(libs.jackson.annotations)
api(platform(libs.jackson.bom))
api(libs.jackson.annotations)
implementation(libs.jackson.databind)
implementation(libs.jackson2.databind)
testImplementation(project(":docling-testcontainers"))
}
36 changes: 36 additions & 0 deletions api/src/main/java/ai/docling/api/DoclingApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,44 @@
*/
public interface DoclingApi {

/**
* Executes a health check for the API and retrieves the health status of the service.
*
* @return a {@link HealthCheckResponse} object containing the health status of the API.
*/
HealthCheckResponse health();

/**
* Converts the provided document source(s) into a processed document based on the specified options.
*
* @param request the {@link ConvertDocumentRequest} containing the source(s), conversion options, and optional target.
* @return a {@link ConvertDocumentResponse} containing the processed document data, processing details, and any errors.
*/
ConvertDocumentResponse convertSource(ConvertDocumentRequest request);

/**
* Creates and returns a builder instance capable of constructing a duplicate or modified
* version of the current API instance. The builder provides a customizable way to adjust
* configuration or properties before constructing a new API instance.
*
* @return a {@link DoclingApiBuilder} initialized with the state of the current API instance.
*/
<T extends DoclingApi, B extends DoclingApiBuilder<T, B>> DoclingApiBuilder<T, B> toBuilder();

/**
* A builder interface for constructing implementations of {@link DoclingApi}. This interface
* supports a fluent API for setting configuration properties before building an instance.
*
* @param <T> the type of the {@link DoclingApi} implementation being built.
* @param <B> the type of the concrete builder implementation.
*/
interface DoclingApiBuilder<T extends DoclingApi, B extends DoclingApiBuilder<T, B>> {
/**
* Builds and returns an instance of the specified type, representing the completed configuration
* of the builder. The returned instance is typically an implementation of the Docling API.
*
* @return an instance of type {@code T} representing a configured Docling API client.
*/
T build();
}
}
262 changes: 250 additions & 12 deletions api/src/main/java/ai/docling/api/convert/response/DocumentResponse.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,271 @@

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import org.jspecify.annotations.Nullable;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;

@JsonInclude(JsonInclude.Include.NON_NULL)
public record DocumentResponse(
import tools.jackson.databind.annotation.JsonDeserialize;
import tools.jackson.databind.annotation.JsonPOJOBuilder;

@JsonProperty("doctags_content") @Nullable String doctagsContent,
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = DocumentResponse.Builder.class)
@com.fasterxml.jackson.databind.annotation.JsonDeserialize(builder = DocumentResponse.Builder.class)
public interface DocumentResponse {
/**
* Retrieves the content of the doc tags, if available.
*
* @return the content of the doc tags, or null if not present
*/
@Nullable
String doctagsContent();

@JsonProperty("filename") String filename,
/**
* Retrieves the filename associated with the document.
*
* @return the filename of the document as a string
*/
String filename();

@JsonProperty("html_content") @Nullable String htmlContent,
/**
* Retrieves the HTML content associated with the document, if available.
*
* @return the HTML content as a string, or null if not present
*/
@Nullable
String htmlContent();

@JsonProperty("json_content") @Nullable Map<String, Object> jsonContent,
/**
* Retrieves the JSON content associated with the document.
*
* @return a map representing the JSON content, or an empty map if no JSON content is present
*/
Map<String, Object> jsonContent();

@JsonProperty("md_content") @Nullable String markdownContent,
/**
* Retrieves the Markdown content associated with the document, if available.
*
* @return the Markdown content as a string, or null if no Markdown content is present
*/
@Nullable
String markdownContent();

@JsonProperty("text_content") @Nullable String textContent
/**
* Retrieves the plain text content associated with the document, if available.
*
* @return the plain text content as a string, or null if no text content is present
*/
@Nullable
String textContent();

) {
/**
* Creates a new {@code Builder} instance initialized with the current state of the {@code DocumentResponse}.
*
* @return a {@code Builder} instance populated with the values from this {@code DocumentResponse}
*/
default Builder toBuilder() {
return new Builder(this);
}

/**
* Creates and returns a new instance of the {@code Builder} class, which can be used to
* construct a {@code DocumentResponse} object in a step-by-step manner.
*
* @return a new {@code Builder} instance
*/
static Builder builder() {
return new Builder();
}

/**
* Default implementation of the {@link DocumentResponse} interface.
* This record represents the response containing document data in various formats.
* It is an immutable data structure that consolidates information related to a document,
* such as its filename, content in multiple formats, and metadata.
*
* Each instance ensures the provided JSON content is unmodifiable by copying
* the input map if it is present, or initializing it to an empty map otherwise.
*/
record DefaultDocumentResponse(String doctagsContent,
String filename,
String htmlContent,
Map<String, Object> jsonContent,
String markdownContent,
String textContent) implements DocumentResponse {

public DocumentResponse {
if (jsonContent != null) {
jsonContent = new HashMap<>(jsonContent);
public DefaultDocumentResponse {
jsonContent = Optional.ofNullable(jsonContent)
.map(Map::copyOf)
.orElseGet(Map::of);
}

public DefaultDocumentResponse(Builder builder) {
this(builder.doctagsContent,
builder.filename,
builder.htmlContent,
builder.jsonContent,
builder.markdownContent,
builder.textContent);
}
}

/**
* A builder class for constructing instances of {@code DocumentResponse}.
*
* This class provides a step-by-step approach to configure and create a
* {@code DocumentResponse} object. Each method in this class sets a specific
* property of the object being built. Once all the desired properties are set,
* the {@code build} method is used to create the final {@code DocumentResponse}
* instance.
*
* The builder supports customization of various document-related attributes,
* including doc tags content, filename, HTML content, JSON content, Markdown
* content, and plain text content.
*
* By default, the builder initializes attributes with an empty state or default
* values. If a {@code DocumentResponse} instance is provided to the constructor,
* the builder is pre-populated with the attributes from the given response.
*
* This class is intended for internal use and is protected to restrict its
* accessibility outside the defining package or class hierarchy.
*/
@JsonPOJOBuilder(withPrefix = "")
@com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder(withPrefix = "")
class Builder {
protected String doctagsContent;
protected String filename;
protected String htmlContent;
protected Map<String, Object> jsonContent = new HashMap<>();
protected String markdownContent;
protected String textContent;

/**
* Constructs a new {@code Builder} instance.
*
* This constructor initializes a builder with default or empty states for all
* attributes. It is protected to restrict direct instantiation outside of the
* defining package or class hierarchy.
*
* The {@code Builder} class is primarily used to facilitate the creation of
* {@code DocumentResponse} objects through a step-by-step configuration process.
*/
protected Builder() {

}

/**
* Constructs a new {@code Builder} instance using the provided {@code DocumentResponse}.
*
* This constructor initializes the builder's fields with the data from the given
* {@code DocumentResponse} object. It allows for the creation of a {@code Builder}
* instance pre-populated with the state of an existing {@code DocumentResponse}.
*
* @param documentResponse the {@code DocumentResponse} instance whose data will
* populate the fields of this builder
*/
protected Builder(DocumentResponse documentResponse) {
this.doctagsContent = documentResponse.doctagsContent();
this.filename = documentResponse.filename();
this.htmlContent = documentResponse.htmlContent();
this.jsonContent = documentResponse.jsonContent();
this.markdownContent = documentResponse.markdownContent();
this.textContent = documentResponse.textContent();
}

/**
* Sets the doctags content for the builder instance.
*
* @param doctagsContent the doctags content to be set
* @return this Builder instance for method chaining
*/
@JsonProperty("doctags_content")
public Builder doctagsContent(String doctagsContent) {
this.doctagsContent = doctagsContent;
return this;
}

/**
* Sets the filename for the builder instance.
*
* @param filename the filename to be set
* @return this Builder instance for method chaining
*/
@JsonProperty("filename")
public Builder filename(String filename) {
this.filename = filename;
return this;
}

/**
* Sets the HTML content for the builder instance.
*
* @param htmlContent the HTML content to be set
* @return this Builder instance for method chaining
*/
@JsonProperty("html_content")
public Builder htmlContent(String htmlContent) {
this.htmlContent = htmlContent;
return this;
}

/**
* Sets the JSON content for the builder instance.
*
* The JSON content is represented as a map of key-value pairs, where the keys
* are {@code String} objects, and the values are {@code Object} instances.
*
* @param jsonContent the JSON content to be set, represented as a {@code Map<String, Object>}
* @return this {@link Builder} instance for method chaining
*/
@JsonProperty("json_content")
public Builder jsonContent(Map<String, Object> jsonContent) {
this.jsonContent = jsonContent;
return this;
}

/**
* Sets the Markdown content for this builder instance.
*
* The Markdown content represents the textual data formatted in Markdown syntax,
* which can include headings, lists, links, and other Markdown elements.
*
* @param markdownContent the Markdown content to be set, represented as a {@code String}
* @return this {@link Builder} instance for method chaining
*/
@JsonProperty("md_content")
public Builder markdownContent(String markdownContent) {
this.markdownContent = markdownContent;
return this;
}

/**
* Sets the plain text content for this builder instance.
*
* The plain text content represents unformatted textual data that can be
* used for display or processing purposes within the application.
*
* @param textContent the plain text content to be set, represented as a {@code String}
* @return this {@link Builder} instance for method chaining
*/
@JsonProperty("text_content")
public Builder textContent(String textContent) {
this.textContent = textContent;
return this;
}

/**
* Creates and returns a {@link DocumentResponse} instance based on the current state of this {@link Builder}.
*
* <p>The returned {@link DocumentResponse} will encapsulate the values configured in the builder,
* and further modifications to the builder instance will not affect the created {@code DocumentResponse}.
*
* @return a new {@code DocumentResponse} instance constructed from the builder's state
*/
public DocumentResponse build() {
return new DefaultDocumentResponse(this);
}
}
}
Loading