Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.

## Upcoming
- Added support for the `edgeOnlyLoad` parameter in Neptune Database bulk load operations ([Link to PR](https://github.com/aws/graph-notebook/pull/750))
- Added support for loading Parquet-format data in Neptune Analytics incremental load operations ([Link to PR](https://github.com/aws/graph-notebook/pull/752))

## Release 5.0.1 (May 19, 2025)
- Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735))
Expand Down
31 changes: 26 additions & 5 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from graph_notebook.magics.streams import StreamViewer
from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, \
DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, FORMAT_PARQUET, \
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, FORMAT_NTRIPLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
Expand Down Expand Up @@ -153,7 +153,7 @@

DEFAULT_NAMEDGRAPH_URI = "http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph"
DEFAULT_BASE_URI = "http://aws.amazon.com/neptune/default"
RDF_LOAD_FORMATS = [FORMAT_NTRIPLE, FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE]
RDF_LOAD_FORMATS = [FORMAT_NTRIPLE, FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, FORMAT_PARQUET]
BASE_URI_FORMATS = [FORMAT_RDFXML, FORMAT_TURTLE]
DEFAULT_LOAD_CONCURRENCY = 1

Expand Down Expand Up @@ -2095,6 +2095,8 @@ def load(self, line='', local_ns: dict = None):
parser.add_argument('--allow-empty-strings', action='store_true', default=False,
help='Load empty strings found in node and edge property values.')
parser.add_argument('-n', '--nopoll', action='store_true', default=False)
parser.add_argument('--edge-only-load', action='store_true', default=False,
help='Assume there are only edge files present - do not scan for vertex files before loading edge files.')

args = parser.parse_args(line.split())
button = widgets.Button(description="Submit")
Expand Down Expand Up @@ -2238,6 +2240,13 @@ def load(self, line='', local_ns: dict = None):
disabled=False,
layout=widgets.Layout(width=widget_width)
)

edge_only_load = widgets.Dropdown(
options=['TRUE', 'FALSE'],
value=str(args.edge_only_load).upper(),
disabled=False,
layout=widgets.Layout(width=widget_width)
)

# Create a series of HBox containers that will hold the widgets and labels
# that make up the %load form. Some of the labels and widgets are created
Expand Down Expand Up @@ -2347,6 +2356,13 @@ def load(self, line='', local_ns: dict = None):
justify_content="flex-end"))

poll_status_hbox = widgets.HBox([poll_status_label, poll_status])

edge_only_load_label = widgets.Label('Edge Only load:',
layout=widgets.Layout(width=label_width,
display="flex",
justify_content="flex-end"))

edge_only_load_hbox = widgets.HBox([edge_only_load_label, edge_only_load])

def update_edge_ids_options(change):
if change.new.lower() == FORMAT_OPENCYPHER:
Expand Down Expand Up @@ -2399,7 +2415,7 @@ def update_parserconfig_options(change):
# load arguments for Neptune bulk load
bulk_load_boxes = [arn_hbox, mode_hbox, parallelism_hbox, cardinality_hbox,
queue_hbox, dep_hbox, ids_hbox, allow_empty_strings_hbox,
named_graph_uri_hbox, base_uri_hbox, poll_status_hbox]
named_graph_uri_hbox, base_uri_hbox, poll_status_hbox, edge_only_load_hbox]
submit_load_boxes = [button, output]

if load_type == 'incremental':
Expand All @@ -2418,6 +2434,7 @@ def on_button_clicked(b):
base_uri_hbox.children = (base_uri_hbox_label, base_uri,)
dep_hbox.children = (dep_hbox_label, dependencies,)
concurrency_hbox.children = (concurrency_hbox_label, concurrency,)
edge_only_load_hbox.children = (edge_only_load_label, edge_only_load,)

validated = True
validation_label_style = DescriptionStyle(color='red')
Expand Down Expand Up @@ -2473,8 +2490,11 @@ def on_button_clicked(b):
'parallelism': parallelism.value,
'updateSingleCardinalityProperties': update_single_cardinality.value,
'queueRequest': queue_request.value,
'parserConfiguration': {}
'parserConfiguration': {},
}

if source_format.value.lower() == FORMAT_CSV or source_format.value.lower() == FORMAT_PARQUET:
bulk_load_kwargs['edgeOnlyLoad'] = edge_only_load.value

if dependencies:
bulk_load_kwargs['dependencies'] = dependencies_list
Expand All @@ -2491,7 +2511,7 @@ def on_button_clicked(b):
bulk_load_kwargs['parserConfiguration']['baseUri'] = base_uri.value

kwargs.update(bulk_load_kwargs)

print(kwargs)
source_hbox.close()
source_format_hbox.close()
region_hbox.close()
Expand All @@ -2508,6 +2528,7 @@ def on_button_clicked(b):
named_graph_uri_hbox.close()
base_uri_hbox.close()
concurrency_hbox.close()
edge_only_load_hbox.close()
button.close()

load_submit_status_output = widgets.Output()
Expand Down
13 changes: 11 additions & 2 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
FORMAT_CSV = 'csv'
FORMAT_OPENCYPHER = 'opencypher'
FORMAT_NTRIPLE = 'ntriples'
FORMAT_PARQUET = 'parquet'
FORMAT_NQUADS = 'nquads'
FORMAT_RDFXML = 'rdfxml'
FORMAT_TURTLE = 'turtle'
Expand All @@ -69,8 +70,16 @@
LOAD_JOB_MODES = [MODE_RESUME, MODE_NEW, MODE_AUTO]
DB_LOAD_TYPES = ['bulk']
ANALYTICS_LOAD_TYPES = ['incremental']
VALID_INCREMENTAL_FORMATS = ['', FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE]
VALID_BULK_FORMATS = VALID_INCREMENTAL_FORMATS + [FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE]
VALID_COMMON_FORMATS = ['', FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE]

# --------------
# Currently, the Parquet format is supported only for incremental loads, which are used exclusively with Neptune Analytics.
# Bulk loads (used with Neptune DB) do not support the Parquet format.
# This distinction is handled in the load magic function when it processes the format parameter.
VALID_INCREMENTAL_FORMATS = VALID_COMMON_FORMATS + [FORMAT_PARQUET]
VALID_BULK_FORMATS = VALID_COMMON_FORMATS + [FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE]
# --------------

PARALLELISM_OPTIONS = [PARALLELISM_LOW, PARALLELISM_MEDIUM, PARALLELISM_HIGH, PARALLELISM_OVERSUBSCRIBE]
LOADER_ACTION = 'loader'

Expand Down