Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ install:

.PHONY: run map
map:
python examples/map.py
python examples/tasks/map/map.py
run: map

.PHONY: benchmark_match, benchmark_mapper, test
Expand Down
1 change: 1 addition & 0 deletions examples/kafka/s2_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

# User data to be sent
user_data = {"name": "John Doe", "age": 28}
print("User:", user_data)

# Produce message
producer.produce(topic='user-info', key=str(user_data['name']), value=user_data)
Expand Down
1 change: 1 addition & 0 deletions examples/kafka/s4_producer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

# Updated user data to be sent with the new schema
user_data = {"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"}
print("User:", user_data)

# Produce message
producer.produce(topic='user-info', key=str(user_data['name']), value=user_data)
Expand Down
37 changes: 36 additions & 1 deletion examples/tasks/assemble/assemble.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,43 @@
import pandas as pd

import llmint
from llmint.assemble.pandas import assemble, construct


def main():
llmint.assemble()
source_schema = '''
{
"fields": [
{"name": "Fname", "type": "string"},
{"name": "Lname", "type": "string"},
{"name": "Age", "type": "int"},
{"name": "Email", "type": ["null", "string"], "default": null}
]
}
'''
target_schema = '''
{
"fields": [
{"name": "name", "type": "string"},
{"name": "age", "type": "int"},
{"name": "email", "type": ["null", "string"], "default": null}
]
}
'''


source_df = pd.DataFrame([{"Fname": "Josh", "Lname": "Doe", "Age": 31, "Email": "joshdoe@example.com"}])
dest_df = pd.DataFrame([{"name": "Jane Doe", "age": 27, "email": "janedoe@example.com"}])
print("Concat the source dataframe to the dest dataframe:")
print("Source:", source_df, sep="\n")
print("Dest:", dest_df, sep="\n")

mappings = llmint.map(source_schema, target_schema)
assembly = assemble(mappings)
output = construct(source_df, assembly)

combined_df = pd.concat([dest_df, output], axis=0)
print("\nCombined:", combined_df, sep="\n")


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions llmint/assemble/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from llmint.assemble.pandas.function import assemble, construct
39 changes: 39 additions & 0 deletions llmint/assemble/pandas/function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pandas as pd
from typing import List, Callable

from llmint.assemble.pandas.transform import (
add, copy, default, missing, apply, scale, shift
)
from llmint.map.function import Map


def assemble(mappings: list[Map]):
output = []

for mapping in mappings:
match mapping.transformation.split(' ')[0]:
case 'ADD':
output.append(add(mapping))
case 'COPY':
output.append(copy(mapping))
case 'DEFAULT':
output.append(default(mapping))
case 'MISSING':
output.append(missing(mapping))
case 'APPLY':
output.append(apply(mapping))
case 'SCALE':
output.append(scale(mapping))
case 'SHIFT':
output.append(shift(mapping))

return output


def construct(df: pd.DataFrame, assembly: List[Callable[[pd.DataFrame], pd.Series]]):
df_output = []

for func in assembly:
df_output.append(func(df))

return pd.concat(df_output, axis=1)
8 changes: 8 additions & 0 deletions llmint/assemble/pandas/transform/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from llmint.assemble.pandas.transform.field.add import func as add
from llmint.assemble.pandas.transform.field.copy import func as copy
from llmint.assemble.pandas.transform.field.default import func as default
from llmint.assemble.pandas.transform.field.missing import func as missing

from llmint.assemble.pandas.transform.value.apply import func as apply
from llmint.assemble.pandas.transform.value.scale import func as scale
from llmint.assemble.pandas.transform.value.shift import func as shift
Empty file.
10 changes: 10 additions & 0 deletions llmint/assemble/pandas/transform/field/add.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import re
from pandas import Series

from llmint.map.function import Map


def func(mapping: Map):
col_type = re.search(r'TYPE (\w+)', mapping.transformation).group(1)

return lambda df: Series([], name=mapping.target_field, dtype=col_type)
7 changes: 7 additions & 0 deletions llmint/assemble/pandas/transform/field/copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pandas import Series

from llmint.map.function import Map


def func(mapping: Map):
return lambda df: Series(df[mapping.source_field], name=mapping.target_field)
10 changes: 10 additions & 0 deletions llmint/assemble/pandas/transform/field/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import re
from pandas import Series

from llmint.map.function import Map


def func(mapping: Map):
default_val = re.search(r'DEFAULT TO (.*)', mapping.transformation).group(1)

return lambda df: Series([default_val] * len(df), name=mapping.target_field)
5 changes: 5 additions & 0 deletions llmint/assemble/pandas/transform/field/missing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from llmint.map.function import Map


def func(mapping: Map):
return lambda df: print(f"WARNING: {mapping.target_field} field cannot be automatically converted.")
Empty file.
19 changes: 19 additions & 0 deletions llmint/assemble/pandas/transform/value/apply.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import re
from pandas import Series, DataFrame

from llmint.map.function import Map


def func(mapping: Map):
apply_func = re.search(r'APPLY (.*)', mapping.transformation).group(1)

def apply(df: DataFrame):
# assign all columns to their own variables
for col in df.columns:
exec(f'{col.replace(" ", "_")} = df[col]', locals(), globals())

exec(f'_output = {apply_func}', locals(), globals())

return Series(_output, name=mapping.target_field)

return apply
12 changes: 12 additions & 0 deletions llmint/assemble/pandas/transform/value/scale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import re

from llmint.map.function import Map


def func(mapping: Map):
try:
scale = float(re.search(r'SCALE BY (\d*.\d*)', mapping.transformation).group(1))
except ValueError:
return lambda df: df[mapping.source_field].copy()

return lambda df: df[mapping.source_field] * scale
12 changes: 12 additions & 0 deletions llmint/assemble/pandas/transform/value/shift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import re

from llmint.map.function import Map


def func(mapping: Map):
try:
shift = float(re.search(r'SHIFT BY (\d*.\d*)', mapping.transformation).group(1))
except ValueError:
return lambda df: df[mapping.source_field].copy()

return lambda df: df[mapping.source_field] + shift
21 changes: 11 additions & 10 deletions llmint/core/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from llmint.map.function import Map


class pcolors:
RIGHT = '\033[92m'
WRONG = '\033[91m'
Expand Down Expand Up @@ -43,13 +46,11 @@ def accuracy(output: list, test_example: list):
f1 = 0
return precision, recall, f1

def print_mappings(mappings: dict, include_reasoning=True):
for name, response in mappings.items():
mapping, reasoning = response
if include_reasoning:

print(pcolors.RIGHT + mapping + pcolors.ENDC + '\n',
reasoning, flush=True)
else:
print(pcolors.RIGHT + mapping + pcolors.ENDC,
flush=True)
def print_mappings(mappings: list[Map], include_reasoning=True):
for mapping in mappings:
if include_reasoning:
print(pcolors.RIGHT + mapping.__dict__ + pcolors.ENDC + '\n',
mapping.reasoning, flush=True)
else:
print(pcolors.RIGHT + mapping.__dict__ + pcolors.ENDC,
flush=True)
21 changes: 19 additions & 2 deletions llmint/map/function.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
from pydantic import BaseModel

from llmint.core import model
from llmint.map import prompt, parameter


class Map(BaseModel):
source_field: str | None
target_field: str
transformation: str
reasoning: str | None


def map(source_schema, target_schema):
mappings = model.call(
output = model.call(
prompt=[
{"role": "system", "content": prompt.system},
{"role": "user", "content": prompt.user.format(
Expand All @@ -15,5 +25,12 @@ def map(source_schema, target_schema):
temperature=parameter.temperature,
seed=parameter.seed,
max_model_call=1, # only one model call
)["tool_outputs"][0] # take the first tool output
)["tool_outputs"]

# process the mappings
mappings = []
for mapping in output:
for _, mapping in mapping.items():
mappings.append(mapping)

return mappings
10 changes: 6 additions & 4 deletions llmint/map/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
"llmint.map.match",
# field transformation
"llmint.map.transform.field.add",
"llmint.map.transform.field.cast",
# "llmint.map.transform.field.cast",
"llmint.map.transform.field.copy",
"llmint.map.transform.field.default",
"llmint.map.transform.field.delete",
"llmint.map.transform.field.rename",
# "llmint.map.transform.field.delete",
# "llmint.map.transform.field.rename",
"llmint.map.transform.field.missing",
# value transformation
"llmint.map.transform.value.apply",
# "llmint.map.transform.value.gen",
"llmint.map.transform.value.link",
# "llmint.map.transform.value.link",
"llmint.map.transform.value.scale",
"llmint.map.transform.value.shift",
]

reasoning = False
3 changes: 3 additions & 0 deletions llmint/map/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,6 @@

user = "Source Schema: ' + {source_schema} + " \
"'\nTarget Schema: ' + {target_schema}"

"""Reasoning prompt"""
reasoning_prompt = "In-depth reasoning as to why you chose this function"
46 changes: 26 additions & 20 deletions llmint/map/transform/field/add.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,37 @@
from libem.core.util import create_json_schema

from llmint.map.function import Map
from llmint.map.parameter import reasoning
from llmint.map.prompt import reasoning_prompt


name = "ADD"
description = "Add an optional target field"
properties = {
"target_field": (str, "Optional field in the target schema"),
"field_type": (str, "The type of the field to be added"),
}
if reasoning:
properties["reasoning"] = (str, reasoning_prompt)

schema = {
"type": "function",
"function": {
"name": name,
"description": "Add an optional target field",
"description": description,
"parameters": {
"type": "object",
"properties": {
"target_field": {
"type": "string",
"description": "Optional field in the target schema",
},
"field_type": {
"type": "string",
"description": "The type of the field to be added",
},
"reasoning": {
"type": "string",
"description": "In-depth reasoning as to why you chose this function",
},
},
"required": ["target_field", "field_type", "reasoning"],
},
"properties": create_json_schema(
**properties
)["properties"],
"required": list(properties.keys()),
}
}
}


def func(target_field, field_type, reasoning):
return (f'{{from: None, to: {target_field}, '
f'transformation: ADD {target_field} TYPE {field_type}}}', reasoning)
def func(target_field, field_type, reasoning=None):
return Map(source_field=None,
target_field=target_field,
transformation=f'ADD {target_field} TYPE {field_type}',
reasoning=reasoning)
9 changes: 7 additions & 2 deletions llmint/map/transform/field/cast.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from llmint.map.function import Map


name = "CAST"
schema = {
"type": "function",
Expand Down Expand Up @@ -35,5 +38,7 @@


def func(source_field, target_field, source_type, target_type, reasoning):
return (f'{{from: {source_field}, to: {target_field}, '
f'transformation: CAST {source_field} FROM {source_type} TO {target_type}}}', reasoning)
return Map(source_field=source_field,
target_field=target_field,
transformation=f'CAST FROM {source_type} TO {target_type}',
reasoning=reasoning)
Loading