From 1eb027c8cf6c3c3eb41a3bd53252f37ece2aed0a Mon Sep 17 00:00:00 2001 From: Rohit Jangid Date: Thu, 25 May 2023 10:40:44 +0530 Subject: [PATCH 1/2] feat: Add std.parseCsv and std.manifestCsv --- builtins.go | 167 ++++++++++++++++++ linter/internal/types/stdlib.go | 16 +- testdata/builtinManifestCsv.golden | 1 + testdata/builtinManifestCsv.jsonnet | 1 + testdata/builtinManifestCsv.linter.golden | 0 testdata/builtinManifestCsv2.golden | 1 + testdata/builtinManifestCsv2.jsonnet | 1 + testdata/builtinManifestCsv2.linter.golden | 0 testdata/builtinParseCsvWithHeader.golden | 6 + testdata/builtinParseCsvWithHeader.jsonnet | 1 + .../builtinParseCsvWithHeader.linter.golden | 0 testdata/builtinParseCsvWithHeader2.golden | 6 + testdata/builtinParseCsvWithHeader2.jsonnet | 1 + .../builtinParseCsvWithHeader2.linter.golden | 0 testdata/builtinParseCsvWithHeader3.golden | 6 + testdata/builtinParseCsvWithHeader3.jsonnet | 1 + .../builtinParseCsvWithHeader3.linter.golden | 0 17 files changed, 201 insertions(+), 7 deletions(-) create mode 100644 testdata/builtinManifestCsv.golden create mode 100644 testdata/builtinManifestCsv.jsonnet create mode 100644 testdata/builtinManifestCsv.linter.golden create mode 100644 testdata/builtinManifestCsv2.golden create mode 100644 testdata/builtinManifestCsv2.jsonnet create mode 100644 testdata/builtinManifestCsv2.linter.golden create mode 100644 testdata/builtinParseCsvWithHeader.golden create mode 100644 testdata/builtinParseCsvWithHeader.jsonnet create mode 100644 testdata/builtinParseCsvWithHeader.linter.golden create mode 100644 testdata/builtinParseCsvWithHeader2.golden create mode 100644 testdata/builtinParseCsvWithHeader2.jsonnet create mode 100644 testdata/builtinParseCsvWithHeader2.linter.golden create mode 100644 testdata/builtinParseCsvWithHeader3.golden create mode 100644 testdata/builtinParseCsvWithHeader3.jsonnet create mode 100644 testdata/builtinParseCsvWithHeader3.linter.golden diff --git a/builtins.go b/builtins.go index e426b069c..bd1866079 100644 --- a/builtins.go +++ b/builtins.go @@ -23,6 +23,7 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/base64" + "encoding/csv" "encoding/hex" "encoding/json" "fmt" @@ -1512,6 +1513,170 @@ func builtinParseYAML(i *interpreter, str value) (value, error) { return jsonToValue(i, elems[0]) } +func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) { + strv := arguments[0] + dv := arguments[1] + + sval, err := i.getString(strv) + if err != nil { + return nil, err + } + s := sval.getGoString() + + d := ',' // default delimiter + if dv.getType() != nullType { + dval, err := i.getString(dv) + if err != nil { + return nil, err + } + ds := dval.getGoString() + if len(ds) != 1 { + return nil, i.Error(fmt.Sprintf("Delimiter %s is invalid", ds)) + } + d = rune(ds[0]) // conversion to rune + } + + json := make([]interface{}, 0) + var keys []string + + reader := csv.NewReader(strings.NewReader(s)) + reader.Comma = d + + for row := 0; ; row++ { + record, err := reader.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, i.Error(fmt.Sprintf("failed to parse CSV: %s", err.Error())) + } + + if row == 0 { // consider first row as header + // detect and handle duplicate headers + keyCount := map[string]int{} + for _, k := range record { + keyCount[k]++ + if c := keyCount[k]; c > 1 { + keys = append(keys, fmt.Sprintf("%s__%d", k, c-1)) + } else { + keys = append(keys, k) + } + } + } else { + j := make(map[string]interface{}) + for i, k := range keys { + j[k] = record[i] + } + json = append(json, j) + } + } + return jsonToValue(i, json) +} + +func builtinManifestCsv(i *interpreter, arguments []value) (value, error) { + arrv := arguments[0] + hv := arguments[1] + + arr, err := i.getArray(arrv) + if err != nil { + return nil, err + } + + var headers []string + if hv.getType() == nullType { + if len(arr.elements) == 0 { // no elements to select headers + return makeValueString(""), nil + } + + // default to all headers + obj, err := i.evaluateObject(arr.elements[0]) + if err != nil { + return nil, err + } + + simpleObj := obj.uncached.(*simpleObject) + for fieldName := range simpleObj.fields { + headers = append(headers, fieldName) + } + } else { + // headers are provided + ha, err := i.getArray(hv) + if err != nil { + return nil, err + } + + for _, elem := range ha.elements { + header, err := i.evaluateString(elem) + if err != nil { + return nil, err + } + headers = append(headers, header.getGoString()) + } + } + + var buf bytes.Buffer + w := csv.NewWriter(&buf) + + // Write headers + w.Write(headers) + + // Write rest of the rows + for _, elem := range arr.elements { + obj, err := i.evaluateObject(elem) + if err != nil { + return nil, err + } + + record := make([]string, len(headers)) + for c, h := range headers { + val, err := obj.index(i, h) + if err != nil { // no corresponding column + // skip to next column + continue + } + + s, err := stringFromValue(i, val) + if err != nil { + return nil, err + } + record[c] = s + } + w.Write(record) + } + + w.Flush() + + return makeValueString(buf.String()), nil +} + +func stringFromValue(i *interpreter, v value) (string, error) { + switch v.getType() { + case stringType: + s, err := i.getString(v) + if err != nil { + return "", err + } + return s.getGoString(), nil + case numberType: + n, err := i.getNumber(v) + if err != nil { + return "", err + } + return fmt.Sprint(n.value), nil + case booleanType: + b, err := i.getBoolean(v) + if err != nil { + return "", err + } + return fmt.Sprint(b.value), nil + case nullType: + return "", nil + default: + // for functionType, objectType and arrayType + return "", i.Error("invalid string conversion") + } +} + func jsonEncode(v interface{}) (string, error) { buf := new(bytes.Buffer) enc := json.NewEncoder(buf) @@ -2520,6 +2685,8 @@ var funcBuiltins = buildBuiltinMap([]builtin{ &unaryBuiltin{name: "parseInt", function: builtinParseInt, params: ast.Identifiers{"str"}}, &unaryBuiltin{name: "parseJson", function: builtinParseJSON, params: ast.Identifiers{"str"}}, &unaryBuiltin{name: "parseYaml", function: builtinParseYAML, params: ast.Identifiers{"str"}}, + &generalBuiltin{name: "parseCsvWithHeader", function: builtinParseCSVWithHeader, params: []generalBuiltinParameter{{name: "str"}, {name: "delimiter", defaultValue: &nullValue}}}, + &generalBuiltin{name: "manifestCsv", function: builtinManifestCsv, params: []generalBuiltinParameter{{name: "json"}, {name: "headers", defaultValue: &nullValue}}}, &generalBuiltin{name: "manifestJsonEx", function: builtinManifestJSONEx, params: []generalBuiltinParameter{{name: "value"}, {name: "indent"}, {name: "newline", defaultValue: &valueFlatString{value: []rune("\n")}}, {name: "key_val_sep", defaultValue: &valueFlatString{value: []rune(": ")}}}}, diff --git a/linter/internal/types/stdlib.go b/linter/internal/types/stdlib.go index 63c0eed34..7dba54d38 100644 --- a/linter/internal/types/stdlib.go +++ b/linter/internal/types/stdlib.go @@ -106,13 +106,14 @@ func prepareStdlib(g *typeGraph) { // Parsing - "parseInt": g.newSimpleFuncType(numberType, "str"), - "parseOctal": g.newSimpleFuncType(numberType, "str"), - "parseHex": g.newSimpleFuncType(numberType, "str"), - "parseJson": g.newSimpleFuncType(jsonType, "str"), - "parseYaml": g.newSimpleFuncType(jsonType, "str"), - "encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"), - "decodeUTF8": g.newSimpleFuncType(stringType, "arr"), + "parseInt": g.newSimpleFuncType(numberType, "str"), + "parseOctal": g.newSimpleFuncType(numberType, "str"), + "parseHex": g.newSimpleFuncType(numberType, "str"), + "parseJson": g.newSimpleFuncType(jsonType, "str"), + "parseYaml": g.newSimpleFuncType(jsonType, "str"), + "parseCsvWithHeader": g.newFuncType(jsonType, []ast.Parameter{required("str"), optional("delimiter")}), + "encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"), + "decodeUTF8": g.newSimpleFuncType(stringType, "arr"), // Manifestation @@ -124,6 +125,7 @@ func prepareStdlib(g *typeGraph) { "manifestJsonMinified": g.newSimpleFuncType(stringType, "value"), "manifestYamlDoc": g.newFuncType(stringType, []ast.Parameter{required("value"), optional("indent_array_in_object"), optional("quote_keys")}), "manifestYamlStream": g.newSimpleFuncType(stringType, "value"), + "manifestCsv": g.newFuncType(stringType, []ast.Parameter{required("json"), optional("headers")}), "manifestXmlJsonml": g.newSimpleFuncType(stringType, "value"), // Arrays diff --git a/testdata/builtinManifestCsv.golden b/testdata/builtinManifestCsv.golden new file mode 100644 index 000000000..b87af8a56 --- /dev/null +++ b/testdata/builtinManifestCsv.golden @@ -0,0 +1 @@ +"head1,head2\nval1,val2\n,1\nval3,\n" diff --git a/testdata/builtinManifestCsv.jsonnet b/testdata/builtinManifestCsv.jsonnet new file mode 100644 index 000000000..24a8c0751 --- /dev/null +++ b/testdata/builtinManifestCsv.jsonnet @@ -0,0 +1 @@ +std.manifestCsv([{ "head1": "val1", "head2": "val2", "head3": "foo" }, { "head2": 1, "head3": "bar" }, { "head1": "val3" }], ["head1", "head2"]) \ No newline at end of file diff --git a/testdata/builtinManifestCsv.linter.golden b/testdata/builtinManifestCsv.linter.golden new file mode 100644 index 000000000..e69de29bb diff --git a/testdata/builtinManifestCsv2.golden b/testdata/builtinManifestCsv2.golden new file mode 100644 index 000000000..17dbafb14 --- /dev/null +++ b/testdata/builtinManifestCsv2.golden @@ -0,0 +1 @@ +"head1\nval1\nval2\n" diff --git a/testdata/builtinManifestCsv2.jsonnet b/testdata/builtinManifestCsv2.jsonnet new file mode 100644 index 000000000..724e6b60e --- /dev/null +++ b/testdata/builtinManifestCsv2.jsonnet @@ -0,0 +1 @@ +std.manifestCsv([{ "head1": "val1" }, { "head1": "val2" }]) \ No newline at end of file diff --git a/testdata/builtinManifestCsv2.linter.golden b/testdata/builtinManifestCsv2.linter.golden new file mode 100644 index 000000000..e69de29bb diff --git a/testdata/builtinParseCsvWithHeader.golden b/testdata/builtinParseCsvWithHeader.golden new file mode 100644 index 000000000..468a887a3 --- /dev/null +++ b/testdata/builtinParseCsvWithHeader.golden @@ -0,0 +1,6 @@ +[ + { + "head1": "val1", + "head2": "val2" + } +] diff --git a/testdata/builtinParseCsvWithHeader.jsonnet b/testdata/builtinParseCsvWithHeader.jsonnet new file mode 100644 index 000000000..8d50422ec --- /dev/null +++ b/testdata/builtinParseCsvWithHeader.jsonnet @@ -0,0 +1 @@ +std.parseCsvWithHeader("head1,head2\nval1,val2") \ No newline at end of file diff --git a/testdata/builtinParseCsvWithHeader.linter.golden b/testdata/builtinParseCsvWithHeader.linter.golden new file mode 100644 index 000000000..e69de29bb diff --git a/testdata/builtinParseCsvWithHeader2.golden b/testdata/builtinParseCsvWithHeader2.golden new file mode 100644 index 000000000..9bf9bc85a --- /dev/null +++ b/testdata/builtinParseCsvWithHeader2.golden @@ -0,0 +1,6 @@ +[ + { + "head1": "val1", + "head1__1": "val2" + } +] diff --git a/testdata/builtinParseCsvWithHeader2.jsonnet b/testdata/builtinParseCsvWithHeader2.jsonnet new file mode 100644 index 000000000..8bdb2d6a7 --- /dev/null +++ b/testdata/builtinParseCsvWithHeader2.jsonnet @@ -0,0 +1 @@ +std.parseCsvWithHeader("head1,head1\nval1,val2") \ No newline at end of file diff --git a/testdata/builtinParseCsvWithHeader2.linter.golden b/testdata/builtinParseCsvWithHeader2.linter.golden new file mode 100644 index 000000000..e69de29bb diff --git a/testdata/builtinParseCsvWithHeader3.golden b/testdata/builtinParseCsvWithHeader3.golden new file mode 100644 index 000000000..468a887a3 --- /dev/null +++ b/testdata/builtinParseCsvWithHeader3.golden @@ -0,0 +1,6 @@ +[ + { + "head1": "val1", + "head2": "val2" + } +] diff --git a/testdata/builtinParseCsvWithHeader3.jsonnet b/testdata/builtinParseCsvWithHeader3.jsonnet new file mode 100644 index 000000000..8826aed79 --- /dev/null +++ b/testdata/builtinParseCsvWithHeader3.jsonnet @@ -0,0 +1 @@ +std.parseCsvWithHeader("head1;head2\nval1;val2", ";") \ No newline at end of file diff --git a/testdata/builtinParseCsvWithHeader3.linter.golden b/testdata/builtinParseCsvWithHeader3.linter.golden new file mode 100644 index 000000000..e69de29bb From 4b9c10ebc134c53d114486211dea1d44f1cc3f02 Mon Sep 17 00:00:00 2001 From: Rohit Jangid Date: Tue, 27 Jun 2023 12:05:28 +0530 Subject: [PATCH 2/2] Add argument in parseCsvWithHeader function --- builtins.go | 39 +++++++++++++------ linter/internal/types/stdlib.go | 4 +- testdata/builtinParseCsvWithHeader2.golden | 3 +- testdata/builtinParseCsvWithHeader4.golden | 6 +++ testdata/builtinParseCsvWithHeader4.jsonnet | 1 + .../builtinParseCsvWithHeader4.linter.golden | 0 6 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 testdata/builtinParseCsvWithHeader4.golden create mode 100644 testdata/builtinParseCsvWithHeader4.jsonnet create mode 100644 testdata/builtinParseCsvWithHeader4.linter.golden diff --git a/builtins.go b/builtins.go index bd1866079..e4c523ac5 100644 --- a/builtins.go +++ b/builtins.go @@ -1516,6 +1516,7 @@ func builtinParseYAML(i *interpreter, str value) (value, error) { func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) { strv := arguments[0] dv := arguments[1] + odhv := arguments[2] sval, err := i.getString(strv) if err != nil { @@ -1536,6 +1537,15 @@ func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) d = rune(ds[0]) // conversion to rune } + odh := true // default value for overwrite_duplicate_headers + if odhv.getType() != nullType { + odhval, err := i.getBoolean(odhv) + if err != nil { + return nil, err + } + odh = odhval.value + } + json := make([]interface{}, 0) var keys []string @@ -1552,14 +1562,19 @@ func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) } if row == 0 { // consider first row as header - // detect and handle duplicate headers - keyCount := map[string]int{} - for _, k := range record { - keyCount[k]++ - if c := keyCount[k]; c > 1 { - keys = append(keys, fmt.Sprintf("%s__%d", k, c-1)) - } else { - keys = append(keys, k) + if odh { + // Overwrite duplicate headers + keys = record + } else { + // detect and handle duplicate headers + keyCount := map[string]int{} + for _, k := range record { + keyCount[k]++ + if c := keyCount[k]; c > 1 { + keys = append(keys, fmt.Sprintf("%s__%d", k, c-1)) + } else { + keys = append(keys, k) + } } } } else { @@ -2262,12 +2277,12 @@ func builtinAvg(i *interpreter, arrv value) (value, error) { if err != nil { return nil, err } - + len := float64(arr.length()) if len == 0 { return nil, i.Error("Cannot calculate average of an empty array.") } - + sumValue, err := builtinSum(i, arrv) if err != nil { return nil, err @@ -2277,7 +2292,7 @@ func builtinAvg(i *interpreter, arrv value) (value, error) { return nil, err } - avg := sum.value/len + avg := sum.value / len return makeValueNumber(avg), nil } @@ -2685,7 +2700,7 @@ var funcBuiltins = buildBuiltinMap([]builtin{ &unaryBuiltin{name: "parseInt", function: builtinParseInt, params: ast.Identifiers{"str"}}, &unaryBuiltin{name: "parseJson", function: builtinParseJSON, params: ast.Identifiers{"str"}}, &unaryBuiltin{name: "parseYaml", function: builtinParseYAML, params: ast.Identifiers{"str"}}, - &generalBuiltin{name: "parseCsvWithHeader", function: builtinParseCSVWithHeader, params: []generalBuiltinParameter{{name: "str"}, {name: "delimiter", defaultValue: &nullValue}}}, + &generalBuiltin{name: "parseCsvWithHeader", function: builtinParseCSVWithHeader, params: []generalBuiltinParameter{{name: "str"}, {name: "delimiter", defaultValue: &nullValue}, {name: "overwrite_duplicate_headers", defaultValue: &nullValue}}}, &generalBuiltin{name: "manifestCsv", function: builtinManifestCsv, params: []generalBuiltinParameter{{name: "json"}, {name: "headers", defaultValue: &nullValue}}}, &generalBuiltin{name: "manifestJsonEx", function: builtinManifestJSONEx, params: []generalBuiltinParameter{{name: "value"}, {name: "indent"}, {name: "newline", defaultValue: &valueFlatString{value: []rune("\n")}}, diff --git a/linter/internal/types/stdlib.go b/linter/internal/types/stdlib.go index 7dba54d38..f2a42de56 100644 --- a/linter/internal/types/stdlib.go +++ b/linter/internal/types/stdlib.go @@ -111,7 +111,7 @@ func prepareStdlib(g *typeGraph) { "parseHex": g.newSimpleFuncType(numberType, "str"), "parseJson": g.newSimpleFuncType(jsonType, "str"), "parseYaml": g.newSimpleFuncType(jsonType, "str"), - "parseCsvWithHeader": g.newFuncType(jsonType, []ast.Parameter{required("str"), optional("delimiter")}), + "parseCsvWithHeader": g.newFuncType(jsonType, []ast.Parameter{required("str"), optional("delimiter"), optional("overwrite_duplicate_headers")}), "encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"), "decodeUTF8": g.newSimpleFuncType(stringType, "arr"), @@ -154,7 +154,7 @@ func prepareStdlib(g *typeGraph) { "minArray": g.newFuncType(anyArrayType, []ast.Parameter{required("arr"), optional("keyF")}), "maxArray": g.newFuncType(anyArrayType, []ast.Parameter{required("arr"), optional("keyF")}), "contains": g.newSimpleFuncType(boolType, "arr", "elem"), - "avg": g.newSimpleFuncType(numberType, "arr"), + "avg": g.newSimpleFuncType(numberType, "arr"), "all": g.newSimpleFuncType(boolArrayType, "arr"), "any": g.newSimpleFuncType(boolArrayType, "arr"), "remove": g.newSimpleFuncType(anyArrayType, "arr", "elem"), diff --git a/testdata/builtinParseCsvWithHeader2.golden b/testdata/builtinParseCsvWithHeader2.golden index 9bf9bc85a..0be1184ad 100644 --- a/testdata/builtinParseCsvWithHeader2.golden +++ b/testdata/builtinParseCsvWithHeader2.golden @@ -1,6 +1,5 @@ [ { - "head1": "val1", - "head1__1": "val2" + "head1": "val2" } ] diff --git a/testdata/builtinParseCsvWithHeader4.golden b/testdata/builtinParseCsvWithHeader4.golden new file mode 100644 index 000000000..9bf9bc85a --- /dev/null +++ b/testdata/builtinParseCsvWithHeader4.golden @@ -0,0 +1,6 @@ +[ + { + "head1": "val1", + "head1__1": "val2" + } +] diff --git a/testdata/builtinParseCsvWithHeader4.jsonnet b/testdata/builtinParseCsvWithHeader4.jsonnet new file mode 100644 index 000000000..bebed6c58 --- /dev/null +++ b/testdata/builtinParseCsvWithHeader4.jsonnet @@ -0,0 +1 @@ +std.parseCsvWithHeader("head1,head1\nval1,val2", overwrite_duplicate_headers = false) \ No newline at end of file diff --git a/testdata/builtinParseCsvWithHeader4.linter.golden b/testdata/builtinParseCsvWithHeader4.linter.golden new file mode 100644 index 000000000..e69de29bb