@@ -20,6 +20,97 @@ package tensorflow.data_validation;
20
20
import "tensorflow_metadata/proto/v0/anomalies.proto" ;
21
21
import "tensorflow_metadata/proto/v0/path.proto" ;
22
22
23
+ // Use this proto to configure custom validations in TensorFlow Data Validation (TFDV).
24
+ // Two example usages follow: a single-feature validation and a feature-pair validation.
25
+ // -----------------------------------------------------------------------------
26
+ // Example Single-Feature Validation
27
+ // Statistics
28
+ // datasets {
29
+ // name: "All Examples"
30
+ // num_examples: 10
31
+ // features {
32
+ // path { step: 'test_feature' }
33
+ // type: INT
34
+ // num_stats { num_zeros: 5 max: 25 }
35
+ // }
36
+ // }
37
+ // CustomValidationConfig
38
+ // feature_validations {
39
+ // feature_path { step: 'test_feature' }
40
+ // validations {
41
+ // sql_expression: 'feature.num_stats.num_zeros < 3'
42
+ // severity: ERROR
43
+ // description: 'Feature has too many zeros.'
44
+ // }
45
+ // validations {
46
+ // sql_expression: 'feature.num_stats.max > 10'
47
+ // severity: ERROR
48
+ // description: 'Maximum value is too low.'
49
+ // }
50
+ // }
51
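+ // Anomalies (only the first validation fails: num_zeros is 5, which is not < 3; max is 25, which is > 10, so the second validation passes)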
52
+ // anomaly_info {
53
+ // key: 'test_feature'
54
+ // value: {
55
+ // path { step: 'test_feature' }
56
+ // severity: ERROR
57
+ // reason {
58
+ // type: CUSTOM_VALIDATION
59
+ // short_description: 'Feature has too many zeros.'
60
+ // description: 'Custom validation triggered anomaly. Query: feature.num_stats.num_zeros < 3 Test dataset: default slice'
61
+ // }
62
+ // }
63
+ // }
64
+ // -----------------------------------------------------------------------------
65
+ // Example Feature-Pair Validation
66
+ // Statistics
67
+ // Test statistics
68
+ // datasets {
69
+ // name: "slice_1"
70
+ // num_examples: 10
71
+ // features {
72
+ // path { step: 'test_feature' }
73
+ // type: INT
74
+ // num_stats { num_zeros: 5 max: 25 }
75
+ // }
76
+ // }
77
+ // Base statistics
78
+ // datasets {
79
+ // name: "slice_2"
80
+ // num_examples: 10
81
+ // features {
82
+ // path { step: 'test_feature' }
83
+ // type: INT
84
+ // num_stats { num_zeros: 1 max: 1 }
85
+ // }
86
+ // }
87
+ // CustomValidationConfig
88
+ // feature_pair_validations {
89
+ // dataset_name: 'slice_1'
90
+ // feature_test_path { step: 'test_feature' }
91
+ // base_dataset_name: 'slice_2'
92
+ // feature_base_path { step: 'test_feature' }
93
+ // validations {
94
+ // sql_expression: 'feature_test.num_stats.num_zeros < feature_base.num_stats.num_zeros'
95
+ // severity: ERROR
96
+ // description: 'Test feature has too many zeros.'
97
+ // }
98
+ // }
99
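+ // Anomalies (the validation fails: the test feature's num_zeros is 5, which is not < the base feature's num_zeros of 1)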
100
+ // anomaly_info {
101
+ // key: 'test_feature'
102
+ // value: {
103
+ // path { step: 'test_feature' }
104
+ // severity: ERROR
105
+ // reason {
106
+ // type: CUSTOM_VALIDATION
107
+ // short_description: 'Test feature has too many zeros.'
108
+ // description: 'Custom validation triggered anomaly. Query: feature_test.num_stats.num_zeros < feature_base.num_stats.num_zeros Test dataset: slice_1 Base dataset: slice_2 Base path: test_feature'
109
+ // }
110
+ // }
111
+ // }
112
+ // =============================================================================
113
+
23
114
message Validation {
24
115
// Expression to evaluate. If the expression returns false, the anomaly is
25
116
// returned.
0 commit comments