20
20
#include " codegen/proxy/csv_scanner_proxy.h"
21
21
#include " codegen/proxy/runtime_functions_proxy.h"
22
22
#include " codegen/type/sql_type.h"
23
+ #include " codegen/vector.h"
23
24
#include " planner/csv_scan_plan.h"
24
25
25
26
namespace peloton {
@@ -28,23 +29,25 @@ namespace codegen {
28
29
// Constructor for the CSV scan translator (shown here in diff form).
// On the '+' side of this diff the scan plan is forwarded to the
// OperatorTranslator base instead of being stored in a scan_ member, and the
// scanner's state slot is registered through context.GetQueryState() rather
// than context.GetRuntimeState(). The id returned by RegisterState() is kept
// in scanner_id_, and the plan's attributes are loaded into
// output_attributes_.
CSVScanTranslator::CSVScanTranslator (const planner::CSVScanPlan &scan,
29
30
CompilationContext &context,
30
31
Pipeline &pipeline)
31
- : OperatorTranslator(context, pipeline), scan_(scan ) {
32
+ : OperatorTranslator(scan, context, pipeline ) {
32
33
// Register the CSV scanner instance
33
- auto &runtime_state = context.GetRuntimeState ();
34
- scanner_id_ = runtime_state .RegisterState (
34
+ auto &query_state = context.GetQueryState ();
35
+ scanner_id_ = query_state .RegisterState (
35
36
" csvScanner" , CSVScannerProxy::GetType (GetCodeGen ()));
36
37
37
38
// Load information about the attributes output by the scan plan
38
- scan_ .GetAttributes (output_attributes_);
39
+ scan .GetAttributes (output_attributes_);
39
40
}
40
41
41
- void CSVScanTranslator::InitializeState () {
42
+ void CSVScanTranslator::InitializeQueryState () {
42
43
auto &codegen = GetCodeGen ();
43
44
45
+ auto &scan = GetPlanAs<planner::CSVScanPlan>();
46
+
44
47
// Arguments
45
48
llvm::Value *scanner_ptr = LoadStatePtr (scanner_id_);
46
- llvm::Value *exec_ctx_ptr = GetCompilationContext (). GetExecutorContextPtr ();
47
- llvm::Value *file_path = codegen.ConstString (scan_ .GetFileName (), " filePath" );
49
+ llvm::Value *exec_ctx_ptr = GetExecutorContextPtr ();
50
+ llvm::Value *file_path = codegen.ConstString (scan .GetFileName (), " filePath" );
48
51
49
52
auto num_cols = static_cast <uint32_t >(output_attributes_.size ());
50
53
@@ -71,20 +74,24 @@ void CSVScanTranslator::InitializeState() {
71
74
// Cast the runtime type to an opaque void*. This is because we're calling
72
75
// into pre-compiled C++ that doesn't know what the dynamically generated
73
76
// RuntimeState* looks like.
74
- llvm::Value *runtime_state_ptr = codegen->CreatePointerCast (
77
+ llvm::Value *query_state_ptr = codegen->CreatePointerCast (
75
78
codegen.GetState (), codegen.VoidType ()->getPointerTo ());
76
79
77
80
// Call CSVScanner::Init()
78
81
codegen.Call (CSVScannerProxy::Init,
79
82
{scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
80
- codegen.Const32 (num_cols), consumer_func, runtime_state_ptr ,
81
- codegen.Const8 (scan_ .GetDelimiterChar ()),
82
- codegen.Const8 (scan_ .GetQuoteChar ()),
83
- codegen.Const8 (scan_ .GetEscapeChar ())});
83
+ codegen.Const32 (num_cols), consumer_func, query_state_ptr ,
84
+ codegen.Const8 (scan .GetDelimiterChar ()),
85
+ codegen.Const8 (scan .GetQuoteChar ()),
86
+ codegen.Const8 (scan .GetEscapeChar ())});
84
87
}
85
88
86
89
namespace {
87
90
91
+ /* *
92
+ * This is a deferred column access class configured to load the contents of a
93
+ * given column.
94
+ */
88
95
class CSVColumnAccess : public RowBatch ::AttributeAccess {
89
96
public:
90
97
CSVColumnAccess (const planner::AttributeInfo *ai, llvm::Value *csv_columns,
@@ -94,6 +101,12 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
94
101
null_str_(std::move(null_str)),
95
102
runtime_null_(runtime_null_str) {}
96
103
104
+ // ////////////////////////////////////////////////////////////////////////////
105
+ // /
106
+ // / Accessors
107
+ // /
108
+ // ////////////////////////////////////////////////////////////////////////////
109
+
97
110
llvm::Value *Columns () const { return csv_columns_; }
98
111
99
112
uint32_t ColumnIndex () const { return ai_->attribute_id ; }
@@ -102,6 +115,25 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
102
115
103
116
const type::SqlType &SqlType () const { return ai_->type .GetSqlType (); }
104
117
118
+ // ////////////////////////////////////////////////////////////////////////////
119
+ // /
120
+ // / Logic
121
+ // /
122
+ // ////////////////////////////////////////////////////////////////////////////
123
+
124
+ /* *
125
+ * Check if a column's value is considered NULL. Given a pointer to the
126
+ * column's string value, and the length of the string, this function will
127
+ * check if the column's value is determined to be NULL. This is done by
128
+ * comparing the column's contents with the NULL string configured in the
129
+ * CSV scan plan (i.e., provided by the user).
130
+ *
131
+ * @param codegen The codegen instance
132
+ * @param data_ptr A pointer to the column's string value
133
+ * @param data_len The length of the column's string value
134
+ * @return True if the column is equivalent to the NULL string. False
135
+ * otherwise.
136
+ */
105
137
llvm::Value *IsNull (CodeGen &codegen, llvm::Value *data_ptr,
106
138
llvm::Value *data_len) const {
107
139
uint32_t null_str_len = static_cast <uint32_t >(null_str_.length ());
@@ -127,6 +159,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
127
159
return check_null.BuildPHI (cmp_res, codegen.ConstBool (false ));
128
160
}
129
161
162
+ /* *
163
+ * Load the value of the given column with the given type, ignoring a null
164
+ * check.
165
+ *
166
+ * @param codegen The codegen instance
167
+ * @param type The SQL type of the column
168
+ * @param data_ptr A pointer to the column's string representation
169
+ * @param data_len The length of the column's string representation
170
+ * @return The parsed value
171
+ */
130
172
Value LoadValueIgnoreNull (CodeGen &codegen, llvm::Value *type,
131
173
llvm::Value *data_ptr,
132
174
llvm::Value *data_len) const {
@@ -144,6 +186,15 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
144
186
}
145
187
}
146
188
189
+ /* *
190
+ * Access this column in the given row. In reality, this function pulls out
191
+ * the column information from the CSVScanner state and loads/parses the
192
+ * column's value.
193
+ *
194
+ * @param codegen The codegen instance
195
+ * @param row The row. This isn't used.
196
+ * @return The value of the column
197
+ */
147
198
Value Access (CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {
148
199
// Load the type, data pointer and length values for the column
149
200
auto *type = codegen->CreateConstInBoundsGEP2_32 (
@@ -178,22 +229,31 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
178
229
}
179
230
180
231
private:
232
+ // Information about the attribute
181
233
const planner::AttributeInfo *ai_;
234
+
235
+ // A pointer to the array of columns
182
236
llvm::Value *csv_columns_;
237
+
238
+ // The NULL string configured for the CSV scan
183
239
const std::string null_str_;
240
+
241
+ // The runtime NULL string (a constant in LLVM)
184
242
llvm::Value *runtime_null_;
185
243
};
186
244
187
245
} // namespace
188
246
247
+ // We define the callback/consumer function for CSV parsing here
189
248
void CSVScanTranslator::DefineAuxiliaryFunctions () {
190
249
CodeGen &codegen = GetCodeGen ();
191
250
CompilationContext &cc = GetCompilationContext ();
192
251
252
+ auto &scan = GetPlanAs<planner::CSVScanPlan>();
253
+
193
254
// Define consumer function here
194
255
std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
195
- {" runtimeState" ,
196
- cc.GetRuntimeState ().FinalizeType (codegen)->getPointerTo ()}};
256
+ {" queryState" , cc.GetQueryState ().GetType ()->getPointerTo ()}};
197
257
FunctionDeclaration decl{codegen.GetCodeContext (), " consumer" ,
198
258
FunctionDeclaration::Visibility::Internal,
199
259
codegen.VoidType (), arg_types};
@@ -209,13 +269,13 @@ void CSVScanTranslator::DefineAuxiliaryFunctions() {
209
269
llvm::Value *cols = codegen->CreateLoad (codegen->CreateConstInBoundsGEP2_32 (
210
270
CSVScannerProxy::GetType (codegen), LoadStatePtr (scanner_id_), 0 , 1 ));
211
271
212
- llvm::Value *null_str = codegen.ConstString (scan_ .GetNullString (), " null" );
272
+ llvm::Value *null_str = codegen.ConstString (scan .GetNullString (), " null" );
213
273
214
274
// Add accessors for all columns into the row batch
215
275
std::vector<CSVColumnAccess> column_accessors;
216
276
for (uint32_t i = 0 ; i < output_attributes_.size (); i++) {
217
277
column_accessors.emplace_back (output_attributes_[i], cols,
218
- scan_ .GetNullString (), null_str);
278
+ scan .GetNullString (), null_str);
219
279
}
220
280
for (uint32_t i = 0 ; i < output_attributes_.size (); i++) {
221
281
one.AddAttribute (output_attributes_[i], &column_accessors[i]);
@@ -238,17 +298,10 @@ void CSVScanTranslator::Produce() const {
238
298
GetCodeGen ().Call (CSVScannerProxy::Produce, {scanner_ptr});
239
299
}
240
300
241
// Tear-down hook, renamed by this diff from TearDownState() to
// TearDownQueryState(). It loads the scanner's state pointer using the id
// stored in scanner_id_ and emits a call to CSVScannerProxy::Destroy on it.
- void CSVScanTranslator::TearDownState () {
301
+ void CSVScanTranslator::TearDownQueryState () {
242
302
auto *scanner_ptr = LoadStatePtr (scanner_id_);
243
303
GetCodeGen ().Call (CSVScannerProxy::Destroy, {scanner_ptr});
244
304
}
245
305
246
// Removed by this diff (every line below is a '-' line). GetName() returned
// a string built with StringUtil::Format from the scan plan's file name,
// delimiter character, quote character and escape character.
- std::string CSVScanTranslator::GetName () const {
247
- return StringUtil::Format (
248
- " CSVScan(file: '%s', delimiter: '%c', quote: '%c', escape: '%c')" ,
249
- scan_.GetFileName ().c_str (), scan_.GetDelimiterChar (),
250
- scan_.GetQuoteChar (), scan_.GetEscapeChar ());
251
- }
252
-
253
306
} // namespace codegen
254
- } // namespace peloton
307
+ } // namespace peloton
0 commit comments