Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit c817e00

Browse files
committed
* Propagate file options through optimization.
* Added codegen.cpp to the source validator whitelist, since we have the ability to call printf() from codegen for debugging.
* Beefed up overflow checks in NumericRuntime.
* Fixed tests.
1 parent 96de99c commit c817e00

File tree

13 files changed

+185
-56
lines changed

13 files changed

+185
-56
lines changed

script/validators/source_validator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@
5858
"src/network/protocol.cpp",
5959
"src/include/common/macros.h",
6060
"src/common/stack_trace.cpp",
61-
"src/include/parser/sql_scanner.h", # There is a free() in comments
6261
"src/include/index/bloom_filter.h",
6362
"src/include/index/compact_ints_key.h",
6463
"src/include/index/bwtree.h",
6564
"src/codegen/util/oa_hash_table.cpp",
66-
"src/codegen/util/cc_hash_table.cpp"
65+
"src/codegen/util/cc_hash_table.cpp",
66+
"src/codegen/codegen.cpp", # We allow calling printf() from codegen for debugging
6767
]
6868

6969
## ==============================================

src/function/numeric_functions.cpp

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,25 @@ T ParseInteger(const char *ptr, uint32_t len) {
216216
}
217217

218218
// Convert
219-
int64_t num = 0;
219+
uint64_t cutoff =
220+
static_cast<uint64_t>(negative ? -std::numeric_limits<int64_t>::min()
221+
: std::numeric_limits<int64_t>::max());
222+
uint64_t cutlimit = cutoff % 10;
223+
cutoff /= 10;
224+
225+
uint64_t num = 0;
220226
while (start < end) {
221227
if (*start < '0' || *start > '9') {
222228
break;
223229
}
224230

225-
num = (num * 10) + (*start - '0');
231+
uint32_t c = static_cast<uint32_t>(*start - '0');
232+
233+
if (num > cutoff || (num == cutoff && c > cutlimit)) {
234+
goto overflow;
235+
}
236+
237+
num = (num * 10) + c;
226238

227239
start++;
228240
}
@@ -234,8 +246,7 @@ T ParseInteger(const char *ptr, uint32_t len) {
234246

235247
// If we haven't consumed everything at this point, it was an invalid input
236248
if (start < end) {
237-
codegen::RuntimeFunctions::ThrowInvalidInputStringException();
238-
__builtin_unreachable();
249+
goto invalid;
239250
}
240251

241252
// Negate number if we need to
@@ -244,14 +255,21 @@ T ParseInteger(const char *ptr, uint32_t len) {
244255
}
245256

246257
// Range check
247-
if (num <= std::numeric_limits<T>::min() ||
248-
num >= std::numeric_limits<T>::max()) {
249-
codegen::RuntimeFunctions::ThrowOverflowException();
250-
__builtin_unreachable();
258+
if (static_cast<int64_t>(num) <= std::numeric_limits<T>::min() ||
259+
static_cast<int64_t>(num) >= std::numeric_limits<T>::max()) {
260+
goto overflow;
251261
}
252262

253263
// Done
254264
return static_cast<T>(num);
265+
266+
overflow:
267+
codegen::RuntimeFunctions::ThrowOverflowException();
268+
__builtin_unreachable();
269+
270+
invalid:
271+
codegen::RuntimeFunctions::ThrowInvalidInputStringException();
272+
__builtin_unreachable();
255273
}
256274

257275
} // namespace

src/include/optimizer/operators.h

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ class LogicalGet : public OperatorNode<LogicalGet> {
7373
class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
7474
public:
7575
static Operator make(oid_t get_id, ExternalFileFormat format,
76-
std::string file_name);
76+
std::string file_name, char delimiter, char quote,
77+
char escape);
7778

7879
bool operator==(const BaseOperatorNode &r) override;
7980

@@ -83,6 +84,9 @@ class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
8384
oid_t get_id;
8485
ExternalFileFormat format;
8586
std::string file_name;
87+
char delimiter;
88+
char quote;
89+
char escape;
8690
};
8791

8892
//===--------------------------------------------------------------------===//
@@ -330,14 +334,18 @@ class LogicalUpdate : public OperatorNode<LogicalUpdate> {
330334
class LogicalExportExternalFile
331335
: public OperatorNode<LogicalExportExternalFile> {
332336
public:
333-
static Operator make(ExternalFileFormat format, std::string file_name);
337+
static Operator make(ExternalFileFormat format, std::string file_name,
338+
char delimiter, char quote, char escape);
334339

335340
bool operator==(const BaseOperatorNode &r) override;
336341

337342
hash_t Hash() const override;
338343

339344
ExternalFileFormat format;
340345
std::string file_name;
346+
char delimiter;
347+
char quote;
348+
char escape;
341349
};
342350

343351
//===--------------------------------------------------------------------===//
@@ -410,7 +418,8 @@ class PhysicalIndexScan : public OperatorNode<PhysicalIndexScan> {
410418
class ExternalFileScan : public OperatorNode<ExternalFileScan> {
411419
public:
412420
static Operator make(oid_t get_id, ExternalFileFormat format,
413-
std::string file_name);
421+
std::string file_name, char delimiter, char quote,
422+
char escape);
414423

415424
bool operator==(const BaseOperatorNode &r) override;
416425

@@ -420,6 +429,9 @@ class ExternalFileScan : public OperatorNode<ExternalFileScan> {
420429
oid_t get_id;
421430
ExternalFileFormat format;
422431
std::string file_name;
432+
char delimiter;
433+
char quote;
434+
char escape;
423435
};
424436

425437
//===--------------------------------------------------------------------===//
@@ -617,14 +629,18 @@ class PhysicalUpdate : public OperatorNode<PhysicalUpdate> {
617629
class PhysicalExportExternalFile
618630
: public OperatorNode<PhysicalExportExternalFile> {
619631
public:
620-
static Operator make(ExternalFileFormat format, std::string file_name);
632+
static Operator make(ExternalFileFormat format, std::string file_name,
633+
char delimiter, char quote, char escape);
621634

622635
bool operator==(const BaseOperatorNode &r) override;
623636

624637
hash_t Hash() const override;
625638

626639
ExternalFileFormat format;
627640
std::string file_name;
641+
char delimiter;
642+
char quote;
643+
char escape;
628644
};
629645

630646
//===--------------------------------------------------------------------===//

src/include/parser/copy_statement.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,13 @@ class CopyStatement : public SQLStatement {
6262
std::string file_path;
6363

6464
// The format of the file
65-
ExternalFileFormat format;
65+
ExternalFileFormat format = ExternalFileFormat::CSV;
6666

6767
bool is_from;
6868

69-
char delimiter;
69+
char delimiter = ',';
70+
char quote = '"';
71+
char escape = '"';
7072
};
7173

7274
} // namespace parser

src/optimizer/operators.cpp

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,24 @@ bool LogicalGet::operator==(const BaseOperatorNode &r) {
6868
//===--------------------------------------------------------------------===//
6969

7070
Operator LogicalExternalFileGet::make(oid_t get_id, ExternalFileFormat format,
71-
std::string file_name) {
71+
std::string file_name, char delimiter,
72+
char quote, char escape) {
7273
auto *get = new LogicalExternalFileGet();
7374
get->get_id = get_id;
7475
get->format = format;
7576
get->file_name = std::move(file_name);
77+
get->delimiter = delimiter;
78+
get->quote = quote;
79+
get->escape = escape;
7680
return Operator(get);
7781
}
7882

7983
bool LogicalExternalFileGet::operator==(const BaseOperatorNode &node) {
8084
if (node.GetType() != OpType::LogicalExternalFileGet) return false;
8185
const auto &get = *static_cast<const LogicalExternalFileGet *>(&node);
8286
return (get_id == get.get_id && format == get.format &&
83-
file_name == get.file_name);
87+
file_name == get.file_name && delimiter == get.delimiter &&
88+
quote == get.quote && escape == get.escape);
8489
}
8590

8691
hash_t LogicalExternalFileGet::Hash() const {
@@ -89,6 +94,9 @@ hash_t LogicalExternalFileGet::Hash() const {
8994
hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
9095
hash = HashUtil::CombineHashes(
9196
hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
97+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
98+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
99+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
92100
return hash;
93101
}
94102

@@ -446,25 +454,34 @@ Operator LogicalLimit::make(int64_t offset, int64_t limit) {
446454
// External file output
447455
//===--------------------------------------------------------------------===//
448456
Operator LogicalExportExternalFile::make(ExternalFileFormat format,
449-
std::string file_name) {
457+
std::string file_name, char delimiter,
458+
char quote, char escape) {
450459
auto *export_op = new LogicalExportExternalFile();
451460
export_op->format = format;
452461
export_op->file_name = std::move(file_name);
462+
export_op->delimiter = delimiter;
463+
export_op->quote = quote;
464+
export_op->escape = escape;
453465
return Operator(export_op);
454466
}
455467

456468
bool LogicalExportExternalFile::operator==(const BaseOperatorNode &node) {
457469
if (node.GetType() != OpType::LogicalExportExternalFile) return false;
458470
const auto &export_op =
459471
*static_cast<const LogicalExportExternalFile *>(&node);
460-
return (format == export_op.format && file_name == export_op.file_name);
472+
return (format == export_op.format && file_name == export_op.file_name &&
473+
delimiter == export_op.delimiter && quote == export_op.quote &&
474+
escape == export_op.escape);
461475
}
462476

463477
hash_t LogicalExportExternalFile::Hash() const {
464478
hash_t hash = BaseOperatorNode::Hash();
465479
hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
466480
hash = HashUtil::CombineHashes(
467481
hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
482+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
483+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
484+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
468485
return hash;
469486
}
470487

@@ -567,19 +584,24 @@ hash_t PhysicalIndexScan::Hash() const {
567584
// Physical external file scan
568585
//===--------------------------------------------------------------------===//
569586
Operator ExternalFileScan::make(oid_t get_id, ExternalFileFormat format,
570-
std::string file_name) {
587+
std::string file_name, char delimiter,
588+
char quote, char escape) {
571589
auto *get = new ExternalFileScan();
572590
get->get_id = get_id;
573591
get->format = format;
574592
get->file_name = file_name;
593+
get->delimiter = delimiter;
594+
get->quote = quote;
595+
get->escape = escape;
575596
return Operator(get);
576597
}
577598

578599
bool ExternalFileScan::operator==(const BaseOperatorNode &node) {
579600
if (node.GetType() != OpType::QueryDerivedScan) return false;
580601
const auto &get = *static_cast<const ExternalFileScan *>(&node);
581602
return (get_id == get.get_id && format == get.format &&
582-
file_name == get.file_name);
603+
file_name == get.file_name && delimiter == get.delimiter &&
604+
quote == get.quote && escape == get.escape);
583605
}
584606

585607
hash_t ExternalFileScan::Hash() const {
@@ -588,6 +610,9 @@ hash_t ExternalFileScan::Hash() const {
588610
hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
589611
hash = HashUtil::CombineHashes(
590612
hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
613+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
614+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
615+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
591616
return hash;
592617
}
593618

@@ -845,25 +870,34 @@ Operator PhysicalUpdate::make(
845870
// PhysicalExportExternalFile
846871
//===--------------------------------------------------------------------===//
847872
Operator PhysicalExportExternalFile::make(ExternalFileFormat format,
848-
std::string file_name) {
873+
std::string file_name, char delimiter,
874+
char quote, char escape) {
849875
auto *export_op = new PhysicalExportExternalFile();
850876
export_op->format = format;
851877
export_op->file_name = file_name;
878+
export_op->delimiter = delimiter;
879+
export_op->quote = quote;
880+
export_op->escape = escape;
852881
return Operator(export_op);
853882
}
854883

855884
bool PhysicalExportExternalFile::operator==(const BaseOperatorNode &node) {
856885
if (node.GetType() != OpType::ExportExternalFile) return false;
857886
const auto &export_op =
858887
*static_cast<const PhysicalExportExternalFile *>(&node);
859-
return (format == export_op.format && file_name == export_op.file_name);
888+
return (format == export_op.format && file_name == export_op.file_name &&
889+
delimiter == export_op.delimiter && quote == export_op.quote &&
890+
escape == export_op.escape);
860891
}
861892

862893
hash_t PhysicalExportExternalFile::Hash() const {
863894
hash_t hash = BaseOperatorNode::Hash();
864895
hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
865896
hash = HashUtil::CombineHashes(
866897
hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
898+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
899+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
900+
hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
867901
return hash;
868902
}
869903

src/optimizer/plan_generator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ void PlanGenerator::Visit(const ExternalFileScan *op) {
143143

144144
// Create the plan
145145
output_plan_.reset(
146-
new planner::CSVScanPlan(op->file_name, std::move(cols)));
146+
new planner::CSVScanPlan(op->file_name, std::move(cols),
147+
op->delimiter, op->quote, op->escape));
147148
break;
148149
}
149150
}
@@ -388,7 +389,8 @@ void PlanGenerator::Visit(const PhysicalUpdate *op) {
388389

389390
void PlanGenerator::Visit(const PhysicalExportExternalFile *op) {
390391
unique_ptr<planner::AbstractPlan> export_plan{
391-
new planner::ExportExternalFilePlan(op->file_name)};
392+
new planner::ExportExternalFilePlan(op->file_name, op->delimiter,
393+
op->quote, op->escape)};
392394
export_plan->AddChild(move(children_plans_[0]));
393395
output_plan_ = move(export_plan);
394396
}

src/optimizer/query_to_operator_transformer.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,8 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
367367

368368
auto get_op =
369369
std::make_shared<OperatorExpression>(LogicalExternalFileGet::make(
370-
GetAndIncreaseGetId(), op->format, op->file_path));
370+
GetAndIncreaseGetId(), op->format, op->file_path, op->delimiter,
371+
op->quote, op->escape));
371372

372373
auto target_table =
373374
catalog::Catalog::GetInstance()
@@ -386,8 +387,9 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
386387
} else {
387388
op->table->Accept(this);
388389
}
389-
auto export_op = std::make_shared<OperatorExpression>(
390-
LogicalExportExternalFile::make(op->format, op->file_path));
390+
auto export_op =
391+
std::make_shared<OperatorExpression>(LogicalExportExternalFile::make(
392+
op->format, op->file_path, op->delimiter, op->quote, op->escape));
391393
export_op->PushChild(output_expr_);
392394
output_expr_ = export_op;
393395
}

src/optimizer/rule_impls.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ void LogicalExternalFileGetToPhysical::Transform(
455455
const auto *get = input->Op().As<LogicalExternalFileGet>();
456456

457457
auto result_plan = std::make_shared<OperatorExpression>(
458-
ExternalFileScan::make(get->get_id, get->format, get->file_name));
458+
ExternalFileScan::make(get->get_id, get->format, get->file_name,
459+
get->delimiter, get->quote, get->escape));
459460

460461
PELOTON_ASSERT(input->Children().empty());
461462

@@ -837,11 +838,12 @@ void LogicalExportToPhysicalExport::Transform(
837838
std::shared_ptr<OperatorExpression> input,
838839
std::vector<std::shared_ptr<OperatorExpression>> &transformed,
839840
UNUSED_ATTRIBUTE OptimizeContext *context) const {
840-
const auto *logical_export = input->Op().As<LogicalExportExternalFile>();
841+
const auto *export_op = input->Op().As<LogicalExportExternalFile>();
841842

842843
auto result_plan =
843844
std::make_shared<OperatorExpression>(PhysicalExportExternalFile::make(
844-
logical_export->format, logical_export->file_name));
845+
export_op->format, export_op->file_name, export_op->delimiter,
846+
export_op->quote, export_op->escape));
845847

846848
std::vector<std::shared_ptr<OperatorExpression>> children = input->Children();
847849
PELOTON_ASSERT(children.size() == 1);

0 commit comments

Comments
 (0)