diff --git a/script/validators/source_validator.py b/script/validators/source_validator.py index 331bdc7c688..ad70c24c7e5 100755 --- a/script/validators/source_validator.py +++ b/script/validators/source_validator.py @@ -58,12 +58,12 @@ "src/network/protocol.cpp", "src/include/common/macros.h", "src/common/stack_trace.cpp", - "src/include/parser/sql_scanner.h", # There is a free() in comments "src/include/index/bloom_filter.h", "src/include/index/compact_ints_key.h", "src/include/index/bwtree.h", "src/codegen/util/oa_hash_table.cpp", - "src/codegen/util/cc_hash_table.cpp" + "src/codegen/util/cc_hash_table.cpp", + "src/codegen/codegen.cpp", # We allow calling printf() from codegen for debugging ] ## ============================================== diff --git a/src/binder/bind_node_visitor.cpp b/src/binder/bind_node_visitor.cpp index a6ffe17b322..eec8a03c091 100644 --- a/src/binder/bind_node_visitor.cpp +++ b/src/binder/bind_node_visitor.cpp @@ -166,7 +166,19 @@ void BindNodeVisitor::Visit(parser::DeleteStatement *node) { } void BindNodeVisitor::Visit(parser::LimitDescription *) {} -void BindNodeVisitor::Visit(parser::CopyStatement *) {} + +void BindNodeVisitor::Visit(parser::CopyStatement *node) { + context_ = std::make_shared(nullptr); + if (node->table != nullptr) { + node->table->Accept(this); + + // If the table is given, we're either writing or reading all columns + context_->GenerateAllColumnExpressions(node->select_list); + } else { + node->select_stmt->Accept(this); + } +} + void BindNodeVisitor::Visit(parser::CreateFunctionStatement *) {} void BindNodeVisitor::Visit(parser::CreateStatement *node) { node->TryBindDatabaseName(default_database_name_); diff --git a/src/catalog/abstract_catalog.cpp b/src/catalog/abstract_catalog.cpp index 53c0b938279..9d9934a7c61 100644 --- a/src/catalog/abstract_catalog.cpp +++ b/src/catalog/abstract_catalog.cpp @@ -35,6 +35,7 @@ #include "executor/plan_executor.h" #include "executor/seq_scan_executor.h" #include 
"executor/update_executor.h" +#include "expression/constant_value_expression.h" #include "storage/database.h" #include "storage/storage_manager.h" diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 0759da7d42f..adc4f77b66e 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -30,7 +30,7 @@ #include "codegen/code_context.h" #include "concurrency/transaction_manager_factory.h" #include "function/date_functions.h" -#include "function/decimal_functions.h" +#include "function/numeric_functions.h" #include "function/old_engine_string_functions.h" #include "function/timestamp_functions.h" #include "index/index_factory.h" @@ -1283,43 +1283,43 @@ void Catalog::InitializeFunctions() { AddBuiltinFunction("abs", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Abs", function::BuiltInFuncType{ - OperatorId::Abs, function::DecimalFunctions::_Abs}, + OperatorId::Abs, function::NumericFunctions::_Abs}, txn); AddBuiltinFunction( "sqrt", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang, "Sqrt", function::BuiltInFuncType{OperatorId::Sqrt, - function::DecimalFunctions::Sqrt}, + function::NumericFunctions::Sqrt}, txn); AddBuiltinFunction( "sqrt", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL, internal_lang, "Sqrt", function::BuiltInFuncType{OperatorId::Sqrt, - function::DecimalFunctions::Sqrt}, + function::NumericFunctions::Sqrt}, txn); AddBuiltinFunction( "sqrt", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang, "Sqrt", function::BuiltInFuncType{OperatorId::Sqrt, - function::DecimalFunctions::Sqrt}, + function::NumericFunctions::Sqrt}, txn); AddBuiltinFunction( "sqrt", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang, "Sqrt", function::BuiltInFuncType{OperatorId::Sqrt, - function::DecimalFunctions::Sqrt}, + function::NumericFunctions::Sqrt}, txn); AddBuiltinFunction( "sqrt", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Sqrt", function::BuiltInFuncType{OperatorId::Sqrt, - 
function::DecimalFunctions::Sqrt}, + function::NumericFunctions::Sqrt}, txn); AddBuiltinFunction( "floor", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Floor", function::BuiltInFuncType{OperatorId::Floor, - function::DecimalFunctions::_Floor}, + function::NumericFunctions::_Floor}, txn); /** @@ -1328,126 +1328,126 @@ void Catalog::InitializeFunctions() { AddBuiltinFunction("abs", {type::TypeId::TINYINT}, type::TypeId::TINYINT, internal_lang, "Abs", function::BuiltInFuncType{ - OperatorId::Abs, function::DecimalFunctions::_Abs}, + OperatorId::Abs, function::NumericFunctions::_Abs}, txn); AddBuiltinFunction("abs", {type::TypeId::SMALLINT}, type::TypeId::SMALLINT, internal_lang, "Abs", function::BuiltInFuncType{ - OperatorId::Abs, function::DecimalFunctions::_Abs}, + OperatorId::Abs, function::NumericFunctions::_Abs}, txn); AddBuiltinFunction("abs", {type::TypeId::INTEGER}, type::TypeId::INTEGER, internal_lang, "Abs", function::BuiltInFuncType{ - OperatorId::Abs, function::DecimalFunctions::_Abs}, + OperatorId::Abs, function::NumericFunctions::_Abs}, txn); AddBuiltinFunction("abs", {type::TypeId::BIGINT}, type::TypeId::BIGINT, internal_lang, "Abs", function::BuiltInFuncType{ - OperatorId::Abs, function::DecimalFunctions::_Abs}, + OperatorId::Abs, function::NumericFunctions::_Abs}, txn); AddBuiltinFunction( "floor", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang, "Floor", function::BuiltInFuncType{OperatorId::Floor, - function::DecimalFunctions::_Floor}, + function::NumericFunctions::_Floor}, txn); AddBuiltinFunction( "floor", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang, "Floor", function::BuiltInFuncType{OperatorId::Floor, - function::DecimalFunctions::_Floor}, + function::NumericFunctions::_Floor}, txn); AddBuiltinFunction( "floor", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang, "Floor", function::BuiltInFuncType{OperatorId::Floor, - function::DecimalFunctions::_Floor}, + 
function::NumericFunctions::_Floor}, txn); AddBuiltinFunction( "floor", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL, internal_lang, "Floor", function::BuiltInFuncType{OperatorId::Floor, - function::DecimalFunctions::_Floor}, + function::NumericFunctions::_Floor}, txn); AddBuiltinFunction( "round", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Round", function::BuiltInFuncType{OperatorId::Round, - function::DecimalFunctions::_Round}, + function::NumericFunctions::_Round}, txn); AddBuiltinFunction( "ceil", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceil", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceil", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceil", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceil", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceiling", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceiling", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", 
function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceiling", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceiling", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); AddBuiltinFunction( "ceiling", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang, "Ceil", function::BuiltInFuncType{OperatorId::Ceil, - function::DecimalFunctions::_Ceil}, + function::NumericFunctions::_Ceil}, txn); /** diff --git a/src/codegen/buffering_consumer.cpp b/src/codegen/buffering_consumer.cpp index 1edf1096b00..7316b8261f9 100644 --- a/src/codegen/buffering_consumer.cpp +++ b/src/codegen/buffering_consumer.cpp @@ -40,6 +40,15 @@ WrappedTuple &WrappedTuple::operator=(const WrappedTuple &o) { return *this; } +std::string WrappedTuple::ToCSV() const { + std::string ret; + for (uint32_t i = 0; i < tuple_.size(); i++) { + if (i != 0) ret.append(","); + ret.append(tuple_[i].ToString()); + } + return ret; +} + //===----------------------------------------------------------------------===// // BufferTuple() Proxy //===----------------------------------------------------------------------===// diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp index 251a4edd8bf..b810fd4c092 100644 --- a/src/codegen/codegen.cpp +++ b/src/codegen/codegen.cpp @@ -59,12 +59,30 @@ llvm::Constant *CodeGen::ConstDouble(double val) const { return llvm::ConstantFP::get(DoubleType(), val); } -llvm::Constant *CodeGen::ConstString(const std::string &s) const { +llvm::Value *CodeGen::ConstString(const std::string &str_val, + const std::string &name) const { // Strings are treated as arrays of bytes 
- auto *str = llvm::ConstantDataArray::getString(GetContext(), s); - return new llvm::GlobalVariable(GetModule(), str->getType(), true, - llvm::GlobalValue::InternalLinkage, str, - "str"); + auto *str = llvm::ConstantDataArray::getString(GetContext(), str_val); + auto *global_var = + new llvm::GlobalVariable(GetModule(), str->getType(), true, + llvm::GlobalValue::InternalLinkage, str, name); + return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)}); +} + +llvm::Value *CodeGen::ConstGenericBytes(const void *data, uint32_t length, + const std::string &name) const { + // Create the constant data array that wraps the input data + llvm::ArrayRef elements{reinterpret_cast(data), + length}; + auto *arr = llvm::ConstantDataArray::get(GetContext(), elements); + + // Create a global variable for the data + auto *global_var = + new llvm::GlobalVariable(GetModule(), arr->getType(), true, + llvm::GlobalValue::InternalLinkage, arr, name); + + // Return a pointer to the first element + return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)}); } llvm::Constant *CodeGen::Null(llvm::Type *type) const { @@ -75,11 +93,6 @@ llvm::Constant *CodeGen::NullPtr(llvm::PointerType *type) const { return llvm::ConstantPointerNull::get(type); } -llvm::Value *CodeGen::ConstStringPtr(const std::string &s) const { - auto &ir_builder = GetBuilder(); - return ir_builder.CreateConstInBoundsGEP2_32(nullptr, ConstString(s), 0, 0); -} - llvm::Value *CodeGen::AllocateVariable(llvm::Type *type, const std::string &name) { // To allocate a variable, a function must be under construction @@ -135,26 +148,68 @@ llvm::Value *CodeGen::CallFunc(llvm::Value *fn, return GetBuilder().CreateCall(fn, args); } -llvm::Value *CodeGen::CallPrintf(const std::string &format, - const std::vector &args) { +llvm::Value *CodeGen::Printf(const std::string &format, + const std::vector &args) { auto *printf_fn = LookupBuiltin("printf"); if (printf_fn == nullptr) { +#if GCC_AT_LEAST_6 +// In 
newer GCC versions (i.e., GCC 6+), function attributes are part of the +// type system and are attached to the function signature. For example, printf() +// comes with the "noexcept" attribute. Moreover, GCC 6+ will complain when +// attributes attached to a function (e.g., noexcept()) are not used at +// their call-site. Below, we use decltype(printf) to get the C/C++ function +// type of printf(...), but we discard the attributes since we don't need +// them. Hence, on GCC 6+, compilation will fail without adding the +// "-Wignored-attributes" flag. So, we add it here only. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif printf_fn = RegisterBuiltin( - "printf", llvm::TypeBuilder::get(GetContext()), + "printf", llvm::TypeBuilder::get(GetContext()), reinterpret_cast(printf)); +#if GCC_AT_LEAST_6 +#pragma GCC diagnostic pop +#endif } - auto &ir_builder = code_context_.GetBuilder(); - auto *format_str = - ir_builder.CreateGEP(ConstString(format), {Const32(0), Const32(0)}); // Collect all the arguments into a vector - std::vector printf_args{format_str}; + std::vector printf_args = {ConstString(format, "format")}; printf_args.insert(printf_args.end(), args.begin(), args.end()); - // Call the function + // Call printf() return CallFunc(printf_fn, printf_args); } +llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2, + llvm::Value *len) { + static constexpr char kMemcmpFnName[] = "memcmp"; + auto *memcmp_fn = LookupBuiltin(kMemcmpFnName); + if (memcmp_fn == nullptr) { +#if GCC_AT_LEAST_6 +// In newer GCC versions (i.e., GCC 6+), function attributes are part of the +// type system and are attached to the function signature. For example, memcmp() +// comes with the "throw()" attribute, among many others. Moreover, GCC 6+ will +// complain when attributes attached to a function are not used at their +// call-site. 
Below, we use decltype(memcmp) to get the C/C++ function type +// of memcmp(...), but we discard the attributes since we don't need them. +// Hence, on GCC 6+, compilation will fail without adding the +// "-Wignored-attributes" flag. So, we add it here only. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + memcmp_fn = RegisterBuiltin( + kMemcmpFnName, + llvm::TypeBuilder::get(GetContext()), + reinterpret_cast(memcmp)); +#if GCC_AT_LEAST_6 +#pragma GCC diagnostic pop +#endif + } + + // Call memcmp() + return CallFunc(memcmp_fn, {ptr1, ptr2, len}); +} + llvm::Value *CodeGen::Sqrt(llvm::Value *val) { llvm::Function *sqrt_func = llvm::Intrinsic::getDeclaration( &GetModule(), llvm::Intrinsic::sqrt, val->getType()); diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp new file mode 100644 index 00000000000..8084fca9bb7 --- /dev/null +++ b/src/codegen/operator/csv_scan_translator.cpp @@ -0,0 +1,307 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scan_translator.cpp +// +// Identification: src/codegen/operator/csv_scan_translator.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/operator/csv_scan_translator.h" + +#include "codegen/compilation_context.h" +#include "codegen/function_builder.h" +#include "codegen/lang/if.h" +#include "codegen/operator/projection_translator.h" +#include "codegen/pipeline.h" +#include "codegen/proxy/csv_scanner_proxy.h" +#include "codegen/proxy/runtime_functions_proxy.h" +#include "codegen/type/sql_type.h" +#include "codegen/vector.h" +#include "planner/csv_scan_plan.h" + +namespace peloton { +namespace codegen { + +CSVScanTranslator::CSVScanTranslator(const planner::CSVScanPlan &scan, + CompilationContext &context, + Pipeline &pipeline) + : 
OperatorTranslator(scan, context, pipeline) { + // Register the CSV scanner instance + auto &query_state = context.GetQueryState(); + scanner_id_ = query_state.RegisterState( + "csvScanner", CSVScannerProxy::GetType(GetCodeGen())); + + // Load information about the attributes output by the scan plan + scan.GetAttributes(output_attributes_); +} + +void CSVScanTranslator::InitializeQueryState() { + auto &codegen = GetCodeGen(); + + auto &scan = GetPlanAs(); + + // Arguments + llvm::Value *scanner_ptr = LoadStatePtr(scanner_id_); + llvm::Value *exec_ctx_ptr = GetExecutorContextPtr(); + llvm::Value *file_path = codegen.ConstString(scan.GetFileName(), "filePath"); + + auto num_cols = static_cast(output_attributes_.size()); + + // We need to generate an array of type::Type. To do so, we construct a vector + // of the types of the output columns, and we create an LLVM constant that is + // a copy of the underlying bytes. + + std::vector col_types_vec; + col_types_vec.reserve(num_cols); + for (const auto *ai : output_attributes_) { + col_types_vec.push_back(ai->type); + } + llvm::Value *raw_col_type_bytes = codegen.ConstGenericBytes( + col_types_vec.data(), static_cast(col_types_vec.capacity()), + "colTypes"); + llvm::Value *output_col_types = codegen->CreatePointerCast( + raw_col_type_bytes, TypeProxy::GetType(codegen)->getPointerTo()); + + // Now create a pointer to the consumer function + using ConsumerFuncType = void (*)(void *); + llvm::Value *consumer_func = codegen->CreatePointerCast( + consumer_func_, proxy::TypeBuilder::GetType(codegen)); + + // Cast the runtime type to an opaque void*. This is because we're calling + // into pre-compiled C++ that doesn't know that the dynamically generated + // RuntimeState* looks like. 
+ llvm::Value *query_state_ptr = + codegen->CreatePointerCast(codegen.GetState(), codegen.VoidPtrType()); + + // Call CSVScanner::Init() + codegen.Call(CSVScannerProxy::Init, + {scanner_ptr, exec_ctx_ptr, file_path, output_col_types, + codegen.Const32(num_cols), consumer_func, query_state_ptr, + codegen.Const8(scan.GetDelimiterChar()), + codegen.Const8(scan.GetQuoteChar()), + codegen.Const8(scan.GetEscapeChar())}); +} + +namespace { + +/** + * This is a deferred column access class configured to load the contents of a + * given column. + */ +class CSVColumnAccess : public RowBatch::AttributeAccess { + public: + CSVColumnAccess(const planner::AttributeInfo *ai, llvm::Value *csv_columns, + std::string null_str, llvm::Value *runtime_null_str) + : ai_(ai), + csv_columns_(csv_columns), + null_str_(std::move(null_str)), + runtime_null_(runtime_null_str) {} + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Accessors + /// + ////////////////////////////////////////////////////////////////////////////// + + llvm::Value *Columns() const { return csv_columns_; } + + uint32_t ColumnIndex() const { return ai_->attribute_id; } + + bool IsNullable() const { return ai_->type.nullable; } + + const type::SqlType &SqlType() const { return ai_->type.GetSqlType(); } + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Logic + /// + ////////////////////////////////////////////////////////////////////////////// + + /** + * Check if a column's value is considered NULL. Given a pointer to the + * column's string value, and the length of the string, this function will + * check if the column's value is determined to be NULL. This is done by + * comparing the column's contents with the NULL string configured in the + * CSV scan plan (i.e., provided by the user). 
+ * + * @param codegen The codegen instance + * @param data_ptr A pointer to the column's string value + * @param data_len The length of the column's string value + * @return True if the column is equivalent to the NULL string. False + * otherwise. + */ + llvm::Value *IsNull(CodeGen &codegen, llvm::Value *data_ptr, + llvm::Value *data_len) const { + uint32_t null_str_len = static_cast(null_str_.length()); + + // Is the length of the column value the same as the NULL string? + llvm::Value *eq_len = + codegen->CreateICmpEQ(data_len, codegen.Const32(null_str_len)); + + // If the null string is empty, generate simple comparison + if (null_str_len == 0) { + return eq_len; + } + + llvm::Value *cmp_res; + lang::If check_null{codegen, eq_len}; + { + // Do a memcmp against the NULL string + cmp_res = codegen.Memcmp(data_ptr, runtime_null_, + codegen.Const64(null_str_.length())); + cmp_res = codegen->CreateICmpEQ(cmp_res, codegen.Const32(0)); + } + check_null.EndIf(); + return check_null.BuildPHI(cmp_res, codegen.ConstBool(false)); + } + + /** + * Load the value of the given column with the given type, ignoring a null + * check. 
+ * + * @param codegen The codegen instance + * @param type The SQL type of the column + * @param data_ptr A pointer to the column's string representation + * @param data_len The length of the column's string representation + * @return The parsed value + */ + Value LoadValueIgnoreNull(CodeGen &codegen, llvm::Value *type, + llvm::Value *data_ptr, + llvm::Value *data_len) const { + auto *input_func = SqlType().GetInputFunction(codegen, ai_->type); + auto *raw_val = codegen.CallFunc(input_func, {type, data_ptr, data_len}); + if (SqlType().IsVariableLength()) { + // StrWithLen + llvm::Value *str_ptr = codegen->CreateExtractValue(raw_val, 0); + llvm::Value *str_len = codegen->CreateExtractValue(raw_val, 1); + return codegen::Value{ai_->type, str_ptr, str_len, + codegen.ConstBool(false)}; + } else { + return codegen::Value{ai_->type, raw_val, nullptr, + codegen.ConstBool(false)}; + } + } + + /** + * Access this column in the given row. In reality, this function pulls out + * the column information from the CSVScanner state and loads/parses the + * column's value. + * + * @param codegen The codegen instance + * @param row The row. This isn't used. 
+ * @return The value of the column + */ + Value Access(CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override { + // Load the type, data pointer and length values for the column + auto *type = codegen->CreateConstInBoundsGEP2_32( + CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 0); + auto *data_ptr = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32( + CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 1)); + auto *data_len = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32( + CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 2)); + + // If the valid isn't NULLable, avoid the null check here + if (!IsNullable()) { + return LoadValueIgnoreNull(codegen, type, data_ptr, data_len); + } + + // If the value isn't actually null, try to parse it + codegen::Value valid_val, null_val; + lang::If is_null{codegen, + codegen->CreateNot(IsNull(codegen, data_ptr, data_len))}; + { + // Load valid + valid_val = LoadValueIgnoreNull(codegen, type, data_ptr, data_len); + } + is_null.ElseBlock(); + { + // Default null + null_val = SqlType().GetNullValue(codegen); + } + is_null.EndIf(); + + // Return + return is_null.BuildPHI(valid_val, null_val); + } + + private: + // Information about the attribute + const planner::AttributeInfo *ai_; + + // A pointer to the array of columns + llvm::Value *csv_columns_; + + // The NULL string configured for the CSV scan + const std::string null_str_; + + // The runtime NULL string (a constant in LLVM) + llvm::Value *runtime_null_; +}; + +} // namespace + +// We define the callback/consumer function for CSV parsing here +void CSVScanTranslator::DefineAuxiliaryFunctions() { + CodeGen &codegen = GetCodeGen(); + CompilationContext &cc = GetCompilationContext(); + + auto &scan = GetPlanAs(); + + // Define consumer function here + std::vector arg_types = { + {"queryState", cc.GetQueryState().GetType()->getPointerTo()}}; + FunctionDeclaration decl{codegen.GetCodeContext(), 
"consumer", + FunctionDeclaration::Visibility::Internal, + codegen.VoidType(), arg_types}; + FunctionBuilder scan_consumer{codegen.GetCodeContext(), decl}; + { + ConsumerContext ctx{cc, GetPipeline()}; + + Vector v{nullptr, 1, nullptr}; + RowBatch one{GetCompilationContext(), codegen.Const32(0), + codegen.Const32(1), v, false}; + + // Load the pointer to the columns view + llvm::Value *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32( + CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 1)); + + llvm::Value *null_str = codegen.ConstString(scan.GetNullString(), "null"); + + // Add accessors for all columns into the row batch + std::vector column_accessors; + for (uint32_t i = 0; i < output_attributes_.size(); i++) { + column_accessors.emplace_back(output_attributes_[i], cols, + scan.GetNullString(), null_str); + } + for (uint32_t i = 0; i < output_attributes_.size(); i++) { + one.AddAttribute(output_attributes_[i], &column_accessors[i]); + } + + // Push the row through the pipeline + RowBatch::Row row{one, nullptr, nullptr}; + ctx.Consume(row); + + // Done + scan_consumer.ReturnAndFinish(); + } + + // The consumer function has been generated. Get a pointer to it now. 
+ consumer_func_ = scan_consumer.GetFunction(); +} + +void CSVScanTranslator::Produce() const { + auto *scanner_ptr = LoadStatePtr(scanner_id_); + GetCodeGen().Call(CSVScannerProxy::Produce, {scanner_ptr}); +} + +void CSVScanTranslator::TearDownQueryState() { + auto *scanner_ptr = LoadStatePtr(scanner_id_); + GetCodeGen().Call(CSVScannerProxy::Destroy, {scanner_ptr}); +} + +} // namespace codegen +} // namespace peloton diff --git a/src/codegen/proxy/csv_scanner_proxy.cpp b/src/codegen/proxy/csv_scanner_proxy.cpp new file mode 100644 index 00000000000..c13914fbecd --- /dev/null +++ b/src/codegen/proxy/csv_scanner_proxy.cpp @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scanner_proxy.cpp +// +// Identification: src/codegen/proxy/csv_scanner_proxy.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/proxy/csv_scanner_proxy.h" + +#include "codegen/proxy/executor_context_proxy.h" +#include "codegen/proxy/runtime_functions_proxy.h" + +namespace peloton { +namespace codegen { + +DEFINE_TYPE(CSVScanner, "util::CSVScanner", opaque1, cols, opaque2); + +DEFINE_TYPE(CSVScannerColumn, "util::CSVScanner::Column", type, ptr, len, + is_null); + +DEFINE_METHOD(peloton::codegen::util, CSVScanner, Init); +DEFINE_METHOD(peloton::codegen::util, CSVScanner, Destroy); +DEFINE_METHOD(peloton::codegen::util, CSVScanner, Produce); + +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/codegen/proxy/date_functions_proxy.cpp b/src/codegen/proxy/date_functions_proxy.cpp index de8f030ef4f..7bce9276f56 100644 --- a/src/codegen/proxy/date_functions_proxy.cpp +++ b/src/codegen/proxy/date_functions_proxy.cpp @@ -6,19 +6,24 @@ // // Identification: src/codegen/proxy/date_functions_proxy.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University 
Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/proxy/date_functions_proxy.h" +#include "codegen/proxy/runtime_functions_proxy.h" #include "codegen/proxy/type_builder.h" #include "function/date_functions.h" namespace peloton { namespace codegen { +// Utility functions DEFINE_METHOD(peloton::function, DateFunctions, Now); +// Input functions +DEFINE_METHOD(peloton::function, DateFunctions, InputDate); + } // namespace codegen } // namespace peloton diff --git a/src/codegen/proxy/decimal_functions_proxy.cpp b/src/codegen/proxy/decimal_functions_proxy.cpp deleted file mode 100644 index 4cbc6d05640..00000000000 --- a/src/codegen/proxy/decimal_functions_proxy.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// decimal_functions_proxy.cpp -// -// Identification: src/codegen/proxy/decimal_functions_proxy.cpp -// -// Copyright (c) 2015-2018, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "codegen/proxy/decimal_functions_proxy.h" - -#include "codegen/proxy/type_builder.h" -#include "function/decimal_functions.h" - -namespace peloton { -namespace codegen { - -DEFINE_METHOD(peloton::function, DecimalFunctions, Abs); - -DEFINE_METHOD(peloton::function, DecimalFunctions, Floor); - -DEFINE_METHOD(peloton::function, DecimalFunctions, Round); - -DEFINE_METHOD(peloton::function, DecimalFunctions, Ceil); - -} // namespace codegen -} // namespace peloton diff --git a/src/codegen/proxy/numeric_functions_proxy.cpp b/src/codegen/proxy/numeric_functions_proxy.cpp new file mode 100644 index 00000000000..133917b668d --- /dev/null +++ b/src/codegen/proxy/numeric_functions_proxy.cpp @@ -0,0 +1,37 @@ 
+//===----------------------------------------------------------------------===// +// +// Peloton +// +// numeric_functions_proxy.cpp +// +// Identification: src/codegen/proxy/numeric_functions_proxy.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/proxy/numeric_functions_proxy.h" + +#include "codegen/proxy/runtime_functions_proxy.h" +#include "codegen/proxy/type_builder.h" +#include "function/numeric_functions.h" + +namespace peloton { +namespace codegen { + +// Utility functions +DEFINE_METHOD(peloton::function, NumericFunctions, Abs); +DEFINE_METHOD(peloton::function, NumericFunctions, Floor); +DEFINE_METHOD(peloton::function, NumericFunctions, Round); +DEFINE_METHOD(peloton::function, NumericFunctions, Ceil); + +// Input functions +DEFINE_METHOD(peloton::function, NumericFunctions, InputBoolean); +DEFINE_METHOD(peloton::function, NumericFunctions, InputTinyInt); +DEFINE_METHOD(peloton::function, NumericFunctions, InputSmallInt); +DEFINE_METHOD(peloton::function, NumericFunctions, InputInteger); +DEFINE_METHOD(peloton::function, NumericFunctions, InputBigInt); +DEFINE_METHOD(peloton::function, NumericFunctions, InputDecimal); + +} // namespace codegen +} // namespace peloton diff --git a/src/codegen/proxy/runtime_functions_proxy.cpp b/src/codegen/proxy/runtime_functions_proxy.cpp index b406a50fcca..652d1ba2e08 100644 --- a/src/codegen/proxy/runtime_functions_proxy.cpp +++ b/src/codegen/proxy/runtime_functions_proxy.cpp @@ -26,6 +26,8 @@ DEFINE_TYPE(ColumnLayoutInfo, "peloton::ColumnLayoutInfo", col_start_ptr, DEFINE_TYPE(AbstractExpression, "peloton::expression::AbstractExpression", opaque); +DEFINE_TYPE(Type, "peloton::Type", opaque); + DEFINE_METHOD(peloton::codegen, RuntimeFunctions, HashMurmur3); DEFINE_METHOD(peloton::codegen, RuntimeFunctions, HashCrc64); DEFINE_METHOD(peloton::codegen, RuntimeFunctions, GetTileGroup); 
diff --git a/src/codegen/proxy/string_functions_proxy.cpp b/src/codegen/proxy/string_functions_proxy.cpp index 32e25ccc0e1..bc0a6bce6fd 100644 --- a/src/codegen/proxy/string_functions_proxy.cpp +++ b/src/codegen/proxy/string_functions_proxy.cpp @@ -13,6 +13,8 @@ #include "codegen/proxy/string_functions_proxy.h" #include "codegen/proxy/executor_context_proxy.h" +#include "codegen/proxy/pool_proxy.h" +#include "codegen/proxy/runtime_functions_proxy.h" namespace peloton { namespace codegen { @@ -20,18 +22,18 @@ namespace codegen { // StrWithLen struct DEFINE_TYPE(StrWithLen, "peloton::StrWithLen", str, length); -// String Function DEFINE_METHOD(peloton::function, StringFunctions, Ascii); DEFINE_METHOD(peloton::function, StringFunctions, Like); DEFINE_METHOD(peloton::function, StringFunctions, Length); -DEFINE_METHOD(peloton::function, StringFunctions, Substr); -DEFINE_METHOD(peloton::function, StringFunctions, Repeat); - -// Trim-related functions DEFINE_METHOD(peloton::function, StringFunctions, BTrim); DEFINE_METHOD(peloton::function, StringFunctions, Trim); DEFINE_METHOD(peloton::function, StringFunctions, LTrim); DEFINE_METHOD(peloton::function, StringFunctions, RTrim); +DEFINE_METHOD(peloton::function, StringFunctions, Substr); +DEFINE_METHOD(peloton::function, StringFunctions, Repeat); +DEFINE_METHOD(peloton::function, StringFunctions, CompareStrings); +DEFINE_METHOD(peloton::function, StringFunctions, WriteString); +DEFINE_METHOD(peloton::function, StringFunctions, InputString); } // namespace codegen } // namespace peloton diff --git a/src/codegen/proxy/tuple_runtime_proxy.cpp b/src/codegen/proxy/tuple_runtime_proxy.cpp deleted file mode 100644 index 128c938f522..00000000000 --- a/src/codegen/proxy/tuple_runtime_proxy.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// tuple_runtime_proxy.cpp -// -// Identification: src/codegen/proxy/tuple_runtime_proxy.cpp -// -// 
Copyright (c) 2015-2017, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "codegen/proxy/tuple_runtime_proxy.h" - -#include "codegen/tuple_runtime.h" -#include "codegen/proxy/pool_proxy.h" - -namespace peloton { -namespace codegen { - -DEFINE_METHOD(peloton::codegen, TupleRuntime, CreateVarlen); - -} // namespace codegen -} // namespace peloton diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp index 85f866e74f8..0c30ef1d4ac 100644 --- a/src/codegen/proxy/values_runtime_proxy.cpp +++ b/src/codegen/proxy/values_runtime_proxy.cpp @@ -6,13 +6,15 @@ // // Identification: src/codegen/proxy/values_runtime_proxy.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/proxy/value_proxy.h" +#include "codegen/proxy/runtime_functions_proxy.h" +#include "codegen/proxy/pool_proxy.h" namespace peloton { namespace codegen { @@ -27,7 +29,6 @@ DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputTimestamp); DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputDecimal); DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarchar); DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarbinary); -DEFINE_METHOD(peloton::codegen, ValuesRuntime, CompareStrings); } // namespace codegen } // namespace peloton diff --git a/src/codegen/query_compiler.cpp b/src/codegen/query_compiler.cpp index 104e4f5783a..d6aa9912d51 100644 --- a/src/codegen/query_compiler.cpp +++ b/src/codegen/query_compiler.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/query_compiler.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // 
//===----------------------------------------------------------------------===// @@ -46,6 +46,7 @@ std::unique_ptr QueryCompiler::Compile( bool QueryCompiler::IsSupported(const planner::AbstractPlan &plan) { switch (plan.GetPlanNodeType()) { case PlanNodeType::SEQSCAN: + case PlanNodeType::CSVSCAN: case PlanNodeType::ORDERBY: case PlanNodeType::DELETE: case PlanNodeType::INSERT: diff --git a/src/codegen/runtime_functions.cpp b/src/codegen/runtime_functions.cpp index 23dc1eec6c6..2bebfc150a6 100644 --- a/src/codegen/runtime_functions.cpp +++ b/src/codegen/runtime_functions.cpp @@ -255,5 +255,9 @@ void RuntimeFunctions::ThrowOverflowException() { throw std::overflow_error("ERROR: overflow"); } +void RuntimeFunctions::ThrowInvalidInputStringException() { + throw std::runtime_error("ERROR: invalid input string"); +} + } // namespace codegen } // namespace peloton \ No newline at end of file diff --git a/src/codegen/table_storage.cpp b/src/codegen/table_storage.cpp index 198c7df9f2a..e4240c6f7bc 100644 --- a/src/codegen/table_storage.cpp +++ b/src/codegen/table_storage.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/table_storage.cpp // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -14,7 +14,7 @@ #include "catalog/schema.h" #include "codegen/lang/if.h" -#include "codegen/proxy/tuple_runtime_proxy.h" +#include "codegen/proxy/string_functions_proxy.h" #include "codegen/type/sql_type.h" #include "codegen/type/type.h" #include "codegen/value.h" @@ -49,7 +49,7 @@ void TableStorage::StoreValues(CodeGen &codegen, llvm::Value *tuple_ptr, } value_is_null.ElseBlock(); { - codegen.Call(TupleRuntimeProxy::CreateVarlen, + codegen.Call(StringFunctionsProxy::WriteString, {value.GetValue(), value.GetLength(), val_ptr, pool}); } value_is_null.EndIf(); diff --git a/src/codegen/translator_factory.cpp 
b/src/codegen/translator_factory.cpp index 6fe1f65fce6..15b9dab7e7a 100644 --- a/src/codegen/translator_factory.cpp +++ b/src/codegen/translator_factory.cpp @@ -23,6 +23,7 @@ #include "codegen/expression/parameter_translator.h" #include "codegen/expression/tuple_value_translator.h" #include "codegen/operator/block_nested_loop_join_translator.h" +#include "codegen/operator/csv_scan_translator.h" #include "codegen/operator/delete_translator.h" #include "codegen/operator/global_group_by_translator.h" #include "codegen/operator/hash_group_by_translator.h" @@ -37,10 +38,12 @@ #include "expression/case_expression.h" #include "expression/comparison_expression.h" #include "expression/conjunction_expression.h" +#include "expression/constant_value_expression.h" #include "expression/function_expression.h" #include "expression/operator_expression.h" #include "expression/tuple_value_expression.h" #include "planner/aggregate_plan.h" +#include "planner/csv_scan_plan.h" #include "planner/delete_plan.h" #include "planner/hash_join_plan.h" #include "planner/hash_plan.h" @@ -67,6 +70,11 @@ std::unique_ptr TranslatorFactory::CreateTranslator( translator = new TableScanTranslator(scan, context, pipeline); break; } + case PlanNodeType::CSVSCAN: { + auto &scan = static_cast(plan_node); + translator = new CSVScanTranslator(scan, context, pipeline); + break; + } case PlanNodeType::PROJECTION: { auto &projection = static_cast(plan_node); diff --git a/src/codegen/tuple_runtime.cpp b/src/codegen/tuple_runtime.cpp deleted file mode 100644 index d065feed5d9..00000000000 --- a/src/codegen/tuple_runtime.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// tuple_runtime.cpp -// -// Identification: src/codegen/tuple_runtime.cpp -// -// Copyright (c) 2015-17, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include 
"codegen/tuple_runtime.h" -#include "type/abstract_pool.h" - -namespace peloton { -namespace codegen { - -void TupleRuntime::CreateVarlen(char *data, uint32_t len, char *buf, - peloton::type::AbstractPool *pool) { - struct varlen_t { - uint32_t len; - char data[0]; - }; - - auto *area = - reinterpret_cast(pool->Allocate(sizeof(uint32_t) + len)); - area->len = len; - PELOTON_MEMCPY(area->data, data, len); - - *reinterpret_cast(buf) = area; -} - -} // namespace codegen -} // namespace peloton diff --git a/src/codegen/type/array_type.cpp b/src/codegen/type/array_type.cpp index b99daa2a4ac..f9e6e49a677 100644 --- a/src/codegen/type/array_type.cpp +++ b/src/codegen/type/array_type.cpp @@ -61,9 +61,8 @@ static std::vector kNoArgOperatorTable = {}; Array::Array() : SqlType(peloton::type::TypeId::ARRAY), type_system_(kImplicitCastingTable, kExplicitCastingTable, - kComparisonTable, kUnaryOperatorTable, - kBinaryOperatorTable, kNaryOperatorTable, - kNoArgOperatorTable) {} + kComparisonTable, kUnaryOperatorTable, kBinaryOperatorTable, + kNaryOperatorTable, kNoArgOperatorTable) {} Value Array::GetMinValue(UNUSED_ATTRIBUTE CodeGen &codegen) const { throw Exception{"Arrays don't have minimum values ...."}; @@ -86,6 +85,12 @@ void Array::GetTypeForMaterialization( "Arrays currently do not have a materialization format. Fix me."}; } +llvm::Function *Array::GetInputFunction( + UNUSED_ATTRIBUTE CodeGen &codegen, + UNUSED_ATTRIBUTE const Type &type) const { + throw NotImplementedException{"Array's can't be input ... 
for now ..."}; +} + llvm::Function *Array::GetOutputFunction( UNUSED_ATTRIBUTE CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { diff --git a/src/codegen/type/bigint_type.cpp b/src/codegen/type/bigint_type.cpp index e20e3e0396f..45b43b3ad46 100644 --- a/src/codegen/type/bigint_type.cpp +++ b/src/codegen/type/bigint_type.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/type/bigint_type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -14,6 +14,7 @@ #include "codegen/lang/if.h" #include "codegen/value.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/decimal_type.h" @@ -190,8 +191,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { } Value Impl(CodeGen &codegen, const Value &val, - const TypeSystem::InvocationContext &ctx) - const override { + const TypeSystem::InvocationContext &ctx) const override { PELOTON_ASSERT(SupportsType(val.GetType())); // The BigInt subtraction implementation Sub sub; @@ -201,7 +201,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { // We want: raw_ret = (val < 0 ? 
0 - val : val) auto sub_result = sub.Impl(codegen, zero, val, ctx); auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue()); - auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); + auto *raw_ret = + codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); return Value{BigInt::Instance(), raw_ret}; } }; @@ -287,7 +288,7 @@ struct Sqrt : public TypeSystem::UnaryOperatorHandleNull { protected: Value Impl(CodeGen &codegen, const Value &val, UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx) - const override { + const override { auto casted = cast.Impl(codegen, val, Decimal::Instance()); auto *raw_ret = codegen.Sqrt(casted.GetValue()); return Value{Decimal::Instance(), raw_ret}; @@ -332,10 +333,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull { }; // Subtraction -bool Sub::SupportsTypes(const Type &left_type, - const Type &right_type) const { +bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const { return left_type.GetSqlType() == BigInt::Instance() && - left_type == right_type; + left_type == right_type; } Type Sub::ResultType(UNUSED_ATTRIBUTE const Type &left_type, @@ -350,7 +350,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right, // Do subtraction llvm::Value *overflow_bit = nullptr; llvm::Value *result = codegen.CallSubWithOverflow( - left.GetValue(), right.GetValue(), overflow_bit); + left.GetValue(), right.GetValue(), overflow_bit); if (ctx.on_error == OnError::Exception) { codegen.ThrowIfOverflow(overflow_bit); @@ -513,20 +513,17 @@ struct Modulo : public TypeSystem::BinaryOperatorHandleNull { std::vector kImplicitCastingTable = { peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL}; +// clang-format off // Explicit casts CastBigInt kCastBigInt; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BOOLEAN, - kCastBigInt}, - {peloton::type::TypeId::BIGINT, 
peloton::type::TypeId::TINYINT, - kCastBigInt}, - {peloton::type::TypeId::BIGINT, peloton::type::TypeId::SMALLINT, - kCastBigInt}, - {peloton::type::TypeId::BIGINT, peloton::type::TypeId::INTEGER, - kCastBigInt}, + {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BOOLEAN, kCastBigInt}, + {peloton::type::TypeId::BIGINT, peloton::type::TypeId::TINYINT, kCastBigInt}, + {peloton::type::TypeId::BIGINT, peloton::type::TypeId::SMALLINT, kCastBigInt}, + {peloton::type::TypeId::BIGINT, peloton::type::TypeId::INTEGER, kCastBigInt}, {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BIGINT, kCastBigInt}, - {peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL, - kCastBigInt}}; + {peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL, kCastBigInt}}; +// clang-format on // Comparison operations CompareBigInt kCompareBigInt; @@ -599,6 +596,11 @@ void BigInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = nullptr; } +llvm::Function *BigInt::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return NumericFunctionsProxy::InputBigInt.GetFunction(codegen); +} + llvm::Function *BigInt::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputBigInt.GetFunction(codegen); diff --git a/src/codegen/type/boolean_type.cpp b/src/codegen/type/boolean_type.cpp index 2580e210d4b..37668c761da 100644 --- a/src/codegen/type/boolean_type.cpp +++ b/src/codegen/type/boolean_type.cpp @@ -6,12 +6,13 @@ // // Identification: src/codegen/type/boolean_type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/type/boolean_type.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/integer_type.h" 
#include "codegen/type/varchar_type.h" @@ -84,7 +85,8 @@ struct CastBooleanToVarchar : public TypeSystem::CastHandleNull { // Convert this boolean (unsigned int) into a string llvm::Value *str_val = codegen->CreateSelect( - value.GetValue(), codegen.ConstString("T"), codegen.ConstString("F")); + value.GetValue(), codegen.ConstString("T", "true"), + codegen.ConstString("F", "false")); // We could be casting this non-nullable value to a nullable type llvm::Value *null = to_type.nullable ? codegen.ConstBool(false) : nullptr; @@ -250,18 +252,16 @@ struct LogicalOr : public TypeSystem::BinaryOperatorHandleNull { std::vector kImplicitCastingTable = { peloton::type::TypeId::BOOLEAN}; - +// clang-format off // Explicit casts CastBooleanToInteger kBooleanToInteger; CastBooleanToDecimal kBooleanToDecimal; CastBooleanToVarchar kBooleanToVarchar; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::INTEGER, - kBooleanToInteger}, - {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::VARCHAR, - kBooleanToVarchar}, - {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::DECIMAL, - kBooleanToDecimal}}; + {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::INTEGER, kBooleanToInteger}, + {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::VARCHAR, kBooleanToVarchar}, + {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::DECIMAL, kBooleanToDecimal}}; +// clang-format on // Comparison operations CompareBoolean kCompareBoolean; @@ -325,6 +325,11 @@ void Boolean::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = nullptr; } +llvm::Function *Boolean::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return NumericFunctionsProxy::InputBoolean.GetFunction(codegen); +} + llvm::Function *Boolean::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputBoolean.GetFunction(codegen); diff --git 
a/src/codegen/type/date_type.cpp b/src/codegen/type/date_type.cpp index 8f11f4d9ff1..5b541c32dcd 100644 --- a/src/codegen/type/date_type.cpp +++ b/src/codegen/type/date_type.cpp @@ -14,6 +14,7 @@ #include "codegen/lang/if.h" #include "codegen/value.h" +#include "codegen/proxy/date_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/integer_type.h" @@ -130,11 +131,12 @@ struct CompareDate : public TypeSystem::SimpleComparisonHandleNull { std::vector kImplicitCastingTable = { peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP}; +// clang-format off // Explicit casts CastDateToTimestamp kDateToTimestamp; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP, - kDateToTimestamp}}; + {peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP, kDateToTimestamp}}; +// clang-format on // Comparison operations CompareDate kCompareDate; @@ -187,6 +189,11 @@ void Date::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = nullptr; } +llvm::Function *Date::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return DateFunctionsProxy::InputDate.GetFunction(codegen); +} + llvm::Function *Date::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputDate.GetFunction(codegen); diff --git a/src/codegen/type/decimal_type.cpp b/src/codegen/type/decimal_type.cpp index f081013e0b2..7f527092a1d 100644 --- a/src/codegen/type/decimal_type.cpp +++ b/src/codegen/type/decimal_type.cpp @@ -13,7 +13,7 @@ #include "codegen/type/decimal_type.h" #include "codegen/lang/if.h" -#include "codegen/proxy/decimal_functions_proxy.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/integer_type.h" @@ -192,9 +192,9 @@ struct Abs : public 
TypeSystem::UnaryOperatorHandleNull { Value Impl(CodeGen &codegen, const Value &val, UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx) - const override { + const override { llvm::Value *raw_ret = - codegen.Call(DecimalFunctionsProxy::Abs, {val.GetValue()}); + codegen.Call(NumericFunctionsProxy::Abs, {val.GetValue()}); return Value{Decimal::Instance(), raw_ret}; } }; @@ -213,7 +213,7 @@ struct Floor : public TypeSystem::UnaryOperatorHandleNull { UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx) const override { llvm::Value *raw_ret = - codegen.Call(DecimalFunctionsProxy::Floor, {val.GetValue()}); + codegen.Call(NumericFunctionsProxy::Floor, {val.GetValue()}); return Value{Decimal::Instance(), raw_ret}; } }; @@ -232,7 +232,7 @@ struct Round : public TypeSystem::UnaryOperatorHandleNull { UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx) const override { llvm::Value *raw_ret = - codegen.Call(DecimalFunctionsProxy::Round, {val.GetValue()}); + codegen.Call(NumericFunctionsProxy::Round, {val.GetValue()}); return Value{Decimal::Instance(), raw_ret}; } }; @@ -252,7 +252,7 @@ struct Ceil : public TypeSystem::UnaryOperatorHandleNull { const override { PELOTON_ASSERT(SupportsType(val.GetType())); - auto *result = codegen.Call(DecimalFunctionsProxy::Ceil, {val.GetValue()}); + auto *result = codegen.Call(NumericFunctionsProxy::Ceil, {val.GetValue()}); return Value{Decimal::Instance(), result}; } @@ -473,21 +473,17 @@ struct Modulo : public TypeSystem::BinaryOperatorHandleNull { std::vector kImplicitCastingTable = { peloton::type::TypeId::DECIMAL}; +// clang-format off // Explicit casting rules CastDecimal kCastDecimal; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BOOLEAN, - kCastDecimal}, - {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::TINYINT, - kCastDecimal}, - {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::SMALLINT, - kCastDecimal}, - {peloton::type::TypeId::DECIMAL, 
peloton::type::TypeId::INTEGER, - kCastDecimal}, - {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BIGINT, - kCastDecimal}, - {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::DECIMAL, - kCastDecimal}}; + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BOOLEAN, kCastDecimal}, + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::TINYINT, kCastDecimal}, + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::SMALLINT, kCastDecimal}, + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::INTEGER, kCastDecimal}, + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BIGINT, kCastDecimal}, + {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::DECIMAL, kCastDecimal}}; +// clang-format on // Comparison operations CompareDecimal kCompareDecimal; @@ -562,6 +558,12 @@ void Decimal::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = nullptr; } +llvm::Function *Decimal::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + // TODO: We should be using the precision/scale in the input function + return NumericFunctionsProxy::InputDecimal.GetFunction(codegen); +} + llvm::Function *Decimal::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { // TODO: We should be using the precision/scale in the output function diff --git a/src/codegen/type/integer_type.cpp b/src/codegen/type/integer_type.cpp index dc49056a5d1..6d9a61ebde1 100644 --- a/src/codegen/type/integer_type.cpp +++ b/src/codegen/type/integer_type.cpp @@ -6,13 +6,14 @@ // // Identification: src/codegen/type/integer_type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/type/integer_type.h" #include "codegen/lang/if.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include
"codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/decimal_type.h" @@ -187,8 +188,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { } Value Impl(CodeGen &codegen, const Value &val, - const TypeSystem::InvocationContext &ctx) - const override { + const TypeSystem::InvocationContext &ctx) const override { // The integer subtraction implementation Sub sub; // Zero place-holder @@ -197,7 +197,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { // We want: raw_ret = (val < 0 ? 0 - val : val) auto sub_result = sub.Impl(codegen, zero, val, ctx); auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue()); - auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); + auto *raw_ret = + codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); return Value{Integer::Instance(), raw_ret}; } }; @@ -251,7 +252,7 @@ struct Floor : public TypeSystem::UnaryOperatorHandleNull { // Ceiling struct Ceil : public TypeSystem::UnaryOperatorHandleNull { CastInteger cast; - + bool SupportsType(const Type &type) const override { return type.GetSqlType() == Integer::Instance(); } @@ -283,7 +284,7 @@ struct Sqrt : public TypeSystem::UnaryOperatorHandleNull { protected: Value Impl(CodeGen &codegen, const Value &val, UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx) - const override { + const override { auto casted = cast.Impl(codegen, val, Decimal::Instance()); auto *raw_ret = codegen.Sqrt(casted.GetValue()); return Value{Decimal::Instance(), raw_ret}; @@ -328,10 +329,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull { }; // Subtraction -bool Sub::SupportsTypes(const Type &left_type, - const Type &right_type) const { +bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const { return left_type.GetSqlType() == Integer::Instance() && - left_type == right_type; + left_type == right_type; } Type Sub::ResultType(UNUSED_ATTRIBUTE const 
Type &left_type, @@ -346,7 +346,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right, // Do subtraction llvm::Value *overflow_bit = nullptr; llvm::Value *result = codegen.CallSubWithOverflow( - left.GetValue(), right.GetValue(), overflow_bit); + left.GetValue(), right.GetValue(), overflow_bit); if (ctx.on_error == OnError::Exception) { codegen.ThrowIfOverflow(overflow_bit); @@ -510,26 +510,21 @@ std::vector kImplicitCastingTable = { peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL}; +// clang-format off // Explicit casting rules CastInteger kCastInteger; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BOOLEAN, - kCastInteger}, - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::TINYINT, - kCastInteger}, - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::SMALLINT, - kCastInteger}, - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::INTEGER, - kCastInteger}, - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, - kCastInteger}, - {peloton::type::TypeId::INTEGER, peloton::type::TypeId::DECIMAL, - kCastInteger}}; + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BOOLEAN, kCastInteger}, + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::TINYINT, kCastInteger}, + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::SMALLINT, kCastInteger}, + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::INTEGER, kCastInteger}, + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, kCastInteger}, + {peloton::type::TypeId::INTEGER, peloton::type::TypeId::DECIMAL, kCastInteger}}; +// clang-format on // Comparison operations CompareInteger kCompareInteger; -std::vector kComparisonTable = { - {kCompareInteger}}; +std::vector kComparisonTable = {{kCompareInteger}}; // Unary operators Negate kNegOp; @@ -599,6 +594,11 @@ void Integer::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = 
nullptr; } +llvm::Function *Integer::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return NumericFunctionsProxy::InputInteger.GetFunction(codegen); +} + llvm::Function *Integer::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputInteger.GetFunction(codegen); diff --git a/src/codegen/type/smallint_type.cpp b/src/codegen/type/smallint_type.cpp index 408523ea583..b645af00ffe 100644 --- a/src/codegen/type/smallint_type.cpp +++ b/src/codegen/type/smallint_type.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/type/smallint_type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -14,6 +14,7 @@ #include "codegen/lang/if.h" #include "codegen/value.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/decimal_type.h" @@ -194,9 +195,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { } Value Impl(CodeGen &codegen, const Value &val, - const TypeSystem::InvocationContext &ctx) - const override { - + const TypeSystem::InvocationContext &ctx) const override { // The smallint subtraction implementation Sub sub; PELOTON_ASSERT(SupportsType(val.GetType())); @@ -206,7 +205,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull { // We want: raw_ret = (val < 0 ? 
0 - val : val) auto sub_result = sub.Impl(codegen, zero, val, ctx); auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue()); - auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); + auto *raw_ret = + codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue()); return Value{SmallInt::Instance(), raw_ret}; } }; @@ -338,10 +338,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull { // Subtraction -bool Sub::SupportsTypes(const Type &left_type, - const Type &right_type) const { +bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const { return left_type.GetSqlType() == SmallInt::Instance() && - left_type == right_type; + left_type == right_type; } Type Sub::ResultType(UNUSED_ATTRIBUTE const Type &left_type, @@ -356,7 +355,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right, // Do subtraction llvm::Value *overflow_bit = nullptr; llvm::Value *result = codegen.CallSubWithOverflow( - left.GetValue(), right.GetValue(), overflow_bit); + left.GetValue(), right.GetValue(), overflow_bit); if (ctx.on_error == OnError::Exception) { codegen.ThrowIfOverflow(overflow_bit); @@ -522,21 +521,17 @@ std::vector kImplicitCastingTable = { peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL}; +// clang-format off // Explicit casting rules CastSmallInt kCastSmallInt; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BOOLEAN, - kCastSmallInt}, - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::TINYINT, - kCastSmallInt}, - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::SMALLINT, - kCastSmallInt}, - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER, - kCastSmallInt}, - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BIGINT, - kCastSmallInt}, - {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::DECIMAL, - 
kCastSmallInt}}; + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BOOLEAN, kCastSmallInt}, + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::TINYINT, kCastSmallInt}, + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::SMALLINT, kCastSmallInt}, + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER, kCastSmallInt}, + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BIGINT, kCastSmallInt}, + {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::DECIMAL, kCastSmallInt}}; +// clang-format on // Comparison operations CompareSmallInt kCompareSmallInt; @@ -610,6 +605,11 @@ void SmallInt::GetTypeForMaterialization(CodeGen &codegen, len_type = nullptr; } +llvm::Function *SmallInt::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return NumericFunctionsProxy::InputSmallInt.GetFunction(codegen); +} + llvm::Function *SmallInt::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputSmallInt.GetFunction(codegen); diff --git a/src/codegen/type/sql_type.cpp b/src/codegen/type/sql_type.cpp index 6901976b008..49613d6d378 100644 --- a/src/codegen/type/sql_type.cpp +++ b/src/codegen/type/sql_type.cpp @@ -54,6 +54,12 @@ class Invalid : public SqlType, public Singleton { throw Exception{"INVALID type doesn't have a materialization type"}; } + llvm::Function *GetInputFunction( + UNUSED_ATTRIBUTE CodeGen &codegen, + UNUSED_ATTRIBUTE const Type &type) const override { + throw Exception{"INVALID type does not have an input function"}; + } + llvm::Function *GetOutputFunction( UNUSED_ATTRIBUTE CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const override { diff --git a/src/codegen/type/timestamp_type.cpp b/src/codegen/type/timestamp_type.cpp index 73603f222b2..68dcd180f0f 100644 --- a/src/codegen/type/timestamp_type.cpp +++ b/src/codegen/type/timestamp_type.cpp @@ -148,11 +148,12 @@ struct Now : public TypeSystem::NoArgOperator { std::vector 
kImplicitCastingTable = { peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP}; +// clang-format off // Explicit casts CastTimestampToDate kTimestampToDate; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::TIMESTAMP, peloton::type::TypeId::DATE, - kTimestampToDate}}; + {peloton::type::TypeId::TIMESTAMP, peloton::type::TypeId::DATE, kTimestampToDate}}; +// clang-format on // Comparisons CompareTimestamp kCompareTimestamp; @@ -209,6 +210,12 @@ void Timestamp::GetTypeForMaterialization(CodeGen &codegen, len_type = nullptr; } +llvm::Function *Timestamp::GetInputFunction( + UNUSED_ATTRIBUTE CodeGen &codegen, + UNUSED_ATTRIBUTE const Type &type) const { + throw NotImplementedException{"Timestamp input not implemented yet"}; +} + llvm::Function *Timestamp::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputTimestamp.GetFunction(codegen); diff --git a/src/codegen/type/tinyint_type.cpp b/src/codegen/type/tinyint_type.cpp index 254ef0d8e47..ab82f4982a2 100644 --- a/src/codegen/type/tinyint_type.cpp +++ b/src/codegen/type/tinyint_type.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/type/tinyint_type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -14,6 +14,7 @@ #include "codegen/lang/if.h" #include "codegen/value.h" +#include "codegen/proxy/numeric_functions_proxy.h" #include "codegen/proxy/values_runtime_proxy.h" #include "codegen/type/boolean_type.h" #include "codegen/type/decimal_type.h" @@ -516,21 +517,17 @@ std::vector kImplicitCastingTable = { peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL}; +// clang-format off // Explicit casting rules CastTinyInt kCastTinyInt; std::vector kExplicitCastingTable = { - {peloton::type::TypeId::TINYINT, 
peloton::type::TypeId::BOOLEAN, - kCastTinyInt}, - {peloton::type::TypeId::TINYINT, peloton::type::TypeId::TINYINT, - kCastTinyInt}, - {peloton::type::TypeId::TINYINT, peloton::type::TypeId::SMALLINT, - kCastTinyInt}, - {peloton::type::TypeId::TINYINT, peloton::type::TypeId::INTEGER, - kCastTinyInt}, - {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BIGINT, - kCastTinyInt}, - {peloton::type::TypeId::TINYINT, peloton::type::TypeId::DECIMAL, - kCastTinyInt}}; + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BOOLEAN, kCastTinyInt}, + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::TINYINT, kCastTinyInt}, + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::SMALLINT, kCastTinyInt}, + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::INTEGER, kCastTinyInt}, + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BIGINT, kCastTinyInt}, + {peloton::type::TypeId::TINYINT, peloton::type::TypeId::DECIMAL, kCastTinyInt}}; +// clang-format on // Comparison operations CompareTinyInt kCompareTinyInt; @@ -603,6 +600,11 @@ void TinyInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = nullptr; } +llvm::Function *TinyInt::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return NumericFunctionsProxy::InputTinyInt.GetFunction(codegen); +} + llvm::Function *TinyInt::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { return ValuesRuntimeProxy::OutputTinyInt.GetFunction(codegen); diff --git a/src/codegen/type/type.cpp b/src/codegen/type/type.cpp index ed8425302ee..9b4e8e7cf1b 100644 --- a/src/codegen/type/type.cpp +++ b/src/codegen/type/type.cpp @@ -6,7 +6,7 @@ // // Identification: src/codegen/type/type.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -18,15 +18,20 @@ 
// Primary constructor: every other constructor delegates here. Stores the
// type ID and nullability, then zeroes the auxiliary information (varlen
// size, numeric precision and scale) so it never holds indeterminate values.
// NOTE(review): if aux_info is a union, these writes alias each other;
// confirm against the declaration in type.h.
Type::Type(peloton::type::TypeId type_id, bool _nullable)
    : type_id(type_id), nullable(_nullable) {
  aux_info.varlen = 0;
  aux_info.numeric_info.precision = 0;
  aux_info.numeric_info.scale = 0;
}

// Default constructor: a non-nullable INVALID type.
Type::Type() : Type(peloton::type::TypeId::INVALID, false) {}

// Construct from a SQL type, taking only its type ID.
Type::Type(const SqlType &sql_type, bool _nullable)
    : Type(sql_type.TypeId(), _nullable) {}

// Equality compares the type ID only; nullability and the auxiliary
// information are ignored.
bool Type::operator==(const Type &other) const {
  // TODO(pmenon): This isn't correct; we need to check all other fields ...
  return type_id == other.type_id;
}
// Input-function hook for the VARBINARY SQL type. Reading a blob from its
// textual form is not supported yet, so this never returns: it always throws
// NotImplementedException. Neither argument is used.
llvm::Function *Varbinary::GetInputFunction(
    UNUSED_ATTRIBUTE CodeGen &codegen,
    UNUSED_ATTRIBUTE const Type &type) const {
  throw NotImplementedException{"Blob input not implemented yet"};
}
{OperatorId::Like, kLike}, + {OperatorId::DateTrunc, kDateTrunc}, + {OperatorId::DatePart, kDatePart}, + {OperatorId::BTrim, kBTrim}, + {OperatorId::LTrim, kLTrim}, + {OperatorId::RTrim, kRTrim}, {OperatorId::Repeat, kRepeat}}; // Nary operations @@ -596,6 +596,11 @@ void Varchar::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, len_type = codegen.Int32Type(); } +llvm::Function *Varchar::GetInputFunction( + CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { + return StringFunctionsProxy::InputString.GetFunction(codegen); +} + llvm::Function *Varchar::GetOutputFunction( CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const { // TODO: We should use the length information in the type? diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp new file mode 100644 index 00000000000..5f09349f973 --- /dev/null +++ b/src/codegen/util/csv_scanner.cpp @@ -0,0 +1,375 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scanner.cpp +// +// Identification: src/codegen/util/csv_scanner.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/util/csv_scanner.h" + +#include + +#include "common/exception.h" +#include "executor/executor_context.h" +#include "type/abstract_pool.h" +#include "util/string_util.h" + +namespace peloton { +namespace codegen { +namespace util { + +CSVScanner::CSVScanner(peloton::type::AbstractPool &pool, + const std::string &file_path, + const codegen::type::Type *col_types, uint32_t num_cols, + CSVScanner::Callback func, void *opaque_state, + char delimiter, char quote, char escape) + : memory_(pool), + file_path_(file_path), + file_(), + buffer_(nullptr), + buffer_pos_(0), + buffer_end_(0), + line_(nullptr), + line_len_(0), + line_maxlen_(0), + line_number_(0), + delimiter_(delimiter), + quote_(quote), + 
escape_(escape), + func_(func), + opaque_state_(opaque_state), + num_cols_(num_cols) { + // Make column array + cols_ = static_cast(memory_.Allocate(sizeof(Column) * num_cols_)); + + // Initialize the columns + for (uint32_t i = 0; i < num_cols_; i++) { + cols_[i].col_type = col_types[i]; + cols_[i].ptr = nullptr; + cols_[i].len = 0; + cols_[i].is_null = false; + } +} + +CSVScanner::~CSVScanner() { + if (buffer_ != nullptr) { + memory_.Free(buffer_); + buffer_ = nullptr; + } + + if (line_ != nullptr) { + memory_.Free(line_); + line_ = nullptr; + } + + if (cols_ != nullptr) { + memory_.Free(cols_); + cols_ = nullptr; + } +} + +void CSVScanner::Init(CSVScanner &scanner, + executor::ExecutorContext &executor_context, + const char *file_path, + const codegen::type::Type *col_types, uint32_t num_cols, + CSVScanner::Callback func, void *opaque_state, + char delimiter, char quote, char escape) { + // Forward to constructor + new (&scanner) + CSVScanner(*executor_context.GetPool(), file_path, col_types, num_cols, + func, opaque_state, delimiter, quote, escape); +} + +void CSVScanner::Destroy(CSVScanner &scanner) { + // Forward to destructor + scanner.~CSVScanner(); +} + +void CSVScanner::Produce() { + // Initialize + Initialize(); + + // Loop lines + while (char *line = NextLine()) { + ProduceCSV(line); + } +} + +void CSVScanner::Initialize() { + // Let's first perform a few validity checks + boost::filesystem::path path(file_path_); + + if (!boost::filesystem::exists(path)) { + throw ExecutorException(StringUtil::Format("input path '%s' does not exist", + file_path_.c_str())); + } else if (!boost::filesystem::is_regular_file(file_path_)) { + auto msg = + StringUtil::Format("unable to read file '%s'", file_path_.c_str()); + throw ExecutorException(msg); + } + + // The path looks okay, let's try opening it + file_.Open(file_path_, peloton::util::File::AccessMode::ReadOnly); + + // Allocate buffer space + buffer_ = static_cast(memory_.Allocate(kDefaultBufferSize)); + + // 
Fill read-buffer + NextBuffer(); + + // Allocate space for the full line, if it doesn't fit into the buffer. We + // reserve the last byte for the null-byte terminator. + line_ = static_cast(memory_.Allocate(kDefaultBufferSize)); + line_len_ = 0; + line_maxlen_ = kDefaultBufferSize - 1; +} + +bool CSVScanner::NextBuffer() { + // Do read + buffer_pos_ = 0; + buffer_end_ = static_cast(file_.Read(buffer_, kDefaultBufferSize)); + + // Update stats + stats_.num_reads++; + + return (buffer_end_ != 0); +} + +void CSVScanner::AppendToLineBuffer(const char *data, uint32_t len) { + PELOTON_ASSERT(len > 0); + + // Short-circuit if we're not appending any data + if (len == 0) { + return; + } + + if (line_len_ + len > line_maxlen_) { + // Check if we can even allocate any more bytes + if (static_cast(len) > kMaxAllocSize - line_len_) { + const auto msg = StringUtil::Format( + "Line %u in file '%s' exceeds maximum line length: %lu", + line_number_ + 1, file_path_.c_str(), kMaxAllocSize); + throw Exception(msg); + } + + // The current line buffer isn't large enough to store the new bytes, so we + // need to resize it. Let's find an allocation size large enough to fit the + // new bytes. + uint32_t new_maxlen = line_maxlen_ * 2; + while (new_maxlen < len) { + new_maxlen *= 2; + } + + // Clamp + new_maxlen = std::min(new_maxlen, static_cast(kMaxAllocSize)); + + auto *new_line = static_cast(memory_.Allocate(new_maxlen)); + + // Copy the old data + PELOTON_MEMCPY(new_line, line_, line_len_); + + // Free old old + memory_.Free(line_); + + // Setup pointers and sizes + line_ = new_line; + line_maxlen_ = new_maxlen - 1; + + stats_.num_reallocs++; + } + + // Copy provided data into the line buffer, ensuring null-byte termination. 
+ PELOTON_MEMCPY(line_ + line_len_, data, len); + line_[line_len_ + len] = '\0'; + + // Increase the length of the line + line_len_ += len; + + // Track copy stats + stats_.num_copies++; +} + +// The objective of this function is to find a complete line in the CSV file. +// The returned value will be a valid pointer to a null-terminated string that +// is the next line in the CSV to be processed. +char *CSVScanner::NextLine() { + line_len_ = 0; + + const char quote = quote_; + const char escape = (quote_ == escape_ ? static_cast('\0') : escape_); + + bool in_quote = false; + bool last_was_escape = false; + + const char *buf = buffer_; + uint32_t curr_buffer_pos = buffer_pos_; + + while (true) { + if (curr_buffer_pos == buffer_end_) { + // We need to read more data from the CSV file. But first, we need to copy + // all the data in the read-buffer (i.e., [buffer_begin_, buffer_end_] to + // the line-buffer. + if (buffer_pos_ < curr_buffer_pos) { + AppendToLineBuffer(buffer_ + buffer_pos_, + curr_buffer_pos - buffer_pos_); + buffer_pos_ = curr_buffer_pos; + } + + // Reset positions + curr_buffer_pos = 0; + + // Now, read more data + if (!NextBuffer()) { + // We hit en EOF + break; + } + } + + // Read character + char c = buf[curr_buffer_pos++]; + + if (in_quote && c == escape) { + last_was_escape = !last_was_escape; + } + if (c == quote && !last_was_escape) { + in_quote = !in_quote; + } + if (c != escape) { + last_was_escape = false; + } + + // Process the new-line character. If we a new-line and we're not currently + // in a quoted section, we're done. + if (c == '\n' && !in_quote) { + break; + } + } + + // Flush remaining valid bytes + if (buffer_pos_ < curr_buffer_pos) { + AppendToLineBuffer(buffer_ + buffer_pos_, curr_buffer_pos - buffer_pos_); + buffer_pos_ = curr_buffer_pos; + } + + // Increment line number + line_number_++; + + // If we didn't transfer any bytes to the line buffer, we must have reached an + // EOF. 
If so, return null indicating there are no more lines. + if (line_len_ == 0) { + return nullptr; + } + + // A full line has been transferred to the line buffer, but we also copied the + // newline character. Strip it off now. + line_len_--; + line_[line_len_] = '\0'; + + // Done + return line_; +} + +void CSVScanner::ProduceCSV(char *line) { + const char delimiter = delimiter_; + const char quote = quote_; + const char escape = escape_; + + // The iterator over characters in the line + char *iter = line; + + for (uint32_t col_idx = 0; col_idx < num_cols_; col_idx++) { + char *col_begin = iter; + char *col_end = nullptr; + + // We need to move col_end to the end of the column's data. Along the way, + // we may need to shift data down due to quotes and escapes. Inspired by + // Postgres. + { + char *out = col_begin; + while (true) { + // This first loop looks for either the delimiter character or the end + // of the line, indicating the end of a columns data. It breaks out of + // the loop if a quote character is found. It flows into a second loop + // whose only purpose is to find the end of the quoted section. + while (true) { + char c = *iter++; + + // If we see the delimiter character, or the end of the string, + // finish + if (c == delimiter || c == '\0') { + col_end = out; + iter--; + goto colend; + } + + // If we see a quote character, move to the second loop to find the + // closing quote. + if (c == quote) { + break; + } + + *out++ = c; + } + + while (true) { + char c = *iter++; + + // If we see the end of the line *within* a quoted section, throw + // error + if (c == '\0') { + throw Exception(StringUtil::Format( + "unterminated CSV quoted field at %u", col_idx)); + } + + // If we see an escape character within a quoted section, we need to + // check if the following character is a quote. 
If so, we must + // escape it + if (c == escape) { + char next = *iter; + if (next == quote || next == escape) { + *out++ = next; + iter++; + continue; + } + } + + // If we see the closing quote, we're done. + if (c == quote) { + break; + } + + *out++ = c; + } + } + } + + colend: + // If we've reached the of the line, but haven't setup all the columns, then + // we're missing data for the remaining columns and should throw an error. + if (*iter == '\0' && col_idx != (num_cols_ - 1)) { + throw Exception( + StringUtil::Format("missing data for column %u on line %u", + (col_idx + 2), line_number_)); + } + + // Let's setup the columns + cols_[col_idx].ptr = col_begin; + cols_[col_idx].len = static_cast(col_end - col_begin); + cols_[col_idx].is_null = (cols_[col_idx].len == 0); + + // Eat delimiter, moving to next column + iter++; + } + + // Invoke callback + func_(opaque_state_); +} + +} // namespace util +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp index 461d6f8faf7..a3c41196762 100644 --- a/src/codegen/values_runtime.cpp +++ b/src/codegen/values_runtime.cpp @@ -6,12 +6,15 @@ // // Identification: src/codegen/values_runtime.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/values_runtime.h" +#include "codegen/runtime_functions.h" +#include "codegen/type/type.h" +#include "type/abstract_pool.h" #include "type/value.h" #include "type/type_util.h" #include "type/value_factory.h" @@ -20,77 +23,80 @@ namespace peloton { namespace codegen { +//////////////////////////////////////////////////////////////////////////////// +/// +/// Output functions +/// +//////////////////////////////////////////////////////////////////////////////// + namespace { -inline void 
SetValue(type::Value *val_ptr, type::Value &&val) { - new (val_ptr) type::Value(val); +inline void SetValue(peloton::type::Value *val_ptr, + peloton::type::Value &&val) { + new (val_ptr) peloton::type::Value(val); } } // namespace void ValuesRuntime::OutputBoolean(char *values, uint32_t idx, bool val, bool is_null) { - auto *vals = reinterpret_cast(values); + auto *vals = reinterpret_cast(values); if (is_null) { - SetValue(&vals[idx], - type::ValueFactory::GetNullValueByType(type::TypeId::BOOLEAN)); + SetValue(&vals[idx], peloton::type::ValueFactory::GetNullValueByType( + peloton::type::TypeId::BOOLEAN)); } else { - SetValue(&vals[idx], type::ValueFactory::GetBooleanValue(val)); + SetValue(&vals[idx], peloton::type::ValueFactory::GetBooleanValue(val)); } } void ValuesRuntime::OutputTinyInt(char *values, uint32_t idx, int8_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetTinyIntValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetTinyIntValue(val)); } void ValuesRuntime::OutputSmallInt(char *values, uint32_t idx, int16_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetSmallIntValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetSmallIntValue(val)); } void ValuesRuntime::OutputInteger(char *values, uint32_t idx, int32_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetIntegerValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetIntegerValue(val)); } void ValuesRuntime::OutputBigInt(char *values, uint32_t idx, int64_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetBigIntValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetBigIntValue(val)); } void 
ValuesRuntime::OutputDate(char *values, uint32_t idx, int32_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetDateValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetDateValue(val)); } void ValuesRuntime::OutputTimestamp(char *values, uint32_t idx, int64_t val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetTimestampValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetTimestampValue(val)); } void ValuesRuntime::OutputDecimal(char *values, uint32_t idx, double val) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetDecimalValue(val)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], peloton::type::ValueFactory::GetDecimalValue(val)); } void ValuesRuntime::OutputVarchar(char *values, uint32_t idx, const char *str, uint32_t len) { - auto *vals = reinterpret_cast(values); - SetValue(&vals[idx], type::ValueFactory::GetVarcharValue(str, len, false)); + auto *vals = reinterpret_cast(values); + SetValue(&vals[idx], + peloton::type::ValueFactory::GetVarcharValue(str, len, false)); } void ValuesRuntime::OutputVarbinary(char *values, uint32_t idx, const char *ptr, uint32_t len) { - auto *vals = reinterpret_cast(values); + auto *vals = reinterpret_cast(values); const auto *bin_ptr = reinterpret_cast(ptr); SetValue(&vals[idx], - type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false)); -} - -int32_t ValuesRuntime::CompareStrings(const char *str1, uint32_t len1, - const char *str2, uint32_t len2) { - return type::TypeUtil::CompareStrings(str1, len1, str2, len2); + peloton::type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false)); } } // namespace codegen diff --git a/src/common/internal_types.cpp b/src/common/internal_types.cpp index b93da7f3b13..855f7ef2d9b 100644 --- a/src/common/internal_types.cpp +++ 
b/src/common/internal_types.cpp @@ -1304,6 +1304,9 @@ std::string PlanNodeTypeToString(PlanNodeType type) { case PlanNodeType::INDEXSCAN: { return ("INDEXSCAN"); } + case PlanNodeType::CSVSCAN: { + return ("CSVSCAN"); + } case PlanNodeType::NESTLOOP: { return ("NESTLOOP"); } @@ -1379,9 +1382,6 @@ std::string PlanNodeTypeToString(PlanNodeType type) { case PlanNodeType::RESULT: { return ("RESULT"); } - case PlanNodeType::COPY: { - return ("COPY"); - } case PlanNodeType::MOCK: { return ("MOCK"); } @@ -1391,6 +1391,9 @@ std::string PlanNodeTypeToString(PlanNodeType type) { case PlanNodeType::ANALYZE: { return ("ANALYZE"); } + case PlanNodeType::EXPORT_EXTERNAL_FILE: { + return ("EXPORT_EXTERNAL_FILE"); + } default: { throw ConversionException( StringUtil::Format("No string conversion for PlanNodeType value '%d'", @@ -1408,6 +1411,8 @@ PlanNodeType StringToPlanNodeType(const std::string &str) { return PlanNodeType::SEQSCAN; } else if (upper_str == "INDEXSCAN") { return PlanNodeType::INDEXSCAN; + } else if (upper_str == "CSVSCAN") { + return PlanNodeType::CSVSCAN; } else if (upper_str == "NESTLOOP") { return PlanNodeType::NESTLOOP; } else if (upper_str == "NESTLOOPINDEX") { @@ -1456,12 +1461,12 @@ PlanNodeType StringToPlanNodeType(const std::string &str) { return PlanNodeType::HASH; } else if (upper_str == "RESULT") { return PlanNodeType::RESULT; - } else if (upper_str == "COPY") { - return PlanNodeType::COPY; } else if (upper_str == "MOCK") { return PlanNodeType::MOCK; } else if (upper_str == "ANALYZE") { return PlanNodeType::ANALYZE; + } else if (upper_str == "EXPORT_EXTERNAL_FILE") { + return PlanNodeType::EXPORT_EXTERNAL_FILE; } else { throw ConversionException(StringUtil::Format( "No PlanNodeType conversion from string '%s'", upper_str.c_str())); @@ -1877,6 +1882,32 @@ std::ostream &operator<<(std::ostream &os, const CopyType &type) { return os; } +//===--------------------------------------------------------------------===// +// ExternalFileFormat - String 
Utilities +//===--------------------------------------------------------------------===// + +std::string ExternalFileFormatToString(ExternalFileFormat format) { + switch (format) { + case ExternalFileFormat::CSV: + default: + return "CSV"; + } +} + +ExternalFileFormat StringToExternalFileFormat(const std::string &str) { + auto upper = StringUtil::Upper(str); + if (upper == "CSV") { + return ExternalFileFormat::CSV; + } + throw ConversionException(StringUtil::Format( + "No ExternalFileFormat for input '%s'", upper.c_str())); +} + +std::ostream &operator<<(std::ostream &os, const ExternalFileFormat &format) { + os << ExternalFileFormatToString(format); + return os; +} + //===--------------------------------------------------------------------===// // PayloadType - String Utilities //===--------------------------------------------------------------------===// diff --git a/src/executor/copy_executor.cpp b/src/executor/copy_executor.cpp index ce16d8c83eb..f499e899708 100644 --- a/src/executor/copy_executor.cpp +++ b/src/executor/copy_executor.cpp @@ -6,23 +6,25 @@ // // Identification: src/executor/copy_executor.cpp // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// +#include "executor/copy_executor.h" + +#include +#include + #include "common/logger.h" #include "catalog/catalog.h" #include "concurrency/transaction_manager_factory.h" -#include "executor/copy_executor.h" #include "executor/executor_context.h" #include "executor/logical_tile_factory.h" -#include "planner/copy_plan.h" +#include "planner/export_external_file_plan.h" #include "storage/table_factory.h" #include "network/postgres_protocol_handler.h" #include "common/exception.h" #include "common/macros.h" -#include -#include namespace peloton { namespace executor { @@ -35,7 +37,7 @@ CopyExecutor::CopyExecutor(const planner::AbstractPlan 
*node, ExecutorContext *executor_context) : AbstractExecutor(node, executor_context) {} -CopyExecutor::~CopyExecutor() {} +CopyExecutor::~CopyExecutor() = default; /** * @brief Basic initialization. @@ -45,26 +47,19 @@ bool CopyExecutor::DInit() { PELOTON_ASSERT(children_.size() == 1); // Grab info from plan node and check it - const planner::CopyPlan &node = GetPlanNode(); + const auto &node = GetPlanNode(); - bool success = InitFileHandle(node.file_path.c_str(), "w"); + bool success = InitFileHandle(node.GetFileName().c_str(), "w"); if (success == false) { - throw ExecutorException("Failed to create file " + node.file_path + + throw ExecutorException("Failed to create file " + node.GetFileName() + ". Try absolute path and make sure you have the " "permission to access this file."); - return false; - } - LOG_DEBUG("Created target copy output file: %s", node.file_path.c_str()); - - // Whether we're copying the parameters which require deserialization - if (node.deserialize_parameters) { - InitParamColIds(); } + LOG_DEBUG("Created target copy output file: %s", node.GetFileName().c_str()); return true; } - bool CopyExecutor::InitFileHandle(const char *name, const char *mode) { auto file = fopen(name, mode); if (file == NULL) { @@ -122,33 +117,6 @@ void CopyExecutor::FFlushFsync() { } } -void CopyExecutor::InitParamColIds() { - // If we're going to deserialize prepared statement, get the column ids for - // the varbinary columns first - // auto catalog = catalog::Catalog::GetInstance(); - // try { - // auto query_metric_table = - // catalog->GetTableWithName(CATALOG_DATABASE_NAME, QUERY_METRIC_NAME); - // auto schema = query_metric_table->GetSchema(); - // auto &cols = schema->GetColumns(); - // for (unsigned int i = 0; i < cols.size(); i++) { - // auto col_name = cols[i].column_name.c_str(); - // if (std::strcmp(col_name, QUERY_PARAM_TYPE_COL_NAME) == 0) { - // param_type_col_id = i; - // } else if (std::strcmp(col_name, QUERY_PARAM_FORMAT_COL_NAME) == 0) { - // 
param_format_col_id = i; - // } else if (std::strcmp(col_name, QUERY_PARAM_VAL_COL_NAME) == 0) { - // param_val_col_id = i; - // } else if (std::strcmp(col_name, QUERY_NUM_PARAM_COL_NAME) == 0) { - // num_param_col_id = i; - // } - // } - // } - // catch (Exception &e) { - // e.PrintStackTrace(); - // } -} - void CopyExecutor::Copy(const char *data, int len, bool end_of_line) { // Worst case we need to escape all character and two delimiters while (COPY_BUFFER_SIZE - buff_size - buff_ptr < (size_t)len * 3) { diff --git a/src/executor/plan_executor.cpp b/src/executor/plan_executor.cpp index feca24cec2f..6226e3a26cf 100644 --- a/src/executor/plan_executor.cpp +++ b/src/executor/plan_executor.cpp @@ -170,9 +170,9 @@ void PlanExecutor::ExecutePlan( } catch (Exception &e) { ExecutionResult result; result.m_result = ResultType::FAILURE; - result.m_error_message = e.what(); - LOG_ERROR("Error thrown during execution: %s", - result.m_error_message.c_str()); + result.m_error_message = + StringUtil::Format("ERROR: during execution ['%s']", e.what()); + LOG_ERROR("Error during execution: %s", e.what()); on_complete(result, {}); } } @@ -339,7 +339,7 @@ executor::AbstractExecutor *BuildExecutorTree( new executor::CreateFunctionExecutor(plan, executor_context); break; - case PlanNodeType::COPY: + case PlanNodeType::EXPORT_EXTERNAL_FILE: child_executor = new executor::CopyExecutor(plan, executor_context); break; @@ -349,9 +349,9 @@ executor::AbstractExecutor *BuildExecutorTree( break; default: - LOG_ERROR("Unsupported plan node type : %s", - PlanNodeTypeToString(plan_node_type).c_str()); - break; + throw NotImplementedException{ + StringUtil::Format("Unsupported plan node type : %s", + PlanNodeTypeToString(plan_node_type).c_str())}; } LOG_TRACE("Adding %s Executor", PlanNodeTypeToString(plan_node_type).c_str()); diff --git a/src/function/date_functions.cpp b/src/function/date_functions.cpp index 233259844c8..ac37f21492c 100644 --- a/src/function/date_functions.cpp +++ 
b/src/function/date_functions.cpp @@ -6,7 +6,7 @@ // // Identification: src/function/date_functions.cpp // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -14,11 +14,9 @@ #include #include -#include -#include #include -#include "common/logger.h" +#include "codegen/runtime_functions.h" #include "common/internal_types.h" #include "type/value.h" #include "type/value_factory.h" @@ -26,30 +24,28 @@ namespace peloton { namespace function { -// This now is not what postgres does. -// Postgres is returning the time when the transaction begins -// We are here intead generating a new time when this function -// is called +// This implementation of Now() is **not** what postgres does. Postgres is +// returning the time when the transaction begins. We are here instead +// generating a new time when this function is called. 
// Convert a calendar date into a Julian day number. This is the same integer
// arithmetic as date2j() in Postgres: January and February are counted as
// months 13 and 14 of the previous year, so the leap day falls at the end of
// the shifted year.
int32_t DateFunctions::DateToJulian(int32_t year, int32_t month, int32_t day) {
  const bool after_february = (month > 2);
  month += (after_february ? 1 : 13);
  year += (after_february ? 4800 : 4799);

  const int32_t century = year / 100;

  int32_t julian = year * 365 - 32167;
  julian += year / 4 - century + century / 4;
  julian += 7834 * month / 256 + day;
  return julian;
}
namespace {

// Parse a run of decimal digits from the range [data, end) into 'out'.
//
// Leading spaces are skipped. Parsing stops at the first non-digit character
// or at 'end', whichever comes first. No byte at or beyond 'end' is ever
// read, so the input does not need to be null-terminated. Neither a sign nor
// overflow is handled.
//
// On return, 'data' points just past the last character consumed (including
// any skipped spaces) and 'out' holds the parsed value, or zero if there were
// no digits. Returns true if at least one digit was consumed.
template <typename T>
bool TryParseInt(const char *&data, const char *end, T &out) {
  static_assert(std::is_integral<T>::value,
                "TryParseInt() must only be called with integer types");

  // Initialize
  out = 0;

  // Trim leading whitespace, making sure to stay within the input
  while (data != end && *data == ' ') {
    data++;
  }

  // Consume digits until the input runs out or we see a non-digit
  const char *digits_begin = data;
  while (data != end && *data >= '0' && *data <= '9') {
    // Update running sum
    out = (out * 10) + (*data - '0');

    // Move along
    data++;
  }

  return data != digits_begin;
}

}  // namespace
+ year = nums[0], month = nums[1], day = nums[2]; + + if (month == 0 || month > 12 || day == 0 || day > 31) { + goto unsupported; + } + + switch (month) { + case 2: { + uint32_t days_in_feb = + ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)) ? 29 : 28; + if (day > days_in_feb) { + goto unsupported; + } + break; + } + case 4: + case 6: + case 9: + case 11: { + if (day > 30) { + goto unsupported; + } + break; + } + default: { + if (day > 31) { + goto unsupported; + } + break; + } + } + + return DateToJulian(year, month, day); + +unsupported: + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); } } // namespace expression diff --git a/src/function/decimal_functions.cpp b/src/function/decimal_functions.cpp deleted file mode 100644 index b722993b4d0..00000000000 --- a/src/function/decimal_functions.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// decimal_functions.cpp -// -// Identification: src/function/decimal_functions.cpp -// -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#include "function/decimal_functions.h" -#include "type/value_factory.h" - -namespace peloton { -namespace function { - -// Get square root of the value -type::Value DecimalFunctions::Sqrt(const std::vector &args) { - PELOTON_ASSERT(args.size() == 1); - if (args[0].IsNull()) { - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - return args[0].Sqrt(); -} - -// Get Abs of value -type::Value DecimalFunctions::_Abs(const std::vector &args) { - PELOTON_ASSERT(args.size() == 1); - if (args[0].IsNull()) { - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - switch (args[0].GetElementType()) { - case type::TypeId::DECIMAL: - { - double result; - result = Abs(args[0].GetAs()); - return 
type::ValueFactory::GetDecimalValue(result); - } - break; - case type::TypeId::INTEGER: - { - int32_t result; - result = abs(args[0].GetAs()); - return type::ValueFactory::GetIntegerValue(result); - break; - } - case type::TypeId::BIGINT: - { - int64_t result; - result = std::abs(args[0].GetAs()); - return type::ValueFactory::GetBigIntValue(result); - } - break; - case type::TypeId::SMALLINT: - { - int16_t result; - result = abs(args[0].GetAs()); - return type::ValueFactory::GetSmallIntValue(result); - } - break; - case type::TypeId::TINYINT: - { - int8_t result; - result = abs(args[0].GetAs()); - return type::ValueFactory::GetTinyIntValue(result); - } - break; - default: - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } -} - -double DecimalFunctions::Abs(const double args) { return fabs(args); } - -// Get ceiling of value -type::Value DecimalFunctions::_Ceil(const std::vector &args) { - PELOTON_ASSERT(args.size() == 1); - if (args[0].IsNull()) { - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - double result; - switch (args[0].GetElementType()) { - case type::TypeId::DECIMAL: - result = Ceil(args[0].GetAs()); - break; - case type::TypeId::INTEGER: - result = args[0].GetAs(); - break; - case type::TypeId::BIGINT: - result = args[0].GetAs(); - break; - case type::TypeId::SMALLINT: - result = args[0].GetAs(); - break; - case type::TypeId::TINYINT: - result = args[0].GetAs(); - break; - default: - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - return type::ValueFactory::GetDecimalValue(result); -} - -double DecimalFunctions::Ceil(const double args) { return ceil(args); } - -// Get floor value -type::Value DecimalFunctions::_Floor(const std::vector &args) { - PELOTON_ASSERT(args.size() == 1); - if (args[0].IsNull()) { - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - double res; - switch(args[0].GetElementType()) { - case type::TypeId::DECIMAL: - res = 
Floor(args[0].GetAs()); - break; - case type::TypeId::INTEGER: - res = args[0].GetAs(); - break; - case type::TypeId::BIGINT: - res = args[0].GetAs(); - break; - case type::TypeId::SMALLINT: - res = args[0].GetAs(); - break; - case type::TypeId::TINYINT: - res = args[0].GetAs(); - break; - default: - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - return type::ValueFactory::GetDecimalValue(res); -} - -double DecimalFunctions::Floor(const double val) { return floor(val); } - -// Round to nearest integer -type::Value DecimalFunctions::_Round(const std::vector &args) { - PELOTON_ASSERT(args.size() == 1); - if (args[0].IsNull()) { - return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); - } - return type::ValueFactory::GetDecimalValue(Round(args[0].GetAs())); -} - -double DecimalFunctions::Round(double arg) { return round(arg); } - - -} // namespace function -} // namespace peloton diff --git a/src/function/numeric_functions.cpp b/src/function/numeric_functions.cpp new file mode 100644 index 00000000000..f0d13e92ffc --- /dev/null +++ b/src/function/numeric_functions.cpp @@ -0,0 +1,439 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// numeric_functions.cpp +// +// Identification: src/function/numeric_functions.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "function/numeric_functions.h" + +#include "codegen/type/type.h" +#include "codegen/runtime_functions.h" +#include "type/value.h" +#include "type/value_factory.h" + +namespace peloton { +namespace function { + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Square root +/// +//////////////////////////////////////////////////////////////////////////////// + +double NumericFunctions::ISqrt(uint32_t num) { + return std::sqrt(num); +} + +double 
NumericFunctions::DSqrt(double num) { return std::sqrt(num); } + +type::Value NumericFunctions::Sqrt(const std::vector &args) { + PELOTON_ASSERT(args.size() == 1); + if (args[0].IsNull()) { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + return args[0].Sqrt(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Absolute value +/// +//////////////////////////////////////////////////////////////////////////////// + +double NumericFunctions::Abs(const double args) { return fabs(args); } + +// Get Abs of value +type::Value NumericFunctions::_Abs(const std::vector &args) { + PELOTON_ASSERT(args.size() == 1); + if (args[0].IsNull()) { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + switch (args[0].GetElementType()) { + case type::TypeId::DECIMAL: { + double result; + result = Abs(args[0].GetAs()); + return type::ValueFactory::GetDecimalValue(result); + } + case type::TypeId::INTEGER: { + int32_t result; + result = abs(args[0].GetAs()); + return type::ValueFactory::GetIntegerValue(result); + } + case type::TypeId::BIGINT: { + int64_t result; + result = std::abs(args[0].GetAs()); + return type::ValueFactory::GetBigIntValue(result); + } + case type::TypeId::SMALLINT: { + int16_t result; + result = abs(args[0].GetAs()); + return type::ValueFactory::GetSmallIntValue(result); + } + case type::TypeId::TINYINT: { + int8_t result; + result = abs(args[0].GetAs()); + return type::ValueFactory::GetTinyIntValue(result); + } + default: { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Ceiling value +/// +//////////////////////////////////////////////////////////////////////////////// + +double NumericFunctions::Ceil(const double args) { return ceil(args); } + +type::Value NumericFunctions::_Ceil(const std::vector &args) { + PELOTON_ASSERT(args.size() == 
1); + if (args[0].IsNull()) { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + double result; + switch (args[0].GetElementType()) { + case type::TypeId::DECIMAL: + result = Ceil(args[0].GetAs()); + break; + case type::TypeId::INTEGER: + result = args[0].GetAs(); + break; + case type::TypeId::BIGINT: + result = args[0].GetAs(); + break; + case type::TypeId::SMALLINT: + result = args[0].GetAs(); + break; + case type::TypeId::TINYINT: + result = args[0].GetAs(); + break; + default: + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + return type::ValueFactory::GetDecimalValue(result); +} + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Floor value +/// +//////////////////////////////////////////////////////////////////////////////// + +double NumericFunctions::Floor(const double val) { return floor(val); } + +type::Value NumericFunctions::_Floor(const std::vector &args) { + PELOTON_ASSERT(args.size() == 1); + if (args[0].IsNull()) { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + double res; + switch (args[0].GetElementType()) { + case type::TypeId::DECIMAL: + res = Floor(args[0].GetAs()); + break; + case type::TypeId::INTEGER: + res = args[0].GetAs(); + break; + case type::TypeId::BIGINT: + res = args[0].GetAs(); + break; + case type::TypeId::SMALLINT: + res = args[0].GetAs(); + break; + case type::TypeId::TINYINT: + res = args[0].GetAs(); + break; + default: + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + return type::ValueFactory::GetDecimalValue(res); +} + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Rounding +/// +//////////////////////////////////////////////////////////////////////////////// + +double NumericFunctions::Round(double arg) { return round(arg); } + +type::Value NumericFunctions::_Round(const std::vector &args) { + PELOTON_ASSERT(args.size() == 
1); + if (args[0].IsNull()) { + return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL); + } + return type::ValueFactory::GetDecimalValue(Round(args[0].GetAs())); +} + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Input functions +/// +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +/** + * Convert the provided input string into an integral number. This function + * handles leading whitespace and leading negative (-) or positive (+) signs. + * Additionally, it performs a bounds check to ensure the number falls into the + * valid range of numbers for the given type. + * + * @tparam T The integral type (int8_t, int16_t, int32_t, int64_t) + * @param ptr A pointer to the start of the input string + * @param len The length of the input string + * @return The numeric interpretation of the input string + */ +template +T ParseInteger(const char *ptr, uint32_t len) { + static_assert(std::is_integral::value, + "Must provide integer-type when calling ParseInteger"); + + const char *start = ptr; + const char *end = start + len; + + // Trim leading whitespace + while (start < end && *start == ' ') { + start++; + } + + // Check negative or positive sign + bool negative = false; + if (*start == '-') { + negative = true; + start++; + } else if (*start == '+') { + start++; + } + + // Convert + uint64_t cutoff = + static_cast(negative ? 
-std::numeric_limits::min() + : std::numeric_limits::max()); + uint64_t cutlimit = cutoff % 10; + cutoff /= 10; + + uint64_t num = 0; + while (start < end) { + if (*start < '0' || *start > '9') { + break; + } + + uint32_t c = static_cast(*start - '0'); + + if (num > cutoff || (num == cutoff && c > cutlimit)) { + goto overflow; + } + + num = (num * 10) + c; + + start++; + } + + // Trim trailing whitespace + while (start < end && *start == ' ') { + start++; + } + + // If we haven't consumed everything at this point, it was an invalid input + if (start < end) { + goto invalid; + } + + // Negate number if we need to + if (negative) { + num = -num; + } + + // Range check + if (static_cast(num) <= std::numeric_limits::min() || + static_cast(num) >= std::numeric_limits::max()) { + goto overflow; + } + + // Done + return static_cast(num); + +overflow: + codegen::RuntimeFunctions::ThrowOverflowException(); + __builtin_unreachable(); + +invalid: + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); +} + +} // namespace + +bool NumericFunctions::InputBoolean( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL"); + + if (len == 0) { + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); + } + + const char *start = ptr; + const char *end = ptr + len; + + // Trim leading whitespace + while (start < end && *start == ' ') { + start++; + } + + // + uint64_t trimmed_len = end - start; + + // Check cases + switch (*start) { + case 't': + case 'T': { + static constexpr char kTrue[] = "true"; + if (strncasecmp(start, kTrue, trimmed_len) == 0) { + return true; + } + break; + } + case 'f': + case 'F': { + static constexpr char kFalse[] = "false"; + if (strncasecmp(start, kFalse, trimmed_len) == 0) { + return false; + } + break; + } + case 'y': + case 'Y': { + static constexpr char kYes[] = "yes"; + if 
(strncasecmp(start, kYes, trimmed_len) == 0) { + return true; + } + break; + } + case 'n': + case 'N': { + static constexpr char kNo[] = "no"; + if (strncasecmp(start, kNo, trimmed_len) == 0) { + return false; + } + break; + } + case 'o': + case 'O': { + // 'o' not enough to distinguish between on/off + static constexpr char kOff[] = "off"; + static constexpr char kOn[] = "on"; + if (strncasecmp(start, kOff, (trimmed_len > 3 ? trimmed_len : 3)) == 0) { + return false; + } else if (strncasecmp(start, kOn, (trimmed_len > 2 ? trimmed_len : 2)) == + 0) { + return true; + } + break; + } + case '0': { + if (trimmed_len == 1) { + return false; + } else { + return true; + } + } + case '1': { + if (trimmed_len == 1) { + return true; + } else { + return false; + } + } + default: { break; } + } + + // Error + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); +} + +int8_t NumericFunctions::InputTinyInt( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL"); + return ParseInteger(ptr, len); +} + +int16_t NumericFunctions::InputSmallInt( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL"); + return ParseInteger(ptr, len); +} + +int32_t NumericFunctions::InputInteger( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL"); + return ParseInteger(ptr, len); +} + +int64_t NumericFunctions::InputBigInt( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL"); + return ParseInteger(ptr, len); +} + +double NumericFunctions::InputDecimal( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr, + uint32_t len) { + PELOTON_ASSERT(ptr 
!= nullptr && "Input is assumed to be non-NULL"); + if (len == 0) { + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); + } + + const char *start = ptr; + const char *end = ptr + len; + + // We don't trim because std::strtod() does the trimming for us + + // TODO(pmenon): Optimize me later + char *consumed_ptr = nullptr; + double ret = std::strtod(ptr, &consumed_ptr); + + if (unlikely_branch(consumed_ptr == start)) { + if (errno == ERANGE) { + codegen::RuntimeFunctions::ThrowOverflowException(); + __builtin_unreachable(); + } else { + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); + } + } + + // Eat the rest + while (consumed_ptr < end && *consumed_ptr == ' ') { + consumed_ptr++; + } + + // If we haven't consumed everything at this point, it was an invalid input + if (consumed_ptr < end) { + codegen::RuntimeFunctions::ThrowInvalidInputStringException(); + __builtin_unreachable(); + } + + // Done + return ret; +} + +} // namespace function +} // namespace peloton diff --git a/src/function/string_functions.cpp b/src/function/string_functions.cpp index 841a9ee6e15..75af3a67523 100644 --- a/src/function/string_functions.cpp +++ b/src/function/string_functions.cpp @@ -14,6 +14,8 @@ #include "common/macros.h" #include "executor/executor_context.h" +#include "type/type_util.h" +#include "type/abstract_pool.h" namespace peloton { namespace function { @@ -220,5 +222,36 @@ uint32_t StringFunctions::Length( return length; } +int32_t StringFunctions::CompareStrings(const char *str1, uint32_t len1, + const char *str2, uint32_t len2) { + return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2); +} + +void StringFunctions::WriteString(const char *data, uint32_t len, char *buf, + peloton::type::AbstractPool &pool) { + struct Varlen { + uint32_t len; + char data[0]; + }; + + // Allocate memory for the Varlen object + auto *area = static_cast(pool.Allocate(sizeof(uint32_t) + len)); + + 
// Populate it + area->len = len; + PELOTON_MEMCPY(area->data, data, len); + + // Store a pointer to the Varlen object into the target memory space + *reinterpret_cast(buf) = area; +} + +// TODO(pmenon): UTF8 checking, string checking, lots of error handling here +// TODO(pmenon): Why do we need this +1 on the length ? +StringFunctions::StrWithLen StringFunctions::InputString( + UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *data, + uint32_t len) { + return StringFunctions::StrWithLen{data, len + 1}; +} + } // namespace function } // namespace peloton diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h index 43a62d6444f..2f6875b453b 100644 --- a/src/include/catalog/schema.h +++ b/src/include/catalog/schema.h @@ -126,7 +126,7 @@ class Schema : public Printable { return columns[column_id].IsInlined(); } - inline const Column GetColumn(const oid_t column_id) const { + inline const Column &GetColumn(const oid_t column_id) const { return columns[column_id]; } diff --git a/src/include/codegen/buffering_consumer.h b/src/include/codegen/buffering_consumer.h index 0e537486a3e..5238563c45e 100644 --- a/src/include/codegen/buffering_consumer.h +++ b/src/include/codegen/buffering_consumer.h @@ -42,6 +42,8 @@ class WrappedTuple : public ContainerTuple> { // Assignment WrappedTuple &operator=(const WrappedTuple &o); + std::string ToCSV() const; + // The tuple std::vector tuple_; }; diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h index 5612868d0d5..9a56edf5dfd 100644 --- a/src/include/codegen/codegen.h +++ b/src/include/codegen/codegen.h @@ -16,6 +16,7 @@ #include #include "codegen/code_context.h" +#include "codegen/type/type.h" namespace peloton { namespace codegen { @@ -58,9 +59,10 @@ class CppProxyMember { uint32_t slot_; }; -//===----------------------------------------------------------------------===// -// The main wrapper around LLVM's IR Builder to generate IR 
-//===----------------------------------------------------------------------===// +/** + * The main API used to generate code in Peloton. Provides a thin wrapper around + * LLVM's IR Builder to generate IR. + */ class CodeGen { public: /// Constructor and destructor @@ -88,18 +90,20 @@ class CodeGen { } llvm::Type *ArrayType(llvm::Type *type, uint32_t num_elements) const; - /// Constant wrappers for bool, int8, int16, int32, int64, strings, and null + /// Functions to return LLVM values for constant boolean, int8, int16, int32, + // int64, strings, and null values. llvm::Constant *ConstBool(bool val) const; llvm::Constant *Const8(int8_t val) const; llvm::Constant *Const16(int16_t val) const; llvm::Constant *Const32(int32_t val) const; llvm::Constant *Const64(int64_t val) const; llvm::Constant *ConstDouble(double val) const; - llvm::Constant *ConstString(const std::string &s) const; + llvm::Value *ConstString(const std::string &str_val, + const std::string &name) const; + llvm::Value *ConstGenericBytes(const void *data, uint32_t length, + const std::string &name) const; llvm::Constant *Null(llvm::Type *type) const; llvm::Constant *NullPtr(llvm::PointerType *type) const; - /// Wrapper for pointer for constant string - llvm::Value *ConstStringPtr(const std::string &s) const; llvm::Value *AllocateVariable(llvm::Type *type, const std::string &name); llvm::Value *AllocateBuffer(llvm::Type *element_type, uint32_t num_elems, @@ -128,8 +132,10 @@ class CodeGen { //===--------------------------------------------------------------------===// // C/C++ standard library functions //===--------------------------------------------------------------------===// - llvm::Value *CallPrintf(const std::string &format, - const std::vector &args); + llvm::Value *Printf(const std::string &format, + const std::vector &args); + llvm::Value *Memcmp(llvm::Value *ptr1, llvm::Value *ptr2, + llvm::Value *len); llvm::Value *Sqrt(llvm::Value *val); 
//===--------------------------------------------------------------------===// diff --git a/src/include/codegen/operator/csv_scan_translator.h b/src/include/codegen/operator/csv_scan_translator.h new file mode 100644 index 00000000000..9b7efca8fc6 --- /dev/null +++ b/src/include/codegen/operator/csv_scan_translator.h @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scan_translator.h +// +// Identification: src/include/codegen/operator/csv_scan_translator.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "codegen/operator/operator_translator.h" + +namespace peloton { + +namespace planner { +class CSVScanPlan; +} // namespace planner + +namespace codegen { +class CompilationContext; +class Pipeline; +} // namespace codegen + +namespace codegen { + +//===----------------------------------------------------------------------===// +// A translator for CSV file scans +//===----------------------------------------------------------------------===// +class CSVScanTranslator : public OperatorTranslator { + public: + // Constructor + CSVScanTranslator(const planner::CSVScanPlan &scan, + CompilationContext &context, Pipeline &pipeline); + + void InitializeQueryState() override; + + void DefineAuxiliaryFunctions() override; + + // The method that produces new tuples + void Produce() const override; + + // Scans are leaves in the query plan and, hence, do not consume tuples + void Consume(ConsumerContext &, RowBatch &) const override {} + void Consume(ConsumerContext &, RowBatch::Row &) const override {} + + // Similar to InitializeState(), file scans don't have any state + void TearDownQueryState() override; + + private: + // The set of attributes output by the csv scan + std::vector output_attributes_; + + // The scanner state ID + QueryState::Id scanner_id_; + 
+ // The generated CSV scan consumer function + llvm::Function *consumer_func_; +}; + +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/include/codegen/proxy/csv_scanner_proxy.h b/src/include/codegen/proxy/csv_scanner_proxy.h new file mode 100644 index 00000000000..ee27ce2b003 --- /dev/null +++ b/src/include/codegen/proxy/csv_scanner_proxy.h @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scanner_proxy.h +// +// Identification: src/include/codegen/proxy/csv_scanner_proxy.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "codegen/proxy/proxy.h" +#include "codegen/proxy/type_builder.h" +#include "codegen/util/csv_scanner.h" +#include "codegen/proxy/runtime_functions_proxy.h" + +namespace peloton { +namespace codegen { + +PROXY(CSVScannerColumn) { + DECLARE_MEMBER(0, type::Type, type); + DECLARE_MEMBER(1, char *, ptr); + DECLARE_MEMBER(2, uint32_t, len); + DECLARE_MEMBER(3, bool, is_null); + DECLARE_TYPE; +}; + +PROXY(CSVScanner) { + DECLARE_MEMBER(0, char[sizeof(codegen::util::CSVScanner) - + sizeof(util::CSVScanner::Column *) - + sizeof(util::CSVScanner::Stats) - sizeof(uint32_t)], + opaque1); + DECLARE_MEMBER(1, util::CSVScanner::Column *, cols); + DECLARE_MEMBER(2, char[sizeof(util::CSVScanner::Stats) + sizeof(uint32_t)], + opaque2); + DECLARE_TYPE; + + DECLARE_METHOD(Init); + DECLARE_METHOD(Destroy); + DECLARE_METHOD(Produce); +}; + +TYPE_BUILDER(CSVScanner, codegen::util::CSVScanner); +TYPE_BUILDER(CSVScannerColumn, codegen::util::CSVScanner::Column); + +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/include/codegen/proxy/date_functions_proxy.h b/src/include/codegen/proxy/date_functions_proxy.h index 38f96b3cd38..7954afe72d3 100644 --- 
a/src/include/codegen/proxy/date_functions_proxy.h +++ b/src/include/codegen/proxy/date_functions_proxy.h @@ -6,7 +6,7 @@ // // Identification: src/include/codegen/proxy/date_functions_proxy.h // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -18,8 +18,11 @@ namespace peloton { namespace codegen { PROXY(DateFunctions) { - // Proxy everything in function::StringFunctions + // Utility functions DECLARE_METHOD(Now); + + // Input functions + DECLARE_METHOD(InputDate); }; } // namespace codegen diff --git a/src/include/codegen/proxy/decimal_functions_proxy.h b/src/include/codegen/proxy/numeric_functions_proxy.h similarity index 52% rename from src/include/codegen/proxy/decimal_functions_proxy.h rename to src/include/codegen/proxy/numeric_functions_proxy.h index 4d9b70a5671..b3a338e06a8 100644 --- a/src/include/codegen/proxy/decimal_functions_proxy.h +++ b/src/include/codegen/proxy/numeric_functions_proxy.h @@ -2,11 +2,11 @@ // // Peloton // -// decimal_functions_proxy.h +// numeric_functions_proxy.h // -// Identification: src/include/codegen/proxy/decimal_functions_proxy.h +// Identification: src/include/codegen/proxy/numeric_functions_proxy.h // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -17,13 +17,20 @@ namespace peloton { namespace codegen { -PROXY(DecimalFunctions) { - // Proxy everything in function::DecimalFunctions - +PROXY(NumericFunctions) { + // Utility functions DECLARE_METHOD(Abs); DECLARE_METHOD(Floor); DECLARE_METHOD(Round); DECLARE_METHOD(Ceil); + + // Input functions + DECLARE_METHOD(InputBoolean); + DECLARE_METHOD(InputTinyInt); + DECLARE_METHOD(InputSmallInt); + DECLARE_METHOD(InputInteger); 
+ DECLARE_METHOD(InputBigInt); + DECLARE_METHOD(InputDecimal); }; } // namespace codegen diff --git a/src/include/codegen/proxy/runtime_functions_proxy.h b/src/include/codegen/proxy/runtime_functions_proxy.h index c20ba145eb4..5700f7fffb9 100644 --- a/src/include/codegen/proxy/runtime_functions_proxy.h +++ b/src/include/codegen/proxy/runtime_functions_proxy.h @@ -33,6 +33,11 @@ PROXY(AbstractExpression) { DECLARE_TYPE; }; +PROXY(Type) { + DECLARE_MEMBER(0, char[sizeof(codegen::type::Type)], opaque); + DECLARE_TYPE; +}; + PROXY(RuntimeFunctions) { DECLARE_METHOD(HashMurmur3); DECLARE_METHOD(HashCrc64); @@ -47,6 +52,7 @@ PROXY(RuntimeFunctions) { TYPE_BUILDER(ColumnLayoutInfo, codegen::RuntimeFunctions::ColumnLayoutInfo); TYPE_BUILDER(AbstractExpression, expression::AbstractExpression); +TYPE_BUILDER(Type, codegen::type::Type); } // namespace codegen } // namespace peloton \ No newline at end of file diff --git a/src/include/codegen/proxy/string_functions_proxy.h b/src/include/codegen/proxy/string_functions_proxy.h index e9cf1c9c7fc..27a24995e3a 100644 --- a/src/include/codegen/proxy/string_functions_proxy.h +++ b/src/include/codegen/proxy/string_functions_proxy.h @@ -29,6 +29,9 @@ PROXY(StringFunctions) { DECLARE_METHOD(RTrim); DECLARE_METHOD(Substr); DECLARE_METHOD(Repeat); + DECLARE_METHOD(CompareStrings); + DECLARE_METHOD(WriteString); + DECLARE_METHOD(InputString); }; PROXY(StrWithLen) { diff --git a/src/include/codegen/proxy/tuple_runtime_proxy.h b/src/include/codegen/proxy/tuple_runtime_proxy.h deleted file mode 100644 index e166349575e..00000000000 --- a/src/include/codegen/proxy/tuple_runtime_proxy.h +++ /dev/null @@ -1,25 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// tuple_runtime_proxy.h -// -// Identification: src/include/codegen/proxy/tuple_runtime_proxy.h -// -// Copyright (c) 2015-2018, Carnegie Mellon University Database Group -// 
-//===----------------------------------------------------------------------===// - -#pragma once - -#include "codegen/proxy/proxy.h" - -namespace peloton { -namespace codegen { - -PROXY(TupleRuntime) { - DECLARE_METHOD(CreateVarlen); -}; - -} // namespace codegen -} // namespace peloton diff --git a/src/include/codegen/proxy/type_builder.h b/src/include/codegen/proxy/type_builder.h index caab2705f72..cc30f6b5f97 100644 --- a/src/include/codegen/proxy/type_builder.h +++ b/src/include/codegen/proxy/type_builder.h @@ -53,6 +53,9 @@ DEFINE_PRIMITIVE_BUILDER(unsigned long, Int64); DEFINE_PRIMITIVE_BUILDER(long long, Int64); DEFINE_PRIMITIVE_BUILDER(unsigned long long, Int64); DEFINE_PRIMITIVE_BUILDER(double, Double); +DEFINE_PRIMITIVE_BUILDER(void *, VoidPtr); +DEFINE_PRIMITIVE_BUILDER(char *, CharPtr); +DEFINE_PRIMITIVE_BUILDER(unsigned char *, CharPtr); #undef DEFINE_PRIMITIVE_BUILDER /// Const diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h index e74954a999a..9868d518bac 100644 --- a/src/include/codegen/proxy/values_runtime_proxy.h +++ b/src/include/codegen/proxy/values_runtime_proxy.h @@ -6,7 +6,7 @@ // // Identification: src/include/codegen/proxy/values_runtime_proxy.h // -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -29,7 +29,6 @@ PROXY(ValuesRuntime) { DECLARE_METHOD(OutputDecimal); DECLARE_METHOD(OutputVarchar); DECLARE_METHOD(OutputVarbinary); - DECLARE_METHOD(CompareStrings); }; } // namespace codegen diff --git a/src/include/codegen/runtime_functions.h b/src/include/codegen/runtime_functions.h index 13712188be4..4438bce31b7 100644 --- a/src/include/codegen/runtime_functions.h +++ b/src/include/codegen/runtime_functions.h @@ -77,7 +77,7 @@ class RuntimeFunctions { */ static void GetTileGroupLayout(const 
storage::TileGroup *tile_group, ColumnLayoutInfo *infos, uint32_t num_cols); - + /** * Execute a parallel scan over the given table in the given database. * @@ -106,6 +106,12 @@ class RuntimeFunctions { void *query_state, executor::ExecutorContext::ThreadStates &thread_states, void (*work_func)(void *, void *)); + ////////////////////////////////////////////////////////////////////////////// + /// + /// Exception related functions + /// + ////////////////////////////////////////////////////////////////////////////// + /** * Throw a divide-by-zero exception. This function doesn't return. */ @@ -115,6 +121,8 @@ class RuntimeFunctions { * Throw a mathematical overflow exception. This function does not return. */ static void ThrowOverflowException(); + + static void ThrowInvalidInputStringException(); }; } // namespace codegen diff --git a/src/include/codegen/tuple_runtime.h b/src/include/codegen/tuple_runtime.h deleted file mode 100644 index 86532055c7a..00000000000 --- a/src/include/codegen/tuple_runtime.h +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// tuple_runtime.h -// -// Identification: src/include/codegen/tuple_runtime.h -// -// Copyright (c) 2015-17, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "common/internal_types.h" - -namespace peloton { - -namespace type { -class AbstractPool; -} // namespace type - -namespace codegen { - -class TupleRuntime { - public: - static void CreateVarlen(char *data, uint32_t len, char *buf, - peloton::type::AbstractPool *pool); -}; - -} // namespace codegen -} // namespace peloton diff --git a/src/include/codegen/type/array_type.h b/src/include/codegen/type/array_type.h index e3b0fe7cc6a..052e55ca4ca 100644 --- a/src/include/codegen/type/array_type.h +++ b/src/include/codegen/type/array_type.h @@ -33,6 +33,9 @@ class Array : public 
SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/bigint_type.h b/src/include/codegen/type/bigint_type.h index 043e71a3e91..9f2abfe7aea 100644 --- a/src/include/codegen/type/bigint_type.h +++ b/src/include/codegen/type/bigint_type.h @@ -33,6 +33,9 @@ class BigInt : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/boolean_type.h b/src/include/codegen/type/boolean_type.h index 3c070b18714..5e854ba800e 100644 --- a/src/include/codegen/type/boolean_type.h +++ b/src/include/codegen/type/boolean_type.h @@ -35,6 +35,9 @@ class Boolean : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/date_type.h b/src/include/codegen/type/date_type.h index 03cf5da7827..225420e59c8 100644 --- a/src/include/codegen/type/date_type.h +++ b/src/include/codegen/type/date_type.h @@ -33,6 +33,9 @@ class Date : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen 
&codegen, const Type &type) const override; diff --git a/src/include/codegen/type/decimal_type.h b/src/include/codegen/type/decimal_type.h index b180fc2b4eb..6260fb98aba 100644 --- a/src/include/codegen/type/decimal_type.h +++ b/src/include/codegen/type/decimal_type.h @@ -33,6 +33,9 @@ class Decimal : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/integer_type.h b/src/include/codegen/type/integer_type.h index b8f6d97ea4f..dbc2b30957e 100644 --- a/src/include/codegen/type/integer_type.h +++ b/src/include/codegen/type/integer_type.h @@ -33,6 +33,9 @@ class Integer : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/smallint_type.h b/src/include/codegen/type/smallint_type.h index 86f0e9a2cb3..9c1068a0a82 100644 --- a/src/include/codegen/type/smallint_type.h +++ b/src/include/codegen/type/smallint_type.h @@ -33,6 +33,9 @@ class SmallInt : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/sql_type.h b/src/include/codegen/type/sql_type.h index a9232339570..256073bf80e 100644 --- a/src/include/codegen/type/sql_type.h +++ 
b/src/include/codegen/type/sql_type.h @@ -56,6 +56,8 @@ class SqlType { virtual void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const = 0; + virtual llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const = 0; virtual llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const = 0; virtual const TypeSystem &GetTypeSystem() const = 0; diff --git a/src/include/codegen/type/timestamp_type.h b/src/include/codegen/type/timestamp_type.h index b185cc349bf..febc95f1077 100644 --- a/src/include/codegen/type/timestamp_type.h +++ b/src/include/codegen/type/timestamp_type.h @@ -33,6 +33,9 @@ class Timestamp : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/tinyint_type.h b/src/include/codegen/type/tinyint_type.h index ae7cbd86b18..8593dd7b1de 100644 --- a/src/include/codegen/type/tinyint_type.h +++ b/src/include/codegen/type/tinyint_type.h @@ -33,6 +33,9 @@ class TinyInt : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/type.h b/src/include/codegen/type/type.h index d636d7d6572..1f485ad4e0e 100644 --- a/src/include/codegen/type/type.h +++ b/src/include/codegen/type/type.h @@ -15,6 +15,7 @@ #include #include "type/type_id.h" +#include "util/hash_util.h" namespace peloton { namespace codegen { @@ -78,6 +79,21 @@ class Type { Type AsNonNullable() const; }; +struct TypeHasher { + 
std::size_t operator()(const type::Type &type) const { + // TODO: hash the other parts + auto hash = HashUtil::Hash(&type.type_id); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&type.nullable)); + return hash; + } +}; + +struct TypeEquality { + bool operator()(const type::Type &l, const type::Type &r) const { + return l == r; + } +}; + } // namespace type } // namespace codegen } // namespace peloton diff --git a/src/include/codegen/type/varbinary_type.h b/src/include/codegen/type/varbinary_type.h index 54974e0a613..b9ad9cd3cf0 100644 --- a/src/include/codegen/type/varbinary_type.h +++ b/src/include/codegen/type/varbinary_type.h @@ -33,6 +33,9 @@ class Varbinary : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/type/varchar_type.h b/src/include/codegen/type/varchar_type.h index 796d493772a..1664a8a10c7 100644 --- a/src/include/codegen/type/varchar_type.h +++ b/src/include/codegen/type/varchar_type.h @@ -33,6 +33,9 @@ class Varchar : public SqlType, public Singleton { void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type, llvm::Type *&len_type) const override; + llvm::Function *GetInputFunction(CodeGen &codegen, + const Type &type) const override; + llvm::Function *GetOutputFunction(CodeGen &codegen, const Type &type) const override; diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h new file mode 100644 index 00000000000..f230354c5fa --- /dev/null +++ b/src/include/codegen/util/csv_scanner.h @@ -0,0 +1,216 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scanner.h +// +// Identification: src/include/codegen/util/csv_scanner.h +// +// 
Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include "codegen/type/type.h" +#include "util/file.h" + +namespace peloton { + +namespace executor { +class ExecutorContext; +} // namespace executor + +namespace type { +class AbstractPool; +} // namespace type + +namespace codegen { +namespace util { + +/** + * This is the primary class to scan CSV files. Callers use the constructor to + * configure various aspects of how parsing is performed. Callers must provide + * a description of the rows stored in the CSV file, and a callback function + * that is invoked once for every row in the CSV file. The delimiter character, + * quoting character, and escape characters can also be configured through the + * constructor. + * + * This scanner class is fail-fast. If it finds an ill-formatted row, it will + * immediately throw an error. + * + * TODO: implement a more generous parser that is best-effort. 
+ */ +class CSVScanner { + public: + // 64K buffer size + static constexpr uint32_t kDefaultBufferSize = (1ul << 16ul); + + // We allocate a maximum of 1GB for the line buffer + static constexpr uint64_t kMaxAllocSize = (1ul << 30ul); + + // The signature of the callback function + using Callback = void (*)(void *); + + /** + * Column information + */ + struct Column { + // The type of data this column represents + codegen::type::Type col_type; + + // A pointer to where the next value of this column is + const char *ptr; + + // The number of bytes + uint32_t len; + + // Is the next value of this column NULL + bool is_null; + }; + + /** + * This structure tracks various statistics while we scan the CSV + */ + struct Stats { + // The number of times the read-buffer was copied into the line-buffer + uint32_t num_copies = 0; + // The number of times we had to re-allocate the line-buffer to make room + // for new data (i.e., to handle really long lines that don't fit into the + // read-buffer) + uint32_t num_reallocs = 0; + // The number of times we had to call Read() from the file + uint32_t num_reads = 0; + }; + + /** + * Constructor. + * + * @param memory A memory pool where all allocations are sourced from + * @param file_path The full path to the CSV file + * @param col_types A description of the rows stored in the CSV + * @param num_cols The number of columns to expect + * @param func The callback function to invoke per row/line in the CSV + * @param opaque_state An opaque state that is passed to the callback function + * upon invocation. + * @param delimiter The character that separates columns within a row + * @param quote The quoting character used to quote data (i.e., strings) + * @param escape The character that should appear before any data characters + * that match the quote character. 
+ */ + CSVScanner(peloton::type::AbstractPool &memory, const std::string &file_path, + const codegen::type::Type *col_types, uint32_t num_cols, + Callback func, void *opaque_state, char delimiter = ',', + char quote = '"', char escape = '"'); + + /** + * Destructor + */ + ~CSVScanner(); + + /** + * Initialization function. This is the entry point from codegen to initialize + * scanner instances. + * + * @param scanner The scanner we're initializing + * @param executor_context The executor context (presumably supplies the pool) + * @param file_path The full path to the CSV file + * @param col_types A description of the rows stored in the CSV + * @param num_cols The number of columns to expect + * @param func The callback function to invoke per row/line in the CSV + * @param opaque_state An opaque state that is passed to the callback function + * upon invocation. + * @param delimiter The character that separates columns within a row + * @param quote The quoting character used to quote data (i.e., strings) + * @param escape The character that should appear before any data characters + * that match the quote character. + */ + static void Init(CSVScanner &scanner, + executor::ExecutorContext &executor_context, + const char *file_path, const codegen::type::Type *col_types, + uint32_t num_cols, Callback func, void *opaque_state, + char delimiter, char quote, char escape); + + /** + * Destruction function. This is the entry point from codegen when cleaning up + * and reclaiming memory from scanner instances. + * + * @param scanner The scanner we're destroying. 
+ */ + static void Destroy(CSVScanner &scanner); + + /** + * Produce all the rows stored in the configured CSV file + */ + void Produce(); + + /** + * Return the list of columns + * + * @return A pointer to the scanner's column array + */ + const Column *GetColumns() const { return cols_; } + + private: + // Initialize the scan + void Initialize(); + + // Append bytes to the end of the line buffer + void AppendToLineBuffer(const char *data, uint32_t len); + + // Read the next line from the CSV file + char *NextLine(); + + // Read a buffer's worth of data from the CSV file + bool NextBuffer(); + + // Produce CSV data stored in the provided line + void ProduceCSV(char *line); + + private: + // All memory allocations happen from this pool + peloton::type::AbstractPool &memory_; + + // The path to the CSV file + const std::string file_path_; + + // The CSV file handle + peloton::util::File file_; + + // The temporary read-buffer where raw file contents are first read into + // TODO: make these unique_ptr's with a custom deleter + char *buffer_; + uint32_t buffer_pos_; + uint32_t buffer_end_; + + // A pointer to the start of a line in the CSV file + char *line_; + uint32_t line_len_; + uint32_t line_maxlen_; + + // Line number + uint32_t line_number_; + + // The column delimiter, quote, and escape characters configured for this CSV + char delimiter_; + char quote_; + char escape_; + + // The callback function to call for each row of the CSV, and an opaque state + Callback func_; + void *opaque_state_; + + // The columns + Column *cols_; + uint32_t num_cols_; + + // Statistics + Stats stats_; +}; + +} // namespace util +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h index e6cf4967ca2..fd5c26b0e78 100644 --- a/src/include/codegen/values_runtime.h +++ b/src/include/codegen/values_runtime.h @@ -15,8 +15,17 @@ #include namespace peloton { + +namespace type { +class AbstractPool; +} // namespace 
type + namespace codegen { +namespace type { +class Type; +} // namespace type + class ValuesRuntime { public: // Write out the given boolean value into the array at the provided index @@ -50,9 +59,6 @@ class ValuesRuntime { // Write out the given varbinary value into the array at the provided index static void OutputVarbinary(char *values, uint32_t idx, const char *str, uint32_t len); - - static int32_t CompareStrings(const char *str1, uint32_t len1, - const char *str2, uint32_t len2); }; } // namespace codegen diff --git a/src/include/common/container_tuple.h b/src/include/common/container_tuple.h index 29613067734..0d27a0da6f5 100644 --- a/src/include/common/container_tuple.h +++ b/src/include/common/container_tuple.h @@ -6,7 +6,7 @@ // // Identification: src/include/common/container_tuple.h // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -17,6 +17,7 @@ #include #include +#include "catalog/schema.h" #include "common/abstract_tuple.h" #include "common/exception.h" #include "common/macros.h" diff --git a/src/include/common/internal_types.h b/src/include/common/internal_types.h index 995a92cea2d..22598226407 100644 --- a/src/include/common/internal_types.h +++ b/src/include/common/internal_types.h @@ -556,6 +556,7 @@ enum class PlanNodeType { // Scan Nodes SEQSCAN = 10, INDEXSCAN = 11, + CSVSCAN = 12, // Join Nodes NESTLOOP = 20, @@ -594,7 +595,7 @@ enum class PlanNodeType { // Utility RESULT = 70, - COPY = 71, + EXPORT_EXTERNAL_FILE = 71, CREATE_FUNC = 72, // Test @@ -817,6 +818,13 @@ std::string CopyTypeToString(CopyType type); CopyType StringToCopyType(const std::string &str); std::ostream &operator<<(std::ostream &os, const CopyType &type); +enum class ExternalFileFormat { + CSV, +}; +std::string ExternalFileFormatToString(ExternalFileFormat format); +ExternalFileFormat 
StringToExternalFileFormat(const std::string &str); +std::ostream &operator<<(std::ostream &os, const ExternalFileFormat &format); + //===--------------------------------------------------------------------===// // Payload Types //===--------------------------------------------------------------------===// @@ -1336,6 +1344,7 @@ enum class RuleType : uint32_t { GET_TO_SEQ_SCAN, GET_TO_INDEX_SCAN, QUERY_DERIVED_GET_TO_PHYSICAL, + EXTERNAL_FILE_GET_TO_PHYSICAL, DELETE_TO_PHYSICAL, UPDATE_TO_PHYSICAL, INSERT_TO_PHYSICAL, @@ -1346,6 +1355,7 @@ enum class RuleType : uint32_t { INNER_JOIN_TO_HASH_JOIN, IMPLEMENT_DISTINCT, IMPLEMENT_LIMIT, + EXPORT_EXTERNAL_FILE_TO_PHYSICAL, // Don't move this one RewriteDelimiter, diff --git a/src/include/common/macros.h b/src/include/common/macros.h index e7f2dc95008..96aaf6ab0d2 100644 --- a/src/include/common/macros.h +++ b/src/include/common/macros.h @@ -97,20 +97,13 @@ namespace peloton { #endif /* CHECK_INVARIANTS */ //===--------------------------------------------------------------------===// -// override +// Compiler version checks //===--------------------------------------------------------------------===// -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) -#define GCC_AT_LEAST_47 1 +#if __GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 0) +#define GCC_AT_LEAST_6 1 #else -#define GCC_AT_LEAST_47 0 -#endif - -// g++-4.6 does not support override -#if GCC_AT_LEAST_47 -#define OVERRIDE override -#else -#define OVERRIDE +#define GCC_AT_LEAST_6 0 #endif //===--------------------------------------------------------------------===// diff --git a/src/include/executor/copy_executor.h b/src/include/executor/copy_executor.h index 31d65adaa1b..a95b6c49e86 100644 --- a/src/include/executor/copy_executor.h +++ b/src/include/executor/copy_executor.h @@ -40,9 +40,6 @@ class CopyExecutor : public AbstractExecutor { bool DExecute(); - // Initialize the column ids for query parameters - void InitParamColIds(); - bool 
InitFileHandle(const char *name, const char *mode); // Flush the local buffer diff --git a/src/include/function/date_functions.h b/src/include/function/date_functions.h index e5a6ca85cd6..73e95a512a5 100644 --- a/src/include/function/date_functions.h +++ b/src/include/function/date_functions.h @@ -1,32 +1,76 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// date_functions.h -// -// Identification: src/include/function/date_functions.h -// -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - -#include "common/logger.h" -#include "common/internal_types.h" -#include "type/value.h" - -namespace peloton { -namespace function { - -class DateFunctions { - public: - static int64_t Now(); - static type::Value _Now(const std::vector &args); -}; - -} // namespace function -} // namespace peloton +//===----------------------------------------------------------------------===// +// +// Peloton +// +// date_functions.h +// +// Identification: src/include/function/date_functions.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include "type/value.h" + +namespace peloton { + +namespace codegen { +namespace type { +class Type; +} // namespace type +} // namespace codegen + +namespace function { + +class DateFunctions { + public: + /** + * Function used to return the current date/time. Normally called at the start + * of a transaction, and consistent throughout its duration. + * + * @return The current date at the time of invocation + */ + static int64_t Now(); + static type::Value _Now(const std::vector &args); + + /** + * Convert the given input into a Julian date format. 
+ * + * @param year The year + * @param month The month (1-based) + * @param day The day (1-based) + * @return The equivalent 32-bit integer representation of the date + */ + static int32_t DateToJulian(int32_t year, int32_t month, int32_t day); + + /** + * Decompose the given 32-bit Julian date value into year, month, and day + * components. + * + * @param julian_date The julian date + * @param year[out] Where the year is written + * @param month[out] Where the result month is written + * @param day[out] Where the result day is written + */ + static void JulianToDate(int32_t julian_date, int32_t &year, int32_t &month, + int32_t &day); + + /** + * Convert the given input string into a date. + * + * @param data A pointer to a string representation of a date + * @param len The length of the string + * @return A suitable date representation of the given input string that can + * be stored in the data tables. This typically means a Julian date. + */ + static int32_t InputDate(const codegen::type::Type &type, const char *data, + uint32_t len); +}; + +} // namespace function +} // namespace peloton diff --git a/src/include/function/decimal_functions.h b/src/include/function/decimal_functions.h deleted file mode 100644 index f4373aa5750..00000000000 --- a/src/include/function/decimal_functions.h +++ /dev/null @@ -1,46 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// decimal_functions.h -// -// Identification: src/include/function/decimal_functions.h -// -// Copyright (c) 2015-2017, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -#include "type/value.h" - -namespace peloton { -namespace function { - -class DecimalFunctions { - public: - - // Abs - static double Abs(double arg); - static type::Value _Abs(const std::vector& args); - - // Sqrt - static type::Value Sqrt(const std::vector& args); - - // 
Floor - static double Floor(const double val); - static type::Value _Floor(const std::vector& args); - - // Round - static double Round(double arg); - static type::Value _Round(const std::vector& args); - - // Ceil - static double Ceil(const double args); - static type::Value _Ceil(const std::vector& args); -}; - -} // namespace function -} // namespace peloton diff --git a/src/include/function/numeric_functions.h b/src/include/function/numeric_functions.h new file mode 100644 index 00000000000..6a606caf5d5 --- /dev/null +++ b/src/include/function/numeric_functions.h @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// numeric_functions.h +// +// Identification: src/include/function/numeric_functions.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace peloton { + +namespace codegen { +namespace type { +class Type; +} // namespace type +} // namespace codegen + +namespace type { +class Value; +} // namespace type + +namespace function { + +class NumericFunctions { + public: + // Abs + static double Abs(double arg); + static type::Value _Abs(const std::vector &args); + + // Sqrt + static double ISqrt(uint32_t num); + static double DSqrt(double num); + static type::Value Sqrt(const std::vector &args); + + // Floor + static double Floor(double val); + static type::Value _Floor(const std::vector &args); + + // Round + static double Round(double arg); + static type::Value _Round(const std::vector &args); + + // Ceil + static double Ceil(double args); + static type::Value _Ceil(const std::vector &args); + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Input functions + /// + ////////////////////////////////////////////////////////////////////////////// + + static bool InputBoolean(const 
codegen::type::Type &type, const char *ptr, + uint32_t len); + + static int8_t InputTinyInt(const codegen::type::Type &type, const char *ptr, + uint32_t len); + + static int16_t InputSmallInt(const codegen::type::Type &type, const char *ptr, + uint32_t len); + + static int32_t InputInteger(const codegen::type::Type &type, const char *ptr, + uint32_t len); + + static int64_t InputBigInt(const codegen::type::Type &type, const char *ptr, + uint32_t len); + + static double InputDecimal(const codegen::type::Type &type, const char *ptr, + uint32_t len); +}; + +} // namespace function +} // namespace peloton diff --git a/src/include/function/string_functions.h b/src/include/function/string_functions.h index 2a209d0dee6..47c72c62e15 100644 --- a/src/include/function/string_functions.h +++ b/src/include/function/string_functions.h @@ -16,10 +16,20 @@ namespace peloton { +namespace codegen { +namespace type { +class Type; +} // namespace type +} // namespace codegen + namespace executor { class ExecutorContext; } // namespace executor +namespace type { +class AbstractPool; +} // namespace type + namespace function { class StringFunctions { @@ -74,6 +84,42 @@ class StringFunctions { // Length will return the number of characters in the given string static uint32_t Length(executor::ExecutorContext &ctx, const char *str, uint32_t length); + + /** + * Compare two (potentially empty) strings returning an integer value + * indicating their sort order. + * + * @param str1 A pointer to the first string + * @param len1 The length of the first string + * @param str2 A pointer to the second string + * @param len2 The length of the second string + * @return -1 if the first string is strictly less than the second; 0 if the + * two strings are equal; 1 if the first string is strictly greater than the + * second. 
+ */ + static int32_t CompareStrings(const char *str1, uint32_t len1, + const char *str2, uint32_t len2); + + /** + * Write the provided variable length object into the target buffer. + * + * @param data The bytes we wish to serialize + * @param len The length of the byte array + * @param buf The target position we wish to write to + * @param pool A memory pool to source memory from + */ + static void WriteString(const char *data, uint32_t len, char *buf, + peloton::type::AbstractPool &pool); + + /** + * + * @param type + * @param data + * @param len + * @return + */ + static StrWithLen InputString(const codegen::type::Type &type, + const char *data, uint32_t len); }; } // namespace function diff --git a/src/include/index/bwtree.h b/src/include/index/bwtree.h index f9352aad09a..abb293f2e67 100755 --- a/src/include/index/bwtree.h +++ b/src/include/index/bwtree.h @@ -7585,7 +7585,7 @@ class BwTree : public BwTreeBase { // would always fail, until we have cleaned all epoch nodes current_epoch_p = nullptr; - LOG_TRACE("Clearing the epoch in ~EpochManager()..."); + LOG_TRACE("Clearing the epoch in ~EpochManager() ..."); // If all threads has exited then all thread counts are // 0, and therefore this should proceed way to the end diff --git a/src/include/optimizer/child_property_deriver.h b/src/include/optimizer/child_property_deriver.h index bd4aeb7b933..914cc77ab27 100644 --- a/src/include/optimizer/child_property_deriver.h +++ b/src/include/optimizer/child_property_deriver.h @@ -39,6 +39,7 @@ class ChildPropertyDeriver : public OperatorVisitor { void Visit(const DummyScan *) override; void Visit(const PhysicalSeqScan *) override; void Visit(const PhysicalIndexScan *) override; + void Visit(const ExternalFileScan *) override; void Visit(const QueryDerivedScan *op) override; void Visit(const PhysicalOrderBy *) override; void Visit(const PhysicalLimit *) override; @@ -58,6 +59,7 @@ class ChildPropertyDeriver : public OperatorVisitor { void Visit(const PhysicalSortGroupBy 
*) override; void Visit(const PhysicalDistinct *) override; void Visit(const PhysicalAggregate *) override; + void Visit(const PhysicalExportExternalFile *) override; private: void DeriveForJoin(); diff --git a/src/include/optimizer/cost_calculator.h b/src/include/optimizer/cost_calculator.h index 442f386fc5f..8ef40330d6b 100644 --- a/src/include/optimizer/cost_calculator.h +++ b/src/include/optimizer/cost_calculator.h @@ -27,6 +27,7 @@ class CostCalculator : public OperatorVisitor { void Visit(const DummyScan *) override; void Visit(const PhysicalSeqScan *) override; void Visit(const PhysicalIndexScan *) override; + void Visit(const ExternalFileScan *) override; void Visit(const QueryDerivedScan *) override; void Visit(const PhysicalOrderBy *) override; void Visit(const PhysicalLimit *) override; diff --git a/src/include/optimizer/input_column_deriver.h b/src/include/optimizer/input_column_deriver.h index fa1ec6ca5a1..ef66823bba0 100644 --- a/src/include/optimizer/input_column_deriver.h +++ b/src/include/optimizer/input_column_deriver.h @@ -53,6 +53,8 @@ class InputColumnDeriver : public OperatorVisitor { void Visit(const PhysicalIndexScan *op) override; + void Visit(const ExternalFileScan *op) override; + void Visit(const QueryDerivedScan *op) override; void Visit(const PhysicalOrderBy *) override; @@ -91,6 +93,8 @@ class InputColumnDeriver : public OperatorVisitor { void Visit(const PhysicalAggregate *) override; + void Visit(const PhysicalExportExternalFile *) override; + private: /** * @brief Provide all tuple value expressions needed in the expression diff --git a/src/include/optimizer/operator_node.h b/src/include/optimizer/operator_node.h index cb20c163bbe..f870df330eb 100644 --- a/src/include/optimizer/operator_node.h +++ b/src/include/optimizer/operator_node.h @@ -27,6 +27,7 @@ enum class OpType { Leaf, // Logical ops Get, + LogicalExternalFileGet, LogicalQueryDerivedGet, LogicalProjection, LogicalFilter, @@ -45,12 +46,14 @@ enum class OpType { 
LogicalUpdate, LogicalLimit, LogicalDistinct, + LogicalExportExternalFile, // Separate between logical and physical ops LogicalPhysicalDelimiter, // Physical ops DummyScan, /* Dummy Physical Op for SELECT without FROM*/ SeqScan, IndexScan, + ExternalFileScan, QueryDerivedScan, OrderBy, PhysicalLimit, @@ -69,7 +72,8 @@ enum class OpType { Update, Aggregate, HashGroupBy, - SortGroupBy + SortGroupBy, + ExportExternalFile, }; //===--------------------------------------------------------------------===// diff --git a/src/include/optimizer/operator_visitor.h b/src/include/optimizer/operator_visitor.h index 75b0a9f9c67..e225287cebb 100644 --- a/src/include/optimizer/operator_visitor.h +++ b/src/include/optimizer/operator_visitor.h @@ -6,7 +6,7 @@ // // Identification: src/include/optimizer/operator_visitor.h // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -23,12 +23,13 @@ namespace optimizer { class OperatorVisitor { public: - virtual ~OperatorVisitor(){}; + virtual ~OperatorVisitor() = default; // Physical operator virtual void Visit(const DummyScan *) {} virtual void Visit(const PhysicalSeqScan *) {} virtual void Visit(const PhysicalIndexScan *) {} + virtual void Visit(const ExternalFileScan *) {} virtual void Visit(const QueryDerivedScan *) {} virtual void Visit(const PhysicalOrderBy *) {} virtual void Visit(const PhysicalLimit *) {} @@ -48,10 +49,12 @@ class OperatorVisitor { virtual void Visit(const PhysicalSortGroupBy *) {} virtual void Visit(const PhysicalDistinct *) {} virtual void Visit(const PhysicalAggregate *) {} + virtual void Visit(const PhysicalExportExternalFile *) {} // Logical operator virtual void Visit(const LeafOperator *) {} virtual void Visit(const LogicalGet *) {} + virtual void Visit(const LogicalExternalFileGet *) {} virtual void Visit(const LogicalQueryDerivedGet *) 
{} virtual void Visit(const LogicalFilter *) {} virtual void Visit(const LogicalProjection *) {} @@ -70,6 +73,7 @@ class OperatorVisitor { virtual void Visit(const LogicalUpdate *) {} virtual void Visit(const LogicalDistinct *) {} virtual void Visit(const LogicalLimit *) {} + virtual void Visit(const LogicalExportExternalFile *) {} }; } // namespace optimizer diff --git a/src/include/optimizer/operators.h b/src/include/optimizer/operators.h index a745439251a..d51d66b01e8 100644 --- a/src/include/optimizer/operators.h +++ b/src/include/optimizer/operators.h @@ -1,4 +1,3 @@ - //===----------------------------------------------------------------------===// // // Peloton @@ -7,7 +6,7 @@ // // Identification: src/include/optimizer/operators.h // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -31,7 +30,7 @@ class UpdateClause; } namespace catalog { - class TableCatalogObject; +class TableCatalogObject; } namespace optimizer { @@ -51,10 +50,10 @@ class LeafOperator : OperatorNode { //===--------------------------------------------------------------------===// class LogicalGet : public OperatorNode { public: - static Operator make(oid_t get_id = 0, - std::vector predicates = {}, - std::shared_ptr table = nullptr, - std::string alias = "", bool update = false); + static Operator make( + oid_t get_id = 0, std::vector predicates = {}, + std::shared_ptr table = nullptr, + std::string alias = "", bool update = false); bool operator==(const BaseOperatorNode &r) override; @@ -68,6 +67,28 @@ class LogicalGet : public OperatorNode { bool is_for_update; }; +//===--------------------------------------------------------------------===// +// External file get +//===--------------------------------------------------------------------===// +class LogicalExternalFileGet : public OperatorNode { + public: + static 
Operator make(oid_t get_id, ExternalFileFormat format, + std::string file_name, char delimiter, char quote, + char escape); + + bool operator==(const BaseOperatorNode &r) override; + + hash_t Hash() const override; + + // identifier for all get operators + oid_t get_id; + ExternalFileFormat format; + std::string file_name; + char delimiter; + char quote; + char escape; +}; + //===--------------------------------------------------------------------===// // Query derived get //===--------------------------------------------------------------------===// @@ -246,7 +267,8 @@ class LogicalAggregateAndGroupBy class LogicalInsert : public OperatorNode { public: static Operator make( - std::shared_ptr target_table, const std::vector *columns, + std::shared_ptr target_table, + const std::vector *columns, const std::vector>> *values); @@ -258,7 +280,8 @@ class LogicalInsert : public OperatorNode { class LogicalInsertSelect : public OperatorNode { public: - static Operator make(std::shared_ptr target_table); + static Operator make( + std::shared_ptr target_table); std::shared_ptr target_table; }; @@ -286,7 +309,8 @@ class LogicalLimit : public OperatorNode { //===--------------------------------------------------------------------===// class LogicalDelete : public OperatorNode { public: - static Operator make(std::shared_ptr target_table); + static Operator make( + std::shared_ptr target_table); std::shared_ptr target_table; }; @@ -304,6 +328,26 @@ class LogicalUpdate : public OperatorNode { const std::vector> *updates; }; +//===--------------------------------------------------------------------===// +// Export to external file +//===--------------------------------------------------------------------===// +class LogicalExportExternalFile + : public OperatorNode { + public: + static Operator make(ExternalFileFormat format, std::string file_name, + char delimiter, char quote, char escape); + + bool operator==(const BaseOperatorNode &r) override; + + hash_t Hash() const 
override; + + ExternalFileFormat format; + std::string file_name; + char delimiter; + char quote; + char escape; +}; + //===--------------------------------------------------------------------===// // DummyScan //===--------------------------------------------------------------------===// @@ -317,7 +361,8 @@ class DummyScan : public OperatorNode { //===--------------------------------------------------------------------===// class PhysicalSeqScan : public OperatorNode { public: - static Operator make(oid_t get_id, std::shared_ptr table, + static Operator make(oid_t get_id, + std::shared_ptr table, std::string alias, std::vector predicates, bool update); @@ -339,7 +384,8 @@ class PhysicalSeqScan : public OperatorNode { //===--------------------------------------------------------------------===// class PhysicalIndexScan : public OperatorNode { public: - static Operator make(oid_t get_id, std::shared_ptr table, + static Operator make(oid_t get_id, + std::shared_ptr table, std::string alias, std::vector predicates, bool update, oid_t index_id, std::vector key_column_id_list, @@ -366,6 +412,28 @@ class PhysicalIndexScan : public OperatorNode { std::vector value_list; }; +//===--------------------------------------------------------------------===// +// Physical external file scan +//===--------------------------------------------------------------------===// +class ExternalFileScan : public OperatorNode { + public: + static Operator make(oid_t get_id, ExternalFileFormat format, + std::string file_name, char delimiter, char quote, + char escape); + + bool operator==(const BaseOperatorNode &r) override; + + hash_t Hash() const override; + + // identifier for all get operators + oid_t get_id; + ExternalFileFormat format; + std::string file_name; + char delimiter; + char quote; + char escape; +}; + //===--------------------------------------------------------------------===// // Query derived get 
//===--------------------------------------------------------------------===// @@ -513,7 +581,8 @@ class PhysicalOuterHashJoin : public OperatorNode { class PhysicalInsert : public OperatorNode { public: static Operator make( - std::shared_ptr target_table, const std::vector *columns, + std::shared_ptr target_table, + const std::vector *columns, const std::vector>> *values); @@ -525,7 +594,8 @@ class PhysicalInsert : public OperatorNode { class PhysicalInsertSelect : public OperatorNode { public: - static Operator make(std::shared_ptr target_table); + static Operator make( + std::shared_ptr target_table); std::shared_ptr target_table; }; @@ -535,7 +605,8 @@ class PhysicalInsertSelect : public OperatorNode { //===--------------------------------------------------------------------===// class PhysicalDelete : public OperatorNode { public: - static Operator make(std::shared_ptr target_table); + static Operator make( + std::shared_ptr target_table); std::shared_ptr target_table; }; @@ -552,6 +623,26 @@ class PhysicalUpdate : public OperatorNode { const std::vector> *updates; }; +//===--------------------------------------------------------------------===// +// Physical ExportExternalFile +//===--------------------------------------------------------------------===// +class PhysicalExportExternalFile + : public OperatorNode { + public: + static Operator make(ExternalFileFormat format, std::string file_name, + char delimiter, char quote, char escape); + + bool operator==(const BaseOperatorNode &r) override; + + hash_t Hash() const override; + + ExternalFileFormat format; + std::string file_name; + char delimiter; + char quote; + char escape; +}; + //===--------------------------------------------------------------------===// // PhysicalHashGroupBy //===--------------------------------------------------------------------===// diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 82b1d4c9a05..18608c06756 100644 --- 
a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -23,24 +23,24 @@ namespace peloton { namespace parser { class SQLStatementList; class SQLStatement; -} +} // namespace parser namespace planner { class AbstractPlan; -}; +} // namespace planner namespace optimizer { class OperatorExpression; -} +} // namespace optimizer namespace concurrency { class TransactionContext; -} +} // namespace concurrency namespace test { - class OptimizerRuleTests_SimpleAssociativeRuleTest_Test; - class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test; -} +class OptimizerRuleTests_SimpleAssociativeRuleTest_Test; +class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test; +} // namespace test namespace optimizer { @@ -60,8 +60,10 @@ class Optimizer : public AbstractOptimizer { friend class BindingIterator; friend class GroupBindingIterator; - friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest_Test; - friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest2_Test; + friend class ::peloton::test:: + OptimizerRuleTests_SimpleAssociativeRuleTest_Test; + friend class ::peloton::test:: + OptimizerRuleTests_SimpleAssociativeRuleTest2_Test; public: Optimizer(const Optimizer &) = delete; @@ -83,23 +85,26 @@ class Optimizer : public AbstractOptimizer { OptimizerMetadata &GetMetadata() { return metadata_; } /* For test purposes only */ - std::shared_ptr TestInsertQueryTree(parser::SQLStatement *tree, - concurrency::TransactionContext *txn) { + std::shared_ptr TestInsertQueryTree( + parser::SQLStatement *tree, concurrency::TransactionContext *txn) { return InsertQueryTree(tree, txn); } /* For test purposes only */ void TestExecuteTaskStack(OptimizerTaskStack &task_stack, int root_group_id, - std::shared_ptr root_context) { + std::shared_ptr root_context) { return ExecuteTaskStack(task_stack, root_group_id, root_context); } private: - /* HandleDDLStatement - Check and handle DDL statment (currently only support - *CREATE), set - * 
is_ddl_stmt to false if there is no DDL statement. + /** + * Check and handle the provided DDL statement, returning the resulting plan + * if parsed tree is a DDL statement. The is_ddl_stmt parameter is set to + * indicate if the parse tree was indeed a DDL statement. * - * tree: a peloton query tree representing a select query - * return: the DDL plan if it is a DDL statement + * @param tree A parsed SQL statement + * @param[out] is_ddl_stmt Set to true if the SQL statement is DDL + * @param txn The transactional context + * @return The constructed plan tree representing the DDL statement */ std::unique_ptr HandleDDLStatement( parser::SQLStatement *tree, bool &is_ddl_stmt, diff --git a/src/include/optimizer/plan_generator.h b/src/include/optimizer/plan_generator.h index c0a21259bc6..9fba272d4a8 100644 --- a/src/include/optimizer/plan_generator.h +++ b/src/include/optimizer/plan_generator.h @@ -54,6 +54,8 @@ class PlanGenerator : public OperatorVisitor { void Visit(const PhysicalIndexScan *) override; + void Visit(const ExternalFileScan *) override; + void Visit(const QueryDerivedScan *) override; void Visit(const PhysicalOrderBy *) override; @@ -92,6 +94,8 @@ class PlanGenerator : public OperatorVisitor { void Visit(const PhysicalAggregate *) override; + void Visit(const PhysicalExportExternalFile *) override; + private: /** * @brief Generate all tuple value expressions of a base table diff --git a/src/include/optimizer/rule_impls.h b/src/include/optimizer/rule_impls.h index 2c40e3f3c81..57902e744a9 100644 --- a/src/include/optimizer/rule_impls.h +++ b/src/include/optimizer/rule_impls.h @@ -73,6 +73,18 @@ class GetToSeqScan : public Rule { OptimizeContext *context) const override; }; +class LogicalExternalFileGetToPhysical : public Rule { + public: + LogicalExternalFileGetToPhysical(); + + bool Check(std::shared_ptr plan, + OptimizeContext *context) const override; + + void Transform(std::shared_ptr input, + std::vector> &transformed, + OptimizeContext *context) 
const override; +}; + /** * @brief Generate dummy scan for queries like "SELECT 1", there's no actual * table to generate @@ -269,6 +281,21 @@ class ImplementLimit : public Rule { OptimizeContext *context) const override; }; +/** + * @brief Logical Export to External File -> Physical Export to External file + */ +class LogicalExportToPhysicalExport : public Rule { + public: + LogicalExportToPhysicalExport(); + + bool Check(std::shared_ptr plan, + OptimizeContext *context) const override; + + void Transform(std::shared_ptr input, + std::vector> &transformed, + OptimizeContext *context) const override; +}; + //===--------------------------------------------------------------------===// // Rewrite rules //===--------------------------------------------------------------------===// diff --git a/src/include/optimizer/util.h b/src/include/optimizer/util.h index 8b9eb4baeef..634e1297347 100644 --- a/src/include/optimizer/util.h +++ b/src/include/optimizer/util.h @@ -17,7 +17,6 @@ #include #include "expression/abstract_expression.h" -#include "parser/copy_statement.h" #include "planner/abstract_plan.h" namespace peloton { @@ -122,12 +121,6 @@ bool ContainsJoinColumns(const std::unordered_set &l_group_alias, const std::unordered_set &r_group_alias, const expression::AbstractExpression *expr); -/** - * @brief Create a copy plan based on the copy statement - */ -std::unique_ptr CreateCopyPlan( - parser::CopyStatement *copy_stmt); - /** * @brief Construct the map from subquery column name to the actual expression * at the subquery level, for example SELECT a FROM (SELECT a + b as a FROM diff --git a/src/include/parser/copy_statement.h b/src/include/parser/copy_statement.h index 3af77a797c4..67e8fe5ee25 100644 --- a/src/include/parser/copy_statement.h +++ b/src/include/parser/copy_statement.h @@ -2,19 +2,19 @@ // // Peloton // -// statement_import.h +// copy_statement.h // -// Identification: src/include/parser/statement_import.h +// Identification: 
src/include/parser/copy_statement.h // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #pragma once +#include "parser/select_statement.h" #include "parser/sql_statement.h" #include "parser/table_ref.h" -#include "expression/constant_value_expression.h" #include "common/sql_node_visitor.h" namespace peloton { @@ -26,26 +26,49 @@ namespace parser { */ class CopyStatement : public SQLStatement { public: - CopyStatement(CopyType type) + CopyStatement() : SQLStatement(StatementType::COPY), - cpy_table(nullptr), - type(type), - delimiter(','){}; + table(nullptr), + type(), + delimiter(',') {} - virtual ~CopyStatement() {} + ~CopyStatement() = default; - virtual void Accept(SqlNodeVisitor *v) override { v->Visit(this); } + void Accept(SqlNodeVisitor *v) override { v->Visit(this); } const std::string GetInfo(int num_indent) const override; const std::string GetInfo() const override; - std::unique_ptr cpy_table; + ////////////////////////////////////////////////////////////////////////////// + /// + /// Public member fields + /// + ////////////////////////////////////////////////////////////////////////////// + // The table that is copied into or copied from + std::unique_ptr table; + + // The SQL statement used instead of a table when copying data out to a file + std::unique_ptr select_stmt; + + // The set of attributes being written out or read in + std::vector> select_list; + + // The type of copy CopyType type; + // The input or output file that is read from or written into std::string file_path; - char delimiter; + + // The format of the file + ExternalFileFormat format = ExternalFileFormat::CSV; + + bool is_from = false; + + char delimiter = ','; + char quote = '"'; + char escape = '"'; }; } // namespace parser diff --git a/src/include/parser/postgresparser.h b/src/include/parser/postgresparser.h index
decd43d9ee7..388623a138c 100644 --- a/src/include/parser/postgresparser.h +++ b/src/include/parser/postgresparser.h @@ -6,7 +6,7 @@ // // Identification: src/include/parser/postgresparser.h // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// diff --git a/src/include/planner/abstract_plan.h b/src/include/planner/abstract_plan.h index 2cb5e89ac49..c257b20d830 100644 --- a/src/include/planner/abstract_plan.h +++ b/src/include/planner/abstract_plan.h @@ -6,7 +6,7 @@ // // Identification: src/include/planner/abstract_plan.h // -// Copyright (c) 2015-18, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// diff --git a/src/include/planner/abstract_scan_plan.h b/src/include/planner/abstract_scan_plan.h index 816676736b5..7241f844c74 100644 --- a/src/include/planner/abstract_scan_plan.h +++ b/src/include/planner/abstract_scan_plan.h @@ -56,7 +56,7 @@ class AbstractScan : public AbstractPlan { storage::DataTable *GetTable() const { return target_table_; } - void GetAttributes(std::vector &ais) const { + virtual void GetAttributes(std::vector &ais) const { for (const auto &ai : attributes_) { ais.push_back(&ai); } diff --git a/src/include/planner/aggregate_plan.h b/src/include/planner/aggregate_plan.h index 56c0e99a6b6..51d9d8cfe42 100644 --- a/src/include/planner/aggregate_plan.h +++ b/src/include/planner/aggregate_plan.h @@ -41,7 +41,7 @@ class AggregatePlan : public AbstractPlan { bool distinct = false); // Bindings - void PerformBinding(BindingContext &binding_context); + void PerformBinding(bool is_global, BindingContext &binding_context); AggTerm Copy() const; }; diff --git a/src/include/planner/copy_plan.h b/src/include/planner/copy_plan.h deleted file 
mode 100644 index 079199cf755..00000000000 --- a/src/include/planner/copy_plan.h +++ /dev/null @@ -1,58 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Peloton -// -// copy_plan.h -// -// Identification: src/include/planner/copy_plan.h -// -// Copyright (c) 2015-16, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "../parser/copy_statement.h" -#include "../parser/select_statement.h" -#include "planner/abstract_plan.h" - -namespace peloton { - -namespace storage { -class DataTable; -} - -namespace parser { -class CopyStatement; -} - -namespace planner { - -class CopyPlan : public AbstractPlan { - public: - CopyPlan() = delete; - - explicit CopyPlan(std::string file_path, bool deserialize_parameters) - : file_path(file_path), deserialize_parameters(deserialize_parameters) { - LOG_DEBUG("Creating a Copy Plan"); - } - - inline PlanNodeType GetPlanNodeType() const { return PlanNodeType::COPY; } - - const std::string GetInfo() const { return "CopyPlan"; } - - // TODO: Implement copy mechanism - std::unique_ptr Copy() const { return nullptr; } - - // The path of the target file - std::string file_path; - - // Whether the copying requires deserialization of parameters - bool deserialize_parameters = false; - - private: - DISALLOW_COPY_AND_MOVE(CopyPlan); -}; - -} // namespace planner -} // namespace peloton \ No newline at end of file diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h new file mode 100644 index 00000000000..2cd255884d3 --- /dev/null +++ b/src/include/planner/csv_scan_plan.h @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scan_plan.h +// +// Identification: src/include/planner/csv_scan_plan.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// 
+//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#include "planner/abstract_scan_plan.h" +#include "planner/attribute_info.h" +#include "type/type_id.h" + +namespace peloton { +namespace planner { + +/** + * This is the plan node when scanning a CSV file. + */ +class CSVScanPlan : public AbstractScan { + public: + struct ColumnInfo { + std::string name; + type::TypeId type; + }; + + public: + /** + * Constructs a sequential scan over a CSV file + * + * @param file_name The file path + * @param cols Information of the columns expected in each row of the CSV + * @param delimiter The character that separates columns within a row + * @param quote The character used to quote data (i.e., strings) + * @param escape The character that should appear before any data characters + * that match the quote character. + */ + CSVScanPlan(std::string file_name, std::vector &&cols, + char delimiter = ',', char quote = '"', char escape = '"', + std::string null = ""); + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Accessors + /// + ////////////////////////////////////////////////////////////////////////////// + + PlanNodeType GetPlanNodeType() const override; + + void GetOutputColumns(std::vector &columns) const override; + + const std::string &GetFileName() const { return file_name_; } + + void GetAttributes(std::vector &ais) const override; + + char GetDelimiterChar() const { return delimiter_; } + char GetQuoteChar() const { return quote_; } + char GetEscapeChar() const { return escape_; } + const std::string &GetNullString() const { return null_; } + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Utilities + Internal + /// + ////////////////////////////////////////////////////////////////////////////// + + hash_t Hash() const override; + + bool operator==(const AbstractPlan &rhs) const override; 
+ + std::unique_ptr Copy() const override; + + void PerformBinding(BindingContext &binding_context) override; + + private: + const std::string file_name_; + + char delimiter_; + char quote_; + char escape_; + const std::string null_; + + std::vector attributes_; +}; + +} // namespace planner +} // namespace peloton \ No newline at end of file diff --git a/src/include/planner/export_external_file_plan.h b/src/include/planner/export_external_file_plan.h new file mode 100644 index 00000000000..7dfb5807422 --- /dev/null +++ b/src/include/planner/export_external_file_plan.h @@ -0,0 +1,73 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// export_external_file_plan.h +// +// Identification: src/include/planner/export_external_file_plan.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include "planner/abstract_plan.h" + +namespace peloton { +namespace planner { + +/** + * This is the plan node when exporting data from the database into an external + * file. It is configured with the name of the file to write content into, and + * the delimiter, quote, and escape characters to use when writing content. 
+ */ +class ExportExternalFilePlan : public AbstractPlan { + public: + ExportExternalFilePlan(std::string file_name, char delimiter = ',', + char quote = '"', char escape = '\"'); + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Accessors + /// + ////////////////////////////////////////////////////////////////////////////// + + PlanNodeType GetPlanNodeType() const override; + + const std::string &GetFileName() const { return file_name_; } + + char GetDelimiterChar() const { return delimiter_; } + char GetQuoteChar() const { return quote_; } + char GetEscapeChar() const { return escape_; } + + ////////////////////////////////////////////////////////////////////////////// + /// + /// Utilities + Internal + /// + ////////////////////////////////////////////////////////////////////////////// + + hash_t Hash() const override; + + bool operator==(const AbstractPlan &rhs) const override; + + std::unique_ptr Copy() const override; + + void PerformBinding(BindingContext &binding_context) override; + + private: + std::vector output_attributes_; + + std::string file_name_; + + char delimiter_; + char quote_; + char escape_; +}; + +} // namespace planner +} // namespace peloton \ No newline at end of file diff --git a/src/include/planner/insert_plan.h b/src/include/planner/insert_plan.h index 54072e76b3d..7c2bc212e55 100644 --- a/src/include/planner/insert_plan.h +++ b/src/include/planner/insert_plan.h @@ -70,9 +70,6 @@ class InsertPlan : public AbstractPlan { std::vector>> * insert_values); - // Get a varlen pool - will construct the pool only if needed - type::AbstractPool *GetPlanPool(); - PlanNodeType GetPlanNodeType() const override { return PlanNodeType::INSERT; }; diff --git a/src/include/util/file.h b/src/include/util/file.h new file mode 100644 index 00000000000..6bf35850674 --- /dev/null +++ b/src/include/util/file.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// 
Peloton +// +// file.h +// +// Identification: src/include/util/file.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include "common/exception.h" + +namespace peloton { +namespace util { + +class File { + public: + enum class AccessMode : uint8_t { ReadOnly, WriteOnly, ReadWrite }; + + File() : fd_(kInvalid) {} + + ~File() { Close(); } + + // Move + File(File &&other) noexcept : fd_(kInvalid) { std::swap(fd_, other.fd_); } + + // Move + File &operator=(File &&other) noexcept { + // First, close this file + Close(); + + // Swap descriptors + std::swap(fd_, other.fd_); + + // Done + return *this; + } + + void Open(const std::string &name, AccessMode access_mode); + + void Create(const std::string &name); + + void CreateTemp(); + + uint64_t Read(void *data, uint64_t len) const; + + uint64_t Write(void *data, uint64_t len) const; + + uint64_t Size() const; + + bool IsOpen() const { return fd_ != kInvalid; } + + void Close(); + + private: + // The file descriptor + int fd_; + + static constexpr int kInvalid = -1; + + private: + DISALLOW_COPY(File); +}; + +} // namespace util +} // namespace peloton \ No newline at end of file diff --git a/src/include/util/string_util.h b/src/include/util/string_util.h index d61f297ce09..9882ce3ecd5 100644 --- a/src/include/util/string_util.h +++ b/src/include/util/string_util.h @@ -133,6 +133,19 @@ class StringUtil { static void RTrim(std::string &str); static std::string Indent(const int num_indent); + + /** + * Return a new string that has stripped all occurrences of the provided + * character from the provided string. + * + * NOTE: This function copies the input string into a new string, which is + * wasteful. Don't use this for performance critical code, please! 
+ * + * @param str The input string + * @param c The character we want to remove + * @return A new string with no occurrences of the provided character + */ + static std::string Strip(const std::string &str, char c); }; } // namespace peloton diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index 1df06b3ea50..39ca06d811b 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -94,6 +94,12 @@ void ChildPropertyDeriver::Visit(const PhysicalIndexScan *op) { make_pair(provided_prop, vector>{})); } +void ChildPropertyDeriver::Visit(const ExternalFileScan *) { + // External file scans (like sequential scans) do not provide properties + output_.push_back( + make_pair(make_shared(), vector>{})); +} + void ChildPropertyDeriver::Visit(const QueryDerivedScan *) { output_.push_back( make_pair(requirements_, vector>{requirements_})); @@ -187,6 +193,13 @@ void ChildPropertyDeriver::Visit(const DummyScan *) { make_pair(make_shared(), vector>())); } +void ChildPropertyDeriver::Visit(const PhysicalExportExternalFile *) { + // Let child fulfil all the required properties + vector> child_input_properties{requirements_}; + + output_.push_back(make_pair(requirements_, move(child_input_properties))); +} + void ChildPropertyDeriver::DeriveForJoin() { output_.push_back(make_pair( make_shared(), diff --git a/src/optimizer/cost_calculator.cpp b/src/optimizer/cost_calculator.cpp index 5dda9e67c8a..56cbbecc64e 100644 --- a/src/optimizer/cost_calculator.cpp +++ b/src/optimizer/cost_calculator.cpp @@ -59,6 +59,11 @@ void CostCalculator::Visit(UNUSED_ATTRIBUTE const PhysicalIndexScan *op) { memo_->GetGroupByID(gexpr_->GetGroupID())->GetNumRows() * DEFAULT_TUPLE_COST; } + +void CostCalculator::Visit(UNUSED_ATTRIBUTE const ExternalFileScan *) { + output_cost_ = 0.0; +} + void CostCalculator::Visit(UNUSED_ATTRIBUTE const QueryDerivedScan *op) { output_cost_ = 0.f; } diff --git 
a/src/optimizer/input_column_deriver.cpp b/src/optimizer/input_column_deriver.cpp index 7819f81afb9..019117ae68e 100644 --- a/src/optimizer/input_column_deriver.cpp +++ b/src/optimizer/input_column_deriver.cpp @@ -55,6 +55,8 @@ void InputColumnDeriver::Visit(const PhysicalSeqScan *) { ScanHelper(); } void InputColumnDeriver::Visit(const PhysicalIndexScan *) { ScanHelper(); } +void InputColumnDeriver::Visit(const ExternalFileScan *) { ScanHelper(); } + void InputColumnDeriver::Visit(const QueryDerivedScan *op) { // QueryDerivedScan should only be a renaming layer ExprMap output_cols_map; @@ -155,6 +157,10 @@ void InputColumnDeriver::Visit(const PhysicalDelete *) { Passdown(); } void InputColumnDeriver::Visit(const PhysicalUpdate *) { Passdown(); } +void InputColumnDeriver::Visit(const PhysicalExportExternalFile *) { + Passdown(); +} + void InputColumnDeriver::ScanHelper() { // Scan does not have input column, output columns should contain all tuple // value expressions needed diff --git a/src/optimizer/operators.cpp b/src/optimizer/operators.cpp index 78c34d16257..6457e769db2 100644 --- a/src/optimizer/operators.cpp +++ b/src/optimizer/operators.cpp @@ -11,11 +11,13 @@ //===----------------------------------------------------------------------===// #include "optimizer/operators.h" + #include "optimizer/operator_visitor.h" #include "expression/expression_util.h" namespace peloton { namespace optimizer { + //===--------------------------------------------------------------------===// // Leaf //===--------------------------------------------------------------------===// @@ -51,7 +53,7 @@ hash_t LogicalGet::Hash() const { } bool LogicalGet::operator==(const BaseOperatorNode &r) { - if (r.GetType()!= OpType::Get) return false; + if (r.GetType() != OpType::Get) return false; const LogicalGet &node = *static_cast(&r); if (predicates.size() != node.predicates.size()) return false; for (size_t i = 0; i < predicates.size(); i++) { @@ -61,6 +63,43 @@ bool 
LogicalGet::operator==(const BaseOperatorNode &r) { return get_id == node.get_id; } +//===--------------------------------------------------------------------===// +// External file get +//===--------------------------------------------------------------------===// + +Operator LogicalExternalFileGet::make(oid_t get_id, ExternalFileFormat format, + std::string file_name, char delimiter, + char quote, char escape) { + auto *get = new LogicalExternalFileGet(); + get->get_id = get_id; + get->format = format; + get->file_name = std::move(file_name); + get->delimiter = delimiter; + get->quote = quote; + get->escape = escape; + return Operator(get); +} + +bool LogicalExternalFileGet::operator==(const BaseOperatorNode &node) { + if (node.GetType() != OpType::LogicalExternalFileGet) return false; + const auto &get = *static_cast(&node); + return (get_id == get.get_id && format == get.format && + file_name == get.file_name && delimiter == get.delimiter && + quote == get.quote && escape == get.escape); +} + +hash_t LogicalExternalFileGet::Hash() const { + hash_t hash = BaseOperatorNode::Hash(); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format)); + hash = HashUtil::CombineHashes( + hash, HashUtil::HashBytes(file_name.data(), file_name.length())); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1)); + return hash; +} + //===--------------------------------------------------------------------===// // Query derived get //===--------------------------------------------------------------------===// @@ -385,8 +424,8 @@ Operator LogicalDelete::make( //===--------------------------------------------------------------------===// Operator LogicalUpdate::make( std::shared_ptr target_table, - const std::vector> - *updates) { + const std::vector> * +
updates) { LogicalUpdate *update_op = new LogicalUpdate; update_op->target_table = target_table; update_op->updates = updates; @@ -411,6 +450,41 @@ Operator LogicalLimit::make(int64_t offset, int64_t limit) { return Operator(limit_op); } +//===--------------------------------------------------------------------===// +// External file output +//===--------------------------------------------------------------------===// +Operator LogicalExportExternalFile::make(ExternalFileFormat format, + std::string file_name, char delimiter, + char quote, char escape) { + auto *export_op = new LogicalExportExternalFile(); + export_op->format = format; + export_op->file_name = std::move(file_name); + export_op->delimiter = delimiter; + export_op->quote = quote; + export_op->escape = escape; + return Operator(export_op); +} + +bool LogicalExportExternalFile::operator==(const BaseOperatorNode &node) { + if (node.GetType() != OpType::LogicalExportExternalFile) return false; + const auto &export_op = + *static_cast(&node); + return (format == export_op.format && file_name == export_op.file_name && + delimiter == export_op.delimiter && quote == export_op.quote && + escape == export_op.escape); +} + +hash_t LogicalExportExternalFile::Hash() const { + hash_t hash = BaseOperatorNode::Hash(); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format)); + hash = HashUtil::CombineHashes( + hash, HashUtil::HashBytes(file_name.data(), file_name.length())); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1)); + return hash; +} + //===--------------------------------------------------------------------===// // DummyScan //===--------------------------------------------------------------------===// @@ -506,6 +580,42 @@ hash_t PhysicalIndexScan::Hash() const { return hash; } 
+//===--------------------------------------------------------------------===// +// Physical external file scan +//===--------------------------------------------------------------------===// +Operator ExternalFileScan::make(oid_t get_id, ExternalFileFormat format, + std::string file_name, char delimiter, + char quote, char escape) { + auto *get = new ExternalFileScan(); + get->get_id = get_id; + get->format = format; + get->file_name = std::move(file_name); + get->delimiter = delimiter; + get->quote = quote; + get->escape = escape; + return Operator(get); +} + +bool ExternalFileScan::operator==(const BaseOperatorNode &node) { + if (node.GetType() != OpType::ExternalFileScan) return false; + const auto &get = *static_cast(&node); + return (get_id == get.get_id && format == get.format && + file_name == get.file_name && delimiter == get.delimiter && + quote == get.quote && escape == get.escape); +} + +hash_t ExternalFileScan::Hash() const { + hash_t hash = BaseOperatorNode::Hash(); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format)); + hash = HashUtil::CombineHashes( + hash, HashUtil::HashBytes(file_name.data(), file_name.length())); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1)); + return hash; +} + //===--------------------------------------------------------------------===// // Query derived get //===--------------------------------------------------------------------===// @@ -748,14 +858,49 @@ Operator PhysicalDelete::make( //===--------------------------------------------------------------------===// Operator PhysicalUpdate::make( std::shared_ptr target_table, - const std::vector> - *updates) { + const std::vector> * + updates) { PhysicalUpdate *update = new PhysicalUpdate; update->target_table = target_table; update->updates = 
updates; return Operator(update); } +//===--------------------------------------------------------------------===// +// PhysicalExportExternalFile +//===--------------------------------------------------------------------===// +Operator PhysicalExportExternalFile::make(ExternalFileFormat format, + std::string file_name, char delimiter, + char quote, char escape) { + auto *export_op = new PhysicalExportExternalFile(); + export_op->format = format; + export_op->file_name = file_name; + export_op->delimiter = delimiter; + export_op->quote = quote; + export_op->escape = escape; + return Operator(export_op); +} + +bool PhysicalExportExternalFile::operator==(const BaseOperatorNode &node) { + if (node.GetType() != OpType::ExportExternalFile) return false; + const auto &export_op = + *static_cast(&node); + return (format == export_op.format && file_name == export_op.file_name && + delimiter == export_op.delimiter && quote == export_op.quote && + escape == export_op.escape); +} + +hash_t PhysicalExportExternalFile::Hash() const { + hash_t hash = BaseOperatorNode::Hash(); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format)); + hash = HashUtil::CombineHashes( + hash, HashUtil::HashBytes(file_name.data(), file_name.length())); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1)); + hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1)); + return hash; +} + //===--------------------------------------------------------------------===// // PhysicalHashGroupBy //===--------------------------------------------------------------------===// @@ -846,6 +991,9 @@ std::string OperatorNode::name_ = "LeafOperator"; template <> std::string OperatorNode::name_ = "LogicalGet"; template <> +std::string OperatorNode::name_ = + "LogicalExternalFileGet"; +template <> std::string OperatorNode::name_ = "LogicalQueryDerivedGet"; template <> @@ -884,12 +1032,17 @@ std::string 
OperatorNode::name_ = "LogicalLimit"; template <> std::string OperatorNode::name_ = "LogicalDistinct"; template <> +std::string OperatorNode::name_ = + "LogicalExportExternalFile"; +template <> std::string OperatorNode::name_ = "DummyScan"; template <> std::string OperatorNode::name_ = "PhysicalSeqScan"; template <> std::string OperatorNode::name_ = "PhysicalIndexScan"; template <> +std::string OperatorNode::name_ = "ExternalFileScan"; +template <> std::string OperatorNode::name_ = "QueryDerivedScan"; template <> std::string OperatorNode::name_ = "PhysicalOrderBy"; @@ -930,6 +1083,9 @@ template <> std::string OperatorNode::name_ = "PhysicalDistinct"; template <> std::string OperatorNode::name_ = "PhysicalAggregate"; +template <> +std::string OperatorNode::name_ = + "PhysicalExportExternalFile"; //===--------------------------------------------------------------------===// template <> @@ -937,6 +1093,9 @@ OpType OperatorNode::type_ = OpType::Leaf; template <> OpType OperatorNode::type_ = OpType::Get; template <> +OpType OperatorNode::type_ = + OpType::LogicalExternalFileGet; +template <> OpType OperatorNode::type_ = OpType::LogicalQueryDerivedGet; template <> @@ -974,6 +1133,10 @@ template <> OpType OperatorNode::type_ = OpType::LogicalDistinct; template <> OpType OperatorNode::type_ = OpType::LogicalLimit; +template <> +OpType OperatorNode::type_ = + OpType::LogicalExportExternalFile; + template <> OpType OperatorNode::type_ = OpType::DummyScan; template <> @@ -981,6 +1144,8 @@ OpType OperatorNode::type_ = OpType::SeqScan; template <> OpType OperatorNode::type_ = OpType::IndexScan; template <> +OpType OperatorNode::type_ = OpType::ExternalFileScan; +template <> OpType OperatorNode::type_ = OpType::QueryDerivedScan; template <> OpType OperatorNode::type_ = OpType::OrderBy; @@ -1018,7 +1183,11 @@ template <> OpType OperatorNode::type_ = OpType::SortGroupBy; template <> OpType OperatorNode::type_ = OpType::Aggregate; +template <> +OpType OperatorNode::type_ = + 
OpType::ExportExternalFile; //===--------------------------------------------------------------------===// + template bool OperatorNode::IsLogical() const { return type_ < OpType::LogicalPhysicalDelimiter; diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 62f813ec876..2525915fcc1 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -214,13 +214,6 @@ unique_ptr Optimizer::HandleDDLStatement( ddl_plan = move(analyze_plan); break; } - case StatementType::COPY: { - LOG_TRACE("Adding Copy plan..."); - parser::CopyStatement *copy_parse_tree = - static_cast(tree); - ddl_plan = util::CreateCopyPlan(copy_parse_tree); - break; - } default: is_ddl_stmt = false; } @@ -266,20 +259,33 @@ QueryInfo Optimizer::GetQueryInfo(parser::SQLStatement *tree) { std::shared_ptr physical_props = std::make_shared(); switch (tree->GetType()) { case StatementType::SELECT: { - auto select = reinterpret_cast(tree); + auto *select = reinterpret_cast(tree); GetQueryInfoHelper(select->select_list, select->order, output_exprs, physical_props); break; } case StatementType::INSERT: { - auto insert = reinterpret_cast(tree); + auto *insert = reinterpret_cast(tree); if (insert->select != nullptr) GetQueryInfoHelper(insert->select->select_list, insert->select->order, output_exprs, physical_props); break; } + case StatementType::COPY: { + auto *copy = reinterpret_cast(tree); + if (copy->select_stmt != nullptr) { + GetQueryInfoHelper(copy->select_stmt->select_list, + copy->select_stmt->order, output_exprs, + physical_props); + } else { + std::unique_ptr order; + GetQueryInfoHelper(copy->select_list, order, output_exprs, + physical_props); + } + break; + } default: - ; + break; } return QueryInfo(output_exprs, physical_props); diff --git a/src/optimizer/optimizer_task.cpp b/src/optimizer/optimizer_task.cpp index f0a489906ae..8c430f76ae2 100644 --- a/src/optimizer/optimizer_task.cpp +++ b/src/optimizer/optimizer_task.cpp @@ -6,7 +6,7 @@ // // Identification: 
src/optimizer/optimizer_task.cpp // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -22,6 +22,7 @@ namespace peloton { namespace optimizer { + //===--------------------------------------------------------------------===// // Base class //===--------------------------------------------------------------------===// @@ -30,13 +31,16 @@ void OptimizerTask::ConstructValidRules( std::vector> &rules, std::vector &valid_rules) { for (auto &rule : rules) { - if (group_expr->Op().GetType() != - rule->GetMatchPattern()->Type() || // Root pattern type mismatch - group_expr->HasRuleExplored(rule.get()) || // Rule has been applied + // Check if we can apply the rule + bool root_pattern_mismatch = + group_expr->Op().GetType() != rule->GetMatchPattern()->Type(); + bool already_explored = group_expr->HasRuleExplored(rule.get()); + bool child_pattern_mismatch = group_expr->GetChildrenGroupsSize() != - rule->GetMatchPattern() - ->GetChildPatternsSize()) // Children size does not math + rule->GetMatchPattern()->GetChildPatternsSize(); + if (root_pattern_mismatch || already_explored || child_pattern_mismatch) { continue; + } auto promise = rule->Promise(group_expr, context); if (promise > 0) valid_rules.emplace_back(rule.get(), promise); diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp index a16b70c3878..671ef94dabd 100644 --- a/src/optimizer/plan_generator.cpp +++ b/src/optimizer/plan_generator.cpp @@ -15,12 +15,15 @@ #include "catalog/column_catalog.h" #include "catalog/index_catalog.h" #include "catalog/table_catalog.h" +#include "codegen/type/type.h" #include "concurrency/transaction_context.h" #include "expression/expression_util.h" #include "optimizer/operator_expression.h" #include "optimizer/properties.h" #include "planner/aggregate_plan.h" +#include 
"planner/csv_scan_plan.h" #include "planner/delete_plan.h" +#include "planner/export_external_file_plan.h" #include "planner/hash_join_plan.h" #include "planner/hash_plan.h" #include "planner/index_scan_plan.h" @@ -127,6 +130,26 @@ void PlanGenerator::Visit(const PhysicalIndexScan *op) { predicate.release(), column_ids, index_scan_desc, false)); } +void PlanGenerator::Visit(const ExternalFileScan *op) { + switch (op->format) { + case ExternalFileFormat::CSV: { + // First construct the output column descriptions + std::vector cols; + for (const auto *output_col : output_cols_) { + auto col_info = planner::CSVScanPlan::ColumnInfo{ + .name = "", .type = output_col->GetValueType()}; + cols.emplace_back(std::move(col_info)); + } + + // Create the plan + output_plan_.reset( + new planner::CSVScanPlan(op->file_name, std::move(cols), + op->delimiter, op->quote, op->escape)); + break; + } + } +} + void PlanGenerator::Visit(const QueryDerivedScan *) { PELOTON_ASSERT(children_plans_.size() == 1); output_plan_ = move(children_plans_[0]); @@ -364,6 +387,14 @@ void PlanGenerator::Visit(const PhysicalUpdate *op) { output_plan_ = move(update_plan); } +void PlanGenerator::Visit(const PhysicalExportExternalFile *op) { + unique_ptr export_plan{ + new planner::ExportExternalFilePlan(op->file_name, op->delimiter, + op->quote, op->escape)}; + export_plan->AddChild(move(children_plans_[0])); + output_plan_ = move(export_plan); +} + /************************* Private Functions *******************************/ vector> PlanGenerator::GenerateTableTVExprs( diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp index ff75140d5f5..56925c3b117 100644 --- a/src/optimizer/query_to_operator_transformer.cpp +++ b/src/optimizer/query_to_operator_transformer.cpp @@ -259,10 +259,10 @@ void QueryToOperatorTransformer::Visit(parser::InsertStatement *op) { if (column_objects[i]->IsNotNull()) { // TODO: Add check for default value's existence for the 
current // column - throw CatalogException( - StringUtil::Format("ERROR: null value in column \"%s\" " - "violates not-null constraint", - column_objects[i]->GetColumnName().c_str())); + throw CatalogException(StringUtil::Format( + "ERROR: null value in column \"%s\" " + "violates not-null constraint", + column_objects[i]->GetColumnName().c_str())); } } } @@ -359,8 +359,42 @@ void QueryToOperatorTransformer::Visit(parser::UpdateStatement *op) { output_expr_ = update_expr; } -void QueryToOperatorTransformer::Visit( - UNUSED_ATTRIBUTE parser::CopyStatement *op) {} +void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) { + if (op->is_from) { + // The copy statement is reading from a file into a table. We construct a + // logical external-file get operator as the leaf, and an insert operator + // as the root. + + auto get_op = + std::make_shared(LogicalExternalFileGet::make( + GetAndIncreaseGetId(), op->format, op->file_path, op->delimiter, + op->quote, op->escape)); + + auto target_table = + catalog::Catalog::GetInstance() + ->GetDatabaseObject(op->table->GetDatabaseName(), txn_) + ->GetTableObject(op->table->GetTableName(), + op->table->GetSchemaName()); + + auto insert_expr = std::make_shared( + LogicalInsertSelect::make(target_table)); + + insert_expr->PushChild(get_op); + output_expr_ = insert_expr; + } else { + if (op->select_stmt != nullptr) { + op->select_stmt->Accept(this); + } else { + op->table->Accept(this); + } + auto export_op = + std::make_shared(LogicalExportExternalFile::make( + op->format, op->file_path, op->delimiter, op->quote, op->escape)); + export_op->PushChild(output_expr_); + output_expr_ = export_op; + } +} + void QueryToOperatorTransformer::Visit( UNUSED_ATTRIBUTE parser::AnalyzeStatement *op) {} diff --git a/src/optimizer/rule.cpp b/src/optimizer/rule.cpp index 1e81799147d..8c72ed17fa8 100644 --- a/src/optimizer/rule.cpp +++ b/src/optimizer/rule.cpp @@ -39,11 +39,13 @@ RuleSet::RuleSet() { AddImplementationRule(new 
GetToDummyScan()); AddImplementationRule(new GetToSeqScan()); AddImplementationRule(new GetToIndexScan()); + AddImplementationRule(new LogicalExternalFileGetToPhysical()); AddImplementationRule(new LogicalQueryDerivedGetToPhysical()); AddImplementationRule(new InnerJoinToInnerNLJoin()); AddImplementationRule(new InnerJoinToInnerHashJoin()); AddImplementationRule(new ImplementDistinct()); AddImplementationRule(new ImplementLimit()); + AddImplementationRule(new LogicalExportToPhysicalExport()); AddRewriteRule(RewriteRuleSetName::PREDICATE_PUSH_DOWN, new PushFilterThroughJoin()); diff --git a/src/optimizer/rule_impls.cpp b/src/optimizer/rule_impls.cpp index e540555c9e3..33fb241df8d 100644 --- a/src/optimizer/rule_impls.cpp +++ b/src/optimizer/rule_impls.cpp @@ -275,9 +275,8 @@ void GetToIndexScan::Transform( sort_by_asc_base_column = false; break; } - auto bound_oids = - reinterpret_cast(expr) - ->GetBoundOid(); + auto bound_oids = reinterpret_cast( + expr)->GetBoundOid(); sort_col_ids.push_back(std::get<2>(bound_oids)); } // Check whether any index can fulfill sort property @@ -358,20 +357,16 @@ void GetToIndexScan::Transform( if (value_expr->GetExpressionType() == ExpressionType::VALUE_CONSTANT) { value_list.push_back( reinterpret_cast( - value_expr) - ->GetValue()); + value_expr)->GetValue()); LOG_TRACE("Value Type: %d", static_cast( reinterpret_cast( - expr->GetModifiableChild(1)) - ->GetValueType())); + expr->GetModifiableChild(1))->GetValueType())); } else { value_list.push_back( type::ValueFactory::GetParameterOffsetValue( reinterpret_cast( - value_expr) - ->GetValueIdx()) - .Copy()); + value_expr)->GetValueIdx()).Copy()); LOG_TRACE("Parameter offset: %s", (*value_list.rbegin()).GetInfo().c_str()); } @@ -440,6 +435,34 @@ void LogicalQueryDerivedGetToPhysical::Transform( transformed.push_back(result_plan); } +/////////////////////////////////////////////////////////////////////////////// +/// LogicalExternalFileGetToPhysical 
+LogicalExternalFileGetToPhysical::LogicalExternalFileGetToPhysical() { + type_ = RuleType::EXTERNAL_FILE_GET_TO_PHYSICAL; + match_pattern = std::make_shared(OpType::LogicalExternalFileGet); +} + +bool LogicalExternalFileGetToPhysical::Check( + UNUSED_ATTRIBUTE std::shared_ptr plan, + UNUSED_ATTRIBUTE OptimizeContext *context) const { + return true; +} + +void LogicalExternalFileGetToPhysical::Transform( + std::shared_ptr input, + std::vector> &transformed, + UNUSED_ATTRIBUTE OptimizeContext *context) const { + const auto *get = input->Op().As(); + + auto result_plan = std::make_shared( + ExternalFileScan::make(get->get_id, get->format, get->file_name, + get->delimiter, get->quote, get->escape)); + + PELOTON_ASSERT(input->Children().empty()); + + transformed.push_back(result_plan); +} + /////////////////////////////////////////////////////////////////////////////// /// LogicalDeleteToPhysical LogicalDeleteToPhysical::LogicalDeleteToPhysical() { @@ -797,6 +820,38 @@ void ImplementLimit::Transform( transformed.push_back(result_plan); } +/////////////////////////////////////////////////////////////////////////////// +/// LogicalExport to Physical Export +LogicalExportToPhysicalExport::LogicalExportToPhysicalExport() { + type_ = RuleType::EXPORT_EXTERNAL_FILE_TO_PHYSICAL; + match_pattern = std::make_shared(OpType::LogicalExportExternalFile); + match_pattern->AddChild(std::make_shared(OpType::Leaf)); +} + +bool LogicalExportToPhysicalExport::Check( + UNUSED_ATTRIBUTE std::shared_ptr plan, + UNUSED_ATTRIBUTE OptimizeContext *context) const { + return true; +} + +void LogicalExportToPhysicalExport::Transform( + std::shared_ptr input, + std::vector> &transformed, + UNUSED_ATTRIBUTE OptimizeContext *context) const { + const auto *export_op = input->Op().As(); + + auto result_plan = + std::make_shared(PhysicalExportExternalFile::make( + export_op->format, export_op->file_name, export_op->delimiter, + export_op->quote, export_op->escape)); + + std::vector> children = 
input->Children(); + PELOTON_ASSERT(children.size() == 1); + result_plan->PushChild(children[0]); + + transformed.push_back(result_plan); +} + //===--------------------------------------------------------------------===// // Rewrite rules //===--------------------------------------------------------------------===// diff --git a/src/optimizer/util.cpp b/src/optimizer/util.cpp index 0d01e35e8ac..07685376b34 100644 --- a/src/optimizer/util.cpp +++ b/src/optimizer/util.cpp @@ -15,9 +15,6 @@ #include "catalog/query_metrics_catalog.h" #include "concurrency/transaction_manager_factory.h" #include "expression/expression_util.h" -#include "planner/copy_plan.h" -#include "planner/seq_scan_plan.h" -#include "storage/data_table.h" namespace peloton { namespace optimizer { @@ -142,39 +139,6 @@ bool ContainsJoinColumns(const std::unordered_set &l_group_alias, return false; } -std::unique_ptr CreateCopyPlan( - parser::CopyStatement *copy_stmt) { - std::string table_name(copy_stmt->cpy_table->GetTableName()); - bool deserialize_parameters = false; - - // If we're copying the query metric table, then we need to handle the - // deserialization of prepared stmt parameters - if (table_name == QUERY_METRICS_CATALOG_NAME) { - LOG_DEBUG("Copying the query_metric table."); - deserialize_parameters = true; - } - - std::unique_ptr copy_plan( - new planner::CopyPlan(copy_stmt->file_path, deserialize_parameters)); - - auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); - auto txn = txn_manager.BeginTransaction(); - auto target_table = catalog::Catalog::GetInstance()->GetTableWithName( - copy_stmt->cpy_table->GetDatabaseName(), - copy_stmt->cpy_table->GetSchemaName(), - copy_stmt->cpy_table->GetTableName(), txn); - txn_manager.CommitTransaction(txn); - - std::unique_ptr select_plan( - new planner::SeqScanPlan(target_table, nullptr, {}, false)); - - LOG_DEBUG("Sequential scan plan for copy created"); - - // Attach it to the copy plan - 
copy_plan->AddChild(std::move(select_plan)); - return copy_plan; -} - std::unordered_map> ConstructSelectElementMap( std::vector> &select_list) { diff --git a/src/parser/copy_statement.cpp b/src/parser/copy_statement.cpp index b39fcbc8782..e4c5cd3d621 100644 --- a/src/parser/copy_statement.cpp +++ b/src/parser/copy_statement.cpp @@ -20,7 +20,7 @@ const std::string CopyStatement::GetInfo(int num_indent) const { os << StringUtil::Indent(num_indent) << "CopyStatement\n"; os << StringUtil::Indent(num_indent + 1) << "-> Type :: " << CopyTypeToString(type) << "\n"; - os << cpy_table.get()->GetInfo(num_indent + 1) << std::endl; + os << table.get()->GetInfo(num_indent + 1) << std::endl; os << StringUtil::Indent(num_indent + 1) << "-> File Path :: " << file_path << std::endl; diff --git a/src/parser/postgresparser.cpp b/src/parser/postgresparser.cpp index 797b77406b5..069285fc1a4 100644 --- a/src/parser/postgresparser.cpp +++ b/src/parser/postgresparser.cpp @@ -1505,23 +1505,59 @@ parser::PrepareStatement *PostgresParser::PrepareTransform(PrepareStmt *root) { return result; } -// TODO: Only support COPY TABLE TO FILE and DELIMITER option parser::CopyStatement *PostgresParser::CopyTransform(CopyStmt *root) { - auto result = new CopyStatement(peloton::CopyType::EXPORT_OTHER); - result->cpy_table.reset(RangeVarTransform(root->relation)); - result->file_path = root->filename; - for (auto cell = root->options->head; cell != NULL; cell = cell->next) { - auto def_elem = reinterpret_cast(cell->data.ptr_value); - if (strcmp(def_elem->defname, "delimiter") == 0) { - auto delimiter = reinterpret_cast(def_elem->arg)->val.str; - result->delimiter = *delimiter; - break; + static constexpr char kDelimiterTok[] = "delimiter"; + static constexpr char kFormatTok[] = "format"; + static constexpr char kQuoteTok[] = "quote"; + static constexpr char kEscapeTok[] = "escape"; + + // The main return value + auto *result = new CopyStatement(); + + if (root->relation) { + 
result->table.reset(RangeVarTransform(root->relation)); + } else { + result->select_stmt.reset( + SelectTransform(reinterpret_cast(root->query))); + } + + result->file_path = (root->filename != nullptr ? root->filename : ""); + result->is_from = root->is_from; + + // Handle options + ListCell *cell = nullptr; + for_each_cell(cell, root->options->head) { + auto *def_elem = reinterpret_cast(cell->data.ptr_value); + + // Check delimiter + if (strncmp(def_elem->defname, kDelimiterTok, sizeof(kDelimiterTok)) == 0) { + auto *delimiter_val = reinterpret_cast(def_elem->arg); + result->delimiter = *delimiter_val->val.str; + } + + // Check format + if (strncmp(def_elem->defname, kFormatTok, sizeof(kFormatTok)) == 0) { + auto *format_val = reinterpret_cast(def_elem->arg); + result->format = StringToExternalFileFormat(format_val->val.str); + } + + // Check quote + if (strncmp(def_elem->defname, kQuoteTok, sizeof(kQuoteTok)) == 0) { + auto *quote_val = reinterpret_cast(def_elem->arg); + result->quote = *quote_val->val.str; + } + + // Check escape + if (strncmp(def_elem->defname, kEscapeTok, sizeof(kEscapeTok)) == 0) { + auto *escape_val = reinterpret_cast(def_elem->arg); + result->escape = *escape_val->val.str; } } + return result; } -// Analyze statment is parsed with vacuum statment. +// Analyze statment is parsed with vacuum statement. 
parser::AnalyzeStatement *PostgresParser::VacuumTransform(VacuumStmt *root) { if (root->options != VACOPT_ANALYZE) { throw NotImplementedException("Vacuum not supported."); diff --git a/src/planner/aggregate_plan.cpp b/src/planner/aggregate_plan.cpp index 8aad13b3edf..26f3a7e9d19 100644 --- a/src/planner/aggregate_plan.cpp +++ b/src/planner/aggregate_plan.cpp @@ -24,7 +24,8 @@ AggregatePlan::AggTerm::AggTerm(ExpressionType et, bool distinct) : aggtype(et), expression(expr), distinct(distinct) {} -void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) { +void AggregatePlan::AggTerm::PerformBinding(bool is_global, + BindingContext &binding_context) { // If there's an input expression, first perform binding auto *agg_expr = const_cast(expression); if (agg_expr != nullptr) { @@ -47,7 +48,7 @@ void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) { // TODO: Move this logic into the SQL type const auto &input_type = expression->ResultType(); agg_ai.type = codegen::type::Type{codegen::type::Decimal::Instance(), - input_type.nullable}; + input_type.nullable || is_global}; break; } case ExpressionType::AGGREGATE_MAX: @@ -57,6 +58,9 @@ void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) { // return type as its input expression. 
PELOTON_ASSERT(expression != nullptr); agg_ai.type = expression->ResultType(); + if (is_global) { + agg_ai.type = agg_ai.type.AsNullable(); + } break; } default: { @@ -93,7 +97,7 @@ void AggregatePlan::PerformBinding(BindingContext &binding_context) { // Now let the aggregate expressions do their bindings for (const auto &agg_term : GetUniqueAggTerms()) { auto &non_const_agg_term = const_cast(agg_term); - non_const_agg_term.PerformBinding(input_context); + non_const_agg_term.PerformBinding(IsGlobal(), input_context); } // Handle the projection by creating two binding contexts, the first being @@ -117,8 +121,6 @@ void AggregatePlan::PerformBinding(BindingContext &binding_context) { const_cast(predicate) ->PerformBinding({&binding_context}); } - - } hash_t AggregatePlan::Hash( @@ -165,27 +167,22 @@ hash_t AggregatePlan::Hash() const { bool AggregatePlan::AreEqual( const std::vector &A, const std::vector &B) const { - if (A.size() != B.size()) - return false; + if (A.size() != B.size()) return false; for (size_t i = 0; i < A.size(); i++) { - if (A[i].aggtype != B[i].aggtype) - return false; + if (A[i].aggtype != B[i].aggtype) return false; auto *expr = A[i].expression; - if (expr && (*expr != *B[i].expression)) - return false; + if (expr && (*expr != *B[i].expression)) return false; - if (A[i].distinct != B[i].distinct) - return false; + if (A[i].distinct != B[i].distinct) return false; } return true; } bool AggregatePlan::operator==(const AbstractPlan &rhs) const { - if (GetPlanNodeType() != rhs.GetPlanNodeType()) - return false; + if (GetPlanNodeType() != rhs.GetPlanNodeType()) return false; auto &other = static_cast(rhs); @@ -195,12 +192,10 @@ bool AggregatePlan::operator==(const AbstractPlan &rhs) const { if ((pred == nullptr && other_pred != nullptr) || (pred != nullptr && other_pred == nullptr)) return false; - if (pred && *pred != *other_pred) - return false; + if (pred && *pred != *other_pred) return false; // UniqueAggTerms - if 
(!AreEqual(GetUniqueAggTerms(), other.GetUniqueAggTerms())) - return false; + if (!AreEqual(GetUniqueAggTerms(), other.GetUniqueAggTerms())) return false; // Project Info auto *proj_info = GetProjectInfo(); @@ -208,24 +203,19 @@ bool AggregatePlan::operator==(const AbstractPlan &rhs) const { if ((proj_info == nullptr && other_proj_info != nullptr) || (proj_info != nullptr && other_proj_info == nullptr)) return false; - if (proj_info && *proj_info != *other_proj_info) - return false; + if (proj_info && *proj_info != *other_proj_info) return false; // Group by size_t group_by_col_ids_count = GetGroupbyColIds().size(); - if (group_by_col_ids_count != other.GetGroupbyColIds().size()) - return false; + if (group_by_col_ids_count != other.GetGroupbyColIds().size()) return false; for (size_t i = 0; i < group_by_col_ids_count; i++) { - if (GetGroupbyColIds()[i] != other.GetGroupbyColIds()[i]) - return false; + if (GetGroupbyColIds()[i] != other.GetGroupbyColIds()[i]) return false; } - if (*GetOutputSchema() != *other.GetOutputSchema()) - return false; + if (*GetOutputSchema() != *other.GetOutputSchema()) return false; - if (GetAggregateStrategy() != other.GetAggregateStrategy()) - return false; + if (GetAggregateStrategy() != other.GetAggregateStrategy()) return false; return (AbstractPlan::operator==(rhs)); } diff --git a/src/planner/csv_scan_plan.cpp b/src/planner/csv_scan_plan.cpp new file mode 100644 index 00000000000..c4ff66765e9 --- /dev/null +++ b/src/planner/csv_scan_plan.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scan_plan.cpp +// +// Identification: src/planner/csv_scan_plan.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "planner/csv_scan_plan.h" + +#include + +#include "codegen/type/type.h" + +namespace peloton { +namespace planner { + 
+CSVScanPlan::CSVScanPlan(std::string file_name, + std::vector &&cols, + char delimiter, char quote, char escape, + std::string null) + : file_name_(std::move(file_name)), + delimiter_(delimiter), + quote_(quote), + escape_(escape), + null_(null) { + attributes_.resize(cols.size()); + for (uint32_t i = 0; i < cols.size(); i++) { + const auto &col_info = cols[i]; + attributes_[i].type = codegen::type::Type{col_info.type, true}; + attributes_[i].attribute_id = i; + attributes_[i].name = col_info.name; + } +} + +PlanNodeType CSVScanPlan::GetPlanNodeType() const { + return PlanNodeType::CSVSCAN; +} + +std::unique_ptr CSVScanPlan::Copy() const { + std::vector new_cols; + for (const auto &attribute : attributes_) { + new_cols.push_back(CSVScanPlan::ColumnInfo{.name = attribute.name, + .type = attribute.type.type_id}); + } + return std::unique_ptr(new CSVScanPlan( + file_name_, std::move(new_cols), delimiter_, quote_, escape_, null_)); +} + +void CSVScanPlan::PerformBinding(BindingContext &binding_context) { + for (uint32_t i = 0; i < attributes_.size(); i++) { + binding_context.BindNew(i, &attributes_[i]); + } +} + +void CSVScanPlan::GetOutputColumns(std::vector &columns) const { + columns.clear(); + columns.resize(attributes_.size()); + std::iota(columns.begin(), columns.end(), 0); +} + +hash_t CSVScanPlan::Hash() const { + hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length()); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_)); + hash = HashUtil::CombineHashes( + hash, HashUtil::HashBytes(null_.c_str(), null_.length())); + return hash; +} + +bool CSVScanPlan::operator==(const AbstractPlan &rhs) const { + if (rhs.GetPlanNodeType() != PlanNodeType::CSVSCAN) return false; + const auto &other = static_cast(rhs); + return ( + (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) && + delimiter_ == other.delimiter_ && quote_ == 
other.quote_ && + escape_ == other.escape_ && null_ == other.null_); +} + +void CSVScanPlan::GetAttributes(std::vector &ais) const { + ais.clear(); + for (const auto &ai : attributes_) { + ais.push_back(&ai); + } +} + +} // namespace planner +} // namespace peloton \ No newline at end of file diff --git a/src/planner/export_external_file_plan.cpp b/src/planner/export_external_file_plan.cpp new file mode 100644 index 00000000000..8f63cc1a072 --- /dev/null +++ b/src/planner/export_external_file_plan.cpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// export_external_file_plan.cpp +// +// Identification: src/planner/export_external_file_plan.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "planner/export_external_file_plan.h" + +#include "common/macros.h" +#include "util/hash_util.h" +#include "util/string_util.h" + +namespace peloton { +namespace planner { + +ExportExternalFilePlan::ExportExternalFilePlan(std::string file_name, + char delimiter, char quote, + char escape) + : file_name_(file_name), + delimiter_(delimiter), + quote_(quote), + escape_(escape) {} + +PlanNodeType ExportExternalFilePlan::GetPlanNodeType() const { + return PlanNodeType::EXPORT_EXTERNAL_FILE; +} + +hash_t ExportExternalFilePlan::Hash() const { + hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length()); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_)); + hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_)); + return hash; +} + +bool ExportExternalFilePlan::operator==(const AbstractPlan &rhs) const { + if (rhs.GetPlanNodeType() != PlanNodeType::EXPORT_EXTERNAL_FILE) return false; + const auto &other = static_cast(rhs); + return ( + (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) && + 
delimiter_ == other.delimiter_ && quote_ == other.quote_ && + escape_ == other.escape_); +} + +std::unique_ptr ExportExternalFilePlan::Copy() const { + return std::unique_ptr{ + new ExportExternalFilePlan(file_name_, delimiter_, quote_, escape_)}; +} + +void ExportExternalFilePlan::PerformBinding(BindingContext &binding_context) { + PELOTON_ASSERT(GetChildrenSize() == 1); + auto &child = *GetChild(0); + + std::vector child_output_cols; + child.GetOutputColumns(child_output_cols); + + output_attributes_.clear(); + for (const auto &col_id : child_output_cols) { + output_attributes_.push_back(binding_context.Find(col_id)); + } +} + +} // namespace planner +} // namespace peloton \ No newline at end of file diff --git a/src/planner/insert_plan.cpp b/src/planner/insert_plan.cpp index ff0965c8b6a..c8f0a8cc40a 100644 --- a/src/planner/insert_plan.cpp +++ b/src/planner/insert_plan.cpp @@ -205,11 +205,6 @@ void InsertPlan::SetDefaultValue(uint32_t idx) { values_.push_back(*v); } -type::AbstractPool *InsertPlan::GetPlanPool() { - if (pool_.get() == nullptr) pool_.reset(new type::EphemeralPool()); - return pool_.get(); -} - void InsertPlan::SetParameterValues(std::vector *values) { LOG_TRACE("Set Parameter Values in Insert"); auto *schema = target_table_->GetSchema(); @@ -236,15 +231,19 @@ void InsertPlan::PerformBinding(BindingContext &binding_context) { const auto &children = GetChildren(); if (children.size() == 1) { + // Let child bind children[0]->PerformBinding(binding_context); + // Pull out what we need auto *scan = static_cast(children[0].get()); - auto &col_ids = scan->GetColumnIds(); + + std::vector col_ids; + scan->GetOutputColumns(col_ids); + for (oid_t col_id = 0; col_id < col_ids.size(); col_id++) { ais_.push_back(binding_context.Find(col_id)); } } - // Binding is not required if there is no child } hash_t InsertPlan::Hash() const { diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 1fd81b76865..3660fcc2f79 100644 --- 
a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -155,12 +155,12 @@ bool DataTable::CheckConstraints(const AbstractTuple *tuple) const { // column. Like maybe can store a list of just columns that // even have constraints defined so that we don't have to // look at each column individually. - oid_t column_count = schema->GetColumnCount(); + size_t column_count = schema->GetColumnCount(); for (oid_t column_itr = 0; column_itr < column_count; column_itr++) { - std::vector column_cons = + const std::vector &column_constraints = schema->GetColumn(column_itr).GetConstraints(); - for (auto cons : column_cons) { - ConstraintType type = cons.GetType(); + for (const auto &constraint : column_constraints) { + ConstraintType type = constraint.GetType(); switch (type) { case ConstraintType::NOTNULL: { if (CheckNotNulls(tuple, column_itr) == false) { @@ -208,9 +208,9 @@ bool DataTable::CheckConstraints(const AbstractTuple *tuple) const { LOG_TRACE("%s", error.c_str()); throw ConstraintException(error); } - } // SWITCH - } // FOR (constraints) - } // FOR (columns) + } + } + } return true; } diff --git a/src/traffic_cop/traffic_cop.cpp b/src/traffic_cop/traffic_cop.cpp index a87d99c0ac5..7bfffebb4c0 100644 --- a/src/traffic_cop/traffic_cop.cpp +++ b/src/traffic_cop/traffic_cop.cpp @@ -523,6 +523,11 @@ FieldInfo TrafficCop::GetColumnFieldForValueType(std::string column_name, field_size = 255; break; } + case type::TypeId::DATE: { + field_type = PostgresValueType::DATE; + field_size = 4; + break; + } case type::TypeId::TIMESTAMP: { field_type = PostgresValueType::TIMESTAMPS; field_size = 64; // FIXME: Bytes??? 
diff --git a/src/type/date_type.cpp b/src/type/date_type.cpp index d99617178f4..86e9f8b7af6 100644 --- a/src/type/date_type.cpp +++ b/src/type/date_type.cpp @@ -6,12 +6,13 @@ // // Identification: src/type/date_type.cpp // -// Copyright (c) 2015-16, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "type/date_type.h" +#include "function/date_functions.h" #include "type/value_factory.h" namespace peloton { @@ -19,122 +20,106 @@ namespace type { DateType::DateType() : Type(TypeId::DATE) {} -CmpBool DateType::CompareEquals(const Value& left, const Value& right) const { +CmpBool DateType::CompareEquals(const Value &left, const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() == right.GetAs()); } -CmpBool DateType::CompareNotEquals(const Value& left, - const Value& right) const { +CmpBool DateType::CompareNotEquals(const Value &left, + const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); - if (right.IsNull()) return CmpBool::NULL_; + if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() != right.GetAs()); } -CmpBool DateType::CompareLessThan(const Value& left, const Value& right) const { +CmpBool DateType::CompareLessThan(const Value &left, const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() < right.GetAs()); } -CmpBool DateType::CompareLessThanEquals(const Value& left, - const Value& right) const { +CmpBool DateType::CompareLessThanEquals(const Value &left, + const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() <= right.GetAs()); } -CmpBool DateType::CompareGreaterThan(const Value& left, - const 
Value& right) const { +CmpBool DateType::CompareGreaterThan(const Value &left, + const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() > right.GetAs()); } -CmpBool DateType::CompareGreaterThanEquals(const Value& left, - const Value& right) const { +CmpBool DateType::CompareGreaterThanEquals(const Value &left, + const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return CmpBool::NULL_; return GetCmpBool(left.GetAs() >= right.GetAs()); } -Value DateType::Min(const Value& left, const Value& right) const { +Value DateType::Min(const Value &left, const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return left.OperateNull(right); if (left.CompareLessThan(right) == CmpBool::CmpTrue) return left.Copy(); return right.Copy(); } -Value DateType::Max(const Value& left, const Value& right) const { +Value DateType::Max(const Value &left, const Value &right) const { PELOTON_ASSERT(left.CheckComparable(right)); if (left.IsNull() || right.IsNull()) return left.OperateNull(right); if (left.CompareGreaterThan(right) == CmpBool::CmpTrue) return left.Copy(); return right.Copy(); } -// Debug -std::string DateType::ToString(const Value& val) const { - if (val.IsNull()) return "date_null"; - int32_t tm = val.value_.date; - tm /= 1000000; - tm /= 100000; - uint16_t year = tm % 10000; - tm /= 10000; - int tz = tm % 27; - tz -= 12; - tm /= 27; - uint16_t day = tm % 32; - tm /= 32; - uint16_t month = tm; - char str[30]; - char zone[5]; - sprintf(str, "%04d-%02d-%02d", year, month, day); - if (tz >= 0) { - str[26] = '+'; - } else - str[26] = '-'; - if (tz < 0) tz = -tz; - sprintf(zone, "%02d", tz); - str[27] = 0; - return std::string(std::string(str) + std::string(zone)); +std::string DateType::ToString(const Value &val) const { + // Null + if (val.IsNull()) { + return 
"date_null"; + } + + int32_t year, month, day; + function::DateFunctions::JulianToDate(val.value_.date, year, month, day); + return StringUtil::Format("%04d-%02d-%02d", year, month, day); } // Compute a hash value -size_t DateType::Hash(const Value& val) const { +size_t DateType::Hash(const Value &val) const { return std::hash{}(val.value_.date); } -void DateType::HashCombine(const Value& val, size_t& seed) const { +void DateType::HashCombine(const Value &val, size_t &seed) const { val.hash_combine(seed, val.value_.date); } -void DateType::SerializeTo(const Value& val, SerializeOutput& out) const { +void DateType::SerializeTo(const Value &val, SerializeOutput &out) const { out.WriteInt(val.value_.date); } -void DateType::SerializeTo(const Value& val, char* storage, +void DateType::SerializeTo(const Value &val, char *storage, bool inlined UNUSED_ATTRIBUTE, - AbstractPool* pool UNUSED_ATTRIBUTE) const { - *reinterpret_cast(storage) = val.value_.date; + AbstractPool *pool UNUSED_ATTRIBUTE) const { + *reinterpret_cast(storage) = val.value_.date; } // Deserialize a value of the given type from the given storage space. 
-Value DateType::DeserializeFrom(const char* storage, +Value DateType::DeserializeFrom(const char *storage, const bool inlined UNUSED_ATTRIBUTE, - AbstractPool* pool UNUSED_ATTRIBUTE) const { - int32_t val = *reinterpret_cast(storage); + AbstractPool *pool UNUSED_ATTRIBUTE) const { + int32_t val = *reinterpret_cast(storage); return Value(type_id_, static_cast(val)); } -Value DateType::DeserializeFrom(SerializeInput& in UNUSED_ATTRIBUTE, - AbstractPool* pool UNUSED_ATTRIBUTE) const { +Value DateType::DeserializeFrom(SerializeInput &in UNUSED_ATTRIBUTE, + AbstractPool *pool UNUSED_ATTRIBUTE) const { return Value(type_id_, in.ReadInt()); } // Create a copy of this value -Value DateType::Copy(const Value& val) const { return Value(val); } +Value DateType::Copy(const Value &val) const { return Value(val); } -Value DateType::CastAs(const Value& val, const TypeId type_id) const { +Value DateType::CastAs(const Value &val, const TypeId type_id) const { switch (type_id) { case TypeId::DATE: return Copy(val); @@ -144,7 +129,7 @@ Value DateType::CastAs(const Value& val, const TypeId type_id) const { default: break; } - throw Exception("Date is not coercable to " + + throw Exception("Date is not coercible to " + Type::GetInstance(type_id)->ToString()); } diff --git a/src/util/file.cpp b/src/util/file.cpp new file mode 100644 index 00000000000..275d3848418 --- /dev/null +++ b/src/util/file.cpp @@ -0,0 +1,125 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// file.cpp +// +// Identification: src/util/file.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "util/file.h" + +#include "util/string_util.h" + +namespace peloton { +namespace util { + +void File::Open(const std::string &name, File::AccessMode access_mode) { + // Close the existing file if it's open + Close(); + + int flags; + switch 
(access_mode) { + case AccessMode::ReadOnly: { + flags = O_RDONLY; + break; + } + case AccessMode::WriteOnly: { + flags = O_WRONLY; + break; + } + case AccessMode::ReadWrite: { + flags = O_RDWR; + break; + } + } + + // Open + int fd = open(name.c_str(), flags); + + // Check error + if (fd == -1) { + throw Exception(StringUtil::Format("unable to open file '%s': %s", + name.c_str(), strerror(errno))); + } + + // Done + fd_ = fd; +} + +uint64_t File::Read(void *data, uint64_t len) const { + // Ensure open + PELOTON_ASSERT(IsOpen()); + + // Perform read + ssize_t bytes_read = read(fd_, data, len); + + // Check error + if (bytes_read == -1) { + throw Exception( + StringUtil::Format("error reading file: %s", strerror(errno))); + } + + // Done + return static_cast(bytes_read); +} + +uint64_t File::Write(void *data, uint64_t len) const { + // Ensure open + PELOTON_ASSERT(IsOpen()); + + // Perform write + ssize_t bytes_written = write(fd_, data, len); + + // Check error + if (bytes_written == -1) { + throw Exception( + StringUtil::Format("error writing to file: %s", strerror(errno))); + } + + // Done + return static_cast(bytes_written); +} + +uint64_t File::Size() const { + // Ensure open + PELOTON_ASSERT(IsOpen()); + + // Save the current position + off_t curr_off = lseek(fd_, 0, SEEK_CUR); + if (curr_off == -1) { + throw Exception(StringUtil::Format( + "unable to read current position in file: %s", strerror(errno))); + } + + // Seek to the end of the file, returning the new file position i.e., the + // size of the file in bytes. 
+ off_t off = lseek(fd_, 0, SEEK_END); + if (off == -1) { + throw Exception(StringUtil::Format( + "unable to move file position to end file: %s", strerror(errno))); + } + + off_t restore = lseek(fd_, curr_off, SEEK_SET); + if (restore == -1) { + throw Exception(StringUtil::Format( + "unable to restore position after moving to the end: %s", + strerror(errno))); + } + + // Restore position + return static_cast(off); +} + +void File::Close() { + if (IsOpen()) { + close(fd_); + fd_ = kInvalid; + } +} + +} // namespace util +} // namespace peloton \ No newline at end of file diff --git a/src/util/string_util.cpp b/src/util/string_util.cpp index d4fca199219..a0f8ba3987f 100644 --- a/src/util/string_util.cpp +++ b/src/util/string_util.cpp @@ -190,4 +190,13 @@ std::vector StringUtil::Split(const std::string &input, } return splits; } + +std::string StringUtil::Strip(const std::string &str, char c) { + // There's a copy here which is wasteful, so don't use this in performance + // critical code! 
+ std::string tmp = str; + tmp.erase(std::remove(tmp.begin(), tmp.end(), c), tmp.end()); + return tmp; } + +} // namespace peloton diff --git a/test/codegen/csv_scan_test.cpp b/test/codegen/csv_scan_test.cpp new file mode 100644 index 00000000000..127e73b968f --- /dev/null +++ b/test/codegen/csv_scan_test.cpp @@ -0,0 +1,215 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// csv_scan_test.cpp +// +// Identification: test/codegen/csv_scan_test.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/testing_codegen_util.h" + +#include "codegen/util/csv_scanner.h" +#include "common/timer.h" +#include "util/file_util.h" +#include "util/string_util.h" + +namespace peloton { +namespace test { + +class CSVScanTest : public PelotonCodeGenTest {}; + +using CallbackFn = + std::function; + +struct State { + codegen::util::CSVScanner *scanner; + CallbackFn callback; +}; + +void CSVRowCallback(void *s) { + auto *state = reinterpret_cast(s); + state->callback(state->scanner->GetColumns()); +} + +void IterateAsCSV(const std::vector &rows, + const std::vector &col_types, + CallbackFn callback, char delimiter = ',', char quote = '"', + char escape = '"') { + std::string csv_data; + for (const auto &row : rows) { + csv_data.append(row).append("\n"); + } + + // Write the contents into a temporary file + TempFileHandle fh(FileUtil::WriteTempFile(csv_data, "", "tmp")); + + // The memory pool + auto &pool = *TestingHarness::GetInstance().GetTestingPool(); + + // The client-state + State state = {.scanner = nullptr, .callback = callback}; + + // The scanner + codegen::util::CSVScanner scanner( + pool, fh.name, col_types.data(), static_cast(col_types.size()), + CSVRowCallback, reinterpret_cast(&state), delimiter, quote, + escape); + + state.scanner = &scanner; + + // Iterate! 
+ scanner.Produce(); +} + +TEST_F(CSVScanTest, NumericScanTest) { + // The set of test rows and their types + std::vector rows = {"1,2,3.0,4", "4,5,6.0,7", "8,9,10.0,11"}; + std::vector types = {{type::TypeId::INTEGER, false}, + {type::TypeId::INTEGER, false}, + {type::TypeId::DECIMAL, false}, + {type::TypeId::INTEGER, false}}; + + uint32_t rows_read = 0; + IterateAsCSV(rows, types, [&rows, &rows_read, &types]( + const codegen::util::CSVScanner::Column *cols) { + // Split the input row into column values + const auto input_parts = StringUtil::Split(rows[rows_read++], ','); + + // Check contents of row based on test input + for (uint32_t i = 0; i < types.size(); i++) { + // The column isn't null + EXPECT_FALSE(cols[i].is_null); + + // The column has a value + EXPECT_GT(cols[i].len, 0); + + // Check the string representations + EXPECT_EQ(input_parts[i], std::string(cols[i].ptr, cols[i].len)); + } + }); + + EXPECT_EQ(rows.size(), rows_read); +} + +TEST_F(CSVScanTest, QuoteEscapeTest) { + // The set of test rows and their types + std::vector rows = {"yea he's \"cool\",1,2", "a quote:\"\",3,4"}; + std::vector types = {{type::TypeId::VARCHAR, false}, + {type::TypeId::INTEGER, false}, + {type::TypeId::INTEGER, false}}; + + uint32_t rows_read = 0; + IterateAsCSV(rows, types, [&rows, &rows_read, &types]( + const codegen::util::CSVScanner::Column *cols) { + // Split the input row into column values + auto input_parts = StringUtil::Split(rows[rows_read++], ','); + + // Check contents of row based on test input + for (uint32_t i = 0; i < types.size(); i++) { + // The column isn't null + EXPECT_FALSE(cols[i].is_null); + + // The column has a value + EXPECT_GT(cols[i].len, 0); + + // Check the string representations. We need to strip off any quotes from + // the original string since the CSV scan will strip them for us. 
+ EXPECT_EQ(StringUtil::Strip(input_parts[i], '"'), + std::string(cols[i].ptr, cols[i].len)); + } + }); + + EXPECT_EQ(rows.size(), rows_read); +} + +TEST_F(CSVScanTest, MixedStringTest) { + std::vector rows = { + "1,1994-01-01,3,test", "4,2018-01-01,6,\"quoted_test\"", + "8,2016-05-05,10,\"test\nnewline\ninquote\""}; + std::vector types = {{type::TypeId::INTEGER, false}, + {type::TypeId::DATE, false}, + {type::TypeId::INTEGER, false}, + {type::TypeId::VARCHAR, false}}; + uint32_t rows_read = 0; + IterateAsCSV(rows, types, [&rows, &rows_read, &types]( + const codegen::util::CSVScanner::Column *cols) { + // Split the input row into column values + auto input_parts = StringUtil::Split(rows[rows_read++], ','); + + for (uint32_t i = 0; i < types.size(); i++) { + // The column isn't null + EXPECT_FALSE(cols[i].is_null); + + // The column has a value + EXPECT_GT(cols[i].len, 0); + + // Check the string representations. We need to strip off any quotes from + // the original string since the CSV scan will strip them for us. 
+ EXPECT_EQ(StringUtil::Strip(input_parts[i], '"'), + std::string(cols[i].ptr, cols[i].len)); + } + }); + + EXPECT_EQ(rows.size(), rows_read); +} + +TEST_F(CSVScanTest, CatchErrorsTest) { + //////////////////////////////////////////////////////////////////// + /// + /// Test Case - Missing last column + /// + //////////////////////////////////////////////////////////////////// + { + std::vector missing_col = {"1,1994-01-01,3"}; + std::vector types = {{type::TypeId::INTEGER, false}, + {type::TypeId::DATE, false}, + {type::TypeId::INTEGER, false}, + {type::TypeId::VARCHAR, false}}; + EXPECT_ANY_THROW(IterateAsCSV( + missing_col, types, + [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) { + FAIL(); + })); + } + + //////////////////////////////////////////////////////////////////// + /// + /// Test Case - Unclosed quote + /// + //////////////////////////////////////////////////////////////////// + { + std::vector missing_col = {"1,\"unclosed,3"}; + std::vector types = {{type::TypeId::INTEGER, false}, + {type::TypeId::VARCHAR, false}, + {type::TypeId::INTEGER, false}}; + EXPECT_ANY_THROW(IterateAsCSV( + missing_col, types, + [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) { + FAIL(); + })); + } + + //////////////////////////////////////////////////////////////////// + /// + /// Test Case - Unclosed quote + /// + //////////////////////////////////////////////////////////////////// + { + std::vector missing_col = {"1,unclosed\",3"}; + std::vector types = {{type::TypeId::INTEGER, false}, + {type::TypeId::VARCHAR, false}, + {type::TypeId::INTEGER, false}}; + EXPECT_ANY_THROW(IterateAsCSV( + missing_col, types, + [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) { + FAIL(); + })); + } +} + +} // namespace test +} // namespace peloton \ No newline at end of file diff --git a/test/codegen/csv_scan_translator_test.cpp b/test/codegen/csv_scan_translator_test.cpp new file mode 100644 index 00000000000..320db518117 --- 
/dev/null +++ b/test/codegen/csv_scan_translator_test.cpp @@ -0,0 +1,114 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// +// csv_scan_translator_test.cpp +// +// Identification: test/codegen/csv_scan_translator_test.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/testing_codegen_util.h" + +#include "planner/csv_scan_plan.h" +#include "planner/insert_plan.h" +#include "planner/seq_scan_plan.h" +#include "util/string_util.h" +#include "util/file_util.h" + +namespace peloton { +namespace test { + +class CSVScanTranslatorTest : public PelotonCodeGenTest { + public: + CSVScanTranslatorTest() : PelotonCodeGenTest() {} + + oid_t TestTableId1() { return test_table_oids[0]; } + uint32_t NumRowsInTestTable() const { return num_rows_to_insert; } + + private: + uint32_t num_rows_to_insert = 64; +}; + +TEST_F(CSVScanTranslatorTest, IntCsvScan) { + // The quoting character and a helper function to quote a given string + const char quote = '"'; + const auto quote_string = [quote](std::string s) { + return StringUtil::Format("%c%s%c", quote, s.c_str(), quote); + }; + + // Test input rows + // clang-format off + std::vector rows = { + "1,2,3.9,four", + "5,6,7.4,eight", + "9,10,11.1," + quote_string("twelve"), + "14,15,16.7,eighteen " + quote_string("nineteen") + " twenty " + quote_string("twenty-one")}; + // clang-format on + + std::string csv_data; + for (const auto &row : rows) { + csv_data.append(row).append("\n"); + } + + /////////////////////////////////////////////////// + /// First insert contents of CSV into test table + /////////////////////////////////////////////////// + { + // Write the contents into a temporary file + TempFileHandle fh{FileUtil::WriteTempFile(csv_data, "", "tmp")}; + + // clang-format off + // NOTE: this schema has to match that of the test table! 
+ std::vector cols = { + planner::CSVScanPlan::ColumnInfo{.name = "1", .type = peloton::type::TypeId::INTEGER}, + planner::CSVScanPlan::ColumnInfo{.name = "2", .type = peloton::type::TypeId::INTEGER}, + planner::CSVScanPlan::ColumnInfo{.name = "3", .type = peloton::type::TypeId::DECIMAL}, + planner::CSVScanPlan::ColumnInfo{.name = "4", .type = peloton::type::TypeId::VARCHAR}, + }; + // clang-format on + std::unique_ptr csv_scan{ + new planner::CSVScanPlan(fh.name, std::move(cols), ',')}; + std::unique_ptr insert{ + new planner::InsertPlan(&GetTestTable(TestTableId1()))}; + + insert->AddChild(std::move(csv_scan)); + + planner::BindingContext ctx; + insert->PerformBinding(ctx); + + codegen::BufferingConsumer consumer{{0, 1, 2, 3}, ctx}; + + // Execute insert + CompileAndExecute(*insert, consumer); + ASSERT_EQ(0, consumer.GetOutputTuples().size()); + } + + /////////////////////////////////////////////////// + /// Now scan test table, comparing results + /////////////////////////////////////////////////// + { + std::unique_ptr scan{new planner::SeqScanPlan( + &GetTestTable(TestTableId1()), nullptr, {0, 1, 2, 3})}; + + planner::BindingContext ctx; + scan->PerformBinding(ctx); + + codegen::BufferingConsumer consumer{{0, 1, 2, 3}, ctx}; + + // Execute insert + CompileAndExecute(*scan, consumer); + + const auto &output = consumer.GetOutputTuples(); + ASSERT_EQ(rows.size(), output.size()); + for (uint32_t i = 0; i < rows.size(); i++) { + EXPECT_EQ(StringUtil::Strip(rows[i], '"'), output[i].ToCSV()); + } + } +} + +} // namespace test +} // namespace peloton diff --git a/test/codegen/testing_codegen_util.cpp b/test/codegen/testing_codegen_util.cpp index 5302eae3daf..a19598e33ed 100644 --- a/test/codegen/testing_codegen_util.cpp +++ b/test/codegen/testing_codegen_util.cpp @@ -12,6 +12,8 @@ #include "codegen/testing_codegen_util.h" +#include + #include "catalog/table_catalog.h" #include "codegen/proxy/runtime_functions_proxy.h" #include "codegen/proxy/value_proxy.h" @@ -28,6 
+30,9 @@ namespace peloton { namespace test { +TempFileHandle::TempFileHandle(std::string _name) : name(_name) {} +TempFileHandle::~TempFileHandle() { boost::filesystem::remove(name); } + //===----------------------------------------------------------------------===// // PELOTON CODEGEN TEST //===----------------------------------------------------------------------===// @@ -446,7 +451,7 @@ void Printer::ConsumeResult(codegen::ConsumerContext &ctx, format.append("]\n"); // Make the printf call - codegen.CallPrintf(format, cols); + codegen.Printf(format, cols); } } // namespace test diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp index 551e3956e75..0057721352b 100644 --- a/test/codegen/value_integrity_test.cpp +++ b/test/codegen/value_integrity_test.cpp @@ -6,17 +6,20 @@ // // Identification: test/codegen/value_integrity_test.cpp // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include "codegen/testing_codegen_util.h" +#include + #include "codegen/function_builder.h" #include "codegen/type/tinyint_type.h" #include "codegen/type/smallint_type.h" #include "codegen/type/integer_type.h" #include "codegen/type/bigint_type.h" +#include "function/numeric_functions.h" namespace peloton { namespace test { @@ -58,7 +61,7 @@ void DivideByZeroTest(const codegen::type::Type &data_type, ExpressionType op) { } } - codegen.CallPrintf("%lu\n", {res.GetValue()}); + codegen.Printf("%lu\n", {res.GetValue()}); function.ReturnAndFinish(); } @@ -125,7 +128,7 @@ void OverflowTest(const codegen::type::Type &data_type, ExpressionType op) { } } - codegen.CallPrintf("%lu\n", {res.GetValue()}); + codegen.Printf("%lu\n", {res.GetValue()}); function.ReturnAndFinish(); } @@ -161,5 +164,143 @@ TEST_F(ValueIntegrityTest, IntegerDivideByZero) { } } +namespace { + +template +using 
InputFunc = T (*)(const codegen::type::Type &, const char *, uint32_t); + +template +void TestInputIntegral( + const codegen::type::Type &type, InputFunc TestFunc, + std::vector> extra_valid_tests = {}, + std::vector extra_invalid_tests = {}, + std::vector extra_overflow_tests = {}) { + // Default valid tests - these are valid for all integral types + std::vector> valid_tests = {{"0", 0}, + {"-1", -1}, + {"2", 2}, + {"+3", 3}, + {" 4", 4}, + {" -5", -5}, + {" +6", 6}, + {"7 ", 7}, + {"-8 ", -8}, + {" 9 ", 9}, + {" -10 ", -10}, + {" +11 ", 11}}; + valid_tests.insert(valid_tests.end(), extra_valid_tests.begin(), + extra_valid_tests.end()); + + // Default invalid tests + std::vector invalid_tests = {"a", "-b", "+c", " 1c", + "2d ", "3 3", "-4 4", "-5 a ", + " -6 a", " c 7 "}; + invalid_tests.insert(invalid_tests.end(), extra_invalid_tests.begin(), + extra_invalid_tests.end()); + + // Default overflow tests + std::vector overflow_tests = { + std::to_string(std::numeric_limits::min()) + "1", + std::to_string(std::numeric_limits::max()) + "1", + "123456789123456789123456789"}; + overflow_tests.insert(overflow_tests.end(), extra_overflow_tests.begin(), + extra_overflow_tests.end()); + + for (const auto &test : valid_tests) { + auto *ptr = test.first.data(); + auto len = static_cast(test.first.length()); + try { + EXPECT_EQ(test.second, TestFunc(type, ptr, len)); + } catch (std::exception &e) { + EXPECT_TRUE(false) << "Valid input '" << test.first << "' threw an error"; + } + } + + for (const auto &test : invalid_tests) { + auto *ptr = test.data(); + auto len = static_cast(test.length()); + EXPECT_THROW(TestFunc(type, ptr, len), std::runtime_error) + << "Input '" << test << "' was expected to throw an error, but did not"; + } + + for (const auto &test : overflow_tests) { + auto *ptr = test.data(); + auto len = static_cast(test.length()); + EXPECT_THROW(TestFunc(type, ptr, len), std::overflow_error) + << "Input '" << test << "' expected to overflow, but did not"; + } +} +} 
// namespace
+
+TEST_F(ValueIntegrityTest, InputIntegralTypesTest) {
+  codegen::type::Type tinyint{type::TypeId::TINYINT, false};
+  TestInputIntegral(tinyint, function::NumericFunctions::InputTinyInt,
+                    {{"-126", -126}, {"126", 126}});
+
+  codegen::type::Type smallint{type::TypeId::SMALLINT, false};
+  TestInputIntegral(smallint,
+                    function::NumericFunctions::InputSmallInt);
+
+  codegen::type::Type integer{type::TypeId::INTEGER, false};
+  TestInputIntegral(integer, function::NumericFunctions::InputInteger);
+
+  codegen::type::Type bigint{type::TypeId::BIGINT, false};
+  TestInputIntegral(bigint, function::NumericFunctions::InputBigInt);
+}
+
+TEST_F(ValueIntegrityTest, InputDecimalTypesTest) {
+  codegen::type::Type decimal{type::TypeId::DECIMAL, false};
+
+  // First check some valid cases
+  std::vector<std::pair<std::string, double>> valid_tests = {
+      {"0.0", 0.0},
+      {"-1.0", -1.0},
+      {"2.0", 2.0},
+      {"+3.0", 3.0},
+      {" 4.0", 4.0},
+      {" -5.0", -5.0},
+      {" +6.0", 6.0},
+      {"7.0 ", 7.0},
+      {"-8.0 ", -8.0},
+      {" 9.0 ", 9.0},
+      {" -10.0 ", -10.0},
+      {" +11.0 ", 11.0}};
+
+  for (const auto &test_case : valid_tests) {
+    auto *ptr = test_case.first.data();
+    auto len = static_cast<uint32_t>(test_case.first.length());
+    EXPECT_EQ(test_case.second,
+              function::NumericFunctions::InputDecimal(decimal, ptr, len));
+  }
+
+  // Now let's try some invalid ones. Take each valid test and insert garbage
+  // at every possible offset. Enumerating the offsets instead of drawing one
+  // at random keeps the set of inputs identical from run to run, so a failure
+  // seen once can always be reproduced.
+  std::vector<std::string> invalid_tests;
+
+  for (const auto &valid_test : valid_tests) {
+    const auto &orig = valid_test.first;
+
+    // Offsets run from 0 (prepend) through orig.length() (append), so the
+    // garbage lands before, inside and after the number.
+    for (size_t pos = 0; pos <= orig.length(); pos++) {
+      auto invalid_num = orig.substr(0, pos) + "aa" + orig.substr(pos);
+
+      invalid_tests.push_back(invalid_num);
+    }
+  }
+
+  // Now check that each test throws an invalid string error
+  for (const auto &invalid_test : invalid_tests) {
+    auto *ptr = invalid_test.data();
+    auto len = static_cast<uint32_t>(invalid_test.length());
+    EXPECT_THROW(function::NumericFunctions::InputDecimal(decimal, ptr, len),
+                 std::runtime_error)
+        << "Input '" << invalid_test
+        << "' expected to throw error, but passed parsing logic";
+  }
+}
+
 }  // namespace test
 }  // namespace peloton
\ No newline at end of file
diff --git a/test/common/internal_types_test.cpp b/test/common/internal_types_test.cpp index c9782514fc6..7a616315e20 100644 --- a/test/common/internal_types_test.cpp +++ b/test/common/internal_types_test.cpp @@ -325,8 +325,8 @@ TEST_F(InternalTypesTests, PlanNodeTypeTest) { PlanNodeType::ORDERBY, PlanNodeType::PROJECTION, PlanNodeType::MATERIALIZE, PlanNodeType::LIMIT, PlanNodeType::DISTINCT, PlanNodeType::SETOP, PlanNodeType::APPEND, PlanNodeType::AGGREGATE_V2, - PlanNodeType::HASH, PlanNodeType::RESULT, PlanNodeType::COPY, - PlanNodeType::MOCK}; + PlanNodeType::HASH, PlanNodeType::RESULT, + PlanNodeType::EXPORT_EXTERNAL_FILE, PlanNodeType::MOCK}; // Make sure that ToString and FromString work for (auto val : list) { diff --git a/test/function/decimal_functions_test.cpp b/test/function/numeric_functions_test.cpp similarity index 75% rename from test/function/decimal_functions_test.cpp rename to test/function/numeric_functions_test.cpp index 994523b732f..be700b4fa9f 100644 --- a/test/function/decimal_functions_test.cpp +++ b/test/function/numeric_functions_test.cpp @@ -2,26 +2,25 @@ // // Peloton // -// decimal_functions_test.cpp +// 
numeric_functions_test.cpp // -// Identification: test/expression/decimal_functions_test.cpp +// Identification: test/function/numeric_functions_test.cpp // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// -#include +#include +#include #include #include -#include #include "common/harness.h" -#include "function/decimal_functions.h" +#include "function/numeric_functions.h" #include "common/internal_types.h" #include "type/value.h" #include "type/value_factory.h" -#include "util/string_util.h" using ::testing::NotNull; using ::testing::Return; @@ -29,31 +28,31 @@ using ::testing::Return; namespace peloton { namespace test { -class DecimalFunctionsTests : public PelotonTest {}; +class NumericFunctionsTests : public PelotonTest {}; -TEST_F(DecimalFunctionsTests, SqrtTest) { +TEST_F(NumericFunctionsTests, SqrtTest) { const double column_val = 9.0; const double expected = sqrt(9.0); std::vector args = { type::ValueFactory::GetDecimalValue(column_val)}; - auto result = function::DecimalFunctions::Sqrt(args); + auto result = function::NumericFunctions::Sqrt(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(expected, result.GetAs()); // NULL CHECK args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)}; - result = function::DecimalFunctions::Sqrt(args); + result = function::NumericFunctions::Sqrt(args); EXPECT_TRUE(result.IsNull()); } -TEST_F(DecimalFunctionsTests, FloorTest) { +TEST_F(NumericFunctionsTests, FloorTest) { // Testing Floor with DecimalTypes std::vector inputs = {9.5, 3.3, -4.4, 0.0}; std::vector args; for (double in : inputs) { args = {type::ValueFactory::GetDecimalValue(in)}; - auto result = function::DecimalFunctions::_Floor(args); + auto result = function::NumericFunctions::_Floor(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(floor(in), result.GetAs()); } @@ -61,67 +60,67 @@ TEST_F(DecimalFunctionsTests, FloorTest) { // Testing 
Floor with Integer Types(Should be a no-op) int64_t numInt64 = 1; args = {type::ValueFactory::GetIntegerValue(numInt64)}; - auto result = function::DecimalFunctions::_Floor(args); + auto result = function::NumericFunctions::_Floor(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(numInt64, result.GetAs()); int32_t numInt32 = 1; args = {type::ValueFactory::GetIntegerValue(numInt32)}; - result = function::DecimalFunctions::_Floor(args); + result = function::NumericFunctions::_Floor(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(numInt32, result.GetAs()); int16_t numInt16 = 1; args = {type::ValueFactory::GetIntegerValue(numInt32)}; - result = function::DecimalFunctions::_Floor(args); + result = function::NumericFunctions::_Floor(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(numInt16, result.GetAs()); int16_t numInt8 = 1; args = {type::ValueFactory::GetIntegerValue(numInt8)}; - result = function::DecimalFunctions::_Floor(args); + result = function::NumericFunctions::_Floor(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(numInt8, result.GetAs()); // NULL CHECK args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)}; - result = function::DecimalFunctions::_Floor(args); + result = function::NumericFunctions::_Floor(args); EXPECT_TRUE(result.IsNull()); } -TEST_F(DecimalFunctionsTests, RoundTest) { +TEST_F(NumericFunctionsTests, RoundTest) { std::vector column_vals = {9.5, 3.3, -4.4, -5.5, 0.0}; std::vector args; for (double val : column_vals) { args = {type::ValueFactory::GetDecimalValue(val)}; - auto result = function::DecimalFunctions::_Round(args); + auto result = function::NumericFunctions::_Round(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(round(val), result.GetAs()); } // NULL CHECK args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)}; - auto result = function::DecimalFunctions::_Round(args); + auto result = function::NumericFunctions::_Round(args); EXPECT_TRUE(result.IsNull()); } -TEST_F(DecimalFunctionsTests,AbsTestDouble) { 
+TEST_F(NumericFunctionsTests,AbsTestDouble) { std::vector doubleTestInputs = {9.5, -2.5, -4.4, 0.0}; std::vector args; for (double in : doubleTestInputs) { args = {type::ValueFactory::GetDecimalValue(in)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(fabs(in), result.GetAs()); } // NULL CHECK args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); EXPECT_TRUE(result.IsNull()); } -TEST_F(DecimalFunctionsTests, AbsTestInt) { +TEST_F(NumericFunctionsTests, AbsTestInt) { std::vector bigIntTestInputs = {-20, -15, -10, 0, 10, 20}; std::vector intTestInputs = {-20, -15, -10, 0, 10, 20}; std::vector smallIntTestInputs = {-20, -15, -10, 0, 10, 20}; @@ -131,51 +130,51 @@ TEST_F(DecimalFunctionsTests, AbsTestInt) { // Testing Abs with Integer Types for (int64_t in: bigIntTestInputs) { args = {type::ValueFactory::GetBigIntValue(in)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(std::abs(in), result.GetAs()); } for (int32_t in: intTestInputs) { args = {type::ValueFactory::GetIntegerValue(in)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(abs(in), result.GetAs()); } for (int16_t in: smallIntTestInputs) { args = {type::ValueFactory::GetSmallIntValue(in)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(abs(in), result.GetAs()); } for (int8_t in: tinyIntTestInputs) { args = {type::ValueFactory::GetTinyIntValue(in)}; - auto result = function::DecimalFunctions::_Abs(args); + auto result = function::NumericFunctions::_Abs(args); 
EXPECT_FALSE(result.IsNull()); EXPECT_EQ(abs(in), result.GetAs()); } } -TEST_F(DecimalFunctionsTests, CeilTestDouble) { +TEST_F(NumericFunctionsTests, CeilTestDouble) { std::vector doubleTestInputs = {-36.0, -35.222, -0.7, -0.5, -0.2, 0.0, 0.2, 0.5, 0.7, 35.2, 36.0, 37.2222}; std::vector args; for (double in: doubleTestInputs) { args = {type::ValueFactory::GetDecimalValue(in)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(ceil(in), result.GetAs()); } args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_TRUE(result.IsNull()); } -TEST_F(DecimalFunctionsTests, CeilTestInt) { +TEST_F(NumericFunctionsTests, CeilTestInt) { std::vector bigIntTestInputs = {-20, -15, -10, 0, 10, 20}; std::vector intTestInputs = {-20, -15, -10, 0, 10, 20}; std::vector smallIntTestInputs = {-20, -15, -10, 0, 10, 20}; @@ -185,28 +184,28 @@ TEST_F(DecimalFunctionsTests, CeilTestInt) { // Testing Ceil with Integer Types for (int64_t in: bigIntTestInputs) { args = {type::ValueFactory::GetIntegerValue(in)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(ceil(in), result.GetAs()); } for (int in: intTestInputs) { args = {type::ValueFactory::GetIntegerValue(in)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(ceil(in), result.GetAs()); } for (int in: smallIntTestInputs) { args = {type::ValueFactory::GetIntegerValue(in)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(ceil(in), result.GetAs()); } for (int in: tinyIntTestInputs) { args = 
{type::ValueFactory::GetIntegerValue(in)}; - auto result = function::DecimalFunctions::_Ceil(args); + auto result = function::NumericFunctions::_Ceil(args); EXPECT_FALSE(result.IsNull()); EXPECT_EQ(ceil(in), result.GetAs()); } diff --git a/test/include/codegen/testing_codegen_util.h b/test/include/codegen/testing_codegen_util.h index 5dc427f03b1..c61a47e67c2 100644 --- a/test/include/codegen/testing_codegen_util.h +++ b/test/include/codegen/testing_codegen_util.h @@ -6,7 +6,7 @@ // // Identification: test/include/codegen/testing_codegen_util.h // -// Copyright (c) 2015-17, Carnegie Mellon University Database Group +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// @@ -40,11 +40,24 @@ using ConstExpressionPtr = using PlanPtr = std::unique_ptr; using ConstPlanPtr = std::unique_ptr; -//===----------------------------------------------------------------------===// -// Common base class for all codegen tests. This class four test tables that all -// the codegen components use. Their ID's are available through the oid_t -// enumeration. -//===----------------------------------------------------------------------===// +/** + * This is a scoped file handle that automatically deletes/removes the file + * with the given name when the class goes out of scope and the destructor is + * called. + */ +struct TempFileHandle { + std::string name; + TempFileHandle(std::string _name); + ~TempFileHandle(); +}; + +/** + * Common base class for all codegen tests. This class has four test tables + * whose IDs and names are stored in test_table_oids and test_table_names, + * respectively. The test tables all have the exact schema: column "a" and "b" + * are integers, column "c" is a decimal, and column "d" is a varchar. The table + * with the highest OID also has a primary key on column "a". 
+ */ class PelotonCodeGenTest : public PelotonTest { public: std::string test_db_name = "peloton_codegen"; diff --git a/test/optimizer/optimizer_test.cpp b/test/optimizer/optimizer_test.cpp index 8b5ed1e0ec7..50696017bb5 100644 --- a/test/optimizer/optimizer_test.cpp +++ b/test/optimizer/optimizer_test.cpp @@ -20,6 +20,7 @@ #include "executor/create_executor.h" #include "executor/insert_executor.h" #include "executor/plan_executor.h" +#include "expression/constant_value_expression.h" #include "expression/tuple_value_expression.h" #include "optimizer/mock_task.h" #include "optimizer/operators.h" diff --git a/test/parser/postgresparser_test.cpp b/test/parser/postgresparser_test.cpp index 36910bdc9a9..dee0d981491 100644 --- a/test/parser/postgresparser_test.cpp +++ b/test/parser/postgresparser_test.cpp @@ -17,6 +17,7 @@ #include "common/internal_types.h" #include "common/logger.h" #include "common/macros.h" +#include "expression/constant_value_expression.h" #include "expression/function_expression.h" #include "expression/operator_expression.h" #include "expression/tuple_value_expression.h" diff --git a/test/trigger/trigger_test.cpp b/test/trigger/trigger_test.cpp index 4dacd00cc1d..7238d7f9e6d 100644 --- a/test/trigger/trigger_test.cpp +++ b/test/trigger/trigger_test.cpp @@ -10,17 +10,18 @@ // //===----------------------------------------------------------------------===// -#include "trigger/trigger.h" #include "catalog/catalog.h" #include "common/harness.h" #include "concurrency/transaction_manager_factory.h" #include "executor/executors.h" #include "executor/executor_context.h" +#include "expression/constant_value_expression.h" #include "parser/pg_trigger.h" #include "parser/postgresparser.h" #include "planner/create_plan.h" #include "planner/insert_plan.h" #include "storage/abstract_table.h" +#include "trigger/trigger.h" namespace peloton { namespace test {