From 78f6e34d89a458fd677ea4184259305eb0cebc19 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 25 Apr 2018 13:28:19 -0400
Subject: [PATCH 01/42] Add more information to CopyStatement. Cleaned up
 includes.

---
 src/binder/bind_node_visitor.cpp    | 12 ++++++++-
 src/codegen/translator_factory.cpp  |  1 +
 src/common/internal_types.cpp       | 30 +++++++++++++++++++++
 src/include/common/internal_types.h |  9 +++++++
 src/include/parser/copy_statement.h | 35 ++++++++++++++++--------
 src/include/parser/postgresparser.h |  2 +-
 src/include/planner/abstract_plan.h |  2 +-
 src/include/planner/copy_plan.h     | 18 ++++---------
 src/optimizer/util.cpp              |  8 +++---
 src/parser/copy_statement.cpp       |  2 +-
 src/parser/postgresparser.cpp       | 42 ++++++++++++++++++++++-------
 test/parser/postgresparser_test.cpp |  1 +
 test/trigger/trigger_test.cpp       |  3 ++-
 13 files changed, 122 insertions(+), 43 deletions(-)
diff --git a/src/binder/bind_node_visitor.cpp b/src/binder/bind_node_visitor.cpp
index a6ffe17b322..c7d25093beb 100644
--- a/src/binder/bind_node_visitor.cpp
+++ b/src/binder/bind_node_visitor.cpp
@@ -166,7 +166,17 @@ void BindNodeVisitor::Visit(parser::DeleteStatement *node) {
 }
 
 void BindNodeVisitor::Visit(parser::LimitDescription *) {}
-void BindNodeVisitor::Visit(parser::CopyStatement *) {}
+
+void BindNodeVisitor::Visit(parser::CopyStatement *node) {
+  // Bind the source/target table of the copy
+  context_ = std::make_shared<BinderContext>(nullptr);
+  if (node->table != nullptr) {
+    node->table->Accept(this);
+  } else {
+    node->select_stmt->Accept(this);
+  }
+}
+
 void BindNodeVisitor::Visit(parser::CreateFunctionStatement *) {}
 void BindNodeVisitor::Visit(parser::CreateStatement *node) {
   node->TryBindDatabaseName(default_database_name_);
diff --git a/src/codegen/translator_factory.cpp b/src/codegen/translator_factory.cpp
index 6fe1f65fce6..f10fd863033 100644
--- a/src/codegen/translator_factory.cpp
+++ b/src/codegen/translator_factory.cpp
@@ -37,6 +37,7 @@
 #include "expression/case_expression.h"
 #include "expression/comparison_expression.h"
 #include "expression/conjunction_expression.h"
+#include "expression/constant_value_expression.h"
 #include "expression/function_expression.h"
 #include "expression/operator_expression.h"
 #include "expression/tuple_value_expression.h"
diff --git a/src/common/internal_types.cpp b/src/common/internal_types.cpp
index b93da7f3b13..1ab2ed393b3 100644
--- a/src/common/internal_types.cpp
+++ b/src/common/internal_types.cpp
@@ -1877,6 +1877,36 @@ std::ostream &operator<<(std::ostream &os, const CopyType &type) {
   return os;
 }
 
+//===--------------------------------------------------------------------===//
+// ExternalFileFormat - String Utilities
+//===--------------------------------------------------------------------===//
+
+std::string ExternalFileFormatToString(ExternalFileFormat format) {
+  switch (format) {
+    case ExternalFileFormat::CSV:
+      return "CSV";
+    case ExternalFileFormat::BINARY:
+    default:
+      return "BINARY";
+  }
+}
+
+ExternalFileFormat StringToExternalFileFormat(const std::string &str) {
+  auto upper = StringUtil::Upper(str);
+  if (upper == "CSV") {
+    return ExternalFileFormat::CSV;
+  } else if (upper == "BINARY") {
+    return ExternalFileFormat::BINARY;
+  }
+  throw ConversionException(StringUtil::Format(
+      "No ExternalFileFormat for input '%s'", upper.c_str()));
+}
+
+std::ostream &operator<<(std::ostream &os, const ExternalFileFormat &format) {
+  os << ExternalFileFormatToString(format);
+  return os;
+}
+
 //===--------------------------------------------------------------------===//
 // PayloadType - String Utilities
 //===--------------------------------------------------------------------===//
diff --git a/src/include/common/internal_types.h b/src/include/common/internal_types.h
index 995a92cea2d..6c32b9665d0 100644
--- a/src/include/common/internal_types.h
+++ b/src/include/common/internal_types.h
@@ -556,6 +556,7 @@ enum class PlanNodeType {
   // Scan Nodes
   SEQSCAN = 10,
   INDEXSCAN = 11,
+  CSVSCAN = 12,
 
   // Join Nodes
   NESTLOOP = 20,
@@ -817,6 +818,14 @@ std::string CopyTypeToString(CopyType type);
 CopyType StringToCopyType(const std::string &str);
 std::ostream &operator<<(std::ostream &os, const CopyType &type);
 
+enum class ExternalFileFormat {
+  CSV,
+  BINARY
+};
+std::string ExternalFileFormatToString(ExternalFileFormat format);
+ExternalFileFormat StringToExternalFileFormat(const std::string &str);
+std::ostream &operator<<(std::ostream &os, const ExternalFileFormat &format);
+
 //===--------------------------------------------------------------------===//
 // Payload Types
 //===--------------------------------------------------------------------===//
diff --git a/src/include/parser/copy_statement.h b/src/include/parser/copy_statement.h
index 3af77a797c4..92100e312d1 100644
--- a/src/include/parser/copy_statement.h
+++ b/src/include/parser/copy_statement.h
@@ -2,19 +2,19 @@
 //
 //                         Peloton
 //
-// statement_import.h
+// copy_statement.h
 //
-// Identification: src/include/parser/statement_import.h
+// Identification: src/include/parser/copy_statement.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #pragma once
 
+#include "parser/select_statement.h"
 #include "parser/sql_statement.h"
 #include "parser/table_ref.h"
-#include "expression/constant_value_expression.h"
 #include "common/sql_node_visitor.h"
 
 namespace peloton {
@@ -26,25 +26,38 @@ namespace parser {
  */
 class CopyStatement : public SQLStatement {
  public:
-  CopyStatement(CopyType type)
+  CopyStatement()
       : SQLStatement(StatementType::COPY),
-        cpy_table(nullptr),
-        type(type),
-        delimiter(','){};
+        table(nullptr),
+        type(),
+        delimiter(',') {}
 
-  virtual ~CopyStatement() {}
+  ~CopyStatement() = default;
 
-  virtual void Accept(SqlNodeVisitor *v) override { v->Visit(this); }
+  void Accept(SqlNodeVisitor *v) override { v->Visit(this); }
 
   const std::string GetInfo(int num_indent) const override;
 
   const std::string GetInfo() const override;
 
-  std::unique_ptr<TableRef> cpy_table;
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Public member fields
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  std::unique_ptr<TableRef> table;
+
+  std::unique_ptr<SelectStatement> select_stmt;
 
   CopyType type;
 
   std::string file_path;
+
+  ExternalFileFormat format;
+
+  bool is_from;
+
   char delimiter;
 };
 
diff --git a/src/include/parser/postgresparser.h b/src/include/parser/postgresparser.h
index decd43d9ee7..388623a138c 100644
--- a/src/include/parser/postgresparser.h
+++ b/src/include/parser/postgresparser.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/parser/postgresparser.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/include/planner/abstract_plan.h b/src/include/planner/abstract_plan.h
index 2cb5e89ac49..c257b20d830 100644
--- a/src/include/planner/abstract_plan.h
+++ b/src/include/planner/abstract_plan.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/planner/abstract_plan.h
 //
-// Copyright (c) 2015-18, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/include/planner/copy_plan.h b/src/include/planner/copy_plan.h
index 079199cf755..fcb991b1666 100644
--- a/src/include/planner/copy_plan.h
+++ b/src/include/planner/copy_plan.h
@@ -6,43 +6,35 @@
 //
 // Identification: src/include/planner/copy_plan.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #pragma once
 
-#include "../parser/copy_statement.h"
-#include "../parser/select_statement.h"
 #include "planner/abstract_plan.h"
 
 namespace peloton {
 
 namespace storage {
 class DataTable;
-}
-
-namespace parser {
-class CopyStatement;
-}
+}  // namespace storage
 
 namespace planner {
 
 class CopyPlan : public AbstractPlan {
  public:
-  CopyPlan() = delete;
-
   explicit CopyPlan(std::string file_path, bool deserialize_parameters)
       : file_path(file_path), deserialize_parameters(deserialize_parameters) {
     LOG_DEBUG("Creating a Copy Plan");
   }
 
-  inline PlanNodeType GetPlanNodeType() const { return PlanNodeType::COPY; }
+  PlanNodeType GetPlanNodeType() const override  { return PlanNodeType::COPY; }
 
-  const std::string GetInfo() const { return "CopyPlan"; }
+  const std::string GetInfo() const override { return "CopyPlan"; }
 
   // TODO: Implement copy mechanism
-  std::unique_ptr<AbstractPlan> Copy() const { return nullptr; }
+  std::unique_ptr<AbstractPlan> Copy() const override { return nullptr; }
 
   // The path of the target file
   std::string file_path;
diff --git a/src/optimizer/util.cpp b/src/optimizer/util.cpp
index 0d01e35e8ac..b0129484442 100644
--- a/src/optimizer/util.cpp
+++ b/src/optimizer/util.cpp
@@ -144,7 +144,7 @@ bool ContainsJoinColumns(const std::unordered_set<std::string> &l_group_alias,
 
 std::unique_ptr<planner::AbstractPlan> CreateCopyPlan(
     parser::CopyStatement *copy_stmt) {
-  std::string table_name(copy_stmt->cpy_table->GetTableName());
+  std::string table_name(copy_stmt->table->GetTableName());
   bool deserialize_parameters = false;
 
   // If we're copying the query metric table, then we need to handle the
@@ -160,9 +160,9 @@ std::unique_ptr<planner::AbstractPlan> CreateCopyPlan(
   auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
   auto txn = txn_manager.BeginTransaction();
   auto target_table = catalog::Catalog::GetInstance()->GetTableWithName(
-      copy_stmt->cpy_table->GetDatabaseName(),
-      copy_stmt->cpy_table->GetSchemaName(),
-      copy_stmt->cpy_table->GetTableName(), txn);
+      copy_stmt->table->GetDatabaseName(),
+      copy_stmt->table->GetSchemaName(),
+      copy_stmt->table->GetTableName(), txn);
   txn_manager.CommitTransaction(txn);
 
   std::unique_ptr<planner::SeqScanPlan> select_plan(
diff --git a/src/parser/copy_statement.cpp b/src/parser/copy_statement.cpp
index b39fcbc8782..e4c5cd3d621 100644
--- a/src/parser/copy_statement.cpp
+++ b/src/parser/copy_statement.cpp
@@ -20,7 +20,7 @@ const std::string CopyStatement::GetInfo(int num_indent) const {
   os << StringUtil::Indent(num_indent) << "CopyStatement\n";
   os << StringUtil::Indent(num_indent + 1)
      << "-> Type :: " << CopyTypeToString(type) << "\n";
-  os << cpy_table.get()->GetInfo(num_indent + 1) << std::endl;
+  os << table.get()->GetInfo(num_indent + 1) << std::endl;
 
   os << StringUtil::Indent(num_indent + 1) << "-> File Path :: " << file_path
      << std::endl;
diff --git a/src/parser/postgresparser.cpp b/src/parser/postgresparser.cpp
index 797b77406b5..ffbea10e39d 100644
--- a/src/parser/postgresparser.cpp
+++ b/src/parser/postgresparser.cpp
@@ -1505,19 +1505,41 @@ parser::PrepareStatement *PostgresParser::PrepareTransform(PrepareStmt *root) {
   return result;
 }
 
-// TODO: Only support COPY TABLE TO FILE and DELIMITER option
 parser::CopyStatement *PostgresParser::CopyTransform(CopyStmt *root) {
-  auto result = new CopyStatement(peloton::CopyType::EXPORT_OTHER);
-  result->cpy_table.reset(RangeVarTransform(root->relation));
-  result->file_path = root->filename;
-  for (auto cell = root->options->head; cell != NULL; cell = cell->next) {
-    auto def_elem = reinterpret_cast<DefElem *>(cell->data.ptr_value);
-    if (strcmp(def_elem->defname, "delimiter") == 0) {
-      auto delimiter = reinterpret_cast<value *>(def_elem->arg)->val.str;
-      result->delimiter = *delimiter;
-      break;
+  static constexpr char kDelimiterTok[] = "delimiter";
+  static constexpr char kFormatTok[] = "format";
+
+  // The main return value
+  auto *result = new CopyStatement();
+
+  if (root->relation) {
+    result->table.reset(RangeVarTransform(root->relation));
+  } else {
+    result->select_stmt.reset(
+        SelectTransform(reinterpret_cast<SelectStmt *>(root->query)));
+  }
+
+  result->file_path = (root->filename != nullptr ? root->filename : "");
+  result->is_from = root->is_from;
+
+  // Handle options
+  ListCell *cell = nullptr;
+  for_each_cell(cell, root->options->head) {
+    auto *def_elem = reinterpret_cast<DefElem *>(cell->data.ptr_value);
+
+    // Check delimiter
+    if (strncmp(def_elem->defname, kDelimiterTok, sizeof(kDelimiterTok)) == 0) {
+      auto *delimiter_val = reinterpret_cast<value *>(def_elem->arg);
+      result->delimiter = *delimiter_val->val.str;
+    }
+
+    // Check format
+    if (strncmp(def_elem->defname, kFormatTok, sizeof(kFormatTok)) == 0) {
+      auto *format_val = reinterpret_cast<value *>(def_elem->arg);
+      result->format = StringToExternalFileFormat(format_val->val.str);
     }
   }
+
   return result;
 }
 
diff --git a/test/parser/postgresparser_test.cpp b/test/parser/postgresparser_test.cpp
index 36910bdc9a9..dee0d981491 100644
--- a/test/parser/postgresparser_test.cpp
+++ b/test/parser/postgresparser_test.cpp
@@ -17,6 +17,7 @@
 #include "common/internal_types.h"
 #include "common/logger.h"
 #include "common/macros.h"
+#include "expression/constant_value_expression.h"
 #include "expression/function_expression.h"
 #include "expression/operator_expression.h"
 #include "expression/tuple_value_expression.h"
diff --git a/test/trigger/trigger_test.cpp b/test/trigger/trigger_test.cpp
index 4dacd00cc1d..7238d7f9e6d 100644
--- a/test/trigger/trigger_test.cpp
+++ b/test/trigger/trigger_test.cpp
@@ -10,17 +10,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "trigger/trigger.h"
 #include "catalog/catalog.h"
 #include "common/harness.h"
 #include "concurrency/transaction_manager_factory.h"
 #include "executor/executors.h"
 #include "executor/executor_context.h"
+#include "expression/constant_value_expression.h"
 #include "parser/pg_trigger.h"
 #include "parser/postgresparser.h"
 #include "planner/create_plan.h"
 #include "planner/insert_plan.h"
 #include "storage/abstract_table.h"
+#include "trigger/trigger.h"
 
 namespace peloton {
 namespace test {

From ab08eba35db21cdf9eb688791d1d2f912d6b59a0 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 26 Apr 2018 01:55:20 -0400
Subject: [PATCH 02/42] Add CSVScan node to ToString and FromString. Removed
 BINARY external format for now.

---
 src/common/internal_types.cpp       | 11 ++++++-----
 src/include/common/internal_types.h |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/common/internal_types.cpp b/src/common/internal_types.cpp
index 1ab2ed393b3..427e9848e25 100644
--- a/src/common/internal_types.cpp
+++ b/src/common/internal_types.cpp
@@ -1304,6 +1304,9 @@ std::string PlanNodeTypeToString(PlanNodeType type) {
     case PlanNodeType::INDEXSCAN: {
       return ("INDEXSCAN");
     }
+    case PlanNodeType::CSVSCAN: {
+      return ("CSVSCAN");
+    }
     case PlanNodeType::NESTLOOP: {
       return ("NESTLOOP");
     }
@@ -1408,6 +1411,8 @@ PlanNodeType StringToPlanNodeType(const std::string &str) {
     return PlanNodeType::SEQSCAN;
   } else if (upper_str == "INDEXSCAN") {
     return PlanNodeType::INDEXSCAN;
+  } else if (upper_str == "CSVSCAN") {
+    return PlanNodeType::CSVSCAN;
   } else if (upper_str == "NESTLOOP") {
     return PlanNodeType::NESTLOOP;
   } else if (upper_str == "NESTLOOPINDEX") {
@@ -1884,10 +1889,8 @@ std::ostream &operator<<(std::ostream &os, const CopyType &type) {
 std::string ExternalFileFormatToString(ExternalFileFormat format) {
   switch (format) {
     case ExternalFileFormat::CSV:
-      return "CSV";
-    case ExternalFileFormat::BINARY:
     default:
-      return "BINARY";
+      return "CSV";
   }
 }
 
@@ -1895,8 +1898,6 @@ ExternalFileFormat StringToExternalFileFormat(const std::string &str) {
   auto upper = StringUtil::Upper(str);
   if (upper == "CSV") {
     return ExternalFileFormat::CSV;
-  } else if (upper == "BINARY") {
-    return ExternalFileFormat::BINARY;
   }
   throw ConversionException(StringUtil::Format(
       "No ExternalFileFormat for input '%s'", upper.c_str()));
diff --git a/src/include/common/internal_types.h b/src/include/common/internal_types.h
index 6c32b9665d0..4654ec9bc77 100644
--- a/src/include/common/internal_types.h
+++ b/src/include/common/internal_types.h
@@ -820,7 +820,6 @@ std::ostream &operator<<(std::ostream &os, const CopyType &type);
 
 enum class ExternalFileFormat {
   CSV,
-  BINARY
 };
 std::string ExternalFileFormatToString(ExternalFileFormat format);
 ExternalFileFormat StringToExternalFileFormat(const std::string &str);
@@ -1345,6 +1344,7 @@ enum class RuleType : uint32_t {
   GET_TO_SEQ_SCAN,
   GET_TO_INDEX_SCAN,
   QUERY_DERIVED_GET_TO_PHYSICAL,
+  EXTERNAL_FILE_GET_TO_PHYSICAL,
   DELETE_TO_PHYSICAL,
   UPDATE_TO_PHYSICAL,
   INSERT_TO_PHYSICAL,

From 2c681f05467d10d7c0ee7f05ad520d6cf91d4b12 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 26 Apr 2018 02:01:58 -0400
Subject: [PATCH 03/42] Move COPY from DDL to DML processing. COPY now goes
 through planner/optimization.

---
 src/executor/copy_executor.cpp                | 32 ----------
 src/include/executor/copy_executor.h          |  3 -
 .../optimizer/child_property_deriver.h        |  1 +
 src/include/optimizer/cost_calculator.h       |  1 +
 src/include/optimizer/operator_node.h         |  3 +
 src/include/optimizer/operator_visitor.h      |  3 +
 src/include/optimizer/operators.h             | 52 +++++++++++++--
 src/include/optimizer/optimizer.h             | 41 ++++++++----
 src/include/optimizer/plan_generator.h        |  2 +
 src/include/optimizer/rule_impls.h            | 12 ++++
 src/include/optimizer/util.h                  |  6 --
 src/include/planner/copy_plan.h               | 10 +--
 src/include/planner/csv_scan_plan.h           | 46 +++++++++++++
 src/optimizer/child_property_deriver.cpp      |  6 ++
 src/optimizer/cost_calculator.cpp             |  5 ++
 src/optimizer/operators.cpp                   | 64 +++++++++++++++++++
 src/optimizer/optimizer.cpp                   |  7 --
 src/optimizer/plan_generator.cpp              |  5 ++
 .../query_to_operator_transformer.cpp         | 30 ++++++++-
 src/optimizer/rule.cpp                        |  1 +
 src/optimizer/rule_impls.cpp                  | 25 ++++++++
 src/optimizer/util.cpp                        | 33 ----------
 22 files changed, 277 insertions(+), 111 deletions(-)
 create mode 100644 src/include/planner/csv_scan_plan.h

diff --git a/src/executor/copy_executor.cpp b/src/executor/copy_executor.cpp
index ce16d8c83eb..e55d665bc6c 100644
--- a/src/executor/copy_executor.cpp
+++ b/src/executor/copy_executor.cpp
@@ -56,11 +56,6 @@ bool CopyExecutor::DInit() {
     return false;
   }
   LOG_DEBUG("Created target copy output file: %s", node.file_path.c_str());
-
-  // Whether we're copying the parameters which require deserialization
-  if (node.deserialize_parameters) {
-    InitParamColIds();
-  }
   return true;
 }
 
@@ -122,33 +117,6 @@ void CopyExecutor::FFlushFsync() {
   }
 }
 
-void CopyExecutor::InitParamColIds() {
-  // If we're going to deserialize prepared statement, get the column ids for
-  // the varbinary columns first
-  // auto catalog = catalog::Catalog::GetInstance();
-  // try {
-  //   auto query_metric_table =
-  //       catalog->GetTableWithName(CATALOG_DATABASE_NAME, QUERY_METRIC_NAME);
-  //   auto schema = query_metric_table->GetSchema();
-  //   auto &cols = schema->GetColumns();
-  //   for (unsigned int i = 0; i < cols.size(); i++) {
-  //     auto col_name = cols[i].column_name.c_str();
-  //     if (std::strcmp(col_name, QUERY_PARAM_TYPE_COL_NAME) == 0) {
-  //       param_type_col_id = i;
-  //     } else if (std::strcmp(col_name, QUERY_PARAM_FORMAT_COL_NAME) == 0) {
-  //       param_format_col_id = i;
-  //     } else if (std::strcmp(col_name, QUERY_PARAM_VAL_COL_NAME) == 0) {
-  //       param_val_col_id = i;
-  //     } else if (std::strcmp(col_name, QUERY_NUM_PARAM_COL_NAME) == 0) {
-  //       num_param_col_id = i;
-  //     }
-  //   }
-  // }
-  // catch (Exception &e) {
-  //   e.PrintStackTrace();
-  // }
-}
-
 void CopyExecutor::Copy(const char *data, int len, bool end_of_line) {
   // Worst case we need to escape all character and two delimiters
   while (COPY_BUFFER_SIZE - buff_size - buff_ptr < (size_t)len * 3) {
diff --git a/src/include/executor/copy_executor.h b/src/include/executor/copy_executor.h
index 31d65adaa1b..a95b6c49e86 100644
--- a/src/include/executor/copy_executor.h
+++ b/src/include/executor/copy_executor.h
@@ -40,9 +40,6 @@ class CopyExecutor : public AbstractExecutor {
 
   bool DExecute();
 
-  // Initialize the column ids for query parameters
-  void InitParamColIds();
-
   bool InitFileHandle(const char *name, const char *mode);
 
   // Flush the local buffer
diff --git a/src/include/optimizer/child_property_deriver.h b/src/include/optimizer/child_property_deriver.h
index bd4aeb7b933..dd887ff9af3 100644
--- a/src/include/optimizer/child_property_deriver.h
+++ b/src/include/optimizer/child_property_deriver.h
@@ -39,6 +39,7 @@ class ChildPropertyDeriver : public OperatorVisitor {
   void Visit(const DummyScan *) override;
   void Visit(const PhysicalSeqScan *) override;
   void Visit(const PhysicalIndexScan *) override;
+  void Visit(const ExternalFileScan *) override;
   void Visit(const QueryDerivedScan *op) override;
   void Visit(const PhysicalOrderBy *) override;
   void Visit(const PhysicalLimit *) override;
diff --git a/src/include/optimizer/cost_calculator.h b/src/include/optimizer/cost_calculator.h
index 442f386fc5f..8ef40330d6b 100644
--- a/src/include/optimizer/cost_calculator.h
+++ b/src/include/optimizer/cost_calculator.h
@@ -27,6 +27,7 @@ class CostCalculator : public OperatorVisitor {
   void Visit(const DummyScan *) override;
   void Visit(const PhysicalSeqScan *) override;
   void Visit(const PhysicalIndexScan *) override;
+  void Visit(const ExternalFileScan *) override;
   void Visit(const QueryDerivedScan *) override;
   void Visit(const PhysicalOrderBy *) override;
   void Visit(const PhysicalLimit *) override;
diff --git a/src/include/optimizer/operator_node.h b/src/include/optimizer/operator_node.h
index cb20c163bbe..bfc0653518d 100644
--- a/src/include/optimizer/operator_node.h
+++ b/src/include/optimizer/operator_node.h
@@ -27,6 +27,7 @@ enum class OpType {
   Leaf,
   // Logical ops
   Get,
+  LogicalExternalFileGet,
   LogicalQueryDerivedGet,
   LogicalProjection,
   LogicalFilter,
@@ -45,12 +46,14 @@ enum class OpType {
   LogicalUpdate,
   LogicalLimit,
   LogicalDistinct,
+  LogicalExportExternalFile,
   // Separate between logical and physical ops
   LogicalPhysicalDelimiter,
   // Physical ops
   DummyScan, /* Dummy Physical Op for SELECT without FROM*/
   SeqScan,
   IndexScan,
+  ExternalFileScan,
   QueryDerivedScan,
   OrderBy,
   PhysicalLimit,
diff --git a/src/include/optimizer/operator_visitor.h b/src/include/optimizer/operator_visitor.h
index 75b0a9f9c67..50fd98fa024 100644
--- a/src/include/optimizer/operator_visitor.h
+++ b/src/include/optimizer/operator_visitor.h
@@ -29,6 +29,7 @@ class OperatorVisitor {
   virtual void Visit(const DummyScan *) {}
   virtual void Visit(const PhysicalSeqScan *) {}
   virtual void Visit(const PhysicalIndexScan *) {}
+  virtual void Visit(const ExternalFileScan *) {}
   virtual void Visit(const QueryDerivedScan *) {}
   virtual void Visit(const PhysicalOrderBy *) {}
   virtual void Visit(const PhysicalLimit *) {}
@@ -52,6 +53,7 @@ class OperatorVisitor {
   // Logical operator
   virtual void Visit(const LeafOperator *) {}
   virtual void Visit(const LogicalGet *) {}
+  virtual void Visit(const LogicalExternalFileGet *) {}
   virtual void Visit(const LogicalQueryDerivedGet *) {}
   virtual void Visit(const LogicalFilter *) {}
   virtual void Visit(const LogicalProjection *) {}
@@ -70,6 +72,7 @@ class OperatorVisitor {
   virtual void Visit(const LogicalUpdate *) {}
   virtual void Visit(const LogicalDistinct *) {}
   virtual void Visit(const LogicalLimit *) {}
+  virtual void Visit(const LogicalExportExternalFile *) {}
 };
 
 }  // namespace optimizer
diff --git a/src/include/optimizer/operators.h b/src/include/optimizer/operators.h
index a745439251a..7e27240973a 100644
--- a/src/include/optimizer/operators.h
+++ b/src/include/optimizer/operators.h
@@ -1,4 +1,3 @@
-
 //===----------------------------------------------------------------------===//
 //
 //                         Peloton
@@ -7,7 +6,7 @@
 //
 // Identification: src/include/optimizer/operators.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,7 +30,7 @@ class UpdateClause;
 }
 
 namespace catalog {
-  class TableCatalogObject;
+class TableCatalogObject;
 }
 
 namespace optimizer {
@@ -51,10 +50,10 @@ class LeafOperator : OperatorNode<LeafOperator> {
 //===--------------------------------------------------------------------===//
 class LogicalGet : public OperatorNode<LogicalGet> {
  public:
-  static Operator make(oid_t get_id = 0,
-                       std::vector<AnnotatedExpression> predicates = {},
-                       std::shared_ptr<catalog::TableCatalogObject> table = nullptr,
-                       std::string alias = "", bool update = false);
+  static Operator make(
+      oid_t get_id = 0, std::vector<AnnotatedExpression> predicates = {},
+      std::shared_ptr<catalog::TableCatalogObject> table = nullptr,
+      std::string alias = "", bool update = false);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -68,6 +67,21 @@ class LogicalGet : public OperatorNode<LogicalGet> {
   bool is_for_update;
 };
 
+//===--------------------------------------------------------------------===//
+// External file get
+//===--------------------------------------------------------------------===//
+class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
+ public:
+  static Operator make(oid_t get_id);
+
+  bool operator==(const BaseOperatorNode &r) override;
+
+  hash_t Hash() const override;
+
+  // identifier for all get operators
+  oid_t get_id;
+};
+
 //===--------------------------------------------------------------------===//
 // Query derived get
 //===--------------------------------------------------------------------===//
@@ -304,6 +318,15 @@ class LogicalUpdate : public OperatorNode<LogicalUpdate> {
   const std::vector<std::unique_ptr<parser::UpdateClause>> *updates;
 };
 
+//===--------------------------------------------------------------------===//
+// External file get
+//===--------------------------------------------------------------------===//
+class LogicalExportExternalFile
+    : public OperatorNode<LogicalExportExternalFile> {
+ public:
+  static Operator make();
+};
+
 //===--------------------------------------------------------------------===//
 // DummyScan
 //===--------------------------------------------------------------------===//
@@ -366,6 +389,21 @@ class PhysicalIndexScan : public OperatorNode<PhysicalIndexScan> {
   std::vector<type::Value> value_list;
 };
 
+//===--------------------------------------------------------------------===//
+// Physical external file scan
+//===--------------------------------------------------------------------===//
+class ExternalFileScan : public OperatorNode<ExternalFileScan> {
+ public:
+  static Operator make(oid_t get_id);
+
+  bool operator==(const BaseOperatorNode &r) override;
+
+  hash_t Hash() const override;
+
+  // identifier for all get operators
+  oid_t get_id;
+};
+
 //===--------------------------------------------------------------------===//
 // Query derived get
 //===--------------------------------------------------------------------===//
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 82b1d4c9a05..71d7afca265 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -38,9 +38,9 @@ class TransactionContext;
 }
 
 namespace test {
-  class OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
-  class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
-} 
+class OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
+class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
+}
 
 namespace optimizer {
 
@@ -60,8 +60,10 @@ class Optimizer : public AbstractOptimizer {
   friend class BindingIterator;
   friend class GroupBindingIterator;
 
-  friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
-  friend class ::peloton::test::OptimizerRuleTests_SimpleAssociativeRuleTest2_Test; 
+  friend class ::peloton::test::
+      OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
+  friend class ::peloton::test::
+      OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
 
  public:
   Optimizer(const Optimizer &) = delete;
@@ -83,28 +85,41 @@ class Optimizer : public AbstractOptimizer {
   OptimizerMetadata &GetMetadata() { return metadata_; }
 
   /* For test purposes only */
-  std::shared_ptr<GroupExpression> TestInsertQueryTree(parser::SQLStatement *tree,
-  concurrency::TransactionContext *txn) {
+  std::shared_ptr<GroupExpression> TestInsertQueryTree(
+      parser::SQLStatement *tree, concurrency::TransactionContext *txn) {
     return InsertQueryTree(tree, txn);
   }
   /* For test purposes only */
   void TestExecuteTaskStack(OptimizerTaskStack &task_stack, int root_group_id,
-                        std::shared_ptr<OptimizeContext> root_context) {
+                            std::shared_ptr<OptimizeContext> root_context) {
     return ExecuteTaskStack(task_stack, root_group_id, root_context);
   }
 
  private:
-  /* HandleDDLStatement - Check and handle DDL statment (currently only support
-   *CREATE), set
-   * is_ddl_stmt to false if there is no DDL statement.
+  /**
+   * Check and handle the provided DDL statement, returning the resulting plan
+   * if parsed tree is a DDL statement. The is_ddl_stmt parameter is set to
+   * indicate if the parse tree was indeed a DDL statement.
    *
-   * tree: a peloton query tree representing a select query
-   * return: the DDL plan if it is a DDL statement
+   * @param tree A parsed SQL statement
+   * @param[out] is_ddl_stmt Set to true if the SQL statement is DDL
+   * @param txn The transactional context
+   * @return The constructed plan tree representing the DDL statement
    */
   std::unique_ptr<planner::AbstractPlan> HandleDDLStatement(
       parser::SQLStatement *tree, bool &is_ddl_stmt,
       concurrency::TransactionContext *txn);
 
+  /**
+   * Construct a plan object for the given parsed copy statement.
+   *
+   * @param copy_stmt The copy statement we're transforming
+   * @param txn The transactional context
+   * @return The construct plan object for the COPY statement
+   */
+  std::unique_ptr<planner::AbstractPlan> HandleDDLCopyStatement(
+      parser::CopyStatement *copy_stmt, concurrency::TransactionContext *txn);
+
   /* TransformQueryTree - create an initial operator tree for the given query
    * to be used in performing optimization.
    *
diff --git a/src/include/optimizer/plan_generator.h b/src/include/optimizer/plan_generator.h
index c0a21259bc6..353de6db29f 100644
--- a/src/include/optimizer/plan_generator.h
+++ b/src/include/optimizer/plan_generator.h
@@ -54,6 +54,8 @@ class PlanGenerator : public OperatorVisitor {
 
   void Visit(const PhysicalIndexScan *) override;
 
+  void Visit(const ExternalFileScan *) override;
+
   void Visit(const QueryDerivedScan *) override;
 
   void Visit(const PhysicalOrderBy *) override;
diff --git a/src/include/optimizer/rule_impls.h b/src/include/optimizer/rule_impls.h
index 2c40e3f3c81..5ace068138d 100644
--- a/src/include/optimizer/rule_impls.h
+++ b/src/include/optimizer/rule_impls.h
@@ -73,6 +73,18 @@ class GetToSeqScan : public Rule {
                  OptimizeContext *context) const override;
 };
 
+class LogicalExternalFileGetToPhysical : public Rule {
+ public:
+  LogicalExternalFileGetToPhysical();
+
+  bool Check(std::shared_ptr<OperatorExpression> plan,
+             OptimizeContext *context) const override;
+
+  void Transform(std::shared_ptr<OperatorExpression> input,
+                 std::vector<std::shared_ptr<OperatorExpression>> &transformed,
+                 OptimizeContext *context) const override;
+};
+
 /**
  * @brief Generate dummy scan for queries like "SELECT 1", there's no actual
  * table to generate
diff --git a/src/include/optimizer/util.h b/src/include/optimizer/util.h
index 8b9eb4baeef..877bdee3b96 100644
--- a/src/include/optimizer/util.h
+++ b/src/include/optimizer/util.h
@@ -122,12 +122,6 @@ bool ContainsJoinColumns(const std::unordered_set<std::string> &l_group_alias,
                          const std::unordered_set<std::string> &r_group_alias,
                          const expression::AbstractExpression *expr);
 
-/**
- * @brief Create a copy plan based on the copy statement
- */
-std::unique_ptr<planner::AbstractPlan> CreateCopyPlan(
-    parser::CopyStatement *copy_stmt);
-
 /**
  * @brief Construct the map from subquery column name to the actual expression
  *  at the subquery level, for example SELECT a FROM (SELECT a + b as a FROM
diff --git a/src/include/planner/copy_plan.h b/src/include/planner/copy_plan.h
index fcb991b1666..082598d10af 100644
--- a/src/include/planner/copy_plan.h
+++ b/src/include/planner/copy_plan.h
@@ -24,12 +24,9 @@ namespace planner {
 
 class CopyPlan : public AbstractPlan {
  public:
-  explicit CopyPlan(std::string file_path, bool deserialize_parameters)
-      : file_path(file_path), deserialize_parameters(deserialize_parameters) {
-    LOG_DEBUG("Creating a Copy Plan");
-  }
+  explicit CopyPlan(std::string file_path) : file_path(std::move(file_path)) {}
 
-  PlanNodeType GetPlanNodeType() const override  { return PlanNodeType::COPY; }
+  PlanNodeType GetPlanNodeType() const override { return PlanNodeType::COPY; }
 
   const std::string GetInfo() const override { return "CopyPlan"; }
 
@@ -39,9 +36,6 @@ class CopyPlan : public AbstractPlan {
   // The path of the target file
   std::string file_path;
 
-  // Whether the copying requires deserialization of parameters
-  bool deserialize_parameters = false;
-
  private:
   DISALLOW_COPY_AND_MOVE(CopyPlan);
 };
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
new file mode 100644
index 00000000000..a58cc87b0f5
--- /dev/null
+++ b/src/include/planner/csv_scan_plan.h
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scan_plan.h
+//
+// Identification: src/include/planner/csv_scan_plan.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "planner/abstract_plan.h"
+
+namespace peloton {
+namespace planner {
+
+class CSVScanPlan : public AbstractPlan {
+ public:
+  CSVScanPlan(const std::string file_name) : file_name_(std::move(file_name)) {}
+
+  PlanNodeType GetPlanNodeType() const override {
+    return PlanNodeType::CSVSCAN;
+  }
+
+  std::unique_ptr<AbstractPlan> Copy() const override;
+
+ private:
+  const std::string file_name_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Implementation below
+///
+////////////////////////////////////////////////////////////////////////////////
+
+inline std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
+  // TODO
+  return std::unique_ptr<AbstractPlan>();
+}
+
+}  // namespace planner
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp
index 1df06b3ea50..5020302b614 100644
--- a/src/optimizer/child_property_deriver.cpp
+++ b/src/optimizer/child_property_deriver.cpp
@@ -94,6 +94,12 @@ void ChildPropertyDeriver::Visit(const PhysicalIndexScan *op) {
       make_pair(provided_prop, vector<shared_ptr<PropertySet>>{}));
 }
 
+void ChildPropertyDeriver::Visit(const ExternalFileScan *) {
+  // External file scans (like sequential scans) do not provide properties
+  output_.push_back(
+      make_pair(make_shared<PropertySet>(), vector<shared_ptr<PropertySet>>{}));
+}
+
 void ChildPropertyDeriver::Visit(const QueryDerivedScan *) {
   output_.push_back(
       make_pair(requirements_, vector<shared_ptr<PropertySet>>{requirements_}));
diff --git a/src/optimizer/cost_calculator.cpp b/src/optimizer/cost_calculator.cpp
index 5dda9e67c8a..56cbbecc64e 100644
--- a/src/optimizer/cost_calculator.cpp
+++ b/src/optimizer/cost_calculator.cpp
@@ -59,6 +59,11 @@ void CostCalculator::Visit(UNUSED_ATTRIBUTE const PhysicalIndexScan *op) {
                  memo_->GetGroupByID(gexpr_->GetGroupID())->GetNumRows() *
                      DEFAULT_TUPLE_COST;
 }
+
+void CostCalculator::Visit(UNUSED_ATTRIBUTE const ExternalFileScan *) {
+  output_cost_ = 0.0;
+}
+
 void CostCalculator::Visit(UNUSED_ATTRIBUTE const QueryDerivedScan *op) {
   output_cost_ = 0.f;
 }
diff --git a/src/optimizer/operators.cpp b/src/optimizer/operators.cpp
index 78c34d16257..60e074556a3 100644
--- a/src/optimizer/operators.cpp
+++ b/src/optimizer/operators.cpp
@@ -61,6 +61,28 @@ bool LogicalGet::operator==(const BaseOperatorNode &r) {
   return get_id == node.get_id;
 }
 
+//===--------------------------------------------------------------------===//
+// External file get
+//===--------------------------------------------------------------------===//
+
+Operator LogicalExternalFileGet::make(oid_t get_id) {
+  auto *get = new LogicalExternalFileGet();
+  get->get_id = get_id;
+  return Operator(get);
+}
+
+bool LogicalExternalFileGet::operator==(const BaseOperatorNode &node) {
+  if (node.GetType() != OpType::LogicalExternalFileGet) return false;
+  const auto &get = *static_cast<const LogicalQueryDerivedGet *>(&node);
+  return get_id == get.get_id;
+}
+
+hash_t LogicalExternalFileGet::Hash() const {
+  hash_t hash = BaseOperatorNode::Hash();
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id));
+  return hash;
+}
+
 //===--------------------------------------------------------------------===//
 // Query derived get
 //===--------------------------------------------------------------------===//
@@ -411,6 +433,14 @@ Operator LogicalLimit::make(int64_t offset, int64_t limit) {
   return Operator(limit_op);
 }
 
+//===--------------------------------------------------------------------===//
+// External file output
+//===--------------------------------------------------------------------===//
+Operator LogicalExportExternalFile::make() {
+  auto *export_op = new LogicalExternalFileGet();
+  return Operator(export_op);
+}
+
 //===--------------------------------------------------------------------===//
 // DummyScan
 //===--------------------------------------------------------------------===//
@@ -506,6 +536,27 @@ hash_t PhysicalIndexScan::Hash() const {
   return hash;
 }
 
+//===--------------------------------------------------------------------===//
+// Physical external file scan
+//===--------------------------------------------------------------------===//
+Operator ExternalFileScan::make(oid_t get_id) {
+  auto *get = new ExternalFileScan();
+  get->get_id = get_id;
+  return Operator(get);
+}
+
+bool ExternalFileScan::operator==(const BaseOperatorNode &node) {
+  if (node.GetType() != OpType::QueryDerivedScan) return false;
+  const auto &get = *static_cast<const ExternalFileScan *>(&node);
+  return get_id == get.get_id;
+}
+
+hash_t ExternalFileScan::Hash() const {
+  hash_t hash = BaseOperatorNode::Hash();
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id));
+  return hash;
+}
+
 //===--------------------------------------------------------------------===//
 // Query derived get
 //===--------------------------------------------------------------------===//
@@ -846,6 +897,8 @@ std::string OperatorNode<LeafOperator>::name_ = "LeafOperator";
 template <>
 std::string OperatorNode<LogicalGet>::name_ = "LogicalGet";
 template <>
+std::string OperatorNode<LogicalExternalFileGet>::name_ = "LogicalExternalFileGet";
+template <>
 std::string OperatorNode<LogicalQueryDerivedGet>::name_ =
     "LogicalQueryDerivedGet";
 template <>
@@ -884,12 +937,16 @@ std::string OperatorNode<LogicalLimit>::name_ = "LogicalLimit";
 template <>
 std::string OperatorNode<LogicalDistinct>::name_ = "LogicalDistinct";
 template <>
+std::string OperatorNode<LogicalExportExternalFile>::name_ = "LogicalExportExternalFile";
+template <>
 std::string OperatorNode<DummyScan>::name_ = "DummyScan";
 template <>
 std::string OperatorNode<PhysicalSeqScan>::name_ = "PhysicalSeqScan";
 template <>
 std::string OperatorNode<PhysicalIndexScan>::name_ = "PhysicalIndexScan";
 template <>
+std::string OperatorNode<ExternalFileScan>::name_ = "ExternalFileScan";
+template <>
 std::string OperatorNode<QueryDerivedScan>::name_ = "QueryDerivedScan";
 template <>
 std::string OperatorNode<PhysicalOrderBy>::name_ = "PhysicalOrderBy";
@@ -937,6 +994,8 @@ OpType OperatorNode<LeafOperator>::type_ = OpType::Leaf;
 template <>
 OpType OperatorNode<LogicalGet>::type_ = OpType::Get;
 template <>
+OpType OperatorNode<LogicalExternalFileGet>::type_ = OpType::LogicalExternalFileGet;
+template <>
 OpType OperatorNode<LogicalQueryDerivedGet>::type_ =
     OpType::LogicalQueryDerivedGet;
 template <>
@@ -974,6 +1033,9 @@ template <>
 OpType OperatorNode<LogicalDistinct>::type_ = OpType::LogicalDistinct;
 template <>
 OpType OperatorNode<LogicalLimit>::type_ = OpType::LogicalLimit;
+template <>
+OpType OperatorNode<LogicalExportExternalFile>::type_ = OpType::LogicalExportExternalFile;
+
 template <>
 OpType OperatorNode<DummyScan>::type_ = OpType::DummyScan;
 template <>
@@ -981,6 +1043,8 @@ OpType OperatorNode<PhysicalSeqScan>::type_ = OpType::SeqScan;
 template <>
 OpType OperatorNode<PhysicalIndexScan>::type_ = OpType::IndexScan;
 template <>
+OpType OperatorNode<ExternalFileScan>::type_ = OpType::ExternalFileScan;
+template <>
 OpType OperatorNode<QueryDerivedScan>::type_ = OpType::QueryDerivedScan;
 template <>
 OpType OperatorNode<PhysicalOrderBy>::type_ = OpType::OrderBy;
diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp
index 62f813ec876..5722034ef7d 100644
--- a/src/optimizer/optimizer.cpp
+++ b/src/optimizer/optimizer.cpp
@@ -214,13 +214,6 @@ unique_ptr<planner::AbstractPlan> Optimizer::HandleDDLStatement(
       ddl_plan = move(analyze_plan);
       break;
     }
-    case StatementType::COPY: {
-      LOG_TRACE("Adding Copy plan...");
-      parser::CopyStatement *copy_parse_tree =
-          static_cast<parser::CopyStatement *>(tree);
-      ddl_plan = util::CreateCopyPlan(copy_parse_tree);
-      break;
-    }
     default:
       is_ddl_stmt = false;
   }
diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp
index a16b70c3878..b592e92fe4c 100644
--- a/src/optimizer/plan_generator.cpp
+++ b/src/optimizer/plan_generator.cpp
@@ -20,6 +20,7 @@
 #include "optimizer/operator_expression.h"
 #include "optimizer/properties.h"
 #include "planner/aggregate_plan.h"
+#include "planner/csv_scan_plan.h"
 #include "planner/delete_plan.h"
 #include "planner/hash_join_plan.h"
 #include "planner/hash_plan.h"
@@ -127,6 +128,10 @@ void PlanGenerator::Visit(const PhysicalIndexScan *op) {
       predicate.release(), column_ids, index_scan_desc, false));
 }
 
+void PlanGenerator::Visit(const ExternalFileScan *) {
+  output_plan_.reset(new planner::CSVScanPlan("sdfsdf"));
+}
+
 void PlanGenerator::Visit(const QueryDerivedScan *) {
   PELOTON_ASSERT(children_plans_.size() == 1);
   output_plan_ = move(children_plans_[0]);
diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp
index ff75140d5f5..b0f9b8f4446 100644
--- a/src/optimizer/query_to_operator_transformer.cpp
+++ b/src/optimizer/query_to_operator_transformer.cpp
@@ -359,8 +359,34 @@ void QueryToOperatorTransformer::Visit(parser::UpdateStatement *op) {
 
   output_expr_ = update_expr;
 }
-void QueryToOperatorTransformer::Visit(
-    UNUSED_ATTRIBUTE parser::CopyStatement *op) {}
+void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
+  if (op->is_from) {
+    auto get_op = std::make_shared<OperatorExpression>(
+        LogicalExternalFileGet::make(GetAndIncreaseGetId()));
+
+    auto target_table =
+        catalog::Catalog::GetInstance()
+            ->GetDatabaseObject(op->table->GetDatabaseName(), txn_)
+            ->GetTableObject(op->table->GetTableName());
+
+    auto insert_expr = std::make_shared<OperatorExpression>(
+        LogicalInsertSelect::make(target_table));
+
+    insert_expr->PushChild(get_op);
+    output_expr_ = insert_expr;
+  } else {
+    if (op->select_stmt != nullptr) {
+      op->select_stmt->Accept(this);
+    } else {
+      op->table->Accept(this);
+    }
+    auto export_op =
+        std::make_shared<OperatorExpression>(LogicalExportExternalFile::make());
+    export_op->PushChild(output_expr_);
+    output_expr_ = export_op;
+  }
+}
+
 void QueryToOperatorTransformer::Visit(
     UNUSED_ATTRIBUTE parser::AnalyzeStatement *op) {}
 
diff --git a/src/optimizer/rule.cpp b/src/optimizer/rule.cpp
index 1e81799147d..fc4bc837736 100644
--- a/src/optimizer/rule.cpp
+++ b/src/optimizer/rule.cpp
@@ -39,6 +39,7 @@ RuleSet::RuleSet() {
   AddImplementationRule(new GetToDummyScan());
   AddImplementationRule(new GetToSeqScan());
   AddImplementationRule(new GetToIndexScan());
+  AddImplementationRule(new LogicalExternalFileGetToPhysical());
   AddImplementationRule(new LogicalQueryDerivedGetToPhysical());
   AddImplementationRule(new InnerJoinToInnerNLJoin());
   AddImplementationRule(new InnerJoinToInnerHashJoin());
diff --git a/src/optimizer/rule_impls.cpp b/src/optimizer/rule_impls.cpp
index e540555c9e3..fcab5412621 100644
--- a/src/optimizer/rule_impls.cpp
+++ b/src/optimizer/rule_impls.cpp
@@ -440,6 +440,31 @@ void LogicalQueryDerivedGetToPhysical::Transform(
   transformed.push_back(result_plan);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// LogicalExternalFileGetToPhysical
+LogicalExternalFileGetToPhysical::LogicalExternalFileGetToPhysical() {
+  type_ = RuleType::EXTERNAL_FILE_GET_TO_PHYSICAL;
+  match_pattern = std::make_shared<Pattern>(OpType::LogicalExternalFileGet);
+}
+
+bool LogicalExternalFileGetToPhysical::Check(
+    UNUSED_ATTRIBUTE std::shared_ptr<OperatorExpression> plan,
+    UNUSED_ATTRIBUTE OptimizeContext *context) const {
+  return true;
+}
+
+void LogicalExternalFileGetToPhysical::Transform(
+    std::shared_ptr<OperatorExpression> input,
+    std::vector<std::shared_ptr<OperatorExpression>> &transformed,
+    UNUSED_ATTRIBUTE OptimizeContext *context) const {
+  const auto *get = input->Op().As<LogicalExternalFileGet>();
+
+  auto result_plan =
+      std::make_shared<OperatorExpression>(ExternalFileScan::make(get->get_id));
+  PELOTON_ASSERT(input->Children().empty());
+  transformed.push_back(result_plan);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 /// LogicalDeleteToPhysical
 LogicalDeleteToPhysical::LogicalDeleteToPhysical() {
diff --git a/src/optimizer/util.cpp b/src/optimizer/util.cpp
index b0129484442..4ff60ee36c8 100644
--- a/src/optimizer/util.cpp
+++ b/src/optimizer/util.cpp
@@ -142,39 +142,6 @@ bool ContainsJoinColumns(const std::unordered_set<std::string> &l_group_alias,
   return false;
 }
 
-std::unique_ptr<planner::AbstractPlan> CreateCopyPlan(
-    parser::CopyStatement *copy_stmt) {
-  std::string table_name(copy_stmt->table->GetTableName());
-  bool deserialize_parameters = false;
-
-  // If we're copying the query metric table, then we need to handle the
-  // deserialization of prepared stmt parameters
-  if (table_name == QUERY_METRICS_CATALOG_NAME) {
-    LOG_DEBUG("Copying the query_metric table.");
-    deserialize_parameters = true;
-  }
-
-  std::unique_ptr<planner::AbstractPlan> copy_plan(
-      new planner::CopyPlan(copy_stmt->file_path, deserialize_parameters));
-
-  auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
-  auto txn = txn_manager.BeginTransaction();
-  auto target_table = catalog::Catalog::GetInstance()->GetTableWithName(
-      copy_stmt->table->GetDatabaseName(),
-      copy_stmt->table->GetSchemaName(),
-      copy_stmt->table->GetTableName(), txn);
-  txn_manager.CommitTransaction(txn);
-
-  std::unique_ptr<planner::SeqScanPlan> select_plan(
-      new planner::SeqScanPlan(target_table, nullptr, {}, false));
-
-  LOG_DEBUG("Sequential scan plan for copy created");
-
-  // Attach it to the copy plan
-  copy_plan->AddChild(std::move(select_plan));
-  return copy_plan;
-}
-
 std::unordered_map<std::string, std::shared_ptr<expression::AbstractExpression>>
 ConstructSelectElementMap(
     std::vector<std::unique_ptr<expression::AbstractExpression>> &select_list) {

From c749fe288adbcef136c7c07d06ccf7c985c42ab5 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 26 Apr 2018 02:19:13 -0400
Subject: [PATCH 04/42] Propagate external file information

---
 src/executor/plan_executor.cpp                | 12 ++---
 src/include/optimizer/operators.h             | 34 +++++++++----
 src/include/planner/csv_scan_plan.h           |  2 +-
 src/optimizer/operators.cpp                   | 48 +++++++++++++------
 src/optimizer/plan_generator.cpp              |  9 +++-
 .../query_to_operator_transformer.cpp         | 13 ++---
 src/optimizer/rule_impls.cpp                  | 21 ++++----
 7 files changed, 88 insertions(+), 51 deletions(-)

diff --git a/src/executor/plan_executor.cpp b/src/executor/plan_executor.cpp
index feca24cec2f..a01330b7b6d 100644
--- a/src/executor/plan_executor.cpp
+++ b/src/executor/plan_executor.cpp
@@ -170,9 +170,9 @@ void PlanExecutor::ExecutePlan(
   } catch (Exception &e) {
     ExecutionResult result;
     result.m_result = ResultType::FAILURE;
-    result.m_error_message = e.what();
-    LOG_ERROR("Error thrown during execution: %s",
-              result.m_error_message.c_str());
+    result.m_error_message =
+        StringUtil::Format("ERROR:  during execution ['%s']", e.what());
+    LOG_ERROR("Error during execution: %s", e.what());
     on_complete(result, {});
   }
 }
@@ -349,9 +349,9 @@ executor::AbstractExecutor *BuildExecutorTree(
       break;
 
     default:
-      LOG_ERROR("Unsupported plan node type : %s",
-                PlanNodeTypeToString(plan_node_type).c_str());
-      break;
+      throw NotImplementedException{
+          StringUtil::Format("Unsupported plan node type : %s",
+                             PlanNodeTypeToString(plan_node_type).c_str())};
   }
   LOG_TRACE("Adding %s Executor", PlanNodeTypeToString(plan_node_type).c_str());
 
diff --git a/src/include/optimizer/operators.h b/src/include/optimizer/operators.h
index 7e27240973a..8ec891c8131 100644
--- a/src/include/optimizer/operators.h
+++ b/src/include/optimizer/operators.h
@@ -72,7 +72,8 @@ class LogicalGet : public OperatorNode<LogicalGet> {
 //===--------------------------------------------------------------------===//
 class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
  public:
-  static Operator make(oid_t get_id);
+  static Operator make(oid_t get_id, ExternalFileFormat format,
+                       std::string file_name);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -80,6 +81,8 @@ class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
 
   // identifier for all get operators
   oid_t get_id;
+  ExternalFileFormat format;
+  std::string file_name;
 };
 
 //===--------------------------------------------------------------------===//
@@ -260,7 +263,8 @@ class LogicalAggregateAndGroupBy
 class LogicalInsert : public OperatorNode<LogicalInsert> {
  public:
   static Operator make(
-      std::shared_ptr<catalog::TableCatalogObject> target_table, const std::vector<std::string> *columns,
+      std::shared_ptr<catalog::TableCatalogObject> target_table,
+      const std::vector<std::string> *columns,
       const std::vector<std::vector<
           std::unique_ptr<expression::AbstractExpression>>> *values);
 
@@ -272,7 +276,8 @@ class LogicalInsert : public OperatorNode<LogicalInsert> {
 
 class LogicalInsertSelect : public OperatorNode<LogicalInsertSelect> {
  public:
-  static Operator make(std::shared_ptr<catalog::TableCatalogObject> target_table);
+  static Operator make(
+      std::shared_ptr<catalog::TableCatalogObject> target_table);
 
   std::shared_ptr<catalog::TableCatalogObject> target_table;
 };
@@ -300,7 +305,8 @@ class LogicalLimit : public OperatorNode<LogicalLimit> {
 //===--------------------------------------------------------------------===//
 class LogicalDelete : public OperatorNode<LogicalDelete> {
  public:
-  static Operator make(std::shared_ptr<catalog::TableCatalogObject> target_table);
+  static Operator make(
+      std::shared_ptr<catalog::TableCatalogObject> target_table);
 
   std::shared_ptr<catalog::TableCatalogObject> target_table;
 };
@@ -340,7 +346,8 @@ class DummyScan : public OperatorNode<DummyScan> {
 //===--------------------------------------------------------------------===//
 class PhysicalSeqScan : public OperatorNode<PhysicalSeqScan> {
  public:
-  static Operator make(oid_t get_id, std::shared_ptr<catalog::TableCatalogObject> table,
+  static Operator make(oid_t get_id,
+                       std::shared_ptr<catalog::TableCatalogObject> table,
                        std::string alias,
                        std::vector<AnnotatedExpression> predicates,
                        bool update);
@@ -362,7 +369,8 @@ class PhysicalSeqScan : public OperatorNode<PhysicalSeqScan> {
 //===--------------------------------------------------------------------===//
 class PhysicalIndexScan : public OperatorNode<PhysicalIndexScan> {
  public:
-  static Operator make(oid_t get_id, std::shared_ptr<catalog::TableCatalogObject> table,
+  static Operator make(oid_t get_id,
+                       std::shared_ptr<catalog::TableCatalogObject> table,
                        std::string alias,
                        std::vector<AnnotatedExpression> predicates, bool update,
                        oid_t index_id, std::vector<oid_t> key_column_id_list,
@@ -394,7 +402,8 @@ class PhysicalIndexScan : public OperatorNode<PhysicalIndexScan> {
 //===--------------------------------------------------------------------===//
 class ExternalFileScan : public OperatorNode<ExternalFileScan> {
  public:
-  static Operator make(oid_t get_id);
+  static Operator make(oid_t get_id, ExternalFileFormat format,
+                       std::string file_name);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -402,6 +411,8 @@ class ExternalFileScan : public OperatorNode<ExternalFileScan> {
 
   // identifier for all get operators
   oid_t get_id;
+  ExternalFileFormat format;
+  std::string file_name;
 };
 
 //===--------------------------------------------------------------------===//
@@ -551,7 +562,8 @@ class PhysicalOuterHashJoin : public OperatorNode<PhysicalOuterHashJoin> {
 class PhysicalInsert : public OperatorNode<PhysicalInsert> {
  public:
   static Operator make(
-      std::shared_ptr<catalog::TableCatalogObject> target_table, const std::vector<std::string> *columns,
+      std::shared_ptr<catalog::TableCatalogObject> target_table,
+      const std::vector<std::string> *columns,
       const std::vector<std::vector<
           std::unique_ptr<expression::AbstractExpression>>> *values);
 
@@ -563,7 +575,8 @@ class PhysicalInsert : public OperatorNode<PhysicalInsert> {
 
 class PhysicalInsertSelect : public OperatorNode<PhysicalInsertSelect> {
  public:
-  static Operator make(std::shared_ptr<catalog::TableCatalogObject> target_table);
+  static Operator make(
+      std::shared_ptr<catalog::TableCatalogObject> target_table);
 
   std::shared_ptr<catalog::TableCatalogObject> target_table;
 };
@@ -573,7 +586,8 @@ class PhysicalInsertSelect : public OperatorNode<PhysicalInsertSelect> {
 //===--------------------------------------------------------------------===//
 class PhysicalDelete : public OperatorNode<PhysicalDelete> {
  public:
-  static Operator make(std::shared_ptr<catalog::TableCatalogObject> target_table);
+  static Operator make(
+      std::shared_ptr<catalog::TableCatalogObject> target_table);
   std::shared_ptr<catalog::TableCatalogObject> target_table;
 };
 
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index a58cc87b0f5..e871b15ac02 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -19,7 +19,7 @@ namespace planner {
 
 class CSVScanPlan : public AbstractPlan {
  public:
-  CSVScanPlan(const std::string file_name) : file_name_(std::move(file_name)) {}
+  CSVScanPlan(std::string file_name) : file_name_(std::move(file_name)) {}
 
   PlanNodeType GetPlanNodeType() const override {
     return PlanNodeType::CSVSCAN;
diff --git a/src/optimizer/operators.cpp b/src/optimizer/operators.cpp
index 60e074556a3..c9fb133bc90 100644
--- a/src/optimizer/operators.cpp
+++ b/src/optimizer/operators.cpp
@@ -11,11 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "optimizer/operators.h"
+
 #include "optimizer/operator_visitor.h"
 #include "expression/expression_util.h"
 
 namespace peloton {
 namespace optimizer {
+
 //===--------------------------------------------------------------------===//
 // Leaf
 //===--------------------------------------------------------------------===//
@@ -51,7 +53,7 @@ hash_t LogicalGet::Hash() const {
 }
 
 bool LogicalGet::operator==(const BaseOperatorNode &r) {
-  if (r.GetType()!= OpType::Get) return false;
+  if (r.GetType() != OpType::Get) return false;
   const LogicalGet &node = *static_cast<const LogicalGet *>(&r);
   if (predicates.size() != node.predicates.size()) return false;
   for (size_t i = 0; i < predicates.size(); i++) {
@@ -65,21 +67,28 @@ bool LogicalGet::operator==(const BaseOperatorNode &r) {
 // External file get
 //===--------------------------------------------------------------------===//
 
-Operator LogicalExternalFileGet::make(oid_t get_id) {
+Operator LogicalExternalFileGet::make(oid_t get_id, ExternalFileFormat format,
+                                      std::string file_name) {
   auto *get = new LogicalExternalFileGet();
   get->get_id = get_id;
+  get->format = format;
+  get->file_name = std::move(file_name);
   return Operator(get);
 }
 
 bool LogicalExternalFileGet::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::LogicalExternalFileGet) return false;
-  const auto &get = *static_cast<const LogicalQueryDerivedGet *>(&node);
-  return get_id == get.get_id;
+  const auto &get = *static_cast<const LogicalExternalFileGet *>(&node);
+  return (get_id == get.get_id && format == get.format &&
+          file_name == get.file_name);
 }
 
 hash_t LogicalExternalFileGet::Hash() const {
   hash_t hash = BaseOperatorNode::Hash();
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
   return hash;
 }
 
@@ -407,8 +416,8 @@ Operator LogicalDelete::make(
 //===--------------------------------------------------------------------===//
 Operator LogicalUpdate::make(
     std::shared_ptr<catalog::TableCatalogObject> target_table,
-    const std::vector<std::unique_ptr<peloton::parser::UpdateClause>>
-        *updates) {
+    const std::vector<std::unique_ptr<peloton::parser::UpdateClause>> *
+        updates) {
   LogicalUpdate *update_op = new LogicalUpdate;
   update_op->target_table = target_table;
   update_op->updates = updates;
@@ -539,21 +548,28 @@ hash_t PhysicalIndexScan::Hash() const {
 //===--------------------------------------------------------------------===//
 // Physical external file scan
 //===--------------------------------------------------------------------===//
-Operator ExternalFileScan::make(oid_t get_id) {
+Operator ExternalFileScan::make(oid_t get_id, ExternalFileFormat format,
+                                std::string file_name) {
   auto *get = new ExternalFileScan();
   get->get_id = get_id;
+  get->format = format;
+  get->file_name = file_name;
   return Operator(get);
 }
 
 bool ExternalFileScan::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::QueryDerivedScan) return false;
   const auto &get = *static_cast<const ExternalFileScan *>(&node);
-  return get_id == get.get_id;
+  return (get_id == get.get_id && format == get.format &&
+          file_name == get.file_name);
 }
 
 hash_t ExternalFileScan::Hash() const {
   hash_t hash = BaseOperatorNode::Hash();
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&get_id));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
   return hash;
 }
 
@@ -799,8 +815,8 @@ Operator PhysicalDelete::make(
 //===--------------------------------------------------------------------===//
 Operator PhysicalUpdate::make(
     std::shared_ptr<catalog::TableCatalogObject> target_table,
-    const std::vector<std::unique_ptr<peloton::parser::UpdateClause>>
-        *updates) {
+    const std::vector<std::unique_ptr<peloton::parser::UpdateClause>> *
+        updates) {
   PhysicalUpdate *update = new PhysicalUpdate;
   update->target_table = target_table;
   update->updates = updates;
@@ -897,7 +913,8 @@ std::string OperatorNode<LeafOperator>::name_ = "LeafOperator";
 template <>
 std::string OperatorNode<LogicalGet>::name_ = "LogicalGet";
 template <>
-std::string OperatorNode<LogicalExternalFileGet>::name_ = "LogicalExternalFileGet";
+std::string OperatorNode<LogicalExternalFileGet>::name_ =
+    "LogicalExternalFileGet";
 template <>
 std::string OperatorNode<LogicalQueryDerivedGet>::name_ =
     "LogicalQueryDerivedGet";
@@ -937,7 +954,8 @@ std::string OperatorNode<LogicalLimit>::name_ = "LogicalLimit";
 template <>
 std::string OperatorNode<LogicalDistinct>::name_ = "LogicalDistinct";
 template <>
-std::string OperatorNode<LogicalExportExternalFile>::name_ = "LogicalExportExternalFile";
+std::string OperatorNode<LogicalExportExternalFile>::name_ =
+    "LogicalExportExternalFile";
 template <>
 std::string OperatorNode<DummyScan>::name_ = "DummyScan";
 template <>
@@ -994,7 +1012,8 @@ OpType OperatorNode<LeafOperator>::type_ = OpType::Leaf;
 template <>
 OpType OperatorNode<LogicalGet>::type_ = OpType::Get;
 template <>
-OpType OperatorNode<LogicalExternalFileGet>::type_ = OpType::LogicalExternalFileGet;
+OpType OperatorNode<LogicalExternalFileGet>::type_ =
+    OpType::LogicalExternalFileGet;
 template <>
 OpType OperatorNode<LogicalQueryDerivedGet>::type_ =
     OpType::LogicalQueryDerivedGet;
@@ -1034,7 +1053,8 @@ OpType OperatorNode<LogicalDistinct>::type_ = OpType::LogicalDistinct;
 template <>
 OpType OperatorNode<LogicalLimit>::type_ = OpType::LogicalLimit;
 template <>
-OpType OperatorNode<LogicalExportExternalFile>::type_ = OpType::LogicalExportExternalFile;
+OpType OperatorNode<LogicalExportExternalFile>::type_ =
+    OpType::LogicalExportExternalFile;
 
 template <>
 OpType OperatorNode<DummyScan>::type_ = OpType::DummyScan;
diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp
index b592e92fe4c..6960e4ddd25 100644
--- a/src/optimizer/plan_generator.cpp
+++ b/src/optimizer/plan_generator.cpp
@@ -128,8 +128,13 @@ void PlanGenerator::Visit(const PhysicalIndexScan *op) {
       predicate.release(), column_ids, index_scan_desc, false));
 }
 
-void PlanGenerator::Visit(const ExternalFileScan *) {
-  output_plan_.reset(new planner::CSVScanPlan("sdfsdf"));
+void PlanGenerator::Visit(const ExternalFileScan *op) {
+  switch (op->format) {
+    case ExternalFileFormat::CSV: {
+      output_plan_.reset(new planner::CSVScanPlan(op->file_name));
+      break;
+    }
+  }
 }
 
 void PlanGenerator::Visit(const QueryDerivedScan *) {
diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp
index b0f9b8f4446..f5f05d6c6aa 100644
--- a/src/optimizer/query_to_operator_transformer.cpp
+++ b/src/optimizer/query_to_operator_transformer.cpp
@@ -259,10 +259,10 @@ void QueryToOperatorTransformer::Visit(parser::InsertStatement *op) {
           if (column_objects[i]->IsNotNull()) {
             // TODO: Add check for default value's existence for the current
             // column
-            throw CatalogException(
-                StringUtil::Format("ERROR:  null value in column \"%s\" "
-                                   "violates not-null constraint",
-                                   column_objects[i]->GetColumnName().c_str()));
+            throw CatalogException(StringUtil::Format(
+                "ERROR:  null value in column \"%s\" "
+                "violates not-null constraint",
+                column_objects[i]->GetColumnName().c_str()));
           }
         }
       }
@@ -361,8 +361,9 @@ void QueryToOperatorTransformer::Visit(parser::UpdateStatement *op) {
 }
 void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
   if (op->is_from) {
-    auto get_op = std::make_shared<OperatorExpression>(
-        LogicalExternalFileGet::make(GetAndIncreaseGetId()));
+    auto get_op =
+        std::make_shared<OperatorExpression>(LogicalExternalFileGet::make(
+            GetAndIncreaseGetId(), op->format, op->file_path));
 
     auto target_table =
         catalog::Catalog::GetInstance()
diff --git a/src/optimizer/rule_impls.cpp b/src/optimizer/rule_impls.cpp
index fcab5412621..284109a38f1 100644
--- a/src/optimizer/rule_impls.cpp
+++ b/src/optimizer/rule_impls.cpp
@@ -275,9 +275,8 @@ void GetToIndexScan::Transform(
         sort_by_asc_base_column = false;
         break;
       }
-      auto bound_oids =
-          reinterpret_cast<expression::TupleValueExpression *>(expr)
-              ->GetBoundOid();
+      auto bound_oids = reinterpret_cast<expression::TupleValueExpression *>(
+                            expr)->GetBoundOid();
       sort_col_ids.push_back(std::get<2>(bound_oids));
     }
     // Check whether any index can fulfill sort property
@@ -358,20 +357,16 @@ void GetToIndexScan::Transform(
         if (value_expr->GetExpressionType() == ExpressionType::VALUE_CONSTANT) {
           value_list.push_back(
               reinterpret_cast<expression::ConstantValueExpression *>(
-                  value_expr)
-                  ->GetValue());
+                  value_expr)->GetValue());
           LOG_TRACE("Value Type: %d",
                     static_cast<int>(
                         reinterpret_cast<expression::ConstantValueExpression *>(
-                            expr->GetModifiableChild(1))
-                            ->GetValueType()));
+                            expr->GetModifiableChild(1))->GetValueType()));
         } else {
           value_list.push_back(
               type::ValueFactory::GetParameterOffsetValue(
                   reinterpret_cast<expression::ParameterValueExpression *>(
-                      value_expr)
-                      ->GetValueIdx())
-                  .Copy());
+                      value_expr)->GetValueIdx()).Copy());
           LOG_TRACE("Parameter offset: %s",
                     (*value_list.rbegin()).GetInfo().c_str());
         }
@@ -459,9 +454,11 @@ void LogicalExternalFileGetToPhysical::Transform(
     UNUSED_ATTRIBUTE OptimizeContext *context) const {
   const auto *get = input->Op().As<LogicalExternalFileGet>();
 
-  auto result_plan =
-      std::make_shared<OperatorExpression>(ExternalFileScan::make(get->get_id));
+  auto result_plan = std::make_shared<OperatorExpression>(
+      ExternalFileScan::make(get->get_id, get->format, get->file_name));
+
   PELOTON_ASSERT(input->Children().empty());
+
   transformed.push_back(result_plan);
 }
 

From 73d583ff2ad91e661d8a6761fd430fcbc52bf873 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Fri, 27 Apr 2018 09:28:30 -0400
Subject: [PATCH 05/42] Removed unused serialization stuff from plan nodes

---
 src/include/planner/abstract_plan.h      |  27 ---
 src/include/planner/abstract_scan_plan.h |   2 -
 src/include/planner/seq_scan_plan.h      |  19 +-
 src/planner/abstract_plan.cpp            |   3 -
 src/planner/seq_scan_plan.cpp            | 227 +----------------------
 5 files changed, 11 insertions(+), 267 deletions(-)

diff --git a/src/include/planner/abstract_plan.h b/src/include/planner/abstract_plan.h
index c257b20d830..bb1428f81d4 100644
--- a/src/include/planner/abstract_plan.h
+++ b/src/include/planner/abstract_plan.h
@@ -20,8 +20,6 @@
 #include "codegen/query_parameters_map.h"
 #include "common/printable.h"
 #include "planner/binding_context.h"
-#include "type/serializeio.h"
-#include "type/serializer.h"
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "util/hash_util.h"
@@ -66,8 +64,6 @@ class AbstractPlan : public Printable {
 
   const AbstractPlan *GetChild(uint32_t child_index) const;
 
-  const AbstractPlan *GetParent() const;
-  
   //===--------------------------------------------------------------------===//
   // Accessors
   //===--------------------------------------------------------------------===//
@@ -111,23 +107,6 @@ class AbstractPlan : public Printable {
 
   virtual std::unique_ptr<AbstractPlan> Copy() const = 0;
 
-  // A plan will be sent to anther node via serialization
-  // So serialization should be implemented by the derived classes
-
-  //===--------------------------------------------------------------------===//
-  // Serialization/Deserialization
-  // Each sub-class will have to implement these functions
-  // After the implementation for each sub-class, we should set these to pure
-  // virtual
-  //===--------------------------------------------------------------------===//
-  virtual bool SerializeTo(SerializeOutput &output UNUSED_ATTRIBUTE) const {
-    return false;
-  }
-  virtual bool DeserializeFrom(SerializeInput &input UNUSED_ATTRIBUTE) {
-    return false;
-  }
-  virtual int SerializeSize() const { return 0; }
-
   virtual hash_t Hash() const;
 
   virtual bool operator==(const AbstractPlan &rhs) const;
@@ -143,16 +122,10 @@ class AbstractPlan : public Printable {
     }
   }
 
- protected:
-  // only used by its derived classes (when deserialization)
-  AbstractPlan *Parent() const { return parent_; }
-
  private:
   // A plan node can have multiple children
   std::vector<std::unique_ptr<AbstractPlan>> children_;
 
-  AbstractPlan *parent_ = nullptr;
-  
   // TODO: This field is harded coded now. This needs to be changed when
   // optimizer has the cost model and cardinality estimation
   int estimated_cardinality_ = 500000;
diff --git a/src/include/planner/abstract_scan_plan.h b/src/include/planner/abstract_scan_plan.h
index 816676736b5..099bf5a161b 100644
--- a/src/include/planner/abstract_scan_plan.h
+++ b/src/include/planner/abstract_scan_plan.h
@@ -71,8 +71,6 @@ class AbstractScan : public AbstractPlan {
  protected:
   void SetTargetTable(storage::DataTable *table) { target_table_ = table; }
 
-  void AddColumnId(oid_t col_id) { column_ids_.push_back(col_id); }
-
   void SetPredicate(expression::AbstractExpression *predicate) {
     predicate_ = std::unique_ptr<expression::AbstractExpression>(predicate);
   }
diff --git a/src/include/planner/seq_scan_plan.h b/src/include/planner/seq_scan_plan.h
index 9f0f411f2cb..fed2f12d783 100644
--- a/src/include/planner/seq_scan_plan.h
+++ b/src/include/planner/seq_scan_plan.h
@@ -18,10 +18,20 @@
 
 #include "common/internal_types.h"
 #include "common/logger.h"
+#include "expression/abstract_expression.h"
 #include "planner/abstract_scan_plan.h"
 #include "type/serializer.h"
 
 namespace peloton {
+
+namespace expression {
+class Parameter;
+}  // namespace expression
+
+namespace storage {
+class DataTable;
+}  // namespace storage
+
 namespace planner {
 
 class SeqScanPlan : public AbstractScan {
@@ -48,15 +58,6 @@ class SeqScanPlan : public AbstractScan {
 
   void SetParameterValues(std::vector<type::Value> *values) override;
 
-  //===--------------------------------------------------------------------===//
-  // Serialization/Deserialization
-  //===--------------------------------------------------------------------===//
-  bool SerializeTo(SerializeOutput &output) const override;
-  bool DeserializeFrom(SerializeInput &input) override;
-
-  /* For init SerializeOutput */
-  int SerializeSize() const override;
-
   std::unique_ptr<AbstractPlan> Copy() const override {
     auto *new_plan =
         new SeqScanPlan(GetTable(), GetPredicate()->Copy(), GetColumnIds());
diff --git a/src/planner/abstract_plan.cpp b/src/planner/abstract_plan.cpp
index 241323bb0e9..49014a6f471 100644
--- a/src/planner/abstract_plan.cpp
+++ b/src/planner/abstract_plan.cpp
@@ -14,7 +14,6 @@
 
 #include "common/logger.h"
 #include "common/macros.h"
-#include "expression/expression_util.h"
 #include "util/hash_util.h"
 
 namespace peloton {
@@ -38,8 +37,6 @@ const AbstractPlan *AbstractPlan::GetChild(uint32_t child_index) const {
   return children_[child_index].get();
 }
 
-const AbstractPlan *AbstractPlan::GetParent() const { return parent_; }
-
 // Get a string representation of this plan
 std::ostream &operator<<(std::ostream &os, const AbstractPlan &plan) {
   os << PlanNodeTypeToString(plan.GetPlanNodeType());
diff --git a/src/planner/seq_scan_plan.cpp b/src/planner/seq_scan_plan.cpp
index 62e8299aae7..7c3ba3d8a14 100644
--- a/src/planner/seq_scan_plan.cpp
+++ b/src/planner/seq_scan_plan.cpp
@@ -6,246 +6,21 @@
 //
 // Identification: src/planner/seq_scan_plan.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "planner/seq_scan_plan.h"
 
-#include "parser/select_statement.h"
-#include "catalog/manager.h"
-#include "catalog/schema.h"
 #include "common/logger.h"
 #include "common/macros.h"
 #include "expression/abstract_expression.h"
-#include "expression/expression_util.h"
 #include "storage/data_table.h"
-#include "storage/storage_manager.h"
 #include "common/internal_types.h"
 
 namespace peloton {
 namespace planner {
 
-//===--------------------------------------------------------------------===//
-// Serialization/Deserialization
-//===--------------------------------------------------------------------===//
-
-/**
- * The SeqScanPlan has the following members:
- *   database_id, table_id, predicate, column_id, parent(might be NULL)
- * TODO: SeqScanPlan doesn't have children, so we don't need to handle it
- *
- * Therefore a SeqScanPlan is serialized as:
- * [(int) total size]
- * [(int8_t) plan type]
- * [(int) database_id]
- * [(int) table_id]
- * [(int) num column_id]
- * [(int) column id...]
- * [(int8_t) expr type]     : if invalid, predicate is null
- * [(bytes) predicate]      : predicate is Expression
- * [(int8_t) plan type]     : if invalid, parent is null
- * [(bytes) parent]         : parent is also a plan
- *
- * TODO: parent_ seems never be set or used
- */
-
-bool SeqScanPlan::SerializeTo(SerializeOutput &output) const {
-  // A placeholder for the total size written at the end
-  int start = output.Position();
-  output.WriteInt(-1);
-
-  // Write the SeqScanPlan type
-  PlanNodeType plan_type = GetPlanNodeType();
-  output.WriteByte(static_cast<int8_t>(plan_type));
-
-  // Write database id and table id
-  if (!GetTable()) {
-    // The plan is not completed
-    return false;
-  }
-  oid_t database_id = GetTable()->GetDatabaseOid();
-  oid_t table_id = GetTable()->GetOid();
-
-  output.WriteInt(static_cast<int>(database_id));
-  output.WriteInt(static_cast<int>(table_id));
-
-  // If column has 0 item, just write the columnid_count with 0
-  int columnid_count = GetColumnIds().size();
-  output.WriteInt(columnid_count);
-
-  // If column has 0 item, nothing happens here
-  for (int it = 0; it < columnid_count; it++) {
-    oid_t col_id = GetColumnIds()[it];
-    output.WriteInt(static_cast<int>(col_id));
-  }
-
-  // Write predicate
-  if (GetPredicate() == nullptr) {
-    // Write the type
-    output.WriteByte(static_cast<int8_t>(ExpressionType::INVALID));
-  } else {
-    // Write the expression type
-    ExpressionType expr_type = GetPredicate()->GetExpressionType();
-    output.WriteByte(static_cast<int8_t>(expr_type));
-  }
-
-  // Write parent, but parent seems never be set or used right now
-  if (GetParent() == nullptr) {
-    // Write the type
-    output.WriteByte(static_cast<int8_t>(PlanNodeType::INVALID));
-  } else {
-    // Write the parent type
-    PlanNodeType parent_type = GetParent()->GetPlanNodeType();
-    output.WriteByte(static_cast<int8_t>(parent_type));
-
-    // Write parent
-    GetParent()->SerializeTo(output);
-  }
-
-  // Write the total length
-  int32_t sz = static_cast<int32_t>(output.Position() - start - sizeof(int));
-  PELOTON_ASSERT(sz > 0);
-  output.WriteIntAt(start, sz);
-
-  return true;
-}
-
-/**
-   * Therefore a SeqScanPlan is serialized as:
-   * [(int) total size]
-   * [(int8_t) plan type]
-   * [(int) database_id]
-   * [(int) table_id]
-   * [(int) num column_id]
-   * [(int) column id...]
-   * [(int8_t) expr type]     : if invalid, predicate is null
-   * [(bytes) predicate]      : predicate is Expression
-   * [(int8_t) plan type]     : if invalid, parent is null
-   * [(bytes) parent]         : parent is also a plan
- */
-bool SeqScanPlan::DeserializeFrom(SerializeInput &input) {
-  // Read the size of SeqScanPlan class
-  input.ReadInt();
-
-  // Read the type
-  UNUSED_ATTRIBUTE PlanNodeType plan_type =
-      (PlanNodeType)input.ReadEnumInSingleByte();
-  PELOTON_ASSERT(plan_type == GetPlanNodeType());
-
-  // Read database id
-  oid_t database_oid = input.ReadInt();
-
-  // Read table id
-  oid_t table_oid = input.ReadInt();
-
-  // Get table and set it to the member
-  storage::DataTable *target_table = nullptr;
-  try{
-      target_table = static_cast<storage::DataTable *>(
-        storage::StorageManager::GetInstance()->GetTableWithOid(
-              database_oid, table_oid));
-  } catch (CatalogException &e) {
-      LOG_TRACE("Can't find table %d! Return false", table_oid);
-      return false;
-  }
-  SetTargetTable(target_table);
-
-  // Read the number of column_id and set them to column_ids_
-  oid_t columnid_count = input.ReadInt();
-  for (oid_t it = 0; it < columnid_count; it++) {
-    oid_t column_id = input.ReadInt();
-    AddColumnId(column_id);
-  }
-
-  // Read the type
-  ExpressionType expr_type = (ExpressionType)input.ReadEnumInSingleByte();
-
-  // Predicate deserialization
-  if (expr_type != ExpressionType::INVALID) {
-    switch (expr_type) {
-      //            case ExpressionType::COMPARE_IN:
-      //                predicate_ =
-      //                std::unique_ptr<ExpressionType::COMPARE_IN>(new
-      //                ComparisonExpression (101));
-      //                predicate_.DeserializeFrom(input);
-      //              break;
-
-      default: {
-        LOG_ERROR(
-            "Expression deserialization :: Unsupported EXPRESSION_TYPE: %s",
-            ExpressionTypeToString(expr_type).c_str());
-        break;
-      }
-    }
-  }
-
-  // Read the type of parent
-  PlanNodeType parent_type = (PlanNodeType)input.ReadEnumInSingleByte();
-
-  // Parent deserialization
-  if (parent_type != PlanNodeType::INVALID) {
-    switch (expr_type) {
-      //            case ExpressionType::COMPARE_IN:
-      //                predicate_ =
-      //                std::unique_ptr<ExpressionType::COMPARE_IN>(new
-      //                ComparisonExpression (101));
-      //                predicate_.DeserializeFrom(input);
-      //              break;
-
-      default: {
-        LOG_ERROR("Parent deserialization :: Unsupported PlanNodeType: %s",
-                  ExpressionTypeToString(expr_type).c_str());
-        break;
-      }
-    }
-  }
-
-  return true;
-}
-/**
- *
- * SeqScanPlan is serialized as:
- * [(int) total size]
- * [(int8_t) plan type]
- * [(int) database_id]
- * [(int) table_id]
- * [(int) num column_id]
- * [(int) column id...]
- * [(int8_t) expr type]     : if invalid, predicate is null
- * [(bytes) predicate]      : predicate is Expression
- * [(int8_t) plan type]     : if invalid, parent is null
- * [(bytes) parent]         : parent is also a plan
- *
- * So, the fixed size part is:
- *      [(int) total size]   4 +
- *      [(int8_t) plan type] 1 +
- *      [(int) database_id]  4 +
- *      [(int) table_id]     4 +
- *      [(int) num column_id]4 +
- *      [(int8_t) expr type] 1 +
- *      [(int8_t) plan type] 1 =
- *     the variant part is :
- *      [(int) column id...]: num column_id * 4
- *      [(bytes) predicate] : predicate->GetSerializeSize()
- *      [(bytes) parent]    : parent->GetSerializeSize()
- */
-int SeqScanPlan::SerializeSize() const {
-  // Fixed size. see the detail above
-  int size_fix = sizeof(int) * 4 + 3;
-  int size_column_ids = GetColumnIds().size() * sizeof(int);
-  int size = size_fix + size_column_ids;
-
-  if (GetPredicate() != nullptr) {
-    size = size + GetPredicate()->SerializeSize();
-  }
-  if (Parent()) {
-    size = size + Parent()->SerializeSize();
-  }
-
-  return size;
-}
-
 void SeqScanPlan::SetParameterValues(std::vector<type::Value> *values) {
   LOG_TRACE("Setting parameter values in Sequential Scan");
 

From 7e61425e321bc744e6a536f0ed4d54198342fb02 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 00:36:17 -0400
Subject: [PATCH 06/42] Codegen can now have constant generic/opaque bytes in
 module

---
 src/codegen/codegen.cpp           | 38 +++++++++++++++++++------------
 src/codegen/type/boolean_type.cpp |  4 ++--
 src/include/codegen/codegen.h     |  8 ++++---
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index 251a4edd8bf..b6449ae4138 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -59,12 +59,30 @@ llvm::Constant *CodeGen::ConstDouble(double val) const {
   return llvm::ConstantFP::get(DoubleType(), val);
 }
 
-llvm::Constant *CodeGen::ConstString(const std::string &s) const {
+llvm::Value *CodeGen::ConstString(const std::string &str_val,
+                                     const std::string &name) const {
   // Strings are treated as arrays of bytes
-  auto *str = llvm::ConstantDataArray::getString(GetContext(), s);
-  return new llvm::GlobalVariable(GetModule(), str->getType(), true,
-                                  llvm::GlobalValue::InternalLinkage, str,
-                                  "str");
+  auto *str = llvm::ConstantDataArray::getString(GetContext(), str_val);
+  auto *global_var =
+      new llvm::GlobalVariable(GetModule(), str->getType(), true,
+                               llvm::GlobalValue::InternalLinkage, str, name);
+  return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)});
+}
+
+llvm::Value *CodeGen::ConstGenericBytes(llvm::Type *type, const void *data,
+                                        uint32_t length,
+                                        const std::string &name) const {
+  // Create the constant data array that wraps the input data
+  llvm::ArrayRef<uint8_t> elements{reinterpret_cast<const uint8_t *>(data),
+                                   length};
+  auto *arr = llvm::ConstantDataArray::get(GetContext(), elements);
+
+  // Create a global variable for the data
+  auto *global_var = new llvm::GlobalVariable(
+      GetModule(), type, true, llvm::GlobalValue::InternalLinkage, arr, name);
+
+  // Return a pointer to the first element
+  return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)});
 }
 
 llvm::Constant *CodeGen::Null(llvm::Type *type) const {
@@ -75,11 +93,6 @@ llvm::Constant *CodeGen::NullPtr(llvm::PointerType *type) const {
   return llvm::ConstantPointerNull::get(type);
 }
 
-llvm::Value *CodeGen::ConstStringPtr(const std::string &s) const {
-  auto &ir_builder = GetBuilder();
-  return ir_builder.CreateConstInBoundsGEP2_32(nullptr, ConstString(s), 0, 0);
-}
-
 llvm::Value *CodeGen::AllocateVariable(llvm::Type *type,
                                        const std::string &name) {
   // To allocate a variable, a function must be under construction
@@ -143,12 +156,9 @@ llvm::Value *CodeGen::CallPrintf(const std::string &format,
         "printf", llvm::TypeBuilder<int(char *, ...), false>::get(GetContext()),
         reinterpret_cast<void *>(printf));
   }
-  auto &ir_builder = code_context_.GetBuilder();
-  auto *format_str =
-      ir_builder.CreateGEP(ConstString(format), {Const32(0), Const32(0)});
 
   // Collect all the arguments into a vector
-  std::vector<llvm::Value *> printf_args{format_str};
+  std::vector<llvm::Value *> printf_args = {ConstString(format, "format")};
   printf_args.insert(printf_args.end(), args.begin(), args.end());
 
   // Call the function
diff --git a/src/codegen/type/boolean_type.cpp b/src/codegen/type/boolean_type.cpp
index 2580e210d4b..edc761d8179 100644
--- a/src/codegen/type/boolean_type.cpp
+++ b/src/codegen/type/boolean_type.cpp
@@ -84,7 +84,8 @@ struct CastBooleanToVarchar : public TypeSystem::CastHandleNull {
 
     // Convert this boolean (unsigned int) into a string
     llvm::Value *str_val = codegen->CreateSelect(
-        value.GetValue(), codegen.ConstString("T"), codegen.ConstString("F"));
+        value.GetValue(), codegen.ConstString("T", "true"),
+        codegen.ConstString("F", "false"));
 
     // We could be casting this non-nullable value to a nullable type
     llvm::Value *null = to_type.nullable ? codegen.ConstBool(false) : nullptr;
@@ -250,7 +251,6 @@ struct LogicalOr : public TypeSystem::BinaryOperatorHandleNull {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::BOOLEAN};
 
-
 // Explicit casts
 CastBooleanToInteger kBooleanToInteger;
 CastBooleanToDecimal kBooleanToDecimal;
diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h
index 5612868d0d5..09edae81900 100644
--- a/src/include/codegen/codegen.h
+++ b/src/include/codegen/codegen.h
@@ -95,11 +95,13 @@ class CodeGen {
   llvm::Constant *Const32(int32_t val) const;
   llvm::Constant *Const64(int64_t val) const;
   llvm::Constant *ConstDouble(double val) const;
-  llvm::Constant *ConstString(const std::string &s) const;
+  llvm::Value *ConstString(const std::string &str_val,
+                           const std::string &name) const;
+  llvm::Value *ConstGenericBytes(llvm::Type *type, const void *data,
+                                 uint32_t length,
+                                 const std::string &name) const;
   llvm::Constant *Null(llvm::Type *type) const;
   llvm::Constant *NullPtr(llvm::PointerType *type) const;
-  /// Wrapper for pointer for constant string
-  llvm::Value *ConstStringPtr(const std::string &s) const;
 
   llvm::Value *AllocateVariable(llvm::Type *type, const std::string &name);
   llvm::Value *AllocateBuffer(llvm::Type *element_type, uint32_t num_elems,

From 02bd504f20d6dba942f78ec0d3ef88dc888f2295 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 00:39:54 -0400
Subject: [PATCH 07/42] When no columns specified during copy, all columns are
 inserted

---
 src/binder/bind_node_visitor.cpp             |   4 +-
 src/include/optimizer/input_column_deriver.h |   2 +
 src/include/optimizer/optimizer.h            |  20 +---
 src/include/optimizer/util.h                 |   1 -
 src/include/parser/copy_statement.h          |   8 ++
 src/include/planner/abstract_scan_plan.h     |   2 +-
 src/include/planner/csv_scan_plan.h          | 107 +++++++++++++++++--
 src/optimizer/input_column_deriver.cpp       |   2 +
 src/optimizer/optimizer.cpp                  |  19 +++-
 src/optimizer/plan_generator.cpp             |  13 ++-
 10 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/src/binder/bind_node_visitor.cpp b/src/binder/bind_node_visitor.cpp
index c7d25093beb..eec8a03c091 100644
--- a/src/binder/bind_node_visitor.cpp
+++ b/src/binder/bind_node_visitor.cpp
@@ -168,10 +168,12 @@ void BindNodeVisitor::Visit(parser::DeleteStatement *node) {
 void BindNodeVisitor::Visit(parser::LimitDescription *) {}
 
 void BindNodeVisitor::Visit(parser::CopyStatement *node) {
-  // Bind the source/target table of the copy
   context_ = std::make_shared<BinderContext>(nullptr);
   if (node->table != nullptr) {
     node->table->Accept(this);
+
+    // If the table is given, we're either writing or reading all columns
+    context_->GenerateAllColumnExpressions(node->select_list);
   } else {
     node->select_stmt->Accept(this);
   }
diff --git a/src/include/optimizer/input_column_deriver.h b/src/include/optimizer/input_column_deriver.h
index fa1ec6ca5a1..728a08305c4 100644
--- a/src/include/optimizer/input_column_deriver.h
+++ b/src/include/optimizer/input_column_deriver.h
@@ -53,6 +53,8 @@ class InputColumnDeriver : public OperatorVisitor {
 
   void Visit(const PhysicalIndexScan *op) override;
 
+  void Visit(const ExternalFileScan *op) override;
+
   void Visit(const QueryDerivedScan *op) override;
 
   void Visit(const PhysicalOrderBy *) override;
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 71d7afca265..18608c06756 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -23,24 +23,24 @@ namespace peloton {
 namespace parser {
 class SQLStatementList;
 class SQLStatement;
-}
+}  // namespace parser
 
 namespace planner {
 class AbstractPlan;
-};
+}  // namespace planner
 
 namespace optimizer {
 class OperatorExpression;
-}
+}  // namespace optimizer
 
 namespace concurrency {
 class TransactionContext;
-}
+}  // namespace concurrency
 
 namespace test {
 class OptimizerRuleTests_SimpleAssociativeRuleTest_Test;
 class OptimizerRuleTests_SimpleAssociativeRuleTest2_Test;
-}
+}  // namespace test
 
 namespace optimizer {
 
@@ -110,16 +110,6 @@ class Optimizer : public AbstractOptimizer {
       parser::SQLStatement *tree, bool &is_ddl_stmt,
       concurrency::TransactionContext *txn);
 
-  /**
-   * Construct a plan object for the given parsed copy statement.
-   *
-   * @param copy_stmt The copy statement we're transforming
-   * @param txn The transactional context
-   * @return The construct plan object for the COPY statement
-   */
-  std::unique_ptr<planner::AbstractPlan> HandleDDLCopyStatement(
-      parser::CopyStatement *copy_stmt, concurrency::TransactionContext *txn);
-
   /* TransformQueryTree - create an initial operator tree for the given query
    * to be used in performing optimization.
    *
diff --git a/src/include/optimizer/util.h b/src/include/optimizer/util.h
index 877bdee3b96..634e1297347 100644
--- a/src/include/optimizer/util.h
+++ b/src/include/optimizer/util.h
@@ -17,7 +17,6 @@
 #include <string>
 
 #include "expression/abstract_expression.h"
-#include "parser/copy_statement.h"
 #include "planner/abstract_plan.h"
 
 namespace peloton {
diff --git a/src/include/parser/copy_statement.h b/src/include/parser/copy_statement.h
index 92100e312d1..8145cd695e9 100644
--- a/src/include/parser/copy_statement.h
+++ b/src/include/parser/copy_statement.h
@@ -46,14 +46,22 @@ class CopyStatement : public SQLStatement {
   ///
   //////////////////////////////////////////////////////////////////////////////
 
+  // The table that is copied into or copied from
   std::unique_ptr<TableRef> table;
 
+  // The SQL statement used instead of a table when copying data out to a file
   std::unique_ptr<SelectStatement> select_stmt;
 
+  // The set of attributes being written out or read in
+  std::vector<std::unique_ptr<expression::AbstractExpression>> select_list;
+
+  // The type of copy
   CopyType type;
 
+  // The input or output file that is read of written into
   std::string file_path;
 
+  // The format of the file
   ExternalFileFormat format;
 
   bool is_from;
diff --git a/src/include/planner/abstract_scan_plan.h b/src/include/planner/abstract_scan_plan.h
index 099bf5a161b..b770d66b7fe 100644
--- a/src/include/planner/abstract_scan_plan.h
+++ b/src/include/planner/abstract_scan_plan.h
@@ -56,7 +56,7 @@ class AbstractScan : public AbstractPlan {
 
   storage::DataTable *GetTable() const { return target_table_; }
 
-  void GetAttributes(std::vector<const AttributeInfo *> &ais) const {
+  virtual void GetAttributes(std::vector<const AttributeInfo *> &ais) const {
     for (const auto &ai : attributes_) {
       ais.push_back(&ai);
     }
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index e871b15ac02..1c14a1d9ece 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -12,23 +12,63 @@
 
 #pragma once
 
-#include "planner/abstract_plan.h"
+#include <numeric>
+
+#include "codegen/type/type.h"
+#include "planner/abstract_scan_plan.h"
+#include "planner/attribute_info.h"
 
 namespace peloton {
 namespace planner {
 
-class CSVScanPlan : public AbstractPlan {
+class CSVScanPlan : public AbstractScan {
  public:
-  CSVScanPlan(std::string file_name) : file_name_(std::move(file_name)) {}
+  struct ColumnInfo {
+    std::string name;
+    type::TypeId type;
+  };
 
-  PlanNodeType GetPlanNodeType() const override {
-    return PlanNodeType::CSVSCAN;
-  }
+ public:
+  /**
+   * Constructs a sequential scan over a CSV file
+   *
+   * @param file_name The file path
+   * @param cols Information of the columns expected in each row of the CSV
+   */
+  CSVScanPlan(std::string file_name, std::vector<ColumnInfo> &&cols);
+
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Accessors
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  PlanNodeType GetPlanNodeType() const override;
+
+  void GetOutputColumns(std::vector<oid_t> &columns) const override;
+
+  const std::string &GetFileName() const { return file_name_; }
+
+  void GetAttributes(std::vector<const AttributeInfo *> &ais) const override;
+
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Utilities + Internal
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  hash_t Hash() const override;
+
+  bool operator==(const AbstractPlan &rhs) const override;
 
   std::unique_ptr<AbstractPlan> Copy() const override;
 
+  void PerformBinding(BindingContext &binding_context) override;
+
  private:
   const std::string file_name_;
+
+  std::vector<std::unique_ptr<planner::AttributeInfo>> attributes_;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -37,9 +77,60 @@ class CSVScanPlan : public AbstractPlan {
 ///
 ////////////////////////////////////////////////////////////////////////////////
 
+inline CSVScanPlan::CSVScanPlan(std::string file_name,
+                                std::vector<CSVScanPlan::ColumnInfo> &&cols)
+    : file_name_(std::move(file_name)) {
+  for (const auto &col : cols) {
+    std::unique_ptr<planner::AttributeInfo> attribute{
+        new planner::AttributeInfo()};
+    attribute->name = col.name;
+    attribute->type = codegen::type::Type{col.type, true};
+    attributes_.emplace_back(std::move(attribute));
+  }
+}
+
+inline PlanNodeType CSVScanPlan::GetPlanNodeType() const {
+  return PlanNodeType::CSVSCAN;
+}
+
 inline std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
-  // TODO
-  return std::unique_ptr<AbstractPlan>();
+  std::vector<CSVScanPlan::ColumnInfo> new_cols;
+  for (const auto &attribute : attributes_) {
+    new_cols.push_back(CSVScanPlan::ColumnInfo{
+        .name = attribute->name, .type = attribute->type.type_id});
+  }
+  return std::unique_ptr<AbstractPlan>(
+      new CSVScanPlan(file_name_, std::move(new_cols)));
+}
+
+inline void CSVScanPlan::PerformBinding(BindingContext &binding_context) {
+  for (uint32_t i = 0; i < attributes_.size(); i++) {
+    binding_context.BindNew(i, attributes_[i].get());
+  }
+}
+
+inline void CSVScanPlan::GetOutputColumns(std::vector<oid_t> &columns) const {
+  columns.clear();
+  columns.resize(attributes_.size());
+  std::iota(columns.begin(), columns.end(), 0);
+}
+
+inline hash_t CSVScanPlan::Hash() const {
+  return HashUtil::HashBytes(file_name_.data(), file_name_.length());
+}
+
+inline bool CSVScanPlan::operator==(const AbstractPlan &rhs) const {
+  if (rhs.GetPlanNodeType() != PlanNodeType::CSVSCAN) return false;
+  const auto &other = static_cast<const CSVScanPlan &>(rhs);
+  return StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_);
+}
+
+inline void CSVScanPlan::GetAttributes(
+    std::vector<const AttributeInfo *> &ais) const {
+  ais.clear();
+  for (const auto &ai : attributes_) {
+    ais.push_back(ai.get());
+  }
 }
 
 }  // namespace planner
diff --git a/src/optimizer/input_column_deriver.cpp b/src/optimizer/input_column_deriver.cpp
index 7819f81afb9..08d7c54a4ae 100644
--- a/src/optimizer/input_column_deriver.cpp
+++ b/src/optimizer/input_column_deriver.cpp
@@ -55,6 +55,8 @@ void InputColumnDeriver::Visit(const PhysicalSeqScan *) { ScanHelper(); }
 
 void InputColumnDeriver::Visit(const PhysicalIndexScan *) { ScanHelper(); }
 
+void InputColumnDeriver::Visit(const ExternalFileScan *) { ScanHelper(); }
+
 void InputColumnDeriver::Visit(const QueryDerivedScan *op) {
   // QueryDerivedScan should only be a renaming layer
   ExprMap output_cols_map;
diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp
index 5722034ef7d..2525915fcc1 100644
--- a/src/optimizer/optimizer.cpp
+++ b/src/optimizer/optimizer.cpp
@@ -259,20 +259,33 @@ QueryInfo Optimizer::GetQueryInfo(parser::SQLStatement *tree) {
   std::shared_ptr<PropertySet> physical_props = std::make_shared<PropertySet>();
   switch (tree->GetType()) {
     case StatementType::SELECT: {
-      auto select = reinterpret_cast<parser::SelectStatement *>(tree);
+      auto *select = reinterpret_cast<parser::SelectStatement *>(tree);
       GetQueryInfoHelper(select->select_list, select->order, output_exprs,
                          physical_props);
       break;
     }
     case StatementType::INSERT: {
-      auto insert = reinterpret_cast<parser::InsertStatement *>(tree);
+      auto *insert = reinterpret_cast<parser::InsertStatement *>(tree);
       if (insert->select != nullptr)
         GetQueryInfoHelper(insert->select->select_list, insert->select->order,
                            output_exprs, physical_props);
       break;
     }
+    case StatementType::COPY: {
+      auto *copy = reinterpret_cast<parser::CopyStatement *>(tree);
+      if (copy->select_stmt != nullptr) {
+        GetQueryInfoHelper(copy->select_stmt->select_list,
+                           copy->select_stmt->order, output_exprs,
+                           physical_props);
+      } else {
+        std::unique_ptr<parser::OrderDescription> order;
+        GetQueryInfoHelper(copy->select_list, order, output_exprs,
+                           physical_props);
+      }
+      break;
+    }
     default:
-      ;
+      break;
   }
 
   return QueryInfo(output_exprs, physical_props);
diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp
index 6960e4ddd25..804184b6246 100644
--- a/src/optimizer/plan_generator.cpp
+++ b/src/optimizer/plan_generator.cpp
@@ -15,6 +15,7 @@
 #include "catalog/column_catalog.h"
 #include "catalog/index_catalog.h"
 #include "catalog/table_catalog.h"
+#include "codegen/type/type.h"
 #include "concurrency/transaction_context.h"
 #include "expression/expression_util.h"
 #include "optimizer/operator_expression.h"
@@ -131,7 +132,17 @@ void PlanGenerator::Visit(const PhysicalIndexScan *op) {
 void PlanGenerator::Visit(const ExternalFileScan *op) {
   switch (op->format) {
     case ExternalFileFormat::CSV: {
-      output_plan_.reset(new planner::CSVScanPlan(op->file_name));
+      // First construct the output column descriptions
+      std::vector<planner::CSVScanPlan::ColumnInfo> cols;
+      for (const auto *output_col : output_cols_) {
+        auto col_info = planner::CSVScanPlan::ColumnInfo{
+            .name = "", .type = output_col->GetValueType()};
+        cols.emplace_back(std::move(col_info));
+      }
+
+      // Create the plan
+      output_plan_.reset(
+          new planner::CSVScanPlan(op->file_name, std::move(cols)));
       break;
     }
   }

From 226d341d62d8cbbfd0015db4d5b4fcba0d4d6b20 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 00:41:06 -0400
Subject: [PATCH 08/42] Added function to throw expception with ill-formatted
 input string when converting to number

---
 src/codegen/runtime_functions.cpp       |  4 ++++
 src/include/codegen/runtime_functions.h | 10 +++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/codegen/runtime_functions.cpp b/src/codegen/runtime_functions.cpp
index 23dc1eec6c6..2bebfc150a6 100644
--- a/src/codegen/runtime_functions.cpp
+++ b/src/codegen/runtime_functions.cpp
@@ -255,5 +255,9 @@ void RuntimeFunctions::ThrowOverflowException() {
   throw std::overflow_error("ERROR: overflow");
 }
 
+void RuntimeFunctions::ThrowInvalidInputStringException() {
+  throw std::runtime_error("ERROR: invalid input string");
+}
+
 }  // namespace codegen
 }  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/runtime_functions.h b/src/include/codegen/runtime_functions.h
index 13712188be4..4438bce31b7 100644
--- a/src/include/codegen/runtime_functions.h
+++ b/src/include/codegen/runtime_functions.h
@@ -77,7 +77,7 @@ class RuntimeFunctions {
    */
   static void GetTileGroupLayout(const storage::TileGroup *tile_group,
                                  ColumnLayoutInfo *infos, uint32_t num_cols);
-
+  
   /**
    * Execute a parallel scan over the given table in the given database.
    *
@@ -106,6 +106,12 @@ class RuntimeFunctions {
       void *query_state, executor::ExecutorContext::ThreadStates &thread_states,
       void (*work_func)(void *, void *));
 
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Exception related functions
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
   /**
    * Throw a divide-by-zero exception. This function doesn't return.
    */
@@ -115,6 +121,8 @@ class RuntimeFunctions {
    * Throw a mathematical overflow exception. This function does not return.
    */
   static void ThrowOverflowException();
+
+  static void ThrowInvalidInputStringException();
 };
 
 }  // namespace codegen

From e9e1a8f33867a43b18457e13e01dfc4edfab755a Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 00:41:28 -0400
Subject: [PATCH 09/42] Removed serialization

---
 src/network/service/peloton_service.cpp | 4 ++--
 test/network/rpc_queryplan_test.cpp     | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/network/service/peloton_service.cpp b/src/network/service/peloton_service.cpp
index 9e5095a0916..90a5b81ee8f 100644
--- a/src/network/service/peloton_service.cpp
+++ b/src/network/service/peloton_service.cpp
@@ -357,7 +357,7 @@ void PelotonService::QueryPlan(::google::protobuf::RpcController *controller,
         LOG_ERROR("Queryplan recived desen't have type");
         break;
       }
-
+#if 0
       case PlanNodeType::SEQSCAN: {
         LOG_TRACE("SEQSCAN revieved");
         std::string plan = request->plan();
@@ -400,7 +400,7 @@ void PelotonService::QueryPlan(::google::protobuf::RpcController *controller,
 
         break;
       }
-
+#endif
       default: {
         LOG_ERROR("Queryplan recived :: Unsupported TYPE: %s",
                   PlanNodeTypeToString(plan_type).c_str());
diff --git a/test/network/rpc_queryplan_test.cpp b/test/network/rpc_queryplan_test.cpp
index cb11891a1db..90b55e06668 100644
--- a/test/network/rpc_queryplan_test.cpp
+++ b/test/network/rpc_queryplan_test.cpp
@@ -22,6 +22,7 @@ namespace test {
 class RpcQueryPlanTests : public PelotonTest {};
 
 TEST_F(RpcQueryPlanTests, BasicTest) {
+#if 0
   peloton::planner::SeqScanPlan mapped_plan_ptr;
 
   const peloton::PlanNodeType type = mapped_plan_ptr.GetPlanNodeType();
@@ -32,6 +33,7 @@ TEST_F(RpcQueryPlanTests, BasicTest) {
   bool serialize = mapped_plan_ptr.SerializeTo(output_plan);
   // Becuase the plan is not completed, so it is false
   EXPECT_FALSE(serialize);
+#endif
 }
 }
 }

From 0847d23abb383b1dd5426b3c08fb999e45e36f3a Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 00:43:29 -0400
Subject: [PATCH 10/42] Added input functions in prepartion to read table data
 from files

---
 src/codegen/proxy/runtime_functions_proxy.cpp |   2 +
 src/codegen/proxy/values_runtime_proxy.cpp    |   7 +
 src/codegen/values_runtime.cpp                | 223 ++++++++++++++++--
 .../codegen/proxy/runtime_functions_proxy.h   |   6 +
 .../codegen/proxy/values_runtime_proxy.h      |   7 +
 src/include/codegen/values_runtime.h          |  35 +++
 6 files changed, 254 insertions(+), 26 deletions(-)

diff --git a/src/codegen/proxy/runtime_functions_proxy.cpp b/src/codegen/proxy/runtime_functions_proxy.cpp
index b406a50fcca..652d1ba2e08 100644
--- a/src/codegen/proxy/runtime_functions_proxy.cpp
+++ b/src/codegen/proxy/runtime_functions_proxy.cpp
@@ -26,6 +26,8 @@ DEFINE_TYPE(ColumnLayoutInfo, "peloton::ColumnLayoutInfo", col_start_ptr,
 DEFINE_TYPE(AbstractExpression, "peloton::expression::AbstractExpression",
             opaque);
 
+DEFINE_TYPE(Type, "peloton::Type", opaque);
+
 DEFINE_METHOD(peloton::codegen, RuntimeFunctions, HashMurmur3);
 DEFINE_METHOD(peloton::codegen, RuntimeFunctions, HashCrc64);
 DEFINE_METHOD(peloton::codegen, RuntimeFunctions, GetTileGroup);
diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp
index 85f866e74f8..e8dd45d10bc 100644
--- a/src/codegen/proxy/values_runtime_proxy.cpp
+++ b/src/codegen/proxy/values_runtime_proxy.cpp
@@ -27,6 +27,13 @@ DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputTimestamp);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputDecimal);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarchar);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarbinary);
+
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBoolean);
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputTinyInt);
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputSmallInt);
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputInteger);
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBigInt);
+
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, CompareStrings);
 
 }  // namespace codegen
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index 461d6f8faf7..33977174925 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -6,12 +6,16 @@
 //
 // Identification: src/codegen/values_runtime.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "codegen/values_runtime.h"
 
+#include <type_traits>
+
+#include "codegen/runtime_functions.h"
+#include "codegen/type/type.h"
 #include "type/value.h"
 #include "type/type_util.h"
 #include "type/value_factory.h"
@@ -22,75 +26,242 @@ namespace codegen {
 
 namespace {
 
-inline void SetValue(type::Value *val_ptr, type::Value &&val) {
-  new (val_ptr) type::Value(val);
+inline void SetValue(peloton::type::Value *val_ptr,
+                     peloton::type::Value &&val) {
+  new (val_ptr) peloton::type::Value(val);
 }
 
 }  // namespace
 
 void ValuesRuntime::OutputBoolean(char *values, uint32_t idx, bool val,
                                   bool is_null) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
   if (is_null) {
-    SetValue(&vals[idx],
-             type::ValueFactory::GetNullValueByType(type::TypeId::BOOLEAN));
+    SetValue(&vals[idx], peloton::type::ValueFactory::GetNullValueByType(
+                             peloton::type::TypeId::BOOLEAN));
   } else {
-    SetValue(&vals[idx], type::ValueFactory::GetBooleanValue(val));
+    SetValue(&vals[idx], peloton::type::ValueFactory::GetBooleanValue(val));
   }
 }
 
 void ValuesRuntime::OutputTinyInt(char *values, uint32_t idx, int8_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetTinyIntValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetTinyIntValue(val));
 }
 
 void ValuesRuntime::OutputSmallInt(char *values, uint32_t idx, int16_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetSmallIntValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetSmallIntValue(val));
 }
 
 void ValuesRuntime::OutputInteger(char *values, uint32_t idx, int32_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetIntegerValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetIntegerValue(val));
 }
 
 void ValuesRuntime::OutputBigInt(char *values, uint32_t idx, int64_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetBigIntValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetBigIntValue(val));
 }
 
 void ValuesRuntime::OutputDate(char *values, uint32_t idx, int32_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetDateValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetDateValue(val));
 }
 
 void ValuesRuntime::OutputTimestamp(char *values, uint32_t idx, int64_t val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetTimestampValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetTimestampValue(val));
 }
 
 void ValuesRuntime::OutputDecimal(char *values, uint32_t idx, double val) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetDecimalValue(val));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx], peloton::type::ValueFactory::GetDecimalValue(val));
 }
 
 void ValuesRuntime::OutputVarchar(char *values, uint32_t idx, const char *str,
                                   uint32_t len) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
-  SetValue(&vals[idx], type::ValueFactory::GetVarcharValue(str, len, false));
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
+  SetValue(&vals[idx],
+           peloton::type::ValueFactory::GetVarcharValue(str, len, false));
 }
 
 void ValuesRuntime::OutputVarbinary(char *values, uint32_t idx, const char *ptr,
                                     uint32_t len) {
-  auto *vals = reinterpret_cast<type::Value *>(values);
+  auto *vals = reinterpret_cast<peloton::type::Value *>(values);
   const auto *bin_ptr = reinterpret_cast<const unsigned char *>(ptr);
   SetValue(&vals[idx],
-           type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false));
+           peloton::type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false));
+}
+
+namespace {
+
+void TrimLeftRight(char *&left, char *&right) {
+  while (*left == ' ') {
+    left++;
+  }
+  while (*right == ' ') {
+    right++;
+  }
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
+    char *ptr, uint32_t len) {
+  char *start = ptr, *end = ptr + len;
+  if (start == end) {
+    // ERROR
+  }
+
+  // Trim whitespace on left and right
+  TrimLeftRight(start, end);
+
+  // Check negative or positive sign
+  bool negative = false;
+  if (*start == '-') {
+    negative = true;
+    start++;
+  } else if (*start == '+') {
+    start++;
+  }
+
+  int64_t num = 0;
+  while (start != end) {
+    if (*start < '0' || *start > '9') {
+      RuntimeFunctions::ThrowInvalidInputStringException();
+    }
+
+    num = (num * 10) + (*start - '0');
+
+    start++;
+  }
+
+  if (negative) {
+    num = -num;
+  }
+
+  if (num <= std::numeric_limits<T>::min() ||
+      num >= std::numeric_limits<T>::max()) {
+    RuntimeFunctions::ThrowOverflowException();
+  }
+
+  return static_cast<T>(num);
+}
+
+}  // namespace
+
+bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
+                                 char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+
+  char *start = ptr, *end = ptr + len;
+
+  // Trim whitespace on both ends
+  TrimLeftRight(start, end);
+
+  //
+  uint64_t trimmed_len = end - start;
+
+  // Check cases
+  switch (*start) {
+    case 't':
+    case 'T': {
+      static constexpr char kTrue[] = "true";
+      if (strncasecmp(start, kTrue, std::min(trimmed_len, sizeof(kTrue)))) {
+        return true;
+      }
+      break;
+    }
+    case 'f':
+    case 'F': {
+      static constexpr char kFalse[] = "false";
+      if (strncasecmp(start, kFalse, std::min(trimmed_len, sizeof(kFalse)))) {
+        return false;
+      }
+      break;
+    }
+    case 'y':
+    case 'Y': {
+      static constexpr char kYes[] = "yes";
+      if (strncasecmp(start, kYes, std::min(trimmed_len, sizeof(kYes)))) {
+        return true;
+      }
+      break;
+    }
+    case 'n':
+    case 'N': {
+      static constexpr char kNo[] = "no";
+      if (strncasecmp(start, kNo, std::min(trimmed_len, sizeof(kNo)))) {
+        return false;
+      }
+      break;
+    }
+    case 'o':
+    case 'O': {
+      // 'o' not enough to distinguish between on/off
+      static constexpr char kOff[] = "off";
+      static constexpr char kOn[] = "on";
+      if (strncasecmp(start, kOff, std::min(trimmed_len, sizeof(kOff)))) {
+        return false;
+      } else if (strncasecmp(start, kOn, std::min(trimmed_len, sizeof(kOn)))) {
+        return true;
+      }
+      break;
+    }
+    case '0': {
+      if (trimmed_len == 1) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+    case '1': {
+      if (trimmed_len == 1) {
+        return true;
+      } else {
+        return false;
+      }
+    }
+    default: { break; }
+  }
+
+  // Error
+  RuntimeFunctions::ThrowInvalidInputStringException();
+  __builtin_unreachable();
+}
+
+int8_t ValuesRuntime::InputTinyInt(UNUSED_ATTRIBUTE const type::Type &type,
+                                   char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+  return ToNum<int8_t>(ptr, len);
+}
+
+int16_t ValuesRuntime::InputSmallInt(UNUSED_ATTRIBUTE const type::Type &type,
+                                     char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+  return ToNum<int16_t>(ptr, len);
+}
+
+int32_t ValuesRuntime::InputInteger(UNUSED_ATTRIBUTE const type::Type &type,
+                                    char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+  return ToNum<int32_t>(ptr, len);
+}
+
+int64_t ValuesRuntime::InputBigInt(UNUSED_ATTRIBUTE const type::Type &type,
+                                   char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+  return ToNum<int64_t>(ptr, len);
 }
 
 int32_t ValuesRuntime::CompareStrings(const char *str1, uint32_t len1,
                                       const char *str2, uint32_t len2) {
-  return type::TypeUtil::CompareStrings(str1, len1, str2, len2);
+  return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2);
 }
 
 }  // namespace codegen
diff --git a/src/include/codegen/proxy/runtime_functions_proxy.h b/src/include/codegen/proxy/runtime_functions_proxy.h
index c20ba145eb4..5700f7fffb9 100644
--- a/src/include/codegen/proxy/runtime_functions_proxy.h
+++ b/src/include/codegen/proxy/runtime_functions_proxy.h
@@ -33,6 +33,11 @@ PROXY(AbstractExpression) {
   DECLARE_TYPE;
 };
 
+PROXY(Type) {
+  DECLARE_MEMBER(0, char[sizeof(codegen::type::Type)], opaque);
+  DECLARE_TYPE;
+};
+
 PROXY(RuntimeFunctions) {
   DECLARE_METHOD(HashMurmur3);
   DECLARE_METHOD(HashCrc64);
@@ -47,6 +52,7 @@ PROXY(RuntimeFunctions) {
 
 TYPE_BUILDER(ColumnLayoutInfo, codegen::RuntimeFunctions::ColumnLayoutInfo);
 TYPE_BUILDER(AbstractExpression, expression::AbstractExpression);
+TYPE_BUILDER(Type, codegen::type::Type);
 
 }  // namespace codegen
 }  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h
index e74954a999a..77f78979572 100644
--- a/src/include/codegen/proxy/values_runtime_proxy.h
+++ b/src/include/codegen/proxy/values_runtime_proxy.h
@@ -29,6 +29,13 @@ PROXY(ValuesRuntime) {
   DECLARE_METHOD(OutputDecimal);
   DECLARE_METHOD(OutputVarchar);
   DECLARE_METHOD(OutputVarbinary);
+
+  DECLARE_MEMBER(InputBoolean);
+  DECLARE_MEMBER(InputTinyInt);
+  DECLARE_MEMBER(InputSmallInt);
+  DECLARE_MEMBER(InputInteger);
+  DECLARE_MEMBER(InputBigInt);
+
   DECLARE_METHOD(CompareStrings);
 };
 
diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h
index e6cf4967ca2..e37396e5aa8 100644
--- a/src/include/codegen/values_runtime.h
+++ b/src/include/codegen/values_runtime.h
@@ -17,8 +17,18 @@
 namespace peloton {
 namespace codegen {
 
+namespace type {
+class Type;
+}  // namespace type
+
 class ValuesRuntime {
  public:
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Output functions
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
   // Write out the given boolean value into the array at the provided index
   static void OutputBoolean(char *values, uint32_t idx, bool val, bool is_null);
 
@@ -51,6 +61,31 @@ class ValuesRuntime {
   static void OutputVarbinary(char *values, uint32_t idx, const char *str,
                               uint32_t len);
 
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Input functions
+  ////
+  //////////////////////////////////////////////////////////////////////////////
+
+  static bool InputBoolean(const type::Type &type, char *ptr, uint32_t len);
+
+  static int8_t InputTinyInt(const type::Type &type, char *ptr, uint32_t len);
+
+  static int16_t InputSmallInt(const type::Type &type, char *ptr, uint32_t len);
+
+  static int32_t InputInteger(const type::Type &type, char *ptr, uint32_t len);
+
+  static int64_t InputBigInt(const type::Type &type, char *ptr, uint32_t len);
+
+  /**
+   * Compare two strings, returning an integer value indicating their sort order
+   *
+   * @param str1 A pointer to the first string
+   * @param len1 The length of the first string
+   * @param str2 A pointer to the second string
+   * @param len2 The length of the second string
+   * @return
+   */
   static int32_t CompareStrings(const char *str1, uint32_t len1,
                                 const char *str2, uint32_t len2);
 };

From 1d9e33430693016078f3080dd3e5c3469eeb2859 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 09:26:01 -0400
Subject: [PATCH 11/42] All SQL types must now provide an input function to
 convert a string into a SQL type

---
 src/codegen/proxy/values_runtime_proxy.cpp    |  1 +
 src/codegen/type/array_type.cpp               | 11 +++--
 src/codegen/type/bigint_type.cpp              | 37 +++++++--------
 src/codegen/type/boolean_type.cpp             | 16 ++++---
 src/codegen/type/date_type.cpp                | 11 ++++-
 src/codegen/type/decimal_type.cpp             | 30 +++++++------
 src/codegen/type/integer_type.cpp             | 45 +++++++++----------
 src/codegen/type/smallint_type.cpp            | 39 ++++++++--------
 src/codegen/type/sql_type.cpp                 |  6 +++
 src/codegen/type/timestamp_type.cpp           | 11 ++++-
 src/codegen/type/tinyint_type.cpp             | 25 ++++++-----
 src/codegen/type/varbinary_type.cpp           | 11 +++--
 src/codegen/type/varchar_type.cpp             | 22 +++++----
 src/codegen/values_runtime.cpp                | 39 +++++++++++++++-
 .../codegen/proxy/values_runtime_proxy.h      | 10 ++---
 src/include/codegen/type/array_type.h         |  3 ++
 src/include/codegen/type/bigint_type.h        |  3 ++
 src/include/codegen/type/boolean_type.h       |  3 ++
 src/include/codegen/type/date_type.h          |  3 ++
 src/include/codegen/type/decimal_type.h       |  3 ++
 src/include/codegen/type/integer_type.h       |  3 ++
 src/include/codegen/type/smallint_type.h      |  3 ++
 src/include/codegen/type/sql_type.h           |  2 +
 src/include/codegen/type/timestamp_type.h     |  3 ++
 src/include/codegen/type/tinyint_type.h       |  3 ++
 src/include/codegen/type/varbinary_type.h     |  3 ++
 src/include/codegen/type/varchar_type.h       |  3 ++
 27 files changed, 231 insertions(+), 118 deletions(-)

diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp
index e8dd45d10bc..37f90834362 100644
--- a/src/codegen/proxy/values_runtime_proxy.cpp
+++ b/src/codegen/proxy/values_runtime_proxy.cpp
@@ -13,6 +13,7 @@
 #include "codegen/proxy/values_runtime_proxy.h"
 
 #include "codegen/proxy/value_proxy.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
 
 namespace peloton {
 namespace codegen {
diff --git a/src/codegen/type/array_type.cpp b/src/codegen/type/array_type.cpp
index b99daa2a4ac..f9e6e49a677 100644
--- a/src/codegen/type/array_type.cpp
+++ b/src/codegen/type/array_type.cpp
@@ -61,9 +61,8 @@ static std::vector<TypeSystem::NoArgOpInfo> kNoArgOperatorTable = {};
 Array::Array()
     : SqlType(peloton::type::TypeId::ARRAY),
       type_system_(kImplicitCastingTable, kExplicitCastingTable,
-                   kComparisonTable, kUnaryOperatorTable,
-                   kBinaryOperatorTable, kNaryOperatorTable,
-                   kNoArgOperatorTable) {}
+                   kComparisonTable, kUnaryOperatorTable, kBinaryOperatorTable,
+                   kNaryOperatorTable, kNoArgOperatorTable) {}
 
 Value Array::GetMinValue(UNUSED_ATTRIBUTE CodeGen &codegen) const {
   throw Exception{"Arrays don't have minimum values ...."};
@@ -86,6 +85,12 @@ void Array::GetTypeForMaterialization(
       "Arrays currently do not have a materialization format. Fix me."};
 }
 
+llvm::Function *Array::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"Array's can't be input ... for now ..."};
+}
+
 llvm::Function *Array::GetOutputFunction(
     UNUSED_ATTRIBUTE CodeGen &codegen,
     UNUSED_ATTRIBUTE const Type &type) const {
diff --git a/src/codegen/type/bigint_type.cpp b/src/codegen/type/bigint_type.cpp
index e20e3e0396f..9332bc51fbc 100644
--- a/src/codegen/type/bigint_type.cpp
+++ b/src/codegen/type/bigint_type.cpp
@@ -190,8 +190,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
   }
 
   Value Impl(CodeGen &codegen, const Value &val,
-             const TypeSystem::InvocationContext &ctx)
-    const override {
+             const TypeSystem::InvocationContext &ctx) const override {
     PELOTON_ASSERT(SupportsType(val.GetType()));
     // The BigInt subtraction implementation
     Sub sub;
@@ -201,7 +200,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
     // We want: raw_ret = (val < 0 ? 0 - val : val)
     auto sub_result = sub.Impl(codegen, zero, val, ctx);
     auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue());
-    auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
+    auto *raw_ret =
+        codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
     return Value{BigInt::Instance(), raw_ret};
   }
 };
@@ -287,7 +287,7 @@ struct Sqrt : public TypeSystem::UnaryOperatorHandleNull {
  protected:
   Value Impl(CodeGen &codegen, const Value &val,
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
-  const override {
+      const override {
     auto casted = cast.Impl(codegen, val, Decimal::Instance());
     auto *raw_ret = codegen.Sqrt(casted.GetValue());
     return Value{Decimal::Instance(), raw_ret};
@@ -332,10 +332,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull {
 };
 
 // Subtraction
-bool Sub::SupportsTypes(const Type &left_type,
-                        const Type &right_type) const {
+bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const {
   return left_type.GetSqlType() == BigInt::Instance() &&
-    left_type == right_type;
+         left_type == right_type;
 }
 
 Type Sub::ResultType(UNUSED_ATTRIBUTE const Type &left_type,
@@ -350,7 +349,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right,
   // Do subtraction
   llvm::Value *overflow_bit = nullptr;
   llvm::Value *result = codegen.CallSubWithOverflow(
-        left.GetValue(), right.GetValue(), overflow_bit);
+      left.GetValue(), right.GetValue(), overflow_bit);
 
   if (ctx.on_error == OnError::Exception) {
     codegen.ThrowIfOverflow(overflow_bit);
@@ -513,20 +512,17 @@ struct Modulo : public TypeSystem::BinaryOperatorHandleNull {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL};
 
+// clang-format off
 // Explicit casts
 CastBigInt kCastBigInt;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BOOLEAN,
-     kCastBigInt},
-    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::TINYINT,
-     kCastBigInt},
-    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::SMALLINT,
-     kCastBigInt},
-    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::INTEGER,
-     kCastBigInt},
+    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BOOLEAN, kCastBigInt},
+    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::TINYINT, kCastBigInt},
+    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::SMALLINT, kCastBigInt},
+    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::INTEGER, kCastBigInt},
     {peloton::type::TypeId::BIGINT, peloton::type::TypeId::BIGINT, kCastBigInt},
-    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL,
-     kCastBigInt}};
+    {peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL, kCastBigInt}};
+// clang-format on
 
 // Comparison operations
 CompareBigInt kCompareBigInt;
@@ -599,6 +595,11 @@ void BigInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *BigInt::GetInputFunction(
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return ValuesRuntimeProxy::InputBigInt.GetFunction(codegen);
+}
+
 llvm::Function *BigInt::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputBigInt.GetFunction(codegen);
diff --git a/src/codegen/type/boolean_type.cpp b/src/codegen/type/boolean_type.cpp
index edc761d8179..5f7387ed9b4 100644
--- a/src/codegen/type/boolean_type.cpp
+++ b/src/codegen/type/boolean_type.cpp
@@ -251,17 +251,16 @@ struct LogicalOr : public TypeSystem::BinaryOperatorHandleNull {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::BOOLEAN};
 
+// clang-format off
 // Explicit casts
 CastBooleanToInteger kBooleanToInteger;
 CastBooleanToDecimal kBooleanToDecimal;
 CastBooleanToVarchar kBooleanToVarchar;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::INTEGER,
-     kBooleanToInteger},
-    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::VARCHAR,
-     kBooleanToVarchar},
-    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::DECIMAL,
-     kBooleanToDecimal}};
+    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::INTEGER, kBooleanToInteger},
+    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::VARCHAR, kBooleanToVarchar},
+    {peloton::type::TypeId::BOOLEAN, peloton::type::TypeId::DECIMAL, kBooleanToDecimal}};
+// clang-format on
 
 // Comparison operations
 CompareBoolean kCompareBoolean;
@@ -325,6 +324,11 @@ void Boolean::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *Boolean::GetInputFunction(
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return ValuesRuntimeProxy::InputBoolean.GetFunction(codegen);
+}
+
 llvm::Function *Boolean::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputBoolean.GetFunction(codegen);
diff --git a/src/codegen/type/date_type.cpp b/src/codegen/type/date_type.cpp
index 8f11f4d9ff1..26342c23db9 100644
--- a/src/codegen/type/date_type.cpp
+++ b/src/codegen/type/date_type.cpp
@@ -130,11 +130,12 @@ struct CompareDate : public TypeSystem::SimpleComparisonHandleNull {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP};
 
+// clang-format off
 // Explicit casts
 CastDateToTimestamp kDateToTimestamp;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP,
-     kDateToTimestamp}};
+    {peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP, kDateToTimestamp}};
+// clang-format on
 
 // Comparison operations
 CompareDate kCompareDate;
@@ -187,6 +188,12 @@ void Date::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *Date::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"Date inputs not supported yet"};
+}
+
 llvm::Function *Date::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputDate.GetFunction(codegen);
diff --git a/src/codegen/type/decimal_type.cpp b/src/codegen/type/decimal_type.cpp
index f081013e0b2..50a0b09e29c 100644
--- a/src/codegen/type/decimal_type.cpp
+++ b/src/codegen/type/decimal_type.cpp
@@ -192,9 +192,9 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
 
   Value Impl(CodeGen &codegen, const Value &val,
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
-    const override {
+      const override {
     llvm::Value *raw_ret =
-      codegen.Call(DecimalFunctionsProxy::Abs, {val.GetValue()});
+        codegen.Call(DecimalFunctionsProxy::Abs, {val.GetValue()});
     return Value{Decimal::Instance(), raw_ret};
   }
 };
@@ -473,21 +473,17 @@ struct Modulo : public TypeSystem::BinaryOperatorHandleNull {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::DECIMAL};
 
+// clang-format off
 // Explicit casting rules
 CastDecimal kCastDecimal;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BOOLEAN,
-     kCastDecimal},
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::TINYINT,
-     kCastDecimal},
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::SMALLINT,
-     kCastDecimal},
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::INTEGER,
-     kCastDecimal},
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BIGINT,
-     kCastDecimal},
-    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::DECIMAL,
-     kCastDecimal}};
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BOOLEAN, kCastDecimal},
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::TINYINT, kCastDecimal},
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::SMALLINT, kCastDecimal},
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::INTEGER, kCastDecimal},
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::BIGINT, kCastDecimal},
+    {peloton::type::TypeId::DECIMAL, peloton::type::TypeId::DECIMAL, kCastDecimal}};
+// clang-format on
 
 // Comparison operations
 CompareDecimal kCompareDecimal;
@@ -562,6 +558,12 @@ void Decimal::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *Decimal::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"Decimal inputs not implemented yet"};
+}
+
 llvm::Function *Decimal::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   // TODO: We should be using the precision/scale in the output function
diff --git a/src/codegen/type/integer_type.cpp b/src/codegen/type/integer_type.cpp
index dc49056a5d1..92809098341 100644
--- a/src/codegen/type/integer_type.cpp
+++ b/src/codegen/type/integer_type.cpp
@@ -187,8 +187,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
   }
 
   Value Impl(CodeGen &codegen, const Value &val,
-             const TypeSystem::InvocationContext &ctx)
-    const override {
+             const TypeSystem::InvocationContext &ctx) const override {
     // The integer subtraction implementation
     Sub sub;
     // Zero place-holder
@@ -197,7 +196,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
     // We want: raw_ret = (val < 0 ? 0 - val : val)
     auto sub_result = sub.Impl(codegen, zero, val, ctx);
     auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue());
-    auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
+    auto *raw_ret =
+        codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
     return Value{Integer::Instance(), raw_ret};
   }
 };
@@ -251,7 +251,7 @@ struct Floor : public TypeSystem::UnaryOperatorHandleNull {
 // Ceiling
 struct Ceil : public TypeSystem::UnaryOperatorHandleNull {
   CastInteger cast;
-  
+
   bool SupportsType(const Type &type) const override {
     return type.GetSqlType() == Integer::Instance();
   }
@@ -283,7 +283,7 @@ struct Sqrt : public TypeSystem::UnaryOperatorHandleNull {
  protected:
   Value Impl(CodeGen &codegen, const Value &val,
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
-  const override {
+      const override {
     auto casted = cast.Impl(codegen, val, Decimal::Instance());
     auto *raw_ret = codegen.Sqrt(casted.GetValue());
     return Value{Decimal::Instance(), raw_ret};
@@ -328,10 +328,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull {
 };
 
 // Subtraction
-bool Sub::SupportsTypes(const Type &left_type,
-                        const Type &right_type) const {
+bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const {
   return left_type.GetSqlType() == Integer::Instance() &&
-    left_type == right_type;
+         left_type == right_type;
 }
 
 Type Sub::ResultType(UNUSED_ATTRIBUTE const Type &left_type,
@@ -346,7 +345,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right,
   // Do subtraction
   llvm::Value *overflow_bit = nullptr;
   llvm::Value *result = codegen.CallSubWithOverflow(
-        left.GetValue(), right.GetValue(), overflow_bit);
+      left.GetValue(), right.GetValue(), overflow_bit);
 
   if (ctx.on_error == OnError::Exception) {
     codegen.ThrowIfOverflow(overflow_bit);
@@ -510,26 +509,21 @@ std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT,
     peloton::type::TypeId::DECIMAL};
 
+// clang-format off
 // Explicit casting rules
 CastInteger kCastInteger;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BOOLEAN,
-     kCastInteger},
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::TINYINT,
-     kCastInteger},
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::SMALLINT,
-     kCastInteger},
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::INTEGER,
-     kCastInteger},
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT,
-     kCastInteger},
-    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::DECIMAL,
-     kCastInteger}};
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BOOLEAN, kCastInteger},
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::TINYINT, kCastInteger},
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::SMALLINT, kCastInteger},
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::INTEGER, kCastInteger},
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT, kCastInteger},
+    {peloton::type::TypeId::INTEGER, peloton::type::TypeId::DECIMAL, kCastInteger}};
+// clang-format on
 
 // Comparison operations
 CompareInteger kCompareInteger;
-std::vector<TypeSystem::ComparisonInfo> kComparisonTable = {
-    {kCompareInteger}};
+std::vector<TypeSystem::ComparisonInfo> kComparisonTable = {{kCompareInteger}};
 
 // Unary operators
 Negate kNegOp;
@@ -599,6 +593,11 @@ void Integer::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *Integer::GetInputFunction(
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return ValuesRuntimeProxy::InputInteger.GetFunction(codegen);
+}
+
 llvm::Function *Integer::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputInteger.GetFunction(codegen);
diff --git a/src/codegen/type/smallint_type.cpp b/src/codegen/type/smallint_type.cpp
index 408523ea583..e0f31561c95 100644
--- a/src/codegen/type/smallint_type.cpp
+++ b/src/codegen/type/smallint_type.cpp
@@ -194,9 +194,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
   }
 
   Value Impl(CodeGen &codegen, const Value &val,
-             const TypeSystem::InvocationContext &ctx)
-    const override {
-
+             const TypeSystem::InvocationContext &ctx) const override {
     // The smallint subtraction implementation
     Sub sub;
     PELOTON_ASSERT(SupportsType(val.GetType()));
@@ -206,7 +204,8 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
     // We want: raw_ret = (val < 0 ? 0 - val : val)
     auto sub_result = sub.Impl(codegen, zero, val, ctx);
     auto *lt_zero = codegen->CreateICmpSLT(val.GetValue(), zero.GetValue());
-    auto *raw_ret = codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
+    auto *raw_ret =
+        codegen->CreateSelect(lt_zero, sub_result.GetValue(), val.GetValue());
     return Value{SmallInt::Instance(), raw_ret};
   }
 };
@@ -338,10 +337,9 @@ struct Add : public TypeSystem::BinaryOperatorHandleNull {
 
 // Subtraction
 
-bool Sub::SupportsTypes(const Type &left_type,
-                        const Type &right_type) const {
+bool Sub::SupportsTypes(const Type &left_type, const Type &right_type) const {
   return left_type.GetSqlType() == SmallInt::Instance() &&
-    left_type == right_type;
+         left_type == right_type;
 }
 
 Type Sub::ResultType(UNUSED_ATTRIBUTE const Type &left_type,
@@ -356,7 +354,7 @@ Value Sub::Impl(CodeGen &codegen, const Value &left, const Value &right,
   // Do subtraction
   llvm::Value *overflow_bit = nullptr;
   llvm::Value *result = codegen.CallSubWithOverflow(
-        left.GetValue(), right.GetValue(), overflow_bit);
+      left.GetValue(), right.GetValue(), overflow_bit);
 
   if (ctx.on_error == OnError::Exception) {
     codegen.ThrowIfOverflow(overflow_bit);
@@ -522,21 +520,17 @@ std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER,
     peloton::type::TypeId::BIGINT, peloton::type::TypeId::DECIMAL};
 
+// clang-format off
 // Explicit casting rules
 CastSmallInt kCastSmallInt;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BOOLEAN,
-     kCastSmallInt},
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::TINYINT,
-     kCastSmallInt},
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::SMALLINT,
-     kCastSmallInt},
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER,
-     kCastSmallInt},
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BIGINT,
-     kCastSmallInt},
-    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::DECIMAL,
-     kCastSmallInt}};
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BOOLEAN, kCastSmallInt},
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::TINYINT, kCastSmallInt},
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::SMALLINT, kCastSmallInt},
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::INTEGER, kCastSmallInt},
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::BIGINT, kCastSmallInt},
+    {peloton::type::TypeId::SMALLINT, peloton::type::TypeId::DECIMAL, kCastSmallInt}};
+// clang-format on
 
 // Comparison operations
 CompareSmallInt kCompareSmallInt;
@@ -610,6 +604,11 @@ void SmallInt::GetTypeForMaterialization(CodeGen &codegen,
   len_type = nullptr;
 }
 
+llvm::Function *SmallInt::GetInputFunction(
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return ValuesRuntimeProxy::InputSmallInt.GetFunction(codegen);
+}
+
 llvm::Function *SmallInt::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputSmallInt.GetFunction(codegen);
diff --git a/src/codegen/type/sql_type.cpp b/src/codegen/type/sql_type.cpp
index 6901976b008..49613d6d378 100644
--- a/src/codegen/type/sql_type.cpp
+++ b/src/codegen/type/sql_type.cpp
@@ -54,6 +54,12 @@ class Invalid : public SqlType, public Singleton<Invalid> {
     throw Exception{"INVALID type doesn't have a materialization type"};
   }
 
+  llvm::Function *GetInputFunction(
+      UNUSED_ATTRIBUTE CodeGen &codegen,
+      UNUSED_ATTRIBUTE const Type &type) const override {
+    throw Exception{"INVALID type does not have an input function"};
+  }
+
   llvm::Function *GetOutputFunction(
       UNUSED_ATTRIBUTE CodeGen &codegen,
       UNUSED_ATTRIBUTE const Type &type) const override {
diff --git a/src/codegen/type/timestamp_type.cpp b/src/codegen/type/timestamp_type.cpp
index 73603f222b2..68dcd180f0f 100644
--- a/src/codegen/type/timestamp_type.cpp
+++ b/src/codegen/type/timestamp_type.cpp
@@ -148,11 +148,12 @@ struct Now : public TypeSystem::NoArgOperator {
 std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::DATE, peloton::type::TypeId::TIMESTAMP};
 
+// clang-format off
 // Explicit casts
 CastTimestampToDate kTimestampToDate;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::TIMESTAMP, peloton::type::TypeId::DATE,
-     kTimestampToDate}};
+    {peloton::type::TypeId::TIMESTAMP, peloton::type::TypeId::DATE, kTimestampToDate}};
+// clang-format on
 
 // Comparisons
 CompareTimestamp kCompareTimestamp;
@@ -209,6 +210,12 @@ void Timestamp::GetTypeForMaterialization(CodeGen &codegen,
   len_type = nullptr;
 }
 
+llvm::Function *Timestamp::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"Timestamp input not implemented yet"};
+}
+
 llvm::Function *Timestamp::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputTimestamp.GetFunction(codegen);
diff --git a/src/codegen/type/tinyint_type.cpp b/src/codegen/type/tinyint_type.cpp
index 254ef0d8e47..24cad11558c 100644
--- a/src/codegen/type/tinyint_type.cpp
+++ b/src/codegen/type/tinyint_type.cpp
@@ -516,21 +516,17 @@ std::vector<peloton::type::TypeId> kImplicitCastingTable = {
     peloton::type::TypeId::INTEGER, peloton::type::TypeId::BIGINT,
     peloton::type::TypeId::DECIMAL};
 
+// clang-format off
 // Explicit casting rules
 CastTinyInt kCastTinyInt;
 std::vector<TypeSystem::CastInfo> kExplicitCastingTable = {
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BOOLEAN,
-     kCastTinyInt},
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::TINYINT,
-     kCastTinyInt},
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::SMALLINT,
-     kCastTinyInt},
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::INTEGER,
-     kCastTinyInt},
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BIGINT,
-     kCastTinyInt},
-    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::DECIMAL,
-     kCastTinyInt}};
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BOOLEAN, kCastTinyInt},
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::TINYINT, kCastTinyInt},
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::SMALLINT, kCastTinyInt},
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::INTEGER, kCastTinyInt},
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::BIGINT, kCastTinyInt},
+    {peloton::type::TypeId::TINYINT, peloton::type::TypeId::DECIMAL, kCastTinyInt}};
+// clang-format on
 
 // Comparison operations
 CompareTinyInt kCompareTinyInt;
@@ -603,6 +599,11 @@ void TinyInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = nullptr;
 }
 
+llvm::Function *TinyInt::GetInputFunction(
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return ValuesRuntimeProxy::InputTinyInt.GetFunction(codegen);
+}
+
 llvm::Function *TinyInt::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   return ValuesRuntimeProxy::OutputTinyInt.GetFunction(codegen);
diff --git a/src/codegen/type/varbinary_type.cpp b/src/codegen/type/varbinary_type.cpp
index 7706545c84c..bcbf0c8a1de 100644
--- a/src/codegen/type/varbinary_type.cpp
+++ b/src/codegen/type/varbinary_type.cpp
@@ -159,9 +159,8 @@ std::vector<TypeSystem::NoArgOpInfo> kNoArgOperatorTable = {};
 Varbinary::Varbinary()
     : SqlType(peloton::type::TypeId::VARBINARY),
       type_system_(kImplicitCastingTable, kExplicitCastingTable,
-                   kComparisonTable, kUnaryOperatorTable,
-                   kBinaryOperatorTable, kNaryOperatorTable, 
-                   kNoArgOperatorTable) {}
+                   kComparisonTable, kUnaryOperatorTable, kBinaryOperatorTable,
+                   kNaryOperatorTable, kNoArgOperatorTable) {}
 
 Value Varbinary::GetMinValue(UNUSED_ATTRIBUTE CodeGen &codegen) const {
   throw Exception{"The VARBINARY type does not have a minimum value ..."};
@@ -183,6 +182,12 @@ void Varbinary::GetTypeForMaterialization(CodeGen &codegen,
   len_type = codegen.Int32Type();
 }
 
+llvm::Function *Varbinary::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"Blob input not implemented yet"};
+}
+
 llvm::Function *Varbinary::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   // TODO: We should use the length information in the type?
diff --git a/src/codegen/type/varchar_type.cpp b/src/codegen/type/varchar_type.cpp
index 0066457e425..001b6afaca9 100644
--- a/src/codegen/type/varchar_type.cpp
+++ b/src/codegen/type/varchar_type.cpp
@@ -498,11 +498,8 @@ struct Substr : public TypeSystem::NaryOperator {
     // Setup function arguments
     llvm::Value *executor_ctx = ctx.executor_context;
     std::vector<llvm::Value *> args = {
-        executor_ctx,
-        input_args[0].GetValue(),
-        input_args[0].GetLength(),
-        input_args[1].GetValue(),
-        input_args[2].GetValue(),
+        executor_ctx, input_args[0].GetValue(), input_args[0].GetLength(),
+        input_args[1].GetValue(), input_args[2].GetValue(),
     };
 
     // Call
@@ -550,9 +547,12 @@ LTrim kLTrim;
 RTrim kRTrim;
 Repeat kRepeat;
 std::vector<TypeSystem::BinaryOpInfo> kBinaryOperatorTable = {
-    {OperatorId::Like, kLike},         {OperatorId::DateTrunc, kDateTrunc},
-    {OperatorId::DatePart, kDatePart}, {OperatorId::BTrim, kBTrim},
-    {OperatorId::LTrim, kLTrim},       {OperatorId::RTrim, kRTrim},
+    {OperatorId::Like, kLike},
+    {OperatorId::DateTrunc, kDateTrunc},
+    {OperatorId::DatePart, kDatePart},
+    {OperatorId::BTrim, kBTrim},
+    {OperatorId::LTrim, kLTrim},
+    {OperatorId::RTrim, kRTrim},
     {OperatorId::Repeat, kRepeat}};
 
 // Nary operations
@@ -596,6 +596,12 @@ void Varchar::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
   len_type = codegen.Int32Type();
 }
 
+llvm::Function *Varchar::GetInputFunction(
+    UNUSED_ATTRIBUTE CodeGen &codegen,
+    UNUSED_ATTRIBUTE const Type &type) const {
+  throw NotImplementedException{"String input not implemented yet"};
+}
+
 llvm::Function *Varchar::GetOutputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   // TODO: We should use the length information in the type?
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index 33977174925..1e3324ade4d 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -24,6 +24,12 @@
 namespace peloton {
 namespace codegen {
 
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Output functions
+///
+////////////////////////////////////////////////////////////////////////////////
+
 namespace {
 
 inline void SetValue(peloton::type::Value *val_ptr,
@@ -94,8 +100,22 @@ void ValuesRuntime::OutputVarbinary(char *values, uint32_t idx, const char *ptr,
            peloton::type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false));
 }
 
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Input functions
+///
+////////////////////////////////////////////////////////////////////////////////
+
 namespace {
 
+/**
+ * Skip all leading and trailing whitespace from the string bounded by the
+ * provided pointers. This function will modify the input pointers to point to
+ * the first non-space character at the start and end of the input string.
+ *
+ * @param[in,out] left A pointer to the leftmost character in the input string
+ * @param[in,out] right A pointer to the rightmost character in the input string
+ */
 void TrimLeftRight(char *&left, char *&right) {
   while (*left == ' ') {
     left++;
@@ -105,6 +125,17 @@ void TrimLeftRight(char *&left, char *&right) {
   }
 }
 
+/**
+ * Convert the provided input string into a integral number. This function
+ * handles leading whitespace and leading negative (-) or positive (+) signs.
+ * Additionally, it performs a bounds check to ensure the number falls into the
+ * valid range of numbers for the given type.
+ *
+ * @tparam T The integral type (int8_t, int16_t, int32_t, int64_t)
+ * @param ptr A pointer to the start of the input string
+ * @param len The length of the input string
+ * @return The numeric interpretation of the input string
+ */
 template <typename T>
 typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     char *ptr, uint32_t len) {
@@ -113,7 +144,7 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     // ERROR
   }
 
-  // Trim whitespace on left and right
+  // Trim leading and trailing whitespace
   TrimLeftRight(start, end);
 
   // Check negative or positive sign
@@ -125,6 +156,7 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     start++;
   }
 
+  // Convert
   int64_t num = 0;
   while (start != end) {
     if (*start < '0' || *start > '9') {
@@ -136,15 +168,18 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     start++;
   }
 
+  // Negate number if we need to
   if (negative) {
     num = -num;
   }
 
+  // Perform bounds check
   if (num <= std::numeric_limits<T>::min() ||
       num >= std::numeric_limits<T>::max()) {
     RuntimeFunctions::ThrowOverflowException();
   }
 
+  // Done
   return static_cast<T>(num);
 }
 
@@ -157,7 +192,7 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
 
   char *start = ptr, *end = ptr + len;
 
-  // Trim whitespace on both ends
+  // Trim leading and trailing whitespace
   TrimLeftRight(start, end);
 
   //
diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h
index 77f78979572..3fe57ab36fb 100644
--- a/src/include/codegen/proxy/values_runtime_proxy.h
+++ b/src/include/codegen/proxy/values_runtime_proxy.h
@@ -30,11 +30,11 @@ PROXY(ValuesRuntime) {
   DECLARE_METHOD(OutputVarchar);
   DECLARE_METHOD(OutputVarbinary);
 
-  DECLARE_MEMBER(InputBoolean);
-  DECLARE_MEMBER(InputTinyInt);
-  DECLARE_MEMBER(InputSmallInt);
-  DECLARE_MEMBER(InputInteger);
-  DECLARE_MEMBER(InputBigInt);
+  DECLARE_METHOD(InputBoolean);
+  DECLARE_METHOD(InputTinyInt);
+  DECLARE_METHOD(InputSmallInt);
+  DECLARE_METHOD(InputInteger);
+  DECLARE_METHOD(InputBigInt);
 
   DECLARE_METHOD(CompareStrings);
 };
diff --git a/src/include/codegen/type/array_type.h b/src/include/codegen/type/array_type.h
index e3b0fe7cc6a..052e55ca4ca 100644
--- a/src/include/codegen/type/array_type.h
+++ b/src/include/codegen/type/array_type.h
@@ -33,6 +33,9 @@ class Array : public SqlType, public Singleton<Array> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/bigint_type.h b/src/include/codegen/type/bigint_type.h
index 043e71a3e91..9f2abfe7aea 100644
--- a/src/include/codegen/type/bigint_type.h
+++ b/src/include/codegen/type/bigint_type.h
@@ -33,6 +33,9 @@ class BigInt : public SqlType, public Singleton<BigInt> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/boolean_type.h b/src/include/codegen/type/boolean_type.h
index 3c070b18714..5e854ba800e 100644
--- a/src/include/codegen/type/boolean_type.h
+++ b/src/include/codegen/type/boolean_type.h
@@ -35,6 +35,9 @@ class Boolean : public SqlType, public Singleton<Boolean> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/date_type.h b/src/include/codegen/type/date_type.h
index 03cf5da7827..225420e59c8 100644
--- a/src/include/codegen/type/date_type.h
+++ b/src/include/codegen/type/date_type.h
@@ -33,6 +33,9 @@ class Date : public SqlType, public Singleton<Date> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/decimal_type.h b/src/include/codegen/type/decimal_type.h
index b180fc2b4eb..6260fb98aba 100644
--- a/src/include/codegen/type/decimal_type.h
+++ b/src/include/codegen/type/decimal_type.h
@@ -33,6 +33,9 @@ class Decimal : public SqlType, public Singleton<Decimal> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/integer_type.h b/src/include/codegen/type/integer_type.h
index b8f6d97ea4f..dbc2b30957e 100644
--- a/src/include/codegen/type/integer_type.h
+++ b/src/include/codegen/type/integer_type.h
@@ -33,6 +33,9 @@ class Integer : public SqlType, public Singleton<Integer> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/smallint_type.h b/src/include/codegen/type/smallint_type.h
index 86f0e9a2cb3..9c1068a0a82 100644
--- a/src/include/codegen/type/smallint_type.h
+++ b/src/include/codegen/type/smallint_type.h
@@ -33,6 +33,9 @@ class SmallInt : public SqlType, public Singleton<SmallInt> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/sql_type.h b/src/include/codegen/type/sql_type.h
index a9232339570..256073bf80e 100644
--- a/src/include/codegen/type/sql_type.h
+++ b/src/include/codegen/type/sql_type.h
@@ -56,6 +56,8 @@ class SqlType {
   virtual void GetTypeForMaterialization(CodeGen &codegen,
                                          llvm::Type *&val_type,
                                          llvm::Type *&len_type) const = 0;
+  virtual llvm::Function *GetInputFunction(CodeGen &codegen,
+                                           const Type &type) const = 0;
   virtual llvm::Function *GetOutputFunction(CodeGen &codegen,
                                             const Type &type) const = 0;
   virtual const TypeSystem &GetTypeSystem() const = 0;
diff --git a/src/include/codegen/type/timestamp_type.h b/src/include/codegen/type/timestamp_type.h
index b185cc349bf..febc95f1077 100644
--- a/src/include/codegen/type/timestamp_type.h
+++ b/src/include/codegen/type/timestamp_type.h
@@ -33,6 +33,9 @@ class Timestamp : public SqlType, public Singleton<Timestamp> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/tinyint_type.h b/src/include/codegen/type/tinyint_type.h
index ae7cbd86b18..8593dd7b1de 100644
--- a/src/include/codegen/type/tinyint_type.h
+++ b/src/include/codegen/type/tinyint_type.h
@@ -33,6 +33,9 @@ class TinyInt : public SqlType, public Singleton<TinyInt> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/varbinary_type.h b/src/include/codegen/type/varbinary_type.h
index 54974e0a613..b9ad9cd3cf0 100644
--- a/src/include/codegen/type/varbinary_type.h
+++ b/src/include/codegen/type/varbinary_type.h
@@ -33,6 +33,9 @@ class Varbinary : public SqlType, public Singleton<Varbinary> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 
diff --git a/src/include/codegen/type/varchar_type.h b/src/include/codegen/type/varchar_type.h
index 796d493772a..1664a8a10c7 100644
--- a/src/include/codegen/type/varchar_type.h
+++ b/src/include/codegen/type/varchar_type.h
@@ -33,6 +33,9 @@ class Varchar : public SqlType, public Singleton<Varchar> {
   void GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
                                  llvm::Type *&len_type) const override;
 
+  llvm::Function *GetInputFunction(CodeGen &codegen,
+                                   const Type &type) const override;
+
   llvm::Function *GetOutputFunction(CodeGen &codegen,
                                     const Type &type) const override;
 

From 3e3d689b5d70e2e571f453dbfaa1d2955e64cfd1 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 10:36:01 -0400
Subject: [PATCH 12/42] Added test for value integrity

---
 src/codegen/values_runtime.cpp        | 28 +++++-----
 src/include/codegen/values_runtime.h  | 15 ++++--
 test/codegen/value_integrity_test.cpp | 76 +++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 20 deletions(-)

diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index 1e3324ade4d..dddc0a43ac6 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -116,12 +116,12 @@ namespace {
  * @param[in,out] left A pointer to the leftmost character in the input string
  * @param[in,out] right A pointer to the rightmost character in the input string
  */
-void TrimLeftRight(char *&left, char *&right) {
+void TrimLeftRight(const char *&left, const char *&right) {
   while (*left == ' ') {
     left++;
   }
-  while (*right == ' ') {
-    right++;
+  while (right > left && *(right - 1) == ' ') {
+    right--;
   }
 }
 
@@ -138,11 +138,9 @@ void TrimLeftRight(char *&left, char *&right) {
  */
 template <typename T>
 typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
-    char *ptr, uint32_t len) {
-  char *start = ptr, *end = ptr + len;
-  if (start == end) {
-    // ERROR
-  }
+    const char *ptr, uint32_t len) {
+  const char *start = ptr;
+  const char *end = start + len;
 
   // Trim leading and trailing whitespace
   TrimLeftRight(start, end);
@@ -173,7 +171,7 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     num = -num;
   }
 
-  // Perform bounds check
+  // Range check
   if (num <= std::numeric_limits<T>::min() ||
       num >= std::numeric_limits<T>::max()) {
     RuntimeFunctions::ThrowOverflowException();
@@ -186,11 +184,11 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
 }  // namespace
 
 bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
-                                 char *ptr, uint32_t len) {
+                                 const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
   PELOTON_ASSERT(len != 0 && "Length must be non-zero");
 
-  char *start = ptr, *end = ptr + len;
+  const char *start = ptr, *end = ptr + len;
 
   // Trim leading and trailing whitespace
   TrimLeftRight(start, end);
@@ -267,28 +265,28 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
 }
 
 int8_t ValuesRuntime::InputTinyInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                   char *ptr, uint32_t len) {
+                                   const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
   PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int8_t>(ptr, len);
 }
 
 int16_t ValuesRuntime::InputSmallInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                     char *ptr, uint32_t len) {
+                                     const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
   PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int16_t>(ptr, len);
 }
 
 int32_t ValuesRuntime::InputInteger(UNUSED_ATTRIBUTE const type::Type &type,
-                                    char *ptr, uint32_t len) {
+                                    const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
   PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int32_t>(ptr, len);
 }
 
 int64_t ValuesRuntime::InputBigInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                   char *ptr, uint32_t len) {
+                                   const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
   PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int64_t>(ptr, len);
diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h
index e37396e5aa8..206e9ed9bb2 100644
--- a/src/include/codegen/values_runtime.h
+++ b/src/include/codegen/values_runtime.h
@@ -67,15 +67,20 @@ class ValuesRuntime {
   ////
   //////////////////////////////////////////////////////////////////////////////
 
-  static bool InputBoolean(const type::Type &type, char *ptr, uint32_t len);
+  static bool InputBoolean(const type::Type &type, const char *ptr,
+                           uint32_t len);
 
-  static int8_t InputTinyInt(const type::Type &type, char *ptr, uint32_t len);
+  static int8_t InputTinyInt(const type::Type &type, const char *ptr,
+                             uint32_t len);
 
-  static int16_t InputSmallInt(const type::Type &type, char *ptr, uint32_t len);
+  static int16_t InputSmallInt(const type::Type &type, const char *ptr,
+                               uint32_t len);
 
-  static int32_t InputInteger(const type::Type &type, char *ptr, uint32_t len);
+  static int32_t InputInteger(const type::Type &type, const char *ptr,
+                              uint32_t len);
 
-  static int64_t InputBigInt(const type::Type &type, char *ptr, uint32_t len);
+  static int64_t InputBigInt(const type::Type &type, const char *ptr,
+                             uint32_t len);
 
   /**
    * Compare two strings, returning an integer value indicating their sort order
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 551e3956e75..4c4ccf97690 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -17,6 +17,7 @@
 #include "codegen/type/smallint_type.h"
 #include "codegen/type/integer_type.h"
 #include "codegen/type/bigint_type.h"
+#include "codegen/values_runtime.h"
 
 namespace peloton {
 namespace test {
@@ -161,5 +162,80 @@ TEST_F(ValueIntegrityTest, IntegerDivideByZero) {
   }
 }
 
+namespace {
+
+template <typename T>
+using InputFunc = T (*)(const codegen::type::Type &, const char *, uint32_t);
+
+template <typename T>
+void TestInputIntegral(
+    const codegen::type::Type &type, InputFunc<T> TestFunc,
+    std::vector<std::pair<std::string, int64_t>> extra_valid_tests = {},
+    std::vector<std::string> extra_invalid_tests = {},
+    std::vector<std::string> extra_overflow_tests = {}) {
+  // Default valid tests - these are valid for all integral types
+  std::vector<std::pair<std::string, int64_t>> valid_tests = {{"0", 0},
+                                                              {"-1", -1},
+                                                              {"2", 2},
+                                                              {"+3", 3},
+                                                              {"  4", 4},
+                                                              {"  -5", -5},
+                                                              {"  +6", 6},
+                                                              {"7  ", 7},
+                                                              {"-8  ", -8},
+                                                              {"  9  ", 9},
+                                                              {"  -10  ", -10},
+                                                              {"  +11  ", 11}};
+  valid_tests.insert(valid_tests.end(), extra_valid_tests.begin(),
+                     extra_valid_tests.end());
+
+  // Default invalid tests
+  std::vector<std::string> invalid_tests = {"a",   "-b",  "+c",  " 1c",
+                                            "2d ", "3 3", "-4 4"};
+  invalid_tests.insert(invalid_tests.end(), extra_invalid_tests.begin(),
+                       extra_invalid_tests.end());
+
+  // Default overflow tests
+  std::vector<std::string> overflow_tests = {
+      std::to_string(static_cast<int64_t>(std::numeric_limits<T>::min()) - 1),
+      std::to_string(static_cast<int64_t>(std::numeric_limits<T>::max()) + 1)};
+  overflow_tests.insert(overflow_tests.end(), extra_overflow_tests.begin(),
+                        extra_overflow_tests.end());
+
+  for (const auto &test : valid_tests) {
+    auto *ptr = test.first.data();
+    auto len = static_cast<uint32_t>(test.first.length());
+    EXPECT_EQ(test.second, TestFunc(type, ptr, len));
+  }
+
+  for (const auto &test : invalid_tests) {
+    auto *ptr = test.data();
+    auto len = static_cast<uint32_t>(test.length());
+    EXPECT_THROW(TestFunc(type, ptr, len), std::runtime_error);
+  }
+
+  for (const auto &test : overflow_tests) {
+    auto *ptr = test.data();
+    auto len = static_cast<uint32_t>(test.length());
+    EXPECT_THROW(TestFunc(type, ptr, len), std::overflow_error);
+  }
+}
+}  // namespace
+
+TEST_F(ValueIntegrityTest, InputIntegralTypesTest) {
+  codegen::type::Type tinyint{type::TypeId::TINYINT, false};
+  TestInputIntegral<int8_t>(tinyint, codegen::ValuesRuntime::InputTinyInt,
+                            {{"-126", -126}, {"126", 126}});
+
+  codegen::type::Type smallint{type::TypeId::SMALLINT, false};
+  TestInputIntegral<int16_t>(smallint, codegen::ValuesRuntime::InputSmallInt);
+
+  codegen::type::Type integer{type::TypeId::INTEGER, false};
+  TestInputIntegral<int32_t>(integer, codegen::ValuesRuntime::InputInteger);
+
+  codegen::type::Type bigint{type::TypeId::BIGINT, false};
+  TestInputIntegral<int64_t>(bigint, codegen::ValuesRuntime::InputBigInt);
+}
+
 }  // namespace test
 }  // namespace peloton
\ No newline at end of file

From 473b9b423ef5b6110c45b9df32dc3b4419e6e04c Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 30 Apr 2018 22:11:34 -0400
Subject: [PATCH 13/42] First take at CSV Scan translator

---
 src/codegen/codegen.cpp                       |  21 ++-
 src/codegen/operator/csv_scan_translator.cpp  | 171 ++++++++++++++++++
 src/codegen/proxy/csv_scanner_proxy.cpp       |  31 ++++
 src/codegen/query_compiler.cpp                |   3 +-
 src/codegen/translator_factory.cpp            |   7 +
 src/codegen/type/type.cpp                     |  13 +-
 src/codegen/util/csv_scanner.cpp              |  75 ++++++++
 src/include/codegen/codegen.h                 |   8 +-
 .../codegen/operator/csv_scan_translator.h    |  74 ++++++++
 src/include/codegen/proxy/csv_scanner_proxy.h |  47 +++++
 src/include/codegen/type/type.h               |  16 ++
 src/include/codegen/util/csv_scanner.h        |  65 +++++++
 src/include/planner/aggregate_plan.h          |   2 +-
 src/include/planner/insert_plan.h             |   3 -
 src/planner/aggregate_plan.cpp                |  48 ++---
 src/planner/insert_plan.cpp                   |  13 +-
 16 files changed, 545 insertions(+), 52 deletions(-)
 create mode 100644 src/codegen/operator/csv_scan_translator.cpp
 create mode 100644 src/codegen/proxy/csv_scanner_proxy.cpp
 create mode 100644 src/codegen/util/csv_scanner.cpp
 create mode 100644 src/include/codegen/operator/csv_scan_translator.h
 create mode 100644 src/include/codegen/proxy/csv_scanner_proxy.h
 create mode 100644 src/include/codegen/util/csv_scanner.h

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index b6449ae4138..6a96a0f7542 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -60,7 +60,7 @@ llvm::Constant *CodeGen::ConstDouble(double val) const {
 }
 
 llvm::Value *CodeGen::ConstString(const std::string &str_val,
-                                     const std::string &name) const {
+                                  const std::string &name) const {
   // Strings are treated as arrays of bytes
   auto *str = llvm::ConstantDataArray::getString(GetContext(), str_val);
   auto *global_var =
@@ -69,8 +69,18 @@ llvm::Value *CodeGen::ConstString(const std::string &str_val,
   return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)});
 }
 
-llvm::Value *CodeGen::ConstGenericBytes(llvm::Type *type, const void *data,
-                                        uint32_t length,
+llvm::Value *CodeGen::ConstType(const type::Type &type) {
+  auto iter = type_variables_.find(type);
+  if (iter != type_variables_.end()) {
+    return iter->second;
+  }
+  const type::Type t = type;
+  llvm::Value *ret = ConstGenericBytes(&type, sizeof(type), "type");
+  type_variables_.insert(std::make_pair(t, ret));
+  return ret;
+}
+
+llvm::Value *CodeGen::ConstGenericBytes(const void *data, uint32_t length,
                                         const std::string &name) const {
   // Create the constant data array that wraps the input data
   llvm::ArrayRef<uint8_t> elements{reinterpret_cast<const uint8_t *>(data),
@@ -78,8 +88,9 @@ llvm::Value *CodeGen::ConstGenericBytes(llvm::Type *type, const void *data,
   auto *arr = llvm::ConstantDataArray::get(GetContext(), elements);
 
   // Create a global variable for the data
-  auto *global_var = new llvm::GlobalVariable(
-      GetModule(), type, true, llvm::GlobalValue::InternalLinkage, arr, name);
+  auto *global_var =
+      new llvm::GlobalVariable(GetModule(), arr->getType(), true,
+                               llvm::GlobalValue::InternalLinkage, arr, name);
 
   // Return a pointer to the first element
   return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)});
diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
new file mode 100644
index 00000000000..e38525ada35
--- /dev/null
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -0,0 +1,171 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scan_translator.cpp
+//
+// Identification: src/codegen/operator/csv_scan_translator.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/operator/csv_scan_translator.h"
+
+#include "codegen/compilation_context.h"
+#include "codegen/function_builder.h"
+#include "codegen/lang/if.h"
+#include "codegen/operator/projection_translator.h"
+#include "codegen/pipeline.h"
+#include "codegen/proxy/csv_scanner_proxy.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
+#include "codegen/type/sql_type.h"
+#include "planner/csv_scan_plan.h"
+
+namespace peloton {
+namespace codegen {
+
+CSVScanTranslator::CSVScanTranslator(const planner::CSVScanPlan &scan,
+                                     CompilationContext &context,
+                                     Pipeline &pipeline)
+    : OperatorTranslator(context, pipeline), scan_(scan) {
+  auto &runtime_state = context.GetRuntimeState();
+  scanner_id_ = runtime_state.RegisterState(
+      "csvScanner", CSVScannerProxy::GetType(GetCodeGen()));
+}
+
+void CSVScanTranslator::InitializeState() {
+  auto &codegen = GetCodeGen();
+
+  // Arguments
+  auto *scanner_ptr = LoadStatePtr(scanner_id_);
+  auto *file_path = codegen.ConstString(scan_.GetFileName(), "filePath");
+  auto *output_col_types = ConstructColumnDescriptor();
+  auto *runtime_state_ptr = codegen->CreatePointerCast(
+      codegen.GetState(), codegen.VoidType()->getPointerTo());
+
+  std::vector<oid_t> out_cols;
+  scan_.GetOutputColumns(out_cols);
+  auto *num_output_cols =
+      codegen.Const32(static_cast<uint32_t>(out_cols.size()));
+
+  auto *consumer_func = codegen->CreatePointerCast(
+      consumer_func_, proxy::TypeBuilder<void(*)(void *)>::GetType(codegen));
+
+  // Call
+  codegen.Call(CSVScannerProxy::Init,
+               {scanner_ptr, file_path, output_col_types, num_output_cols,
+                consumer_func, runtime_state_ptr});
+}
+
+void CSVScanTranslator::DefineAuxiliaryFunctions() {
+  // Define consumer function here
+  CodeGen &codegen = GetCodeGen();
+  CompilationContext &cc = GetCompilationContext();
+
+  std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
+      {"runtimeState",
+       cc.GetRuntimeState().FinalizeType(codegen)->getPointerTo()}};
+  codegen::FunctionDeclaration decl{codegen.GetCodeContext(), "consumer",
+                                    FunctionDeclaration::Visibility::Internal,
+                                    codegen.VoidType(), arg_types};
+  codegen::FunctionBuilder scan_consumer{codegen.GetCodeContext(), decl};
+  {
+    ConsumerContext ctx{cc, GetPipeline()};
+
+    Vector v{nullptr, 1, nullptr};
+    RowBatch one{GetCompilationContext(), codegen.Const32(0),
+                 codegen.Const32(1), v, false};
+    RowBatch::Row row{one, nullptr, nullptr};
+
+    // Get the attributes
+    std::vector<const planner::AttributeInfo *> output_attributes;
+    scan_.GetAttributes(output_attributes);
+
+    // Load the pointer to the columns view
+    auto *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+        CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 4));
+
+    // For each column, call the type's input function to read the input value
+    for (uint32_t i = 0; i < output_attributes.size(); i++) {
+      const auto *output_ai = output_attributes[i];
+
+      const auto &sql_type = output_ai->type.GetSqlType();
+
+      auto *is_null = codegen->CreateConstInBoundsGEP2_32(
+          CSVScannerColumnProxy::GetType(codegen), cols, i, 3);
+
+      codegen::Value val, null_val;
+      lang::If not_null{codegen,
+                        codegen->CreateNot(codegen->CreateLoad(is_null))};
+      {
+        // Grab a pointer to the ptr and length
+        auto *type = codegen->CreatePointerCast(
+            codegen.ConstType(output_ai->type),
+            TypeProxy::GetType(codegen)->getPointerTo());
+        auto *ptr = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+            CSVScannerColumnProxy::GetType(codegen), cols, i, 1));
+        auto *len = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+            CSVScannerColumnProxy::GetType(codegen), cols, i, 2));
+
+        // Invoke the input function
+        auto *input_func = sql_type.GetInputFunction(codegen, output_ai->type);
+        auto *raw_val = codegen.CallFunc(input_func, {type, ptr, len});
+
+        // Non-null value
+        val = codegen::Value{output_ai->type, raw_val, nullptr,
+                             codegen.ConstBool(false)};
+      }
+      not_null.ElseBlock();
+      {
+        // Null value
+        null_val = sql_type.GetNullValue(codegen);
+      }
+      not_null.EndIf();
+
+      codegen::Value final_val = not_null.BuildPHI(val, null_val);
+      row.RegisterAttributeValue(output_ai, final_val);
+    }
+
+    ctx.Consume(row);
+    scan_consumer.ReturnAndFinish();
+  }
+  consumer_func_ = scan_consumer.GetFunction();
+}
+
+void CSVScanTranslator::Produce() const {
+  auto *scanner_ptr = LoadStatePtr(scanner_id_);
+  GetCodeGen().Call(CSVScannerProxy::Produce, {scanner_ptr});
+}
+
+void CSVScanTranslator::TearDownState() {
+  auto *scanner_ptr = LoadStatePtr(scanner_id_);
+  GetCodeGen().Call(CSVScannerProxy::Destroy, {scanner_ptr});
+}
+
+std::string CSVScanTranslator::GetName() const {
+  return std::__cxx11::string();
+}
+
+llvm::Value *CSVScanTranslator::ConstructColumnDescriptor() const {
+  // First, we pull out all the attributes produced by the scan, in order
+  std::vector<const planner::AttributeInfo *> cols;
+  scan_.GetAttributes(cols);
+
+  // But, what we really need are just the column types, so pull those out now
+  std::vector<codegen::type::Type> col_types_vec;
+  for (const auto *col : cols) {
+    col_types_vec.push_back(col->type);
+  }
+
+  CodeGen &codegen = GetCodeGen();
+
+  auto num_bytes = cols.size() * sizeof(decltype(col_types_vec)::value_type);
+  auto *bytes = codegen.ConstGenericBytes(
+      col_types_vec.data(), static_cast<uint32_t>(num_bytes), "colTypes");
+  return codegen->CreatePointerCast(
+      bytes, TypeProxy::GetType(codegen)->getPointerTo());
+}
+
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/codegen/proxy/csv_scanner_proxy.cpp b/src/codegen/proxy/csv_scanner_proxy.cpp
new file mode 100644
index 00000000000..89b4b7dca16
--- /dev/null
+++ b/src/codegen/proxy/csv_scanner_proxy.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scanner_proxy.cpp
+//
+// Identification: src/codegen/proxy/csv_scanner_proxy.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/proxy/csv_scanner_proxy.h"
+
+#include "codegen/proxy/runtime_functions_proxy.h"
+
+namespace peloton {
+namespace codegen {
+
+DEFINE_TYPE(CSVScanner, "util::CSVScanner", MEMBER(file_path), MEMBER(callback),
+            MEMBER(opaque_callback_state), MEMBER(cols), MEMBER(cols_view));
+
+DEFINE_TYPE(CSVScannerColumn, "util::CSVScanner::Column", MEMBER(type),
+            MEMBER(ptr),  MEMBER(len), MEMBER(is_null));
+
+DEFINE_METHOD(peloton::codegen::util, CSVScanner, Init);
+DEFINE_METHOD(peloton::codegen::util, CSVScanner, Destroy);
+DEFINE_METHOD(peloton::codegen::util, CSVScanner, Produce);
+
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/codegen/query_compiler.cpp b/src/codegen/query_compiler.cpp
index 104e4f5783a..d6aa9912d51 100644
--- a/src/codegen/query_compiler.cpp
+++ b/src/codegen/query_compiler.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/query_compiler.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,6 +46,7 @@ std::unique_ptr<Query> QueryCompiler::Compile(
 bool QueryCompiler::IsSupported(const planner::AbstractPlan &plan) {
   switch (plan.GetPlanNodeType()) {
     case PlanNodeType::SEQSCAN:
+    case PlanNodeType::CSVSCAN:
     case PlanNodeType::ORDERBY:
     case PlanNodeType::DELETE:
     case PlanNodeType::INSERT:
diff --git a/src/codegen/translator_factory.cpp b/src/codegen/translator_factory.cpp
index f10fd863033..15b9dab7e7a 100644
--- a/src/codegen/translator_factory.cpp
+++ b/src/codegen/translator_factory.cpp
@@ -23,6 +23,7 @@
 #include "codegen/expression/parameter_translator.h"
 #include "codegen/expression/tuple_value_translator.h"
 #include "codegen/operator/block_nested_loop_join_translator.h"
+#include "codegen/operator/csv_scan_translator.h"
 #include "codegen/operator/delete_translator.h"
 #include "codegen/operator/global_group_by_translator.h"
 #include "codegen/operator/hash_group_by_translator.h"
@@ -42,6 +43,7 @@
 #include "expression/operator_expression.h"
 #include "expression/tuple_value_expression.h"
 #include "planner/aggregate_plan.h"
+#include "planner/csv_scan_plan.h"
 #include "planner/delete_plan.h"
 #include "planner/hash_join_plan.h"
 #include "planner/hash_plan.h"
@@ -68,6 +70,11 @@ std::unique_ptr<OperatorTranslator> TranslatorFactory::CreateTranslator(
       translator = new TableScanTranslator(scan, context, pipeline);
       break;
     }
+    case PlanNodeType::CSVSCAN: {
+      auto &scan = static_cast<const planner::CSVScanPlan &>(plan_node);
+      translator = new CSVScanTranslator(scan, context, pipeline);
+      break;
+    }
     case PlanNodeType::PROJECTION: {
       auto &projection =
           static_cast<const planner::ProjectionPlan &>(plan_node);
diff --git a/src/codegen/type/type.cpp b/src/codegen/type/type.cpp
index ed8425302ee..9b4e8e7cf1b 100644
--- a/src/codegen/type/type.cpp
+++ b/src/codegen/type/type.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/type/type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,15 +18,20 @@ namespace peloton {
 namespace codegen {
 namespace type {
 
-Type::Type() : Type(peloton::type::TypeId::INVALID, false) {}
-
 Type::Type(peloton::type::TypeId type_id, bool _nullable)
-    : type_id(type_id), nullable(_nullable) {}
+    : type_id(type_id), nullable(_nullable) {
+  aux_info.varlen = 0;
+  aux_info.numeric_info.precision = 0;
+  aux_info.numeric_info.scale = 0;
+}
+
+Type::Type() : Type(peloton::type::TypeId::INVALID, false) {}
 
 Type::Type(const SqlType &sql_type, bool _nullable)
     : Type(sql_type.TypeId(), _nullable) {}
 
 bool Type::operator==(const Type &other) const {
+  // TODO(pmenon): This isn't correct; we need to check all other fields ...
   return type_id == other.type_id;
 }
 
diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
new file mode 100644
index 00000000000..544269c82bd
--- /dev/null
+++ b/src/codegen/util/csv_scanner.cpp
@@ -0,0 +1,75 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scanner.cpp
+//
+// Identification: src/codegen/util/csv_scanner.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/util/csv_scanner.h"
+
+#include <boost/filesystem.hpp>
+
+#include "common/exception.h"
+#include "util/string_util.h"
+
+namespace peloton {
+namespace codegen {
+namespace util {
+
+CSVScanner::CSVScanner(const std::string &file_path,
+                       const codegen::type::Type *col_types, uint32_t num_cols,
+                       CSVScanner::Callback func, void *opaque_state)
+    : file_path_(file_path), func_(func), opaque_state_(opaque_state) {
+  // Initialize the columns
+  cols_.resize(num_cols);
+  for (uint32_t i = 0; i < num_cols; i++) {
+    cols_[i].col_type = col_types[i];
+    cols_[i].ptr = nullptr;
+    cols_[i].is_null = false;
+  }
+
+  // Setup the view. Since the Column's vector will never be resized after this
+  // point (it isn't possible to add or remove columns once the scan has been
+  // constructed), grabbing a pointer to the underlying array is safe for the
+  // lifetime of this scanner.
+  cols_view_ = cols_.data();
+}
+
+CSVScanner::~CSVScanner() {}
+
+void CSVScanner::Init(CSVScanner &scanner, const char *file_path,
+                      const codegen::type::Type *col_types, uint32_t num_cols,
+                      CSVScanner::Callback func, void *opaque_state) {
+  new (&scanner) CSVScanner(file_path, col_types, num_cols, func, opaque_state);
+}
+
+void CSVScanner::Destroy(CSVScanner &scanner) { scanner.~CSVScanner(); }
+
+void CSVScanner::Produce() { InitializeScan(); }
+
+void CSVScanner::InitializeScan() {
+  // Validity checks
+  if (!boost::filesystem::exists(file_path_)) {
+    throw ExecutorException{StringUtil::Format(
+        "ERROR: input path '%s' does not exist", file_path_.c_str())};
+  }
+
+  if (!boost::filesystem::is_directory(file_path_)) {
+    throw ExecutorException{StringUtil::Format(
+        "ERROR: input '%s' is a directory, not a file", file_path_.c_str())};
+  }
+
+  if (!boost::filesystem::is_regular_file(file_path_)) {
+    throw ExecutorException{StringUtil::Format(
+        "ERROR: unable to read file '%s'", file_path_.c_str())};
+  }
+}
+
+}  // namespace util
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h
index 09edae81900..3dceb820715 100644
--- a/src/include/codegen/codegen.h
+++ b/src/include/codegen/codegen.h
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "codegen/code_context.h"
+#include "codegen/type/type.h"
 
 namespace peloton {
 namespace codegen {
@@ -97,8 +98,8 @@ class CodeGen {
   llvm::Constant *ConstDouble(double val) const;
   llvm::Value *ConstString(const std::string &str_val,
                            const std::string &name) const;
-  llvm::Value *ConstGenericBytes(llvm::Type *type, const void *data,
-                                 uint32_t length,
+  llvm::Value *ConstType(const type::Type &type);
+  llvm::Value *ConstGenericBytes(const void *data, uint32_t length,
                                  const std::string &name) const;
   llvm::Constant *Null(llvm::Type *type) const;
   llvm::Constant *NullPtr(llvm::PointerType *type) const;
@@ -192,6 +193,9 @@ class CodeGen {
  private:
   // The context/module where all the code this class produces goes
   CodeContext &code_context_;
+
+  std::unordered_map<type::Type, llvm::Value *, type::TypeHasher,
+                     type::TypeEquality> type_variables_;
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/operator/csv_scan_translator.h b/src/include/codegen/operator/csv_scan_translator.h
new file mode 100644
index 00000000000..12e132ab4ce
--- /dev/null
+++ b/src/include/codegen/operator/csv_scan_translator.h
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scan_translator.h
+//
+// Identification: src/include/codegen/operator/csv_scan_translator.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "codegen/operator/operator_translator.h"
+
+namespace peloton {
+
+namespace planner {
+class CSVScanPlan;
+}  // namespace planner
+
+namespace codegen {
+class CompilationContext;
+class Pipeline;
+}  // namespace codegen
+
+namespace codegen {
+
+//===----------------------------------------------------------------------===//
+// A translator for CSV file scans
+//===----------------------------------------------------------------------===//
+class CSVScanTranslator : public OperatorTranslator {
+ public:
+  // Constructor
+  CSVScanTranslator(const planner::CSVScanPlan &scan,
+                    CompilationContext &context, Pipeline &pipeline);
+
+  void InitializeState() override;
+
+  void DefineAuxiliaryFunctions() override;
+
+  // The method that produces new tuples
+  void Produce() const override;
+
+  // Scans are leaves in the query plan and, hence, do not consume tuples
+  void Consume(ConsumerContext &, RowBatch &) const override {}
+  void Consume(ConsumerContext &, RowBatch::Row &) const override {}
+
+  // Similar to InitializeState(), file scans don't have any state
+  void TearDownState() override;
+
+  // Get a stringified version of this translator
+  std::string GetName() const override;
+
+ private:
+  // Plan accessor
+  const planner::CSVScanPlan &GetScanPlan() const { return scan_; }
+
+  llvm::Value *ConstructColumnDescriptor() const;
+
+ private:
+  // The scan
+  const planner::CSVScanPlan &scan_;
+
+  // The scanner state ID
+  RuntimeState::StateID scanner_id_;
+
+  // The generated CSV scan consumer function
+  llvm::Function *consumer_func_;
+};
+
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/proxy/csv_scanner_proxy.h b/src/include/codegen/proxy/csv_scanner_proxy.h
new file mode 100644
index 00000000000..c31d871ff74
--- /dev/null
+++ b/src/include/codegen/proxy/csv_scanner_proxy.h
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scanner_proxy.h
+//
+// Identification: src/include/codegen/proxy/csv_scanner_proxy.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "codegen/proxy/proxy.h"
+#include "codegen/proxy/type_builder.h"
+#include "codegen/util/csv_scanner.h"
+
+namespace peloton {
+namespace codegen {
+
+PROXY(CSVScannerColumn) {
+  DECLARE_MEMBER(0, char[sizeof(type::Type)], type);
+  DECLARE_MEMBER(1, char *, ptr);
+  DECLARE_MEMBER(2, uint32_t, len);
+  DECLARE_MEMBER(3, bool, is_null);
+  DECLARE_TYPE;
+};
+
+PROXY(CSVScanner) {
+  DECLARE_MEMBER(0, char[sizeof(std::string)], file_path);
+  DECLARE_MEMBER(1, char[sizeof(util::CSVScanner::Callback)], callback);
+  DECLARE_MEMBER(2, void *, opaque_callback_state);
+  DECLARE_MEMBER(3, char[sizeof(std::vector<util::CSVScanner::Column>)], cols);
+  DECLARE_MEMBER(4, util::CSVScanner::Column *, cols_view);
+  DECLARE_TYPE;
+
+  DECLARE_METHOD(Init);
+  DECLARE_METHOD(Destroy);
+  DECLARE_METHOD(Produce);
+};
+
+TYPE_BUILDER(CSVScanner, codegen::util::CSVScanner);
+TYPE_BUILDER(CSVScannerColumn, codegen::util::CSVScanner::Column);
+
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/type/type.h b/src/include/codegen/type/type.h
index d636d7d6572..1f485ad4e0e 100644
--- a/src/include/codegen/type/type.h
+++ b/src/include/codegen/type/type.h
@@ -15,6 +15,7 @@
 #include <cstdint>
 
 #include "type/type_id.h"
+#include "util/hash_util.h"
 
 namespace peloton {
 namespace codegen {
@@ -78,6 +79,21 @@ class Type {
   Type AsNonNullable() const;
 };
 
+struct TypeHasher {
+  std::size_t operator()(const type::Type &type) const {
+    // TODO: hash the other parts
+    auto hash = HashUtil::Hash(&type.type_id);
+    hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&type.nullable));
+    return hash;
+  }
+};
+
+struct TypeEquality {
+  bool operator()(const type::Type &l, const type::Type &r) const {
+    return l == r;
+  }
+};
+
 }  // namespace type
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
new file mode 100644
index 00000000000..711a3d13535
--- /dev/null
+++ b/src/include/codegen/util/csv_scanner.h
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scanner.h
+//
+// Identification: src/include/codegen/util/csv_scanner.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "codegen/type/type.h"
+
+namespace peloton {
+namespace codegen {
+namespace util {
+
+class CSVScanner {
+ public:
+  using Callback = void (*)(void *);
+
+  struct Column {
+    codegen::type::Type col_type;
+    char *ptr;
+    uint32_t len;
+    bool is_null;
+  };
+
+  CSVScanner(const std::string &file_path, const codegen::type::Type *col_types,
+             uint32_t num_cols, Callback func, void *opaque_state);
+
+  ~CSVScanner();
+
+  static void Init(CSVScanner &scanner, const char *file_path,
+                   const codegen::type::Type *col_types, uint32_t num_cols,
+                   Callback func, void *opaque_state);
+
+  static void Destroy(CSVScanner &scanner);
+
+  void Produce();
+
+ private:
+  void InitializeScan();
+
+ private:
+  // The file
+  const std::string file_path_;
+
+  // The callback function and opaque state
+  Callback func_;
+  void *opaque_state_;
+
+  std::vector<Column> cols_;
+  Column *cols_view_;
+};
+
+}  // namespace util
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/planner/aggregate_plan.h b/src/include/planner/aggregate_plan.h
index 56c0e99a6b6..51d9d8cfe42 100644
--- a/src/include/planner/aggregate_plan.h
+++ b/src/include/planner/aggregate_plan.h
@@ -41,7 +41,7 @@ class AggregatePlan : public AbstractPlan {
             bool distinct = false);
 
     // Bindings
-    void PerformBinding(BindingContext &binding_context);
+    void PerformBinding(bool is_global, BindingContext &binding_context);
 
     AggTerm Copy() const;
   };
diff --git a/src/include/planner/insert_plan.h b/src/include/planner/insert_plan.h
index 54072e76b3d..7c2bc212e55 100644
--- a/src/include/planner/insert_plan.h
+++ b/src/include/planner/insert_plan.h
@@ -70,9 +70,6 @@ class InsertPlan : public AbstractPlan {
                  std::vector<std::unique_ptr<expression::AbstractExpression>>> *
                  insert_values);
 
-  // Get a varlen pool - will construct the pool only if needed
-  type::AbstractPool *GetPlanPool();
-
   PlanNodeType GetPlanNodeType() const override {
     return PlanNodeType::INSERT;
   };
diff --git a/src/planner/aggregate_plan.cpp b/src/planner/aggregate_plan.cpp
index 8aad13b3edf..26f3a7e9d19 100644
--- a/src/planner/aggregate_plan.cpp
+++ b/src/planner/aggregate_plan.cpp
@@ -24,7 +24,8 @@ AggregatePlan::AggTerm::AggTerm(ExpressionType et,
                                 bool distinct)
     : aggtype(et), expression(expr), distinct(distinct) {}
 
-void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) {
+void AggregatePlan::AggTerm::PerformBinding(bool is_global,
+                                            BindingContext &binding_context) {
   // If there's an input expression, first perform binding
   auto *agg_expr = const_cast<expression::AbstractExpression *>(expression);
   if (agg_expr != nullptr) {
@@ -47,7 +48,7 @@ void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) {
       // TODO: Move this logic into the SQL type
       const auto &input_type = expression->ResultType();
       agg_ai.type = codegen::type::Type{codegen::type::Decimal::Instance(),
-                                        input_type.nullable};
+                                        input_type.nullable || is_global};
       break;
     }
     case ExpressionType::AGGREGATE_MAX:
@@ -57,6 +58,9 @@ void AggregatePlan::AggTerm::PerformBinding(BindingContext &binding_context) {
       // return type as its input expression.
       PELOTON_ASSERT(expression != nullptr);
       agg_ai.type = expression->ResultType();
+      if (is_global) {
+        agg_ai.type = agg_ai.type.AsNullable();
+      }
       break;
     }
     default: {
@@ -93,7 +97,7 @@ void AggregatePlan::PerformBinding(BindingContext &binding_context) {
   // Now let the aggregate expressions do their bindings
   for (const auto &agg_term : GetUniqueAggTerms()) {
     auto &non_const_agg_term = const_cast<AggregatePlan::AggTerm &>(agg_term);
-    non_const_agg_term.PerformBinding(input_context);
+    non_const_agg_term.PerformBinding(IsGlobal(), input_context);
   }
 
   // Handle the projection by creating two binding contexts, the first being
@@ -117,8 +121,6 @@ void AggregatePlan::PerformBinding(BindingContext &binding_context) {
     const_cast<expression::AbstractExpression *>(predicate)
         ->PerformBinding({&binding_context});
   }
-
-
 }
 
 hash_t AggregatePlan::Hash(
@@ -165,27 +167,22 @@ hash_t AggregatePlan::Hash() const {
 bool AggregatePlan::AreEqual(
     const std::vector<planner::AggregatePlan::AggTerm> &A,
     const std::vector<planner::AggregatePlan::AggTerm> &B) const {
-  if (A.size() != B.size())
-    return false;
+  if (A.size() != B.size()) return false;
 
   for (size_t i = 0; i < A.size(); i++) {
-    if (A[i].aggtype != B[i].aggtype)
-      return false;
+    if (A[i].aggtype != B[i].aggtype) return false;
 
     auto *expr = A[i].expression;
 
-    if (expr && (*expr != *B[i].expression))
-      return false;
+    if (expr && (*expr != *B[i].expression)) return false;
 
-    if (A[i].distinct != B[i].distinct)
-      return false;
+    if (A[i].distinct != B[i].distinct) return false;
   }
   return true;
 }
 
 bool AggregatePlan::operator==(const AbstractPlan &rhs) const {
-  if (GetPlanNodeType() != rhs.GetPlanNodeType())
-    return false;
+  if (GetPlanNodeType() != rhs.GetPlanNodeType()) return false;
 
   auto &other = static_cast<const planner::AggregatePlan &>(rhs);
 
@@ -195,12 +192,10 @@ bool AggregatePlan::operator==(const AbstractPlan &rhs) const {
   if ((pred == nullptr && other_pred != nullptr) ||
       (pred != nullptr && other_pred == nullptr))
     return false;
-  if (pred && *pred != *other_pred)
-    return false;
+  if (pred && *pred != *other_pred) return false;
 
   // UniqueAggTerms
-  if (!AreEqual(GetUniqueAggTerms(), other.GetUniqueAggTerms()))
-    return false;
+  if (!AreEqual(GetUniqueAggTerms(), other.GetUniqueAggTerms())) return false;
 
   // Project Info
   auto *proj_info = GetProjectInfo();
@@ -208,24 +203,19 @@ bool AggregatePlan::operator==(const AbstractPlan &rhs) const {
   if ((proj_info == nullptr && other_proj_info != nullptr) ||
       (proj_info != nullptr && other_proj_info == nullptr))
     return false;
-  if (proj_info && *proj_info != *other_proj_info)
-    return false;
+  if (proj_info && *proj_info != *other_proj_info) return false;
 
   // Group by
   size_t group_by_col_ids_count = GetGroupbyColIds().size();
-  if (group_by_col_ids_count != other.GetGroupbyColIds().size())
-    return false;
+  if (group_by_col_ids_count != other.GetGroupbyColIds().size()) return false;
 
   for (size_t i = 0; i < group_by_col_ids_count; i++) {
-    if (GetGroupbyColIds()[i] != other.GetGroupbyColIds()[i])
-      return false;
+    if (GetGroupbyColIds()[i] != other.GetGroupbyColIds()[i]) return false;
   }
 
-  if (*GetOutputSchema() != *other.GetOutputSchema())
-    return false;
+  if (*GetOutputSchema() != *other.GetOutputSchema()) return false;
 
-  if (GetAggregateStrategy() != other.GetAggregateStrategy())
-    return false;
+  if (GetAggregateStrategy() != other.GetAggregateStrategy()) return false;
 
   return (AbstractPlan::operator==(rhs));
 }
diff --git a/src/planner/insert_plan.cpp b/src/planner/insert_plan.cpp
index ff0965c8b6a..c8f0a8cc40a 100644
--- a/src/planner/insert_plan.cpp
+++ b/src/planner/insert_plan.cpp
@@ -205,11 +205,6 @@ void InsertPlan::SetDefaultValue(uint32_t idx) {
     values_.push_back(*v);
 }
 
-type::AbstractPool *InsertPlan::GetPlanPool() {
-  if (pool_.get() == nullptr) pool_.reset(new type::EphemeralPool());
-  return pool_.get();
-}
-
 void InsertPlan::SetParameterValues(std::vector<type::Value> *values) {
   LOG_TRACE("Set Parameter Values in Insert");
   auto *schema = target_table_->GetSchema();
@@ -236,15 +231,19 @@ void InsertPlan::PerformBinding(BindingContext &binding_context) {
   const auto &children = GetChildren();
 
   if (children.size() == 1) {
+    // Let child bind
     children[0]->PerformBinding(binding_context);
 
+    // Pull out what we need
     auto *scan = static_cast<planner::AbstractScan *>(children[0].get());
-    auto &col_ids = scan->GetColumnIds();
+
+    std::vector<oid_t> col_ids;
+    scan->GetOutputColumns(col_ids);
+
     for (oid_t col_id = 0; col_id < col_ids.size(); col_id++) {
       ais_.push_back(binding_context.Find(col_id));
     }
   }
-  // Binding is not required if there is no child
 }
 
 hash_t InsertPlan::Hash() const {

From 852fd42c44cd5d4457fc10e6bacf4eaf7a54b8e0 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 1 May 2018 16:48:52 -0400
Subject: [PATCH 14/42] Fix after rebase

---
 src/catalog/abstract_catalog.cpp                | 1 +
 src/optimizer/query_to_operator_transformer.cpp | 7 ++++++-
 test/optimizer/optimizer_test.cpp               | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/catalog/abstract_catalog.cpp b/src/catalog/abstract_catalog.cpp
index 53c0b938279..9d9934a7c61 100644
--- a/src/catalog/abstract_catalog.cpp
+++ b/src/catalog/abstract_catalog.cpp
@@ -35,6 +35,7 @@
 #include "executor/plan_executor.h"
 #include "executor/seq_scan_executor.h"
 #include "executor/update_executor.h"
+#include "expression/constant_value_expression.h"
 
 #include "storage/database.h"
 #include "storage/storage_manager.h"
diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp
index f5f05d6c6aa..816ef24a7fb 100644
--- a/src/optimizer/query_to_operator_transformer.cpp
+++ b/src/optimizer/query_to_operator_transformer.cpp
@@ -361,6 +361,10 @@ void QueryToOperatorTransformer::Visit(parser::UpdateStatement *op) {
 }
 void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
   if (op->is_from) {
+    // The copy statement is reading from a file into a table. We construct a
+    // logical external-file get operator as the leaf, and an insert operator
+    // as the root.
+
     auto get_op =
         std::make_shared<OperatorExpression>(LogicalExternalFileGet::make(
             GetAndIncreaseGetId(), op->format, op->file_path));
@@ -368,7 +372,8 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
     auto target_table =
         catalog::Catalog::GetInstance()
             ->GetDatabaseObject(op->table->GetDatabaseName(), txn_)
-            ->GetTableObject(op->table->GetTableName());
+            ->GetTableObject(op->table->GetTableName(),
+                             op->table->GetSchemaName());
 
     auto insert_expr = std::make_shared<OperatorExpression>(
         LogicalInsertSelect::make(target_table));
diff --git a/test/optimizer/optimizer_test.cpp b/test/optimizer/optimizer_test.cpp
index 8b5ed1e0ec7..50696017bb5 100644
--- a/test/optimizer/optimizer_test.cpp
+++ b/test/optimizer/optimizer_test.cpp
@@ -20,6 +20,7 @@
 #include "executor/create_executor.h"
 #include "executor/insert_executor.h"
 #include "executor/plan_executor.h"
+#include "expression/constant_value_expression.h"
 #include "expression/tuple_value_expression.h"
 #include "optimizer/mock_task.h"
 #include "optimizer/operators.h"

From b4906df32984864e55c92d830143a7c8840be326 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 7 May 2018 23:57:10 -0400
Subject: [PATCH 15/42] file api

---
 src/include/util/file.h |  74 ++++++++++++++++++++++++
 src/util/file.cpp       | 125 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 src/include/util/file.h
 create mode 100644 src/util/file.cpp

diff --git a/src/include/util/file.h b/src/include/util/file.h
new file mode 100644
index 00000000000..6bf35850674
--- /dev/null
+++ b/src/include/util/file.h
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// file.h
+//
+// Identification: src/include/util/file.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <fcntl.h>
+#include <memory>
+#include <string>
+
+#include "common/exception.h"
+
+namespace peloton {
+namespace util {
+
+class File {
+ public:
+  enum class AccessMode : uint8_t { ReadOnly, WriteOnly, ReadWrite };
+
+  File() : fd_(kInvalid) {}
+
+  ~File() { Close(); }
+
+  // Move
+  File(File &&other) noexcept : fd_(kInvalid) { std::swap(fd_, other.fd_); }
+
+  // Move
+  File &operator=(File &&other) noexcept {
+    // First, close this file
+    Close();
+
+    // Swap descriptors
+    std::swap(fd_, other.fd_);
+
+    // Done
+    return *this;
+  }
+
+  void Open(const std::string &name, AccessMode access_mode);
+
+  void Create(const std::string &name);
+
+  void CreateTemp();
+
+  uint64_t Read(void *data, uint64_t len) const;
+
+  uint64_t Write(void *data, uint64_t len) const;
+
+  uint64_t Size() const;
+
+  bool IsOpen() const { return fd_ != kInvalid; }
+
+  void Close();
+
+ private:
+  // The file descriptor
+  int fd_;
+
+  static constexpr int kInvalid = -1;
+
+ private:
+  DISALLOW_COPY(File);
+};
+
+}  // namespace util
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/util/file.cpp b/src/util/file.cpp
new file mode 100644
index 00000000000..de0835982c8
--- /dev/null
+++ b/src/util/file.cpp
@@ -0,0 +1,125 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// file.cpp
+//
+// Identification: src/util/file.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "util/file.h"
+
+#include "util/string_util.h"
+
+namespace peloton {
+namespace util {
+
+void File::Open(const std::string &name, File::AccessMode access_mode) {
+  // Close the existing file if it's open
+  Close();
+
+  int flags;
+  switch (access_mode) {
+    case AccessMode::ReadOnly: {
+      flags = O_RDWR;
+      break;
+    }
+    case AccessMode::WriteOnly: {
+      flags = O_WRONLY;
+      break;
+    }
+    case AccessMode::ReadWrite: {
+      flags = O_RDWR;
+      break;
+    }
+  }
+
+  // Open
+  int fd = open(name.c_str(), flags);
+
+  // Check error
+  if (fd == -1) {
+    throw Exception{
+        StringUtil::Format("Unable to read file '%s'", name.c_str())};
+  }
+
+  // Done
+  fd_ = fd;
+}
+
+uint64_t File::Read(void *data, uint64_t len) const {
+  // Ensure open
+  PELOTON_ASSERT(IsOpen());
+
+  // Perform read
+  ssize_t bytes_read = read(fd_, data, len);
+
+  // Check error
+  if (bytes_read == -1) {
+    throw Exception{
+        StringUtil::Format("Error reading file: %s", strerror(errno))};
+  }
+
+  // Done
+  return static_cast<uint64_t>(bytes_read);
+}
+
+uint64_t File::Write(void *data, uint64_t len) const {
+  // Ensure open
+  PELOTON_ASSERT(IsOpen());
+
+  // Perform write
+  ssize_t bytes_written = write(fd_, data, len);
+
+  // Check error
+  if (bytes_written == -1) {
+    throw Exception{
+        StringUtil::Format("Error writing to file: %s", strerror(errno))};
+  }
+
+  // Done
+  return static_cast<uint64_t>(bytes_written);
+}
+
+uint64_t File::Size() const {
+  // Ensure open
+  PELOTON_ASSERT(IsOpen());
+
+  // Save the current position
+  off_t curr_off = lseek(fd_, 0, SEEK_CUR);
+  if (curr_off == -1) {
+    throw Exception{StringUtil::Format(
+        "unable to read current position in file: %s", strerror(errno))};
+  }
+
+  // Seek to the end of the file, returning the new file position i.e., the
+  // size of the file in bytes.
+  off_t off = lseek(fd_, 0, SEEK_END);
+  if (off == -1) {
+    throw Exception{StringUtil::Format(
+        "unable to move file position to end file: %s", strerror(errno))};
+  }
+
+  off_t restore = lseek(fd_, curr_off, SEEK_SET);
+  if (restore == -1) {
+    throw Exception{StringUtil::Format(
+        "unable to restore position after moving to the end: %s",
+        strerror(errno))};
+  }
+
+  // Restore position
+  return static_cast<uint64_t>(off);
+}
+
+void File::Close() {
+  if (IsOpen()) {
+    close(fd_);
+    fd_ = kInvalid;
+  }
+}
+
+}  // namespace util
+}  // namespace peloton
\ No newline at end of file

From b41b863a67f83823d4cff272f2dc4215ec931103 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 8 May 2018 01:04:25 -0400
Subject: [PATCH 16/42] CSV scanner reads lines

---
 src/codegen/operator/csv_scan_translator.cpp  |  10 +-
 src/codegen/proxy/csv_scanner_proxy.cpp       |   7 +-
 src/codegen/util/csv_scanner.cpp              | 226 +++++++++++++++---
 src/include/codegen/proxy/csv_scanner_proxy.h |  13 +-
 src/include/codegen/util/csv_scanner.h        | 169 ++++++++++++-
 src/include/planner/csv_scan_plan.h           |  19 +-
 6 files changed, 392 insertions(+), 52 deletions(-)

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index e38525ada35..480ad45e479 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -39,6 +39,7 @@ void CSVScanTranslator::InitializeState() {
 
   // Arguments
   auto *scanner_ptr = LoadStatePtr(scanner_id_);
+  auto *exec_ctx_ptr = GetCompilationContext().GetExecutorContextPtr();
   auto *file_path = codegen.ConstString(scan_.GetFileName(), "filePath");
   auto *output_col_types = ConstructColumnDescriptor();
   auto *runtime_state_ptr = codegen->CreatePointerCast(
@@ -50,12 +51,15 @@ void CSVScanTranslator::InitializeState() {
       codegen.Const32(static_cast<uint32_t>(out_cols.size()));
 
   auto *consumer_func = codegen->CreatePointerCast(
-      consumer_func_, proxy::TypeBuilder<void(*)(void *)>::GetType(codegen));
+      consumer_func_, proxy::TypeBuilder<void (*)(void *)>::GetType(codegen));
 
   // Call
   codegen.Call(CSVScannerProxy::Init,
-               {scanner_ptr, file_path, output_col_types, num_output_cols,
-                consumer_func, runtime_state_ptr});
+               {scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
+                num_output_cols, consumer_func, runtime_state_ptr,
+                codegen.Const8(scan_.GetDelimiterChar()),
+                codegen.Const8(scan_.GetQuoteChar()),
+                codegen.Const8(scan_.GetEscapeChar())});
 }
 
 void CSVScanTranslator::DefineAuxiliaryFunctions() {
diff --git a/src/codegen/proxy/csv_scanner_proxy.cpp b/src/codegen/proxy/csv_scanner_proxy.cpp
index 89b4b7dca16..f57a11fe014 100644
--- a/src/codegen/proxy/csv_scanner_proxy.cpp
+++ b/src/codegen/proxy/csv_scanner_proxy.cpp
@@ -12,16 +12,17 @@
 
 #include "codegen/proxy/csv_scanner_proxy.h"
 
+#include "codegen/proxy/executor_context_proxy.h"
 #include "codegen/proxy/runtime_functions_proxy.h"
 
 namespace peloton {
 namespace codegen {
 
-DEFINE_TYPE(CSVScanner, "util::CSVScanner", MEMBER(file_path), MEMBER(callback),
-            MEMBER(opaque_callback_state), MEMBER(cols), MEMBER(cols_view));
+DEFINE_TYPE(CSVScanner, "util::CSVScanner", MEMBER(opaque1), MEMBER(cols),
+            MEMBER(opaque2));
 
 DEFINE_TYPE(CSVScannerColumn, "util::CSVScanner::Column", MEMBER(type),
-            MEMBER(ptr),  MEMBER(len), MEMBER(is_null));
+            MEMBER(ptr), MEMBER(len), MEMBER(is_null));
 
 DEFINE_METHOD(peloton::codegen::util, CSVScanner, Init);
 DEFINE_METHOD(peloton::codegen::util, CSVScanner, Destroy);
diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index 544269c82bd..b0038563339 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -15,59 +15,231 @@
 #include <boost/filesystem.hpp>
 
 #include "common/exception.h"
+#include "executor/executor_context.h"
+#include "type/abstract_pool.h"
 #include "util/string_util.h"
 
 namespace peloton {
 namespace codegen {
 namespace util {
 
-CSVScanner::CSVScanner(const std::string &file_path,
+CSVScanner::CSVScanner(peloton::type::AbstractPool &pool,
+                       const std::string &file_path,
                        const codegen::type::Type *col_types, uint32_t num_cols,
-                       CSVScanner::Callback func, void *opaque_state)
-    : file_path_(file_path), func_(func), opaque_state_(opaque_state) {
+                       CSVScanner::Callback func, void *opaque_state,
+                       char delimiter, char quote, char escape)
+    : memory_(pool),
+      file_path_(file_path),
+      file_(),
+      buffer_(nullptr),
+      buffer_begin_(0),
+      buffer_end_(0),
+      line_(nullptr),
+      line_len_(0),
+      line_maxlen_(0),
+      delimiter_(delimiter),
+      quote_(quote),
+      escape_(escape),
+      func_(func),
+      opaque_state_(opaque_state) {
+  // Make column array
+  cols_ = static_cast<CSVScanner::Column *>(
+      memory_.Allocate(sizeof(CSVScanner::Column) * num_cols));
+
   // Initialize the columns
-  cols_.resize(num_cols);
   for (uint32_t i = 0; i < num_cols; i++) {
     cols_[i].col_type = col_types[i];
     cols_[i].ptr = nullptr;
+    cols_[i].len = 0;
     cols_[i].is_null = false;
   }
-
-  // Setup the view. Since the Column's vector will never be resized after this
-  // point (it isn't possible to add or remove columns once the scan has been
-  // constructed), grabbing a pointer to the underlying array is safe for the
-  // lifetime of this scanner.
-  cols_view_ = cols_.data();
 }
 
-CSVScanner::~CSVScanner() {}
+CSVScanner::~CSVScanner() {
+  if (buffer_ != nullptr) {
+    memory_.Free(buffer_);
+  }
+  if (line_ != nullptr) {
+    memory_.Free(line_);
+  }
+  if (cols_ != nullptr) {
+    memory_.Free(cols_);
+  }
+}
 
-void CSVScanner::Init(CSVScanner &scanner, const char *file_path,
+void CSVScanner::Init(CSVScanner &scanner,
+                      executor::ExecutorContext &executor_context,
+                      const char *file_path,
                       const codegen::type::Type *col_types, uint32_t num_cols,
-                      CSVScanner::Callback func, void *opaque_state) {
-  new (&scanner) CSVScanner(file_path, col_types, num_cols, func, opaque_state);
+                      CSVScanner::Callback func, void *opaque_state,
+                      char delimiter, char quote, char escape) {
+  // Forward to constructor
+  new (&scanner)
+      CSVScanner(*executor_context.GetPool(), file_path, col_types, num_cols,
+                 func, opaque_state, delimiter, quote, escape);
 }
 
-void CSVScanner::Destroy(CSVScanner &scanner) { scanner.~CSVScanner(); }
+void CSVScanner::Destroy(CSVScanner &scanner) {
+  // Forward to destructor
+  scanner.~CSVScanner();
+}
 
-void CSVScanner::Produce() { InitializeScan(); }
+void CSVScanner::Produce() {
+  // Initialize
+  Initialize();
 
-void CSVScanner::InitializeScan() {
-  // Validity checks
-  if (!boost::filesystem::exists(file_path_)) {
-    throw ExecutorException{StringUtil::Format(
-        "ERROR: input path '%s' does not exist", file_path_.c_str())};
+  // Loop lines
+  while (const char *line = NextLine()) {
+    ProduceCSV(line);
   }
+}
+
+void CSVScanner::Initialize() {
+  // Let's first perform a few validity checks
+  boost::filesystem::path path{file_path_};
 
-  if (!boost::filesystem::is_directory(file_path_)) {
-    throw ExecutorException{StringUtil::Format(
-        "ERROR: input '%s' is a directory, not a file", file_path_.c_str())};
+  if (!boost::filesystem::exists(path)) {
+    throw ExecutorException{StringUtil::Format("input path '%s' does not exist",
+                                               file_path_.c_str())};
+  } else if (!boost::filesystem::is_regular_file(file_path_)) {
+    throw ExecutorException{
+        StringUtil::Format("unable to read file '%s'", file_path_.c_str())};
   }
 
-  if (!boost::filesystem::is_regular_file(file_path_)) {
-    throw ExecutorException{StringUtil::Format(
-        "ERROR: unable to read file '%s'", file_path_.c_str())};
+  // The path looks okay, let's try opening it
+  file_.Open(file_path_, peloton::util::File::AccessMode::ReadOnly);
+
+  // Allocate buffer space
+  buffer_ = static_cast<char *>(memory_.Allocate(kDefaultBufferSize + 1));
+
+  // Fill read-buffer
+  NextBuffer();
+
+  // Allocate space for the full line, if it doesn't fit into the buffer
+  line_ = static_cast<char *>(memory_.Allocate(kDefaultBufferSize));
+  line_len_ = 0;
+  line_maxlen_ = kDefaultBufferSize;
+}
+
+bool CSVScanner::NextBuffer() {
+  // Do read
+  buffer_begin_ = 0;
+  buffer_end_ = static_cast<uint32_t>(file_.Read(buffer_, kDefaultBufferSize));
+
+  // Update stats
+  stats_.num_reads++;
+
+  return (buffer_end_ != 0);
+}
+
+void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
+  // Short-circuit if we're not appending any data
+  if (len == 0) {
+    return;
+  }
+
+  if (line_len_ + len > line_maxlen_) {
+    // The current line buffer isn't large enough to store the new bytes, so we
+    // need to resize it. By default, we double the capacity.
+    auto new_maxlen = line_maxlen_ * 2;
+    while (new_maxlen < len) {
+      new_maxlen *= 2;
+    }
+    auto *new_line = static_cast<char *>(memory_.Allocate(new_maxlen));
+
+    // Copy the old data
+    PELOTON_MEMCPY(new_line, line_, line_len_);
+
+    // Setup pointers and sizes
+    line_ = new_line;
+    line_maxlen_ = new_maxlen;
+
+    stats_.num_reallocs++;
   }
+
+  // At this point, we've guaranteed that the line is large enough to
+  // accommodate the new bytes, so let's go ahead and perform the copy.
+
+  PELOTON_MEMCPY(line_ + line_len_, data, len);
+
+  // Increase the length of the line
+  line_len_ += len;
+
+  // Track copy stats
+  stats_.num_copies++;
+}
+
+// The main purpose of this function is to find the start of the next line in
+// the CSV file.
+const char *CSVScanner::NextLine() {
+  line_len_ = 0;
+
+  bool in_quote = false;
+  bool last_was_escape = false;
+  bool copied_to_line_buf = false;
+
+  uint32_t line_end = buffer_begin_;
+
+  while (true) {
+    if (line_end >= buffer_end_) {
+      // We need to read more data from the CSV file. But first, we need to copy
+      // all the data in the read-buffer (i.e., [buffer_begin_, buffer_end_] to
+      // the line-buffer.
+
+      AppendToCurrentLine(buffer_ + buffer_begin_,
+                          static_cast<uint32_t>(buffer_end_ - buffer_begin_));
+
+      // Now, read more data
+      if (!NextBuffer()) {
+        return nullptr;
+      }
+
+      // Reset positions
+      line_end = buffer_begin_;
+      copied_to_line_buf = true;
+    }
+
+    // Read character
+    char c = buffer_[line_end];
+
+    if (in_quote && c == escape_) {
+      last_was_escape = true;
+    }
+    if (c == quote_ && !last_was_escape) {
+      in_quote = true;
+    }
+    if (c != escape_) {
+      last_was_escape = false;
+    }
+
+    // Process the new-line character. If we a new-line and we're not currently
+    // in a quoted section, we're done.
+    if (c == '\n' && !in_quote) {
+      buffer_[line_end] = '\0';
+      break;
+    }
+
+    // Move along
+    line_end++;
+  }
+
+  // Increment line number
+  line_number_++;
+
+  if (copied_to_line_buf) {
+    AppendToCurrentLine(buffer_, line_end);
+    buffer_begin_ = line_end + 1;
+    return line_;
+  } else {
+    const char *ret = buffer_ + buffer_begin_;
+    buffer_begin_ = line_end + 1;
+    return ret;
+  }
+}
+
+void CSVScanner::ProduceCSV(UNUSED_ATTRIBUTE const char *line) {
+  // TODO: me
+  func_(opaque_state_);
 }
 
 }  // namespace util
diff --git a/src/include/codegen/proxy/csv_scanner_proxy.h b/src/include/codegen/proxy/csv_scanner_proxy.h
index c31d871ff74..fabcfe9d953 100644
--- a/src/include/codegen/proxy/csv_scanner_proxy.h
+++ b/src/include/codegen/proxy/csv_scanner_proxy.h
@@ -12,9 +12,11 @@
 
 #pragma once
 
+#include "codegen/proxy/pool_proxy.h"
 #include "codegen/proxy/proxy.h"
 #include "codegen/proxy/type_builder.h"
 #include "codegen/util/csv_scanner.h"
+#include "util/file.h"
 
 namespace peloton {
 namespace codegen {
@@ -28,11 +30,12 @@ PROXY(CSVScannerColumn) {
 };
 
 PROXY(CSVScanner) {
-  DECLARE_MEMBER(0, char[sizeof(std::string)], file_path);
-  DECLARE_MEMBER(1, char[sizeof(util::CSVScanner::Callback)], callback);
-  DECLARE_MEMBER(2, void *, opaque_callback_state);
-  DECLARE_MEMBER(3, char[sizeof(std::vector<util::CSVScanner::Column>)], cols);
-  DECLARE_MEMBER(4, util::CSVScanner::Column *, cols_view);
+  DECLARE_MEMBER(0, char[sizeof(codegen::util::CSVScanner) -
+                         sizeof(util::CSVScanner::Column *) -
+                         sizeof(util::CSVScanner::Stats) - 4],
+                 opaque1);
+  DECLARE_MEMBER(1, util::CSVScanner::Column *, cols);
+  DECLARE_MEMBER(2, char[sizeof(util::CSVScanner::Stats) + 4], opaque2);
   DECLARE_TYPE;
 
   DECLARE_METHOD(Init);
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
index 711a3d13535..56d64f1f371 100644
--- a/src/include/codegen/util/csv_scanner.h
+++ b/src/include/codegen/util/csv_scanner.h
@@ -16,48 +16,195 @@
 #include <vector>
 
 #include "codegen/type/type.h"
+#include "util/file.h"
 
 namespace peloton {
+
+namespace executor {
+class ExecutorContext;
+}  // namespace executor
+
+namespace type {
+class AbstractPool;
+}  // namespace type
+
 namespace codegen {
 namespace util {
 
+/**
+ * This is the primary class to scan CSV files.  Callers use the constructor to
+ * configure various aspects of how parsing is performed.  Callers must provide
+ * a description of the rows stored in the CSV file, and a callback function
+ * that is invoked once for every row in the CSV file.  The delimiter character,
+ * quoting character, and escape characters can also be configured through the
+ * constructor.
+ *
+ * This scanner class is fail-fast. If it finds an ill-formatted row, it will
+ * immediately throw an error.
+ *
+ * TODO: implement a more generous parser that is best-effort.
+ */
 class CSVScanner {
  public:
+  // 64K buffer size
+  static constexpr uint32_t kDefaultBufferSize = (1ul << 16);
+
+  // The signature of the callback function
   using Callback = void (*)(void *);
 
+  /**
+   * Column information
+   */
   struct Column {
+    // The type of data this column represents
     codegen::type::Type col_type;
+
+    // A pointer to where the next value of this column is
     char *ptr;
+
+    // The number of bytes
     uint32_t len;
+
+    // Is the next value of this column NULL
     bool is_null;
   };
 
-  CSVScanner(const std::string &file_path, const codegen::type::Type *col_types,
-             uint32_t num_cols, Callback func, void *opaque_state);
+  /**
+   * Various statistics tracked while we scan the CSV
+   */
+  struct Stats {
+    // The number of times the read-buffer was copied into the line-buffer
+    uint32_t num_copies = 0;
+    // The number of times we had to re-allocate the line-buffer to make room
+    // for new data (i.e., to handle really long lines that don't fit into the
+    // read-buffer)
+    uint32_t num_reallocs = 0;
+    // The number of times we had to call Read() from the file
+    uint32_t num_reads = 0;
+  };
 
+  /**
+   * Constructor.
+   *
+   * @param memory A memory pool where all allocations are sourced from
+   * @param file_path The full path to the CSV file
+   * @param col_types A description of the rows stored in the CSV
+   * @param num_cols The number of columns to expect
+   * @param func The callback function to invoke per row/line in the CSV
+   * @param opaque_state An opaque state that is passed to the callback function
+   * upon invocation.
+   * @param delimiter The character that separates columns within a row
+   * @param quote The quoting character used to quote data (i.e., strings)
+   * @param escape The character that appears before any data characters that
+   * are the same as the quote character.
+   */
+  CSVScanner(peloton::type::AbstractPool &memory, const std::string &file_path,
+             const codegen::type::Type *col_types, uint32_t num_cols,
+             Callback func, void *opaque_state, char delimiter = ',',
+             char quote = '"', char escape = '"');
+
+  /**
+   * Destructor
+   */
   ~CSVScanner();
 
-  static void Init(CSVScanner &scanner, const char *file_path,
-                   const codegen::type::Type *col_types, uint32_t num_cols,
-                   Callback func, void *opaque_state);
-
+  /**
+   * Initialization function. This is the entry point from codegen to initialize
+   * scanner instances.
+   *
+   * @param scanner The scanner we're initializing
+   * @param memory A memory pool where all allocations are sourced from
+   * @param file_path The full path to the CSV file
+   * @param col_types A description of the rows stored in the CSV
+   * @param num_cols The number of columns to expect
+   * @param func The callback function to invoke per row/line in the CSV
+   * @param opaque_state An opaque state that is passed to the callback function
+   * upon invocation.
+   * @param delimiter The character that separates columns within a row
+   * @param quote The quoting character used to quote data (i.e., strings)
+   * @param escape The character that appears before any data characters that
+   * are the same as the quote character.
+   */
+  static void Init(CSVScanner &scanner,
+                   executor::ExecutorContext &executor_context,
+                   const char *file_path, const codegen::type::Type *col_types,
+                   uint32_t num_cols, Callback func, void *opaque_state,
+                   char delimiter, char quote, char escape);
+
+  /**
+   * Destruction function. This is the entry point from codegen when cleaning up
+   * and reclaiming memory from scanner instances.
+   *
+   * @param scanner The scanner we're destroying.
+   */
   static void Destroy(CSVScanner &scanner);
 
+  /**
+   * Produce all the rows stored in the configured CSV file
+   */
   void Produce();
 
+  /**
+   * Return the list of columns
+   *
+   * @return
+   */
+  const Column *GetColumns() const { return cols_; }
+
  private:
-  void InitializeScan();
+  // Initialize the scan
+  void Initialize();
+
+  // Append bytes to the end of the currently accruing line.
+  void AppendToCurrentLine(const char *data, uint32_t len);
+
+  // Read the next line from the CSV file
+  const char *NextLine();
+
+  // Read a buffer's worth of data from the CSV file
+  bool NextBuffer();
+
+  // Produce CSV data stored in the provided line
+  void ProduceCSV(const char *line);
 
  private:
-  // The file
+  // All memory allocations happen from this pool
+  peloton::type::AbstractPool &memory_;
+
+  // The path to the CSV file
   const std::string file_path_;
 
-  // The callback function and opaque state
+  // The CSV file handle
+  peloton::util::File file_;
+
+  // The temporary buffer where raw file contents are read into
+  // TODO: make these unique_ptr's with a customer deleter
+  char *buffer_;
+  uint32_t buffer_begin_;
+  uint32_t buffer_end_;
+
+  // A pointer to the start of a line in the CSV file
+  char *line_;
+  uint32_t line_len_;
+  uint32_t line_maxlen_;
+
+  // Line number
+  uint32_t line_number_;
+
+  // The column delimiter, quote, and escape characters configured for this CSV
+  char delimiter_;
+  char quote_;
+  char escape_;
+
+  // The callback function to call for each row of the CSV, and an opaque state
   Callback func_;
   void *opaque_state_;
 
-  std::vector<Column> cols_;
-  Column *cols_view_;
+  // The columns
+  Column *cols_;
+
+  // Statistics
+  Stats stats_;
 };
 
 }  // namespace util
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index 1c14a1d9ece..f9611b22630 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -35,7 +35,8 @@ class CSVScanPlan : public AbstractScan {
    * @param file_name The file path
    * @param cols Information of the columns expected in each row of the CSV
    */
-  CSVScanPlan(std::string file_name, std::vector<ColumnInfo> &&cols);
+  CSVScanPlan(std::string file_name, std::vector<ColumnInfo> &&cols,
+              char delimiter = ',', char quote = '"', char escape = '"');
 
   //////////////////////////////////////////////////////////////////////////////
   ///
@@ -51,6 +52,10 @@ class CSVScanPlan : public AbstractScan {
 
   void GetAttributes(std::vector<const AttributeInfo *> &ais) const override;
 
+  char GetDelimiterChar() const { return delimiter_; }
+  char GetQuoteChar() const { return quote_; }
+  char GetEscapeChar() const { return escape_; }
+
   //////////////////////////////////////////////////////////////////////////////
   ///
   /// Utilities + Internal
@@ -68,6 +73,10 @@ class CSVScanPlan : public AbstractScan {
  private:
   const std::string file_name_;
 
+  char delimiter_;
+  char quote_;
+  char escape_;
+
   std::vector<std::unique_ptr<planner::AttributeInfo>> attributes_;
 };
 
@@ -78,8 +87,12 @@ class CSVScanPlan : public AbstractScan {
 ////////////////////////////////////////////////////////////////////////////////
 
 inline CSVScanPlan::CSVScanPlan(std::string file_name,
-                                std::vector<CSVScanPlan::ColumnInfo> &&cols)
-    : file_name_(std::move(file_name)) {
+                                std::vector<CSVScanPlan::ColumnInfo> &&cols,
+                                char delimiter, char quote, char escape)
+    : file_name_(std::move(file_name)),
+      delimiter_(delimiter),
+      quote_(quote),
+      escape_(escape) {
   for (const auto &col : cols) {
     std::unique_ptr<planner::AttributeInfo> attribute{
         new planner::AttributeInfo()};

From 93f39fc676086ae758b028ee450834a855555aaa Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 8 May 2018 01:51:05 -0400
Subject: [PATCH 17/42] Process CSV line in scanner

---
 src/codegen/operator/csv_scan_translator.cpp  |  2 +-
 src/codegen/util/csv_scanner.cpp              | 25 ++++++++++++++-----
 src/include/codegen/proxy/csv_scanner_proxy.h |  7 +++---
 src/include/codegen/util/csv_scanner.h        |  3 ++-
 4 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index 480ad45e479..3d87dd9dbe7 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -88,7 +88,7 @@ void CSVScanTranslator::DefineAuxiliaryFunctions() {
 
     // Load the pointer to the columns view
     auto *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
-        CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 4));
+        CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 1));
 
     // For each column, call the type's input function to read the input value
     for (uint32_t i = 0; i < output_attributes.size(); i++) {
diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index b0038563339..6c5f23ee61d 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -41,13 +41,13 @@ CSVScanner::CSVScanner(peloton::type::AbstractPool &pool,
       quote_(quote),
       escape_(escape),
       func_(func),
-      opaque_state_(opaque_state) {
+      opaque_state_(opaque_state),
+      num_cols_(num_cols) {
   // Make column array
-  cols_ = static_cast<CSVScanner::Column *>(
-      memory_.Allocate(sizeof(CSVScanner::Column) * num_cols));
+  cols_ = static_cast<Column *>(memory_.Allocate(sizeof(Column) * num_cols_));
 
   // Initialize the columns
-  for (uint32_t i = 0; i < num_cols; i++) {
+  for (uint32_t i = 0; i < num_cols_; i++) {
     cols_[i].col_type = col_types[i];
     cols_[i].ptr = nullptr;
     cols_[i].len = 0;
@@ -237,8 +237,21 @@ const char *CSVScanner::NextLine() {
   }
 }
 
-void CSVScanner::ProduceCSV(UNUSED_ATTRIBUTE const char *line) {
-  // TODO: me
+void CSVScanner::ProduceCSV(const char *line) {
+  // At this point, we have a well-formed line. Let's pull out pointers to the
+  // columns.
+
+  const auto *iter = line;
+  for (uint32_t col_idx = 0; col_idx < num_cols_; col_idx++) {
+    const char *start = iter;
+    for (; *iter != 0 && *iter != delimiter_; iter++) {}
+    cols_[col_idx].ptr = start;
+    cols_[col_idx].len = static_cast<uint32_t>(iter - start);
+    cols_[col_idx].is_null = (cols_[col_idx].len == 0);
+    iter++;
+  }
+
+  // Invoke callback
   func_(opaque_state_);
 }
 
diff --git a/src/include/codegen/proxy/csv_scanner_proxy.h b/src/include/codegen/proxy/csv_scanner_proxy.h
index fabcfe9d953..ae9b13cd7ec 100644
--- a/src/include/codegen/proxy/csv_scanner_proxy.h
+++ b/src/include/codegen/proxy/csv_scanner_proxy.h
@@ -12,11 +12,9 @@
 
 #pragma once
 
-#include "codegen/proxy/pool_proxy.h"
 #include "codegen/proxy/proxy.h"
 #include "codegen/proxy/type_builder.h"
 #include "codegen/util/csv_scanner.h"
-#include "util/file.h"
 
 namespace peloton {
 namespace codegen {
@@ -32,10 +30,11 @@ PROXY(CSVScannerColumn) {
 PROXY(CSVScanner) {
   DECLARE_MEMBER(0, char[sizeof(codegen::util::CSVScanner) -
                          sizeof(util::CSVScanner::Column *) -
-                         sizeof(util::CSVScanner::Stats) - 4],
+                         sizeof(util::CSVScanner::Stats) - sizeof(uint32_t)],
                  opaque1);
   DECLARE_MEMBER(1, util::CSVScanner::Column *, cols);
-  DECLARE_MEMBER(2, char[sizeof(util::CSVScanner::Stats) + 4], opaque2);
+  DECLARE_MEMBER(2, char[sizeof(util::CSVScanner::Stats) + sizeof(uint32_t)],
+                 opaque2);
   DECLARE_TYPE;
 
   DECLARE_METHOD(Init);
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
index 56d64f1f371..a69c673054b 100644
--- a/src/include/codegen/util/csv_scanner.h
+++ b/src/include/codegen/util/csv_scanner.h
@@ -60,7 +60,7 @@ class CSVScanner {
     codegen::type::Type col_type;
 
     // A pointer to where the next value of this column is
-    char *ptr;
+    const char *ptr;
 
     // The number of bytes
     uint32_t len;
@@ -202,6 +202,7 @@ class CSVScanner {
 
   // The columns
   Column *cols_;
+  uint32_t num_cols_;
 
   // Statistics
   Stats stats_;

From 2aa0aa16a35250e4fd47517181b1356f2b9539f7 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 9 May 2018 02:02:52 -0400
Subject: [PATCH 18/42] Free memory when re-allocating line buffer

---
 src/codegen/util/csv_scanner.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index 6c5f23ee61d..a7140ad322d 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -150,6 +150,9 @@ void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
     // Copy the old data
     PELOTON_MEMCPY(new_line, line_, line_len_);
 
+    // Free old old
+    memory_.Free(line_);
+
     // Setup pointers and sizes
     line_ = new_line;
     line_maxlen_ = new_maxlen;

From 78c080541d605f2364d667a85c183ac5a7fda1ea Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 9 May 2018 02:04:30 -0400
Subject: [PATCH 19/42] Added memcmp to codegen interface. Renamed CallPrintf()
 to Printf().

---
 src/codegen/codegen.cpp                | 19 ++++++++++++++++---
 src/include/codegen/codegen.h          |  6 ++++--
 src/include/codegen/util/csv_scanner.h |  8 ++++----
 test/codegen/testing_codegen_util.cpp  |  2 +-
 test/codegen/value_integrity_test.cpp  |  4 ++--
 5 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index 6a96a0f7542..e0082f7d588 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -159,12 +159,12 @@ llvm::Value *CodeGen::CallFunc(llvm::Value *fn,
   return GetBuilder().CreateCall(fn, args);
 }
 
-llvm::Value *CodeGen::CallPrintf(const std::string &format,
-                                 const std::vector<llvm::Value *> &args) {
+llvm::Value *CodeGen::Printf(const std::string &format,
+                             const std::vector<llvm::Value *> &args) {
   auto *printf_fn = LookupBuiltin("printf");
   if (printf_fn == nullptr) {
     printf_fn = RegisterBuiltin(
-        "printf", llvm::TypeBuilder<int(char *, ...), false>::get(GetContext()),
+        "printf", llvm::TypeBuilder<decltype(printf), false>::get(GetContext()),
         reinterpret_cast<void *>(printf));
   }
 
@@ -176,6 +176,19 @@ llvm::Value *CodeGen::CallPrintf(const std::string &format,
   return CallFunc(printf_fn, printf_args);
 }
 
+llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
+                             llvm::Value *len) {
+  static constexpr char kMemcmpFnName[] = "memcmp";
+  auto *memcmp_fn = LookupBuiltin(kMemcmpFnName);
+  if (memcmp_fn == nullptr) {
+    memcmp_fn = RegisterBuiltin(
+        kMemcmpFnName,
+        llvm::TypeBuilder<decltype(memcmp), false>::get(GetContext()),
+        reinterpret_cast<void *>(printf));
+  }
+  return CallFunc(memcmp_fn, {ptr1, ptr2, len});
+}
+
 llvm::Value *CodeGen::Sqrt(llvm::Value *val) {
   llvm::Function *sqrt_func = llvm::Intrinsic::getDeclaration(
       &GetModule(), llvm::Intrinsic::sqrt, val->getType());
diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h
index 3dceb820715..037e01dbe11 100644
--- a/src/include/codegen/codegen.h
+++ b/src/include/codegen/codegen.h
@@ -131,8 +131,10 @@ class CodeGen {
   //===--------------------------------------------------------------------===//
   // C/C++ standard library functions
   //===--------------------------------------------------------------------===//
-  llvm::Value *CallPrintf(const std::string &format,
-                          const std::vector<llvm::Value *> &args);
+  llvm::Value *Printf(const std::string &format,
+                      const std::vector<llvm::Value *> &args);
+  llvm::Value *Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
+                      llvm::Value *len);
   llvm::Value *Sqrt(llvm::Value *val);
 
   //===--------------------------------------------------------------------===//
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
index a69c673054b..d51475c4c43 100644
--- a/src/include/codegen/util/csv_scanner.h
+++ b/src/include/codegen/util/csv_scanner.h
@@ -95,8 +95,8 @@ class CSVScanner {
    * upon invocation.
    * @param delimiter The character that separates columns within a row
    * @param quote The quoting character used to quote data (i.e., strings)
-   * @param escape The character that appears before any data characters that
-   * are the same as the quote character.
+   * @param escape The character that should appear before any data characters
+   * that match the quote character.
    */
   CSVScanner(peloton::type::AbstractPool &memory, const std::string &file_path,
              const codegen::type::Type *col_types, uint32_t num_cols,
@@ -122,8 +122,8 @@ class CSVScanner {
    * upon invocation.
    * @param delimiter The character that separates columns within a row
    * @param quote The quoting character used to quote data (i.e., strings)
-   * @param escape The character that appears before any data characters that
-   * are the same as the quote character.
+   * @param escape The character that should appear before any data characters
+   * that match the quote character.
    */
   static void Init(CSVScanner &scanner,
                    executor::ExecutorContext &executor_context,
diff --git a/test/codegen/testing_codegen_util.cpp b/test/codegen/testing_codegen_util.cpp
index 5302eae3daf..316b46331d6 100644
--- a/test/codegen/testing_codegen_util.cpp
+++ b/test/codegen/testing_codegen_util.cpp
@@ -446,7 +446,7 @@ void Printer::ConsumeResult(codegen::ConsumerContext &ctx,
   format.append("]\n");
 
   // Make the printf call
-  codegen.CallPrintf(format, cols);
+  codegen.Printf(format, cols);
 }
 
 }  // namespace test
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 4c4ccf97690..9c78ece4787 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -59,7 +59,7 @@ void DivideByZeroTest(const codegen::type::Type &data_type, ExpressionType op) {
       }
     }
 
-    codegen.CallPrintf("%lu\n", {res.GetValue()});
+    codegen.Printf("%lu\n", {res.GetValue()});
 
     function.ReturnAndFinish();
   }
@@ -126,7 +126,7 @@ void OverflowTest(const codegen::type::Type &data_type, ExpressionType op) {
       }
     }
 
-    codegen.CallPrintf("%lu\n", {res.GetValue()});
+    codegen.Printf("%lu\n", {res.GetValue()});
 
     function.ReturnAndFinish();
   }

From f46634a482a98a5842edffb03929016499f3985c Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 9 May 2018 02:05:11 -0400
Subject: [PATCH 20/42] Cleaned up CSV scan translator. Added null checking.

---
 src/codegen/operator/csv_scan_translator.cpp  | 241 ++++++++++++------
 src/codegen/values_runtime.cpp                |  31 ++-
 .../codegen/operator/csv_scan_translator.h    |  11 +-
 src/include/codegen/proxy/csv_scanner_proxy.h |   3 +-
 src/include/planner/csv_scan_plan.h           |  37 ++-
 5 files changed, 205 insertions(+), 118 deletions(-)

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index 3d87dd9dbe7..d1c191a9ccd 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -29,111 +29,199 @@ CSVScanTranslator::CSVScanTranslator(const planner::CSVScanPlan &scan,
                                      CompilationContext &context,
                                      Pipeline &pipeline)
     : OperatorTranslator(context, pipeline), scan_(scan) {
+  // Register the CSV scanner instance
   auto &runtime_state = context.GetRuntimeState();
   scanner_id_ = runtime_state.RegisterState(
       "csvScanner", CSVScannerProxy::GetType(GetCodeGen()));
+
+  // Load information about the attributes output by the scan plan
+  scan_.GetAttributes(output_attributes_);
 }
 
 void CSVScanTranslator::InitializeState() {
   auto &codegen = GetCodeGen();
 
   // Arguments
-  auto *scanner_ptr = LoadStatePtr(scanner_id_);
-  auto *exec_ctx_ptr = GetCompilationContext().GetExecutorContextPtr();
-  auto *file_path = codegen.ConstString(scan_.GetFileName(), "filePath");
-  auto *output_col_types = ConstructColumnDescriptor();
-  auto *runtime_state_ptr = codegen->CreatePointerCast(
-      codegen.GetState(), codegen.VoidType()->getPointerTo());
+  llvm::Value *scanner_ptr = LoadStatePtr(scanner_id_);
+  llvm::Value *exec_ctx_ptr = GetCompilationContext().GetExecutorContextPtr();
+  llvm::Value *file_path = codegen.ConstString(scan_.GetFileName(), "filePath");
 
-  std::vector<oid_t> out_cols;
-  scan_.GetOutputColumns(out_cols);
-  auto *num_output_cols =
-      codegen.Const32(static_cast<uint32_t>(out_cols.size()));
+  auto num_cols = static_cast<uint32_t>(output_attributes_.size());
 
-  auto *consumer_func = codegen->CreatePointerCast(
-      consumer_func_, proxy::TypeBuilder<void (*)(void *)>::GetType(codegen));
+  // We need to generate an array of type::Type. To do so, we construct a vector
+  // of the types of the output columns, and we create an LLVM constant that is
+  // a copy of the underlying bytes.
+
+  std::vector<codegen::type::Type> col_types_vec;
+  col_types_vec.reserve(num_cols);
+  for (const auto *ai : output_attributes_) {
+    col_types_vec.push_back(ai->type);
+  }
+  llvm::Value *raw_col_type_bytes = codegen.ConstGenericBytes(
+      col_types_vec.data(), static_cast<uint32_t>(col_types_vec.capacity()),
+      "colTypes");
+  llvm::Value *output_col_types = codegen->CreatePointerCast(
+      raw_col_type_bytes, TypeProxy::GetType(codegen)->getPointerTo());
+
+  // Now create a pointer to the consumer function
+  using ConsumerFuncType = void (*)(void *);
+  llvm::Value *consumer_func = codegen->CreatePointerCast(
+      consumer_func_, proxy::TypeBuilder<ConsumerFuncType>::GetType(codegen));
+
+  // Cast the runtime type to an opaque void*. This is because we're calling
+  // into pre-compiled C++ that doesn't know that the dynamically generated
+  // RuntimeState* looks like.
+  llvm::Value *runtime_state_ptr = codegen->CreatePointerCast(
+      codegen.GetState(), codegen.VoidType()->getPointerTo());
 
-  // Call
+  // Call CSVScanner::Init()
   codegen.Call(CSVScannerProxy::Init,
                {scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
-                num_output_cols, consumer_func, runtime_state_ptr,
+                codegen.Const32(num_cols), consumer_func, runtime_state_ptr,
                 codegen.Const8(scan_.GetDelimiterChar()),
                 codegen.Const8(scan_.GetQuoteChar()),
                 codegen.Const8(scan_.GetEscapeChar())});
 }
 
+namespace {
+
+class CSVColumnAccess : public RowBatch::AttributeAccess {
+ public:
+  CSVColumnAccess(const planner::AttributeInfo *ai, llvm::Value *csv_columns,
+                  const std::string &null_str, llvm::Value *runtime_null_str)
+      : ai_(ai),
+        csv_columns_(csv_columns),
+        null_str_(null_str),
+        runtime_null_(runtime_null_str) {}
+
+  llvm::Value *Columns() const { return csv_columns_; }
+
+  uint32_t ColumnIndex() const { return ai_->attribute_id; }
+
+  bool IsNullable() const { return ai_->type.nullable; }
+
+  const type::SqlType &SqlType() const { return ai_->type.GetSqlType(); }
+
+  llvm::Value *IsNull(CodeGen &codegen, llvm::Value *data_ptr,
+                      llvm::Value *data_len) const {
+    uint32_t null_str_len = static_cast<uint32_t>(null_str_.length());
+
+    // Is the length of the column value the same as the NULL string?
+    llvm::Value *eq_len =
+        codegen->CreateICmpEQ(data_len, codegen.Const32(null_str_len));
+
+    // If the null string is empty, generate simple comparison
+    if (null_str_len == 0) {
+      return eq_len;
+    }
+
+    llvm::Value *cmp_res;
+    lang::If check_null{codegen, eq_len};
+    {
+      // Do a memcmp against the NULL string
+      cmp_res = codegen.Memcmp(data_ptr, runtime_null_,
+                               codegen.Const64(null_str_.length()));
+      cmp_res = codegen->CreateICmpEQ(cmp_res, codegen.Const32(0));
+    }
+    check_null.EndIf();
+    return check_null.BuildPHI(cmp_res, codegen.ConstBool(false));
+  }
+
+  Value LoadValueIgnoreNull(CodeGen &codegen, llvm::Value *type,
+                            llvm::Value *data_ptr,
+                            llvm::Value *data_len) const {
+    auto *input_func = SqlType().GetInputFunction(codegen, ai_->type);
+    auto *raw_val = codegen.CallFunc(input_func, {type, data_ptr, data_len});
+    return codegen::Value{ai_->type, raw_val, nullptr,
+                          codegen.ConstBool(false)};
+  }
+
+  Value Access(CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {
+    // Load the type, data pointer and length values for the column
+    auto *type = codegen->CreateConstInBoundsGEP2_32(
+        CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 0);
+    auto *data_ptr = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+        CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 1));
+    auto *data_len = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+        CSVScannerColumnProxy::GetType(codegen), Columns(), ColumnIndex(), 2));
+
+    // If the valid isn't NULLable, avoid the null check here
+    if (!IsNullable()) {
+      return LoadValueIgnoreNull(codegen, type, data_ptr, data_len);
+    }
+
+    // If the value isn't actually null, try to parse it
+    codegen::Value valid_val, null_val;
+    lang::If is_null{codegen,
+                     codegen->CreateNot(IsNull(codegen, data_ptr, data_len))};
+    {
+      // Load valid
+      valid_val = LoadValueIgnoreNull(codegen, type, data_ptr, data_len);
+    }
+    is_null.ElseBlock();
+    {
+      // Default null
+      null_val = SqlType().GetNullValue(codegen);
+    }
+    is_null.EndIf();
+
+    // Return
+    return is_null.BuildPHI(valid_val, null_val);
+  }
+
+ private:
+  const planner::AttributeInfo *ai_;
+  llvm::Value *csv_columns_;
+  const std::string &null_str_;
+  llvm::Value *runtime_null_;
+};
+
+}  // namespace
+
 void CSVScanTranslator::DefineAuxiliaryFunctions() {
-  // Define consumer function here
   CodeGen &codegen = GetCodeGen();
   CompilationContext &cc = GetCompilationContext();
 
+  // Define consumer function here
   std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
       {"runtimeState",
        cc.GetRuntimeState().FinalizeType(codegen)->getPointerTo()}};
-  codegen::FunctionDeclaration decl{codegen.GetCodeContext(), "consumer",
-                                    FunctionDeclaration::Visibility::Internal,
-                                    codegen.VoidType(), arg_types};
-  codegen::FunctionBuilder scan_consumer{codegen.GetCodeContext(), decl};
+  FunctionDeclaration decl{codegen.GetCodeContext(), "consumer",
+                           FunctionDeclaration::Visibility::Internal,
+                           codegen.VoidType(), arg_types};
+  FunctionBuilder scan_consumer{codegen.GetCodeContext(), decl};
   {
     ConsumerContext ctx{cc, GetPipeline()};
 
     Vector v{nullptr, 1, nullptr};
     RowBatch one{GetCompilationContext(), codegen.Const32(0),
                  codegen.Const32(1), v, false};
-    RowBatch::Row row{one, nullptr, nullptr};
-
-    // Get the attributes
-    std::vector<const planner::AttributeInfo *> output_attributes;
-    scan_.GetAttributes(output_attributes);
 
     // Load the pointer to the columns view
-    auto *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
+    llvm::Value *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
         CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 1));
 
-    // For each column, call the type's input function to read the input value
-    for (uint32_t i = 0; i < output_attributes.size(); i++) {
-      const auto *output_ai = output_attributes[i];
-
-      const auto &sql_type = output_ai->type.GetSqlType();
-
-      auto *is_null = codegen->CreateConstInBoundsGEP2_32(
-          CSVScannerColumnProxy::GetType(codegen), cols, i, 3);
-
-      codegen::Value val, null_val;
-      lang::If not_null{codegen,
-                        codegen->CreateNot(codegen->CreateLoad(is_null))};
-      {
-        // Grab a pointer to the ptr and length
-        auto *type = codegen->CreatePointerCast(
-            codegen.ConstType(output_ai->type),
-            TypeProxy::GetType(codegen)->getPointerTo());
-        auto *ptr = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
-            CSVScannerColumnProxy::GetType(codegen), cols, i, 1));
-        auto *len = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
-            CSVScannerColumnProxy::GetType(codegen), cols, i, 2));
-
-        // Invoke the input function
-        auto *input_func = sql_type.GetInputFunction(codegen, output_ai->type);
-        auto *raw_val = codegen.CallFunc(input_func, {type, ptr, len});
-
-        // Non-null value
-        val = codegen::Value{output_ai->type, raw_val, nullptr,
-                             codegen.ConstBool(false)};
-      }
-      not_null.ElseBlock();
-      {
-        // Null value
-        null_val = sql_type.GetNullValue(codegen);
-      }
-      not_null.EndIf();
-
-      codegen::Value final_val = not_null.BuildPHI(val, null_val);
-      row.RegisterAttributeValue(output_ai, final_val);
+    llvm::Value *null_str = codegen.ConstString(scan_.GetNullString(), "null");
+
+    // Add accessors for all columns into the row batch
+    std::vector<CSVColumnAccess> column_accessors;
+    for (uint32_t i = 0; i < output_attributes_.size(); i++) {
+      column_accessors.emplace_back(output_attributes_[i], cols,
+                                    scan_.GetNullString(), null_str);
+    }
+    for (uint32_t i = 0; i < output_attributes_.size(); i++) {
+      one.AddAttribute(output_attributes_[i], &column_accessors[i]);
     }
 
+    // Push the row through the pipeline
+    RowBatch::Row row{one, nullptr, nullptr};
     ctx.Consume(row);
+
+    // Done
     scan_consumer.ReturnAndFinish();
   }
+
+  // The consumer function has been generated. Get a pointer to it now.
   consumer_func_ = scan_consumer.GetFunction();
 }
 
@@ -148,27 +236,10 @@ void CSVScanTranslator::TearDownState() {
 }
 
 std::string CSVScanTranslator::GetName() const {
-  return std::__cxx11::string();
-}
-
-llvm::Value *CSVScanTranslator::ConstructColumnDescriptor() const {
-  // First, we pull out all the attributes produced by the scan, in order
-  std::vector<const planner::AttributeInfo *> cols;
-  scan_.GetAttributes(cols);
-
-  // But, what we really need are just the column types, so pull those out now
-  std::vector<codegen::type::Type> col_types_vec;
-  for (const auto *col : cols) {
-    col_types_vec.push_back(col->type);
-  }
-
-  CodeGen &codegen = GetCodeGen();
-
-  auto num_bytes = cols.size() * sizeof(decltype(col_types_vec)::value_type);
-  auto *bytes = codegen.ConstGenericBytes(
-      col_types_vec.data(), static_cast<uint32_t>(num_bytes), "colTypes");
-  return codegen->CreatePointerCast(
-      bytes, TypeProxy::GetType(codegen)->getPointerTo());
+  return StringUtil::Format(
+      "CSVScan(file: '%s', delimiter: '%c', quote: '%c', escape: '%c')",
+      scan_.GetFileName().c_str(), scan_.GetDelimiterChar(),
+      scan_.GetQuoteChar(), scan_.GetEscapeChar());
 }
 
 }  // namespace codegen
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index dddc0a43ac6..9796b5457f6 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -139,6 +139,11 @@ void TrimLeftRight(const char *&left, const char *&right) {
 template <typename T>
 typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
     const char *ptr, uint32_t len) {
+  if (len == 0) {
+    RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
   const char *start = ptr;
   const char *end = start + len;
 
@@ -159,6 +164,7 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
   while (start != end) {
     if (*start < '0' || *start > '9') {
       RuntimeFunctions::ThrowInvalidInputStringException();
+      __builtin_unreachable();
     }
 
     num = (num * 10) + (*start - '0');
@@ -175,6 +181,7 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
   if (num <= std::numeric_limits<T>::min() ||
       num >= std::numeric_limits<T>::max()) {
     RuntimeFunctions::ThrowOverflowException();
+    __builtin_unreachable();
   }
 
   // Done
@@ -186,7 +193,11 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
 bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
                                  const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
+
+  if (len == 0) {
+    RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
 
   const char *start = ptr, *end = ptr + len;
 
@@ -201,7 +212,8 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
     case 't':
     case 'T': {
       static constexpr char kTrue[] = "true";
-      if (strncasecmp(start, kTrue, std::min(trimmed_len, sizeof(kTrue)))) {
+      std::cout << sizeof(kTrue) << std::endl;
+      if (strncasecmp(start, kTrue, trimmed_len) == 0) {
         return true;
       }
       break;
@@ -209,7 +221,7 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
     case 'f':
     case 'F': {
       static constexpr char kFalse[] = "false";
-      if (strncasecmp(start, kFalse, std::min(trimmed_len, sizeof(kFalse)))) {
+      if (strncasecmp(start, kFalse, trimmed_len) == 0) {
         return false;
       }
       break;
@@ -217,7 +229,7 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
     case 'y':
     case 'Y': {
       static constexpr char kYes[] = "yes";
-      if (strncasecmp(start, kYes, std::min(trimmed_len, sizeof(kYes)))) {
+      if (strncasecmp(start, kYes, trimmed_len) == 0) {
         return true;
       }
       break;
@@ -225,7 +237,7 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
     case 'n':
     case 'N': {
       static constexpr char kNo[] = "no";
-      if (strncasecmp(start, kNo, std::min(trimmed_len, sizeof(kNo)))) {
+      if (strncasecmp(start, kNo, trimmed_len) == 0) {
         return false;
       }
       break;
@@ -235,9 +247,10 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
       // 'o' not enough to distinguish between on/off
       static constexpr char kOff[] = "off";
       static constexpr char kOn[] = "on";
-      if (strncasecmp(start, kOff, std::min(trimmed_len, sizeof(kOff)))) {
+      if (strncasecmp(start, kOff, (trimmed_len > 3 ? trimmed_len : 3)) == 0) {
         return false;
-      } else if (strncasecmp(start, kOn, std::min(trimmed_len, sizeof(kOn)))) {
+      } else if (strncasecmp(start, kOn, (trimmed_len > 2 ? trimmed_len : 2)) ==
+                 0) {
         return true;
       }
       break;
@@ -267,28 +280,24 @@ bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
 int8_t ValuesRuntime::InputTinyInt(UNUSED_ATTRIBUTE const type::Type &type,
                                    const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int8_t>(ptr, len);
 }
 
 int16_t ValuesRuntime::InputSmallInt(UNUSED_ATTRIBUTE const type::Type &type,
                                      const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int16_t>(ptr, len);
 }
 
 int32_t ValuesRuntime::InputInteger(UNUSED_ATTRIBUTE const type::Type &type,
                                     const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int32_t>(ptr, len);
 }
 
 int64_t ValuesRuntime::InputBigInt(UNUSED_ATTRIBUTE const type::Type &type,
                                    const char *ptr, uint32_t len) {
   PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  PELOTON_ASSERT(len != 0 && "Length must be non-zero");
   return ToNum<int64_t>(ptr, len);
 }
 
diff --git a/src/include/codegen/operator/csv_scan_translator.h b/src/include/codegen/operator/csv_scan_translator.h
index 12e132ab4ce..3389e1e5c09 100644
--- a/src/include/codegen/operator/csv_scan_translator.h
+++ b/src/include/codegen/operator/csv_scan_translator.h
@@ -54,15 +54,12 @@ class CSVScanTranslator : public OperatorTranslator {
   std::string GetName() const override;
 
  private:
-  // Plan accessor
-  const planner::CSVScanPlan &GetScanPlan() const { return scan_; }
-
-  llvm::Value *ConstructColumnDescriptor() const;
-
- private:
-  // The scan
+  // The plan
   const planner::CSVScanPlan &scan_;
 
+  // The set of attributes output by the csv scan
+  std::vector<const planner::AttributeInfo *> output_attributes_;
+
   // The scanner state ID
   RuntimeState::StateID scanner_id_;
 
diff --git a/src/include/codegen/proxy/csv_scanner_proxy.h b/src/include/codegen/proxy/csv_scanner_proxy.h
index ae9b13cd7ec..ee27ce2b003 100644
--- a/src/include/codegen/proxy/csv_scanner_proxy.h
+++ b/src/include/codegen/proxy/csv_scanner_proxy.h
@@ -15,12 +15,13 @@
 #include "codegen/proxy/proxy.h"
 #include "codegen/proxy/type_builder.h"
 #include "codegen/util/csv_scanner.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
 
 namespace peloton {
 namespace codegen {
 
 PROXY(CSVScannerColumn) {
-  DECLARE_MEMBER(0, char[sizeof(type::Type)], type);
+  DECLARE_MEMBER(0, type::Type, type);
   DECLARE_MEMBER(1, char *, ptr);
   DECLARE_MEMBER(2, uint32_t, len);
   DECLARE_MEMBER(3, bool, is_null);
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index f9611b22630..2f40999efd0 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -34,9 +34,14 @@ class CSVScanPlan : public AbstractScan {
    *
    * @param file_name The file path
    * @param cols Information of the columns expected in each row of the CSV
+   * @param delimiter The character that separates columns within a row
+   * @param quote The character used to quote data (i.e., strings)
+   * @param escape The character that should appear before any data characters
+   * that match the quote character.
    */
   CSVScanPlan(std::string file_name, std::vector<ColumnInfo> &&cols,
-              char delimiter = ',', char quote = '"', char escape = '"');
+              char delimiter = ',', char quote = '"', char escape = '"',
+              std::string null = "");
 
   //////////////////////////////////////////////////////////////////////////////
   ///
@@ -55,6 +60,7 @@ class CSVScanPlan : public AbstractScan {
   char GetDelimiterChar() const { return delimiter_; }
   char GetQuoteChar() const { return quote_; }
   char GetEscapeChar() const { return escape_; }
+  const std::string &GetNullString() const { return null_; }
 
   //////////////////////////////////////////////////////////////////////////////
   ///
@@ -76,8 +82,9 @@ class CSVScanPlan : public AbstractScan {
   char delimiter_;
   char quote_;
   char escape_;
+  const std::string null_;
 
-  std::vector<std::unique_ptr<planner::AttributeInfo>> attributes_;
+  std::vector<planner::AttributeInfo> attributes_;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -88,17 +95,19 @@ class CSVScanPlan : public AbstractScan {
 
 inline CSVScanPlan::CSVScanPlan(std::string file_name,
                                 std::vector<CSVScanPlan::ColumnInfo> &&cols,
-                                char delimiter, char quote, char escape)
+                                char delimiter, char quote, char escape,
+                                std::string null)
     : file_name_(std::move(file_name)),
       delimiter_(delimiter),
       quote_(quote),
-      escape_(escape) {
-  for (const auto &col : cols) {
-    std::unique_ptr<planner::AttributeInfo> attribute{
-        new planner::AttributeInfo()};
-    attribute->name = col.name;
-    attribute->type = codegen::type::Type{col.type, true};
-    attributes_.emplace_back(std::move(attribute));
+      escape_(escape),
+      null_(null) {
+  attributes_.resize(cols.size());
+  for (uint32_t i = 0; i < cols.size(); i++) {
+    const auto &col_info = cols[i];
+    attributes_[i].type = codegen::type::Type{col_info.type, true};
+    attributes_[i].attribute_id = i;
+    attributes_[i].name = col_info.name;
   }
 }
 
@@ -109,8 +118,8 @@ inline PlanNodeType CSVScanPlan::GetPlanNodeType() const {
 inline std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
   std::vector<CSVScanPlan::ColumnInfo> new_cols;
   for (const auto &attribute : attributes_) {
-    new_cols.push_back(CSVScanPlan::ColumnInfo{
-        .name = attribute->name, .type = attribute->type.type_id});
+    new_cols.push_back(CSVScanPlan::ColumnInfo{.name = attribute.name,
+                                               .type = attribute.type.type_id});
   }
   return std::unique_ptr<AbstractPlan>(
       new CSVScanPlan(file_name_, std::move(new_cols)));
@@ -118,7 +127,7 @@ inline std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
 
 inline void CSVScanPlan::PerformBinding(BindingContext &binding_context) {
   for (uint32_t i = 0; i < attributes_.size(); i++) {
-    binding_context.BindNew(i, attributes_[i].get());
+    binding_context.BindNew(i, &attributes_[i]);
   }
 }
 
@@ -142,7 +151,7 @@ inline void CSVScanPlan::GetAttributes(
     std::vector<const AttributeInfo *> &ais) const {
   ais.clear();
   for (const auto &ai : attributes_) {
-    ais.push_back(ai.get());
+    ais.push_back(&ai);
   }
 }
 

From 66bb521b76df43db6757c0c4267718052cf76f1c Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 9 May 2018 02:25:20 -0400
Subject: [PATCH 21/42] Moved TupleRuntime::CreateVarlen() into
 ValuesRuntime::WriteVarlen(). Better code organization and clearer name.

---
 src/codegen/operator/csv_scan_translator.cpp  |  6 ++--
 src/codegen/proxy/tuple_runtime_proxy.cpp     | 24 -------------
 src/codegen/proxy/values_runtime_proxy.cpp    |  5 ++-
 src/codegen/table_storage.cpp                 |  6 ++--
 src/codegen/tuple_runtime.cpp                 | 35 -------------------
 src/codegen/values_runtime.cpp                | 19 ++++++++++
 .../codegen/proxy/tuple_runtime_proxy.h       | 25 -------------
 .../codegen/proxy/values_runtime_proxy.h      |  4 ++-
 src/include/codegen/tuple_runtime.h           | 32 -----------------
 src/include/codegen/values_runtime.h          | 16 +++++++++
 10 files changed, 48 insertions(+), 124 deletions(-)
 delete mode 100644 src/codegen/proxy/tuple_runtime_proxy.cpp
 delete mode 100644 src/codegen/tuple_runtime.cpp
 delete mode 100644 src/include/codegen/proxy/tuple_runtime_proxy.h
 delete mode 100644 src/include/codegen/tuple_runtime.h

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index d1c191a9ccd..8603a043e89 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -88,10 +88,10 @@ namespace {
 class CSVColumnAccess : public RowBatch::AttributeAccess {
  public:
   CSVColumnAccess(const planner::AttributeInfo *ai, llvm::Value *csv_columns,
-                  const std::string &null_str, llvm::Value *runtime_null_str)
+                  std::string null_str, llvm::Value *runtime_null_str)
       : ai_(ai),
         csv_columns_(csv_columns),
-        null_str_(null_str),
+        null_str_(std::move(null_str)),
         runtime_null_(runtime_null_str) {}
 
   llvm::Value *Columns() const { return csv_columns_; }
@@ -172,7 +172,7 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
  private:
   const planner::AttributeInfo *ai_;
   llvm::Value *csv_columns_;
-  const std::string &null_str_;
+  const std::string null_str_;
   llvm::Value *runtime_null_;
 };
 
diff --git a/src/codegen/proxy/tuple_runtime_proxy.cpp b/src/codegen/proxy/tuple_runtime_proxy.cpp
deleted file mode 100644
index 128c938f522..00000000000
--- a/src/codegen/proxy/tuple_runtime_proxy.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// tuple_runtime_proxy.cpp
-//
-// Identification: src/codegen/proxy/tuple_runtime_proxy.cpp
-//
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#include "codegen/proxy/tuple_runtime_proxy.h"
-
-#include "codegen/tuple_runtime.h"
-#include "codegen/proxy/pool_proxy.h"
-
-namespace peloton {
-namespace codegen {
-
-DEFINE_METHOD(peloton::codegen, TupleRuntime, CreateVarlen);
-
-}  // namespace codegen
-}  // namespace peloton
diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp
index 37f90834362..1c32b6259b0 100644
--- a/src/codegen/proxy/values_runtime_proxy.cpp
+++ b/src/codegen/proxy/values_runtime_proxy.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/proxy/values_runtime_proxy.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +14,7 @@
 
 #include "codegen/proxy/value_proxy.h"
 #include "codegen/proxy/runtime_functions_proxy.h"
+#include "codegen/proxy/pool_proxy.h"
 
 namespace peloton {
 namespace codegen {
@@ -37,5 +38,7 @@ DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBigInt);
 
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, CompareStrings);
 
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, WriteVarlen);
+
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/codegen/table_storage.cpp b/src/codegen/table_storage.cpp
index 198c7df9f2a..99df998b644 100644
--- a/src/codegen/table_storage.cpp
+++ b/src/codegen/table_storage.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/table_storage.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,7 +14,7 @@
 
 #include "catalog/schema.h"
 #include "codegen/lang/if.h"
-#include "codegen/proxy/tuple_runtime_proxy.h"
+#include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/sql_type.h"
 #include "codegen/type/type.h"
 #include "codegen/value.h"
@@ -49,7 +49,7 @@ void TableStorage::StoreValues(CodeGen &codegen, llvm::Value *tuple_ptr,
       }
       value_is_null.ElseBlock();
       {
-        codegen.Call(TupleRuntimeProxy::CreateVarlen,
+        codegen.Call(ValuesRuntimeProxy::WriteVarlen,
                      {value.GetValue(), value.GetLength(), val_ptr, pool});
       }
       value_is_null.EndIf();
diff --git a/src/codegen/tuple_runtime.cpp b/src/codegen/tuple_runtime.cpp
deleted file mode 100644
index d065feed5d9..00000000000
--- a/src/codegen/tuple_runtime.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// tuple_runtime.cpp
-//
-// Identification: src/codegen/tuple_runtime.cpp
-//
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#include "codegen/tuple_runtime.h"
-#include "type/abstract_pool.h"
-
-namespace peloton {
-namespace codegen {
-
-void TupleRuntime::CreateVarlen(char *data, uint32_t len, char *buf,
-                                peloton::type::AbstractPool *pool) {
-  struct varlen_t {
-    uint32_t len;
-    char data[0];
-  };
-
-  auto *area =
-      reinterpret_cast<varlen_t *>(pool->Allocate(sizeof(uint32_t) + len));
-  area->len = len;
-  PELOTON_MEMCPY(area->data, data, len);
-
-  *reinterpret_cast<varlen_t **>(buf) = area;
-}
-
-}  // namespace codegen
-}  // namespace peloton
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index 9796b5457f6..2c2c771e845 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -16,6 +16,7 @@
 
 #include "codegen/runtime_functions.h"
 #include "codegen/type/type.h"
+#include "type/abstract_pool.h"
 #include "type/value.h"
 #include "type/type_util.h"
 #include "type/value_factory.h"
@@ -306,5 +307,23 @@ int32_t ValuesRuntime::CompareStrings(const char *str1, uint32_t len1,
   return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2);
 }
 
+void ValuesRuntime::WriteVarlen(const char *data, uint32_t len, char *buf,
+                                peloton::type::AbstractPool &pool) {
+  struct Varlen {
+    uint32_t len;
+    char data[0];
+  };
+
+  // Allocate memory for the Varlen object
+  auto *area = static_cast<Varlen *>(pool.Allocate(sizeof(uint32_t) + len));
+
+  // Populate it
+  area->len = len;
+  PELOTON_MEMCPY(area->data, data, len);
+
+  // Store a pointer to the Varlen object into the target memory space
+  *reinterpret_cast<Varlen **>(buf) = area;
+}
+
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/include/codegen/proxy/tuple_runtime_proxy.h b/src/include/codegen/proxy/tuple_runtime_proxy.h
deleted file mode 100644
index e166349575e..00000000000
--- a/src/include/codegen/proxy/tuple_runtime_proxy.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// tuple_runtime_proxy.h
-//
-// Identification: src/include/codegen/proxy/tuple_runtime_proxy.h
-//
-// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "codegen/proxy/proxy.h"
-
-namespace peloton {
-namespace codegen {
-
-PROXY(TupleRuntime) {
-  DECLARE_METHOD(CreateVarlen);
-};
-
-}  // namespace codegen
-}  // namespace peloton
diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h
index 3fe57ab36fb..85d9d1cfb85 100644
--- a/src/include/codegen/proxy/values_runtime_proxy.h
+++ b/src/include/codegen/proxy/values_runtime_proxy.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/codegen/proxy/values_runtime_proxy.h
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,6 +37,8 @@ PROXY(ValuesRuntime) {
   DECLARE_METHOD(InputBigInt);
 
   DECLARE_METHOD(CompareStrings);
+
+  DECLARE_METHOD(WriteVarlen);
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/tuple_runtime.h b/src/include/codegen/tuple_runtime.h
deleted file mode 100644
index 86532055c7a..00000000000
--- a/src/include/codegen/tuple_runtime.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// tuple_runtime.h
-//
-// Identification: src/include/codegen/tuple_runtime.h
-//
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "common/internal_types.h"
-
-namespace peloton {
-
-namespace type {
-class AbstractPool;
-}  // namespace type
-
-namespace codegen {
-
-class TupleRuntime {
- public:
-  static void CreateVarlen(char *data, uint32_t len, char *buf,
-                           peloton::type::AbstractPool *pool);
-};
-
-}  // namespace codegen
-}  // namespace peloton
diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h
index 206e9ed9bb2..e01b93c54f1 100644
--- a/src/include/codegen/values_runtime.h
+++ b/src/include/codegen/values_runtime.h
@@ -15,6 +15,11 @@
 #include <cstdint>
 
 namespace peloton {
+
+namespace type {
+class AbstractPool;
+}  // namespace type
+
 namespace codegen {
 
 namespace type {
@@ -93,6 +98,17 @@ class ValuesRuntime {
    */
   static int32_t CompareStrings(const char *str1, uint32_t len1,
                                 const char *str2, uint32_t len2);
+
+  /**
+   * Write the provided variable length object into the target buffer.
+   *
+   * @param data The bytes we wish to serialize
+   * @param len The length of the byte array
+   * @param buf The target position we wish to write to
+   * @param pool A memory pool to source memory from
+   */
+  static void WriteVarlen(const char *data, uint32_t len, char *buf,
+                          peloton::type::AbstractPool &pool);
 };
 
 }  // namespace codegen

From e318f661f76ceab089a9ff55a626c54897f8e26e Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 14 May 2018 16:08:09 -0400
Subject: [PATCH 22/42] Added error handling for long columns. Added
 null-terminator byte for when read-buffers are copied to line-buffers.

---
 src/codegen/util/csv_scanner.cpp       | 45 ++++++++++++++++++++------
 src/include/codegen/util/csv_scanner.h |  7 ++--
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index a7140ad322d..da606fcaac3 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -37,6 +37,7 @@ CSVScanner::CSVScanner(peloton::type::AbstractPool &pool,
       line_(nullptr),
       line_len_(0),
       line_maxlen_(0),
+      line_number_(0),
       delimiter_(delimiter),
       quote_(quote),
       escape_(escape),
@@ -110,15 +111,16 @@ void CSVScanner::Initialize() {
   file_.Open(file_path_, peloton::util::File::AccessMode::ReadOnly);
 
   // Allocate buffer space
-  buffer_ = static_cast<char *>(memory_.Allocate(kDefaultBufferSize + 1));
+  buffer_ = static_cast<char *>(memory_.Allocate(kDefaultBufferSize));
 
   // Fill read-buffer
   NextBuffer();
 
-  // Allocate space for the full line, if it doesn't fit into the buffer
+  // Allocate space for the full line, if it doesn't fit into the buffer. We
+  // reserve the last byte for the null-byte terminator.
   line_ = static_cast<char *>(memory_.Allocate(kDefaultBufferSize));
   line_len_ = 0;
-  line_maxlen_ = kDefaultBufferSize;
+  line_maxlen_ = kDefaultBufferSize - 1;
 }
 
 bool CSVScanner::NextBuffer() {
@@ -139,12 +141,25 @@ void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
   }
 
   if (line_len_ + len > line_maxlen_) {
+    // Check if we can even allocate any more bytes
+    if (static_cast<uint64_t>(len) > kMaxAllocSize - line_len_) {
+      const auto msg = StringUtil::Format(
+          "Line %u in file '%s' exceeds maximum line length: %lu",
+          line_number_ + 1, file_path_.c_str(), kMaxAllocSize);
+      throw Exception{msg};
+    }
+
     // The current line buffer isn't large enough to store the new bytes, so we
-    // need to resize it. By default, we double the capacity.
-    auto new_maxlen = line_maxlen_ * 2;
+    // need to resize it. Let's find an allocation size large enough to fit the
+    // new bytes.
+    uint32_t new_maxlen = line_maxlen_ * 2;
     while (new_maxlen < len) {
       new_maxlen *= 2;
     }
+
+    // Clamp
+    new_maxlen = std::min(new_maxlen, static_cast<uint32_t>(kMaxAllocSize));
+
     auto *new_line = static_cast<char *>(memory_.Allocate(new_maxlen));
 
     // Copy the old data
@@ -155,15 +170,14 @@ void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
 
     // Setup pointers and sizes
     line_ = new_line;
-    line_maxlen_ = new_maxlen;
+    line_maxlen_ = new_maxlen - 1;
 
     stats_.num_reallocs++;
   }
 
-  // At this point, we've guaranteed that the line is large enough to
-  // accommodate the new bytes, so let's go ahead and perform the copy.
-
+  // Copy provided data into the line buffer, ensuring null-byte termination.
   PELOTON_MEMCPY(line_ + line_len_, data, len);
+  line_[line_len_ + len] = '\0';
 
   // Increase the length of the line
   line_len_ += len;
@@ -246,11 +260,22 @@ void CSVScanner::ProduceCSV(const char *line) {
 
   const auto *iter = line;
   for (uint32_t col_idx = 0; col_idx < num_cols_; col_idx++) {
+    // Start points to the beginning of the column's data value
     const char *start = iter;
-    for (; *iter != 0 && *iter != delimiter_; iter++) {}
+
+    // Eat text until the next delimiter
+    while (*iter != 0 && *iter != delimiter_) {
+      iter++;
+    }
+
+    // At this point, iter points to the end of the column's data value
+
+    // Let's setup the columns
     cols_[col_idx].ptr = start;
     cols_[col_idx].len = static_cast<uint32_t>(iter - start);
     cols_[col_idx].is_null = (cols_[col_idx].len == 0);
+
+    // Eat delimiter, moving to next column
     iter++;
   }
 
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
index d51475c4c43..a946dec903e 100644
--- a/src/include/codegen/util/csv_scanner.h
+++ b/src/include/codegen/util/csv_scanner.h
@@ -47,7 +47,10 @@ namespace util {
 class CSVScanner {
  public:
   // 64K buffer size
-  static constexpr uint32_t kDefaultBufferSize = (1ul << 16);
+  static constexpr uint32_t kDefaultBufferSize = (1ul << 16ul);
+
+  // We allocate a maximum of 1GB for the line buffer
+  static constexpr uint64_t kMaxAllocSize = (1ul << 30ul);
 
   // The signature of the callback function
   using Callback = void (*)(void *);
@@ -70,7 +73,7 @@ class CSVScanner {
   };
 
   /**
-   * Various statistics tracked while we scan the CSV
+   * This structure tracks various statistics while we scan the CSV
    */
   struct Stats {
     // The number of times the read-buffer was copied into the line-buffer

From 447932ce8b168742da318f2bf0b194d0a89e574b Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Mon, 14 May 2018 16:14:48 -0400
Subject: [PATCH 23/42] Added inputs for decimal types

---
 src/codegen/proxy/values_runtime_proxy.cpp    |  1 +
 src/codegen/type/decimal_type.cpp             |  6 +--
 src/codegen/values_runtime.cpp                | 38 +++++++++++++++++++
 .../codegen/proxy/values_runtime_proxy.h      |  1 +
 src/include/codegen/values_runtime.h          |  3 ++
 5 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp
index 1c32b6259b0..530ad6b4e20 100644
--- a/src/codegen/proxy/values_runtime_proxy.cpp
+++ b/src/codegen/proxy/values_runtime_proxy.cpp
@@ -35,6 +35,7 @@ DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputTinyInt);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputSmallInt);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputInteger);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBigInt);
+DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputDecimal);
 
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, CompareStrings);
 
diff --git a/src/codegen/type/decimal_type.cpp b/src/codegen/type/decimal_type.cpp
index 50a0b09e29c..92cc7ec5b6a 100644
--- a/src/codegen/type/decimal_type.cpp
+++ b/src/codegen/type/decimal_type.cpp
@@ -559,9 +559,9 @@ void Decimal::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 }
 
 llvm::Function *Decimal::GetInputFunction(
-    UNUSED_ATTRIBUTE CodeGen &codegen,
-    UNUSED_ATTRIBUTE const Type &type) const {
-  throw NotImplementedException{"Decimal inputs not implemented yet"};
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  // TODO: We should be using the precision/scale in the output function
+  return ValuesRuntimeProxy::InputDecimal.GetFunction(codegen);
 }
 
 llvm::Function *Decimal::GetOutputFunction(
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index 2c2c771e845..e23e552813a 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -189,6 +189,32 @@ typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
   return static_cast<T>(num);
 }
 
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type ToNum(
+    const char *ptr, uint32_t len) {
+  if (len == 0) {
+    RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
+  // TODO(pmenon): Optimize me later
+  char *end = nullptr;
+  auto ret = std::strtod(ptr, &end);
+
+  if (unlikely_branch(end == ptr)) {
+    if (errno == ERANGE) {
+      RuntimeFunctions::ThrowOverflowException();
+      __builtin_unreachable();
+    } else {
+      RuntimeFunctions::ThrowInvalidInputStringException();
+      __builtin_unreachable();
+    }
+  }
+
+  // Done
+  return static_cast<T>(ret);
+}
+
 }  // namespace
 
 bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
@@ -302,6 +328,18 @@ int64_t ValuesRuntime::InputBigInt(UNUSED_ATTRIBUTE const type::Type &type,
   return ToNum<int64_t>(ptr, len);
 }
 
+double ValuesRuntime::InputDecimal(UNUSED_ATTRIBUTE const type::Type &type,
+                                   const char *ptr, uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  return ToNum<double>(ptr, len);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// String comparison
+///
+////////////////////////////////////////////////////////////////////////////////
+
 int32_t ValuesRuntime::CompareStrings(const char *str1, uint32_t len1,
                                       const char *str2, uint32_t len2) {
   return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2);
diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h
index 85d9d1cfb85..059f700d8c6 100644
--- a/src/include/codegen/proxy/values_runtime_proxy.h
+++ b/src/include/codegen/proxy/values_runtime_proxy.h
@@ -35,6 +35,7 @@ PROXY(ValuesRuntime) {
   DECLARE_METHOD(InputSmallInt);
   DECLARE_METHOD(InputInteger);
   DECLARE_METHOD(InputBigInt);
+  DECLARE_METHOD(InputDecimal);
 
   DECLARE_METHOD(CompareStrings);
 
diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h
index e01b93c54f1..905ead1fd68 100644
--- a/src/include/codegen/values_runtime.h
+++ b/src/include/codegen/values_runtime.h
@@ -87,6 +87,9 @@ class ValuesRuntime {
   static int64_t InputBigInt(const type::Type &type, const char *ptr,
                              uint32_t len);
 
+  static double InputDecimal(const type::Type &type, const char *ptr,
+                             uint32_t len);
+
   /**
    * Compare two strings, returning an integer value indicating their sort order
    *

From d1e214a7bc4e0e0f01f17edf1624ac72735097b6 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 15 May 2018 22:04:38 -0400
Subject: [PATCH 24/42] Moved type-specific functions into function namespace

---
 src/catalog/catalog.cpp                       |  54 +--
 src/codegen/proxy/date_functions_proxy.cpp    |   7 +-
 src/codegen/proxy/decimal_functions_proxy.cpp |  30 --
 src/codegen/proxy/numeric_functions_proxy.cpp |  37 ++
 src/codegen/proxy/string_functions_proxy.cpp  |  10 +-
 src/codegen/proxy/values_runtime_proxy.cpp    |  11 -
 src/codegen/table_storage.cpp                 |   4 +-
 src/codegen/type/bigint_type.cpp              |   5 +-
 src/codegen/type/boolean_type.cpp             |   5 +-
 src/codegen/type/date_type.cpp                |   6 +-
 src/codegen/type/decimal_type.cpp             |  12 +-
 src/codegen/type/integer_type.cpp             |   5 +-
 src/codegen/type/smallint_type.cpp            |   5 +-
 src/codegen/type/tinyint_type.cpp             |   5 +-
 src/codegen/type/varbinary_type.cpp           |   3 +-
 src/codegen/type/varchar_type.cpp             |   2 +-
 src/codegen/values_runtime.cpp                | 264 -----------
 src/function/date_functions.cpp               |  33 +-
 src/function/decimal_functions.cpp            | 152 -------
 src/function/numeric_functions.cpp            | 417 ++++++++++++++++++
 src/function/string_functions.cpp             |  25 ++
 .../codegen/proxy/date_functions_proxy.h      |   7 +-
 ...ions_proxy.h => numeric_functions_proxy.h} |  19 +-
 .../codegen/proxy/string_functions_proxy.h    |   2 +
 .../codegen/proxy/values_runtime_proxy.h      |  11 -
 src/include/codegen/values_runtime.h          |  53 ---
 src/include/common/container_tuple.h          |   3 +-
 src/include/function/date_functions.h         |  22 +
 src/include/function/decimal_functions.h      |  46 --
 src/include/function/numeric_functions.h      |  81 ++++
 src/include/function/string_functions.h       |  30 ++
 test/codegen/value_integrity_test.cpp         |  11 +-
 test/function/decimal_functions_test.cpp      |  46 +-
 33 files changed, 748 insertions(+), 675 deletions(-)
 delete mode 100644 src/codegen/proxy/decimal_functions_proxy.cpp
 create mode 100644 src/codegen/proxy/numeric_functions_proxy.cpp
 delete mode 100644 src/function/decimal_functions.cpp
 create mode 100644 src/function/numeric_functions.cpp
 rename src/include/codegen/proxy/{decimal_functions_proxy.h => numeric_functions_proxy.h} (52%)
 delete mode 100644 src/include/function/decimal_functions.h
 create mode 100644 src/include/function/numeric_functions.h

diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp
index 0759da7d42f..adc4f77b66e 100644
--- a/src/catalog/catalog.cpp
+++ b/src/catalog/catalog.cpp
@@ -30,7 +30,7 @@
 #include "codegen/code_context.h"
 #include "concurrency/transaction_manager_factory.h"
 #include "function/date_functions.h"
-#include "function/decimal_functions.h"
+#include "function/numeric_functions.h"
 #include "function/old_engine_string_functions.h"
 #include "function/timestamp_functions.h"
 #include "index/index_factory.h"
@@ -1283,43 +1283,43 @@ void Catalog::InitializeFunctions() {
       AddBuiltinFunction("abs", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL,
                          internal_lang, "Abs",
                          function::BuiltInFuncType{
-                             OperatorId::Abs, function::DecimalFunctions::_Abs},
+                             OperatorId::Abs, function::NumericFunctions::_Abs},
                          txn);
       AddBuiltinFunction(
           "sqrt", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang,
           "Sqrt",
           function::BuiltInFuncType{OperatorId::Sqrt,
-                                    function::DecimalFunctions::Sqrt},
+                                    function::NumericFunctions::Sqrt},
           txn);
       AddBuiltinFunction(
           "sqrt", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL,
           internal_lang, "Sqrt",
           function::BuiltInFuncType{OperatorId::Sqrt,
-                                    function::DecimalFunctions::Sqrt},
+                                    function::NumericFunctions::Sqrt},
           txn);
       AddBuiltinFunction(
           "sqrt", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang,
           "Sqrt",
           function::BuiltInFuncType{OperatorId::Sqrt,
-                                    function::DecimalFunctions::Sqrt},
+                                    function::NumericFunctions::Sqrt},
           txn);
       AddBuiltinFunction(
           "sqrt", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang,
           "Sqrt",
           function::BuiltInFuncType{OperatorId::Sqrt,
-                                    function::DecimalFunctions::Sqrt},
+                                    function::NumericFunctions::Sqrt},
           txn);
       AddBuiltinFunction(
           "sqrt", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang,
           "Sqrt",
           function::BuiltInFuncType{OperatorId::Sqrt,
-                                    function::DecimalFunctions::Sqrt},
+                                    function::NumericFunctions::Sqrt},
           txn);
       AddBuiltinFunction(
           "floor", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL,
           internal_lang, "Floor",
           function::BuiltInFuncType{OperatorId::Floor,
-                                    function::DecimalFunctions::_Floor},
+                                    function::NumericFunctions::_Floor},
           txn);
 
       /**
@@ -1328,126 +1328,126 @@ void Catalog::InitializeFunctions() {
       AddBuiltinFunction("abs", {type::TypeId::TINYINT}, type::TypeId::TINYINT,
                          internal_lang, "Abs",
                          function::BuiltInFuncType{
-                             OperatorId::Abs, function::DecimalFunctions::_Abs},
+                             OperatorId::Abs, function::NumericFunctions::_Abs},
                          txn);
 
       AddBuiltinFunction("abs", {type::TypeId::SMALLINT},
                          type::TypeId::SMALLINT, internal_lang, "Abs",
                          function::BuiltInFuncType{
-                             OperatorId::Abs, function::DecimalFunctions::_Abs},
+                             OperatorId::Abs, function::NumericFunctions::_Abs},
                          txn);
 
       AddBuiltinFunction("abs", {type::TypeId::INTEGER}, type::TypeId::INTEGER,
                          internal_lang, "Abs",
                          function::BuiltInFuncType{
-                             OperatorId::Abs, function::DecimalFunctions::_Abs},
+                             OperatorId::Abs, function::NumericFunctions::_Abs},
                          txn);
 
       AddBuiltinFunction("abs", {type::TypeId::BIGINT}, type::TypeId::BIGINT,
                          internal_lang, "Abs",
                          function::BuiltInFuncType{
-                             OperatorId::Abs, function::DecimalFunctions::_Abs},
+                             OperatorId::Abs, function::NumericFunctions::_Abs},
                          txn);
 
       AddBuiltinFunction(
           "floor", {type::TypeId::INTEGER}, type::TypeId::DECIMAL,
           internal_lang, "Floor",
           function::BuiltInFuncType{OperatorId::Floor,
-                                    function::DecimalFunctions::_Floor},
+                                    function::NumericFunctions::_Floor},
           txn);
       AddBuiltinFunction(
           "floor", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang,
           "Floor",
           function::BuiltInFuncType{OperatorId::Floor,
-                                    function::DecimalFunctions::_Floor},
+                                    function::NumericFunctions::_Floor},
           txn);
       AddBuiltinFunction(
           "floor", {type::TypeId::TINYINT}, type::TypeId::DECIMAL,
           internal_lang, "Floor",
           function::BuiltInFuncType{OperatorId::Floor,
-                                    function::DecimalFunctions::_Floor},
+                                    function::NumericFunctions::_Floor},
           txn);
       AddBuiltinFunction(
           "floor", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL,
           internal_lang, "Floor",
           function::BuiltInFuncType{OperatorId::Floor,
-                                    function::DecimalFunctions::_Floor},
+                                    function::NumericFunctions::_Floor},
           txn);
       AddBuiltinFunction(
           "round", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL,
           internal_lang, "Round",
           function::BuiltInFuncType{OperatorId::Round,
-                                    function::DecimalFunctions::_Round},
+                                    function::NumericFunctions::_Round},
           txn);
 
       AddBuiltinFunction(
           "ceil", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL, internal_lang,
           "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceil", {type::TypeId::TINYINT}, type::TypeId::DECIMAL, internal_lang,
           "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceil", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceil", {type::TypeId::INTEGER}, type::TypeId::DECIMAL, internal_lang,
           "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceil", {type::TypeId::BIGINT}, type::TypeId::DECIMAL, internal_lang,
           "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceiling", {type::TypeId::DECIMAL}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceiling", {type::TypeId::TINYINT}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceiling", {type::TypeId::SMALLINT}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceiling", {type::TypeId::INTEGER}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       AddBuiltinFunction(
           "ceiling", {type::TypeId::BIGINT}, type::TypeId::DECIMAL,
           internal_lang, "Ceil",
           function::BuiltInFuncType{OperatorId::Ceil,
-                                    function::DecimalFunctions::_Ceil},
+                                    function::NumericFunctions::_Ceil},
           txn);
 
       /**
diff --git a/src/codegen/proxy/date_functions_proxy.cpp b/src/codegen/proxy/date_functions_proxy.cpp
index de8f030ef4f..7bce9276f56 100644
--- a/src/codegen/proxy/date_functions_proxy.cpp
+++ b/src/codegen/proxy/date_functions_proxy.cpp
@@ -6,19 +6,24 @@
 //
 // Identification: src/codegen/proxy/date_functions_proxy.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "codegen/proxy/date_functions_proxy.h"
 
+#include "codegen/proxy/runtime_functions_proxy.h"
 #include "codegen/proxy/type_builder.h"
 #include "function/date_functions.h"
 
 namespace peloton {
 namespace codegen {
 
+// Utility functions
 DEFINE_METHOD(peloton::function, DateFunctions, Now);
 
+// Input functions
+DEFINE_METHOD(peloton::function, DateFunctions, InputDate);
+
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/codegen/proxy/decimal_functions_proxy.cpp b/src/codegen/proxy/decimal_functions_proxy.cpp
deleted file mode 100644
index 4cbc6d05640..00000000000
--- a/src/codegen/proxy/decimal_functions_proxy.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// decimal_functions_proxy.cpp
-//
-// Identification: src/codegen/proxy/decimal_functions_proxy.cpp
-//
-// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#include "codegen/proxy/decimal_functions_proxy.h"
-
-#include "codegen/proxy/type_builder.h"
-#include "function/decimal_functions.h"
-
-namespace peloton {
-namespace codegen {
-
-DEFINE_METHOD(peloton::function, DecimalFunctions, Abs);
-
-DEFINE_METHOD(peloton::function, DecimalFunctions, Floor);
-
-DEFINE_METHOD(peloton::function, DecimalFunctions, Round);
-
-DEFINE_METHOD(peloton::function, DecimalFunctions, Ceil);
-
-}  // namespace codegen
-}  // namespace peloton
diff --git a/src/codegen/proxy/numeric_functions_proxy.cpp b/src/codegen/proxy/numeric_functions_proxy.cpp
new file mode 100644
index 00000000000..133917b668d
--- /dev/null
+++ b/src/codegen/proxy/numeric_functions_proxy.cpp
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// numeric_functions_proxy.cpp
+//
+// Identification: src/codegen/proxy/numeric_functions_proxy.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/proxy/numeric_functions_proxy.h"
+
+#include "codegen/proxy/runtime_functions_proxy.h"
+#include "codegen/proxy/type_builder.h"
+#include "function/numeric_functions.h"
+
+namespace peloton {
+namespace codegen {
+
+// Utility functions
+DEFINE_METHOD(peloton::function, NumericFunctions, Abs);
+DEFINE_METHOD(peloton::function, NumericFunctions, Floor);
+DEFINE_METHOD(peloton::function, NumericFunctions, Round);
+DEFINE_METHOD(peloton::function, NumericFunctions, Ceil);
+
+// Input functions
+DEFINE_METHOD(peloton::function, NumericFunctions, InputBoolean);
+DEFINE_METHOD(peloton::function, NumericFunctions, InputTinyInt);
+DEFINE_METHOD(peloton::function, NumericFunctions, InputSmallInt);
+DEFINE_METHOD(peloton::function, NumericFunctions, InputInteger);
+DEFINE_METHOD(peloton::function, NumericFunctions, InputBigInt);
+DEFINE_METHOD(peloton::function, NumericFunctions, InputDecimal);
+
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/codegen/proxy/string_functions_proxy.cpp b/src/codegen/proxy/string_functions_proxy.cpp
index 32e25ccc0e1..db765480e9b 100644
--- a/src/codegen/proxy/string_functions_proxy.cpp
+++ b/src/codegen/proxy/string_functions_proxy.cpp
@@ -13,6 +13,7 @@
 #include "codegen/proxy/string_functions_proxy.h"
 
 #include "codegen/proxy/executor_context_proxy.h"
+#include "codegen/proxy/pool_proxy.h"
 
 namespace peloton {
 namespace codegen {
@@ -20,18 +21,17 @@ namespace codegen {
 // StrWithLen struct
 DEFINE_TYPE(StrWithLen, "peloton::StrWithLen", str, length);
 
-// String Function
 DEFINE_METHOD(peloton::function, StringFunctions, Ascii);
 DEFINE_METHOD(peloton::function, StringFunctions, Like);
 DEFINE_METHOD(peloton::function, StringFunctions, Length);
-DEFINE_METHOD(peloton::function, StringFunctions, Substr);
-DEFINE_METHOD(peloton::function, StringFunctions, Repeat);
-
-// Trim-related functions
 DEFINE_METHOD(peloton::function, StringFunctions, BTrim);
 DEFINE_METHOD(peloton::function, StringFunctions, Trim);
 DEFINE_METHOD(peloton::function, StringFunctions, LTrim);
 DEFINE_METHOD(peloton::function, StringFunctions, RTrim);
+DEFINE_METHOD(peloton::function, StringFunctions, Substr);
+DEFINE_METHOD(peloton::function, StringFunctions, Repeat);
+DEFINE_METHOD(peloton::function, StringFunctions, CompareStrings);
+DEFINE_METHOD(peloton::function, StringFunctions, WriteString);
 
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/codegen/proxy/values_runtime_proxy.cpp b/src/codegen/proxy/values_runtime_proxy.cpp
index 530ad6b4e20..0c30ef1d4ac 100644
--- a/src/codegen/proxy/values_runtime_proxy.cpp
+++ b/src/codegen/proxy/values_runtime_proxy.cpp
@@ -30,16 +30,5 @@ DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputDecimal);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarchar);
 DEFINE_METHOD(peloton::codegen, ValuesRuntime, OutputVarbinary);
 
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBoolean);
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputTinyInt);
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputSmallInt);
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputInteger);
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputBigInt);
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, InputDecimal);
-
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, CompareStrings);
-
-DEFINE_METHOD(peloton::codegen, ValuesRuntime, WriteVarlen);
-
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/codegen/table_storage.cpp b/src/codegen/table_storage.cpp
index 99df998b644..e4240c6f7bc 100644
--- a/src/codegen/table_storage.cpp
+++ b/src/codegen/table_storage.cpp
@@ -14,7 +14,7 @@
 
 #include "catalog/schema.h"
 #include "codegen/lang/if.h"
-#include "codegen/proxy/values_runtime_proxy.h"
+#include "codegen/proxy/string_functions_proxy.h"
 #include "codegen/type/sql_type.h"
 #include "codegen/type/type.h"
 #include "codegen/value.h"
@@ -49,7 +49,7 @@ void TableStorage::StoreValues(CodeGen &codegen, llvm::Value *tuple_ptr,
       }
       value_is_null.ElseBlock();
       {
-        codegen.Call(ValuesRuntimeProxy::WriteVarlen,
+        codegen.Call(StringFunctionsProxy::WriteString,
                      {value.GetValue(), value.GetLength(), val_ptr, pool});
       }
       value_is_null.EndIf();
diff --git a/src/codegen/type/bigint_type.cpp b/src/codegen/type/bigint_type.cpp
index 9332bc51fbc..45b43b3ad46 100644
--- a/src/codegen/type/bigint_type.cpp
+++ b/src/codegen/type/bigint_type.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/type/bigint_type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +14,7 @@
 
 #include "codegen/lang/if.h"
 #include "codegen/value.h"
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/decimal_type.h"
@@ -597,7 +598,7 @@ void BigInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 
 llvm::Function *BigInt::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
-  return ValuesRuntimeProxy::InputBigInt.GetFunction(codegen);
+  return NumericFunctionsProxy::InputBigInt.GetFunction(codegen);
 }
 
 llvm::Function *BigInt::GetOutputFunction(
diff --git a/src/codegen/type/boolean_type.cpp b/src/codegen/type/boolean_type.cpp
index 5f7387ed9b4..37668c761da 100644
--- a/src/codegen/type/boolean_type.cpp
+++ b/src/codegen/type/boolean_type.cpp
@@ -6,12 +6,13 @@
 //
 // Identification: src/codegen/type/boolean_type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "codegen/type/boolean_type.h"
 
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/integer_type.h"
 #include "codegen/type/varchar_type.h"
@@ -326,7 +327,7 @@ void Boolean::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 
 llvm::Function *Boolean::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
-  return ValuesRuntimeProxy::InputBoolean.GetFunction(codegen);
+  return NumericFunctionsProxy::InputBoolean.GetFunction(codegen);
 }
 
 llvm::Function *Boolean::GetOutputFunction(
diff --git a/src/codegen/type/date_type.cpp b/src/codegen/type/date_type.cpp
index 26342c23db9..5b541c32dcd 100644
--- a/src/codegen/type/date_type.cpp
+++ b/src/codegen/type/date_type.cpp
@@ -14,6 +14,7 @@
 
 #include "codegen/lang/if.h"
 #include "codegen/value.h"
+#include "codegen/proxy/date_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/integer_type.h"
@@ -189,9 +190,8 @@ void Date::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 }
 
 llvm::Function *Date::GetInputFunction(
-    UNUSED_ATTRIBUTE CodeGen &codegen,
-    UNUSED_ATTRIBUTE const Type &type) const {
-  throw NotImplementedException{"Date inputs not supported yet"};
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return DateFunctionsProxy::InputDate.GetFunction(codegen);
 }
 
 llvm::Function *Date::GetOutputFunction(
diff --git a/src/codegen/type/decimal_type.cpp b/src/codegen/type/decimal_type.cpp
index 92cc7ec5b6a..7f527092a1d 100644
--- a/src/codegen/type/decimal_type.cpp
+++ b/src/codegen/type/decimal_type.cpp
@@ -13,7 +13,7 @@
 #include "codegen/type/decimal_type.h"
 
 #include "codegen/lang/if.h"
-#include "codegen/proxy/decimal_functions_proxy.h"
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/integer_type.h"
@@ -194,7 +194,7 @@ struct Abs : public TypeSystem::UnaryOperatorHandleNull {
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
       const override {
     llvm::Value *raw_ret =
-        codegen.Call(DecimalFunctionsProxy::Abs, {val.GetValue()});
+        codegen.Call(NumericFunctionsProxy::Abs, {val.GetValue()});
     return Value{Decimal::Instance(), raw_ret};
   }
 };
@@ -213,7 +213,7 @@ struct Floor : public TypeSystem::UnaryOperatorHandleNull {
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
       const override {
     llvm::Value *raw_ret =
-        codegen.Call(DecimalFunctionsProxy::Floor, {val.GetValue()});
+        codegen.Call(NumericFunctionsProxy::Floor, {val.GetValue()});
     return Value{Decimal::Instance(), raw_ret};
   }
 };
@@ -232,7 +232,7 @@ struct Round : public TypeSystem::UnaryOperatorHandleNull {
              UNUSED_ATTRIBUTE const TypeSystem::InvocationContext &ctx)
       const override {
     llvm::Value *raw_ret =
-        codegen.Call(DecimalFunctionsProxy::Round, {val.GetValue()});
+        codegen.Call(NumericFunctionsProxy::Round, {val.GetValue()});
     return Value{Decimal::Instance(), raw_ret};
   }
 };
@@ -252,7 +252,7 @@ struct Ceil : public TypeSystem::UnaryOperatorHandleNull {
       const override {
     PELOTON_ASSERT(SupportsType(val.GetType()));
 
-    auto *result = codegen.Call(DecimalFunctionsProxy::Ceil, {val.GetValue()});
+    auto *result = codegen.Call(NumericFunctionsProxy::Ceil, {val.GetValue()});
 
     return Value{Decimal::Instance(), result};
   }
@@ -561,7 +561,7 @@ void Decimal::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 llvm::Function *Decimal::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
   // TODO: We should be using the precision/scale in the output function
-  return ValuesRuntimeProxy::InputDecimal.GetFunction(codegen);
+  return NumericFunctionsProxy::InputDecimal.GetFunction(codegen);
 }
 
 llvm::Function *Decimal::GetOutputFunction(
diff --git a/src/codegen/type/integer_type.cpp b/src/codegen/type/integer_type.cpp
index 92809098341..6d9a61ebde1 100644
--- a/src/codegen/type/integer_type.cpp
+++ b/src/codegen/type/integer_type.cpp
@@ -6,13 +6,14 @@
 //
 // Identification: src/codegen/type/integer_type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "codegen/type/integer_type.h"
 
 #include "codegen/lang/if.h"
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/decimal_type.h"
@@ -595,7 +596,7 @@ void Integer::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 
 llvm::Function *Integer::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
-  return ValuesRuntimeProxy::InputInteger.GetFunction(codegen);
+  return NumericFunctionsProxy::InputInteger.GetFunction(codegen);
 }
 
 llvm::Function *Integer::GetOutputFunction(
diff --git a/src/codegen/type/smallint_type.cpp b/src/codegen/type/smallint_type.cpp
index e0f31561c95..b645af00ffe 100644
--- a/src/codegen/type/smallint_type.cpp
+++ b/src/codegen/type/smallint_type.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/type/smallint_type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +14,7 @@
 
 #include "codegen/lang/if.h"
 #include "codegen/value.h"
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/decimal_type.h"
@@ -606,7 +607,7 @@ void SmallInt::GetTypeForMaterialization(CodeGen &codegen,
 
 llvm::Function *SmallInt::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
-  return ValuesRuntimeProxy::InputSmallInt.GetFunction(codegen);
+  return NumericFunctionsProxy::InputSmallInt.GetFunction(codegen);
 }
 
 llvm::Function *SmallInt::GetOutputFunction(
diff --git a/src/codegen/type/tinyint_type.cpp b/src/codegen/type/tinyint_type.cpp
index 24cad11558c..ab82f4982a2 100644
--- a/src/codegen/type/tinyint_type.cpp
+++ b/src/codegen/type/tinyint_type.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/codegen/type/tinyint_type.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +14,7 @@
 
 #include "codegen/lang/if.h"
 #include "codegen/value.h"
+#include "codegen/proxy/numeric_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/decimal_type.h"
@@ -601,7 +602,7 @@ void TinyInt::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 
 llvm::Function *TinyInt::GetInputFunction(
     CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
-  return ValuesRuntimeProxy::InputTinyInt.GetFunction(codegen);
+  return NumericFunctionsProxy::InputTinyInt.GetFunction(codegen);
 }
 
 llvm::Function *TinyInt::GetOutputFunction(
diff --git a/src/codegen/type/varbinary_type.cpp b/src/codegen/type/varbinary_type.cpp
index bcbf0c8a1de..6d80b924243 100644
--- a/src/codegen/type/varbinary_type.cpp
+++ b/src/codegen/type/varbinary_type.cpp
@@ -13,6 +13,7 @@
 #include "codegen/type/varbinary_type.h"
 
 #include "codegen/value.h"
+#include "codegen/proxy/string_functions_proxy.h"
 #include "codegen/proxy/values_runtime_proxy.h"
 #include "codegen/type/boolean_type.h"
 #include "codegen/type/integer_type.h"
@@ -52,7 +53,7 @@ struct CompareVarbinary : public TypeSystem::ExpensiveComparisonHandleNull {
     // Setup the function arguments and invoke the call
     std::vector<llvm::Value *> args = {left.GetValue(), left.GetLength(),
                                        right.GetValue(), right.GetLength()};
-    return codegen.Call(ValuesRuntimeProxy::CompareStrings, args);
+    return codegen.Call(StringFunctionsProxy::CompareStrings, args);
   }
 
   Value CompareLtImpl(CodeGen &codegen, const Value &left,
diff --git a/src/codegen/type/varchar_type.cpp b/src/codegen/type/varchar_type.cpp
index 001b6afaca9..dc3ab961f3d 100644
--- a/src/codegen/type/varchar_type.cpp
+++ b/src/codegen/type/varchar_type.cpp
@@ -52,7 +52,7 @@ struct CompareVarchar : public TypeSystem::ExpensiveComparisonHandleNull {
     // Setup the function arguments and invoke the call
     std::vector<llvm::Value *> args = {left.GetValue(), left.GetLength(),
                                        right.GetValue(), right.GetLength()};
-    return codegen.Call(ValuesRuntimeProxy::CompareStrings, args);
+    return codegen.Call(StringFunctionsProxy::CompareStrings, args);
   }
 
   Value CompareLtImpl(CodeGen &codegen, const Value &left,
diff --git a/src/codegen/values_runtime.cpp b/src/codegen/values_runtime.cpp
index e23e552813a..a3c41196762 100644
--- a/src/codegen/values_runtime.cpp
+++ b/src/codegen/values_runtime.cpp
@@ -12,8 +12,6 @@
 
 #include "codegen/values_runtime.h"
 
-#include <type_traits>
-
 #include "codegen/runtime_functions.h"
 #include "codegen/type/type.h"
 #include "type/abstract_pool.h"
@@ -101,267 +99,5 @@ void ValuesRuntime::OutputVarbinary(char *values, uint32_t idx, const char *ptr,
            peloton::type::ValueFactory::GetVarbinaryValue(bin_ptr, len, false));
 }
 
-////////////////////////////////////////////////////////////////////////////////
-///
-/// Input functions
-///
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-/**
- * Skip all leading and trailing whitespace from the string bounded by the
- * provided pointers. This function will modify the input pointers to point to
- * the first non-space character at the start and end of the input string.
- *
- * @param[in,out] left A pointer to the leftmost character in the input string
- * @param[in,out] right A pointer to the rightmost character in the input string
- */
-void TrimLeftRight(const char *&left, const char *&right) {
-  while (*left == ' ') {
-    left++;
-  }
-  while (right > left && *(right - 1) == ' ') {
-    right--;
-  }
-}
-
-/**
- * Convert the provided input string into a integral number. This function
- * handles leading whitespace and leading negative (-) or positive (+) signs.
- * Additionally, it performs a bounds check to ensure the number falls into the
- * valid range of numbers for the given type.
- *
- * @tparam T The integral type (int8_t, int16_t, int32_t, int64_t)
- * @param ptr A pointer to the start of the input string
- * @param len The length of the input string
- * @return The numeric interpretation of the input string
- */
-template <typename T>
-typename std::enable_if<std::is_integral<T>::value, T>::type ToNum(
-    const char *ptr, uint32_t len) {
-  if (len == 0) {
-    RuntimeFunctions::ThrowInvalidInputStringException();
-    __builtin_unreachable();
-  }
-
-  const char *start = ptr;
-  const char *end = start + len;
-
-  // Trim leading and trailing whitespace
-  TrimLeftRight(start, end);
-
-  // Check negative or positive sign
-  bool negative = false;
-  if (*start == '-') {
-    negative = true;
-    start++;
-  } else if (*start == '+') {
-    start++;
-  }
-
-  // Convert
-  int64_t num = 0;
-  while (start != end) {
-    if (*start < '0' || *start > '9') {
-      RuntimeFunctions::ThrowInvalidInputStringException();
-      __builtin_unreachable();
-    }
-
-    num = (num * 10) + (*start - '0');
-
-    start++;
-  }
-
-  // Negate number if we need to
-  if (negative) {
-    num = -num;
-  }
-
-  // Range check
-  if (num <= std::numeric_limits<T>::min() ||
-      num >= std::numeric_limits<T>::max()) {
-    RuntimeFunctions::ThrowOverflowException();
-    __builtin_unreachable();
-  }
-
-  // Done
-  return static_cast<T>(num);
-}
-
-template <typename T>
-typename std::enable_if<std::is_floating_point<T>::value, T>::type ToNum(
-    const char *ptr, uint32_t len) {
-  if (len == 0) {
-    RuntimeFunctions::ThrowInvalidInputStringException();
-    __builtin_unreachable();
-  }
-
-  // TODO(pmenon): Optimize me later
-  char *end = nullptr;
-  auto ret = std::strtod(ptr, &end);
-
-  if (unlikely_branch(end == ptr)) {
-    if (errno == ERANGE) {
-      RuntimeFunctions::ThrowOverflowException();
-      __builtin_unreachable();
-    } else {
-      RuntimeFunctions::ThrowInvalidInputStringException();
-      __builtin_unreachable();
-    }
-  }
-
-  // Done
-  return static_cast<T>(ret);
-}
-
-}  // namespace
-
-bool ValuesRuntime::InputBoolean(UNUSED_ATTRIBUTE const type::Type &type,
-                                 const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-
-  if (len == 0) {
-    RuntimeFunctions::ThrowInvalidInputStringException();
-    __builtin_unreachable();
-  }
-
-  const char *start = ptr, *end = ptr + len;
-
-  // Trim leading and trailing whitespace
-  TrimLeftRight(start, end);
-
-  //
-  uint64_t trimmed_len = end - start;
-
-  // Check cases
-  switch (*start) {
-    case 't':
-    case 'T': {
-      static constexpr char kTrue[] = "true";
-      std::cout << sizeof(kTrue) << std::endl;
-      if (strncasecmp(start, kTrue, trimmed_len) == 0) {
-        return true;
-      }
-      break;
-    }
-    case 'f':
-    case 'F': {
-      static constexpr char kFalse[] = "false";
-      if (strncasecmp(start, kFalse, trimmed_len) == 0) {
-        return false;
-      }
-      break;
-    }
-    case 'y':
-    case 'Y': {
-      static constexpr char kYes[] = "yes";
-      if (strncasecmp(start, kYes, trimmed_len) == 0) {
-        return true;
-      }
-      break;
-    }
-    case 'n':
-    case 'N': {
-      static constexpr char kNo[] = "no";
-      if (strncasecmp(start, kNo, trimmed_len) == 0) {
-        return false;
-      }
-      break;
-    }
-    case 'o':
-    case 'O': {
-      // 'o' not enough to distinguish between on/off
-      static constexpr char kOff[] = "off";
-      static constexpr char kOn[] = "on";
-      if (strncasecmp(start, kOff, (trimmed_len > 3 ? trimmed_len : 3)) == 0) {
-        return false;
-      } else if (strncasecmp(start, kOn, (trimmed_len > 2 ? trimmed_len : 2)) ==
-                 0) {
-        return true;
-      }
-      break;
-    }
-    case '0': {
-      if (trimmed_len == 1) {
-        return false;
-      } else {
-        return true;
-      }
-    }
-    case '1': {
-      if (trimmed_len == 1) {
-        return true;
-      } else {
-        return false;
-      }
-    }
-    default: { break; }
-  }
-
-  // Error
-  RuntimeFunctions::ThrowInvalidInputStringException();
-  __builtin_unreachable();
-}
-
-int8_t ValuesRuntime::InputTinyInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                   const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  return ToNum<int8_t>(ptr, len);
-}
-
-int16_t ValuesRuntime::InputSmallInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                     const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  return ToNum<int16_t>(ptr, len);
-}
-
-int32_t ValuesRuntime::InputInteger(UNUSED_ATTRIBUTE const type::Type &type,
-                                    const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  return ToNum<int32_t>(ptr, len);
-}
-
-int64_t ValuesRuntime::InputBigInt(UNUSED_ATTRIBUTE const type::Type &type,
-                                   const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  return ToNum<int64_t>(ptr, len);
-}
-
-double ValuesRuntime::InputDecimal(UNUSED_ATTRIBUTE const type::Type &type,
-                                   const char *ptr, uint32_t len) {
-  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
-  return ToNum<double>(ptr, len);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-///
-/// String comparison
-///
-////////////////////////////////////////////////////////////////////////////////
-
-int32_t ValuesRuntime::CompareStrings(const char *str1, uint32_t len1,
-                                      const char *str2, uint32_t len2) {
-  return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2);
-}
-
-void ValuesRuntime::WriteVarlen(const char *data, uint32_t len, char *buf,
-                                peloton::type::AbstractPool &pool) {
-  struct Varlen {
-    uint32_t len;
-    char data[0];
-  };
-
-  // Allocate memory for the Varlen object
-  auto *area = static_cast<Varlen *>(pool.Allocate(sizeof(uint32_t) + len));
-
-  // Populate it
-  area->len = len;
-  PELOTON_MEMCPY(area->data, data, len);
-
-  // Store a pointer to the Varlen object into the target memory space
-  *reinterpret_cast<Varlen **>(buf) = area;
-}
-
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/function/date_functions.cpp b/src/function/date_functions.cpp
index 233259844c8..9c676690a13 100644
--- a/src/function/date_functions.cpp
+++ b/src/function/date_functions.cpp
@@ -18,7 +18,6 @@
 #include <time.h>
 #include <sys/time.h>
 
-#include "common/logger.h"
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "type/value_factory.h"
@@ -26,30 +25,28 @@
 namespace peloton {
 namespace function {
 
-// This now is not what postgres does.
-// Postgres is returning the time when the transaction begins
-// We are here intead generating a new time when this function
-// is called
+// This implementation of Now() is **not** what postgres does. Postgres is
+// returning the time when the transaction begins. We are here instead
+// generating a new time when this function is called.
 int64_t DateFunctions::Now() {
   uint64_t time_stamp;
   struct timeval tv;
   struct tm *time_info;
 
-  uint64_t hour_min_sec_base = 1000000; //us to sec
+  uint64_t hour_min_sec_base = 1000000;  // us to sec
   uint64_t year_base = hour_min_sec_base * 100000;
-  uint64_t day_base = year_base * 10000 * 27; // skip the time zone
+  uint64_t day_base = year_base * 10000 * 27;  // skip the time zone
   uint64_t month_base = day_base * 32;
 
   gettimeofday(&tv, NULL);
   time_info = gmtime(&(tv.tv_sec));
 
-  uint32_t hour_min_sec = time_info->tm_hour * 3600 + 
-                          time_info->tm_min * 60 + 
-                          time_info->tm_sec;
+  uint32_t hour_min_sec =
+      time_info->tm_hour * 3600 + time_info->tm_min * 60 + time_info->tm_sec;
   // EPOCH time start from 1970
   uint16_t year = time_info->tm_year + 1900;
   uint16_t day = time_info->tm_mday;
-  uint16_t month = time_info->tm_mon + 1; // tm_mon is from 0 - 11
+  uint16_t month = time_info->tm_mon + 1;  // tm_mon is from 0 - 11
 
   time_stamp = tv.tv_usec;
   time_stamp += hour_min_sec_base * hour_min_sec;
@@ -60,10 +57,16 @@ int64_t DateFunctions::Now() {
   return time_stamp;
 }
 
-type::Value DateFunctions::_Now(const UNUSED_ATTRIBUTE std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 0);
-  int64_t now = Now();
-  return type::ValueFactory::GetTimestampValue(now);
+type::Value DateFunctions::_Now(
+    UNUSED_ATTRIBUTE const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.empty());
+  return type::ValueFactory::GetTimestampValue(Now());
+}
+
+int32_t DateFunctions::InputDate(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type,
+    UNUSED_ATTRIBUTE const char *data, UNUSED_ATTRIBUTE uint32_t len) {
+  return 0;
 }
 
 }  // namespace expression
diff --git a/src/function/decimal_functions.cpp b/src/function/decimal_functions.cpp
deleted file mode 100644
index b722993b4d0..00000000000
--- a/src/function/decimal_functions.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// decimal_functions.cpp
-//
-// Identification: src/function/decimal_functions.cpp
-//
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#include "function/decimal_functions.h"
-#include "type/value_factory.h"
-
-namespace peloton {
-namespace function {
-
-// Get square root of the value
-type::Value DecimalFunctions::Sqrt(const std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 1);
-  if (args[0].IsNull()) {
-    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  return args[0].Sqrt();
-}
-
-// Get Abs of value
-type::Value DecimalFunctions::_Abs(const std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 1);
-  if (args[0].IsNull()) {
-    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  switch (args[0].GetElementType()) {
-    case type::TypeId::DECIMAL:
-      {
-        double result;
-        result = Abs(args[0].GetAs<double>());
-        return type::ValueFactory::GetDecimalValue(result);
-      }
-      break;
-    case type::TypeId::INTEGER:
-      {
-        int32_t result;
-        result = abs(args[0].GetAs<int32_t>());
-        return type::ValueFactory::GetIntegerValue(result);
-        break;
-      }
-    case type::TypeId::BIGINT:
-      {
-        int64_t result;
-        result = std::abs(args[0].GetAs<int64_t>());
-        return type::ValueFactory::GetBigIntValue(result);
-      }
-      break;
-    case type::TypeId::SMALLINT:
-      {
-        int16_t result;
-        result = abs(args[0].GetAs<int16_t>());
-        return type::ValueFactory::GetSmallIntValue(result);
-      }
-      break;
-    case type::TypeId::TINYINT:
-      {
-        int8_t result;
-        result = abs(args[0].GetAs<int8_t>());
-        return type::ValueFactory::GetTinyIntValue(result);
-      }
-      break;
-    default:
-      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-}
-
-double DecimalFunctions::Abs(const double args) { return fabs(args); }
-
-// Get ceiling of value
-type::Value DecimalFunctions::_Ceil(const std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 1);
-  if (args[0].IsNull()) {
-    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  double result;
-  switch (args[0].GetElementType()) {
-    case type::TypeId::DECIMAL:
-      result = Ceil(args[0].GetAs<double>());
-      break;
-    case type::TypeId::INTEGER:
-      result = args[0].GetAs<int32_t>();
-      break;
-    case type::TypeId::BIGINT:
-      result = args[0].GetAs<int64_t>();
-      break;
-    case type::TypeId::SMALLINT:
-      result = args[0].GetAs<int16_t>();
-      break;
-    case type::TypeId::TINYINT:
-      result = args[0].GetAs<int8_t>();
-      break;
-    default:
-      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  return type::ValueFactory::GetDecimalValue(result);
-}
-
-double DecimalFunctions::Ceil(const double args) { return ceil(args); }
-
-// Get floor value
-type::Value DecimalFunctions::_Floor(const std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 1);
-  if (args[0].IsNull()) {
-    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  double res;
-  switch(args[0].GetElementType()) {
-    case type::TypeId::DECIMAL:
-      res = Floor(args[0].GetAs<double>());
-      break;
-    case type::TypeId::INTEGER:
-      res = args[0].GetAs<int32_t>();
-      break;
-    case type::TypeId::BIGINT:
-      res = args[0].GetAs<int64_t>();
-      break;
-    case type::TypeId::SMALLINT:
-      res = args[0].GetAs<int16_t>();
-      break;
-    case type::TypeId::TINYINT:
-      res = args[0].GetAs<int8_t>();
-      break;
-    default:
-      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  return type::ValueFactory::GetDecimalValue(res);
-}
-
-double DecimalFunctions::Floor(const double val) { return floor(val); }
-
-// Round to nearest integer
-type::Value DecimalFunctions::_Round(const std::vector<type::Value> &args) {
-  PELOTON_ASSERT(args.size() == 1);
-  if (args[0].IsNull()) {
-    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
-  }
-  return type::ValueFactory::GetDecimalValue(Round(args[0].GetAs<double>()));
-}
-
-double DecimalFunctions::Round(double arg) { return round(arg); }
-
-
-}  // namespace function
-}  // namespace peloton
diff --git a/src/function/numeric_functions.cpp b/src/function/numeric_functions.cpp
new file mode 100644
index 00000000000..50a00ee516a
--- /dev/null
+++ b/src/function/numeric_functions.cpp
@@ -0,0 +1,417 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// numeric_functions.cpp
+//
+// Identification: src/function/numeric_functions.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "function/numeric_functions.h"
+
+#include "codegen/type/type.h"
+#include "codegen/runtime_functions.h"
+#include "type/value.h"
+#include "type/value_factory.h"
+
+namespace peloton {
+namespace function {
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Square root
+///
+////////////////////////////////////////////////////////////////////////////////
+
+double NumericFunctions::ISqrt(uint32_t num) {
+  return std::sqrt<uint32_t>(num);
+}
+
+double NumericFunctions::DSqrt(double num) { return std::sqrt(num); }
+
+type::Value NumericFunctions::Sqrt(const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.size() == 1);
+  if (args[0].IsNull()) {
+    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  return args[0].Sqrt();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Absolute value
+///
+////////////////////////////////////////////////////////////////////////////////
+
+double NumericFunctions::Abs(const double args) { return fabs(args); }
+
+// Get Abs of value
+type::Value NumericFunctions::_Abs(const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.size() == 1);
+  if (args[0].IsNull()) {
+    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  switch (args[0].GetElementType()) {
+    case type::TypeId::DECIMAL: {
+      double result;
+      result = Abs(args[0].GetAs<double>());
+      return type::ValueFactory::GetDecimalValue(result);
+    }
+    case type::TypeId::INTEGER: {
+      int32_t result;
+      result = abs(args[0].GetAs<int32_t>());
+      return type::ValueFactory::GetIntegerValue(result);
+    }
+    case type::TypeId::BIGINT: {
+      int64_t result;
+      result = std::abs(args[0].GetAs<int64_t>());
+      return type::ValueFactory::GetBigIntValue(result);
+    }
+    case type::TypeId::SMALLINT: {
+      int16_t result;
+      result = abs(args[0].GetAs<int16_t>());
+      return type::ValueFactory::GetSmallIntValue(result);
+    }
+    case type::TypeId::TINYINT: {
+      int8_t result;
+      result = abs(args[0].GetAs<int8_t>());
+      return type::ValueFactory::GetTinyIntValue(result);
+    }
+    default: {
+      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Ceiling value
+///
+////////////////////////////////////////////////////////////////////////////////
+
+double NumericFunctions::Ceil(const double args) { return ceil(args); }
+
+type::Value NumericFunctions::_Ceil(const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.size() == 1);
+  if (args[0].IsNull()) {
+    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  double result;
+  switch (args[0].GetElementType()) {
+    case type::TypeId::DECIMAL:
+      result = Ceil(args[0].GetAs<double>());
+      break;
+    case type::TypeId::INTEGER:
+      result = args[0].GetAs<int32_t>();
+      break;
+    case type::TypeId::BIGINT:
+      result = args[0].GetAs<int64_t>();
+      break;
+    case type::TypeId::SMALLINT:
+      result = args[0].GetAs<int16_t>();
+      break;
+    case type::TypeId::TINYINT:
+      result = args[0].GetAs<int8_t>();
+      break;
+    default:
+      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  return type::ValueFactory::GetDecimalValue(result);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Floor value
+///
+////////////////////////////////////////////////////////////////////////////////
+
+double NumericFunctions::Floor(const double val) { return floor(val); }
+
+type::Value NumericFunctions::_Floor(const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.size() == 1);
+  if (args[0].IsNull()) {
+    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  double res;
+  switch (args[0].GetElementType()) {
+    case type::TypeId::DECIMAL:
+      res = Floor(args[0].GetAs<double>());
+      break;
+    case type::TypeId::INTEGER:
+      res = args[0].GetAs<int32_t>();
+      break;
+    case type::TypeId::BIGINT:
+      res = args[0].GetAs<int64_t>();
+      break;
+    case type::TypeId::SMALLINT:
+      res = args[0].GetAs<int16_t>();
+      break;
+    case type::TypeId::TINYINT:
+      res = args[0].GetAs<int8_t>();
+      break;
+    default:
+      return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  return type::ValueFactory::GetDecimalValue(res);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Rounding
+///
+////////////////////////////////////////////////////////////////////////////////
+
+double NumericFunctions::Round(double arg) { return round(arg); }
+
+type::Value NumericFunctions::_Round(const std::vector<type::Value> &args) {
+  PELOTON_ASSERT(args.size() == 1);
+  if (args[0].IsNull()) {
+    return type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL);
+  }
+  return type::ValueFactory::GetDecimalValue(Round(args[0].GetAs<double>()));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Input functions
+///
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+/**
+ * Skip all leading and trailing whitespace from the string bounded by the
+ * provided pointers. This function will modify the input pointers to point to
+ * the first non-whitespace space character at the start and end of the input
+ * string.
+ *
+ * @param[in,out] left Pointer to the left-most character in the input string
+ * @param[in,out] right Pointer to the right-most character in the input string
+ */
+void TrimLeftRight(const char *&left, const char *&right) {
+  while (*left == ' ') {
+    left++;
+  }
+  while (right > left && *(right - 1) == ' ') {
+    right--;
+  }
+}
+
+/**
+ * Convert the provided input string into an integral number. This function
+ * handles leading whitespace and leading negative (-) or positive (+) signs.
+ * Additionally, it performs a bounds check to ensure the number falls into the
+ * valid range of numbers for the given type.
+ *
+ * @tparam T The integral type (int8_t, int16_t, int32_t, int64_t)
+ * @param ptr A pointer to the start of the input string
+ * @param len The length of the input string
+ * @return The numeric interpretation of the input string
+ */
+template <typename T>
+T ParseInteger(const char *ptr, uint32_t len) {
+  static_assert(std::is_integral<T>::value,
+                "Must provide integer-type when calling ParseInteger");
+
+  if (len == 0) {
+    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
+  const char *start = ptr;
+  const char *end = start + len;
+
+  // Trim leading and trailing whitespace
+  TrimLeftRight(start, end);
+
+  // Check negative or positive sign
+  bool negative = false;
+  if (*start == '-') {
+    negative = true;
+    start++;
+  } else if (*start == '+') {
+    start++;
+  }
+
+  // Convert
+  int64_t num = 0;
+  while (start != end) {
+    if (*start < '0' || *start > '9') {
+      codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+      __builtin_unreachable();
+    }
+
+    num = (num * 10) + (*start - '0');
+
+    start++;
+  }
+
+  PELOTON_ASSERT(start == end);
+
+  // Negate number if we need to
+  if (negative) {
+    num = -num;
+  }
+
+  // Range check
+  if (num <= std::numeric_limits<T>::min() ||
+      num >= std::numeric_limits<T>::max()) {
+    codegen::RuntimeFunctions::ThrowOverflowException();
+    __builtin_unreachable();
+  }
+
+  // Done
+  return static_cast<T>(num);
+}
+
+}  // namespace
+
+bool NumericFunctions::InputBoolean(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+
+  if (len == 0) {
+    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
+  const char *start = ptr, *end = ptr + len;
+
+  // Trim leading and trailing whitespace
+  TrimLeftRight(start, end);
+
+  //
+  uint64_t trimmed_len = end - start;
+
+  // Check cases
+  switch (*start) {
+    case 't':
+    case 'T': {
+      static constexpr char kTrue[] = "true";
+      if (strncasecmp(start, kTrue, trimmed_len) == 0) {
+        return true;
+      }
+      break;
+    }
+    case 'f':
+    case 'F': {
+      static constexpr char kFalse[] = "false";
+      if (strncasecmp(start, kFalse, trimmed_len) == 0) {
+        return false;
+      }
+      break;
+    }
+    case 'y':
+    case 'Y': {
+      static constexpr char kYes[] = "yes";
+      if (strncasecmp(start, kYes, trimmed_len) == 0) {
+        return true;
+      }
+      break;
+    }
+    case 'n':
+    case 'N': {
+      static constexpr char kNo[] = "no";
+      if (strncasecmp(start, kNo, trimmed_len) == 0) {
+        return false;
+      }
+      break;
+    }
+    case 'o':
+    case 'O': {
+      // 'o' not enough to distinguish between on/off
+      static constexpr char kOff[] = "off";
+      static constexpr char kOn[] = "on";
+      if (strncasecmp(start, kOff, (trimmed_len > 3 ? trimmed_len : 3)) == 0) {
+        return false;
+      } else if (strncasecmp(start, kOn, (trimmed_len > 2 ? trimmed_len : 2)) ==
+                 0) {
+        return true;
+      }
+      break;
+    }
+    case '0': {
+      if (trimmed_len == 1) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+    case '1': {
+      if (trimmed_len == 1) {
+        return true;
+      } else {
+        return false;
+      }
+    }
+    default: { break; }
+  }
+
+  // Error
+  codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+  __builtin_unreachable();
+}
+
+int8_t NumericFunctions::InputTinyInt(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  return ParseInteger<int8_t>(ptr, len);
+}
+
+int16_t NumericFunctions::InputSmallInt(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  return ParseInteger<int16_t>(ptr, len);
+}
+
+int32_t NumericFunctions::InputInteger(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  return ParseInteger<int32_t>(ptr, len);
+}
+
+int64_t NumericFunctions::InputBigInt(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  return ParseInteger<int64_t>(ptr, len);
+}
+
+double NumericFunctions::InputDecimal(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *ptr,
+    uint32_t len) {
+  PELOTON_ASSERT(ptr != nullptr && "Input is assumed to be non-NULL");
+  if (len == 0) {
+    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
+  // We don't trim because std::strtod() does the trimming for us
+
+  // TODO(pmenon): Optimize me later
+  char *end = nullptr;
+  double ret = std::strtod(ptr, &end);
+
+  if (unlikely_branch(end == ptr)) {
+    if (errno == ERANGE) {
+      codegen::RuntimeFunctions::ThrowOverflowException();
+      __builtin_unreachable();
+    } else {
+      codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+      __builtin_unreachable();
+    }
+  }
+
+  // Done
+  return ret;
+}
+
+}  // namespace function
+}  // namespace peloton
diff --git a/src/function/string_functions.cpp b/src/function/string_functions.cpp
index 841a9ee6e15..2cf8d9f0b89 100644
--- a/src/function/string_functions.cpp
+++ b/src/function/string_functions.cpp
@@ -14,6 +14,8 @@
 
 #include "common/macros.h"
 #include "executor/executor_context.h"
+#include "type/type_util.h"
+#include "type/abstract_pool.h"
 
 namespace peloton {
 namespace function {
@@ -220,5 +222,28 @@ uint32_t StringFunctions::Length(
   return length;
 }
 
+int32_t StringFunctions::CompareStrings(const char *str1, uint32_t len1,
+                                        const char *str2, uint32_t len2) {
+  return peloton::type::TypeUtil::CompareStrings(str1, len1, str2, len2);
+}
+
+void StringFunctions::WriteString(const char *data, uint32_t len, char *buf,
+                                  peloton::type::AbstractPool &pool) {
+  struct Varlen {
+    uint32_t len;
+    char data[0];
+  };
+
+  // Allocate memory for the Varlen object
+  auto *area = static_cast<Varlen *>(pool.Allocate(sizeof(uint32_t) + len));
+
+  // Populate it
+  area->len = len;
+  PELOTON_MEMCPY(area->data, data, len);
+
+  // Store a pointer to the Varlen object into the target memory space
+  *reinterpret_cast<Varlen **>(buf) = area;
+}
+
 }  // namespace function
 }  // namespace peloton
diff --git a/src/include/codegen/proxy/date_functions_proxy.h b/src/include/codegen/proxy/date_functions_proxy.h
index 38f96b3cd38..7954afe72d3 100644
--- a/src/include/codegen/proxy/date_functions_proxy.h
+++ b/src/include/codegen/proxy/date_functions_proxy.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/codegen/proxy/date_functions_proxy.h
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,8 +18,11 @@ namespace peloton {
 namespace codegen {
 
 PROXY(DateFunctions) {
-  // Proxy everything in function::StringFunctions
+  // Utility functions
   DECLARE_METHOD(Now);
+
+  // Input functions
+  DECLARE_METHOD(InputDate);
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/proxy/decimal_functions_proxy.h b/src/include/codegen/proxy/numeric_functions_proxy.h
similarity index 52%
rename from src/include/codegen/proxy/decimal_functions_proxy.h
rename to src/include/codegen/proxy/numeric_functions_proxy.h
index 4d9b70a5671..b3a338e06a8 100644
--- a/src/include/codegen/proxy/decimal_functions_proxy.h
+++ b/src/include/codegen/proxy/numeric_functions_proxy.h
@@ -2,11 +2,11 @@
 //
 //                         Peloton
 //
-// decimal_functions_proxy.h
+// numeric_functions_proxy.h
 //
-// Identification: src/include/codegen/proxy/decimal_functions_proxy.h
+// Identification: src/include/codegen/proxy/numeric_functions_proxy.h
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,13 +17,20 @@
 namespace peloton {
 namespace codegen {
 
-PROXY(DecimalFunctions) {
-  // Proxy everything in function::DecimalFunctions
-
+PROXY(NumericFunctions) {
+  // Utility functions
   DECLARE_METHOD(Abs);
   DECLARE_METHOD(Floor);
   DECLARE_METHOD(Round);
   DECLARE_METHOD(Ceil);
+
+  // Input functions
+  DECLARE_METHOD(InputBoolean);
+  DECLARE_METHOD(InputTinyInt);
+  DECLARE_METHOD(InputSmallInt);
+  DECLARE_METHOD(InputInteger);
+  DECLARE_METHOD(InputBigInt);
+  DECLARE_METHOD(InputDecimal);
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/proxy/string_functions_proxy.h b/src/include/codegen/proxy/string_functions_proxy.h
index e9cf1c9c7fc..bff1911d0a2 100644
--- a/src/include/codegen/proxy/string_functions_proxy.h
+++ b/src/include/codegen/proxy/string_functions_proxy.h
@@ -29,6 +29,8 @@ PROXY(StringFunctions) {
   DECLARE_METHOD(RTrim);
   DECLARE_METHOD(Substr);
   DECLARE_METHOD(Repeat);
+  DECLARE_METHOD(CompareStrings);
+  DECLARE_METHOD(WriteString);
 };
 
 PROXY(StrWithLen) {
diff --git a/src/include/codegen/proxy/values_runtime_proxy.h b/src/include/codegen/proxy/values_runtime_proxy.h
index 059f700d8c6..9868d518bac 100644
--- a/src/include/codegen/proxy/values_runtime_proxy.h
+++ b/src/include/codegen/proxy/values_runtime_proxy.h
@@ -29,17 +29,6 @@ PROXY(ValuesRuntime) {
   DECLARE_METHOD(OutputDecimal);
   DECLARE_METHOD(OutputVarchar);
   DECLARE_METHOD(OutputVarbinary);
-
-  DECLARE_METHOD(InputBoolean);
-  DECLARE_METHOD(InputTinyInt);
-  DECLARE_METHOD(InputSmallInt);
-  DECLARE_METHOD(InputInteger);
-  DECLARE_METHOD(InputBigInt);
-  DECLARE_METHOD(InputDecimal);
-
-  DECLARE_METHOD(CompareStrings);
-
-  DECLARE_METHOD(WriteVarlen);
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/values_runtime.h b/src/include/codegen/values_runtime.h
index 905ead1fd68..fd5c26b0e78 100644
--- a/src/include/codegen/values_runtime.h
+++ b/src/include/codegen/values_runtime.h
@@ -28,12 +28,6 @@ class Type;
 
 class ValuesRuntime {
  public:
-  //////////////////////////////////////////////////////////////////////////////
-  ///
-  /// Output functions
-  ///
-  //////////////////////////////////////////////////////////////////////////////
-
   // Write out the given boolean value into the array at the provided index
   static void OutputBoolean(char *values, uint32_t idx, bool val, bool is_null);
 
@@ -65,53 +59,6 @@ class ValuesRuntime {
   // Write out the given varbinary value into the array at the provided index
   static void OutputVarbinary(char *values, uint32_t idx, const char *str,
                               uint32_t len);
-
-  //////////////////////////////////////////////////////////////////////////////
-  ///
-  /// Input functions
-  ////
-  //////////////////////////////////////////////////////////////////////////////
-
-  static bool InputBoolean(const type::Type &type, const char *ptr,
-                           uint32_t len);
-
-  static int8_t InputTinyInt(const type::Type &type, const char *ptr,
-                             uint32_t len);
-
-  static int16_t InputSmallInt(const type::Type &type, const char *ptr,
-                               uint32_t len);
-
-  static int32_t InputInteger(const type::Type &type, const char *ptr,
-                              uint32_t len);
-
-  static int64_t InputBigInt(const type::Type &type, const char *ptr,
-                             uint32_t len);
-
-  static double InputDecimal(const type::Type &type, const char *ptr,
-                             uint32_t len);
-
-  /**
-   * Compare two strings, returning an integer value indicating their sort order
-   *
-   * @param str1 A pointer to the first string
-   * @param len1 The length of the first string
-   * @param str2 A pointer to the second string
-   * @param len2 The length of the second string
-   * @return
-   */
-  static int32_t CompareStrings(const char *str1, uint32_t len1,
-                                const char *str2, uint32_t len2);
-
-  /**
-   * Write the provided variable length object into the target buffer.
-   *
-   * @param data The bytes we wish to serialize
-   * @param len The length of the byte array
-   * @param buf The target position we wish to write to
-   * @param pool A memory pool to source memory from
-   */
-  static void WriteVarlen(const char *data, uint32_t len, char *buf,
-                          peloton::type::AbstractPool &pool);
 };
 
 }  // namespace codegen
diff --git a/src/include/common/container_tuple.h b/src/include/common/container_tuple.h
index 29613067734..0d27a0da6f5 100644
--- a/src/include/common/container_tuple.h
+++ b/src/include/common/container_tuple.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/common/container_tuple.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +17,7 @@
 #include <sstream>
 #include <vector>
 
+#include "catalog/schema.h"
 #include "common/abstract_tuple.h"
 #include "common/exception.h"
 #include "common/macros.h"
diff --git a/src/include/function/date_functions.h b/src/include/function/date_functions.h
index e5a6ca85cd6..fc5973ac95b 100644
--- a/src/include/function/date_functions.h
+++ b/src/include/function/date_functions.h
@@ -20,12 +20,34 @@
 #include "type/value.h"
 
 namespace peloton {
+
+namespace codegen {
+namespace type {
+class Type;
+}  // namespace type
+}  // namespace codegen
+
 namespace function {
 
 class DateFunctions {
  public:
+  /**
+   * Function used to return the current date/time. Normally called at the start
+   * of a transaction, and consistent throughout its duration.
+   *
+   * @return The current date at the time of invocation
+   */
   static int64_t Now();
   static type::Value _Now(const std::vector<type::Value> &args);
+
+  /**
+   *
+   * @param data
+   * @param len
+   * @return
+   */
+  static int32_t InputDate(const codegen::type::Type &type, const char *data,
+                           uint32_t len);
 };
 
 }  // namespace function
diff --git a/src/include/function/decimal_functions.h b/src/include/function/decimal_functions.h
deleted file mode 100644
index f4373aa5750..00000000000
--- a/src/include/function/decimal_functions.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// decimal_functions.h
-//
-// Identification: src/include/function/decimal_functions.h
-//
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include <vector>
-
-#include "type/value.h"
-
-namespace peloton {
-namespace function {
-
-class DecimalFunctions {
- public:
-
-  // Abs
-  static double Abs(double arg);
-  static type::Value _Abs(const std::vector<type::Value>& args);
-
-  // Sqrt
-  static type::Value Sqrt(const std::vector<type::Value>& args);
-
-  // Floor
-  static double Floor(const double val);
-  static type::Value _Floor(const std::vector<type::Value>& args);
-
-  // Round
-  static double Round(double arg);
-  static type::Value _Round(const std::vector<type::Value>& args);
-
-  // Ceil
-  static double Ceil(const double args);
-  static type::Value _Ceil(const std::vector<type::Value>& args);
-};
-
-}  // namespace function
-}  // namespace peloton
diff --git a/src/include/function/numeric_functions.h b/src/include/function/numeric_functions.h
new file mode 100644
index 00000000000..6a606caf5d5
--- /dev/null
+++ b/src/include/function/numeric_functions.h
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// numeric_functions.h
+//
+// Identification: src/include/function/numeric_functions.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+namespace peloton {
+
+namespace codegen {
+namespace type {
+class Type;
+}  // namespace type
+}  // namespace codegen
+
+namespace type {
+class Value;
+}  // namespace value
+
+namespace function {
+
+class NumericFunctions {
+ public:
+  // Abs
+  static double Abs(double arg);
+  static type::Value _Abs(const std::vector<type::Value> &args);
+
+  // Sqrt
+  static double ISqrt(uint32_t num);
+  static double DSqrt(double num);
+  static type::Value Sqrt(const std::vector<type::Value> &args);
+
+  // Floor
+  static double Floor(double val);
+  static type::Value _Floor(const std::vector<type::Value> &args);
+
+  // Round
+  static double Round(double arg);
+  static type::Value _Round(const std::vector<type::Value> &args);
+
+  // Ceil
+  static double Ceil(double args);
+  static type::Value _Ceil(const std::vector<type::Value> &args);
+
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Input functions
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  static bool InputBoolean(const codegen::type::Type &type, const char *ptr,
+                           uint32_t len);
+
+  static int8_t InputTinyInt(const codegen::type::Type &type, const char *ptr,
+                             uint32_t len);
+
+  static int16_t InputSmallInt(const codegen::type::Type &type, const char *ptr,
+                               uint32_t len);
+
+  static int32_t InputInteger(const codegen::type::Type &type, const char *ptr,
+                              uint32_t len);
+
+  static int64_t InputBigInt(const codegen::type::Type &type, const char *ptr,
+                             uint32_t len);
+
+  static double InputDecimal(const codegen::type::Type &type, const char *ptr,
+                             uint32_t len);
+};
+
+}  // namespace function
+}  // namespace peloton
diff --git a/src/include/function/string_functions.h b/src/include/function/string_functions.h
index 2a209d0dee6..db79dc2409e 100644
--- a/src/include/function/string_functions.h
+++ b/src/include/function/string_functions.h
@@ -20,6 +20,10 @@ namespace executor {
 class ExecutorContext;
 }  // namespace executor
 
+namespace type {
+class AbstractPool;
+}  // namespace type;
+
 namespace function {
 
 class StringFunctions {
@@ -74,6 +78,32 @@ class StringFunctions {
   // Length will return the number of characters in the given string
   static uint32_t Length(executor::ExecutorContext &ctx, const char *str,
                          uint32_t length);
+
+  /**
+   * Compare two (potentially empty) strings returning an integer value
+   * indicating their sort order.
+   *
+   * @param str1 A pointer to the first string
+   * @param len1 The length of the first string
+   * @param str2 A pointer to the second string
+   * @param len2 The length of the second string
+   * @return -1 if the first string is strictly less than the second; 0 if the
+   * two strings are equal; 1 if the second string is strictly greater than the
+   * second.
+   */
+  static int32_t CompareStrings(const char *str1, uint32_t len1,
+                                const char *str2, uint32_t len2);
+
+  /**
+   * Write the provided variable length object into the target buffer.
+   *
+   * @param data The bytes we wish to serialize
+   * @param len The length of the byte array
+   * @param buf The target position we wish to write to
+   * @param pool A memory pool to source memory from
+   */
+  static void WriteString(const char *data, uint32_t len, char *buf,
+                          peloton::type::AbstractPool &pool);
 };
 
 }  // namespace function
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 9c78ece4787..97429771708 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -17,7 +17,7 @@
 #include "codegen/type/smallint_type.h"
 #include "codegen/type/integer_type.h"
 #include "codegen/type/bigint_type.h"
-#include "codegen/values_runtime.h"
+#include "function/numeric_functions.h"
 
 namespace peloton {
 namespace test {
@@ -224,17 +224,18 @@ void TestInputIntegral(
 
 TEST_F(ValueIntegrityTest, InputIntegralTypesTest) {
   codegen::type::Type tinyint{type::TypeId::TINYINT, false};
-  TestInputIntegral<int8_t>(tinyint, codegen::ValuesRuntime::InputTinyInt,
+  TestInputIntegral<int8_t>(tinyint, function::NumericFunctions::InputTinyInt,
                             {{"-126", -126}, {"126", 126}});
 
   codegen::type::Type smallint{type::TypeId::SMALLINT, false};
-  TestInputIntegral<int16_t>(smallint, codegen::ValuesRuntime::InputSmallInt);
+  TestInputIntegral<int16_t>(smallint,
+                             function::NumericFunctions::InputSmallInt);
 
   codegen::type::Type integer{type::TypeId::INTEGER, false};
-  TestInputIntegral<int32_t>(integer, codegen::ValuesRuntime::InputInteger);
+  TestInputIntegral<int32_t>(integer, function::NumericFunctions::InputInteger);
 
   codegen::type::Type bigint{type::TypeId::BIGINT, false};
-  TestInputIntegral<int64_t>(bigint, codegen::ValuesRuntime::InputBigInt);
+  TestInputIntegral<int64_t>(bigint, function::NumericFunctions::InputBigInt);
 }
 
 }  // namespace test
diff --git a/test/function/decimal_functions_test.cpp b/test/function/decimal_functions_test.cpp
index 994523b732f..1ef4f7cd87c 100644
--- a/test/function/decimal_functions_test.cpp
+++ b/test/function/decimal_functions_test.cpp
@@ -17,7 +17,7 @@
 
 #include "common/harness.h"
 
-#include "function/decimal_functions.h"
+#include "function/numeric_functions.h"
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "type/value_factory.h"
@@ -37,13 +37,13 @@ TEST_F(DecimalFunctionsTests, SqrtTest) {
   std::vector<type::Value> args = {
       type::ValueFactory::GetDecimalValue(column_val)};
 
-  auto result = function::DecimalFunctions::Sqrt(args);
+  auto result = function::NumericFunctions::Sqrt(args);
   EXPECT_FALSE(result.IsNull());
   EXPECT_EQ(expected, result.GetAs<double>());
 
   // NULL CHECK
   args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)};
-  result = function::DecimalFunctions::Sqrt(args);
+  result = function::NumericFunctions::Sqrt(args);
   EXPECT_TRUE(result.IsNull());
 }
 
@@ -53,7 +53,7 @@ TEST_F(DecimalFunctionsTests, FloorTest) {
   std::vector<type::Value> args;
   for (double in : inputs) {
     args = {type::ValueFactory::GetDecimalValue(in)};
-    auto result = function::DecimalFunctions::_Floor(args);
+    auto result = function::NumericFunctions::_Floor(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(floor(in), result.GetAs<double>());
   }
@@ -61,31 +61,31 @@ TEST_F(DecimalFunctionsTests, FloorTest) {
   // Testing Floor with Integer Types(Should be a no-op)
   int64_t numInt64 = 1;
   args = {type::ValueFactory::GetIntegerValue(numInt64)};
-  auto result = function::DecimalFunctions::_Floor(args);
+  auto result = function::NumericFunctions::_Floor(args);
   EXPECT_FALSE(result.IsNull());
   EXPECT_EQ(numInt64, result.GetAs<double>());
 
   int32_t numInt32 = 1;
   args = {type::ValueFactory::GetIntegerValue(numInt32)};
-  result = function::DecimalFunctions::_Floor(args);
+  result = function::NumericFunctions::_Floor(args);
   EXPECT_FALSE(result.IsNull());
   EXPECT_EQ(numInt32, result.GetAs<double>());
 
   int16_t numInt16 = 1;
   args = {type::ValueFactory::GetIntegerValue(numInt32)};
-  result = function::DecimalFunctions::_Floor(args);
+  result = function::NumericFunctions::_Floor(args);
   EXPECT_FALSE(result.IsNull());
   EXPECT_EQ(numInt16, result.GetAs<double>());
 
   int16_t numInt8 = 1;
   args = {type::ValueFactory::GetIntegerValue(numInt8)};
-  result = function::DecimalFunctions::_Floor(args);
+  result = function::NumericFunctions::_Floor(args);
   EXPECT_FALSE(result.IsNull());
   EXPECT_EQ(numInt8, result.GetAs<double>());
 
   // NULL CHECK
   args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)};
-  result = function::DecimalFunctions::_Floor(args);
+  result = function::NumericFunctions::_Floor(args);
   EXPECT_TRUE(result.IsNull());
 }
 
@@ -94,14 +94,14 @@ TEST_F(DecimalFunctionsTests, RoundTest) {
   std::vector<type::Value> args;
   for (double val : column_vals) {
     args = {type::ValueFactory::GetDecimalValue(val)};
-    auto result = function::DecimalFunctions::_Round(args);
+    auto result = function::NumericFunctions::_Round(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(round(val), result.GetAs<double>());
   }
 
   // NULL CHECK
   args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)};
-  auto result = function::DecimalFunctions::_Round(args);
+  auto result = function::NumericFunctions::_Round(args);
   EXPECT_TRUE(result.IsNull());
 }
 
@@ -110,14 +110,14 @@ TEST_F(DecimalFunctionsTests,AbsTestDouble) {
   std::vector<type::Value> args;
   for (double in : doubleTestInputs) {
     args = {type::ValueFactory::GetDecimalValue(in)};
-    auto result = function::DecimalFunctions::_Abs(args);
+    auto result = function::NumericFunctions::_Abs(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(fabs(in), result.GetAs<double>());
   }
 
   // NULL CHECK
   args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)};
-  auto result = function::DecimalFunctions::_Abs(args);
+  auto result = function::NumericFunctions::_Abs(args);
   EXPECT_TRUE(result.IsNull());
 }
 
@@ -131,28 +131,28 @@ TEST_F(DecimalFunctionsTests, AbsTestInt) {
   // Testing Abs with Integer Types
   for (int64_t in: bigIntTestInputs) {
     args = {type::ValueFactory::GetBigIntValue(in)};
-    auto result = function::DecimalFunctions::_Abs(args);
+    auto result = function::NumericFunctions::_Abs(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(std::abs(in), result.GetAs<int64_t>());
   }
 
   for (int32_t in: intTestInputs) {
     args = {type::ValueFactory::GetIntegerValue(in)};
-    auto result = function::DecimalFunctions::_Abs(args);
+    auto result = function::NumericFunctions::_Abs(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(abs(in), result.GetAs<int32_t>());
   }
 
   for (int16_t in: smallIntTestInputs) {
     args = {type::ValueFactory::GetSmallIntValue(in)};
-    auto result = function::DecimalFunctions::_Abs(args);
+    auto result = function::NumericFunctions::_Abs(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(abs(in), result.GetAs<int16_t>());
   }
 
   for (int8_t in: tinyIntTestInputs) {
     args = {type::ValueFactory::GetTinyIntValue(in)};
-    auto result = function::DecimalFunctions::_Abs(args);
+    auto result = function::NumericFunctions::_Abs(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(abs(in), result.GetAs<int8_t>());
   }
@@ -165,13 +165,13 @@ TEST_F(DecimalFunctionsTests, CeilTestDouble) {
   std::vector<type::Value> args;
   for (double in: doubleTestInputs) {
     args = {type::ValueFactory::GetDecimalValue(in)};
-    auto result = function::DecimalFunctions::_Ceil(args);
+    auto result = function::NumericFunctions::_Ceil(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(ceil(in), result.GetAs<double>());
   }
 
   args = {type::ValueFactory::GetNullValueByType(type::TypeId::DECIMAL)};
-  auto result = function::DecimalFunctions::_Ceil(args);
+  auto result = function::NumericFunctions::_Ceil(args);
   EXPECT_TRUE(result.IsNull());
 }
 
@@ -185,28 +185,28 @@ TEST_F(DecimalFunctionsTests, CeilTestInt) {
   // Testing Ceil with Integer Types
   for (int64_t in: bigIntTestInputs) {
     args = {type::ValueFactory::GetIntegerValue(in)};
-    auto result = function::DecimalFunctions::_Ceil(args);
+    auto result = function::NumericFunctions::_Ceil(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(ceil(in), result.GetAs<double>());
   }
 
   for (int in: intTestInputs) {
     args = {type::ValueFactory::GetIntegerValue(in)};
-    auto result = function::DecimalFunctions::_Ceil(args);
+    auto result = function::NumericFunctions::_Ceil(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(ceil(in), result.GetAs<double>());
   }
 
   for (int in: smallIntTestInputs) {
     args = {type::ValueFactory::GetIntegerValue(in)};
-    auto result = function::DecimalFunctions::_Ceil(args);
+    auto result = function::NumericFunctions::_Ceil(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(ceil(in), result.GetAs<double>());
   }
 
   for (int in: tinyIntTestInputs) {
     args = {type::ValueFactory::GetIntegerValue(in)};
-    auto result = function::DecimalFunctions::_Ceil(args);
+    auto result = function::NumericFunctions::_Ceil(args);
     EXPECT_FALSE(result.IsNull());
     EXPECT_EQ(ceil(in), result.GetAs<double>());
   }

From e76ea692c6b271bd7aaa23fe595928d39571531a Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 00:37:08 -0400
Subject: [PATCH 25/42] REALLY simple Date support

---
 src/function/date_functions.cpp       | 157 +++++++++++++++++++++++++-
 src/include/function/date_functions.h | 130 ++++++++++++---------
 src/type/date_type.cpp                |  89 ++++++---------
 3 files changed, 264 insertions(+), 112 deletions(-)

diff --git a/src/function/date_functions.cpp b/src/function/date_functions.cpp
index 9c676690a13..ad4681b7714 100644
--- a/src/function/date_functions.cpp
+++ b/src/function/date_functions.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/function/date_functions.cpp
 //
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,10 +14,9 @@
 
 #include <date/date.h>
 #include <date/iso_week.h>
-#include <inttypes.h>
-#include <time.h>
 #include <sys/time.h>
 
+#include "codegen/runtime_functions.h"
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "type/value_factory.h"
@@ -63,10 +62,156 @@ type::Value DateFunctions::_Now(
   return type::ValueFactory::GetTimestampValue(Now());
 }
 
+int32_t DateFunctions::DateToJulian(int32_t year, int32_t month, int32_t day) {
+  // From Postgres date2j()
+
+  if (month > 2) {
+    month += 1;
+    year += 4800;
+  } else {
+    month += 13;
+    year += 4799;
+  }
+
+  int32_t century = year / 100;
+
+  int32_t julian = year * 365 - 32167;
+  julian += year / 4 - century + century / 4;
+  julian += 7834 * month / 256 + day;
+
+  return julian;
+}
+
+void DateFunctions::JulianToDate(int32_t julian_date, int32_t &year, int32_t &month,
+                                 int32_t &day) {
+  // From Postgres j2date()
+
+  uint32_t julian = static_cast<uint32_t>(julian_date);
+  julian += 32044;
+
+  uint32_t quad = julian / 146097;
+
+  uint32_t extra = (julian - quad * 146097) * 4 + 3;
+  julian += 60 + quad * 3 + extra / 146097;
+  quad = julian / 1461;
+  julian -= quad * 1461;
+
+  int32_t y = julian * 4 / 1461;
+  julian = ((y != 0) ? (julian + 305) % 365 : (julian + 306) % 366) + 123;
+  y += quad * 4;
+
+  // Set year
+  year = static_cast<uint32_t>(y - 4800);
+  quad = julian * 2141 / 65536;
+
+  // Set day
+  day = julian - 7834 * quad / 256;
+
+  // Set month
+  month = (quad + 10) % 12 + 1;
+}
+
+namespace {
+
+template <typename T>
+bool TryParseInt(const char *&data, const char *end, T &out) {
+  static_assert(std::is_integral<T>::value,
+                "ParseInt() must only be called with integer types");
+
+  // Initialize
+  out = 0;
+
+  // Trim leading whitespace
+  while (*data == ' ') {
+    data++;
+  }
+
+  // Return if no more data
+  if (data == end) {
+    return false;
+  }
+
+  const char *snapshot = data;
+  while (data != end) {
+    if (*data < '0' || *data > '9') {
+      // Not a valid integer, stop
+      break;
+    }
+
+    // Update running sum
+    out = (out * 10) + (*data - '0');
+
+    // Move along
+    data++;
+  }
+
+  return snapshot != data;
+}
+
+}  // namespace
+
 int32_t DateFunctions::InputDate(
-    UNUSED_ATTRIBUTE const codegen::type::Type &type,
-    UNUSED_ATTRIBUTE const char *data, UNUSED_ATTRIBUTE uint32_t len) {
-  return 0;
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *data,
+    uint32_t len) {
+  // Okay, Postgres supports a crap-tonne of different date-time and timestamp
+  // formats. I don't want to spend time implementing them all. For now, let's
+  // cover the most common formats: yyyy-mm-dd
+
+  const char *curr_ptr = data;
+  const char *end = data + len;
+
+  uint32_t nums[3] = {0, 0, 0};
+  uint32_t year, month, day;
+
+  for (uint32_t i = 0; i < 3; i++) {
+    bool parsed = TryParseInt(curr_ptr, end, nums[i]);
+
+    bool unexpected_next_char = (*curr_ptr != '-' && *curr_ptr != '/');
+    if (!parsed || (i != 2 && unexpected_next_char)) {
+      goto unsupported;
+    }
+
+    curr_ptr++;
+  }
+
+  // Looks okay ... let's check the components.
+  year = nums[0], month = nums[1], day = nums[2];
+
+  if (month == 0 || month > 12 || day == 0 || day > 31) {
+    goto unsupported;
+  }
+
+  switch (month) {
+    case 2: {
+      uint32_t days_in_feb =
+          ((year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)) ? 29 : 28;
+      if (day > days_in_feb) {
+        goto unsupported;
+      }
+      break;
+    }
+    case 4:
+    case 6:
+    case 9:
+    case 11: {
+      if (day > 30) {
+        goto unsupported;
+      }
+      break;
+    }
+    default: {
+      if (day > 31) {
+        goto unsupported;
+      }
+      break;
+    }
+  }
+
+  return DateToJulian(year, month, day);
+
+unsupported:
+  codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+  __builtin_unreachable();
 }
 
 }  // namespace expression
diff --git a/src/include/function/date_functions.h b/src/include/function/date_functions.h
index fc5973ac95b..73e95a512a5 100644
--- a/src/include/function/date_functions.h
+++ b/src/include/function/date_functions.h
@@ -1,54 +1,76 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// date_functions.h
-//
-// Identification: src/include/function/date_functions.h
-//
-// Copyright (c) 2015-2017, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "common/logger.h"
-#include "common/internal_types.h"
-#include "type/value.h"
-
-namespace peloton {
-
-namespace codegen {
-namespace type {
-class Type;
-}  // namespace type
-}  // namespace codegen
-
-namespace function {
-
-class DateFunctions {
- public:
-  /**
-   * Function used to return the current date/time. Normally called at the start
-   * of a transaction, and consistent throughout its duration.
-   *
-   * @return The current date at the time of invocation
-   */
-  static int64_t Now();
-  static type::Value _Now(const std::vector<type::Value> &args);
-
-  /**
-   *
-   * @param data
-   * @param len
-   * @return
-   */
-  static int32_t InputDate(const codegen::type::Type &type, const char *data,
-                           uint32_t len);
-};
-
-}  // namespace function
-}  // namespace peloton
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// date_functions.h
+//
+// Identification: src/include/function/date_functions.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "type/value.h"
+
+namespace peloton {
+
+namespace codegen {
+namespace type {
+class Type;
+}  // namespace type
+}  // namespace codegen
+
+namespace function {
+
+class DateFunctions {
+ public:
+  /**
+   * Function used to return the current date/time. Normally called at the start
+   * of a transaction, and consistent throughout its duration.
+   *
+   * @return The current date at the time of invocation
+   */
+  static int64_t Now();
+  static type::Value _Now(const std::vector<type::Value> &args);
+
+  /**
+   * Convert the given input into a Julian date format.
+   *
+   * @param year The year
+   * @param month The month (1-based)
+   * @param day The day (1-based)
+   * @return The equivalent 32-bit integer representation of the date
+   */
+  static int32_t DateToJulian(int32_t year, int32_t month, int32_t day);
+
+  /**
+   * Decompose the given 32-bit Julian date value into year, month, and day
+   * components.
+   *
+   * @param julian_date The julian date
+   * @param year[out] Where the year is written
+   * @param month[out] Where the result month is written
+   * @param day[out] Where the result day is written
+   */
+  static void JulianToDate(int32_t julian_date, int32_t &year, int32_t &month,
+                           int32_t &day);
+
+  /**
+   * Convert the given input string into a date.
+   *
+   * @param data A pointer to a string representation of a date
+   * @param len The length of the string
+   * @return A suitable date representation of the given input string that can
+   * be stored in the data tables. This typically means a Julian date.
+   */
+  static int32_t InputDate(const codegen::type::Type &type, const char *data,
+                           uint32_t len);
+};
+
+}  // namespace function
+}  // namespace peloton
diff --git a/src/type/date_type.cpp b/src/type/date_type.cpp
index d99617178f4..86e9f8b7af6 100644
--- a/src/type/date_type.cpp
+++ b/src/type/date_type.cpp
@@ -6,12 +6,13 @@
 //
 // Identification: src/type/date_type.cpp
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "type/date_type.h"
 
+#include "function/date_functions.h"
 #include "type/value_factory.h"
 
 namespace peloton {
@@ -19,122 +20,106 @@ namespace type {
 
 DateType::DateType() : Type(TypeId::DATE) {}
 
-CmpBool DateType::CompareEquals(const Value& left, const Value& right) const {
+CmpBool DateType::CompareEquals(const Value &left, const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() == right.GetAs<int32_t>());
 }
 
-CmpBool DateType::CompareNotEquals(const Value& left,
-                                   const Value& right) const {
+CmpBool DateType::CompareNotEquals(const Value &left,
+                                   const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() != right.GetAs<int32_t>());
 }
 
-CmpBool DateType::CompareLessThan(const Value& left, const Value& right) const {
+CmpBool DateType::CompareLessThan(const Value &left, const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() < right.GetAs<int32_t>());
 }
 
-CmpBool DateType::CompareLessThanEquals(const Value& left,
-                                        const Value& right) const {
+CmpBool DateType::CompareLessThanEquals(const Value &left,
+                                        const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() <= right.GetAs<int32_t>());
 }
 
-CmpBool DateType::CompareGreaterThan(const Value& left,
-                                     const Value& right) const {
+CmpBool DateType::CompareGreaterThan(const Value &left,
+                                     const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() > right.GetAs<int32_t>());
 }
 
-CmpBool DateType::CompareGreaterThanEquals(const Value& left,
-                                           const Value& right) const {
+CmpBool DateType::CompareGreaterThanEquals(const Value &left,
+                                           const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return CmpBool::NULL_;
   return GetCmpBool(left.GetAs<int32_t>() >= right.GetAs<int32_t>());
 }
 
-Value DateType::Min(const Value& left, const Value& right) const {
+Value DateType::Min(const Value &left, const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return left.OperateNull(right);
   if (left.CompareLessThan(right) == CmpBool::CmpTrue) return left.Copy();
   return right.Copy();
 }
 
-Value DateType::Max(const Value& left, const Value& right) const {
+Value DateType::Max(const Value &left, const Value &right) const {
   PELOTON_ASSERT(left.CheckComparable(right));
   if (left.IsNull() || right.IsNull()) return left.OperateNull(right);
   if (left.CompareGreaterThan(right) == CmpBool::CmpTrue) return left.Copy();
   return right.Copy();
 }
 
-// Debug
-std::string DateType::ToString(const Value& val) const {
-  if (val.IsNull()) return "date_null";
-  int32_t tm = val.value_.date;
-  tm /= 1000000;
-  tm /= 100000;
-  uint16_t year = tm % 10000;
-  tm /= 10000;
-  int tz = tm % 27;
-  tz -= 12;
-  tm /= 27;
-  uint16_t day = tm % 32;
-  tm /= 32;
-  uint16_t month = tm;
-  char str[30];
-  char zone[5];
-  sprintf(str, "%04d-%02d-%02d", year, month, day);
-  if (tz >= 0) {
-    str[26] = '+';
-  } else
-    str[26] = '-';
-  if (tz < 0) tz = -tz;
-  sprintf(zone, "%02d", tz);
-  str[27] = 0;
-  return std::string(std::string(str) + std::string(zone));
+std::string DateType::ToString(const Value &val) const {
+  // Null
+  if (val.IsNull()) {
+    return "date_null";
+  }
+
+  int32_t year, month, day;
+  function::DateFunctions::JulianToDate(val.value_.date, year, month, day);
+  return StringUtil::Format("%04d-%02d-%02d", year, month, day);
 }
 
 // Compute a hash value
-size_t DateType::Hash(const Value& val) const {
+size_t DateType::Hash(const Value &val) const {
   return std::hash<int32_t>{}(val.value_.date);
 }
 
-void DateType::HashCombine(const Value& val, size_t& seed) const {
+void DateType::HashCombine(const Value &val, size_t &seed) const {
   val.hash_combine<int32_t>(seed, val.value_.date);
 }
 
-void DateType::SerializeTo(const Value& val, SerializeOutput& out) const {
+void DateType::SerializeTo(const Value &val, SerializeOutput &out) const {
   out.WriteInt(val.value_.date);
 }
 
-void DateType::SerializeTo(const Value& val, char* storage,
+void DateType::SerializeTo(const Value &val, char *storage,
                            bool inlined UNUSED_ATTRIBUTE,
-                           AbstractPool* pool UNUSED_ATTRIBUTE) const {
-  *reinterpret_cast<int32_t*>(storage) = val.value_.date;
+                           AbstractPool *pool UNUSED_ATTRIBUTE) const {
+  *reinterpret_cast<int32_t *>(storage) = val.value_.date;
 }
 
 // Deserialize a value of the given type from the given storage space.
-Value DateType::DeserializeFrom(const char* storage,
+Value DateType::DeserializeFrom(const char *storage,
                                 const bool inlined UNUSED_ATTRIBUTE,
-                                AbstractPool* pool UNUSED_ATTRIBUTE) const {
-  int32_t val = *reinterpret_cast<const int32_t*>(storage);
+                                AbstractPool *pool UNUSED_ATTRIBUTE) const {
+  int32_t val = *reinterpret_cast<const int32_t *>(storage);
   return Value(type_id_, static_cast<int32_t>(val));
 }
-Value DateType::DeserializeFrom(SerializeInput& in UNUSED_ATTRIBUTE,
-                                AbstractPool* pool UNUSED_ATTRIBUTE) const {
+Value DateType::DeserializeFrom(SerializeInput &in UNUSED_ATTRIBUTE,
+                                AbstractPool *pool UNUSED_ATTRIBUTE) const {
   return Value(type_id_, in.ReadInt());
 }
 
 // Create a copy of this value
-Value DateType::Copy(const Value& val) const { return Value(val); }
+Value DateType::Copy(const Value &val) const { return Value(val); }
 
-Value DateType::CastAs(const Value& val, const TypeId type_id) const {
+Value DateType::CastAs(const Value &val, const TypeId type_id) const {
   switch (type_id) {
     case TypeId::DATE:
       return Copy(val);
@@ -144,7 +129,7 @@ Value DateType::CastAs(const Value& val, const TypeId type_id) const {
     default:
       break;
   }
-  throw Exception("Date is not coercable to " +
+  throw Exception("Date is not coercible to " +
                   Type::GetInstance(type_id)->ToString());
 }
 

From c4ede0aa1ee306005a69add2c61dc8cfb63e6468 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 01:00:02 -0400
Subject: [PATCH 26/42] Compile fixes for GCC 6+

---
 src/codegen/codegen.cpp         | 18 +++++++++++++++++-
 src/function/date_functions.cpp |  4 ++--
 src/include/common/macros.h     | 15 ++++-----------
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index e0082f7d588..0f8b426b61c 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -163,16 +163,23 @@ llvm::Value *CodeGen::Printf(const std::string &format,
                              const std::vector<llvm::Value *> &args) {
   auto *printf_fn = LookupBuiltin("printf");
   if (printf_fn == nullptr) {
+#if GCC_AT_LEAST_6
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
     printf_fn = RegisterBuiltin(
         "printf", llvm::TypeBuilder<decltype(printf), false>::get(GetContext()),
         reinterpret_cast<void *>(printf));
+#if GCC_AT_LEAST_6
+#pragma GCC diagnostic pop
+#endif
   }
 
   // Collect all the arguments into a vector
   std::vector<llvm::Value *> printf_args = {ConstString(format, "format")};
   printf_args.insert(printf_args.end(), args.begin(), args.end());
 
-  // Call the function
+  // Call printf()
   return CallFunc(printf_fn, printf_args);
 }
 
@@ -181,11 +188,20 @@ llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
   static constexpr char kMemcmpFnName[] = "memcmp";
   auto *memcmp_fn = LookupBuiltin(kMemcmpFnName);
   if (memcmp_fn == nullptr) {
+#if GCC_AT_LEAST_6
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
     memcmp_fn = RegisterBuiltin(
         kMemcmpFnName,
         llvm::TypeBuilder<decltype(memcmp), false>::get(GetContext()),
         reinterpret_cast<void *>(printf));
+#if GCC_AT_LEAST_6
+#pragma GCC diagnostic pop
+#endif
   }
+
+  // Call memcmp()
   return CallFunc(memcmp_fn, {ptr1, ptr2, len});
 }
 
diff --git a/src/function/date_functions.cpp b/src/function/date_functions.cpp
index ad4681b7714..ac37f21492c 100644
--- a/src/function/date_functions.cpp
+++ b/src/function/date_functions.cpp
@@ -82,8 +82,8 @@ int32_t DateFunctions::DateToJulian(int32_t year, int32_t month, int32_t day) {
   return julian;
 }
 
-void DateFunctions::JulianToDate(int32_t julian_date, int32_t &year, int32_t &month,
-                                 int32_t &day) {
+void DateFunctions::JulianToDate(int32_t julian_date, int32_t &year,
+                                 int32_t &month, int32_t &day) {
   // From Postgres j2date()
 
   uint32_t julian = static_cast<uint32_t>(julian_date);
diff --git a/src/include/common/macros.h b/src/include/common/macros.h
index e7f2dc95008..96aaf6ab0d2 100644
--- a/src/include/common/macros.h
+++ b/src/include/common/macros.h
@@ -97,20 +97,13 @@ namespace peloton {
 #endif /* CHECK_INVARIANTS */
 
 //===--------------------------------------------------------------------===//
-// override
+// Compiler version checks
 //===--------------------------------------------------------------------===//
 
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)
-#define GCC_AT_LEAST_47 1
+#if __GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 0)
+#define GCC_AT_LEAST_6 1
 #else
-#define GCC_AT_LEAST_47 0
-#endif
-
-// g++-4.6 does not support override
-#if GCC_AT_LEAST_47
-#define OVERRIDE override
-#else
-#define OVERRIDE
+#define GCC_AT_LEAST_6 0
 #endif
 
 //===--------------------------------------------------------------------===//

From c50b665b3ad485416b79dfe8400188a6434a14f8 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 02:26:04 -0400
Subject: [PATCH 27/42] Get string inputs working

---
 src/codegen/operator/csv_scan_translator.cpp     | 12 ++++++++++--
 src/codegen/proxy/string_functions_proxy.cpp     |  2 ++
 src/codegen/type/varchar_type.cpp                |  5 ++---
 src/codegen/util/csv_scanner.cpp                 | 13 ++++++++-----
 src/function/string_functions.cpp                |  8 ++++++++
 .../codegen/proxy/string_functions_proxy.h       |  1 +
 src/include/function/string_functions.h          | 16 ++++++++++++++++
 src/traffic_cop/traffic_cop.cpp                  |  5 +++++
 8 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index 8603a043e89..9e8880f70c0 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -132,8 +132,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
                             llvm::Value *data_len) const {
     auto *input_func = SqlType().GetInputFunction(codegen, ai_->type);
     auto *raw_val = codegen.CallFunc(input_func, {type, data_ptr, data_len});
-    return codegen::Value{ai_->type, raw_val, nullptr,
-                          codegen.ConstBool(false)};
+    if (SqlType().IsVariableLength()) {
+      // StrWithLen
+      llvm::Value *str_ptr = codegen->CreateExtractValue(raw_val, 0);
+      llvm::Value *str_len = codegen->CreateExtractValue(raw_val, 1);
+      return codegen::Value{ai_->type, str_ptr, str_len,
+                            codegen.ConstBool(false)};
+    } else {
+      return codegen::Value{ai_->type, raw_val, nullptr,
+                            codegen.ConstBool(false)};
+    }
   }
 
   Value Access(CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {
diff --git a/src/codegen/proxy/string_functions_proxy.cpp b/src/codegen/proxy/string_functions_proxy.cpp
index db765480e9b..bc0a6bce6fd 100644
--- a/src/codegen/proxy/string_functions_proxy.cpp
+++ b/src/codegen/proxy/string_functions_proxy.cpp
@@ -14,6 +14,7 @@
 
 #include "codegen/proxy/executor_context_proxy.h"
 #include "codegen/proxy/pool_proxy.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
 
 namespace peloton {
 namespace codegen {
@@ -32,6 +33,7 @@ DEFINE_METHOD(peloton::function, StringFunctions, Substr);
 DEFINE_METHOD(peloton::function, StringFunctions, Repeat);
 DEFINE_METHOD(peloton::function, StringFunctions, CompareStrings);
 DEFINE_METHOD(peloton::function, StringFunctions, WriteString);
+DEFINE_METHOD(peloton::function, StringFunctions, InputString);
 
 }  // namespace codegen
 }  // namespace peloton
diff --git a/src/codegen/type/varchar_type.cpp b/src/codegen/type/varchar_type.cpp
index dc3ab961f3d..f786bc83945 100644
--- a/src/codegen/type/varchar_type.cpp
+++ b/src/codegen/type/varchar_type.cpp
@@ -597,9 +597,8 @@ void Varchar::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
 }
 
 llvm::Function *Varchar::GetInputFunction(
-    UNUSED_ATTRIBUTE CodeGen &codegen,
-    UNUSED_ATTRIBUTE const Type &type) const {
-  throw NotImplementedException{"String input not implemented yet"};
+    CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
+  return StringFunctionsProxy::InputString.GetFunction(codegen);
 }
 
 llvm::Function *Varchar::GetOutputFunction(
diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index da606fcaac3..0481a4444e1 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -197,6 +197,9 @@ const char *CSVScanner::NextLine() {
 
   uint32_t line_end = buffer_begin_;
 
+  char quote = quote_;
+  char escape = (quote_ == escape_ ? static_cast<char>('\0') : escape_);
+
   while (true) {
     if (line_end >= buffer_end_) {
       // We need to read more data from the CSV file. But first, we need to copy
@@ -219,13 +222,13 @@ const char *CSVScanner::NextLine() {
     // Read character
     char c = buffer_[line_end];
 
-    if (in_quote && c == escape_) {
-      last_was_escape = true;
+    if (in_quote && c == escape) {
+      last_was_escape = !last_was_escape;
     }
-    if (c == quote_ && !last_was_escape) {
-      in_quote = true;
+    if (c == quote && !last_was_escape) {
+      in_quote = !in_quote;
     }
-    if (c != escape_) {
+    if (c != escape) {
       last_was_escape = false;
     }
 
diff --git a/src/function/string_functions.cpp b/src/function/string_functions.cpp
index 2cf8d9f0b89..75af3a67523 100644
--- a/src/function/string_functions.cpp
+++ b/src/function/string_functions.cpp
@@ -245,5 +245,13 @@ void StringFunctions::WriteString(const char *data, uint32_t len, char *buf,
   *reinterpret_cast<Varlen **>(buf) = area;
 }
 
+// TODO(pmenon): UTF8 checking, string checking, lots of error handling here
+// TODO(pmenon): Why do we need this +1 on the length ?
+StringFunctions::StrWithLen StringFunctions::InputString(
+    UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *data,
+    uint32_t len) {
+  return StringFunctions::StrWithLen{data, len + 1};
+}
+
 }  // namespace function
 }  // namespace peloton
diff --git a/src/include/codegen/proxy/string_functions_proxy.h b/src/include/codegen/proxy/string_functions_proxy.h
index bff1911d0a2..27a24995e3a 100644
--- a/src/include/codegen/proxy/string_functions_proxy.h
+++ b/src/include/codegen/proxy/string_functions_proxy.h
@@ -31,6 +31,7 @@ PROXY(StringFunctions) {
   DECLARE_METHOD(Repeat);
   DECLARE_METHOD(CompareStrings);
   DECLARE_METHOD(WriteString);
+  DECLARE_METHOD(InputString);
 };
 
 PROXY(StrWithLen) {
diff --git a/src/include/function/string_functions.h b/src/include/function/string_functions.h
index db79dc2409e..47c72c62e15 100644
--- a/src/include/function/string_functions.h
+++ b/src/include/function/string_functions.h
@@ -16,6 +16,12 @@
 
 namespace peloton {
 
+namespace codegen {
+namespace type {
+class Type;
+}  // namespace type
+}  // namespace codegen
+
 namespace executor {
 class ExecutorContext;
 }  // namespace executor
@@ -104,6 +110,16 @@ class StringFunctions {
    */
   static void WriteString(const char *data, uint32_t len, char *buf,
                           peloton::type::AbstractPool &pool);
+
+  /**
+   *
+   * @param type
+   * @param data
+   * @param len
+   * @return
+   */
+  static StrWithLen InputString(const codegen::type::Type &type,
+                                const char *data, uint32_t len);
 };
 
 }  // namespace function
diff --git a/src/traffic_cop/traffic_cop.cpp b/src/traffic_cop/traffic_cop.cpp
index a87d99c0ac5..7bfffebb4c0 100644
--- a/src/traffic_cop/traffic_cop.cpp
+++ b/src/traffic_cop/traffic_cop.cpp
@@ -523,6 +523,11 @@ FieldInfo TrafficCop::GetColumnFieldForValueType(std::string column_name,
       field_size = 255;
       break;
     }
+    case type::TypeId::DATE: {
+      field_type = PostgresValueType::DATE;
+      field_size = 4;
+      break;
+    }
     case type::TypeId::TIMESTAMP: {
       field_type = PostgresValueType::TIMESTAMPS;
       field_size = 64;  // FIXME: Bytes???

From d6bb8738d4834fe36815e451da23c2ebc62607b2 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 12:11:04 -0400
Subject: [PATCH 28/42] Beefed up tests

---
 src/function/numeric_functions.cpp    | 74 ++++++++++++++-------------
 src/include/index/bwtree.h            |  2 +-
 test/codegen/value_integrity_test.cpp | 73 +++++++++++++++++++++++---
 3 files changed, 107 insertions(+), 42 deletions(-)

diff --git a/src/function/numeric_functions.cpp b/src/function/numeric_functions.cpp
index 50a00ee516a..f4a943c8ce0 100644
--- a/src/function/numeric_functions.cpp
+++ b/src/function/numeric_functions.cpp
@@ -182,24 +182,6 @@ type::Value NumericFunctions::_Round(const std::vector<type::Value> &args) {
 
 namespace {
 
-/**
- * Skip all leading and trailing whitespace from the string bounded by the
- * provided pointers. This function will modify the input pointers to point to
- * the first non-whitespace space character at the start and end of the input
- * string.
- *
- * @param[in,out] left Pointer to the left-most character in the input string
- * @param[in,out] right Pointer to the right-most character in the input string
- */
-void TrimLeftRight(const char *&left, const char *&right) {
-  while (*left == ' ') {
-    left++;
-  }
-  while (right > left && *(right - 1) == ' ') {
-    right--;
-  }
-}
-
 /**
  * Convert the provided input string into an integral number. This function
  * handles leading whitespace and leading negative (-) or positive (+) signs.
@@ -216,16 +198,13 @@ T ParseInteger(const char *ptr, uint32_t len) {
   static_assert(std::is_integral<T>::value,
                 "Must provide integer-type when calling ParseInteger");
 
-  if (len == 0) {
-    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
-    __builtin_unreachable();
-  }
-
   const char *start = ptr;
   const char *end = start + len;
 
-  // Trim leading and trailing whitespace
-  TrimLeftRight(start, end);
+  // Trim leading whitespace
+  while (start < end && *start == ' ') {
+    start++;
+  }
 
   // Check negative or positive sign
   bool negative = false;
@@ -238,10 +217,9 @@ T ParseInteger(const char *ptr, uint32_t len) {
 
   // Convert
   int64_t num = 0;
-  while (start != end) {
+  while (start < end) {
     if (*start < '0' || *start > '9') {
-      codegen::RuntimeFunctions::ThrowInvalidInputStringException();
-      __builtin_unreachable();
+      break;
     }
 
     num = (num * 10) + (*start - '0');
@@ -249,7 +227,16 @@ T ParseInteger(const char *ptr, uint32_t len) {
     start++;
   }
 
-  PELOTON_ASSERT(start == end);
+  // Trim trailing whitespace
+  while (start < end && *start == ' ') {
+    start++;
+  }
+
+  // If we haven't consumed everything at this point, it was an invalid input
+  if (start < end) {
+    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
 
   // Negate number if we need to
   if (negative) {
@@ -279,10 +266,13 @@ bool NumericFunctions::InputBoolean(
     __builtin_unreachable();
   }
 
-  const char *start = ptr, *end = ptr + len;
+  const char *start = ptr;
+  const char *end = ptr + len;
 
-  // Trim leading and trailing whitespace
-  TrimLeftRight(start, end);
+  // Trim leading whitespace
+  while (start < end && *start == ' ') {
+    start++;
+  }
 
   //
   uint64_t trimmed_len = end - start;
@@ -393,13 +383,16 @@ double NumericFunctions::InputDecimal(
     __builtin_unreachable();
   }
 
+  const char *start = ptr;
+  const char *end = ptr + len;
+
   // We don't trim because std::strtod() does the trimming for us
 
   // TODO(pmenon): Optimize me later
-  char *end = nullptr;
-  double ret = std::strtod(ptr, &end);
+  char *consumed_ptr = nullptr;
+  double ret = std::strtod(ptr, &consumed_ptr);
 
-  if (unlikely_branch(end == ptr)) {
+  if (unlikely_branch(consumed_ptr == start)) {
     if (errno == ERANGE) {
       codegen::RuntimeFunctions::ThrowOverflowException();
       __builtin_unreachable();
@@ -409,6 +402,17 @@ double NumericFunctions::InputDecimal(
     }
   }
 
+  // Eat the rest
+  while (consumed_ptr < end && *consumed_ptr == ' ') {
+    consumed_ptr++;
+  }
+
+  // If we haven't consumed everything at this point, it was an invalid input
+  if (consumed_ptr < end) {
+    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+    __builtin_unreachable();
+  }
+
   // Done
   return ret;
 }
diff --git a/src/include/index/bwtree.h b/src/include/index/bwtree.h
index f9352aad09a..abb293f2e67 100755
--- a/src/include/index/bwtree.h
+++ b/src/include/index/bwtree.h
@@ -7585,7 +7585,7 @@ class BwTree : public BwTreeBase {
       // would always fail, until we have cleaned all epoch nodes
       current_epoch_p = nullptr;
 
-      LOG_TRACE("Clearing the epoch in ~EpochManager()...");
+      LOG_TRACE("Clearing the epoch in ~EpochManager() ...");
 
       // If all threads has exited then all thread counts are
       // 0, and therefore this should proceed way to the end
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 97429771708..87450683afc 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: test/codegen/value_integrity_test.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -190,8 +190,9 @@ void TestInputIntegral(
                      extra_valid_tests.end());
 
   // Default invalid tests
-  std::vector<std::string> invalid_tests = {"a",   "-b",  "+c",  " 1c",
-                                            "2d ", "3 3", "-4 4"};
+  std::vector<std::string> invalid_tests = {"a",       "-b",    "+c",   " 1c",
+                                            "2d ",     "3 3",   "-4 4", "-5 a ",
+                                            "  -6  a", "  c 7 "};
   invalid_tests.insert(invalid_tests.end(), extra_invalid_tests.begin(),
                        extra_invalid_tests.end());
 
@@ -205,19 +206,25 @@ void TestInputIntegral(
   for (const auto &test : valid_tests) {
     auto *ptr = test.first.data();
     auto len = static_cast<uint32_t>(test.first.length());
-    EXPECT_EQ(test.second, TestFunc(type, ptr, len));
+    try {
+      EXPECT_EQ(test.second, TestFunc(type, ptr, len));
+    } catch (std::exception &e) {
+      EXPECT_TRUE(false) << "Valid input '" << test.first << "' threw an error";
+    }
   }
 
   for (const auto &test : invalid_tests) {
     auto *ptr = test.data();
     auto len = static_cast<uint32_t>(test.length());
-    EXPECT_THROW(TestFunc(type, ptr, len), std::runtime_error);
+    EXPECT_THROW(TestFunc(type, ptr, len), std::runtime_error)
+        << "Input '" << test << "' was expected to throw an error, but did not";
   }
 
   for (const auto &test : overflow_tests) {
     auto *ptr = test.data();
     auto len = static_cast<uint32_t>(test.length());
-    EXPECT_THROW(TestFunc(type, ptr, len), std::overflow_error);
+    EXPECT_THROW(TestFunc(type, ptr, len), std::overflow_error)
+        << "Input '" << test << "' expected to overflow, but did not";
   }
 }
 }  // namespace
@@ -238,5 +245,59 @@ TEST_F(ValueIntegrityTest, InputIntegralTypesTest) {
   TestInputIntegral<int64_t>(bigint, function::NumericFunctions::InputBigInt);
 }
 
+TEST_F(ValueIntegrityTest, InputDecimalTypesTest) {
+  codegen::type::Type decimal{type::TypeId::DECIMAL, false};
+
+  // First check some valid cases
+  std::vector<std::pair<std::string, double>> valid_tests = {
+      {"0.0", 0.0},
+      {"-1.0", -1.0},
+      {"2.0", 2.0},
+      {"+3.0", 3.0},
+      {"  4.0", 4.0},
+      {"  -5.0", -5.0},
+      {"  +6.0", 6.0},
+      {"7.0  ", 7.0},
+      {"-8.0  ", -8.0},
+      {"  9.0  ", 9.0},
+      {"  -10.0  ", -10.0},
+      {"  +11.0  ", 11.0}};
+
+  for (const auto &test_case : valid_tests) {
+    auto *ptr = test_case.first.data();
+    auto len = static_cast<uint32_t>(test_case.first.length());
+    EXPECT_EQ(test_case.second,
+              function::NumericFunctions::InputDecimal(decimal, ptr, len));
+  }
+
+  // Now let's try some invalid ones. Take each valid test and randomly insert
+  // a character somewhere.
+  std::vector<std::string> invalid_tests;
+
+  std::random_device rd;
+  std::mt19937 rng(rd());
+
+  for (const auto &valid_test : valid_tests) {
+    auto orig = valid_test.first;
+
+    std::uniform_int_distribution<> dist(0, orig.length());
+    auto pos = dist(rng);
+
+    auto invalid_num = orig.substr(0, pos) + "aa" + orig.substr(pos);
+
+    invalid_tests.push_back(invalid_num);
+  }
+
+  // Now check that each test throws an invalid string error
+  for (const auto &invalid_test : invalid_tests) {
+    auto *ptr = invalid_test.data();
+    auto len = static_cast<uint32_t>(invalid_test.length());
+    EXPECT_THROW(function::NumericFunctions::InputDecimal(decimal, ptr, len),
+                 std::runtime_error)
+        << "Input '" << invalid_test
+        << "' expected to throw error, but passed parsing logic";
+  }
+}
+
 }  // namespace test
 }  // namespace peloton
\ No newline at end of file

From b34ba308471bc1e9c73d8b4b0e6b8c879869d9b5 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 14:49:51 -0400
Subject: [PATCH 29/42] Simple CSV scan test

---
 test/codegen/csv_scan_test.cpp | 117 +++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 test/codegen/csv_scan_test.cpp

diff --git a/test/codegen/csv_scan_test.cpp b/test/codegen/csv_scan_test.cpp
new file mode 100644
index 00000000000..f40fc823e80
--- /dev/null
+++ b/test/codegen/csv_scan_test.cpp
@@ -0,0 +1,117 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scan_test.cpp
+//
+// Identification: test/codegen/csv_scan_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/testing_codegen_util.h"
+
+#include "codegen/util/csv_scanner.h"
+#include "common/timer.h"
+#include "util/file_util.h"
+
+namespace peloton {
+namespace test {
+
+class CSVScanTest : public PelotonCodeGenTest {};
+
+using CallbackFn =
+    std::function<void(const codegen::util::CSVScanner::Column *)>;
+
+struct State {
+  codegen::util::CSVScanner *scanner;
+  CallbackFn callback;
+};
+
+struct TempFileHandle {
+  std::string name;
+  TempFileHandle(std::string _name) : name(_name) {}
+  ~TempFileHandle() { boost::filesystem::remove(name); }
+};
+
+void CSVRowCallback(void *s) {
+  auto *state = reinterpret_cast<State *>(s);
+  state->callback(state->scanner->GetColumns());
+}
+
+void IterateAsCSV(const std::vector<std::string> &rows,
+                  const std::vector<codegen::type::Type> &col_types,
+                  CallbackFn callback, char delimiter = ',') {
+  std::string csv_data;
+  for (uint32_t i = 0; i < rows.size(); i++) {
+    csv_data.append(rows[i]).append("\n");
+  }
+
+  // Write the contents into a temporary file
+  TempFileHandle fh{FileUtil::WriteTempFile(csv_data, "", "tmp")};
+
+  // The memory pool
+  auto &pool = *TestingHarness::GetInstance().GetTestingPool();
+
+  // The client-state
+  State state = {.scanner = nullptr, .callback = callback};
+
+  // The scanner
+  codegen::util::CSVScanner scanner{
+      pool, fh.name, col_types.data(), static_cast<uint32_t>(col_types.size()),
+      CSVRowCallback, reinterpret_cast<void *>(&state), delimiter};
+
+  state.scanner = &scanner;
+
+  // Iterate!
+  scanner.Produce();
+}
+
+TEST_F(CSVScanTest, SimpleNumericScan) {
+  // Create a temporary CSV file
+  std::vector<std::string> rows = {"1,2,3.0,4", "4,5,6.0,7", "8,9,10.0,11"};
+  std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
+                                            {type::TypeId::INTEGER, false},
+                                            {type::TypeId::DECIMAL, false},
+                                            {type::TypeId::INTEGER, false}};
+
+  uint32_t rows_read = 0;
+  IterateAsCSV(rows, types, [&rows_read, &types](
+                                const codegen::util::CSVScanner::Column *cols) {
+    rows_read++;
+    for (uint32_t i = 0; i < types.size(); i++) {
+      EXPECT_FALSE(cols[i].is_null);
+      EXPECT_GT(cols[i].len, 0);
+    }
+  });
+
+  // Check
+  EXPECT_EQ(rows.size(), rows_read);
+}
+
+TEST_F(CSVScanTest, MixedStringScan) {
+  // Create a temporary CSV file
+  std::vector<std::string> rows = {"1,2,3,test", "4,5,6,\"test\"",
+                                   "8,9,10,\"test\nnewline\ninquote\""};
+  std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
+                                            {type::TypeId::INTEGER, false},
+                                            {type::TypeId::INTEGER, false},
+                                            {type::TypeId::VARCHAR, false}};
+
+  uint32_t rows_read = 0;
+  IterateAsCSV(rows, types, [&rows_read, &types](
+                                const codegen::util::CSVScanner::Column *cols) {
+    rows_read++;
+    for (uint32_t i = 0; i < types.size(); i++) {
+      EXPECT_FALSE(cols[i].is_null);
+      EXPECT_GT(cols[i].len, 0);
+    }
+  });
+
+  // Check
+  EXPECT_EQ(rows.size(), rows_read);
+}
+
+}  // namespace test
+}  // namespace peloton
\ No newline at end of file

From 70d501275ea98b4264ab6a8203cecd83bdccce9a Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 17:32:37 -0400
Subject: [PATCH 30/42] Updated optimize to continue support for
 old/weird/strange AF copy executor

---
 src/common/internal_types.cpp                 |  10 +-
 src/executor/copy_executor.cpp                |  24 ++--
 src/executor/plan_executor.cpp                |   2 +-
 src/include/common/internal_types.h           |   3 +-
 .../optimizer/child_property_deriver.h        |   1 +
 src/include/optimizer/input_column_deriver.h  |   2 +
 src/include/optimizer/operator_node.h         |   3 +-
 src/include/optimizer/operator_visitor.h      |   5 +-
 src/include/optimizer/operators.h             |  27 +++-
 src/include/optimizer/plan_generator.h        |   2 +
 src/include/optimizer/rule_impls.h            |  15 +++
 src/include/planner/copy_plan.h               |  44 -------
 src/include/planner/csv_scan_plan.h           |  13 +-
 .../planner/export_external_file_plan.h       | 119 ++++++++++++++++++
 src/optimizer/child_property_deriver.cpp      |   7 ++
 src/optimizer/input_column_deriver.cpp        |   4 +
 src/optimizer/operators.cpp                   |  55 +++++++-
 src/optimizer/optimizer_task.cpp              |  16 ++-
 src/optimizer/plan_generator.cpp              |   8 ++
 .../query_to_operator_transformer.cpp         |   4 +-
 src/optimizer/rule.cpp                        |   1 +
 src/optimizer/rule_impls.cpp                  |  31 +++++
 src/optimizer/util.cpp                        |   3 -
 23 files changed, 316 insertions(+), 83 deletions(-)
 delete mode 100644 src/include/planner/copy_plan.h
 create mode 100644 src/include/planner/export_external_file_plan.h

diff --git a/src/common/internal_types.cpp b/src/common/internal_types.cpp
index 427e9848e25..855f7ef2d9b 100644
--- a/src/common/internal_types.cpp
+++ b/src/common/internal_types.cpp
@@ -1382,9 +1382,6 @@ std::string PlanNodeTypeToString(PlanNodeType type) {
     case PlanNodeType::RESULT: {
       return ("RESULT");
     }
-    case PlanNodeType::COPY: {
-      return ("COPY");
-    }
     case PlanNodeType::MOCK: {
       return ("MOCK");
     }
@@ -1394,6 +1391,9 @@ std::string PlanNodeTypeToString(PlanNodeType type) {
     case PlanNodeType::ANALYZE: {
       return ("ANALYZE");
     }
+    case PlanNodeType::EXPORT_EXTERNAL_FILE: {
+      return ("EXPORT_EXTERNAL_FILE");
+    }
     default: {
       throw ConversionException(
           StringUtil::Format("No string conversion for PlanNodeType value '%d'",
@@ -1461,12 +1461,12 @@ PlanNodeType StringToPlanNodeType(const std::string &str) {
     return PlanNodeType::HASH;
   } else if (upper_str == "RESULT") {
     return PlanNodeType::RESULT;
-  } else if (upper_str == "COPY") {
-    return PlanNodeType::COPY;
   } else if (upper_str == "MOCK") {
     return PlanNodeType::MOCK;
   } else if (upper_str == "ANALYZE") {
     return PlanNodeType::ANALYZE;
+  } else if (upper_str == "EXPORT_EXTERNAL_FILE") {
+    return PlanNodeType::EXPORT_EXTERNAL_FILE;
   } else {
     throw ConversionException(StringUtil::Format(
         "No PlanNodeType conversion from string '%s'", upper_str.c_str()));
diff --git a/src/executor/copy_executor.cpp b/src/executor/copy_executor.cpp
index e55d665bc6c..f499e899708 100644
--- a/src/executor/copy_executor.cpp
+++ b/src/executor/copy_executor.cpp
@@ -6,23 +6,25 @@
 //
 // Identification: src/executor/copy_executor.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
+#include "executor/copy_executor.h"
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+
 #include "common/logger.h"
 #include "catalog/catalog.h"
 #include "concurrency/transaction_manager_factory.h"
-#include "executor/copy_executor.h"
 #include "executor/executor_context.h"
 #include "executor/logical_tile_factory.h"
-#include "planner/copy_plan.h"
+#include "planner/export_external_file_plan.h"
 #include "storage/table_factory.h"
 #include "network/postgres_protocol_handler.h"
 #include "common/exception.h"
 #include "common/macros.h"
-#include <sys/stat.h>
-#include <sys/mman.h>
 
 namespace peloton {
 namespace executor {
@@ -35,7 +37,7 @@ CopyExecutor::CopyExecutor(const planner::AbstractPlan *node,
                            ExecutorContext *executor_context)
     : AbstractExecutor(node, executor_context) {}
 
-CopyExecutor::~CopyExecutor() {}
+CopyExecutor::~CopyExecutor() = default;
 
 /**
  * @brief Basic initialization.
@@ -45,21 +47,19 @@ bool CopyExecutor::DInit() {
   PELOTON_ASSERT(children_.size() == 1);
 
   // Grab info from plan node and check it
-  const planner::CopyPlan &node = GetPlanNode<planner::CopyPlan>();
+  const auto &node = GetPlanNode<planner::ExportExternalFilePlan>();
 
-  bool success = InitFileHandle(node.file_path.c_str(), "w");
+  bool success = InitFileHandle(node.GetFileName().c_str(), "w");
 
   if (success == false) {
-    throw ExecutorException("Failed to create file " + node.file_path +
+    throw ExecutorException("Failed to create file " + node.GetFileName() +
                             ". Try absolute path and make sure you have the "
                             "permission to access this file.");
-    return false;
   }
-  LOG_DEBUG("Created target copy output file: %s", node.file_path.c_str());
+  LOG_DEBUG("Created target copy output file: %s", node.GetFileName().c_str());
   return true;
 }
 
-
 bool CopyExecutor::InitFileHandle(const char *name, const char *mode) {
   auto file = fopen(name, mode);
   if (file == NULL) {
diff --git a/src/executor/plan_executor.cpp b/src/executor/plan_executor.cpp
index a01330b7b6d..6226e3a26cf 100644
--- a/src/executor/plan_executor.cpp
+++ b/src/executor/plan_executor.cpp
@@ -339,7 +339,7 @@ executor::AbstractExecutor *BuildExecutorTree(
           new executor::CreateFunctionExecutor(plan, executor_context);
       break;
 
-    case PlanNodeType::COPY:
+    case PlanNodeType::EXPORT_EXTERNAL_FILE:
       child_executor = new executor::CopyExecutor(plan, executor_context);
       break;
 
diff --git a/src/include/common/internal_types.h b/src/include/common/internal_types.h
index 4654ec9bc77..22598226407 100644
--- a/src/include/common/internal_types.h
+++ b/src/include/common/internal_types.h
@@ -595,7 +595,7 @@ enum class PlanNodeType {
 
   // Utility
   RESULT = 70,
-  COPY = 71,
+  EXPORT_EXTERNAL_FILE = 71,
   CREATE_FUNC = 72,
 
   // Test
@@ -1355,6 +1355,7 @@ enum class RuleType : uint32_t {
   INNER_JOIN_TO_HASH_JOIN,
   IMPLEMENT_DISTINCT,
   IMPLEMENT_LIMIT,
+  EXPORT_EXTERNAL_FILE_TO_PHYSICAL,
 
   // Don't move this one
   RewriteDelimiter,
diff --git a/src/include/optimizer/child_property_deriver.h b/src/include/optimizer/child_property_deriver.h
index dd887ff9af3..914cc77ab27 100644
--- a/src/include/optimizer/child_property_deriver.h
+++ b/src/include/optimizer/child_property_deriver.h
@@ -59,6 +59,7 @@ class ChildPropertyDeriver : public OperatorVisitor {
   void Visit(const PhysicalSortGroupBy *) override;
   void Visit(const PhysicalDistinct *) override;
   void Visit(const PhysicalAggregate *) override;
+  void Visit(const PhysicalExportExternalFile *) override;
 
  private:
   void DeriveForJoin();
diff --git a/src/include/optimizer/input_column_deriver.h b/src/include/optimizer/input_column_deriver.h
index 728a08305c4..ef66823bba0 100644
--- a/src/include/optimizer/input_column_deriver.h
+++ b/src/include/optimizer/input_column_deriver.h
@@ -93,6 +93,8 @@ class InputColumnDeriver : public OperatorVisitor {
 
   void Visit(const PhysicalAggregate *) override;
 
+  void Visit(const PhysicalExportExternalFile *) override;
+
  private:
   /**
    * @brief Provide all tuple value expressions needed in the expression
diff --git a/src/include/optimizer/operator_node.h b/src/include/optimizer/operator_node.h
index bfc0653518d..f870df330eb 100644
--- a/src/include/optimizer/operator_node.h
+++ b/src/include/optimizer/operator_node.h
@@ -72,7 +72,8 @@ enum class OpType {
   Update,
   Aggregate,
   HashGroupBy,
-  SortGroupBy
+  SortGroupBy,
+  ExportExternalFile,
 };
 
 //===--------------------------------------------------------------------===//
diff --git a/src/include/optimizer/operator_visitor.h b/src/include/optimizer/operator_visitor.h
index 50fd98fa024..e225287cebb 100644
--- a/src/include/optimizer/operator_visitor.h
+++ b/src/include/optimizer/operator_visitor.h
@@ -6,7 +6,7 @@
 //
 // Identification: src/include/optimizer/operator_visitor.h
 //
-// Copyright (c) 2015-16, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,7 +23,7 @@ namespace optimizer {
 
 class OperatorVisitor {
  public:
-  virtual ~OperatorVisitor(){};
+  virtual ~OperatorVisitor() = default;
 
   // Physical operator
   virtual void Visit(const DummyScan *) {}
@@ -49,6 +49,7 @@ class OperatorVisitor {
   virtual void Visit(const PhysicalSortGroupBy *) {}
   virtual void Visit(const PhysicalDistinct *) {}
   virtual void Visit(const PhysicalAggregate *) {}
+  virtual void Visit(const PhysicalExportExternalFile *) {}
 
   // Logical operator
   virtual void Visit(const LeafOperator *) {}
diff --git a/src/include/optimizer/operators.h b/src/include/optimizer/operators.h
index 8ec891c8131..8a7c7582e56 100644
--- a/src/include/optimizer/operators.h
+++ b/src/include/optimizer/operators.h
@@ -325,12 +325,19 @@ class LogicalUpdate : public OperatorNode<LogicalUpdate> {
 };
 
 //===--------------------------------------------------------------------===//
-// External file get
+// Export to external file
 //===--------------------------------------------------------------------===//
 class LogicalExportExternalFile
     : public OperatorNode<LogicalExportExternalFile> {
  public:
-  static Operator make();
+  static Operator make(ExternalFileFormat format, std::string file_name);
+
+  bool operator==(const BaseOperatorNode &r) override;
+
+  hash_t Hash() const override;
+
+  ExternalFileFormat format;
+  std::string file_name;
 };
 
 //===--------------------------------------------------------------------===//
@@ -604,6 +611,22 @@ class PhysicalUpdate : public OperatorNode<PhysicalUpdate> {
   const std::vector<std::unique_ptr<parser::UpdateClause>> *updates;
 };
 
+//===--------------------------------------------------------------------===//
+// Physical ExportExternalFile
+//===--------------------------------------------------------------------===//
+class PhysicalExportExternalFile
+    : public OperatorNode<PhysicalExportExternalFile> {
+ public:
+  static Operator make(ExternalFileFormat format, std::string file_name);
+
+  bool operator==(const BaseOperatorNode &r) override;
+
+  hash_t Hash() const override;
+
+  ExternalFileFormat format;
+  std::string file_name;
+};
+
 //===--------------------------------------------------------------------===//
 // PhysicalHashGroupBy
 //===--------------------------------------------------------------------===//
diff --git a/src/include/optimizer/plan_generator.h b/src/include/optimizer/plan_generator.h
index 353de6db29f..9fba272d4a8 100644
--- a/src/include/optimizer/plan_generator.h
+++ b/src/include/optimizer/plan_generator.h
@@ -94,6 +94,8 @@ class PlanGenerator : public OperatorVisitor {
 
   void Visit(const PhysicalAggregate *) override;
 
+  void Visit(const PhysicalExportExternalFile *) override;
+
  private:
   /**
    * @brief Generate all tuple value expressions of a base table
diff --git a/src/include/optimizer/rule_impls.h b/src/include/optimizer/rule_impls.h
index 5ace068138d..57902e744a9 100644
--- a/src/include/optimizer/rule_impls.h
+++ b/src/include/optimizer/rule_impls.h
@@ -281,6 +281,21 @@ class ImplementLimit : public Rule {
                  OptimizeContext *context) const override;
 };
 
+/**
+ * @brief Logical Export to External File -> Physical Export to External file
+ */
+class LogicalExportToPhysicalExport : public Rule {
+ public:
+  LogicalExportToPhysicalExport();
+
+  bool Check(std::shared_ptr<OperatorExpression> plan,
+             OptimizeContext *context) const override;
+
+  void Transform(std::shared_ptr<OperatorExpression> input,
+                 std::vector<std::shared_ptr<OperatorExpression>> &transformed,
+                 OptimizeContext *context) const override;
+};
+
 //===--------------------------------------------------------------------===//
 // Rewrite rules
 //===--------------------------------------------------------------------===//
diff --git a/src/include/planner/copy_plan.h b/src/include/planner/copy_plan.h
deleted file mode 100644
index 082598d10af..00000000000
--- a/src/include/planner/copy_plan.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                         Peloton
-//
-// copy_plan.h
-//
-// Identification: src/include/planner/copy_plan.h
-//
-// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
-//
-//===----------------------------------------------------------------------===//
-
-#pragma once
-
-#include "planner/abstract_plan.h"
-
-namespace peloton {
-
-namespace storage {
-class DataTable;
-}  // namespace storage
-
-namespace planner {
-
-class CopyPlan : public AbstractPlan {
- public:
-  explicit CopyPlan(std::string file_path) : file_path(std::move(file_path)) {}
-
-  PlanNodeType GetPlanNodeType() const override { return PlanNodeType::COPY; }
-
-  const std::string GetInfo() const override { return "CopyPlan"; }
-
-  // TODO: Implement copy mechanism
-  std::unique_ptr<AbstractPlan> Copy() const override { return nullptr; }
-
-  // The path of the target file
-  std::string file_path;
-
- private:
-  DISALLOW_COPY_AND_MOVE(CopyPlan);
-};
-
-}  // namespace planner
-}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index 2f40999efd0..516debcaeeb 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -138,13 +138,22 @@ inline void CSVScanPlan::GetOutputColumns(std::vector<oid_t> &columns) const {
 }
 
 inline hash_t CSVScanPlan::Hash() const {
-  return HashUtil::HashBytes(file_name_.data(), file_name_.length());
+  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(null_.c_str(), null_.length()));
+  return hash;
 }
 
 inline bool CSVScanPlan::operator==(const AbstractPlan &rhs) const {
   if (rhs.GetPlanNodeType() != PlanNodeType::CSVSCAN) return false;
   const auto &other = static_cast<const CSVScanPlan &>(rhs);
-  return StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_);
+  return (
+      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
+      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
+      escape_ == other.escape_);
 }
 
 inline void CSVScanPlan::GetAttributes(
diff --git a/src/include/planner/export_external_file_plan.h b/src/include/planner/export_external_file_plan.h
new file mode 100644
index 00000000000..6962891a19d
--- /dev/null
+++ b/src/include/planner/export_external_file_plan.h
@@ -0,0 +1,119 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// export_external_file_plan.h
+//
+// Identification: src/include/planner/export_external_file_plan.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "concurrency/transaction_context.h"
+#include "planner/abstract_plan.h"
+
+namespace peloton {
+namespace planner {
+
+class ExportExternalFilePlan : public AbstractPlan {
+ public:
+  ExportExternalFilePlan(std::string file_name, char delimiter = ',',
+                         char quote = '"', char escape = '\"');
+
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Accessors
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  PlanNodeType GetPlanNodeType() const override;
+
+  const std::string &GetFileName() const { return file_name_; }
+
+  char GetDelimiterChar() const { return delimiter_; }
+  char GetQuoteChar() const { return quote_; }
+  char GetEscapeChar() const { return escape_; }
+
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Utilities + Internal
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  hash_t Hash() const override;
+
+  bool operator==(const AbstractPlan &rhs) const override;
+
+  std::unique_ptr<AbstractPlan> Copy() const override;
+
+  void PerformBinding(BindingContext &binding_context) override;
+
+ private:
+  std::vector<const planner::AttributeInfo *> output_attributes_;
+
+  std::string file_name_;
+
+  char delimiter_;
+  char quote_;
+  char escape_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Implementation below
+///
+////////////////////////////////////////////////////////////////////////////////
+
+inline ExportExternalFilePlan::ExportExternalFilePlan(std::string file_name,
+                                                      char delimiter,
+                                                      char quote, char escape)
+    : file_name_(file_name),
+      delimiter_(delimiter),
+      quote_(quote),
+      escape_(escape) {}
+
+inline PlanNodeType ExportExternalFilePlan::GetPlanNodeType() const {
+  return PlanNodeType::EXPORT_EXTERNAL_FILE;
+}
+
+inline hash_t ExportExternalFilePlan::Hash() const {
+  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
+  return hash;
+}
+
+inline bool ExportExternalFilePlan::operator==(const AbstractPlan &rhs) const {
+  if (rhs.GetPlanNodeType() != PlanNodeType::EXPORT_EXTERNAL_FILE) return false;
+  const auto &other = static_cast<const ExportExternalFilePlan &>(rhs);
+  return (
+      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
+      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
+      escape_ == other.escape_);
+}
+
+inline std::unique_ptr<AbstractPlan> ExportExternalFilePlan::Copy() const {
+  return std::unique_ptr<AbstractPlan>{
+      new ExportExternalFilePlan(file_name_, delimiter_, quote_, escape_)};
+}
+
+inline void ExportExternalFilePlan::PerformBinding(
+    BindingContext &binding_context) {
+  PELOTON_ASSERT(GetChildrenSize() == 1);
+  auto &child = *GetChild(0);
+
+  std::vector<oid_t> child_output_cols;
+  child.GetOutputColumns(child_output_cols);
+
+  output_attributes_.clear();
+  for (const auto &col_id : child_output_cols) {
+    output_attributes_.push_back(binding_context.Find(col_id));
+  }
+}
+
+}  // namespace planner
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp
index 5020302b614..39ca06d811b 100644
--- a/src/optimizer/child_property_deriver.cpp
+++ b/src/optimizer/child_property_deriver.cpp
@@ -193,6 +193,13 @@ void ChildPropertyDeriver::Visit(const DummyScan *) {
       make_pair(make_shared<PropertySet>(), vector<shared_ptr<PropertySet>>()));
 }
 
+void ChildPropertyDeriver::Visit(const PhysicalExportExternalFile *) {
+  // Let child fulfil all the required properties
+  vector<shared_ptr<PropertySet>> child_input_properties{requirements_};
+
+  output_.push_back(make_pair(requirements_, move(child_input_properties)));
+}
+
 void ChildPropertyDeriver::DeriveForJoin() {
   output_.push_back(make_pair(
       make_shared<PropertySet>(),
diff --git a/src/optimizer/input_column_deriver.cpp b/src/optimizer/input_column_deriver.cpp
index 08d7c54a4ae..019117ae68e 100644
--- a/src/optimizer/input_column_deriver.cpp
+++ b/src/optimizer/input_column_deriver.cpp
@@ -157,6 +157,10 @@ void InputColumnDeriver::Visit(const PhysicalDelete *) { Passdown(); }
 
 void InputColumnDeriver::Visit(const PhysicalUpdate *) { Passdown(); }
 
+void InputColumnDeriver::Visit(const PhysicalExportExternalFile *) {
+  Passdown();
+}
+
 void InputColumnDeriver::ScanHelper() {
   // Scan does not have input column, output columns should contain all tuple
   // value expressions needed
diff --git a/src/optimizer/operators.cpp b/src/optimizer/operators.cpp
index c9fb133bc90..e168a4d4bea 100644
--- a/src/optimizer/operators.cpp
+++ b/src/optimizer/operators.cpp
@@ -445,11 +445,29 @@ Operator LogicalLimit::make(int64_t offset, int64_t limit) {
 //===--------------------------------------------------------------------===//
 // External file output
 //===--------------------------------------------------------------------===//
-Operator LogicalExportExternalFile::make() {
-  auto *export_op = new LogicalExternalFileGet();
+Operator LogicalExportExternalFile::make(ExternalFileFormat format,
+                                         std::string file_name) {
+  auto *export_op = new LogicalExportExternalFile();
+  export_op->format = format;
+  export_op->file_name = std::move(file_name);
   return Operator(export_op);
 }
 
+bool LogicalExportExternalFile::operator==(const BaseOperatorNode &node) {
+  if (node.GetType() != OpType::LogicalExportExternalFile) return false;
+  const auto &export_op =
+      *static_cast<const LogicalExportExternalFile *>(&node);
+  return (format == export_op.format && file_name == export_op.file_name);
+}
+
+hash_t LogicalExportExternalFile::Hash() const {
+  hash_t hash = BaseOperatorNode::Hash();
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  return hash;
+}
+
 //===--------------------------------------------------------------------===//
 // DummyScan
 //===--------------------------------------------------------------------===//
@@ -823,6 +841,32 @@ Operator PhysicalUpdate::make(
   return Operator(update);
 }
 
+//===--------------------------------------------------------------------===//
+// PhysicalExportExternalFile
+//===--------------------------------------------------------------------===//
+Operator PhysicalExportExternalFile::make(ExternalFileFormat format,
+                                          std::string file_name) {
+  auto *export_op = new PhysicalExportExternalFile();
+  export_op->format = format;
+  export_op->file_name = file_name;
+  return Operator(export_op);
+}
+
+bool PhysicalExportExternalFile::operator==(const BaseOperatorNode &node) {
+  if (node.GetType() != OpType::ExportExternalFile) return false;
+  const auto &export_op =
+      *static_cast<const PhysicalExportExternalFile *>(&node);
+  return (format == export_op.format && file_name == export_op.file_name);
+}
+
+hash_t PhysicalExportExternalFile::Hash() const {
+  hash_t hash = BaseOperatorNode::Hash();
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  return hash;
+}
+
 //===--------------------------------------------------------------------===//
 // PhysicalHashGroupBy
 //===--------------------------------------------------------------------===//
@@ -1005,6 +1049,9 @@ template <>
 std::string OperatorNode<PhysicalDistinct>::name_ = "PhysicalDistinct";
 template <>
 std::string OperatorNode<PhysicalAggregate>::name_ = "PhysicalAggregate";
+template <>
+std::string OperatorNode<PhysicalExportExternalFile>::name_ =
+    "PhysicalExportExternalFile";
 
 //===--------------------------------------------------------------------===//
 template <>
@@ -1102,7 +1149,11 @@ template <>
 OpType OperatorNode<PhysicalSortGroupBy>::type_ = OpType::SortGroupBy;
 template <>
 OpType OperatorNode<PhysicalAggregate>::type_ = OpType::Aggregate;
+template <>
+OpType OperatorNode<PhysicalExportExternalFile>::type_ =
+    OpType::ExportExternalFile;
 //===--------------------------------------------------------------------===//
+
 template <typename T>
 bool OperatorNode<T>::IsLogical() const {
   return type_ < OpType::LogicalPhysicalDelimiter;
diff --git a/src/optimizer/optimizer_task.cpp b/src/optimizer/optimizer_task.cpp
index f0a489906ae..8c430f76ae2 100644
--- a/src/optimizer/optimizer_task.cpp
+++ b/src/optimizer/optimizer_task.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/optimizer/optimizer_task.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,6 +22,7 @@
 
 namespace peloton {
 namespace optimizer {
+
 //===--------------------------------------------------------------------===//
 // Base class
 //===--------------------------------------------------------------------===//
@@ -30,13 +31,16 @@ void OptimizerTask::ConstructValidRules(
     std::vector<std::unique_ptr<Rule>> &rules,
     std::vector<RuleWithPromise> &valid_rules) {
   for (auto &rule : rules) {
-    if (group_expr->Op().GetType() !=
-            rule->GetMatchPattern()->Type() ||  // Root pattern type mismatch
-        group_expr->HasRuleExplored(rule.get()) ||  // Rule has been applied
+    // Check if we can apply the rule
+    bool root_pattern_mismatch =
+        group_expr->Op().GetType() != rule->GetMatchPattern()->Type();
+    bool already_explored = group_expr->HasRuleExplored(rule.get());
+    bool child_pattern_mismatch =
         group_expr->GetChildrenGroupsSize() !=
-            rule->GetMatchPattern()
-                ->GetChildPatternsSize())  // Children size does not math
+        rule->GetMatchPattern()->GetChildPatternsSize();
+    if (root_pattern_mismatch || already_explored || child_pattern_mismatch) {
       continue;
+    }
 
     auto promise = rule->Promise(group_expr, context);
     if (promise > 0) valid_rules.emplace_back(rule.get(), promise);
diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp
index 804184b6246..c2c2dcc399a 100644
--- a/src/optimizer/plan_generator.cpp
+++ b/src/optimizer/plan_generator.cpp
@@ -23,6 +23,7 @@
 #include "planner/aggregate_plan.h"
 #include "planner/csv_scan_plan.h"
 #include "planner/delete_plan.h"
+#include "planner/export_external_file_plan.h"
 #include "planner/hash_join_plan.h"
 #include "planner/hash_plan.h"
 #include "planner/index_scan_plan.h"
@@ -385,6 +386,13 @@ void PlanGenerator::Visit(const PhysicalUpdate *op) {
   output_plan_ = move(update_plan);
 }
 
+void PlanGenerator::Visit(const PhysicalExportExternalFile *op) {
+  unique_ptr<planner::AbstractPlan> export_plan{
+      new planner::ExportExternalFilePlan(op->file_name)};
+  export_plan->AddChild(move(children_plans_[0]));
+  output_plan_ = move(export_plan);
+}
+
 /************************* Private Functions *******************************/
 vector<unique_ptr<expression::AbstractExpression>>
 PlanGenerator::GenerateTableTVExprs(
diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp
index 816ef24a7fb..73c52f9266e 100644
--- a/src/optimizer/query_to_operator_transformer.cpp
+++ b/src/optimizer/query_to_operator_transformer.cpp
@@ -386,8 +386,8 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
     } else {
       op->table->Accept(this);
     }
-    auto export_op =
-        std::make_shared<OperatorExpression>(LogicalExportExternalFile::make());
+    auto export_op = std::make_shared<OperatorExpression>(
+        LogicalExportExternalFile::make(op->format, op->file_path));
     export_op->PushChild(output_expr_);
     output_expr_ = export_op;
   }
diff --git a/src/optimizer/rule.cpp b/src/optimizer/rule.cpp
index fc4bc837736..8c72ed17fa8 100644
--- a/src/optimizer/rule.cpp
+++ b/src/optimizer/rule.cpp
@@ -45,6 +45,7 @@ RuleSet::RuleSet() {
   AddImplementationRule(new InnerJoinToInnerHashJoin());
   AddImplementationRule(new ImplementDistinct());
   AddImplementationRule(new ImplementLimit());
+  AddImplementationRule(new LogicalExportToPhysicalExport());
 
   AddRewriteRule(RewriteRuleSetName::PREDICATE_PUSH_DOWN,
                  new PushFilterThroughJoin());
diff --git a/src/optimizer/rule_impls.cpp b/src/optimizer/rule_impls.cpp
index 284109a38f1..9d0a4624c2a 100644
--- a/src/optimizer/rule_impls.cpp
+++ b/src/optimizer/rule_impls.cpp
@@ -819,6 +819,37 @@ void ImplementLimit::Transform(
   transformed.push_back(result_plan);
 }
 
+///////////////////////////////////////////////////////////////////////////////
+/// LogicalExport to Physical Export
+LogicalExportToPhysicalExport::LogicalExportToPhysicalExport() {
+  type_ = RuleType::EXPORT_EXTERNAL_FILE_TO_PHYSICAL;
+  match_pattern = std::make_shared<Pattern>(OpType::LogicalExportExternalFile);
+  match_pattern->AddChild(std::make_shared<Pattern>(OpType::Leaf));
+}
+
+bool LogicalExportToPhysicalExport::Check(
+    UNUSED_ATTRIBUTE std::shared_ptr<OperatorExpression> plan,
+    UNUSED_ATTRIBUTE OptimizeContext *context) const {
+  return true;
+}
+
+void LogicalExportToPhysicalExport::Transform(
+    std::shared_ptr<OperatorExpression> input,
+    std::vector<std::shared_ptr<OperatorExpression>> &transformed,
+    UNUSED_ATTRIBUTE OptimizeContext *context) const {
+  const auto *logical_export = input->Op().As<LogicalExportExternalFile>();
+
+  auto result_plan =
+      std::make_shared<OperatorExpression>(PhysicalExportExternalFile::make(
+          logical_export->format, logical_export->file_name));
+
+  std::vector<std::shared_ptr<OperatorExpression>> children = input->Children();
+  PELOTON_ASSERT(children.size() == 1);
+  result_plan->PushChild(children[0]);
+
+  transformed.push_back(result_plan);
+}
+
 //===--------------------------------------------------------------------===//
 // Rewrite rules
 //===--------------------------------------------------------------------===//
diff --git a/src/optimizer/util.cpp b/src/optimizer/util.cpp
index 4ff60ee36c8..07685376b34 100644
--- a/src/optimizer/util.cpp
+++ b/src/optimizer/util.cpp
@@ -15,9 +15,6 @@
 #include "catalog/query_metrics_catalog.h"
 #include "concurrency/transaction_manager_factory.h"
 #include "expression/expression_util.h"
-#include "planner/copy_plan.h"
-#include "planner/seq_scan_plan.h"
-#include "storage/data_table.h"
 
 namespace peloton {
 namespace optimizer {

From f04d036f87e6aad2a79a51c8b5be435de1a7c085 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 17:41:58 -0400
Subject: [PATCH 31/42] Extracted implementation into CPP file for plan node

---
 src/include/planner/csv_scan_plan.h           | 85 ++---------------
 .../planner/export_external_file_plan.h       | 64 ++-----------
 src/planner/csv_scan_plan.cpp                 | 93 +++++++++++++++++++
 src/planner/export_external_file_plan.cpp     | 70 ++++++++++++++
 4 files changed, 179 insertions(+), 133 deletions(-)
 create mode 100644 src/planner/csv_scan_plan.cpp
 create mode 100644 src/planner/export_external_file_plan.cpp

diff --git a/src/include/planner/csv_scan_plan.h b/src/include/planner/csv_scan_plan.h
index 516debcaeeb..2cd255884d3 100644
--- a/src/include/planner/csv_scan_plan.h
+++ b/src/include/planner/csv_scan_plan.h
@@ -12,15 +12,21 @@
 
 #pragma once
 
+#include <memory>
 #include <numeric>
+#include <string>
+#include <vector>
 
-#include "codegen/type/type.h"
 #include "planner/abstract_scan_plan.h"
 #include "planner/attribute_info.h"
+#include "type/type_id.h"
 
 namespace peloton {
 namespace planner {
 
+/**
+ * This is the plan node when scanning a CSV file.
+ */
 class CSVScanPlan : public AbstractScan {
  public:
   struct ColumnInfo {
@@ -87,82 +93,5 @@ class CSVScanPlan : public AbstractScan {
   std::vector<planner::AttributeInfo> attributes_;
 };
 
-////////////////////////////////////////////////////////////////////////////////
-///
-/// Implementation below
-///
-////////////////////////////////////////////////////////////////////////////////
-
-inline CSVScanPlan::CSVScanPlan(std::string file_name,
-                                std::vector<CSVScanPlan::ColumnInfo> &&cols,
-                                char delimiter, char quote, char escape,
-                                std::string null)
-    : file_name_(std::move(file_name)),
-      delimiter_(delimiter),
-      quote_(quote),
-      escape_(escape),
-      null_(null) {
-  attributes_.resize(cols.size());
-  for (uint32_t i = 0; i < cols.size(); i++) {
-    const auto &col_info = cols[i];
-    attributes_[i].type = codegen::type::Type{col_info.type, true};
-    attributes_[i].attribute_id = i;
-    attributes_[i].name = col_info.name;
-  }
-}
-
-inline PlanNodeType CSVScanPlan::GetPlanNodeType() const {
-  return PlanNodeType::CSVSCAN;
-}
-
-inline std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
-  std::vector<CSVScanPlan::ColumnInfo> new_cols;
-  for (const auto &attribute : attributes_) {
-    new_cols.push_back(CSVScanPlan::ColumnInfo{.name = attribute.name,
-                                               .type = attribute.type.type_id});
-  }
-  return std::unique_ptr<AbstractPlan>(
-      new CSVScanPlan(file_name_, std::move(new_cols)));
-}
-
-inline void CSVScanPlan::PerformBinding(BindingContext &binding_context) {
-  for (uint32_t i = 0; i < attributes_.size(); i++) {
-    binding_context.BindNew(i, &attributes_[i]);
-  }
-}
-
-inline void CSVScanPlan::GetOutputColumns(std::vector<oid_t> &columns) const {
-  columns.clear();
-  columns.resize(attributes_.size());
-  std::iota(columns.begin(), columns.end(), 0);
-}
-
-inline hash_t CSVScanPlan::Hash() const {
-  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
-  hash = HashUtil::CombineHashes(
-      hash, HashUtil::HashBytes(null_.c_str(), null_.length()));
-  return hash;
-}
-
-inline bool CSVScanPlan::operator==(const AbstractPlan &rhs) const {
-  if (rhs.GetPlanNodeType() != PlanNodeType::CSVSCAN) return false;
-  const auto &other = static_cast<const CSVScanPlan &>(rhs);
-  return (
-      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
-      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
-      escape_ == other.escape_);
-}
-
-inline void CSVScanPlan::GetAttributes(
-    std::vector<const AttributeInfo *> &ais) const {
-  ais.clear();
-  for (const auto &ai : attributes_) {
-    ais.push_back(&ai);
-  }
-}
-
 }  // namespace planner
 }  // namespace peloton
\ No newline at end of file
diff --git a/src/include/planner/export_external_file_plan.h b/src/include/planner/export_external_file_plan.h
index 6962891a19d..7dfb5807422 100644
--- a/src/include/planner/export_external_file_plan.h
+++ b/src/include/planner/export_external_file_plan.h
@@ -12,12 +12,20 @@
 
 #pragma once
 
-#include "concurrency/transaction_context.h"
+#include <memory>
+#include <vector>
+#include <string>
+
 #include "planner/abstract_plan.h"
 
 namespace peloton {
 namespace planner {
 
+/**
+ * This is the plan node when exporting data from the database into an external
+ * file. It is configured with the name of the file to write content into, and
+ * the delimiter, quote, and escape characters to use when writing content.
+ */
 class ExportExternalFilePlan : public AbstractPlan {
  public:
   ExportExternalFilePlan(std::string file_name, char delimiter = ',',
@@ -61,59 +69,5 @@ class ExportExternalFilePlan : public AbstractPlan {
   char escape_;
 };
 
-////////////////////////////////////////////////////////////////////////////////
-///
-/// Implementation below
-///
-////////////////////////////////////////////////////////////////////////////////
-
-inline ExportExternalFilePlan::ExportExternalFilePlan(std::string file_name,
-                                                      char delimiter,
-                                                      char quote, char escape)
-    : file_name_(file_name),
-      delimiter_(delimiter),
-      quote_(quote),
-      escape_(escape) {}
-
-inline PlanNodeType ExportExternalFilePlan::GetPlanNodeType() const {
-  return PlanNodeType::EXPORT_EXTERNAL_FILE;
-}
-
-inline hash_t ExportExternalFilePlan::Hash() const {
-  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
-  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
-  return hash;
-}
-
-inline bool ExportExternalFilePlan::operator==(const AbstractPlan &rhs) const {
-  if (rhs.GetPlanNodeType() != PlanNodeType::EXPORT_EXTERNAL_FILE) return false;
-  const auto &other = static_cast<const ExportExternalFilePlan &>(rhs);
-  return (
-      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
-      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
-      escape_ == other.escape_);
-}
-
-inline std::unique_ptr<AbstractPlan> ExportExternalFilePlan::Copy() const {
-  return std::unique_ptr<AbstractPlan>{
-      new ExportExternalFilePlan(file_name_, delimiter_, quote_, escape_)};
-}
-
-inline void ExportExternalFilePlan::PerformBinding(
-    BindingContext &binding_context) {
-  PELOTON_ASSERT(GetChildrenSize() == 1);
-  auto &child = *GetChild(0);
-
-  std::vector<oid_t> child_output_cols;
-  child.GetOutputColumns(child_output_cols);
-
-  output_attributes_.clear();
-  for (const auto &col_id : child_output_cols) {
-    output_attributes_.push_back(binding_context.Find(col_id));
-  }
-}
-
 }  // namespace planner
 }  // namespace peloton
\ No newline at end of file
diff --git a/src/planner/csv_scan_plan.cpp b/src/planner/csv_scan_plan.cpp
new file mode 100644
index 00000000000..c4ff66765e9
--- /dev/null
+++ b/src/planner/csv_scan_plan.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// csv_scan_plan.cpp
+//
+// Identification: src/planner/csv_scan_plan.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "planner/csv_scan_plan.h"
+
+#include <numeric>
+
+#include "codegen/type/type.h"
+
+namespace peloton {
+namespace planner {
+
+CSVScanPlan::CSVScanPlan(std::string file_name,
+                         std::vector<CSVScanPlan::ColumnInfo> &&cols,
+                         char delimiter, char quote, char escape,
+                         std::string null)
+    : file_name_(std::move(file_name)),
+      delimiter_(delimiter),
+      quote_(quote),
+      escape_(escape),
+      null_(null) {
+  attributes_.resize(cols.size());
+  for (uint32_t i = 0; i < cols.size(); i++) {
+    const auto &col_info = cols[i];
+    attributes_[i].type = codegen::type::Type{col_info.type, true};
+    attributes_[i].attribute_id = i;
+    attributes_[i].name = col_info.name;
+  }
+}
+
+PlanNodeType CSVScanPlan::GetPlanNodeType() const {
+  return PlanNodeType::CSVSCAN;
+}
+
+std::unique_ptr<AbstractPlan> CSVScanPlan::Copy() const {
+  std::vector<CSVScanPlan::ColumnInfo> new_cols;
+  for (const auto &attribute : attributes_) {
+    new_cols.push_back(CSVScanPlan::ColumnInfo{.name = attribute.name,
+                                               .type = attribute.type.type_id});
+  }
+  return std::unique_ptr<AbstractPlan>(
+      new CSVScanPlan(file_name_, std::move(new_cols)));
+}
+
+void CSVScanPlan::PerformBinding(BindingContext &binding_context) {
+  for (uint32_t i = 0; i < attributes_.size(); i++) {
+    binding_context.BindNew(i, &attributes_[i]);
+  }
+}
+
+void CSVScanPlan::GetOutputColumns(std::vector<oid_t> &columns) const {
+  columns.clear();
+  columns.resize(attributes_.size());
+  std::iota(columns.begin(), columns.end(), 0);
+}
+
+hash_t CSVScanPlan::Hash() const {
+  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
+  hash = HashUtil::CombineHashes(
+      hash, HashUtil::HashBytes(null_.c_str(), null_.length()));
+  return hash;
+}
+
+bool CSVScanPlan::operator==(const AbstractPlan &rhs) const {
+  if (rhs.GetPlanNodeType() != PlanNodeType::CSVSCAN) return false;
+  const auto &other = static_cast<const CSVScanPlan &>(rhs);
+  return (
+      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
+      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
+      escape_ == other.escape_);
+}
+
+void CSVScanPlan::GetAttributes(std::vector<const AttributeInfo *> &ais) const {
+  ais.clear();
+  for (const auto &ai : attributes_) {
+    ais.push_back(&ai);
+  }
+}
+
+}  // namespace planner
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/planner/export_external_file_plan.cpp b/src/planner/export_external_file_plan.cpp
new file mode 100644
index 00000000000..8f63cc1a072
--- /dev/null
+++ b/src/planner/export_external_file_plan.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+// export_external_file_plan.cpp
+//
+// Identification: src/planner/export_external_file_plan.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "planner/export_external_file_plan.h"
+
+#include "common/macros.h"
+#include "util/hash_util.h"
+#include "util/string_util.h"
+
+namespace peloton {
+namespace planner {
+
+ExportExternalFilePlan::ExportExternalFilePlan(std::string file_name,
+                                               char delimiter, char quote,
+                                               char escape)
+    : file_name_(file_name),
+      delimiter_(delimiter),
+      quote_(quote),
+      escape_(escape) {}
+
+PlanNodeType ExportExternalFilePlan::GetPlanNodeType() const {
+  return PlanNodeType::EXPORT_EXTERNAL_FILE;
+}
+
+hash_t ExportExternalFilePlan::Hash() const {
+  hash_t hash = HashUtil::HashBytes(file_name_.data(), file_name_.length());
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&delimiter_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&quote_));
+  hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&escape_));
+  return hash;
+}
+
+bool ExportExternalFilePlan::operator==(const AbstractPlan &rhs) const {
+  if (rhs.GetPlanNodeType() != PlanNodeType::EXPORT_EXTERNAL_FILE) return false;
+  const auto &other = static_cast<const ExportExternalFilePlan &>(rhs);
+  return (
+      (StringUtil::Upper(file_name_) == StringUtil::Upper(other.file_name_)) &&
+      delimiter_ == other.delimiter_ && quote_ == other.quote_ &&
+      escape_ == other.escape_);
+}
+
+std::unique_ptr<AbstractPlan> ExportExternalFilePlan::Copy() const {
+  return std::unique_ptr<AbstractPlan>{
+      new ExportExternalFilePlan(file_name_, delimiter_, quote_, escape_)};
+}
+
+void ExportExternalFilePlan::PerformBinding(BindingContext &binding_context) {
+  PELOTON_ASSERT(GetChildrenSize() == 1);
+  auto &child = *GetChild(0);
+
+  std::vector<oid_t> child_output_cols;
+  child.GetOutputColumns(child_output_cols);
+
+  output_attributes_.clear();
+  for (const auto &col_id : child_output_cols) {
+    output_attributes_.push_back(binding_context.Find(col_id));
+  }
+}
+
+}  // namespace planner
+}  // namespace peloton
\ No newline at end of file

From 0483a6de8385060cd2f314ddf08f0c350278faa0 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Wed, 16 May 2018 17:51:12 -0400
Subject: [PATCH 32/42] * Propagatge file options through optimization. * Added
 codegen.cpp to source validator whitelist, since we have the ability to call
 printf() from codegen for debug. * Beefed up overflow checks in
 NumericRuntime. * Fixed tests.

---
 script/validators/source_validator.py         |  4 +-
 src/function/numeric_functions.cpp            | 34 ++++++++++---
 src/include/optimizer/operators.h             | 24 +++++++--
 src/include/parser/copy_statement.h           |  6 ++-
 src/optimizer/operators.cpp                   | 50 ++++++++++++++++---
 src/optimizer/plan_generator.cpp              |  6 ++-
 .../query_to_operator_transformer.cpp         |  8 +--
 src/optimizer/rule_impls.cpp                  |  8 +--
 src/parser/postgresparser.cpp                 | 16 +++++-
 test/codegen/csv_scan_test.cpp                | 49 +++++++++++++++---
 test/codegen/value_integrity_test.cpp         |  7 ++-
 test/common/internal_types_test.cpp           |  4 +-
 ...ns_test.cpp => numeric_functions_test.cpp} | 25 +++++-----
 13 files changed, 185 insertions(+), 56 deletions(-)
 rename test/function/{decimal_functions_test.cpp => numeric_functions_test.cpp} (93%)

diff --git a/script/validators/source_validator.py b/script/validators/source_validator.py
index 331bdc7c688..ad70c24c7e5 100755
--- a/script/validators/source_validator.py
+++ b/script/validators/source_validator.py
@@ -58,12 +58,12 @@
     "src/network/protocol.cpp",
     "src/include/common/macros.h",
     "src/common/stack_trace.cpp",
-    "src/include/parser/sql_scanner.h", # There is a free() in comments
     "src/include/index/bloom_filter.h",
     "src/include/index/compact_ints_key.h",
     "src/include/index/bwtree.h",
     "src/codegen/util/oa_hash_table.cpp",
-    "src/codegen/util/cc_hash_table.cpp"
+    "src/codegen/util/cc_hash_table.cpp",
+    "src/codegen/codegen.cpp",              # We allow calling printf() from codegen for debugging
 ]
 
 ## ==============================================
diff --git a/src/function/numeric_functions.cpp b/src/function/numeric_functions.cpp
index f4a943c8ce0..f0d13e92ffc 100644
--- a/src/function/numeric_functions.cpp
+++ b/src/function/numeric_functions.cpp
@@ -216,13 +216,25 @@ T ParseInteger(const char *ptr, uint32_t len) {
   }
 
   // Convert
-  int64_t num = 0;
+  uint64_t cutoff =
+      static_cast<uint64_t>(negative ? -std::numeric_limits<int64_t>::min()
+                                     : std::numeric_limits<int64_t>::max());
+  uint64_t cutlimit = cutoff % 10;
+  cutoff /= 10;
+
+  uint64_t num = 0;
   while (start < end) {
     if (*start < '0' || *start > '9') {
       break;
     }
 
-    num = (num * 10) + (*start - '0');
+    uint32_t c = static_cast<uint32_t>(*start - '0');
+
+    if (num > cutoff || (num == cutoff && c > cutlimit)) {
+      goto overflow;
+    }
+
+    num = (num * 10) + c;
 
     start++;
   }
@@ -234,8 +246,7 @@ T ParseInteger(const char *ptr, uint32_t len) {
 
   // If we haven't consumed everything at this point, it was an invalid input
   if (start < end) {
-    codegen::RuntimeFunctions::ThrowInvalidInputStringException();
-    __builtin_unreachable();
+    goto invalid;
   }
 
   // Negate number if we need to
@@ -244,14 +255,21 @@ T ParseInteger(const char *ptr, uint32_t len) {
   }
 
   // Range check
-  if (num <= std::numeric_limits<T>::min() ||
-      num >= std::numeric_limits<T>::max()) {
-    codegen::RuntimeFunctions::ThrowOverflowException();
-    __builtin_unreachable();
+  if (static_cast<int64_t>(num) <= std::numeric_limits<T>::min() ||
+      static_cast<int64_t>(num) >= std::numeric_limits<T>::max()) {
+    goto overflow;
   }
 
   // Done
   return static_cast<T>(num);
+
+overflow:
+  codegen::RuntimeFunctions::ThrowOverflowException();
+  __builtin_unreachable();
+
+invalid:
+  codegen::RuntimeFunctions::ThrowInvalidInputStringException();
+  __builtin_unreachable();
 }
 
 }  // namespace
diff --git a/src/include/optimizer/operators.h b/src/include/optimizer/operators.h
index 8a7c7582e56..d51d66b01e8 100644
--- a/src/include/optimizer/operators.h
+++ b/src/include/optimizer/operators.h
@@ -73,7 +73,8 @@ class LogicalGet : public OperatorNode<LogicalGet> {
 class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
  public:
   static Operator make(oid_t get_id, ExternalFileFormat format,
-                       std::string file_name);
+                       std::string file_name, char delimiter, char quote,
+                       char escape);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -83,6 +84,9 @@ class LogicalExternalFileGet : public OperatorNode<LogicalExternalFileGet> {
   oid_t get_id;
   ExternalFileFormat format;
   std::string file_name;
+  char delimiter;
+  char quote;
+  char escape;
 };
 
 //===--------------------------------------------------------------------===//
@@ -330,7 +334,8 @@ class LogicalUpdate : public OperatorNode<LogicalUpdate> {
 class LogicalExportExternalFile
     : public OperatorNode<LogicalExportExternalFile> {
  public:
-  static Operator make(ExternalFileFormat format, std::string file_name);
+  static Operator make(ExternalFileFormat format, std::string file_name,
+                       char delimiter, char quote, char escape);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -338,6 +343,9 @@ class LogicalExportExternalFile
 
   ExternalFileFormat format;
   std::string file_name;
+  char delimiter;
+  char quote;
+  char escape;
 };
 
 //===--------------------------------------------------------------------===//
@@ -410,7 +418,8 @@ class PhysicalIndexScan : public OperatorNode<PhysicalIndexScan> {
 class ExternalFileScan : public OperatorNode<ExternalFileScan> {
  public:
   static Operator make(oid_t get_id, ExternalFileFormat format,
-                       std::string file_name);
+                       std::string file_name, char delimiter, char quote,
+                       char escape);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -420,6 +429,9 @@ class ExternalFileScan : public OperatorNode<ExternalFileScan> {
   oid_t get_id;
   ExternalFileFormat format;
   std::string file_name;
+  char delimiter;
+  char quote;
+  char escape;
 };
 
 //===--------------------------------------------------------------------===//
@@ -617,7 +629,8 @@ class PhysicalUpdate : public OperatorNode<PhysicalUpdate> {
 class PhysicalExportExternalFile
     : public OperatorNode<PhysicalExportExternalFile> {
  public:
-  static Operator make(ExternalFileFormat format, std::string file_name);
+  static Operator make(ExternalFileFormat format, std::string file_name,
+                       char delimiter, char quote, char escape);
 
   bool operator==(const BaseOperatorNode &r) override;
 
@@ -625,6 +638,9 @@ class PhysicalExportExternalFile
 
   ExternalFileFormat format;
   std::string file_name;
+  char delimiter;
+  char quote;
+  char escape;
 };
 
 //===--------------------------------------------------------------------===//
diff --git a/src/include/parser/copy_statement.h b/src/include/parser/copy_statement.h
index 8145cd695e9..67e8fe5ee25 100644
--- a/src/include/parser/copy_statement.h
+++ b/src/include/parser/copy_statement.h
@@ -62,11 +62,13 @@ class CopyStatement : public SQLStatement {
   std::string file_path;
 
   // The format of the file
-  ExternalFileFormat format;
+  ExternalFileFormat format = ExternalFileFormat::CSV;
 
   bool is_from;
 
-  char delimiter;
+  char delimiter = ',';
+  char quote = '"';
+  char escape = '"';
 };
 
 }  // namespace parser
diff --git a/src/optimizer/operators.cpp b/src/optimizer/operators.cpp
index e168a4d4bea..6457e769db2 100644
--- a/src/optimizer/operators.cpp
+++ b/src/optimizer/operators.cpp
@@ -68,11 +68,15 @@ bool LogicalGet::operator==(const BaseOperatorNode &r) {
 //===--------------------------------------------------------------------===//
 
 Operator LogicalExternalFileGet::make(oid_t get_id, ExternalFileFormat format,
-                                      std::string file_name) {
+                                      std::string file_name, char delimiter,
+                                      char quote, char escape) {
   auto *get = new LogicalExternalFileGet();
   get->get_id = get_id;
   get->format = format;
   get->file_name = std::move(file_name);
+  get->delimiter = delimiter;
+  get->quote = quote;
+  get->escape = escape;
   return Operator(get);
 }
 
@@ -80,7 +84,8 @@ bool LogicalExternalFileGet::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::LogicalExternalFileGet) return false;
   const auto &get = *static_cast<const LogicalExternalFileGet *>(&node);
   return (get_id == get.get_id && format == get.format &&
-          file_name == get.file_name);
+          file_name == get.file_name && delimiter == get.delimiter &&
+          quote == get.quote && escape == get.escape);
 }
 
 hash_t LogicalExternalFileGet::Hash() const {
@@ -89,6 +94,9 @@ hash_t LogicalExternalFileGet::Hash() const {
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
   hash = HashUtil::CombineHashes(
       hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
   return hash;
 }
 
@@ -446,10 +454,14 @@ Operator LogicalLimit::make(int64_t offset, int64_t limit) {
 // External file output
 //===--------------------------------------------------------------------===//
 Operator LogicalExportExternalFile::make(ExternalFileFormat format,
-                                         std::string file_name) {
+                                         std::string file_name, char delimiter,
+                                         char quote, char escape) {
   auto *export_op = new LogicalExportExternalFile();
   export_op->format = format;
   export_op->file_name = std::move(file_name);
+  export_op->delimiter = delimiter;
+  export_op->quote = quote;
+  export_op->escape = escape;
   return Operator(export_op);
 }
 
@@ -457,7 +469,9 @@ bool LogicalExportExternalFile::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::LogicalExportExternalFile) return false;
   const auto &export_op =
       *static_cast<const LogicalExportExternalFile *>(&node);
-  return (format == export_op.format && file_name == export_op.file_name);
+  return (format == export_op.format && file_name == export_op.file_name &&
+          delimiter == export_op.delimiter && quote == export_op.quote &&
+          escape == export_op.escape);
 }
 
 hash_t LogicalExportExternalFile::Hash() const {
@@ -465,6 +479,9 @@ hash_t LogicalExportExternalFile::Hash() const {
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
   hash = HashUtil::CombineHashes(
       hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
   return hash;
 }
 
@@ -567,11 +584,15 @@ hash_t PhysicalIndexScan::Hash() const {
 // Physical external file scan
 //===--------------------------------------------------------------------===//
 Operator ExternalFileScan::make(oid_t get_id, ExternalFileFormat format,
-                                std::string file_name) {
+                                std::string file_name, char delimiter,
+                                char quote, char escape) {
   auto *get = new ExternalFileScan();
   get->get_id = get_id;
   get->format = format;
   get->file_name = file_name;
+  get->delimiter = delimiter;
+  get->quote = quote;
+  get->escape = escape;
   return Operator(get);
 }
 
@@ -579,7 +600,8 @@ bool ExternalFileScan::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::QueryDerivedScan) return false;
   const auto &get = *static_cast<const ExternalFileScan *>(&node);
   return (get_id == get.get_id && format == get.format &&
-          file_name == get.file_name);
+          file_name == get.file_name && delimiter == get.delimiter &&
+          quote == get.quote && escape == get.escape);
 }
 
 hash_t ExternalFileScan::Hash() const {
@@ -588,6 +610,9 @@ hash_t ExternalFileScan::Hash() const {
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
   hash = HashUtil::CombineHashes(
       hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
   return hash;
 }
 
@@ -845,10 +870,14 @@ Operator PhysicalUpdate::make(
 // PhysicalExportExternalFile
 //===--------------------------------------------------------------------===//
 Operator PhysicalExportExternalFile::make(ExternalFileFormat format,
-                                          std::string file_name) {
+                                          std::string file_name, char delimiter,
+                                          char quote, char escape) {
   auto *export_op = new PhysicalExportExternalFile();
   export_op->format = format;
   export_op->file_name = file_name;
+  export_op->delimiter = delimiter;
+  export_op->quote = quote;
+  export_op->escape = escape;
   return Operator(export_op);
 }
 
@@ -856,7 +885,9 @@ bool PhysicalExportExternalFile::operator==(const BaseOperatorNode &node) {
   if (node.GetType() != OpType::ExportExternalFile) return false;
   const auto &export_op =
       *static_cast<const PhysicalExportExternalFile *>(&node);
-  return (format == export_op.format && file_name == export_op.file_name);
+  return (format == export_op.format && file_name == export_op.file_name &&
+          delimiter == export_op.delimiter && quote == export_op.quote &&
+          escape == export_op.escape);
 }
 
 hash_t PhysicalExportExternalFile::Hash() const {
@@ -864,6 +895,9 @@ hash_t PhysicalExportExternalFile::Hash() const {
   hash = HashUtil::CombineHashes(hash, HashUtil::Hash(&format));
   hash = HashUtil::CombineHashes(
       hash, HashUtil::HashBytes(file_name.data(), file_name.length()));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&delimiter, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&quote, 1));
+  hash = HashUtil::CombineHashes(hash, HashUtil::HashBytes(&escape, 1));
   return hash;
 }
 
diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp
index c2c2dcc399a..671ef94dabd 100644
--- a/src/optimizer/plan_generator.cpp
+++ b/src/optimizer/plan_generator.cpp
@@ -143,7 +143,8 @@ void PlanGenerator::Visit(const ExternalFileScan *op) {
 
       // Create the plan
       output_plan_.reset(
-          new planner::CSVScanPlan(op->file_name, std::move(cols)));
+          new planner::CSVScanPlan(op->file_name, std::move(cols),
+                                   op->delimiter, op->quote, op->escape));
       break;
     }
   }
@@ -388,7 +389,8 @@ void PlanGenerator::Visit(const PhysicalUpdate *op) {
 
 void PlanGenerator::Visit(const PhysicalExportExternalFile *op) {
   unique_ptr<planner::AbstractPlan> export_plan{
-      new planner::ExportExternalFilePlan(op->file_name)};
+      new planner::ExportExternalFilePlan(op->file_name, op->delimiter,
+                                          op->quote, op->escape)};
   export_plan->AddChild(move(children_plans_[0]));
   output_plan_ = move(export_plan);
 }
diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp
index 73c52f9266e..56925c3b117 100644
--- a/src/optimizer/query_to_operator_transformer.cpp
+++ b/src/optimizer/query_to_operator_transformer.cpp
@@ -367,7 +367,8 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
 
     auto get_op =
         std::make_shared<OperatorExpression>(LogicalExternalFileGet::make(
-            GetAndIncreaseGetId(), op->format, op->file_path));
+            GetAndIncreaseGetId(), op->format, op->file_path, op->delimiter,
+            op->quote, op->escape));
 
     auto target_table =
         catalog::Catalog::GetInstance()
@@ -386,8 +387,9 @@ void QueryToOperatorTransformer::Visit(parser::CopyStatement *op) {
     } else {
       op->table->Accept(this);
     }
-    auto export_op = std::make_shared<OperatorExpression>(
-        LogicalExportExternalFile::make(op->format, op->file_path));
+    auto export_op =
+        std::make_shared<OperatorExpression>(LogicalExportExternalFile::make(
+            op->format, op->file_path, op->delimiter, op->quote, op->escape));
     export_op->PushChild(output_expr_);
     output_expr_ = export_op;
   }
diff --git a/src/optimizer/rule_impls.cpp b/src/optimizer/rule_impls.cpp
index 9d0a4624c2a..33fb241df8d 100644
--- a/src/optimizer/rule_impls.cpp
+++ b/src/optimizer/rule_impls.cpp
@@ -455,7 +455,8 @@ void LogicalExternalFileGetToPhysical::Transform(
   const auto *get = input->Op().As<LogicalExternalFileGet>();
 
   auto result_plan = std::make_shared<OperatorExpression>(
-      ExternalFileScan::make(get->get_id, get->format, get->file_name));
+      ExternalFileScan::make(get->get_id, get->format, get->file_name,
+                             get->delimiter, get->quote, get->escape));
 
   PELOTON_ASSERT(input->Children().empty());
 
@@ -837,11 +838,12 @@ void LogicalExportToPhysicalExport::Transform(
     std::shared_ptr<OperatorExpression> input,
     std::vector<std::shared_ptr<OperatorExpression>> &transformed,
     UNUSED_ATTRIBUTE OptimizeContext *context) const {
-  const auto *logical_export = input->Op().As<LogicalExportExternalFile>();
+  const auto *export_op = input->Op().As<LogicalExportExternalFile>();
 
   auto result_plan =
       std::make_shared<OperatorExpression>(PhysicalExportExternalFile::make(
-          logical_export->format, logical_export->file_name));
+          export_op->format, export_op->file_name, export_op->delimiter,
+          export_op->quote, export_op->escape));
 
   std::vector<std::shared_ptr<OperatorExpression>> children = input->Children();
   PELOTON_ASSERT(children.size() == 1);
diff --git a/src/parser/postgresparser.cpp b/src/parser/postgresparser.cpp
index ffbea10e39d..069285fc1a4 100644
--- a/src/parser/postgresparser.cpp
+++ b/src/parser/postgresparser.cpp
@@ -1508,6 +1508,8 @@ parser::PrepareStatement *PostgresParser::PrepareTransform(PrepareStmt *root) {
 parser::CopyStatement *PostgresParser::CopyTransform(CopyStmt *root) {
   static constexpr char kDelimiterTok[] = "delimiter";
   static constexpr char kFormatTok[] = "format";
+  static constexpr char kQuoteTok[] = "quote";
+  static constexpr char kEscapeTok[] = "escape";
 
   // The main return value
   auto *result = new CopyStatement();
@@ -1538,12 +1540,24 @@ parser::CopyStatement *PostgresParser::CopyTransform(CopyStmt *root) {
       auto *format_val = reinterpret_cast<value *>(def_elem->arg);
       result->format = StringToExternalFileFormat(format_val->val.str);
     }
+
+    // Check quote
+    if (strncmp(def_elem->defname, kQuoteTok, sizeof(kQuoteTok)) == 0) {
+      auto *quote_val = reinterpret_cast<value *>(def_elem->arg);
+      result->quote = *quote_val->val.str;
+    }
+
+    // Check escape
+    if (strncmp(def_elem->defname, kEscapeTok, sizeof(kEscapeTok)) == 0) {
+      auto *escape_val = reinterpret_cast<value *>(def_elem->arg);
+      result->escape = *escape_val->val.str;
+    }
   }
 
   return result;
 }
 
-// Analyze statment is parsed with vacuum statment.
+// Analyze statment is parsed with vacuum statement.
 parser::AnalyzeStatement *PostgresParser::VacuumTransform(VacuumStmt *root) {
   if (root->options != VACOPT_ANALYZE) {
     throw NotImplementedException("Vacuum not supported.");
diff --git a/test/codegen/csv_scan_test.cpp b/test/codegen/csv_scan_test.cpp
index f40fc823e80..89da65be90d 100644
--- a/test/codegen/csv_scan_test.cpp
+++ b/test/codegen/csv_scan_test.cpp
@@ -14,6 +14,9 @@
 
 #include "codegen/util/csv_scanner.h"
 #include "common/timer.h"
+#include "function/date_functions.h"
+#include "function/numeric_functions.h"
+#include "function/string_functions.h"
 #include "util/file_util.h"
 
 namespace peloton {
@@ -92,25 +95,59 @@ TEST_F(CSVScanTest, SimpleNumericScan) {
 
 TEST_F(CSVScanTest, MixedStringScan) {
   // Create a temporary CSV file
-  std::vector<std::string> rows = {"1,2,3,test", "4,5,6,\"test\"",
-                                   "8,9,10,\"test\nnewline\ninquote\""};
+  std::vector<std::string> rows = {
+      "1,1994-01-01,3,test", "4,2018-01-01,6,\"test\"",
+      "8,2016-05-05,10,\"test\nnewline\ninquote\""};
   std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
-                                            {type::TypeId::INTEGER, false},
+                                            {type::TypeId::DATE, false},
                                             {type::TypeId::INTEGER, false},
                                             {type::TypeId::VARCHAR, false}};
 
-  uint32_t rows_read = 0;
+  std::vector<std::string> rows_read;
   IterateAsCSV(rows, types, [&rows_read, &types](
                                 const codegen::util::CSVScanner::Column *cols) {
-    rows_read++;
+    std::string row;
     for (uint32_t i = 0; i < types.size(); i++) {
       EXPECT_FALSE(cols[i].is_null);
       EXPECT_GT(cols[i].len, 0);
+      if (i > 0) row.append(",");
+      switch (types[i].type_id) {
+        case type::TypeId::INTEGER: {
+          row.append(std::to_string(function::NumericFunctions::InputInteger(
+              types[i], cols[i].ptr, cols[i].len)));
+          break;
+        }
+        case type::TypeId::DATE: {
+          auto raw_date = function::DateFunctions::InputDate(
+              types[i], cols[i].ptr, cols[i].len);
+          int32_t year, month, day;
+          function::DateFunctions::JulianToDate(raw_date, year, month, day);
+          row.append(StringUtil::Format("%u-%02u-%02u", year, month, day));
+          break;
+        }
+        case type::TypeId::VARCHAR: {
+          auto ret = function::StringFunctions::InputString(
+              types[i], cols[i].ptr, cols[i].len);
+          row.append(std::string{ret.str, ret.length - 1});
+          break;
+        }
+        default: {
+          throw Exception{StringUtil::Format(
+              "Did not expect column type '%s' in test. Did you forget to "
+              "modify the switch statement to handle a column type you've added"
+              "in the test case?",
+              TypeIdToString(types[i].type_id).c_str())};
+        }
+      }
     }
+    rows_read.push_back(row);
   });
 
   // Check
-  EXPECT_EQ(rows.size(), rows_read);
+  ASSERT_EQ(rows.size(), rows_read.size());
+  for (uint32_t i = 0; i < rows.size(); i++) {
+    EXPECT_EQ(rows[i], rows_read[i]);
+  }
 }
 
 }  // namespace test
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 87450683afc..0057721352b 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -12,6 +12,8 @@
 
 #include "codegen/testing_codegen_util.h"
 
+#include <random>
+
 #include "codegen/function_builder.h"
 #include "codegen/type/tinyint_type.h"
 #include "codegen/type/smallint_type.h"
@@ -198,8 +200,9 @@ void TestInputIntegral(
 
   // Default overflow tests
   std::vector<std::string> overflow_tests = {
-      std::to_string(static_cast<int64_t>(std::numeric_limits<T>::min()) - 1),
-      std::to_string(static_cast<int64_t>(std::numeric_limits<T>::max()) + 1)};
+      std::to_string(std::numeric_limits<T>::min()) + "1",
+      std::to_string(std::numeric_limits<T>::max()) + "1",
+      "123456789123456789123456789"};
   overflow_tests.insert(overflow_tests.end(), extra_overflow_tests.begin(),
                         extra_overflow_tests.end());
 
diff --git a/test/common/internal_types_test.cpp b/test/common/internal_types_test.cpp
index c9782514fc6..7a616315e20 100644
--- a/test/common/internal_types_test.cpp
+++ b/test/common/internal_types_test.cpp
@@ -325,8 +325,8 @@ TEST_F(InternalTypesTests, PlanNodeTypeTest) {
       PlanNodeType::ORDERBY, PlanNodeType::PROJECTION,
       PlanNodeType::MATERIALIZE, PlanNodeType::LIMIT, PlanNodeType::DISTINCT,
       PlanNodeType::SETOP, PlanNodeType::APPEND, PlanNodeType::AGGREGATE_V2,
-      PlanNodeType::HASH, PlanNodeType::RESULT, PlanNodeType::COPY,
-      PlanNodeType::MOCK};
+      PlanNodeType::HASH, PlanNodeType::RESULT,
+      PlanNodeType::EXPORT_EXTERNAL_FILE, PlanNodeType::MOCK};
 
   // Make sure that ToString and FromString work
   for (auto val : list) {
diff --git a/test/function/decimal_functions_test.cpp b/test/function/numeric_functions_test.cpp
similarity index 93%
rename from test/function/decimal_functions_test.cpp
rename to test/function/numeric_functions_test.cpp
index 1ef4f7cd87c..35622209fde 100644
--- a/test/function/decimal_functions_test.cpp
+++ b/test/function/numeric_functions_test.cpp
@@ -2,18 +2,18 @@
 //
 //                         Peloton
 //
-// decimal_functions_test.cpp
+// numeric_functions_test.cpp
 //
 // Identification: test/expression/decimal_functions_test.cpp
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
-#include <set>
+#include <cmath>
+#include <random>
 #include <string>
 #include <vector>
-#include <cmath>
 
 #include "common/harness.h"
 
@@ -21,7 +21,6 @@
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "type/value_factory.h"
-#include "util/string_util.h"
 
 using ::testing::NotNull;
 using ::testing::Return;
@@ -29,9 +28,9 @@ using ::testing::Return;
 namespace peloton {
 namespace test {
 
-class DecimalFunctionsTests : public PelotonTest {};
+class NumericFunctionsTest : public PelotonTest {};
 
-TEST_F(DecimalFunctionsTests, SqrtTest) {
+TEST_F(NumericFunctionsTest, SqrtTest) {
   const double column_val = 9.0;
   const double expected = sqrt(9.0);
   std::vector<type::Value> args = {
@@ -47,7 +46,7 @@ TEST_F(DecimalFunctionsTests, SqrtTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(DecimalFunctionsTests, FloorTest) {
+TEST_F(NumericFunctionsTest, FloorTest) {
   // Testing Floor with DecimalTypes
   std::vector<double> inputs = {9.5, 3.3, -4.4, 0.0};
   std::vector<type::Value> args;
@@ -89,7 +88,7 @@ TEST_F(DecimalFunctionsTests, FloorTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(DecimalFunctionsTests, RoundTest) {
+TEST_F(NumericFunctionsTest, RoundTest) {
   std::vector<double> column_vals = {9.5, 3.3, -4.4, -5.5, 0.0};
   std::vector<type::Value> args;
   for (double val : column_vals) {
@@ -105,7 +104,7 @@ TEST_F(DecimalFunctionsTests, RoundTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(DecimalFunctionsTests,AbsTestDouble) {
+TEST_F(NumericFunctionsTest,AbsTestDouble) {
   std::vector<double> doubleTestInputs = {9.5, -2.5, -4.4, 0.0};
   std::vector<type::Value> args;
   for (double in : doubleTestInputs) {
@@ -121,7 +120,7 @@ TEST_F(DecimalFunctionsTests,AbsTestDouble) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(DecimalFunctionsTests, AbsTestInt) {
+TEST_F(NumericFunctionsTest, AbsTestInt) {
   std::vector<int64_t> bigIntTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int32_t> intTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int16_t> smallIntTestInputs = {-20, -15, -10, 0, 10, 20};
@@ -158,7 +157,7 @@ TEST_F(DecimalFunctionsTests, AbsTestInt) {
   }
 }
 
-TEST_F(DecimalFunctionsTests, CeilTestDouble) {
+TEST_F(NumericFunctionsTest, CeilTestDouble) {
   std::vector<double> doubleTestInputs = {-36.0, -35.222, -0.7, -0.5, -0.2,
                                           0.0, 0.2, 0.5, 0.7, 35.2, 36.0,
                                           37.2222};
@@ -175,7 +174,7 @@ TEST_F(DecimalFunctionsTests, CeilTestDouble) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(DecimalFunctionsTests, CeilTestInt) {
+TEST_F(NumericFunctionsTest, CeilTestInt) {
   std::vector<int64_t> bigIntTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int32_t> intTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int16_t> smallIntTestInputs = {-20, -15, -10, 0, 10, 20};

From 342c4caa69d41e37ba372df16f25d119c2e70505 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 22 May 2018 11:24:38 -0400
Subject: [PATCH 33/42] Fixes after rebase

---
 src/codegen/codegen.cpp                       |   2 +-
 src/codegen/operator/csv_scan_translator.cpp  | 103 +++++++++++++-----
 src/codegen/proxy/csv_scanner_proxy.cpp       |   7 +-
 .../codegen/operator/csv_scan_translator.h    |  12 +-
 4 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index 0f8b426b61c..b35838d16e1 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -195,7 +195,7 @@ llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
     memcmp_fn = RegisterBuiltin(
         kMemcmpFnName,
         llvm::TypeBuilder<decltype(memcmp), false>::get(GetContext()),
-        reinterpret_cast<void *>(printf));
+        reinterpret_cast<void *>(memcmp));
 #if GCC_AT_LEAST_6
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index 9e8880f70c0..f8687518057 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -20,6 +20,7 @@
 #include "codegen/proxy/csv_scanner_proxy.h"
 #include "codegen/proxy/runtime_functions_proxy.h"
 #include "codegen/type/sql_type.h"
+#include "codegen/vector.h"
 #include "planner/csv_scan_plan.h"
 
 namespace peloton {
@@ -28,23 +29,25 @@ namespace codegen {
 CSVScanTranslator::CSVScanTranslator(const planner::CSVScanPlan &scan,
                                      CompilationContext &context,
                                      Pipeline &pipeline)
-    : OperatorTranslator(context, pipeline), scan_(scan) {
+    : OperatorTranslator(scan, context, pipeline) {
   // Register the CSV scanner instance
-  auto &runtime_state = context.GetRuntimeState();
-  scanner_id_ = runtime_state.RegisterState(
+  auto &query_state = context.GetQueryState();
+  scanner_id_ = query_state.RegisterState(
       "csvScanner", CSVScannerProxy::GetType(GetCodeGen()));
 
   // Load information about the attributes output by the scan plan
-  scan_.GetAttributes(output_attributes_);
+  scan.GetAttributes(output_attributes_);
 }
 
-void CSVScanTranslator::InitializeState() {
+void CSVScanTranslator::InitializeQueryState() {
   auto &codegen = GetCodeGen();
 
+  auto &scan = GetPlanAs<planner::CSVScanPlan>();
+
   // Arguments
   llvm::Value *scanner_ptr = LoadStatePtr(scanner_id_);
-  llvm::Value *exec_ctx_ptr = GetCompilationContext().GetExecutorContextPtr();
-  llvm::Value *file_path = codegen.ConstString(scan_.GetFileName(), "filePath");
+  llvm::Value *exec_ctx_ptr = GetExecutorContextPtr();
+  llvm::Value *file_path = codegen.ConstString(scan.GetFileName(), "filePath");
 
   auto num_cols = static_cast<uint32_t>(output_attributes_.size());
 
@@ -71,20 +74,24 @@ void CSVScanTranslator::InitializeState() {
   // Cast the runtime type to an opaque void*. This is because we're calling
   // into pre-compiled C++ that doesn't know that the dynamically generated
   // RuntimeState* looks like.
-  llvm::Value *runtime_state_ptr = codegen->CreatePointerCast(
+  llvm::Value *query_state_ptr = codegen->CreatePointerCast(
       codegen.GetState(), codegen.VoidType()->getPointerTo());
 
   // Call CSVScanner::Init()
   codegen.Call(CSVScannerProxy::Init,
                {scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
-                codegen.Const32(num_cols), consumer_func, runtime_state_ptr,
-                codegen.Const8(scan_.GetDelimiterChar()),
-                codegen.Const8(scan_.GetQuoteChar()),
-                codegen.Const8(scan_.GetEscapeChar())});
+                codegen.Const32(num_cols), consumer_func, query_state_ptr,
+                codegen.Const8(scan.GetDelimiterChar()),
+                codegen.Const8(scan.GetQuoteChar()),
+                codegen.Const8(scan.GetEscapeChar())});
 }
 
 namespace {
 
+/**
+ * This is a deferred column access class configured to load the contents of a
+ * given column.
+ */
 class CSVColumnAccess : public RowBatch::AttributeAccess {
  public:
   CSVColumnAccess(const planner::AttributeInfo *ai, llvm::Value *csv_columns,
@@ -94,6 +101,12 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
         null_str_(std::move(null_str)),
         runtime_null_(runtime_null_str) {}
 
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Accessors
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
   llvm::Value *Columns() const { return csv_columns_; }
 
   uint32_t ColumnIndex() const { return ai_->attribute_id; }
@@ -102,6 +115,25 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
 
   const type::SqlType &SqlType() const { return ai_->type.GetSqlType(); }
 
+  //////////////////////////////////////////////////////////////////////////////
+  ///
+  /// Logic
+  ///
+  //////////////////////////////////////////////////////////////////////////////
+
+  /**
+   * Check if a column's value is considered NULL. Given a pointer to the
+   * column's string value, and the length of the string, this function will
+   * check if the column's value is determined to be NULL. This is done by
+   * comparing the column's contents with the NULL string configured in the
+   * CSV scan plan (i.e., provided by the user).
+   *
+   * @param codegen The codegen instance
+   * @param data_ptr A pointer to the column's string value
+   * @param data_len The length of the column's string value
+   * @return True if the column is equivalent to the NULL string. False
+   * otherwise.
+   */
   llvm::Value *IsNull(CodeGen &codegen, llvm::Value *data_ptr,
                       llvm::Value *data_len) const {
     uint32_t null_str_len = static_cast<uint32_t>(null_str_.length());
@@ -127,6 +159,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
     return check_null.BuildPHI(cmp_res, codegen.ConstBool(false));
   }
 
+  /**
+   * Load the value of the given column with the given type, ignoring a null
+   * check.
+   *
+   * @param codegen The codegen instance
+   * @param type The SQL type of the column
+   * @param data_ptr A pointer to the column's string representation
+   * @param data_len The length of the column's string representation
+   * @return The parsed value
+   */
   Value LoadValueIgnoreNull(CodeGen &codegen, llvm::Value *type,
                             llvm::Value *data_ptr,
                             llvm::Value *data_len) const {
@@ -144,6 +186,15 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
     }
   }
 
+  /**
+   * Access this column in the given row. In reality, this function pulls out
+   * the column information from the CSVScanner state and loads/parses the
+   * column's value.
+   *
+   * @param codegen The codegen instance
+   * @param row The row. This isn't used.
+   * @return The value of the column
+   */
   Value Access(CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {
     // Load the type, data pointer and length values for the column
     auto *type = codegen->CreateConstInBoundsGEP2_32(
@@ -178,22 +229,31 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
   }
 
  private:
+  // Information about the attribute
   const planner::AttributeInfo *ai_;
+
+  // A pointer to the array of columns
   llvm::Value *csv_columns_;
+
+  // The NULL string configured for the CSV scan
   const std::string null_str_;
+
+  // The runtime NULL string (a constant in LLVM)
   llvm::Value *runtime_null_;
 };
 
 }  // namespace
 
+// We define the callback/consumer function for CSV parsing here
 void CSVScanTranslator::DefineAuxiliaryFunctions() {
   CodeGen &codegen = GetCodeGen();
   CompilationContext &cc = GetCompilationContext();
 
+  auto &scan = GetPlanAs<planner::CSVScanPlan>();
+
   // Define consumer function here
   std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
-      {"runtimeState",
-       cc.GetRuntimeState().FinalizeType(codegen)->getPointerTo()}};
+      {"queryState", cc.GetQueryState().GetType()->getPointerTo()}};
   FunctionDeclaration decl{codegen.GetCodeContext(), "consumer",
                            FunctionDeclaration::Visibility::Internal,
                            codegen.VoidType(), arg_types};
@@ -209,13 +269,13 @@ void CSVScanTranslator::DefineAuxiliaryFunctions() {
     llvm::Value *cols = codegen->CreateLoad(codegen->CreateConstInBoundsGEP2_32(
         CSVScannerProxy::GetType(codegen), LoadStatePtr(scanner_id_), 0, 1));
 
-    llvm::Value *null_str = codegen.ConstString(scan_.GetNullString(), "null");
+    llvm::Value *null_str = codegen.ConstString(scan.GetNullString(), "null");
 
     // Add accessors for all columns into the row batch
     std::vector<CSVColumnAccess> column_accessors;
     for (uint32_t i = 0; i < output_attributes_.size(); i++) {
       column_accessors.emplace_back(output_attributes_[i], cols,
-                                    scan_.GetNullString(), null_str);
+                                    scan.GetNullString(), null_str);
     }
     for (uint32_t i = 0; i < output_attributes_.size(); i++) {
       one.AddAttribute(output_attributes_[i], &column_accessors[i]);
@@ -238,17 +298,10 @@ void CSVScanTranslator::Produce() const {
   GetCodeGen().Call(CSVScannerProxy::Produce, {scanner_ptr});
 }
 
-void CSVScanTranslator::TearDownState() {
+void CSVScanTranslator::TearDownQueryState() {
   auto *scanner_ptr = LoadStatePtr(scanner_id_);
   GetCodeGen().Call(CSVScannerProxy::Destroy, {scanner_ptr});
 }
 
-std::string CSVScanTranslator::GetName() const {
-  return StringUtil::Format(
-      "CSVScan(file: '%s', delimiter: '%c', quote: '%c', escape: '%c')",
-      scan_.GetFileName().c_str(), scan_.GetDelimiterChar(),
-      scan_.GetQuoteChar(), scan_.GetEscapeChar());
-}
-
 }  // namespace codegen
-}  // namespace peloton
\ No newline at end of file
+}  // namespace peloton
diff --git a/src/codegen/proxy/csv_scanner_proxy.cpp b/src/codegen/proxy/csv_scanner_proxy.cpp
index f57a11fe014..c13914fbecd 100644
--- a/src/codegen/proxy/csv_scanner_proxy.cpp
+++ b/src/codegen/proxy/csv_scanner_proxy.cpp
@@ -18,11 +18,10 @@
 namespace peloton {
 namespace codegen {
 
-DEFINE_TYPE(CSVScanner, "util::CSVScanner", MEMBER(opaque1), MEMBER(cols),
-            MEMBER(opaque2));
+DEFINE_TYPE(CSVScanner, "util::CSVScanner", opaque1, cols, opaque2);
 
-DEFINE_TYPE(CSVScannerColumn, "util::CSVScanner::Column", MEMBER(type),
-            MEMBER(ptr), MEMBER(len), MEMBER(is_null));
+DEFINE_TYPE(CSVScannerColumn, "util::CSVScanner::Column", type, ptr, len,
+            is_null);
 
 DEFINE_METHOD(peloton::codegen::util, CSVScanner, Init);
 DEFINE_METHOD(peloton::codegen::util, CSVScanner, Destroy);
diff --git a/src/include/codegen/operator/csv_scan_translator.h b/src/include/codegen/operator/csv_scan_translator.h
index 3389e1e5c09..9b7efca8fc6 100644
--- a/src/include/codegen/operator/csv_scan_translator.h
+++ b/src/include/codegen/operator/csv_scan_translator.h
@@ -36,7 +36,7 @@ class CSVScanTranslator : public OperatorTranslator {
   CSVScanTranslator(const planner::CSVScanPlan &scan,
                     CompilationContext &context, Pipeline &pipeline);
 
-  void InitializeState() override;
+  void InitializeQueryState() override;
 
   void DefineAuxiliaryFunctions() override;
 
@@ -48,20 +48,14 @@ class CSVScanTranslator : public OperatorTranslator {
   void Consume(ConsumerContext &, RowBatch::Row &) const override {}
 
   // Similar to InitializeState(), file scans don't have any state
-  void TearDownState() override;
-
-  // Get a stringified version of this translator
-  std::string GetName() const override;
+  void TearDownQueryState() override;
 
  private:
-  // The plan
-  const planner::CSVScanPlan &scan_;
-
   // The set of attributes output by the csv scan
   std::vector<const planner::AttributeInfo *> output_attributes_;
 
   // The scanner state ID
-  RuntimeState::StateID scanner_id_;
+  QueryState::Id scanner_id_;
 
   // The generated CSV scan consumer function
   llvm::Function *consumer_func_;

From 669cca35f7ca342dc7c142178194d00c500d0a8f Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 24 May 2018 17:39:00 -0400
Subject: [PATCH 34/42] Simple function to convert tuple to string CSV

---
 src/codegen/buffering_consumer.cpp       | 9 +++++++++
 src/include/codegen/buffering_consumer.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/src/codegen/buffering_consumer.cpp b/src/codegen/buffering_consumer.cpp
index 1edf1096b00..7316b8261f9 100644
--- a/src/codegen/buffering_consumer.cpp
+++ b/src/codegen/buffering_consumer.cpp
@@ -40,6 +40,15 @@ WrappedTuple &WrappedTuple::operator=(const WrappedTuple &o) {
   return *this;
 }
 
+std::string WrappedTuple::ToCSV() const {
+  std::string ret;
+  for (uint32_t i = 0; i < tuple_.size(); i++) {
+    if (i != 0) ret.append(",");
+    ret.append(tuple_[i].ToString());
+  }
+  return ret;
+}
+
 //===----------------------------------------------------------------------===//
 // BufferTuple() Proxy
 //===----------------------------------------------------------------------===//
diff --git a/src/include/codegen/buffering_consumer.h b/src/include/codegen/buffering_consumer.h
index 0e537486a3e..5238563c45e 100644
--- a/src/include/codegen/buffering_consumer.h
+++ b/src/include/codegen/buffering_consumer.h
@@ -42,6 +42,8 @@ class WrappedTuple : public ContainerTuple<std::vector<peloton::type::Value>> {
   // Assignment
   WrappedTuple &operator=(const WrappedTuple &o);
 
+  std::string ToCSV() const;
+
   // The tuple
   std::vector<peloton::type::Value> tuple_;
 };

From 3cd74b64dd8e953cc4194a86977e9e56336babf9 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 24 May 2018 17:39:31 -0400
Subject: [PATCH 35/42] Fix void* -> i8* conversion

---
 src/codegen/operator/csv_scan_translator.cpp | 4 ++--
 src/include/codegen/proxy/type_builder.h     | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/codegen/operator/csv_scan_translator.cpp b/src/codegen/operator/csv_scan_translator.cpp
index f8687518057..8084fca9bb7 100644
--- a/src/codegen/operator/csv_scan_translator.cpp
+++ b/src/codegen/operator/csv_scan_translator.cpp
@@ -74,8 +74,8 @@ void CSVScanTranslator::InitializeQueryState() {
   // Cast the runtime type to an opaque void*. This is because we're calling
   // into pre-compiled C++ that doesn't know that the dynamically generated
   // RuntimeState* looks like.
-  llvm::Value *query_state_ptr = codegen->CreatePointerCast(
-      codegen.GetState(), codegen.VoidType()->getPointerTo());
+  llvm::Value *query_state_ptr =
+      codegen->CreatePointerCast(codegen.GetState(), codegen.VoidPtrType());
 
   // Call CSVScanner::Init()
   codegen.Call(CSVScannerProxy::Init,
diff --git a/src/include/codegen/proxy/type_builder.h b/src/include/codegen/proxy/type_builder.h
index caab2705f72..cc30f6b5f97 100644
--- a/src/include/codegen/proxy/type_builder.h
+++ b/src/include/codegen/proxy/type_builder.h
@@ -53,6 +53,9 @@ DEFINE_PRIMITIVE_BUILDER(unsigned long, Int64);
 DEFINE_PRIMITIVE_BUILDER(long long, Int64);
 DEFINE_PRIMITIVE_BUILDER(unsigned long long, Int64);
 DEFINE_PRIMITIVE_BUILDER(double, Double);
+DEFINE_PRIMITIVE_BUILDER(void *, VoidPtr);
+DEFINE_PRIMITIVE_BUILDER(char *, CharPtr);
+DEFINE_PRIMITIVE_BUILDER(unsigned char *, CharPtr);
 #undef DEFINE_PRIMITIVE_BUILDER
 
 /// Const

From dbb042efa80913b28da5e9ad57aed5ec2b09204e Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Thu, 24 May 2018 17:40:02 -0400
Subject: [PATCH 36/42] More tests

---
 test/codegen/csv_scan_test.cpp              |   6 --
 test/codegen/csv_scan_translator_test.cpp   | 102 ++++++++++++++++++++
 test/codegen/testing_codegen_util.cpp       |   5 +
 test/include/codegen/testing_codegen_util.h |  23 +++--
 4 files changed, 124 insertions(+), 12 deletions(-)
 create mode 100644 test/codegen/csv_scan_translator_test.cpp

diff --git a/test/codegen/csv_scan_test.cpp b/test/codegen/csv_scan_test.cpp
index 89da65be90d..2cebff0873e 100644
--- a/test/codegen/csv_scan_test.cpp
+++ b/test/codegen/csv_scan_test.cpp
@@ -32,12 +32,6 @@ struct State {
   CallbackFn callback;
 };
 
-struct TempFileHandle {
-  std::string name;
-  TempFileHandle(std::string _name) : name(_name) {}
-  ~TempFileHandle() { boost::filesystem::remove(name); }
-};
-
 void CSVRowCallback(void *s) {
   auto *state = reinterpret_cast<State *>(s);
   state->callback(state->scanner->GetColumns());
diff --git a/test/codegen/csv_scan_translator_test.cpp b/test/codegen/csv_scan_translator_test.cpp
new file mode 100644
index 00000000000..66da8ead0d5
--- /dev/null
+++ b/test/codegen/csv_scan_translator_test.cpp
@@ -0,0 +1,102 @@
+//===----------------------------------------------------------------------===//
+//
+//                         Peloton
+//
+//
+// csv_scan_translator_test.cpp
+//
+// Identification: test/codegen/csv_scan_translator_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/testing_codegen_util.h"
+
+#include "planner/csv_scan_plan.h"
+#include "planner/insert_plan.h"
+#include "planner/seq_scan_plan.h"
+#include "util/file_util.h"
+
+namespace peloton {
+namespace test {
+
+class CSVScanTranslatorTest : public PelotonCodeGenTest {
+ public:
+  CSVScanTranslatorTest() : PelotonCodeGenTest() {}
+
+  oid_t TestTableId1() { return test_table_oids[0]; }
+  uint32_t NumRowsInTestTable() const { return num_rows_to_insert; }
+
+ private:
+  uint32_t num_rows_to_insert = 64;
+};
+
+TEST_F(CSVScanTranslatorTest, IntCsvScan) {
+  // Test input
+  std::vector<std::string> rows = {"1,2,3.9,four",
+                                   "5,6,7.4,eight",
+                                   "9,10,11.1,\"twelve\""};
+  std::string csv_data;
+  for (const auto &row : rows) {
+    csv_data.append(row).append("\n");
+  }
+
+  ///////////////////////////////////////////////////
+  /// First insert contents of CSV into test table
+  ///////////////////////////////////////////////////
+  {
+    // Write the contents into a temporary file
+    TempFileHandle fh{FileUtil::WriteTempFile(csv_data, "", "tmp")};
+
+    // clang-format off
+    // NOTE: this schema has to match that of the test table!
+    std::vector<planner::CSVScanPlan::ColumnInfo> cols = {
+        planner::CSVScanPlan::ColumnInfo{.name = "1", .type = peloton::type::TypeId::INTEGER},
+        planner::CSVScanPlan::ColumnInfo{.name = "2", .type = peloton::type::TypeId::INTEGER},
+        planner::CSVScanPlan::ColumnInfo{.name = "3", .type = peloton::type::TypeId::DECIMAL},
+        planner::CSVScanPlan::ColumnInfo{.name = "4", .type = peloton::type::TypeId::VARCHAR},
+    };
+    // clang-format on
+    std::unique_ptr<planner::AbstractPlan> csv_scan{
+        new planner::CSVScanPlan(fh.name, std::move(cols), ',')};
+    std::unique_ptr<planner::AbstractPlan> insert{
+        new planner::InsertPlan(&GetTestTable(TestTableId1()))};
+
+    insert->AddChild(std::move(csv_scan));
+
+    planner::BindingContext ctx;
+    insert->PerformBinding(ctx);
+
+    codegen::BufferingConsumer consumer{{0, 1, 2, 3}, ctx};
+
+    // Execute insert
+    CompileAndExecute(*insert, consumer);
+    ASSERT_EQ(0, consumer.GetOutputTuples().size());
+  }
+
+  ///////////////////////////////////////////////////
+  /// Now scan test table, comparing results
+  ///////////////////////////////////////////////////
+  {
+    std::unique_ptr<planner::AbstractPlan> scan{new planner::SeqScanPlan(
+        &GetTestTable(TestTableId1()), nullptr, {0, 1, 2, 3})};
+
+    planner::BindingContext ctx;
+    scan->PerformBinding(ctx);
+
+    codegen::BufferingConsumer consumer{{0, 1, 2, 3}, ctx};
+
+    // Execute insert
+    CompileAndExecute(*scan, consumer);
+
+    const auto &output = consumer.GetOutputTuples();
+    ASSERT_EQ(rows.size(), output.size());
+    for (uint32_t i = 0; i < rows.size(); i++) {
+      EXPECT_EQ(rows[i], output[i].ToCSV());
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace peloton
diff --git a/test/codegen/testing_codegen_util.cpp b/test/codegen/testing_codegen_util.cpp
index 316b46331d6..a19598e33ed 100644
--- a/test/codegen/testing_codegen_util.cpp
+++ b/test/codegen/testing_codegen_util.cpp
@@ -12,6 +12,8 @@
 
 #include "codegen/testing_codegen_util.h"
 
+#include <boost/filesystem.hpp>
+
 #include "catalog/table_catalog.h"
 #include "codegen/proxy/runtime_functions_proxy.h"
 #include "codegen/proxy/value_proxy.h"
@@ -28,6 +30,9 @@
 namespace peloton {
 namespace test {
 
+TempFileHandle::TempFileHandle(std::string _name) : name(_name) {}
+TempFileHandle::~TempFileHandle() { boost::filesystem::remove(name); }
+
 //===----------------------------------------------------------------------===//
 // PELOTON CODEGEN TEST
 //===----------------------------------------------------------------------===//
diff --git a/test/include/codegen/testing_codegen_util.h b/test/include/codegen/testing_codegen_util.h
index 5dc427f03b1..a017fdedfa5 100644
--- a/test/include/codegen/testing_codegen_util.h
+++ b/test/include/codegen/testing_codegen_util.h
@@ -6,7 +6,7 @@
 //
 // Identification: test/include/codegen/testing_codegen_util.h
 //
-// Copyright (c) 2015-17, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -40,11 +40,22 @@ using ConstExpressionPtr =
 using PlanPtr = std::unique_ptr<planner::AbstractPlan>;
 using ConstPlanPtr = std::unique_ptr<const planner::AbstractPlan>;
 
-//===----------------------------------------------------------------------===//
-// Common base class for all codegen tests. This class four test tables that all
-// the codegen components use. Their ID's are available through the oid_t
-// enumeration.
-//===----------------------------------------------------------------------===//
+/**
+ * This is a scoped file handle that automatically deletes/removes the file
+ * with the given name when the class goes out of scope and the destructor is
+ * called.
+ */
+struct TempFileHandle {
+  std::string name;
+  TempFileHandle(std::string _name);
+  ~TempFileHandle();
+};
+
+/**
+ * Common base class for all codegen tests. This class four test tables that all
+ * the codegen components use. Their ID's are available through the oid_t
+ * enumeration.
+ */
 class PelotonCodeGenTest : public PelotonTest {
  public:
   std::string test_db_name = "peloton_codegen";

From 985d329d5f4a82161ffa24cc9e2af016634a3b64 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 29 May 2018 16:59:51 -0400
Subject: [PATCH 37/42] Address reviews

---
 src/codegen/codegen.cpp                     | 27 ++++++++++++---------
 src/include/codegen/codegen.h               | 14 +++++------
 test/function/numeric_functions_test.cpp    | 16 ++++++------
 test/include/codegen/testing_codegen_util.h |  8 +++---
 4 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index b35838d16e1..b810fd4c092 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -69,17 +69,6 @@ llvm::Value *CodeGen::ConstString(const std::string &str_val,
   return GetBuilder().CreateInBoundsGEP(global_var, {Const32(0), Const32(0)});
 }
 
-llvm::Value *CodeGen::ConstType(const type::Type &type) {
-  auto iter = type_variables_.find(type);
-  if (iter != type_variables_.end()) {
-    return iter->second;
-  }
-  const type::Type t = type;
-  llvm::Value *ret = ConstGenericBytes(&type, sizeof(type), "type");
-  type_variables_.insert(std::make_pair(t, ret));
-  return ret;
-}
-
 llvm::Value *CodeGen::ConstGenericBytes(const void *data, uint32_t length,
                                         const std::string &name) const {
   // Create the constant data array that wraps the input data
@@ -164,6 +153,14 @@ llvm::Value *CodeGen::Printf(const std::string &format,
   auto *printf_fn = LookupBuiltin("printf");
   if (printf_fn == nullptr) {
 #if GCC_AT_LEAST_6
+// In newer GCC versions (i.e., GCC 6+), function attributes are part of the
+// type system and are attached to the function signature. For example, printf()
+// comes with the "noexcept" attribute. Moreover, GCC 6+ will complain when
+// attributes attached to a function (e.g., noexcept()) are not used at
+// their call-site. Below, we use decltype(printf) to get the C/C++ function
+// type of printf(...), but we discard the attributes since we don't need
+// them. Hence, on GCC 6+, compilation will fail without adding the
+// "-Wignored-attributes" flag. So, we add it here only.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wignored-attributes"
 #endif
@@ -189,6 +186,14 @@ llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
   auto *memcmp_fn = LookupBuiltin(kMemcmpFnName);
   if (memcmp_fn == nullptr) {
 #if GCC_AT_LEAST_6
+// In newer GCC versions (i.e., GCC 6+), function attributes are part of the
+// type system and are attached to the function signature. For example, memcmp()
+// comes with the "throw()" attribute, among many others. Moreover, GCC 6+ will
+// complain when attributes attached to a function are not used at their
+// call-site. Below, we use decltype(memcmp) to get the C/C++ function type
+// of memcmp(...), but we discard the attributes since we don't need them.
+// Hence, on GCC 6+, compilation will fail without adding the
+// "-Wignored-attributes" flag. So, we add it here only.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wignored-attributes"
 #endif
diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h
index 037e01dbe11..9a56edf5dfd 100644
--- a/src/include/codegen/codegen.h
+++ b/src/include/codegen/codegen.h
@@ -59,9 +59,10 @@ class CppProxyMember {
   uint32_t slot_;
 };
 
-//===----------------------------------------------------------------------===//
-// The main wrapper around LLVM's IR Builder to generate IR
-//===----------------------------------------------------------------------===//
+/**
+ * The main API used to generate code in Peloton. Provides a thin wrapper around
+ * LLVM's IR Builder to generate IR.
+ */
 class CodeGen {
  public:
   /// Constructor and destructor
@@ -89,7 +90,8 @@ class CodeGen {
   }
   llvm::Type *ArrayType(llvm::Type *type, uint32_t num_elements) const;
 
-  /// Constant wrappers for bool, int8, int16, int32, int64, strings, and null
+  /// Functions to return LLVM values for constant boolean, int8, int16, int32,
+  // int64, strings, and null values.
   llvm::Constant *ConstBool(bool val) const;
   llvm::Constant *Const8(int8_t val) const;
   llvm::Constant *Const16(int16_t val) const;
@@ -98,7 +100,6 @@ class CodeGen {
   llvm::Constant *ConstDouble(double val) const;
   llvm::Value *ConstString(const std::string &str_val,
                            const std::string &name) const;
-  llvm::Value *ConstType(const type::Type &type);
   llvm::Value *ConstGenericBytes(const void *data, uint32_t length,
                                  const std::string &name) const;
   llvm::Constant *Null(llvm::Type *type) const;
@@ -195,9 +196,6 @@ class CodeGen {
  private:
   // The context/module where all the code this class produces goes
   CodeContext &code_context_;
-
-  std::unordered_map<type::Type, llvm::Value *, type::TypeHasher,
-                     type::TypeEquality> type_variables_;
 };
 
 }  // namespace codegen
diff --git a/test/function/numeric_functions_test.cpp b/test/function/numeric_functions_test.cpp
index 35622209fde..be700b4fa9f 100644
--- a/test/function/numeric_functions_test.cpp
+++ b/test/function/numeric_functions_test.cpp
@@ -28,9 +28,9 @@ using ::testing::Return;
 namespace peloton {
 namespace test {
 
-class NumericFunctionsTest : public PelotonTest {};
+class NumericFunctionsTests : public PelotonTest {};
 
-TEST_F(NumericFunctionsTest, SqrtTest) {
+TEST_F(NumericFunctionsTests, SqrtTest) {
   const double column_val = 9.0;
   const double expected = sqrt(9.0);
   std::vector<type::Value> args = {
@@ -46,7 +46,7 @@ TEST_F(NumericFunctionsTest, SqrtTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(NumericFunctionsTest, FloorTest) {
+TEST_F(NumericFunctionsTests, FloorTest) {
   // Testing Floor with DecimalTypes
   std::vector<double> inputs = {9.5, 3.3, -4.4, 0.0};
   std::vector<type::Value> args;
@@ -88,7 +88,7 @@ TEST_F(NumericFunctionsTest, FloorTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(NumericFunctionsTest, RoundTest) {
+TEST_F(NumericFunctionsTests, RoundTest) {
   std::vector<double> column_vals = {9.5, 3.3, -4.4, -5.5, 0.0};
   std::vector<type::Value> args;
   for (double val : column_vals) {
@@ -104,7 +104,7 @@ TEST_F(NumericFunctionsTest, RoundTest) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(NumericFunctionsTest,AbsTestDouble) {
+TEST_F(NumericFunctionsTests,AbsTestDouble) {
   std::vector<double> doubleTestInputs = {9.5, -2.5, -4.4, 0.0};
   std::vector<type::Value> args;
   for (double in : doubleTestInputs) {
@@ -120,7 +120,7 @@ TEST_F(NumericFunctionsTest,AbsTestDouble) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(NumericFunctionsTest, AbsTestInt) {
+TEST_F(NumericFunctionsTests, AbsTestInt) {
   std::vector<int64_t> bigIntTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int32_t> intTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int16_t> smallIntTestInputs = {-20, -15, -10, 0, 10, 20};
@@ -157,7 +157,7 @@ TEST_F(NumericFunctionsTest, AbsTestInt) {
   }
 }
 
-TEST_F(NumericFunctionsTest, CeilTestDouble) {
+TEST_F(NumericFunctionsTests, CeilTestDouble) {
   std::vector<double> doubleTestInputs = {-36.0, -35.222, -0.7, -0.5, -0.2,
                                           0.0, 0.2, 0.5, 0.7, 35.2, 36.0,
                                           37.2222};
@@ -174,7 +174,7 @@ TEST_F(NumericFunctionsTest, CeilTestDouble) {
   EXPECT_TRUE(result.IsNull());
 }
 
-TEST_F(NumericFunctionsTest, CeilTestInt) {
+TEST_F(NumericFunctionsTests, CeilTestInt) {
   std::vector<int64_t> bigIntTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int32_t> intTestInputs = {-20, -15, -10, 0, 10, 20};
   std::vector<int16_t> smallIntTestInputs = {-20, -15, -10, 0, 10, 20};
diff --git a/test/include/codegen/testing_codegen_util.h b/test/include/codegen/testing_codegen_util.h
index a017fdedfa5..c61a47e67c2 100644
--- a/test/include/codegen/testing_codegen_util.h
+++ b/test/include/codegen/testing_codegen_util.h
@@ -52,9 +52,11 @@ struct TempFileHandle {
 };
 
 /**
- * Common base class for all codegen tests. This class four test tables that all
- * the codegen components use. Their ID's are available through the oid_t
- * enumeration.
+ * Common base class for all codegen tests. This class has four test tables
+ * whose IDs and names are stored in test_table_oids and test_table_names,
+ * respectively. The test tables all have the exact schema: column "a" and "b"
+ * are integers, column "c" is a decimal, and column "d" is a varchar. The table
+ * with the highest OID also has a primary key on column "a".
  */
 class PelotonCodeGenTest : public PelotonTest {
  public:

From 70f94eccf9f7262cccdc052af2a3fa91dee08a57 Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 29 May 2018 17:03:35 -0400
Subject: [PATCH 38/42] Revert "Removed serialization"

This reverts commit d055ff94b02aef2ccaba86ceee7bb96ce6266d6a.
---
 src/network/service/peloton_service.cpp | 4 ++--
 test/network/rpc_queryplan_test.cpp     | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/network/service/peloton_service.cpp b/src/network/service/peloton_service.cpp
index 90a5b81ee8f..9e5095a0916 100644
--- a/src/network/service/peloton_service.cpp
+++ b/src/network/service/peloton_service.cpp
@@ -357,7 +357,7 @@ void PelotonService::QueryPlan(::google::protobuf::RpcController *controller,
         LOG_ERROR("Queryplan recived desen't have type");
         break;
       }
-#if 0
+
       case PlanNodeType::SEQSCAN: {
         LOG_TRACE("SEQSCAN revieved");
         std::string plan = request->plan();
@@ -400,7 +400,7 @@ void PelotonService::QueryPlan(::google::protobuf::RpcController *controller,
 
         break;
       }
-#endif
+
       default: {
         LOG_ERROR("Queryplan recived :: Unsupported TYPE: %s",
                   PlanNodeTypeToString(plan_type).c_str());
diff --git a/test/network/rpc_queryplan_test.cpp b/test/network/rpc_queryplan_test.cpp
index 90b55e06668..cb11891a1db 100644
--- a/test/network/rpc_queryplan_test.cpp
+++ b/test/network/rpc_queryplan_test.cpp
@@ -22,7 +22,6 @@ namespace test {
 class RpcQueryPlanTests : public PelotonTest {};
 
 TEST_F(RpcQueryPlanTests, BasicTest) {
-#if 0
   peloton::planner::SeqScanPlan mapped_plan_ptr;
 
   const peloton::PlanNodeType type = mapped_plan_ptr.GetPlanNodeType();
@@ -33,7 +32,6 @@ TEST_F(RpcQueryPlanTests, BasicTest) {
   bool serialize = mapped_plan_ptr.SerializeTo(output_plan);
   // Becuase the plan is not completed, so it is false
   EXPECT_FALSE(serialize);
-#endif
 }
 }
 }

From b8d0c34c79e0ede45914e3a4db02bf3b2bce15cd Mon Sep 17 00:00:00 2001
From: Prashanth Menon <pmenon@cs.cmu.edu>
Date: Tue, 29 May 2018 17:03:38 -0400
Subject: [PATCH 39/42] Revert "Removed unused serialization stuff from plan
 nodes"

This reverts commit 74427c78151e6102a5dc61bf2278dcc0cc5f82f3.
---
 src/include/planner/abstract_plan.h      |  27 +++
 src/include/planner/abstract_scan_plan.h |   2 +
 src/include/planner/seq_scan_plan.h      |  19 +-
 src/planner/abstract_plan.cpp            |   3 +
 src/planner/seq_scan_plan.cpp            | 227 ++++++++++++++++++++++-
 5 files changed, 267 insertions(+), 11 deletions(-)

diff --git a/src/include/planner/abstract_plan.h b/src/include/planner/abstract_plan.h
index bb1428f81d4..c257b20d830 100644
--- a/src/include/planner/abstract_plan.h
+++ b/src/include/planner/abstract_plan.h
@@ -20,6 +20,8 @@
 #include "codegen/query_parameters_map.h"
 #include "common/printable.h"
 #include "planner/binding_context.h"
+#include "type/serializeio.h"
+#include "type/serializer.h"
 #include "common/internal_types.h"
 #include "type/value.h"
 #include "util/hash_util.h"
@@ -64,6 +66,8 @@ class AbstractPlan : public Printable {
 
   const AbstractPlan *GetChild(uint32_t child_index) const;
 
+  const AbstractPlan *GetParent() const;
+  
   //===--------------------------------------------------------------------===//
   // Accessors
   //===--------------------------------------------------------------------===//
@@ -107,6 +111,23 @@ class AbstractPlan : public Printable {
 
   virtual std::unique_ptr<AbstractPlan> Copy() const = 0;
 
+  // A plan will be sent to anther node via serialization
+  // So serialization should be implemented by the derived classes
+
+  //===--------------------------------------------------------------------===//
+  // Serialization/Deserialization
+  // Each sub-class will have to implement these functions
+  // After the implementation for each sub-class, we should set these to pure
+  // virtual
+  //===--------------------------------------------------------------------===//
+  virtual bool SerializeTo(SerializeOutput &output UNUSED_ATTRIBUTE) const {
+    return false;
+  }
+  virtual bool DeserializeFrom(SerializeInput &input UNUSED_ATTRIBUTE) {
+    return false;
+  }
+  virtual int SerializeSize() const { return 0; }
+
   virtual hash_t Hash() const;
 
   virtual bool operator==(const AbstractPlan &rhs) const;
@@ -122,10 +143,16 @@ class AbstractPlan : public Printable {
     }
   }
 
+ protected:
+  // only used by its derived classes (when deserialization)
+  AbstractPlan *Parent() const { return parent_; }
+
  private:
   // A plan node can have multiple children
   std::vector<std::unique_ptr<AbstractPlan>> children_;
 
+  AbstractPlan *parent_ = nullptr;
+  
   // TODO: This field is harded coded now. This needs to be changed when
   // optimizer has the cost model and cardinality estimation
   int estimated_cardinality_ = 500000;
diff --git a/src/include/planner/abstract_scan_plan.h b/src/include/planner/abstract_scan_plan.h
index b770d66b7fe..7241f844c74 100644
--- a/src/include/planner/abstract_scan_plan.h
+++ b/src/include/planner/abstract_scan_plan.h
@@ -71,6 +71,8 @@ class AbstractScan : public AbstractPlan {
  protected:
   void SetTargetTable(storage::DataTable *table) { target_table_ = table; }
 
+  void AddColumnId(oid_t col_id) { column_ids_.push_back(col_id); }
+
   void SetPredicate(expression::AbstractExpression *predicate) {
     predicate_ = std::unique_ptr<expression::AbstractExpression>(predicate);
   }
diff --git a/src/include/planner/seq_scan_plan.h b/src/include/planner/seq_scan_plan.h
index fed2f12d783..9f0f411f2cb 100644
--- a/src/include/planner/seq_scan_plan.h
+++ b/src/include/planner/seq_scan_plan.h
@@ -18,20 +18,10 @@
 
 #include "common/internal_types.h"
 #include "common/logger.h"
-#include "expression/abstract_expression.h"
 #include "planner/abstract_scan_plan.h"
 #include "type/serializer.h"
 
 namespace peloton {
-
-namespace expression {
-class Parameter;
-}  // namespace expression
-
-namespace storage {
-class DataTable;
-}  // namespace storage
-
 namespace planner {
 
 class SeqScanPlan : public AbstractScan {
@@ -58,6 +48,15 @@ class SeqScanPlan : public AbstractScan {
 
   void SetParameterValues(std::vector<type::Value> *values) override;
 
+  //===--------------------------------------------------------------------===//
+  // Serialization/Deserialization
+  //===--------------------------------------------------------------------===//
+  bool SerializeTo(SerializeOutput &output) const override;
+  bool DeserializeFrom(SerializeInput &input) override;
+
+  /* For init SerializeOutput */
+  int SerializeSize() const override;
+
   std::unique_ptr<AbstractPlan> Copy() const override {
     auto *new_plan =
         new SeqScanPlan(GetTable(), GetPredicate()->Copy(), GetColumnIds());
diff --git a/src/planner/abstract_plan.cpp b/src/planner/abstract_plan.cpp
index 49014a6f471..241323bb0e9 100644
--- a/src/planner/abstract_plan.cpp
+++ b/src/planner/abstract_plan.cpp
@@ -14,6 +14,7 @@
 
 #include "common/logger.h"
 #include "common/macros.h"
+#include "expression/expression_util.h"
 #include "util/hash_util.h"
 
 namespace peloton {
@@ -37,6 +38,8 @@ const AbstractPlan *AbstractPlan::GetChild(uint32_t child_index) const {
   return children_[child_index].get();
 }
 
+const AbstractPlan *AbstractPlan::GetParent() const { return parent_; }
+
 // Get a string representation of this plan
 std::ostream &operator<<(std::ostream &os, const AbstractPlan &plan) {
   os << PlanNodeTypeToString(plan.GetPlanNodeType());
diff --git a/src/planner/seq_scan_plan.cpp b/src/planner/seq_scan_plan.cpp
index 7c3ba3d8a14..62e8299aae7 100644
--- a/src/planner/seq_scan_plan.cpp
+++ b/src/planner/seq_scan_plan.cpp
@@ -6,21 +6,246 @@
 //
 // Identification: src/planner/seq_scan_plan.cpp
 //
-// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+// Copyright (c) 2015-17, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
 #include "planner/seq_scan_plan.h"
 
+#include "parser/select_statement.h"
+#include "catalog/manager.h"
+#include "catalog/schema.h"
 #include "common/logger.h"
 #include "common/macros.h"
 #include "expression/abstract_expression.h"
+#include "expression/expression_util.h"
 #include "storage/data_table.h"
+#include "storage/storage_manager.h"
 #include "common/internal_types.h"
 
 namespace peloton {
 namespace planner {
 
+//===--------------------------------------------------------------------===//
+// Serialization/Deserialization
+//===--------------------------------------------------------------------===//
+
+/**
+ * The SeqScanPlan has the following members:
+ *   database_id, table_id, predicate, column_id, parent(might be NULL)
+ * TODO: SeqScanPlan doesn't have children, so we don't need to handle it
+ *
+ * Therefore a SeqScanPlan is serialized as:
+ * [(int) total size]
+ * [(int8_t) plan type]
+ * [(int) database_id]
+ * [(int) table_id]
+ * [(int) num column_id]
+ * [(int) column id...]
+ * [(int8_t) expr type]     : if invalid, predicate is null
+ * [(bytes) predicate]      : predicate is Expression
+ * [(int8_t) plan type]     : if invalid, parent is null
+ * [(bytes) parent]         : parent is also a plan
+ *
+ * TODO: parent_ seems never be set or used
+ */
+
+bool SeqScanPlan::SerializeTo(SerializeOutput &output) const {
+  // A placeholder for the total size written at the end
+  int start = output.Position();
+  output.WriteInt(-1);
+
+  // Write the SeqScanPlan type
+  PlanNodeType plan_type = GetPlanNodeType();
+  output.WriteByte(static_cast<int8_t>(plan_type));
+
+  // Write database id and table id
+  if (!GetTable()) {
+    // The plan is not completed
+    return false;
+  }
+  oid_t database_id = GetTable()->GetDatabaseOid();
+  oid_t table_id = GetTable()->GetOid();
+
+  output.WriteInt(static_cast<int>(database_id));
+  output.WriteInt(static_cast<int>(table_id));
+
+  // If column has 0 item, just write the columnid_count with 0
+  int columnid_count = GetColumnIds().size();
+  output.WriteInt(columnid_count);
+
+  // If column has 0 item, nothing happens here
+  for (int it = 0; it < columnid_count; it++) {
+    oid_t col_id = GetColumnIds()[it];
+    output.WriteInt(static_cast<int>(col_id));
+  }
+
+  // Write predicate
+  if (GetPredicate() == nullptr) {
+    // Write the type
+    output.WriteByte(static_cast<int8_t>(ExpressionType::INVALID));
+  } else {
+    // Write the expression type
+    ExpressionType expr_type = GetPredicate()->GetExpressionType();
+    output.WriteByte(static_cast<int8_t>(expr_type));
+  }
+
+  // Write parent, but parent seems never be set or used right now
+  if (GetParent() == nullptr) {
+    // Write the type
+    output.WriteByte(static_cast<int8_t>(PlanNodeType::INVALID));
+  } else {
+    // Write the parent type
+    PlanNodeType parent_type = GetParent()->GetPlanNodeType();
+    output.WriteByte(static_cast<int8_t>(parent_type));
+
+    // Write parent
+    GetParent()->SerializeTo(output);
+  }
+
+  // Write the total length
+  int32_t sz = static_cast<int32_t>(output.Position() - start - sizeof(int));
+  PELOTON_ASSERT(sz > 0);
+  output.WriteIntAt(start, sz);
+
+  return true;
+}
+
+/**
+   * Therefore a SeqScanPlan is serialized as:
+   * [(int) total size]
+   * [(int8_t) plan type]
+   * [(int) database_id]
+   * [(int) table_id]
+   * [(int) num column_id]
+   * [(int) column id...]
+   * [(int8_t) expr type]     : if invalid, predicate is null
+   * [(bytes) predicate]      : predicate is Expression
+   * [(int8_t) plan type]     : if invalid, parent is null
+   * [(bytes) parent]         : parent is also a plan
+ */
+bool SeqScanPlan::DeserializeFrom(SerializeInput &input) {
+  // Read the size of SeqScanPlan class
+  input.ReadInt();
+
+  // Read the type
+  UNUSED_ATTRIBUTE PlanNodeType plan_type =
+      (PlanNodeType)input.ReadEnumInSingleByte();
+  PELOTON_ASSERT(plan_type == GetPlanNodeType());
+
+  // Read database id
+  oid_t database_oid = input.ReadInt();
+
+  // Read table id
+  oid_t table_oid = input.ReadInt();
+
+  // Get table and set it to the member
+  storage::DataTable *target_table = nullptr;
+  try{
+      target_table = static_cast<storage::DataTable *>(
+        storage::StorageManager::GetInstance()->GetTableWithOid(
+              database_oid, table_oid));
+  } catch (CatalogException &e) {
+      LOG_TRACE("Can't find table %d! Return false", table_oid);
+      return false;
+  }
+  SetTargetTable(target_table);
+
+  // Read the number of column_id and set them to column_ids_
+  oid_t columnid_count = input.ReadInt();
+  for (oid_t it = 0; it < columnid_count; it++) {
+    oid_t column_id = input.ReadInt();
+    AddColumnId(column_id);
+  }
+
+  // Read the type
+  ExpressionType expr_type = (ExpressionType)input.ReadEnumInSingleByte();
+
+  // Predicate deserialization
+  if (expr_type != ExpressionType::INVALID) {
+    switch (expr_type) {
+      //            case ExpressionType::COMPARE_IN:
+      //                predicate_ =
+      //                std::unique_ptr<ExpressionType::COMPARE_IN>(new
+      //                ComparisonExpression (101));
+      //                predicate_.DeserializeFrom(input);
+      //              break;
+
+      default: {
+        LOG_ERROR(
+            "Expression deserialization :: Unsupported EXPRESSION_TYPE: %s",
+            ExpressionTypeToString(expr_type).c_str());
+        break;
+      }
+    }
+  }
+
+  // Read the type of parent
+  PlanNodeType parent_type = (PlanNodeType)input.ReadEnumInSingleByte();
+
+  // Parent deserialization
+  if (parent_type != PlanNodeType::INVALID) {
+    switch (expr_type) {
+      //            case ExpressionType::COMPARE_IN:
+      //                predicate_ =
+      //                std::unique_ptr<ExpressionType::COMPARE_IN>(new
+      //                ComparisonExpression (101));
+      //                predicate_.DeserializeFrom(input);
+      //              break;
+
+      default: {
+        LOG_ERROR("Parent deserialization :: Unsupported PlanNodeType: %s",
+                  ExpressionTypeToString(expr_type).c_str());
+        break;
+      }
+    }
+  }
+
+  return true;
+}
+/**
+ *
+ * SeqScanPlan is serialized as:
+ * [(int) total size]
+ * [(int8_t) plan type]
+ * [(int) database_id]
+ * [(int) table_id]
+ * [(int) num column_id]
+ * [(int) column id...]
+ * [(int8_t) expr type]     : if invalid, predicate is null
+ * [(bytes) predicate]      : predicate is Expression
+ * [(int8_t) plan type]     : if invalid, parent is null
+ * [(bytes) parent]         : parent is also a plan
+ *
+ * So, the fixed size part is:
+ *      [(int) total size]   4 +
+ *      [(int8_t) plan type] 1 +
+ *      [(int) database_id]  4 +
+ *      [(int) table_id]     4 +
+ *      [(int) num column_id]4 +
+ *      [(int8_t) expr type] 1 +
+ *      [(int8_t) plan type] 1 =
+ *     the variant part is :
+ *      [(int) column id...]: num column_id * 4
+ *      [(bytes) predicate] : predicate->GetSerializeSize()
+ *      [(bytes) parent]    : parent->GetSerializeSize()
+ */
+int SeqScanPlan::SerializeSize() const {
+  // Fixed size. see the detail above
+  int size_fix = sizeof(int) * 4 + 3;
+  int size_column_ids = GetColumnIds().size() * sizeof(int);
+  int size = size_fix + size_column_ids;
+
+  if (GetPredicate() != nullptr) {
+    size = size + GetPredicate()->SerializeSize();
+  }
+  if (Parent()) {
+    size = size + Parent()->SerializeSize();
+  }
+
+  return size;
+}
+
 void SeqScanPlan::SetParameterValues(std::vector<type::Value> *values) {
   LOG_TRACE("Setting parameter values in Sequential Scan");
 

From 13f84a4d71b5b884e68cc9e974e265f3d0d90e5a Mon Sep 17 00:00:00 2001
From: Prashanth <pmenon@cs.cmu.edu>
Date: Wed, 6 Jun 2018 15:22:19 -0400
Subject: [PATCH 40/42] Beefed up tests, which caught more bugs

---
 src/codegen/util/csv_scanner.cpp       | 186 ++++++++++++++++++-------
 src/include/codegen/util/csv_scanner.h |  12 +-
 src/include/util/string_util.h         |  13 ++
 src/util/file.cpp                      |  24 ++--
 src/util/string_util.cpp               |   9 ++
 test/codegen/csv_scan_test.cpp         | 177 +++++++++++++++--------
 6 files changed, 297 insertions(+), 124 deletions(-)

diff --git a/src/codegen/util/csv_scanner.cpp b/src/codegen/util/csv_scanner.cpp
index 0481a4444e1..5f09349f973 100644
--- a/src/codegen/util/csv_scanner.cpp
+++ b/src/codegen/util/csv_scanner.cpp
@@ -32,7 +32,7 @@ CSVScanner::CSVScanner(peloton::type::AbstractPool &pool,
       file_path_(file_path),
       file_(),
       buffer_(nullptr),
-      buffer_begin_(0),
+      buffer_pos_(0),
       buffer_end_(0),
       line_(nullptr),
       line_len_(0),
@@ -59,12 +59,17 @@ CSVScanner::CSVScanner(peloton::type::AbstractPool &pool,
 CSVScanner::~CSVScanner() {
   if (buffer_ != nullptr) {
     memory_.Free(buffer_);
+    buffer_ = nullptr;
   }
+
   if (line_ != nullptr) {
     memory_.Free(line_);
+    line_ = nullptr;
   }
+
   if (cols_ != nullptr) {
     memory_.Free(cols_);
+    cols_ = nullptr;
   }
 }
 
@@ -90,21 +95,22 @@ void CSVScanner::Produce() {
   Initialize();
 
   // Loop lines
-  while (const char *line = NextLine()) {
+  while (char *line = NextLine()) {
     ProduceCSV(line);
   }
 }
 
 void CSVScanner::Initialize() {
   // Let's first perform a few validity checks
-  boost::filesystem::path path{file_path_};
+  boost::filesystem::path path(file_path_);
 
   if (!boost::filesystem::exists(path)) {
-    throw ExecutorException{StringUtil::Format("input path '%s' does not exist",
-                                               file_path_.c_str())};
+    throw ExecutorException(StringUtil::Format("input path '%s' does not exist",
+                                               file_path_.c_str()));
   } else if (!boost::filesystem::is_regular_file(file_path_)) {
-    throw ExecutorException{
-        StringUtil::Format("unable to read file '%s'", file_path_.c_str())};
+    auto msg =
+        StringUtil::Format("unable to read file '%s'", file_path_.c_str());
+    throw ExecutorException(msg);
   }
 
   // The path looks okay, let's try opening it
@@ -125,7 +131,7 @@ void CSVScanner::Initialize() {
 
 bool CSVScanner::NextBuffer() {
   // Do read
-  buffer_begin_ = 0;
+  buffer_pos_ = 0;
   buffer_end_ = static_cast<uint32_t>(file_.Read(buffer_, kDefaultBufferSize));
 
   // Update stats
@@ -134,7 +140,9 @@ bool CSVScanner::NextBuffer() {
   return (buffer_end_ != 0);
 }
 
-void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
+void CSVScanner::AppendToLineBuffer(const char *data, uint32_t len) {
+  PELOTON_ASSERT(len > 0);
+
   // Short-circuit if we're not appending any data
   if (len == 0) {
     return;
@@ -146,7 +154,7 @@ void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
       const auto msg = StringUtil::Format(
           "Line %u in file '%s' exceeds maximum line length: %lu",
           line_number_ + 1, file_path_.c_str(), kMaxAllocSize);
-      throw Exception{msg};
+      throw Exception(msg);
     }
 
     // The current line buffer isn't large enough to store the new bytes, so we
@@ -186,41 +194,44 @@ void CSVScanner::AppendToCurrentLine(const char *data, uint32_t len) {
   stats_.num_copies++;
 }
 
-// The main purpose of this function is to find the start of the next line in
-// the CSV file.
-const char *CSVScanner::NextLine() {
+// The objective of this function is to find a complete line in the CSV file.
+// The returned value will be a valid pointer to a null-terminated string that
+// is the next line in the CSV to be processed.
+char *CSVScanner::NextLine() {
   line_len_ = 0;
 
+  const char quote = quote_;
+  const char escape = (quote_ == escape_ ? static_cast<char>('\0') : escape_);
+
   bool in_quote = false;
   bool last_was_escape = false;
-  bool copied_to_line_buf = false;
 
-  uint32_t line_end = buffer_begin_;
-
-  char quote = quote_;
-  char escape = (quote_ == escape_ ? static_cast<char>('\0') : escape_);
+  const char *buf = buffer_;
+  uint32_t curr_buffer_pos = buffer_pos_;
 
   while (true) {
-    if (line_end >= buffer_end_) {
+    if (curr_buffer_pos == buffer_end_) {
       // We need to read more data from the CSV file. But first, we need to copy
       // all the data in the read-buffer (i.e., [buffer_begin_, buffer_end_] to
       // the line-buffer.
+      if (buffer_pos_ < curr_buffer_pos) {
+        AppendToLineBuffer(buffer_ + buffer_pos_,
+                           curr_buffer_pos - buffer_pos_);
+        buffer_pos_ = curr_buffer_pos;
+      }
 
-      AppendToCurrentLine(buffer_ + buffer_begin_,
-                          static_cast<uint32_t>(buffer_end_ - buffer_begin_));
+      // Reset positions
+      curr_buffer_pos = 0;
 
       // Now, read more data
       if (!NextBuffer()) {
-        return nullptr;
+        // We hit en EOF
+        break;
       }
-
-      // Reset positions
-      line_end = buffer_begin_;
-      copied_to_line_buf = true;
     }
 
     // Read character
-    char c = buffer_[line_end];
+    char c = buf[curr_buffer_pos++];
 
     if (in_quote && c == escape) {
       last_was_escape = !last_was_escape;
@@ -235,47 +246,120 @@ const char *CSVScanner::NextLine() {
     // Process the new-line character. If we a new-line and we're not currently
     // in a quoted section, we're done.
     if (c == '\n' && !in_quote) {
-      buffer_[line_end] = '\0';
       break;
     }
+  }
 
-    // Move along
-    line_end++;
+  // Flush remaining valid bytes
+  if (buffer_pos_ < curr_buffer_pos) {
+    AppendToLineBuffer(buffer_ + buffer_pos_, curr_buffer_pos - buffer_pos_);
+    buffer_pos_ = curr_buffer_pos;
   }
 
   // Increment line number
   line_number_++;
 
-  if (copied_to_line_buf) {
-    AppendToCurrentLine(buffer_, line_end);
-    buffer_begin_ = line_end + 1;
-    return line_;
-  } else {
-    const char *ret = buffer_ + buffer_begin_;
-    buffer_begin_ = line_end + 1;
-    return ret;
+  // If we didn't transfer any bytes to the line buffer, we must have reached an
+  // EOF. If so, return null indicating there are no more lines.
+  if (line_len_ == 0) {
+    return nullptr;
   }
+
+  // A full line has been transferred to the line buffer, but we also copied the
+  // newline character. Strip it off now.
+  line_len_--;
+  line_[line_len_] = '\0';
+
+  // Done
+  return line_;
 }
 
-void CSVScanner::ProduceCSV(const char *line) {
-  // At this point, we have a well-formed line. Let's pull out pointers to the
-  // columns.
+void CSVScanner::ProduceCSV(char *line) {
+  const char delimiter = delimiter_;
+  const char quote = quote_;
+  const char escape = escape_;
 
-  const auto *iter = line;
-  for (uint32_t col_idx = 0; col_idx < num_cols_; col_idx++) {
-    // Start points to the beginning of the column's data value
-    const char *start = iter;
+  // The iterator over characters in the line
+  char *iter = line;
 
-    // Eat text until the next delimiter
-    while (*iter != 0 && *iter != delimiter_) {
-      iter++;
+  for (uint32_t col_idx = 0; col_idx < num_cols_; col_idx++) {
+    char *col_begin = iter;
+    char *col_end = nullptr;
+
+    // We need to move col_end to the end of the column's data. Along the way,
+    // we may need to shift data down due to quotes and escapes. Inspired by
+    // Postgres.
+    {
+      char *out = col_begin;
+      while (true) {
+        // This first loop looks for either the delimiter character or the end
+        // of the line, indicating the end of a columns data. It breaks out of
+        // the loop if a quote character is found. It flows into a second loop
+        // whose only purpose is to find the end of the quoted section.
+        while (true) {
+          char c = *iter++;
+
+          // If we see the delimiter character, or the end of the string,
+          // finish
+          if (c == delimiter || c == '\0') {
+            col_end = out;
+            iter--;
+            goto colend;
+          }
+
+          // If we see a quote character, move to the second loop to find the
+          // closing quote.
+          if (c == quote) {
+            break;
+          }
+
+          *out++ = c;
+        }
+
+        while (true) {
+          char c = *iter++;
+
+          // If we see the end of the line *within* a quoted section, throw
+          // error
+          if (c == '\0') {
+            throw Exception(StringUtil::Format(
+                "unterminated CSV quoted field at %u", col_idx));
+          }
+
+          // If we see an escape character within a quoted section, we need to
+          // check if the following character is a quote. If so, we must
+          // escape it
+          if (c == escape) {
+            char next = *iter;
+            if (next == quote || next == escape) {
+              *out++ = next;
+              iter++;
+              continue;
+            }
+          }
+
+          // If we see the closing quote, we're done.
+          if (c == quote) {
+            break;
+          }
+
+          *out++ = c;
+        }
+      }
     }
 
-    // At this point, iter points to the end of the column's data value
+  colend:
+    // If we've reached the of the line, but haven't setup all the columns, then
+    // we're missing data for the remaining columns and should throw an error.
+    if (*iter == '\0' && col_idx != (num_cols_ - 1)) {
+      throw Exception(
+          StringUtil::Format("missing data for column %u on line %u",
+                             (col_idx + 2), line_number_));
+    }
 
     // Let's setup the columns
-    cols_[col_idx].ptr = start;
-    cols_[col_idx].len = static_cast<uint32_t>(iter - start);
+    cols_[col_idx].ptr = col_begin;
+    cols_[col_idx].len = static_cast<uint32_t>(col_end - col_begin);
     cols_[col_idx].is_null = (cols_[col_idx].len == 0);
 
     // Eat delimiter, moving to next column
diff --git a/src/include/codegen/util/csv_scanner.h b/src/include/codegen/util/csv_scanner.h
index a946dec903e..f230354c5fa 100644
--- a/src/include/codegen/util/csv_scanner.h
+++ b/src/include/codegen/util/csv_scanner.h
@@ -158,17 +158,17 @@ class CSVScanner {
   // Initialize the scan
   void Initialize();
 
-  // Append bytes to the end of the currently accruing line.
-  void AppendToCurrentLine(const char *data, uint32_t len);
+  // Append bytes to the end of the line buffer
+  void AppendToLineBuffer(const char *data, uint32_t len);
 
   // Read the next line from the CSV file
-  const char *NextLine();
+  char *NextLine();
 
   // Read a buffer's worth of data from the CSV file
   bool NextBuffer();
 
   // Produce CSV data stored in the provided line
-  void ProduceCSV(const char *line);
+  void ProduceCSV(char *line);
 
  private:
   // All memory allocations happen from this pool
@@ -180,10 +180,10 @@ class CSVScanner {
   // The CSV file handle
   peloton::util::File file_;
 
-  // The temporary buffer where raw file contents are read into
+  // The temporary read-buffer where raw file contents are first read into
   // TODO: make these unique_ptr's with a customer deleter
   char *buffer_;
-  uint32_t buffer_begin_;
+  uint32_t buffer_pos_;
   uint32_t buffer_end_;
 
   // A pointer to the start of a line in the CSV file
diff --git a/src/include/util/string_util.h b/src/include/util/string_util.h
index d61f297ce09..9882ce3ecd5 100644
--- a/src/include/util/string_util.h
+++ b/src/include/util/string_util.h
@@ -133,6 +133,19 @@ class StringUtil {
   static void RTrim(std::string &str);
 
   static std::string Indent(const int num_indent);
+
+  /**
+   * Return a new string that has stripped all occurrences of the provided
+   * character from the provided string.
+   *
+   * NOTE: This function copies the input string into a new string, which is
+   * wasteful. Don't use this for performance critical code, please!
+   *
+   * @param str The input string
+   * @param c The character we want to remove
+   * @return A new string with no occurrences of the provided character
+   */
+  static std::string Strip(const std::string &str, char c);
 };
 
 }  // namespace peloton
diff --git a/src/util/file.cpp b/src/util/file.cpp
index de0835982c8..275d3848418 100644
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -42,8 +42,8 @@ void File::Open(const std::string &name, File::AccessMode access_mode) {
 
   // Check error
   if (fd == -1) {
-    throw Exception{
-        StringUtil::Format("Unable to read file '%s'", name.c_str())};
+    throw Exception(
+        StringUtil::Format("unable to read file '%s'", name.c_str()));
   }
 
   // Done
@@ -59,8 +59,8 @@ uint64_t File::Read(void *data, uint64_t len) const {
 
   // Check error
   if (bytes_read == -1) {
-    throw Exception{
-        StringUtil::Format("Error reading file: %s", strerror(errno))};
+    throw Exception(
+        StringUtil::Format("error reading file: %s", strerror(errno)));
   }
 
   // Done
@@ -76,8 +76,8 @@ uint64_t File::Write(void *data, uint64_t len) const {
 
   // Check error
   if (bytes_written == -1) {
-    throw Exception{
-        StringUtil::Format("Error writing to file: %s", strerror(errno))};
+    throw Exception(
+        StringUtil::Format("error writing to file: %s", strerror(errno)));
   }
 
   // Done
@@ -91,23 +91,23 @@ uint64_t File::Size() const {
   // Save the current position
   off_t curr_off = lseek(fd_, 0, SEEK_CUR);
   if (curr_off == -1) {
-    throw Exception{StringUtil::Format(
-        "unable to read current position in file: %s", strerror(errno))};
+    throw Exception(StringUtil::Format(
+        "unable to read current position in file: %s", strerror(errno)));
   }
 
   // Seek to the end of the file, returning the new file position i.e., the
   // size of the file in bytes.
   off_t off = lseek(fd_, 0, SEEK_END);
   if (off == -1) {
-    throw Exception{StringUtil::Format(
-        "unable to move file position to end file: %s", strerror(errno))};
+    throw Exception(StringUtil::Format(
+        "unable to move file position to end file: %s", strerror(errno)));
   }
 
   off_t restore = lseek(fd_, curr_off, SEEK_SET);
   if (restore == -1) {
-    throw Exception{StringUtil::Format(
+    throw Exception(StringUtil::Format(
         "unable to restore position after moving to the end: %s",
-        strerror(errno))};
+        strerror(errno)));
   }
 
   // Restore position
diff --git a/src/util/string_util.cpp b/src/util/string_util.cpp
index d4fca199219..a0f8ba3987f 100644
--- a/src/util/string_util.cpp
+++ b/src/util/string_util.cpp
@@ -190,4 +190,13 @@ std::vector<std::string> StringUtil::Split(const std::string &input,
   }
   return splits;
 }
+
+std::string StringUtil::Strip(const std::string &str, char c) {
+  // There's a copy here which is wasteful, so don't use this in performance
+  // critical code!
+  std::string tmp = str;
+  tmp.erase(std::remove(tmp.begin(), tmp.end(), c), tmp.end());
+  return tmp;
 }
+
+}  // namespace peloton
diff --git a/test/codegen/csv_scan_test.cpp b/test/codegen/csv_scan_test.cpp
index 2cebff0873e..127e73b968f 100644
--- a/test/codegen/csv_scan_test.cpp
+++ b/test/codegen/csv_scan_test.cpp
@@ -14,10 +14,8 @@
 
 #include "codegen/util/csv_scanner.h"
 #include "common/timer.h"
-#include "function/date_functions.h"
-#include "function/numeric_functions.h"
-#include "function/string_functions.h"
 #include "util/file_util.h"
+#include "util/string_util.h"
 
 namespace peloton {
 namespace test {
@@ -39,14 +37,15 @@ void CSVRowCallback(void *s) {
 
 void IterateAsCSV(const std::vector<std::string> &rows,
                   const std::vector<codegen::type::Type> &col_types,
-                  CallbackFn callback, char delimiter = ',') {
+                  CallbackFn callback, char delimiter = ',', char quote = '"',
+                  char escape = '"') {
   std::string csv_data;
-  for (uint32_t i = 0; i < rows.size(); i++) {
-    csv_data.append(rows[i]).append("\n");
+  for (const auto &row : rows) {
+    csv_data.append(row).append("\n");
   }
 
   // Write the contents into a temporary file
-  TempFileHandle fh{FileUtil::WriteTempFile(csv_data, "", "tmp")};
+  TempFileHandle fh(FileUtil::WriteTempFile(csv_data, "", "tmp"));
 
   // The memory pool
   auto &pool = *TestingHarness::GetInstance().GetTestingPool();
@@ -55,9 +54,10 @@ void IterateAsCSV(const std::vector<std::string> &rows,
   State state = {.scanner = nullptr, .callback = callback};
 
   // The scanner
-  codegen::util::CSVScanner scanner{
+  codegen::util::CSVScanner scanner(
       pool, fh.name, col_types.data(), static_cast<uint32_t>(col_types.size()),
-      CSVRowCallback, reinterpret_cast<void *>(&state), delimiter};
+      CSVRowCallback, reinterpret_cast<void *>(&state), delimiter, quote,
+      escape);
 
   state.scanner = &scanner;
 
@@ -65,8 +65,8 @@ void IterateAsCSV(const std::vector<std::string> &rows,
   scanner.Produce();
 }
 
-TEST_F(CSVScanTest, SimpleNumericScan) {
-  // Create a temporary CSV file
+TEST_F(CSVScanTest, NumericScanTest) {
+  // The set of test rows and their types
   std::vector<std::string> rows = {"1,2,3.0,4", "4,5,6.0,7", "8,9,10.0,11"};
   std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
                                             {type::TypeId::INTEGER, false},
@@ -74,73 +74,140 @@ TEST_F(CSVScanTest, SimpleNumericScan) {
                                             {type::TypeId::INTEGER, false}};
 
   uint32_t rows_read = 0;
-  IterateAsCSV(rows, types, [&rows_read, &types](
+  IterateAsCSV(rows, types, [&rows, &rows_read, &types](
                                 const codegen::util::CSVScanner::Column *cols) {
-    rows_read++;
+    // Split the input row into column values
+    const auto input_parts = StringUtil::Split(rows[rows_read++], ',');
+
+    // Check contents of row based on test input
+    for (uint32_t i = 0; i < types.size(); i++) {
+      // The column isn't null
+      EXPECT_FALSE(cols[i].is_null);
+
+      // The column has a value
+      EXPECT_GT(cols[i].len, 0);
+
+      // Check the string representations
+      EXPECT_EQ(input_parts[i], std::string(cols[i].ptr, cols[i].len));
+    }
+  });
+
+  EXPECT_EQ(rows.size(), rows_read);
+}
+
+TEST_F(CSVScanTest, QuoteEscapeTest) {
+  // The set of test rows and their types
+  std::vector<std::string> rows = {"yea he's \"cool\",1,2", "a quote:\"\",3,4"};
+  std::vector<codegen::type::Type> types = {{type::TypeId::VARCHAR, false},
+                                            {type::TypeId::INTEGER, false},
+                                            {type::TypeId::INTEGER, false}};
+
+  uint32_t rows_read = 0;
+  IterateAsCSV(rows, types, [&rows, &rows_read, &types](
+                                const codegen::util::CSVScanner::Column *cols) {
+    // Split the input row into column values
+    auto input_parts = StringUtil::Split(rows[rows_read++], ',');
+
+    // Check contents of row based on test input
     for (uint32_t i = 0; i < types.size(); i++) {
+      // The column isn't null
       EXPECT_FALSE(cols[i].is_null);
+
+      // The column has a value
       EXPECT_GT(cols[i].len, 0);
+
+      // Check the string representations. We need to strip off any quotes from
+      // the original string since the CSV scan will strip them for us.
+      EXPECT_EQ(StringUtil::Strip(input_parts[i], '"'),
+                std::string(cols[i].ptr, cols[i].len));
     }
   });
 
-  // Check
   EXPECT_EQ(rows.size(), rows_read);
 }
 
-TEST_F(CSVScanTest, MixedStringScan) {
-  // Create a temporary CSV file
+TEST_F(CSVScanTest, MixedStringTest) {
   std::vector<std::string> rows = {
-      "1,1994-01-01,3,test", "4,2018-01-01,6,\"test\"",
+      "1,1994-01-01,3,test", "4,2018-01-01,6,\"quoted_test\"",
       "8,2016-05-05,10,\"test\nnewline\ninquote\""};
   std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
                                             {type::TypeId::DATE, false},
                                             {type::TypeId::INTEGER, false},
                                             {type::TypeId::VARCHAR, false}};
-
-  std::vector<std::string> rows_read;
-  IterateAsCSV(rows, types, [&rows_read, &types](
+  uint32_t rows_read = 0;
+  IterateAsCSV(rows, types, [&rows, &rows_read, &types](
                                 const codegen::util::CSVScanner::Column *cols) {
-    std::string row;
+    // Split the input row into column values
+    auto input_parts = StringUtil::Split(rows[rows_read++], ',');
+
     for (uint32_t i = 0; i < types.size(); i++) {
+      // The column isn't null
       EXPECT_FALSE(cols[i].is_null);
+
+      // The column has a value
       EXPECT_GT(cols[i].len, 0);
-      if (i > 0) row.append(",");
-      switch (types[i].type_id) {
-        case type::TypeId::INTEGER: {
-          row.append(std::to_string(function::NumericFunctions::InputInteger(
-              types[i], cols[i].ptr, cols[i].len)));
-          break;
-        }
-        case type::TypeId::DATE: {
-          auto raw_date = function::DateFunctions::InputDate(
-              types[i], cols[i].ptr, cols[i].len);
-          int32_t year, month, day;
-          function::DateFunctions::JulianToDate(raw_date, year, month, day);
-          row.append(StringUtil::Format("%u-%02u-%02u", year, month, day));
-          break;
-        }
-        case type::TypeId::VARCHAR: {
-          auto ret = function::StringFunctions::InputString(
-              types[i], cols[i].ptr, cols[i].len);
-          row.append(std::string{ret.str, ret.length - 1});
-          break;
-        }
-        default: {
-          throw Exception{StringUtil::Format(
-              "Did not expect column type '%s' in test. Did you forget to "
-              "modify the switch statement to handle a column type you've added"
-              "in the test case?",
-              TypeIdToString(types[i].type_id).c_str())};
-        }
-      }
+
+      // Check the string representations. We need to strip off any quotes from
+      // the original string since the CSV scan will strip them for us.
+      EXPECT_EQ(StringUtil::Strip(input_parts[i], '"'),
+                std::string(cols[i].ptr, cols[i].len));
     }
-    rows_read.push_back(row);
   });
 
-  // Check
-  ASSERT_EQ(rows.size(), rows_read.size());
-  for (uint32_t i = 0; i < rows.size(); i++) {
-    EXPECT_EQ(rows[i], rows_read[i]);
+  EXPECT_EQ(rows.size(), rows_read);
+}
+
+TEST_F(CSVScanTest, CatchErrorsTest) {
+  ////////////////////////////////////////////////////////////////////
+  ///
+  /// Test Case - Missing last column
+  ///
+  ////////////////////////////////////////////////////////////////////
+  {
+    std::vector<std::string> missing_col = {"1,1994-01-01,3"};
+    std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
+                                              {type::TypeId::DATE, false},
+                                              {type::TypeId::INTEGER, false},
+                                              {type::TypeId::VARCHAR, false}};
+    EXPECT_ANY_THROW(IterateAsCSV(
+        missing_col, types,
+        [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) {
+          FAIL();
+        }));
+  }
+
+  ////////////////////////////////////////////////////////////////////
+  ///
+  /// Test Case - Unclosed quote
+  ///
+  ////////////////////////////////////////////////////////////////////
+  {
+    std::vector<std::string> missing_col = {"1,\"unclosed,3"};
+    std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
+                                              {type::TypeId::VARCHAR, false},
+                                              {type::TypeId::INTEGER, false}};
+    EXPECT_ANY_THROW(IterateAsCSV(
+        missing_col, types,
+        [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) {
+          FAIL();
+        }));
+  }
+
+  ////////////////////////////////////////////////////////////////////
+  ///
+  /// Test Case - Unclosed quote
+  ///
+  ////////////////////////////////////////////////////////////////////
+  {
+    std::vector<std::string> missing_col = {"1,unclosed\",3"};
+    std::vector<codegen::type::Type> types = {{type::TypeId::INTEGER, false},
+                                              {type::TypeId::VARCHAR, false},
+                                              {type::TypeId::INTEGER, false}};
+    EXPECT_ANY_THROW(IterateAsCSV(
+        missing_col, types,
+        [](UNUSED_ATTRIBUTE const codegen::util::CSVScanner::Column *cols) {
+          FAIL();
+        }));
   }
 }
 

From bca783db58dd56cd8544a171f52031d85540b29b Mon Sep 17 00:00:00 2001
From: Prashanth <pmenon@cs.cmu.edu>
Date: Wed, 6 Jun 2018 16:14:38 -0400
Subject: [PATCH 41/42] Fix tests

---
 test/codegen/csv_scan_translator_test.cpp | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/test/codegen/csv_scan_translator_test.cpp b/test/codegen/csv_scan_translator_test.cpp
index 66da8ead0d5..320db518117 100644
--- a/test/codegen/csv_scan_translator_test.cpp
+++ b/test/codegen/csv_scan_translator_test.cpp
@@ -16,6 +16,7 @@
 #include "planner/csv_scan_plan.h"
 #include "planner/insert_plan.h"
 #include "planner/seq_scan_plan.h"
+#include "util/string_util.h"
 #include "util/file_util.h"
 
 namespace peloton {
@@ -33,10 +34,21 @@ class CSVScanTranslatorTest : public PelotonCodeGenTest {
 };
 
 TEST_F(CSVScanTranslatorTest, IntCsvScan) {
-  // Test input
-  std::vector<std::string> rows = {"1,2,3.9,four",
-                                   "5,6,7.4,eight",
-                                   "9,10,11.1,\"twelve\""};
+  // The quoting character and a helper function to quote a given string
+  const char quote = '"';
+  const auto quote_string = [quote](std::string s) {
+    return StringUtil::Format("%c%s%c", quote, s.c_str(), quote);
+  };
+
+  // Test input rows
+  // clang-format off
+  std::vector<std::string> rows = {
+      "1,2,3.9,four",
+      "5,6,7.4,eight",
+      "9,10,11.1," + quote_string("twelve"),
+      "14,15,16.7,eighteen " + quote_string("nineteen") + " twenty " + quote_string("twenty-one")};
+  // clang-format on
+
   std::string csv_data;
   for (const auto &row : rows) {
     csv_data.append(row).append("\n");
@@ -93,7 +105,7 @@ TEST_F(CSVScanTranslatorTest, IntCsvScan) {
     const auto &output = consumer.GetOutputTuples();
     ASSERT_EQ(rows.size(), output.size());
     for (uint32_t i = 0; i < rows.size(); i++) {
-      EXPECT_EQ(rows[i], output[i].ToCSV());
+      EXPECT_EQ(StringUtil::Strip(rows[i], '"'), output[i].ToCSV());
     }
   }
 }

From e327ac7e7ef737d5ee2f5e3d01dc7ec52f70eed6 Mon Sep 17 00:00:00 2001
From: Prashanth <pmenon@cs.cmu.edu>
Date: Wed, 6 Jun 2018 16:16:19 -0400
Subject: [PATCH 42/42] Reducing copying overhead for columns, constraints and
 loop variables during CheckConstraints(). We were spending 50% of our time
 here during bulk insertions into wide tables due to unnecessary copying!

---
 src/include/catalog/schema.h |  2 +-
 src/storage/data_table.cpp   | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/include/catalog/schema.h b/src/include/catalog/schema.h
index 43a62d6444f..2f6875b453b 100644
--- a/src/include/catalog/schema.h
+++ b/src/include/catalog/schema.h
@@ -126,7 +126,7 @@ class Schema : public Printable {
     return columns[column_id].IsInlined();
   }
 
-  inline const Column GetColumn(const oid_t column_id) const {
+  inline const Column &GetColumn(const oid_t column_id) const {
     return columns[column_id];
   }
 
diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp
index 1fd81b76865..3660fcc2f79 100644
--- a/src/storage/data_table.cpp
+++ b/src/storage/data_table.cpp
@@ -155,12 +155,12 @@ bool DataTable::CheckConstraints(const AbstractTuple *tuple) const {
   //       column. Like maybe can store a list of just columns that
   //       even have constraints defined so that we don't have to
   //       look at each column individually.
-  oid_t column_count = schema->GetColumnCount();
+  size_t column_count = schema->GetColumnCount();
   for (oid_t column_itr = 0; column_itr < column_count; column_itr++) {
-    std::vector<catalog::Constraint> column_cons =
+    const std::vector<catalog::Constraint> &column_constraints =
         schema->GetColumn(column_itr).GetConstraints();
-    for (auto cons : column_cons) {
-      ConstraintType type = cons.GetType();
+    for (const auto &constraint : column_constraints) {
+      ConstraintType type = constraint.GetType();
       switch (type) {
         case ConstraintType::NOTNULL: {
           if (CheckNotNulls(tuple, column_itr) == false) {
@@ -208,9 +208,9 @@ bool DataTable::CheckConstraints(const AbstractTuple *tuple) const {
           LOG_TRACE("%s", error.c_str());
           throw ConstraintException(error);
         }
-      }  // SWITCH
-    }    // FOR (constraints)
-  }      // FOR (columns)
+      }
+    }
+  }
   return true;
 }