From 6e7682d90a41728e4737b0bf318585e3bbb07fd4 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Wed, 6 Jun 2018 18:26:03 -0700
Subject: [PATCH 01/10] Init CudaCFGFactory and CudaCodeSource

---
 dyninst/cuda/CudaCFGFactory.cpp |  1 +
 dyninst/cuda/CudaCFGFactory.hpp | 27 +++++++++++++
 dyninst/cuda/CudaCodeSource.cpp |  1 +
 dyninst/cuda/CudaCodeSource.hpp | 69 +++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+)
 create mode 100644 dyninst/cuda/CudaCFGFactory.cpp
 create mode 100644 dyninst/cuda/CudaCFGFactory.hpp
 create mode 100644 dyninst/cuda/CudaCodeSource.cpp
 create mode 100644 dyninst/cuda/CudaCodeSource.hpp
diff --git a/dyninst/cuda/CudaCFGFactory.cpp b/dyninst/cuda/CudaCFGFactory.cpp
new file mode 100644
index 0000000..27de5bc
--- /dev/null
+++ b/dyninst/cuda/CudaCFGFactory.cpp
@@ -0,0 +1 @@
+#include "CudaCFGFactory.hpp"
diff --git a/dyninst/cuda/CudaCFGFactory.hpp b/dyninst/cuda/CudaCFGFactory.hpp
new file mode 100644
index 0000000..5bd8475
--- /dev/null
+++ b/dyninst/cuda/CudaCFGFactory.hpp
@@ -0,0 +1,27 @@
+#include <CFGFactory.h>
+
+namespace Dyninst {
+namespace ParseAPI {
+
+class PARSER_EXPORT CudaCFGFactory : public CFGFactory {   
+ public:
+    CudaCFGFactory() {};
+    virtual ~CudaCFGFactory();
+
+ protected:
+    virtual Function * mkfunc(Address addr, FuncSource src, 
+            std::string name, CodeObject * obj, CodeRegion * region, 
+            Dyninst::InstructionSource * isrc);
+    virtual Block * mkblock(Function * f, CodeRegion * r, 
+            Address addr);
+    virtual Edge * mkedge(Block * src, Block * trg, 
+            EdgeTypeEnum type);
+    virtual Block * mksink(CodeObject *obj, CodeRegion *r);
+
+    virtual void free_func(Function * f);
+    virtual void free_block(Block * b);
+    virtual void free_edge(Edge * e);
+};
+
+}
+}
diff --git a/dyninst/cuda/CudaCodeSource.cpp b/dyninst/cuda/CudaCodeSource.cpp
new file mode 100644
index 0000000..01e03ff
--- /dev/null
+++ b/dyninst/cuda/CudaCodeSource.cpp
@@ -0,0 +1 @@
+#include "CudaCodeSource.hpp"
diff --git a/dyninst/cuda/CudaCodeSource.hpp b/dyninst/cuda/CudaCodeSource.hpp
new file mode 100644
index 0000000..64a593e
--- /dev/null
+++ b/dyninst/cuda/CudaCodeSource.hpp
@@ -0,0 +1,69 @@
+#include <CodeSource.h>
+
+namespace Dyninst {
+namespace ParseAPI {
+
+class PARSER_EXPORT CudaCodeSource : public CodeSource {
+ public:
+    CudaCodeSource() {};
+    virtual ~CudaCodeSource();
+
+ public:
+    /** CodeSource Implementation **/
+    virtual bool nonReturning(Address /*func_entry*/);
+    virtual bool nonReturningSyscall(int /*number*/);
+    virtual Address baseAddress();
+    virtual Address loadAddress();
+    virtual Address getTOC(Address);
+
+    virtual void print_stats() const;
+    virtual bool have_stats() const;
+
+    virtual void incrementCounter(const std::string& /*name*/) const;
+    virtual void addCounter(const std::string& /*name*/, int /*num*/) const;
+    virtual void decrementCounter(const std::string& /*name*/) const;
+    virtual void startTimer(const std::string& /*name*/) const;
+    virtual void stopTimer(const std::string& /*name*/) const;
+    virtual bool findCatchBlockByTryRange(Address /*given try address*/, std::set<Address> & /* catch start */)  const;
+
+    /** InstructionSource implementation **/
+    bool isValidAddress(const Address) const;
+    void* getPtrToInstruction(const Address) const;
+    void* getPtrToData(const Address) const;
+    unsigned int getAddressWidth() const;
+    bool isCode(const Address) const;
+    bool isData(const Address) const;
+    bool isReadOnly(const Address) const;
+    Address offset() const;
+    Address length() const;
+    Architecture getArch() const;
+};
+
+
+class PARSER_EXPORT CudaCodeRegion : public CodeRegion {
+ public:
+    CudaCodeRegion();
+    ~CudaCodeRegion();
+
+    /** CodeRegion implementation **/
+    void names(Address, std::vector<std::string> &);
+    bool findCatchBlock(Address addr, Address & catchStart);
+
+    Address low() const;
+    Address high() const;
+
+    /** InstructionSource implementation **/
+    bool isValidAddress(const Address) const;
+    void* getPtrToInstruction(const Address) const;
+    void* getPtrToData(const Address) const;
+    unsigned int getAddressWidth() const;
+    bool isCode(const Address) const;
+    bool isData(const Address) const;
+    bool isReadOnly(const Address) const;
+    Address offset() const;
+    Address length() const;
+    Architecture getArch() const;
+};
+
+}
+}

From 74a086859e66384b4cd3f58f741ea54294a43541 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Fri, 8 Jun 2018 01:49:42 -0700
Subject: [PATCH 02/10] First Attempt to construct CFG

---
 dyninst/cuda/CFGParser.cpp      | 127 ++++++++++++++++++++++++++
 dyninst/cuda/CFGParser.hpp      |  33 +++++++
 dyninst/cuda/CudaCFGFactory.cpp |  63 +++++++++++++
 dyninst/cuda/CudaCFGFactory.hpp |  18 ++--
 dyninst/cuda/CudaCodeSource.cpp |  13 +++
 dyninst/cuda/CudaCodeSource.hpp |  70 ++++-----------
 dyninst/cuda/DotCFG.hpp         | 153 ++++++++++++++++++++++++++++++++
 dyninst/cuda/Graph.hpp          |  52 +++++++++++
 dyninst/cuda/GraphReader.cpp    |  51 +++++++++++
 dyninst/cuda/GraphReader.hpp    |  35 ++++++++
 dyninst/cuda/Makefile           |  64 +++++++++++++
 dyninst/cuda/cuda-parse.cpp     |  49 ++++++++--
 12 files changed, 656 insertions(+), 72 deletions(-)
 create mode 100644 dyninst/cuda/CFGParser.cpp
 create mode 100644 dyninst/cuda/CFGParser.hpp
 create mode 100644 dyninst/cuda/DotCFG.hpp
 create mode 100644 dyninst/cuda/Graph.hpp
 create mode 100644 dyninst/cuda/GraphReader.cpp
 create mode 100644 dyninst/cuda/GraphReader.hpp
 create mode 100644 dyninst/cuda/Makefile

diff --git a/dyninst/cuda/CFGParser.cpp b/dyninst/cuda/CFGParser.cpp
new file mode 100644
index 0000000..9f60b50
--- /dev/null
+++ b/dyninst/cuda/CFGParser.cpp
@@ -0,0 +1,127 @@
+#include "CFGParser.hpp"
+#include <cctype>
+
+namespace CudaParse {
+
+// Split a dot node label into per-instruction strings: every "\l" (plus any
+// following '|' characters) becomes a line break; leading lines that do not
+// start with a digit or '<' are dropped, and so is the final line (presumably
+// the label's closing remainder -- TODO confirm against nvdisasm output).
+void CFGParser::parse_inst_strings(const std::string &label,
+                                   std::deque<std::string> &inst_strings) {
+  std::regex e("\\\\l([|]*)");
+  std::istringstream ss(std::regex_replace(label, e, "\n"));
+  for (std::string s; std::getline(ss, s); ) inst_strings.push_back(s);
+  while (!inst_strings.empty()) {
+    const unsigned char c = static_cast<unsigned char>(inst_strings.front()[0]);
+    if (isdigit(c) || c == '<') break;  // first instruction line reached
+    inst_strings.pop_front();
+  }
+  // pop_back() on an empty deque is undefined behaviour, so guard it.
+  if (!inst_strings.empty()) inst_strings.pop_back();
+}
+
+
+size_t CFGParser::find_block_parent(size_t node) {
+  size_t parent = _block_parent[node];
+  size_t graph_size = _block_parent.size();
+  if (parent == graph_size) {
+    return _block_parent[node] = node;
+  } else if (parent == node) {
+    return node;
+  } else {
+    return _block_parent[node] = find_block_parent(parent);
+  }
+}
+
+
+void CFGParser::unite_blocks(size_t l, size_t r) {
+  _block_parent[l] = find_block_parent(r);
+}
+
+
+static bool compare_block_ptr(Block *l, Block *r) {
+  return *l < *r;
+}
+
+
+static bool compare_target_ptr(Target *l, Target *r) {
+  return *l < *r;
+}
+
+
+void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
+  std::unordered_map<size_t, Block *> block_map;
+  std::vector<Block *> blocks;
+  size_t graph_size = graph.vertices.size();
+  _block_parent.resize(graph_size);
+  for (size_t i = 0; i < graph_size; ++i) {
+    _block_parent[i] = graph_size;
+  }
+
+  // Parse every vertex to build blocks
+  for (auto vertex : graph.vertices) {
+    Block *block = new Block(vertex->id, vertex->name);
+
+    std::deque<std::string> inst_strings;
+    parse_inst_strings(vertex->label, inst_strings);
+    for (auto inst_string : inst_strings) {
+      block->insts.push_back(new Inst(inst_string));
+    }
+
+    blocks.push_back(block);
+    block_map[block->id] = block;
+  }
+
+  // Parse every edge to build block relations
+  for (auto edge : graph.edges) {
+    // Find toppest block
+    unite_blocks(edge->target_id, edge->source_id);
+    Block *target_block = block_map[edge->target_id];
+    Block *source_block = block_map[edge->source_id];
+    // Link blocks
+    Inst *target_inst;
+    for (auto inst : source_block->insts) {
+      if (inst->port == edge->source_port[0]) {
+        source_block->targets.push_back(new Target(inst, target_block));
+      }
+    }
+    // Some edge may not have port information
+    if (source_block->targets.size() == 0) {
+      source_block->targets.push_back(new Target(source_block->insts.back(), target_block));
+    }
+  }
+
+  //for (auto block : blocks) {
+  //  std::cout << "From: " << std::endl;
+  //  std::cout << block->name << std::endl;
+  //  std::cout << "Target: " << std::endl;
+  //  for (auto target : block->targets) {
+  //    std::cout << target->block->name << std::endl;
+  //  }
+  //}
+
+  // Build functions
+  size_t function_id = 0;
+  for (auto block : blocks) {
+    // Sort block targets according to inst offset
+    std::sort(block->targets.begin(), block->targets.end(), compare_target_ptr);
+    if (find_block_parent(block->id) == block->id) {
+      // Filter out self contained useless loops. A normal function will not start with "."
+      if (block_map[block->id]->name[0] == '.') {
+        continue;
+      }
+      Function *function = new Function(function_id, block_map[block->id]->name);
+      ++function_id;
+      for (auto bb : blocks) {
+        if (find_block_parent(bb->id) == block->id) {
+          function->blocks.push_back(bb);
+        }
+      }
+      std::sort(function->blocks.begin(), function->blocks.end(), compare_block_ptr);
+      functions.push_back(function);
+    }
+  }
+}
+
+}
diff --git a/dyninst/cuda/CFGParser.hpp b/dyninst/cuda/CFGParser.hpp
new file mode 100644
index 0000000..72157df
--- /dev/null
+++ b/dyninst/cuda/CFGParser.hpp
@@ -0,0 +1,33 @@
+#ifndef _CFG_PARSER_H_
+#define _CFG_PARSER_H_
+
+#include <string>
+#include <vector>
+#include <deque>
+#include <unordered_map>
+#include "DotCFG.hpp"
+#include "Graph.hpp"
+
+namespace CudaParse {
+
+class CFGParser {
+ public:
+  CFGParser() : _block_parent(0) {}
+  void parse(const Graph &graph, std::vector<Function *> &functions);
+
+  ~CFGParser() {};
+
+ private:
+  void parse_inst_strings(const std::string &label, std::deque<std::string> &inst_strings);
+
+  size_t find_block_parent(size_t node);
+
+  void unite_blocks(size_t l, size_t r);
+
+ private:
+  std::vector<size_t> _block_parent;
+};
+
+}
+
+#endif
diff --git a/dyninst/cuda/CudaCFGFactory.cpp b/dyninst/cuda/CudaCFGFactory.cpp
index 27de5bc..2bceb10 100644
--- a/dyninst/cuda/CudaCFGFactory.cpp
+++ b/dyninst/cuda/CudaCFGFactory.cpp
@@ -1 +1,64 @@
 #include "CudaCFGFactory.hpp"
+
+namespace Dyninst {
+namespace ParseAPI {
+
+Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src, 
+  std::string name, CodeObject * obj, CodeRegion * region, 
+  Dyninst::InstructionSource * isrc) {
+  // find function by name
+  for (auto *function : _functions) {
+    if (function->name == name) {
+      Function *ret_func = new Function(addr, name, obj, region, isrc);
+      //ret_func->_cache_valid = true;
+      funcs_.add(*ret_func);
+
+      bool first_entry = true;
+      for (auto *block : function->blocks) {
+        Block *ret_block = NULL;
+        if (_block_filter.find(block->id) == _block_filter.end()) {
+          ret_block = new Block(obj, region, block->insts[0]->offset);
+          blocks_.add(*ret_block);
+        } else {
+          ret_block = _block_filter[block->id];
+        }
+
+        if (first_entry) {
+          ret_func->setEntryBlock(ret_block);
+          first_entry = false;
+        }
+
+        //ret_func->add_block(ret_block);
+
+        for (auto *target : block->targets) {
+          Block *ret_target_block = NULL;
+          if (_block_filter.find(target->block->id) == _block_filter.end()) {
+            ret_target_block = new Block(obj, region, target->block->insts[0]->offset);
+            blocks_.add(*ret_target_block);
+          } else {
+            ret_target_block = _block_filter[target->block->id];
+          }
+
+          Edge *ret_edge = NULL;
+          if (target->type == CudaParse::CALL) {
+            ret_edge = new Edge(ret_block, ret_target_block, CALL);
+            //ret_func->_call_edge_list.insert(ret_edge);
+          } else {  // TODO(Keren): Add more edge types
+            ret_edge = new Edge(ret_block, ret_target_block, DIRECT);
+          }
+          ret_edge->install();
+          edges_.add(*ret_edge);
+        }
+      }
+      return ret_func;
+    }
+  }
+  return NULL;
+  // iterate blocks
+  // add blocks
+  // iterate targets
+  // add edges
+}
+
+}
+}
diff --git a/dyninst/cuda/CudaCFGFactory.hpp b/dyninst/cuda/CudaCFGFactory.hpp
index 5bd8475..64b982a 100644
--- a/dyninst/cuda/CudaCFGFactory.hpp
+++ b/dyninst/cuda/CudaCFGFactory.hpp
@@ -1,26 +1,24 @@
 #include <CFGFactory.h>
+#include <unordered_map>
+
+#include "DotCFG.hpp"
 
 namespace Dyninst {
 namespace ParseAPI {
 
 class PARSER_EXPORT CudaCFGFactory : public CFGFactory {   
  public:
-    CudaCFGFactory() {};
-    virtual ~CudaCFGFactory();
+    CudaCFGFactory(std::vector<CudaParse::Function *> &functions) : _functions(functions) {};
+    ~CudaCFGFactory();
 
  protected:
     virtual Function * mkfunc(Address addr, FuncSource src, 
             std::string name, CodeObject * obj, CodeRegion * region, 
             Dyninst::InstructionSource * isrc);
-    virtual Block * mkblock(Function * f, CodeRegion * r, 
-            Address addr);
-    virtual Edge * mkedge(Block * src, Block * trg, 
-            EdgeTypeEnum type);
-    virtual Block * mksink(CodeObject *obj, CodeRegion *r);
 
-    virtual void free_func(Function * f);
-    virtual void free_block(Block * b);
-    virtual void free_edge(Edge * e);
+ private:
+    std::vector<CudaParse::Function *> &_functions;
+    std::unordered_map<size_t, Block *> _block_filter; 
 };
 
 }
diff --git a/dyninst/cuda/CudaCodeSource.cpp b/dyninst/cuda/CudaCodeSource.cpp
index 01e03ff..949247d 100644
--- a/dyninst/cuda/CudaCodeSource.cpp
+++ b/dyninst/cuda/CudaCodeSource.cpp
@@ -1 +1,14 @@
 #include "CudaCodeSource.hpp"
+
+namespace Dyninst {
+namespace ParseAPI {
+
+CudaCodeSource::CudaCodeSource(std::vector<CudaParse::Function *> &functions) {
+  for (auto *function : functions) {  // one hint per function, at its first instruction
+    if (function->blocks.empty() || function->blocks[0]->insts.empty()) continue;  // nothing to index
+    _hints.push_back(Hint(function->blocks[0]->insts[0]->offset, 0, 0, function->name));
+  }
+}
+
+}
+}
diff --git a/dyninst/cuda/CudaCodeSource.hpp b/dyninst/cuda/CudaCodeSource.hpp
index 64a593e..ee959b5 100644
--- a/dyninst/cuda/CudaCodeSource.hpp
+++ b/dyninst/cuda/CudaCodeSource.hpp
@@ -1,68 +1,28 @@
+#include <dyn_regs.h>
 #include <CodeSource.h>
 
+#include "DotCFG.hpp"
+
 namespace Dyninst {
 namespace ParseAPI {
 
 class PARSER_EXPORT CudaCodeSource : public CodeSource {
  public:
-    CudaCodeSource() {};
-    virtual ~CudaCodeSource();
-
- public:
-    /** CodeSource Implementation **/
-    virtual bool nonReturning(Address /*func_entry*/);
-    virtual bool nonReturningSyscall(int /*number*/);
-    virtual Address baseAddress();
-    virtual Address loadAddress();
-    virtual Address getTOC(Address);
-
-    virtual void print_stats() const;
-    virtual bool have_stats() const;
-
-    virtual void incrementCounter(const std::string& /*name*/) const;
-    virtual void addCounter(const std::string& /*name*/, int /*num*/) const;
-    virtual void decrementCounter(const std::string& /*name*/) const;
-    virtual void startTimer(const std::string& /*name*/) const;
-    virtual void stopTimer(const std::string& /*name*/) const;
-    virtual bool findCatchBlockByTryRange(Address /*given try address*/, std::set<Address> & /* catch start */)  const;
-
-    /** InstructionSource implementation **/
-    bool isValidAddress(const Address) const;
-    void* getPtrToInstruction(const Address) const;
-    void* getPtrToData(const Address) const;
-    unsigned int getAddressWidth() const;
-    bool isCode(const Address) const;
-    bool isData(const Address) const;
-    bool isReadOnly(const Address) const;
-    Address offset() const;
-    Address length() const;
-    Architecture getArch() const;
-};
-
+    CudaCodeSource(std::vector<CudaParse::Function *> &functions);
+    ~CudaCodeSource() {};
 
-class PARSER_EXPORT CudaCodeRegion : public CodeRegion {
  public:
-    CudaCodeRegion();
-    ~CudaCodeRegion();
-
-    /** CodeRegion implementation **/
-    void names(Address, std::vector<std::string> &);
-    bool findCatchBlock(Address addr, Address & catchStart);
-
-    Address low() const;
-    Address high() const;
-
     /** InstructionSource implementation **/
-    bool isValidAddress(const Address) const;
-    void* getPtrToInstruction(const Address) const;
-    void* getPtrToData(const Address) const;
-    unsigned int getAddressWidth() const;
-    bool isCode(const Address) const;
-    bool isData(const Address) const;
-    bool isReadOnly(const Address) const;
-    Address offset() const;
-    Address length() const;
-    Architecture getArch() const;
+    virtual bool isValidAddress(const Address) const { return false; }
+    virtual void* getPtrToInstruction(const Address) const { return NULL; }
+    virtual void* getPtrToData(const Address) const { return NULL; }
+    virtual unsigned int getAddressWidth() const { return 0; }
+    virtual bool isCode(const Address) const { return false; }
+    virtual bool isData(const Address) const { return false; }
+    virtual bool isReadOnly(const Address) const { return false; }
+    virtual Address offset() const { return 0; }
+    virtual Address length() const { return 0; }
+    virtual Architecture getArch() const { return Arch_cuda; }
 };
 
 }
diff --git a/dyninst/cuda/DotCFG.hpp b/dyninst/cuda/DotCFG.hpp
new file mode 100644
index 0000000..c16cb63
--- /dev/null
+++ b/dyninst/cuda/DotCFG.hpp
@@ -0,0 +1,153 @@
+#ifndef _DOT_CFG_H_
+#define _DOT_CFG_H_
+
+#include <iostream>
+#include <regex>
+#include <string>
+#include <sstream>
+#include <unordered_set>
+#include <vector>
+
+namespace CudaParse {
+
+// One disassembled instruction parsed from a single line of a dot node label.
+// Shape inferred from the parsing below: "[<port>]<hex offset>:<opcode>\ <operand>\ ...".
+struct Inst {
+  int offset = 0;  // stays 0 when the line has no first field to parse
+  std::string opcode;
+  std::string port;
+  std::vector<std::string> operands;
+
+  // NOTE: inst_str is modified in place when it starts with '/' (dual-issue marker).
+  Inst(std::string &inst_str) {
+    if (inst_str.size() >= 2 && inst_str[0] == '/') {  // Dual issue
+      inst_str = inst_str.substr(2);
+      auto pos = inst_str.find("*/");
+      // Without this guard a missing "*/" makes replace() throw std::out_of_range.
+      if (pos != std::string::npos) inst_str.replace(pos, 2, "");
+    }
+    std::istringstream iss(inst_str);
+    std::string s;
+    if (std::getline(iss, s, ':')) {
+      if (s.find("<") != std::string::npos) {
+        auto pos = s.find(">");
+        this->port = s.substr(1, pos - 1);
+        s = s.substr(pos + 1);
+      }
+      std::stringstream ss;
+      ss << std::hex << s;
+      ss >> offset;
+      if (std::getline(iss, s, ':')) {
+        std::regex e("\\\\ ");
+        iss = std::istringstream(std::regex_replace(s, e, "\n"));
+        while (std::getline(iss, s)) {
+          if (s.empty()) continue;
+          // First non-empty token is the opcode, every later one an operand.
+          if (opcode.empty()) opcode = s; else operands.push_back(s);
+        }
+      }
+    }
+  }
+};
+
+
+struct Block;
+
+enum TargetType {
+  CALL = 0,
+  DIRECT = 1
+};
+
+struct Target {
+  Inst *inst;
+  Block *block;
+  TargetType type; 
+
+  Target(Inst *inst, Block *block) : inst(inst), block(block) {}
+
+  bool operator<(const Target &other) const {
+    return this->inst->offset < other.inst->offset;
+  }
+};
+
+
+// A basic block; its destructor deletes the instructions and targets it holds.
+struct Block {
+  std::vector<Inst *> insts;
+  std::vector<Target *> targets;
+  size_t id;
+  std::string name;
+
+  Block(size_t id, std::string &name) : id(id), name(name) {}
+
+  // Orders by first-instruction offset, instruction-less blocks first. Two
+  // instruction-less blocks compare equivalent (never true in both directions),
+  // which the strict weak ordering required by std::sort depends on.
+  bool operator<(const Block &other) const {
+    if (this->insts.empty()) {
+      return !other.insts.empty();
+    }
+    if (other.insts.empty()) {
+      return false;
+    }
+    return this->insts[0]->offset < other.insts[0]->offset;
+  }
+
+  ~Block() {
+    for (auto *inst : insts) delete inst;
+    for (auto *target : targets) delete target;
+  }
+};
+
+
+struct Function {
+  std::vector<Block *> blocks;
+  size_t id;
+  std::string name;
+
+  Function(size_t id, const std::string &name) : id(id), name(name) {}
+
+  ~Function() {
+    for (auto *block : blocks) {
+      delete block;
+    }
+  }
+};
+
+
+struct LoopEntry {
+  Block *entry_block; 
+  Block *back_edge_block;
+  Inst *back_edge_inst;
+
+  LoopEntry(Block *entry_block) : entry_block(entry_block) {}
+
+  LoopEntry(Block *entry_block, Block *back_edge_block, Inst *back_edge_inst) :
+    entry_block(entry_block), back_edge_block(back_edge_block), back_edge_inst(back_edge_inst) {}
+};
+
+
+struct Loop {
+  std::vector<LoopEntry *> entries;
+  std::unordered_set<Loop *> child_loops;
+  std::unordered_set<Block *> blocks;
+  std::unordered_set<Block *> child_blocks;
+  Function *function;
+
+  Loop(Function *function) : function(function) {}
+};
+
+
+struct Call {
+  Inst *inst;
+  Block *block; 
+  Function *caller_function;
+  Function *callee_function;
+
+  Call(Inst *inst, Block *block, Function *caller_function, Function *callee_function) :
+    inst(inst), block(block), caller_function(caller_function), callee_function(callee_function) {}
+};
+
+}
+
+#endif
diff --git a/dyninst/cuda/Graph.hpp b/dyninst/cuda/Graph.hpp
new file mode 100644
index 0000000..5300e36
--- /dev/null
+++ b/dyninst/cuda/Graph.hpp
@@ -0,0 +1,52 @@
+#ifndef _GRAPH_H_
+#define _GRAPH_H_
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace CudaParse {
+
+struct Vertex {
+  size_t id;
+  std::string name;
+  std::string label;
+
+  Vertex(size_t id, const std::string &name, const std::string &label) :
+    id(id), name(name), label(label) {}
+};
+
+
+struct Edge {
+  size_t source_id;
+  size_t target_id;
+  // At most two entries for port
+  std::vector<std::string> source_port;
+  std::vector<std::string> target_port;
+
+  Edge(size_t source_id, size_t target_id,
+    const std::vector<std::string> &source_port,
+    const std::vector<std::string> &target_port) :
+    source_id(source_id), target_id(target_id),
+    source_port(source_port), target_port(target_port) {}
+};
+
+
+struct Graph {
+  std::vector<Vertex *> vertices;  // vertex_id->vertex
+  std::vector<Edge *> edges;
+
+  Graph() = default;
+  // The destructor deletes every element, so a copy would double-delete them.
+  Graph(const Graph &) = delete;
+  Graph &operator=(const Graph &) = delete;
+
+  ~Graph() {
+    for (auto *vertex : vertices) delete vertex;
+    for (auto *edge : edges) delete edge;
+  }
+};
+
+}
+
+#endif
diff --git a/dyninst/cuda/GraphReader.cpp b/dyninst/cuda/GraphReader.cpp
new file mode 100644
index 0000000..fcc6adb
--- /dev/null
+++ b/dyninst/cuda/GraphReader.cpp
@@ -0,0 +1,51 @@
+#include "GraphReader.hpp"
+#include <vector>
+
+namespace CudaParse {
+
+void GraphReader::read(Graph &graph) {
+  // Read dot graph
+  std::ifstream file(_file_name);
+  std::stringstream dotfile;
+
+  dotfile << file.rdbuf();
+  file.close();
+
+  boost::read_graphviz_detail::parser_result result;
+  boost::read_graphviz_detail::parse_graphviz_from_string(dotfile.str(), result, true);
+
+  std::unordered_map<std::string, size_t> vertex_name_to_id;
+  read_vertices(result, vertex_name_to_id, graph);  
+  read_edges(result, vertex_name_to_id, graph);
+}
+
+
+void GraphReader::read_vertices(
+  const boost::read_graphviz_detail::parser_result &result,
+  std::unordered_map<std::string, size_t> &vertex_name_to_id,
+  Graph &graph) {
+  size_t vertex_id = 0;
+  for (auto node : result.nodes) {
+    const std::string &vertex_name = node.first;
+    const std::string &vertex_label = (node.second)["label"];
+    graph.vertices.push_back(new Vertex(vertex_id, vertex_name, vertex_label));
+    vertex_name_to_id[vertex_name] = vertex_id;
+    ++vertex_id;
+  }
+}
+
+
+void GraphReader::read_edges(
+  const boost::read_graphviz_detail::parser_result &result,
+  std::unordered_map<std::string, size_t> &vertex_name_to_id,
+  Graph &graph) {
+  for (auto einfo : result.edges) {
+    size_t source_id = vertex_name_to_id[einfo.source.name];
+    size_t target_id = vertex_name_to_id[einfo.target.name];
+    std::vector<std::string> &source_port = einfo.source.location;
+    std::vector<std::string> &target_port = einfo.target.location;
+    graph.edges.push_back(new Edge(source_id, target_id, source_port, target_port));
+  }
+}
+
+}
diff --git a/dyninst/cuda/GraphReader.hpp b/dyninst/cuda/GraphReader.hpp
new file mode 100644
index 0000000..d6e286e
--- /dev/null
+++ b/dyninst/cuda/GraphReader.hpp
@@ -0,0 +1,35 @@
+#ifndef _GRAPH_READER_H_
+#define _GRAPH_READER_H_
+
+#include <unordered_map>
+#include <string>
+#include <boost/graph/graphviz.hpp>
+#include <boost/graph/detail/read_graphviz_new.hpp>
+#include "Graph.hpp"
+
+namespace CudaParse {
+
+class GraphReader {
+ public:
+  GraphReader(const std::string &file_name) : _file_name(file_name) {}
+
+  void read(Graph &graph);
+
+ private:
+  void read_vertices(
+    const boost::read_graphviz_detail::parser_result &result,
+    std::unordered_map<std::string, size_t> &vertex_name_to_id,
+    Graph &graph);
+
+  void read_edges(
+    const boost::read_graphviz_detail::parser_result &result,
+    std::unordered_map<std::string, size_t> &vertex_name_to_id,
+    Graph &graph);
+
+ private:
+  std::string _file_name;
+};
+
+}
+
+#endif
diff --git a/dyninst/cuda/Makefile b/dyninst/cuda/Makefile
new file mode 100644
index 0000000..a6e0d87
--- /dev/null
+++ b/dyninst/cuda/Makefile
@@ -0,0 +1,64 @@
+#
+#  Make.template
+#
+#  Copy this file to Makefile, fill in the paths and then run make.
+#  Assume we're using new elfutils with libdw and not libdwarf.
+#
+
+CXX = g++
+CXXFLAGS = -g -O -std=c++11
+
+DYNINST = /home/jokeren/Install/dyninst
+BOOST = /home/jokeren/Install/spack/linux-ubuntu18.04-x86_64/gcc-7.3.0/boost-1.65.1-yedailx37eqncxmqibk66elhc7pnmz3d
+ELFUTILS = /home/jokeren/Install/spack/linux-ubuntu18.04-x86_64/gcc-7.3.0/elfutils-0.170-5vs7ecnlnxnobkfyx2m4fb6tnhlsrs2g
+BZIP = /home/jokeren/Install/spack/linux-ubuntu18.04-x86_64/gcc-7.3.0/bzip2-1.0.6-jb5oxcpfhd4orv7dua5vukxtr6x45lit
+LZMA = /home/jokeren/Install/spack/linux-ubuntu18.04-x86_64/gcc-7.3.0/lzma-4.32.7-vm3tyb6brnwygs3g27csif2hfbtgf6lb
+ZLIB = /home/jokeren/Install/zlib
+
+#------------------------------------------------------------
+
+#  Hopefully, don't need to change anything below here.
+
+DEFS = -DDYNINST_USE_CUDA
+
+INCL =  \
+    -I$(DYNINST)/include  \
+    -I$(BOOST)/include  \
+    -I$(ELFUTILS)/include
+
+LIBS =  \
+    -L$(DYNINST)/lib  \
+    -lparseAPI  -linstructionAPI  -lsymtabAPI  -ldynDwarf  -ldynElf  -lcommon  \
+    -L$(BOOST)/lib  \
+    -lboost_atomic  -lboost_chrono  -lboost_date_time  -lboost_filesystem  \
+    -lboost_system  -lboost_thread  -lboost_graph\
+    -L$(ELFUTILS)/lib  -lelf  -ldw  \
+    -ldl
+
+RPATH =  \
+    -Wl,-rpath,$(DYNINST)/lib  \
+    -Wl,-rpath,$(BOOST)/lib  \
+    -Wl,-rpath,$(ELFUTILS)/lib  \
+    -Wl,-rpath,$(BZIP)/lib  \
+    -Wl,-rpath,$(LZMA)/lib  \
+    -Wl,-rpath,$(ZLIB)/lib
+
+
+PROG = cuda-parse
+
+OBJS = cuda-parse.o  ElfHelper.o  Fatbin.o  InputFile.o  RelocateCubin.o  CudaCodeSource.o  CudaCFGFactory.o  GraphReader.o  CFGParser.o
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(CXX)  -o $@  $(OBJS)  $(LIBS)  $(RPATH)
+
+%.o: %.cpp
+	$(CXX)  -c -o $@  $(CXXFLAGS)  $(DEFS)  $(INCL)  $<
+
+clean:
+	rm -f  *.o
+
+vclean: clean
+	rm -f  $(PROG)
+
diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index 3051734..961e728 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -99,6 +99,9 @@
 #include "Fatbin.hpp"
 #include "InputFile.hpp"
 #include "RelocateCubin.hpp"
+#include "CudaCodeSource.hpp"
+#include "GraphReader.hpp"
+#include "CFGParser.hpp"
 
 #define MAX_VMA  0xfffffffffffffff0
 
@@ -559,15 +562,42 @@ main(int argc, char **argv)
 	gettimeofday(&tv_symtab, NULL);
 	getrusage(RUSAGE_SELF, &ru_symtab);
 
-	SymtabCodeSource * code_src = NULL;
+	CodeSource * code_src = NULL;
 	CodeObject * code_obj = NULL;
 
 	if (cuda_file) {
-	    //
-	    // FIXME: write a replacement for CodeObject and parse()
-	    //
-	    cout << "\nskip cuda:  " << elf_name << endl;
-	    continue;
+      std::string relocated_cubin = filename + ".relocated";
+      int fd = open(relocated_cubin.c_str(), O_WRONLY);
+      if (write(fd, elf_addr, elf_len) != elf_len) {
+          cout << "Write " << relocated_cubin << " to disk failed" << endl; 
+          continue;
+      }
+
+      std::string relocated_dot = filename + ".dot";
+      std::string cmd = "nvdisasm -cfg -poff " + relocated_cubin + " > " + relocated_dot;
+      std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose);
+      if (!pipe) {
+          cout << "Dump " << relocated_dot << " to disk failed" << endl; 
+          continue;
+      }
+
+      CudaParse::GraphReader graph_reader(relocated_dot);
+      CudaParse::Graph graph;
+      graph_reader.read(graph);
+      CudaParse::CFGParser cfg_parser;
+      std::vector<CudaParse::Function *> functions;
+      cfg_parser.parse(graph, functions);
+
+      code_src = new CudaCodeSource(functions); 
+      std::vector< Hint > hints = code_src->hints();
+      for (auto hint : hints) {
+          cout << hint._name << std::endl;
+      }
+      //code_obj = new CodeObject(code_src);
+      for (auto *function : functions) {
+        delete function;
+      }
+      continue;
 	}
 	else {
 	    code_src = new SymtabCodeSource(the_symtab);
@@ -617,8 +647,13 @@ main(int argc, char **argv)
 	    delete code_obj;
 	}
 	if (code_src != NULL) {
-	    delete code_src;
+      if (cuda_file) {
+	        delete (CudaCodeSource *)code_src;
+      } else {
+	        delete (SymtabCodeSource *)code_src;
+      }
 	}
+
 	Symtab::closeSymtab(the_symtab);
     }
 

From 5ee2f353d5db48453a9847886b1376250dd06e78 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Wed, 20 Jun 2018 09:27:59 -0700
Subject: [PATCH 03/10] Init first attempt to parse cubin loops

---
 dyninst/cuda/CFGParser.cpp      | 12 ++++++++--
 dyninst/cuda/CudaCFGFactory.cpp | 17 +++++++------
 dyninst/cuda/CudaCFGFactory.hpp | 19 +++++++++------
 dyninst/cuda/CudaCodeSource.hpp | 25 ++++++++++++++++++++
 dyninst/cuda/CudaFunction.cpp   | 13 ++++++++++
 dyninst/cuda/CudaFunction.hpp   | 23 ++++++++++++++++++
 dyninst/cuda/DotCFG.hpp         |  8 ++++---
 dyninst/cuda/Makefile           |  3 ++-
 dyninst/cuda/cuda-parse.cpp     | 42 +++++++++++++++++----------------
 9 files changed, 122 insertions(+), 40 deletions(-)
 create mode 100644 dyninst/cuda/CudaFunction.cpp
 create mode 100644 dyninst/cuda/CudaFunction.hpp

diff --git a/dyninst/cuda/CFGParser.cpp b/dyninst/cuda/CFGParser.cpp
index 9f60b50..fe1f09f 100644
--- a/dyninst/cuda/CFGParser.cpp
+++ b/dyninst/cuda/CFGParser.cpp
@@ -1,5 +1,6 @@
 #include "CFGParser.hpp"
 #include <cctype>
+#include <iostream>
 
 namespace CudaParse {
 
@@ -79,16 +80,23 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
     unite_blocks(edge->target_id, edge->source_id);
     Block *target_block = block_map[edge->target_id];
     Block *source_block = block_map[edge->source_id];
+    
+    TargetType type = DIRECT;
     // Link blocks
     Inst *target_inst;
     for (auto inst : source_block->insts) {
       if (inst->port == edge->source_port[0]) {
-        source_block->targets.push_back(new Target(inst, target_block));
+        if (inst == source_block->insts.back()) {
+          type = FALLTHROUGH;
+        }
+        source_block->targets.push_back(new Target(inst, target_block, type));
       }
     }
     // Some edge may not have port information
     if (source_block->targets.size() == 0) {
-      source_block->targets.push_back(new Target(source_block->insts.back(), target_block));
+      Inst *inst = source_block->insts.back();
+      type = FALLTHROUGH;
+      source_block->targets.push_back(new Target(inst, target_block, type));
     }
   }
 
diff --git a/dyninst/cuda/CudaCFGFactory.cpp b/dyninst/cuda/CudaCFGFactory.cpp
index 2bceb10..9722f76 100644
--- a/dyninst/cuda/CudaCFGFactory.cpp
+++ b/dyninst/cuda/CudaCFGFactory.cpp
@@ -1,4 +1,6 @@
 #include "CudaCFGFactory.hpp"
+#include "CudaFunction.hpp"
+#include <iostream>
 
 namespace Dyninst {
 namespace ParseAPI {
@@ -9,32 +11,32 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
   // find function by name
   for (auto *function : _functions) {
     if (function->name == name) {
-      Function *ret_func = new Function(addr, name, obj, region, isrc);
-      //ret_func->_cache_valid = true;
-      funcs_.add(*ret_func);
+      CudaFunction *ret_func = new CudaFunction(addr, name, obj, region, isrc);
 
       bool first_entry = true;
       for (auto *block : function->blocks) {
         Block *ret_block = NULL;
         if (_block_filter.find(block->id) == _block_filter.end()) {
           ret_block = new Block(obj, region, block->insts[0]->offset);
+          _block_filter[block->id] = ret_block;
           blocks_.add(*ret_block);
+          ret_func->add_block(ret_block);
         } else {
           ret_block = _block_filter[block->id];
         }
 
         if (first_entry) {
-          ret_func->setEntryBlock(ret_block);
+          ret_func->setEntry(ret_block);
           first_entry = false;
         }
 
-        //ret_func->add_block(ret_block);
-
         for (auto *target : block->targets) {
           Block *ret_target_block = NULL;
           if (_block_filter.find(target->block->id) == _block_filter.end()) {
             ret_target_block = new Block(obj, region, target->block->insts[0]->offset);
+            _block_filter[target->block->id] = ret_target_block;
             blocks_.add(*ret_target_block);
+            ret_func->add_block(ret_target_block);
           } else {
             ret_target_block = _block_filter[target->block->id];
           }
@@ -42,7 +44,8 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
           Edge *ret_edge = NULL;
           if (target->type == CudaParse::CALL) {
             ret_edge = new Edge(ret_block, ret_target_block, CALL);
-            //ret_func->_call_edge_list.insert(ret_edge);
+          } else if (target->type == CudaParse::FALLTHROUGH) {  // compare (==); '=' would overwrite the type and always be true
+            ret_edge = new Edge(ret_block, ret_target_block, FALLTHROUGH);
           } else {  // TODO(Keren): Add more edge types
             ret_edge = new Edge(ret_block, ret_target_block, DIRECT);
           }
diff --git a/dyninst/cuda/CudaCFGFactory.hpp b/dyninst/cuda/CudaCFGFactory.hpp
index 64b982a..6fd6ec8 100644
--- a/dyninst/cuda/CudaCFGFactory.hpp
+++ b/dyninst/cuda/CudaCFGFactory.hpp
@@ -1,3 +1,6 @@
+#ifndef _CUDA_CFG_FACTORY_H_
+#define _CUDA_CFG_FACTORY_H_
+
 #include <CFGFactory.h>
 #include <unordered_map>
 
@@ -8,18 +11,20 @@ namespace ParseAPI {
 
 class PARSER_EXPORT CudaCFGFactory : public CFGFactory {   
  public:
-    CudaCFGFactory(std::vector<CudaParse::Function *> &functions) : _functions(functions) {};
-    ~CudaCFGFactory();
+   CudaCFGFactory(std::vector<CudaParse::Function *> &functions) : _functions(functions) {};
+   virtual ~CudaCFGFactory() {};
 
  protected:
-    virtual Function * mkfunc(Address addr, FuncSource src, 
-            std::string name, CodeObject * obj, CodeRegion * region, 
-            Dyninst::InstructionSource * isrc);
+   virtual Function * mkfunc(Address addr, FuncSource src, 
+     std::string name, CodeObject * obj, CodeRegion * region,
+     Dyninst::InstructionSource * isrc);
 
  private:
-    std::vector<CudaParse::Function *> &_functions;
-    std::unordered_map<size_t, Block *> _block_filter; 
+   std::vector<CudaParse::Function *> &_functions;
+   std::unordered_map<size_t, Block *> _block_filter; 
 };
 
 }
 }
+
+#endif
diff --git a/dyninst/cuda/CudaCodeSource.hpp b/dyninst/cuda/CudaCodeSource.hpp
index ee959b5..01c9819 100644
--- a/dyninst/cuda/CudaCodeSource.hpp
+++ b/dyninst/cuda/CudaCodeSource.hpp
@@ -1,3 +1,6 @@
+#ifndef _CUDA_CODE_SOURCE_H_
+#define _CUDA_CODE_SOURCE_H_
+
 #include <dyn_regs.h>
 #include <CodeSource.h>
 
@@ -23,7 +26,29 @@ class PARSER_EXPORT CudaCodeSource : public CodeSource {
     virtual Address offset() const { return 0; }
     virtual Address length() const { return 0; }
     virtual Architecture getArch() const { return Arch_cuda; }
+
+    virtual bool nonReturning(Address /*func_entry*/) { return false; }
+    virtual bool nonReturningSyscall(int /*number*/) { return false; }
+
+    /* If the binary file type supplies per-function
+     * TOCs (e.g. ppc64 Linux), override.
+     */
+    virtual Address getTOC(Address) const { return _table_of_contents; }
+
+    // statistics accessors: nothing is printed and have_stats() reports false
+    virtual void print_stats() const { return; }
+    virtual bool have_stats() const { return false; }
+
+    // manage statistics: every counter/timer call below is a no-op
+    virtual void incrementCounter(const std::string& /*name*/) const { return; }
+    virtual void addCounter(const std::string& /*name*/, int /*num*/) const { return; }
+    virtual void decrementCounter(const std::string& /*name*/) const { return; }
+    virtual void startTimer(const std::string& /*name*/) const { return; }
+    virtual void stopTimer(const std::string& /*name*/) const { return; }
+    virtual bool findCatchBlockByTryRange(Address /*given try address*/, std::set<Address> & /* catch start */) const { return false; }
 };
 
 }
 }
+
+#endif
diff --git a/dyninst/cuda/CudaFunction.cpp b/dyninst/cuda/CudaFunction.cpp
new file mode 100644
index 0000000..1ee21f3
--- /dev/null
+++ b/dyninst/cuda/CudaFunction.cpp
@@ -0,0 +1,13 @@
+#include "CudaFunction.hpp"
+
+namespace Dyninst {
+namespace ParseAPI {
+
+void CudaFunction::setEntry(Block *entry) {  // make `entry` this function's entry block
+  _region = entry->region();  // region is taken from the entry block
+  _start = entry->start();  // start is taken from the entry block
+  _entry = entry;  // `entry` must be non-null: it is dereferenced above without a check
+}
+
+}
+}
diff --git a/dyninst/cuda/CudaFunction.hpp b/dyninst/cuda/CudaFunction.hpp
new file mode 100644
index 0000000..cb5cc59
--- /dev/null
+++ b/dyninst/cuda/CudaFunction.hpp
@@ -0,0 +1,23 @@
+#ifndef _CUDA_FUNCTION_H_
+#define _CUDA_FUNCTION_H_
+
+#include <CFG.h>
+
+namespace Dyninst {
+namespace ParseAPI {
+
+class PARSER_EXPORT CudaFunction : public Function {  // Function subclass that adds setEntry()
+ public:
+   CudaFunction(Address addr, std::string name, CodeObject * obj, 
+     CodeRegion * region, InstructionSource * isource) :
+     Function(addr, name, obj, region, isource) {}  // forwards all arguments to Function
+
+   virtual ~CudaFunction() {}
+
+   void setEntry(Block *entry);  // sets _region, _start and _entry from `entry`
+};
+
+}
+}
+
+#endif
diff --git a/dyninst/cuda/DotCFG.hpp b/dyninst/cuda/DotCFG.hpp
index c16cb63..beb2f68 100644
--- a/dyninst/cuda/DotCFG.hpp
+++ b/dyninst/cuda/DotCFG.hpp
@@ -7,6 +7,7 @@
 #include <sstream>
 #include <unordered_set>
 #include <vector>
+#include <iostream>
 
 namespace CudaParse {
 
@@ -54,8 +55,9 @@ struct Inst {
 struct Block;
 
 enum TargetType {
-  CALL = 0,
-  DIRECT = 1
+  DIRECT = 0,
+  FALLTHROUGH = 1,
+  CALL = 2
 };
 
 struct Target {
@@ -63,7 +65,7 @@ struct Target {
   Block *block;
   TargetType type; 
 
-  Target(Inst *inst, Block *block) : inst(inst), block(block) {}
+  Target(Inst *inst, Block *block, TargetType type) : inst(inst), block(block), type(type) {}
 
   bool operator<(const Target &other) const {
     return this->inst->offset < other.inst->offset;
diff --git a/dyninst/cuda/Makefile b/dyninst/cuda/Makefile
index a6e0d87..aa371d7 100644
--- a/dyninst/cuda/Makefile
+++ b/dyninst/cuda/Makefile
@@ -46,7 +46,8 @@ RPATH =  \
 
 PROG = cuda-parse
 
-OBJS = cuda-parse.o  ElfHelper.o  Fatbin.o  InputFile.o  RelocateCubin.o  CudaCodeSource.o  CudaCFGFactory.o  GraphReader.o  CFGParser.o
+OBJS = cuda-parse.o  ElfHelper.o  Fatbin.o  InputFile.o  RelocateCubin.o  \
+			 CudaCFGFactory.o  CudaCodeSource.o  CudaFunction.o  GraphReader.o  CFGParser.o
 
 all: $(PROG)
 
diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index 961e728..24206e0 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -99,6 +99,7 @@
 #include "Fatbin.hpp"
 #include "InputFile.hpp"
 #include "RelocateCubin.hpp"
+#include "CudaCFGFactory.hpp"
 #include "CudaCodeSource.hpp"
 #include "GraphReader.hpp"
 #include "CFGParser.hpp"
@@ -134,7 +135,7 @@ class Options {
 	verbose = false;
 	do_delete = true;
 	do_memory = true;
-	do_instns = true;
+	do_instns = false;
 	do_inline = true;
 	do_linemap = true;
     }
@@ -564,22 +565,24 @@ main(int argc, char **argv)
 
 	CodeSource * code_src = NULL;
 	CodeObject * code_obj = NULL;
+  CFGFactory * cfg_fact = NULL;
 
 	if (cuda_file) {
-      std::string relocated_cubin = filename + ".relocated";
-      int fd = open(relocated_cubin.c_str(), O_WRONLY);
-      if (write(fd, elf_addr, elf_len) != elf_len) {
-          cout << "Write " << relocated_cubin << " to disk failed" << endl; 
-          continue;
-      }
-
+      //std::string relocated_cubin = filename + ".relocated";
+      //int fd = open(relocated_cubin.c_str(), O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU);
+      //if (write(fd, elf_addr, elf_len) != elf_len) {
+      //    cout << "Write " << relocated_cubin << " to disk failed" << endl; 
+      //    continue;
+      //}
+      //close(fd);
       std::string relocated_dot = filename + ".dot";
-      std::string cmd = "nvdisasm -cfg -poff " + relocated_cubin + " > " + relocated_dot;
-      std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose);
-      if (!pipe) {
+      std::string cmd = "nvdisasm -cfg -poff " + filename + " > " + relocated_dot;
+      FILE *output = popen(cmd.c_str(), "r");
+      if (!output) {
           cout << "Dump " << relocated_dot << " to disk failed" << endl; 
           continue;
       }
+      pclose(output);
 
       CudaParse::GraphReader graph_reader(relocated_dot);
       CudaParse::Graph graph;
@@ -588,16 +591,12 @@ main(int argc, char **argv)
       std::vector<CudaParse::Function *> functions;
       cfg_parser.parse(graph, functions);
 
+      cfg_fact = new CudaCFGFactory(functions);
       code_src = new CudaCodeSource(functions); 
-      std::vector< Hint > hints = code_src->hints();
-      for (auto hint : hints) {
-          cout << hint._name << std::endl;
-      }
-      //code_obj = new CodeObject(code_src);
+      code_obj = new CodeObject(code_src, cfg_fact);
       for (auto *function : functions) {
         delete function;
       }
-      continue;
 	}
 	else {
 	    code_src = new SymtabCodeSource(the_symtab);
@@ -643,14 +642,17 @@ main(int argc, char **argv)
 	printTime("parse: ", &tv_symtab, &tv_parse, &ru_symtab, &ru_parse);
 	printTime("total: ", &tv_init, &tv_fini, &ru_init, &ru_fini);
 
+  if (cfg_fact != NULL) {
+      delete cfg_fact;
+  }
 	if (code_obj != NULL) {
 	    delete code_obj;
 	}
 	if (code_src != NULL) {
-      if (cuda_file) {
-	        delete (CudaCodeSource *)code_src;
+      if (cuda_file == true) {
+          delete (CudaCodeSource *)code_src;
       } else {
-	        delete (SymtabCodeSource *)code_src;
+          delete (SymtabCodeSource *)code_src;
       }
 	}
 

From 2bbf9af372f01a2d8ba3c04e9ed83c518416cb68 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Wed, 20 Jun 2018 18:25:15 -0700
Subject: [PATCH 04/10] Add a dummy parse in cuda-parse

---
 dyninst/cuda/cuda-parse.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index 24206e0..a9326a6 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -568,13 +568,13 @@ main(int argc, char **argv)
   CFGFactory * cfg_fact = NULL;
 
 	if (cuda_file) {
-      //std::string relocated_cubin = filename + ".relocated";
-      //int fd = open(relocated_cubin.c_str(), O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU);
-      //if (write(fd, elf_addr, elf_len) != elf_len) {
-      //    cout << "Write " << relocated_cubin << " to disk failed" << endl; 
-      //    continue;
-      //}
-      //close(fd);
+      std::string relocated_cubin = filename + ".relocated";
+      int fd = open(relocated_cubin.c_str(), O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU);
+      if (write(fd, elf_addr, elf_len) != elf_len) {
+          cout << "Write " << relocated_cubin << " to disk failed" << endl; 
+          continue;
+      }
+      close(fd);
       std::string relocated_dot = filename + ".dot";
       std::string cmd = "nvdisasm -cfg -poff " + filename + " > " + relocated_dot;
       FILE *output = popen(cmd.c_str(), "r");
@@ -594,6 +594,7 @@ main(int argc, char **argv)
       cfg_fact = new CudaCFGFactory(functions);
       code_src = new CudaCodeSource(functions); 
       code_obj = new CodeObject(code_src, cfg_fact);
+      code_obj->parse();
       for (auto *function : functions) {
         delete function;
       }

From 69d7025d7ee8e97c949bd474e66e9ab830025228 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 21 Jun 2018 01:34:30 -0700
Subject: [PATCH 05/10] Add call edges

---
 dyninst/cuda/CFGParser.cpp | 40 ++++++++++++++++++++++++++++----------
 dyninst/cuda/CFGParser.hpp |  2 ++
 dyninst/cuda/DotCFG.hpp    | 39 ++++++++++++++++++++++++++++---------
 3 files changed, 62 insertions(+), 19 deletions(-)

diff --git a/dyninst/cuda/CFGParser.cpp b/dyninst/cuda/CFGParser.cpp
index fe1f09f..5d503ee 100644
--- a/dyninst/cuda/CFGParser.cpp
+++ b/dyninst/cuda/CFGParser.cpp
@@ -23,6 +23,29 @@ void CFGParser::parse_inst_strings(
 }
 
 
+void CFGParser::parse_calls(std::vector<Function *> &functions) {
+  for (auto function : functions) {
+    for (auto block : function->blocks) {
+      for (auto inst : block->insts) {
+        if (inst->opcode.find("CALL") != std::string::npos || // sm_70
+          inst->opcode.find("CAL") != std::string::npos) { // sm_60
+          std::string &operand = inst->operands[0];
+          std::string callee = operand.substr(2, operand.size() - 4);
+          Function *callee_function;
+          for (auto ff : functions) {
+            if (ff->name == callee) {
+              callee_function = ff;
+              break;
+            }
+          }
+          block->targets.push_back(new Target(inst, callee_function->blocks[0], CALL));  // NOTE(review): callee_function is uninitialized if no function name matched; operands[0] above is also read unchecked
+        }
+      }
+    }
+  }
+}  // parse_calls: for every instruction whose opcode contains "CAL", appends a CALL target pointing at the first block of the function named by its first operand
+
+
 size_t CFGParser::find_block_parent(size_t node) {
   size_t parent = _block_parent[node];
   size_t graph_size = _block_parent.size();
@@ -86,7 +109,11 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
     Inst *target_inst;
     for (auto inst : source_block->insts) {
       if (inst->port == edge->source_port[0]) {
-        if (inst == source_block->insts.back()) {
+        if (inst->predicate.find("!@") != std::string::npos) {
+          type = COND_TAKEN;
+        } else if (inst->predicate.find("@") != std::string::npos) {
+          type = COND_NOT_TAKEN;
+        } else if (inst == source_block->insts.back()) {
           type = FALLTHROUGH;
         }
         source_block->targets.push_back(new Target(inst, target_block, type));
@@ -100,15 +127,6 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
     }
   }
 
-  //for (auto block : blocks) {
-  //  std::cout << "From: " << std::endl;
-  //  std::cout << block->name << std::endl;
-  //  std::cout << "Target: " << std::endl;
-  //  for (auto target : block->targets) {
-  //    std::cout << target->block->name << std::endl;
-  //  }
-  //}
-
   // Build functions
   size_t function_id = 0;
   for (auto block : blocks) {
@@ -130,6 +148,8 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
       functions.push_back(function);
     }
   }
+
+  parse_calls(functions);
 }
 
 }
diff --git a/dyninst/cuda/CFGParser.hpp b/dyninst/cuda/CFGParser.hpp
index 72157df..6f68ca4 100644
--- a/dyninst/cuda/CFGParser.hpp
+++ b/dyninst/cuda/CFGParser.hpp
@@ -18,6 +18,8 @@ class CFGParser {
   ~CFGParser() {};
 
  private:
+  void parse_calls(std::vector<Function *> &functions);
+
   void parse_inst_strings(const std::string &label, std::deque<std::string> &inst_strings);
 
   size_t find_block_parent(size_t node);
diff --git a/dyninst/cuda/DotCFG.hpp b/dyninst/cuda/DotCFG.hpp
index beb2f68..d0f216f 100644
--- a/dyninst/cuda/DotCFG.hpp
+++ b/dyninst/cuda/DotCFG.hpp
@@ -1,27 +1,35 @@
 #ifndef _DOT_CFG_H_
 #define _DOT_CFG_H_
 
+#include <algorithm>
 #include <iostream>
 #include <regex>
-#include <string>
 #include <sstream>
+#include <string>
 #include <unordered_set>
 #include <vector>
-#include <iostream>
 
 namespace CudaParse {
 
 struct Inst {
   int offset;
+  bool dual;
+  std::string predicate;
   std::string opcode;
   std::string port;
   std::vector<std::string> operands;
 
-  Inst(std::string &inst_str) {
-    if (inst_str[0] == '/') {  // Dual issue
+  Inst(std::string &inst_str) : offset(0), dual(false) {
+    if (inst_str.find("{") != std::string::npos) {  // Dual first
+      auto pos = inst_str.find("{");
+      inst_str.replace(pos, 1, " ");
+      dual = true;
+    }
+    if (inst_str.find("}") != std::string::npos) {  // Dual second
       inst_str = inst_str.substr(2);
       auto pos = inst_str.find("*/");
-      inst_str.replace(pos, 2, "");
+      inst_str.replace(pos, 2, ":");
+      dual = true;
     }
     std::istringstream iss(inst_str);
     std::string s;
@@ -35,12 +43,23 @@ struct Inst {
       ss << std::hex << s;
       ss >> offset;
       if (std::getline(iss, s, ':')) {
+        s.erase(std::remove(s.begin(), s.end(), '{'), s.end());
+        s.erase(std::remove(s.begin(), s.end(), '}'), s.end());
+        s.erase(std::remove(s.begin(), s.end(), ';'), s.end());
+        s.erase(std::remove(s.begin(), s.end(), ','), s.end());
+        s.erase(std::remove(s.begin(), s.end(), '('), s.end());
+        s.erase(std::remove(s.begin(), s.end(), ')'), s.end());
+        s.erase(std::remove(s.begin(), s.end(), '`'), s.end());
         std::regex e("\\\\ ");
         iss = std::istringstream(std::regex_replace(s, e, "\n"));
         while (std::getline(iss, s)) {
           if (s != "") {
             if (opcode == "") {
-              opcode = s;
+              if (s.find("@") != std::string::npos) {
+                predicate = s;
+              } else {
+                opcode = s;
+              }
             } else {
               operands.push_back(s);
             }
@@ -55,9 +74,11 @@ struct Inst {
 struct Block;
 
 enum TargetType {
-  DIRECT = 0,
-  FALLTHROUGH = 1,
-  CALL = 2
+  CALL = 0,
+  COND_TAKEN,
+  COND_NOT_TAKEN,
+  FALLTHROUGH,
+  DIRECT
 };
 
 struct Target {

From 685e45d8d6ca061de2664bf5fad87ad72b061bac Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 21 Jun 2018 07:45:04 -0700
Subject: [PATCH 06/10] Read line-mapping info

---
 dyninst/cuda/CudaCFGFactory.cpp | 18 ++++++++++----
 dyninst/cuda/CudaCFGFactory.hpp |  3 ++-
 dyninst/cuda/Makefile           |  3 ++-
 dyninst/cuda/cuda-parse.cpp     | 42 ++++++++++++++++++++++++++-------
 4 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/dyninst/cuda/CudaCFGFactory.cpp b/dyninst/cuda/CudaCFGFactory.cpp
index 9722f76..8c532da 100644
--- a/dyninst/cuda/CudaCFGFactory.cpp
+++ b/dyninst/cuda/CudaCFGFactory.cpp
@@ -15,9 +15,14 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
 
       bool first_entry = true;
       for (auto *block : function->blocks) {
-        Block *ret_block = NULL;
+        CudaBlock *ret_block = NULL;
         if (_block_filter.find(block->id) == _block_filter.end()) {
-          ret_block = new Block(obj, region, block->insts[0]->offset);
+          ret_block = new CudaBlock(obj, region, block->insts[0]->offset);
+          std::vector<Offset> inst_offsets;
+          for (auto *inst : block->insts) {
+            inst_offsets.push_back(inst->offset);
+          }
+          ret_block->set_inst_offsets(inst_offsets);
           _block_filter[block->id] = ret_block;
           blocks_.add(*ret_block);
           ret_func->add_block(ret_block);
@@ -31,9 +36,14 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
         }
 
         for (auto *target : block->targets) {
-          Block *ret_target_block = NULL;
+          CudaBlock *ret_target_block = NULL;
           if (_block_filter.find(target->block->id) == _block_filter.end()) {
-            ret_target_block = new Block(obj, region, target->block->insts[0]->offset);
+            ret_target_block = new CudaBlock(obj, region, target->block->insts[0]->offset);
+            std::vector<Offset> inst_offsets;
+            for (auto *inst : target->block->insts) {
+              inst_offsets.push_back(inst->offset);
+            }
+            ret_target_block->set_inst_offsets(inst_offsets);
             _block_filter[target->block->id] = ret_target_block;
             blocks_.add(*ret_target_block);
             ret_func->add_block(ret_target_block);
diff --git a/dyninst/cuda/CudaCFGFactory.hpp b/dyninst/cuda/CudaCFGFactory.hpp
index 6fd6ec8..2919abb 100644
--- a/dyninst/cuda/CudaCFGFactory.hpp
+++ b/dyninst/cuda/CudaCFGFactory.hpp
@@ -4,6 +4,7 @@
 #include <CFGFactory.h>
 #include <unordered_map>
 
+#include "CudaBlock.hpp"
 #include "DotCFG.hpp"
 
 namespace Dyninst {
@@ -21,7 +22,7 @@ class PARSER_EXPORT CudaCFGFactory : public CFGFactory {
 
  private:
    std::vector<CudaParse::Function *> &_functions;
-   std::unordered_map<size_t, Block *> _block_filter; 
+   std::unordered_map<size_t, CudaBlock *> _block_filter; 
 };
 
 }
diff --git a/dyninst/cuda/Makefile b/dyninst/cuda/Makefile
index aa371d7..dab7c22 100644
--- a/dyninst/cuda/Makefile
+++ b/dyninst/cuda/Makefile
@@ -47,7 +47,8 @@ RPATH =  \
 PROG = cuda-parse
 
 OBJS = cuda-parse.o  ElfHelper.o  Fatbin.o  InputFile.o  RelocateCubin.o  \
-			 CudaCFGFactory.o  CudaCodeSource.o  CudaFunction.o  GraphReader.o  CFGParser.o
+			 CudaCFGFactory.o  CudaCodeSource.o  CudaFunction.o  CudaBlock.o \
+			 GraphReader.o  CFGParser.o
 
 all: $(PROG)
 
diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index a9326a6..d0cd01e 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -100,6 +100,8 @@
 #include "InputFile.hpp"
 #include "RelocateCubin.hpp"
 #include "CudaCFGFactory.hpp"
+#include "CudaFunction.hpp"
+#include "CudaBlock.hpp"
 #include "CudaCodeSource.hpp"
 #include "GraphReader.hpp"
 #include "CFGParser.hpp"
@@ -135,7 +137,7 @@ class Options {
 	verbose = false;
 	do_delete = true;
 	do_memory = true;
-	do_instns = false;
+	do_instns = true;
 	do_inline = true;
 	do_linemap = true;
     }
@@ -203,6 +205,7 @@ doInstruction(Offset addr, FuncInfo & finfo)
 
 	if (! svec.empty()) {
 	    int line = svec[0]->getLine();
+      std::cout << line << std::endl;
 
 	    // line = 0 means unknown
 	    if (line > 0) {
@@ -255,13 +258,20 @@ doBlock(Block * block, BlockSet & visited, FuncInfo & finfo)
 
     // split basic block into instructions (optional)
     if (opts.do_instns) {
- 	Dyninst::ParseAPI::Block::Insns imap;
-	block->getInsns(imap);
-
-	for (auto iit = imap.begin(); iit != imap.end(); ++iit) {
-	    Offset addr = iit->first;
-	    doInstruction(addr, finfo);
-	}
+ 	//Dyninst::ParseAPI::Block::Insns imap;
+	//block->getInsns(imap);
+
+	//for (auto iit = imap.begin(); iit != imap.end(); ++iit) {
+	//    Offset addr = iit->first;
+	//    doInstruction(addr, finfo);
+	//}
+      std::vector<Offset> offsets = ((CudaBlock *)block)->get_inst_offsets();
+
+      for (auto it = offsets.begin(); it != offsets.end(); ++it) {
+        Offset addr = *it;
+        std::cout << "Line mapping: " << addr << "->";
+        doInstruction(addr, finfo);
+      }
     }
 }
 
@@ -584,6 +594,7 @@ main(int argc, char **argv)
       }
       pclose(output);
 
+      // parse dot cfg
       CudaParse::GraphReader graph_reader(relocated_dot);
       CudaParse::Graph graph;
       graph_reader.read(graph);
@@ -591,6 +602,21 @@ main(int argc, char **argv)
       std::vector<CudaParse::Function *> functions;
       cfg_parser.parse(graph, functions);
 
+      // relocate instructions
+      std::vector<Symbol *> symbols;
+      the_symtab->getAllSymbols(symbols);
+      for (auto *symbol : symbols) {
+        for (auto *function : functions) {
+          if (function->name == symbol->getMangledName()) {
+            for (auto *block : function->blocks) {
+              for (auto *inst : block->insts) {
+                inst->offset += symbol->getOffset();
+              }
+            }
+          }
+        }
+      }
+
       cfg_fact = new CudaCFGFactory(functions);
       code_src = new CudaCodeSource(functions); 
       code_obj = new CodeObject(code_src, cfg_fact);

From 0fc18dc492913fd89c819e0cf44d9da52e13ad10 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 21 Jun 2018 07:56:32 -0700
Subject: [PATCH 07/10] Fix function calls

---
 dyninst/cuda/CFGParser.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dyninst/cuda/CFGParser.cpp b/dyninst/cuda/CFGParser.cpp
index 5d503ee..2568512 100644
--- a/dyninst/cuda/CFGParser.cpp
+++ b/dyninst/cuda/CFGParser.cpp
@@ -30,10 +30,9 @@ void CFGParser::parse_calls(std::vector<Function *> &functions) {
         if (inst->opcode.find("CALL") != std::string::npos || // sm_70
           inst->opcode.find("CAL") != std::string::npos) { // sm_60
           std::string &operand = inst->operands[0];
-          std::string callee = operand.substr(2, operand.size() - 4);
           Function *callee_function;
           for (auto ff : functions) {
-            if (ff->name == callee) {
+            if (ff->name == operand) {
               callee_function = ff;
               break;
             }

From df7a355dcea984b3ce8a8743d314405936044364 Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 21 Jun 2018 17:19:24 -0700
Subject: [PATCH 08/10] Keep block::getInsns interface

---
 dyninst/cuda/CudaCFGFactory.cpp |  6 ++----
 dyninst/cuda/cuda-parse.cpp     | 20 +++++++-------------
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/dyninst/cuda/CudaCFGFactory.cpp b/dyninst/cuda/CudaCFGFactory.cpp
index 8c532da..31e8b3c 100644
--- a/dyninst/cuda/CudaCFGFactory.cpp
+++ b/dyninst/cuda/CudaCFGFactory.cpp
@@ -17,12 +17,11 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
       for (auto *block : function->blocks) {
         CudaBlock *ret_block = NULL;
         if (_block_filter.find(block->id) == _block_filter.end()) {
-          ret_block = new CudaBlock(obj, region, block->insts[0]->offset);
           std::vector<Offset> inst_offsets;
           for (auto *inst : block->insts) {
             inst_offsets.push_back(inst->offset);
           }
-          ret_block->set_inst_offsets(inst_offsets);
+          ret_block = new CudaBlock(obj, region, block->insts[0]->offset, inst_offsets);
           _block_filter[block->id] = ret_block;
           blocks_.add(*ret_block);
           ret_func->add_block(ret_block);
@@ -38,12 +37,11 @@ Function *CudaCFGFactory::mkfunc(Address addr, FuncSource src,
         for (auto *target : block->targets) {
           CudaBlock *ret_target_block = NULL;
           if (_block_filter.find(target->block->id) == _block_filter.end()) {
-            ret_target_block = new CudaBlock(obj, region, target->block->insts[0]->offset);
             std::vector<Offset> inst_offsets;
             for (auto *inst : target->block->insts) {
               inst_offsets.push_back(inst->offset);
             }
-            ret_target_block->set_inst_offsets(inst_offsets);
+            ret_target_block = new CudaBlock(obj, region, target->block->insts[0]->offset, inst_offsets);
             _block_filter[target->block->id] = ret_target_block;
             blocks_.add(*ret_target_block);
             ret_func->add_block(ret_target_block);
diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index d0cd01e..95e6e86 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -258,19 +258,13 @@ doBlock(Block * block, BlockSet & visited, FuncInfo & finfo)
 
     // split basic block into instructions (optional)
     if (opts.do_instns) {
- 	//Dyninst::ParseAPI::Block::Insns imap;
-	//block->getInsns(imap);
-
-	//for (auto iit = imap.begin(); iit != imap.end(); ++iit) {
-	//    Offset addr = iit->first;
-	//    doInstruction(addr, finfo);
-	//}
-      std::vector<Offset> offsets = ((CudaBlock *)block)->get_inst_offsets();
-
-      for (auto it = offsets.begin(); it != offsets.end(); ++it) {
-        Offset addr = *it;
-        std::cout << "Line mapping: " << addr << "->";
-        doInstruction(addr, finfo);
+      Dyninst::ParseAPI::Block::Insns imap;
+      block->getInsns(imap);
+
+      for (auto iit = imap.begin(); iit != imap.end(); ++iit) {
+          Offset addr = iit->first;
+          std::cout << "Line mapping: " << addr << "->";
+          doInstruction(addr, finfo);
       }
     }
 }

From 3fe2bf60fc4c531059599d69e7f45219363379dc Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 21 Jun 2018 18:03:24 -0700
Subject: [PATCH 09/10] Add cubin symbols

---
 dyninst/cuda/RelocateCubin.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/dyninst/cuda/RelocateCubin.cpp b/dyninst/cuda/RelocateCubin.cpp
index 5d4cb53..26a82ff 100644
--- a/dyninst/cuda/RelocateCubin.cpp
+++ b/dyninst/cuda/RelocateCubin.cpp
@@ -102,6 +102,11 @@
 //---------------------------------------------------------
 #define R_NV_32                 0x01
 #define R_NV_64                 0x02
+#define R_NV_G32                0x03
+#define R_NV_G64                0x04
+
+#define RELOC_32(x) ((x) == R_NV_32 || (x) == R_NV_G32)  /* types applied as a 32-bit store; argument parenthesized */
+#define RELOC_64(x) ((x) == R_NV_64 || (x) == R_NV_G64)  /* types applied as a 64-bit store; argument parenthesized */
 
 
 
@@ -157,10 +162,10 @@ binding_name
 static void
 applyRelocation(void *addr, unsigned rel_type, uint64_t rel_value)
 {
-  if (rel_type == R_NV_64) {
+  if (RELOC_64(rel_type)) {
     uint64_t *addr64 = (uint64_t *) addr;
     *addr64 = rel_value;
-  } else if (rel_type == R_NV_32) {
+  } else if (RELOC_32(rel_type)) {
     uint32_t *addr32 = (uint32_t *) addr;
     *addr32 = rel_value;
   } else {

From 2c6ddf18cc758743cd9fe953829c4ccc914d58fc Mon Sep 17 00:00:00 2001
From: Jokeren <robinho364@gmail.com>
Date: Thu, 28 Jun 2018 22:03:15 -0700
Subject: [PATCH 10/10] Add support for optimized cubins

---
 dyninst/cuda/CFGParser.cpp           |   20 +-
 dyninst/cuda/CudaBlock.cpp           |   22 +
 dyninst/cuda/CudaBlock.hpp           |   24 +
 dyninst/cuda/Line.hpp                |  164 ++++
 dyninst/cuda/LineInfoDecoder.hpp     |  123 +++
 dyninst/cuda/LineInfoDecoderDump.hpp |   69 ++
 dyninst/cuda/LineMapping.cpp         |  104 +++
 dyninst/cuda/LineMapping.hpp         |   51 ++
 dyninst/cuda/Makefile                |    5 +-
 dyninst/cuda/ReadCubinLineMap.cpp    | 1121 ++++++++++++++++++++++++++
 dyninst/cuda/ReadCubinLineMap.hpp    |   76 ++
 dyninst/cuda/cuda-parse.cpp          |  123 ++-
 12 files changed, 1853 insertions(+), 49 deletions(-)
 create mode 100644 dyninst/cuda/CudaBlock.cpp
 create mode 100644 dyninst/cuda/CudaBlock.hpp
 create mode 100644 dyninst/cuda/Line.hpp
 create mode 100644 dyninst/cuda/LineInfoDecoder.hpp
 create mode 100644 dyninst/cuda/LineInfoDecoderDump.hpp
 create mode 100644 dyninst/cuda/LineMapping.cpp
 create mode 100644 dyninst/cuda/LineMapping.hpp
 create mode 100644 dyninst/cuda/ReadCubinLineMap.cpp
 create mode 100644 dyninst/cuda/ReadCubinLineMap.hpp

diff --git a/dyninst/cuda/CFGParser.cpp b/dyninst/cuda/CFGParser.cpp
index 2568512..b9f65e1 100644
--- a/dyninst/cuda/CFGParser.cpp
+++ b/dyninst/cuda/CFGParser.cpp
@@ -24,14 +24,14 @@ void CFGParser::parse_inst_strings(
 
 
 void CFGParser::parse_calls(std::vector<Function *> &functions) {
-  for (auto function : functions) {
-    for (auto block : function->blocks) {
-      for (auto inst : block->insts) {
+  for (auto *function : functions) {
+    for (auto *block : function->blocks) {
+      for (auto *inst : block->insts) {
         if (inst->opcode.find("CALL") != std::string::npos || // sm_70
           inst->opcode.find("CAL") != std::string::npos) { // sm_60
           std::string &operand = inst->operands[0];
           Function *callee_function;
-          for (auto ff : functions) {
+          for (auto *ff : functions) {
             if (ff->name == operand) {
               callee_function = ff;
               break;
@@ -83,12 +83,12 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
   }
 
   // Parse every vertex to build blocks
-  for (auto vertex : graph.vertices) {
+  for (auto *vertex : graph.vertices) {
     Block *block = new Block(vertex->id, vertex->name);
 
     std::deque<std::string> inst_strings;
     parse_inst_strings(vertex->label, inst_strings);
-    for (auto inst_string : inst_strings) {
+    for (auto &inst_string : inst_strings) {
       block->insts.push_back(new Inst(inst_string));
     }
 
@@ -97,7 +97,7 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
   }
 
   // Parse every edge to build block relations
-  for (auto edge : graph.edges) {
+  for (auto *edge : graph.edges) {
     // Find toppest block
     unite_blocks(edge->target_id, edge->source_id);
     Block *target_block = block_map[edge->target_id];
@@ -106,7 +106,7 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
     TargetType type = DIRECT;
     // Link blocks
     Inst *target_inst;
-    for (auto inst : source_block->insts) {
+    for (auto *inst : source_block->insts) {
       if (inst->port == edge->source_port[0]) {
         if (inst->predicate.find("!@") != std::string::npos) {
           type = COND_TAKEN;
@@ -128,7 +128,7 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
 
   // Build functions
   size_t function_id = 0;
-  for (auto block : blocks) {
+  for (auto *block : blocks) {
     // Sort block targets according to inst offset
     std::sort(block->targets.begin(), block->targets.end(), compare_target_ptr);
     if (find_block_parent(block->id) == block->id) {
@@ -138,7 +138,7 @@ void CFGParser::parse(const Graph &graph, std::vector<Function *> &functions) {
       }
       Function *function = new Function(function_id, block_map[block->id]->name);
       ++function_id;
-      for (auto bb : blocks) {
+      for (auto *bb : blocks) {
         if (find_block_parent(bb->id) == block->id) {
           function->blocks.push_back(bb);
         }
diff --git a/dyninst/cuda/CudaBlock.cpp b/dyninst/cuda/CudaBlock.cpp
new file mode 100644
index 0000000..2918bc8
--- /dev/null
+++ b/dyninst/cuda/CudaBlock.cpp
@@ -0,0 +1,22 @@
+#include "CudaBlock.hpp"
+
+
+namespace Dyninst {
+namespace ParseAPI {
+
+// Construct a block at `start` and keep a private copy of `offsets`.
+CudaBlock::CudaBlock(CodeObject * o, CodeRegion * r,
+  Address start, std::vector<Offset> &offsets)
+  : Block(o, r, start),
+    _inst_offsets(offsets) {
+}
+
+void CudaBlock::getInsns(Insns &insns) const {  // one null entry per offset
+  typedef boost::shared_ptr<Dyninst::InstructionAPI::Instruction> InsnPtr;
+  for (std::vector<Offset>::const_iterator it = _inst_offsets.begin();
+       it != _inst_offsets.end(); ++it)
+    insns.insert(std::pair<long unsigned int, InsnPtr>(*it, InsnPtr()));
+}
+
+}
+}
diff --git a/dyninst/cuda/CudaBlock.hpp b/dyninst/cuda/CudaBlock.hpp
new file mode 100644
index 0000000..6ed0e9c
--- /dev/null
+++ b/dyninst/cuda/CudaBlock.hpp
@@ -0,0 +1,24 @@
+#ifndef _CUDA_BLOCK_H_
+#define _CUDA_BLOCK_H_
+
+#include <CFG.h>
+
+namespace Dyninst {
+namespace ParseAPI {
+
+class PARSER_EXPORT CudaBlock : public Block {  // Block that carries an explicit list of instruction offsets
+ public:
+   CudaBlock(CodeObject * o, CodeRegion * r, Address start, std::vector<Offset> &offsets);
+   // The constructor copies `offsets` into _inst_offsets.
+   virtual ~CudaBlock() {}
+   // Inserts one entry per stored offset into `insns`, each with a null instruction.
+   virtual void getInsns(Insns &insns) const;
+
+ private:
+   std::vector<Offset> _inst_offsets;
+};
+
+}
+}
+
+#endif
diff --git a/dyninst/cuda/Line.hpp b/dyninst/cuda/Line.hpp
new file mode 100644
index 0000000..3c391ee
--- /dev/null
+++ b/dyninst/cuda/Line.hpp
@@ -0,0 +1,164 @@
+// * BeginRiceCopyright *****************************************************
+//
+// $HeadURL$
+// $Id$
+//
+// --------------------------------------------------------------------------
+// Part of HPCToolkit (hpctoolkit.org)
+//
+// Information about sources of support for research and development of
+// HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
+// --------------------------------------------------------------------------
+//
+// Copyright ((c)) 2002-2018, Rice University
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Rice University (RICE) nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// This software is provided by RICE and contributors "as is" and any
+// express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular
+// purpose are disclaimed. In no event shall RICE or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or
+// business interruption) however caused and on any theory of liability,
+// whether in contract, strict liability, or tort (including negligence
+// or otherwise) arising in any way out of the use of this software, even
+// if advised of the possibility of such damage.
+//
+// ******************************************************* EndRiceCopyright *
+
+
+//***************************************************************************
+//
+// File: Line.hpp
+//
+// Purpose:
+//   Interface for a Line Map Line 
+//
+//***************************************************************************
+
+#ifndef __Line_hpp__
+#define __Line_hpp__
+
+//******************************************************************************
+// system includes
+//******************************************************************************
+
+#include <inttypes.h>
+#include <linux/limits.h>
+#include <vector>
+
+
+
+
+//******************************************************************************
+// forward declarations
+//******************************************************************************
+
+class LineDecoderSettings;
+
+
+
+//******************************************************************************
+// type declarations
+//******************************************************************************
+
+class LineInfo {  // state of one line-map row; changed through the members below
+public:
+  uint64_t address;           // address of line
+  uint8_t op_index;           // index in VLIW inst; 0 if non VLIW
+  uint16_t file;              // file table entry
+  uint32_t line;              // source line number; 0 = unknown
+  uint32_t column;            // column within source line; 0 = unknown
+
+  bool is_stmt;               // recommended breakpoint location
+  bool basic_block;           // current inst is the beginning of a basic block
+  bool end_sequence;          // first byte after sequence of inst; resets row
+  bool prologue_end;          // point for an entry breakpoint of a function
+  bool epilogue_begin;        // point for an exit breakpoint of a function
+
+  uint32_t isa;               // encodes the applicable inst set arch
+  uint32_t discriminator;     // identifies the block of the current inst 
+
+public:
+  // setFile, setLine and setColumn take uint64_t and narrow it to the field type.
+  void setAddress(uint64_t a)         { address        = a;   op_index = 0;  };
+  void fixedAdvancePC(uint64_t o)     { address        += o;  op_index = 0;  };
+
+  void setFile(uint64_t f)            { file           = f;                  };
+
+  void setLine(uint64_t l)            { line           = l;                  };
+  void advanceLine(uint64_t o)        { line           += o;                 };
+
+  void setColumn(uint64_t c)          { column         = c;                  };
+  void negateStmt()                   { is_stmt        = !is_stmt;           };
+
+  void setIsa(uint32_t l)             { isa            = l;                  };
+
+  void basicBlock()                   { basic_block    = true;               };
+  void endSequence()                  { end_sequence   = true;               };
+  void endPrologue()                  { prologue_end   = true;               };
+  void beginEpilogue()                { epilogue_begin = true;               };
+  void setDiscrim(uint32_t d)         { discriminator  = d;                  };
+  // reset(): address, op_index, column, isa = 0; file, line = 1; is_stmt from lds; flags cleared.
+  void reset(const LineDecoderSettings &lds);
+  void resetFlagsAndDiscriminator();
+  // advancePC(): moves address by whole instructions of lds.min_inst_len and updates op_index.
+  void advancePC
+  (
+   uint64_t operation_advance,
+   const LineDecoderSettings &lds 
+  );
+  // applySpecialOp(): subtracts lds.opcode_base, then advances both line and PC.
+  void applySpecialOp
+  (
+   uint64_t opcode_raw,
+   const LineDecoderSettings &lds
+  );
+};
+
+
+class FileSystemRepr;
+
+
+class FileSystem {  // access to the directory and file tables held in *repr
+public:
+  // The constructor allocates repr and the destructor deletes it.
+  FileSystem();
+  ~FileSystem();
+  // Name / directory name of file-table entry i.
+  const char *getFileName(int i);
+  const char *getDirName(int i);
+  // NOTE(review): no copy constructor or assignment operator is declared, so a
+  // copy would share repr and both destructors would delete it.
+public:
+   FileSystemRepr *repr;
+};
+
+
+// Interface for consumers of line-map rows (presumably called once per row).
+class LineInfoHandler {
+public:
+  // Virtual so that destroying a handler through this base type is defined.
+  virtual ~LineInfoHandler() {}
+  // Receives a row `li` and the file tables `fs`; this default ignores both.
+  virtual void processMatrixRow(LineInfo *li, FileSystem *fs) {
+  }
+};
+
+#endif
diff --git a/dyninst/cuda/LineInfoDecoder.hpp b/dyninst/cuda/LineInfoDecoder.hpp
new file mode 100644
index 0000000..6434b4d
--- /dev/null
+++ b/dyninst/cuda/LineInfoDecoder.hpp
@@ -0,0 +1,123 @@
+// * BeginRiceCopyright *****************************************************
+//
+// $HeadURL$
+// $Id$
+//
+// --------------------------------------------------------------------------
+// Part of HPCToolkit (hpctoolkit.org)
+//
+// Information about sources of support for research and development of
+// HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
+// --------------------------------------------------------------------------
+//
+// Copyright ((c)) 2002-2018, Rice University
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Rice University (RICE) nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// This software is provided by RICE and contributors "as is" and any
+// express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular
+// purpose are disclaimed. In no event shall RICE or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or
+// business interruption) however caused and on any theory of liability,
+// whether in contract, strict liability, or tort (including negligence
+// or otherwise) arising in any way out of the use of this software, even
+// if advised of the possibility of such damage.
+//
+// ******************************************************* EndRiceCopyright *
+
+
+//***************************************************************************
+//
+// File: LineInfoDecoder.hpp
+//
+// Purpose:
+//   Interface for a LineInfoDecoder
+//
+//***************************************************************************
+
+#ifndef __LineInfoDecoder_hpp__
+#define __LineInfoDecoder_hpp__
+
+#include "Line.hpp"
+
+// Turns successive line-map rows into address ranges: the previous row gives
+// the start address and source position, the current row's address gives the
+// end, and each completed range is handed to the virtual reportLine().
+class LineInfoDecoder : public LineInfoHandler {
+public:
+  LineInfoDecoder()
+    : start_address(0), end_address(0), file_number(0),
+      line(0), column(0), first(true) {
+  }
+
+  // Report the range currently held in this decoder's state.
+  void reportLine(FileSystem *fs) {
+    reportLine(start_address, end_address, file_number, line, column, fs);
+  }
+
+  //****************************************************************************
+  // virtual functions
+  //****************************************************************************
+
+  // Receives one completed range; this base implementation ignores it.
+  virtual void reportLine
+  (
+    uint64_t start_address,
+    uint64_t end_address,
+    uint16_t file_number,
+    uint32_t line,
+    uint32_t column,
+    FileSystem *fs
+  ) {
+  }
+
+  // Closes the range opened by the previous row (the first row only opens
+  // one), then records this row as the start of the next range.
+  virtual void processMatrixRow
+  (
+    LineInfo *li,
+    FileSystem *fs
+  ) {
+    const bool have_open_range = !first;
+    first = false;
+    if (have_open_range) {
+      end_address = li->address;
+      reportLine(fs);
+    }
+
+    // set state for next line
+    start_address = li->address;
+    file_number = li->file;
+    line = li->line;
+    column = li->column;
+  }
+
+private:
+  uint64_t start_address;     // start address of line
+  uint64_t end_address;       // end address of line
+  uint16_t file_number;       // file table entry
+  uint32_t line;              // source line number; 0 = unknown
+  uint32_t column;            // column within source line; 0 = unknown
+
+protected:
+  bool first;                 // true until the first row has been processed
+};
+
+#endif
diff --git a/dyninst/cuda/LineInfoDecoderDump.hpp b/dyninst/cuda/LineInfoDecoderDump.hpp
new file mode 100644
index 0000000..b1fb3a9
--- /dev/null
+++ b/dyninst/cuda/LineInfoDecoderDump.hpp
@@ -0,0 +1,69 @@
+#ifndef __LineInfoDecoderDump_hpp__
+#define __LineInfoDecoderDump_hpp__
+
+//******************************************************************************
+// local include files
+//******************************************************************************
+
+#include "LineInfoDecoder.hpp"
+
+#define DUMP_DECODED_LINE 1
+
+//******************************************************************************
+// macros
+//******************************************************************************
+
+#if DUMP_DECODED_LINE 
+#define DUMP_DECODED_OUTPUT(...) std::cout << __VA_ARGS__
+#else
+#define DUMP_DECODED_OUTPUT(...)
+#endif
+
+
+
+//******************************************************************************
+// type declarations
+//******************************************************************************
+
+// Decoder that prints each reported range through DUMP_DECODED_OUTPUT.
+class LineInfoDecoderDump : public LineInfoDecoder {
+public:
+  // Overrides LineInfoDecoder::reportLine. The name-based overload below has a
+  // different parameter list from the base virtual, so on its own it is never
+  // reached from processMatrixRow(); this looks the names up and forwards.
+  virtual void reportLine(uint64_t start_address, uint64_t end_address,
+                          uint16_t file_number, uint32_t line, uint32_t column,
+                          FileSystem *fs) {
+    reportLine(start_address, end_address, fs->getDirName(file_number),
+               fs->getFileName(file_number), line, column);
+  };
+  // Prints file_name, line and [start, end); dir_name and column are unused.
+  virtual void reportLine(uint64_t start_address, uint64_t end_address,
+                          const char *dir_name, const char *file_name,
+                          uint32_t line, uint32_t column) {
+    DUMP_DECODED_OUTPUT(file_name << "\t\t" << line << "\t\t[");
+    dumpAddr(start_address);
+    DUMP_DECODED_OUTPUT(", ");
+    dumpAddr(end_address);
+    DUMP_DECODED_OUTPUT(")\n");
+  };
+
+  void dumpAddr(uint64_t addr) {
+    if (addr == 0) 
+      DUMP_DECODED_OUTPUT("0x"); // the line below drops the 0x for 0
+    DUMP_DECODED_OUTPUT(std::hex << (void *) addr << std::dec);
+  };
+
+  // Prints the table banner ahead of the first row, then defers to the base.
+  virtual void processMatrixRow(LineInfo *li, FileSystem *fs) {
+    if (first) {
+      DUMP_DECODED_OUTPUT("CU: " << fs->getDirName(li->file) <<
+                          "/" << fs->getFileName(li->file) << "\n");
+      DUMP_DECODED_OUTPUT("File name                            Line number"
+                      "    Starting address    View\n");
+    }
+    LineInfoDecoder::processMatrixRow(li, fs);
+  };
+};
+
+#endif
diff --git a/dyninst/cuda/LineMapping.cpp b/dyninst/cuda/LineMapping.cpp
new file mode 100644
index 0000000..ef3d85a
--- /dev/null
+++ b/dyninst/cuda/LineMapping.cpp
@@ -0,0 +1,104 @@
+#include "LineMapping.hpp"
+
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <libdw.h>
+
+#include <LineInformation.h>
+#include <Function.h>
+#include <Module.h>
+
+#include "LineMapping.hpp"
+#include "ReadCubinLineMap.hpp"
+
+using namespace Dyninst;
+using namespace SymtabAPI;
+
+static std::vector<Line> line_mappings;  // appended to by reportLine, read by insert_lines
+
+// Append the reported range to the file-level line_mappings list; fs is unused.
+void CudaLineInfoDecoder::reportLine(uint64_t start_address, uint64_t end_address,
+  uint16_t file_number, uint32_t line, uint32_t column, FileSystem *fs) {
+  line_mappings.push_back(Line(start_address, end_address, file_number, line, column));
+}
+
+
+// Copy the relocated cubin into a heap buffer and, if libdw finds no DWARF in
+// it, decode its line map with readCubinLineMap. True only in that case.
+bool LineMapping::read_lines(const std::string &relocated_cubin) {
+  int fd = open(relocated_cubin.c_str(), O_RDONLY);
+  if (fd == -1) {
+    return false;
+  }
+  struct stat sb;
+  void *p = MAP_FAILED;
+  if (fstat(fd, &sb) == 0) {
+    p = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
+  }
+  close(fd); // previously leaked on the early returns; a mapping outlives it
+  if (p == MAP_FAILED) {
+    return false;
+  }
+  // create a memory-resident copy of the file that we can read and write
+  char *memPtr = (char *)malloc(sb.st_size);
+  if (memPtr != NULL) memcpy(memPtr, p, sb.st_size);
+  munmap(p, sb.st_size); // now also done when malloc fails
+  if (memPtr == NULL) {
+    return false;
+  }
+
+  Elf *elf = elf_memory(memPtr, sb.st_size);
+  Dwarf *dbg = dwarf_begin_elf(elf, DWARF_C_READ, 0);
+  const bool no_dwarf = (dbg == NULL);
+  if (no_dwarf) {
+    readCubinLineMap(memPtr, elf, &this->_cuda_line_info_decoder);
+  } else {
+    dwarf_end(dbg); // the handle was never released before
+  }
+  elf_end(elf);
+  free(memPtr);
+  return no_dwarf;
+}
+
+
+// Add every collected line mapping to the module of the function containing
+// its start address. Returns false at the first mapping that has no containing
+// function, no module, or that addLine() rejects.
+bool LineMapping::insert_lines(Symtab *symtab) {
+  for (auto &line : line_mappings) {
+    SymtabAPI::Function *sym_func = NULL;
+    symtab->getContainingFunction(line.start_address, sym_func);
+    if (sym_func == NULL) {
+      return false;
+    }
+    Module *mod = sym_func->getModule();
+    if (mod == NULL) {
+      return false;
+    }
+
+    // A module without line information gets a new table, which is handed to
+    // the module only once the line has been added to it.
+    LineInformation *line_information = mod->getLineInformation();
+    LineInformation *fresh = NULL;
+    if (line_information == NULL) {
+      line_information = fresh = new LineInformation();
+    }
+    if (!line_information->addLine(line.file, line.line, line.column,
+                                   line.start_address, line.end_address)) {
+      delete fresh; // was leaked on this path; no-op when fresh is NULL
+      return false;
+    }
+    if (fresh != NULL) {
+      mod->setLineInfo(fresh);
+    }
+  }
+  return true;
+}
+
diff --git a/dyninst/cuda/LineMapping.hpp b/dyninst/cuda/LineMapping.hpp
new file mode 100644
index 0000000..d3cd611
--- /dev/null
+++ b/dyninst/cuda/LineMapping.hpp
@@ -0,0 +1,51 @@
+#ifndef _LINE_MAPPING_H_
+#define _LINE_MAPPING_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include <Symtab.h>
+#include <dyntypes.h>
+
+#include "LineInfoDecoder.hpp"
+
+// Address range [start_address, end_address) and the source position it maps to.
+struct Line {
+  uint64_t start_address;
+  uint64_t end_address;
+  uint16_t file;
+  uint32_t line;
+  uint32_t column;
+  Line(uint64_t start_address, uint64_t end_address,
+       uint16_t file, uint32_t line, uint32_t column)
+    : start_address(start_address), end_address(end_address),
+      file(file), line(line), column(column) {}
+};
+
+
+// Decoder whose reportLine() is defined in LineMapping.cpp.
+class CudaLineInfoDecoder : public LineInfoDecoder {
+ public:
+  CudaLineInfoDecoder() {}
+  // file_number is the file-table index, as in LineInfoDecoder::reportLine.
+  virtual void reportLine(uint64_t start_address, uint64_t end_address,
+    uint16_t file_number, uint32_t line, uint32_t column, FileSystem *fs);
+  virtual void processMatrixRow(LineInfo *li, FileSystem *fs) {
+    LineInfoDecoder::processMatrixRow(li, fs);  // plain pass-through
+  }
+};
+
+
+class LineMapping {  // reads a cubin's line map and adds it to a Symtab
+ public:
+  LineMapping() {}
+  // read_lines: true only when the cubin had no DWARF and readCubinLineMap ran.
+  bool read_lines(const std::string &cubin_file);
+  bool insert_lines(Dyninst::SymtabAPI::Symtab *symtab);
+  // insert_lines: false as soon as one collected line cannot be added.
+ private:
+  CudaLineInfoDecoder _cuda_line_info_decoder;
+};
+
+#endif
diff --git a/dyninst/cuda/Makefile b/dyninst/cuda/Makefile
index dab7c22..20899fa 100644
--- a/dyninst/cuda/Makefile
+++ b/dyninst/cuda/Makefile
@@ -24,7 +24,8 @@ DEFS = -DDYNINST_USE_CUDA
 INCL =  \
     -I$(DYNINST)/include  \
     -I$(BOOST)/include  \
-    -I$(ELFUTILS)/include
+    -I$(ELFUTILS)/include  \
+    -I$(ELFUTILS)/include/elfutils
 
 LIBS =  \
     -L$(DYNINST)/lib  \
@@ -48,7 +49,7 @@ PROG = cuda-parse
 
 OBJS = cuda-parse.o  ElfHelper.o  Fatbin.o  InputFile.o  RelocateCubin.o  \
 			 CudaCFGFactory.o  CudaCodeSource.o  CudaFunction.o  CudaBlock.o \
-			 GraphReader.o  CFGParser.o
+			 ReadCubinLineMap.o  LineMapping.o  GraphReader.o  CFGParser.o
 
 all: $(PROG)
 
diff --git a/dyninst/cuda/ReadCubinLineMap.cpp b/dyninst/cuda/ReadCubinLineMap.cpp
new file mode 100644
index 0000000..17bd58c
--- /dev/null
+++ b/dyninst/cuda/ReadCubinLineMap.cpp
@@ -0,0 +1,1121 @@
+// * BeginRiceCopyright *****************************************************
+//
+// $HeadURL$
+// $Id$
+//
+// --------------------------------------------------------------------------
+// Part of HPCToolkit (hpctoolkit.org)
+//
+// Information about sources of support for research and development of
+// HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
+// --------------------------------------------------------------------------
+//
+// Copyright ((c)) 2002-2018, Rice University
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Rice University (RICE) nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// This software is provided by RICE and contributors "as is" and any
+// express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular
+// purpose are disclaimed. In no event shall RICE or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or
+// business interruption) however caused and on any theory of liability,
+// whether in contract, strict liability, or tort (including negligence
+// or otherwise) arising in any way out of the use of this software, even
+// if advised of the possibility of such damage.
+//
+// ******************************************************* EndRiceCopyright *
+
+
+//***************************************************************************
+//
+// File: ReadCubinLineMap.cpp
+//
+// Purpose:
+//   Read line map of cubin. 
+//
+//***************************************************************************
+
+//******************************************************************************
+// system includes
+//******************************************************************************
+
+#include <assert.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+
+#include <dwarf.h>
+
+
+
+//******************************************************************************
+// local includes
+//******************************************************************************
+
+#include "ElfHelper.hpp"
+#include "Line.hpp"
+
+
+//******************************************************************************
+// macros
+//******************************************************************************
+
+#define DEBUG_LINE_SECTION_NAME ".debug_line"
+
+#define need(p,k,e) if (p + k > e) return false;
+
+#define V(x) ((void*) x)
+
+
+#if DUMP_RAW_LINE
+#define DUMP_RAW_OUTPUT(...) std::cout << __VA_ARGS__
+#else
+#define DUMP_RAW_OUTPUT(...)
+#endif
+
+
+#define DUMP_HEADER       1
+
+
+#if DUMP_HEADER 
+#define DUMP_HEADER_OUTPUT(...) std::cout << __VA_ARGS__
+#else
+#define DUMP_HEADER_OUTPUT(...)
+#endif
+
+
+
+//******************************************************************************
+// type definitions
+//******************************************************************************
+
+typedef std::vector<Elf_Scn *> Elf_SectionVector;
+typedef std::vector<Elf64_Addr> Elf_SymbolVector;
+
+// Directory names of a line map. Slot 0 holds a NULL placeholder, so real
+// entries are numbered from 1.
+class DirectoryTable : public std::vector<const char *> {
+public:
+  const void *offset; // not assigned anywhere in this class
+
+  DirectoryTable() { push_back(NULL); };
+
+  // Entry i, or NULL when i is not in 1..size()-1.
+  const char *
+  entry(int i)
+  {
+    if (i <= 0) return NULL;
+    if (!(i < size())) return NULL;
+    return this->data()[i];
+  }
+
+  // Print every entry from slot 1 upward through DUMP_HEADER_OUTPUT.
+  void dump() {
+    const int count = static_cast<int>(size());
+    for (int k = 1; k < count; ++k) {
+      DUMP_HEADER_OUTPUT("  " << k << "\t" << this->data()[k] << "\n");
+    }
+  };
+};
+
+
+// One file-table record. The constructor, defined out of line, decodes it.
+class FileTableEntry {
+public:
+  uint16_t dir_index;
+  uint64_t time;
+  uint64_t size;
+  const char *filename;
+
+  // ptr is taken by reference and advanced by the decoding.
+  FileTableEntry(const unsigned char *&ptr);
+
+  const char *getFileName() { return filename; }
+
+  uint16_t getDirIndex() { return dir_index; }
+
+  // Print this record, labelled with index i, through DUMP_HEADER_OUTPUT.
+  void dump
+  (
+    int i
+  ) {
+    DUMP_HEADER_OUTPUT("  " << i << "\t" << dir_index << "\t" << time
+                       << "\t" << size << "\t" << filename << "\n");
+  }
+};
+
+
+// File-table entries of a line map; slot 0 holds a NULL placeholder.
+class FileTable : public std::vector<FileTableEntry *> {
+public:
+  FileTable() { push_back(NULL); };
+
+  // Name stored in entry i. The index is not range-checked and slot 0 is NULL.
+  const char *getFileName
+  (
+    int i
+  ) {
+    FileTableEntry *entry = this->data()[i];
+    return entry->getFileName();
+  };
+  // Directory index stored in entry i; the index is not range-checked either.
+  uint16_t
+  getDirIndex
+  (
+    int i
+  ) {
+    FileTableEntry *entry = this->data()[i];
+    return entry->getDirIndex();
+  };
+  // Dump every entry from slot 1 upward.
+  void dump() {
+    const int count = static_cast<int>(size());
+    for (int k = 1; k < count; ++k) {
+      this->data()[k]->dump(k);
+    }
+  };
+
+public:
+  const void *offset;
+};
+
+
+class FileSystemRepr { // storage behind FileSystem: one directory table, one file table
+public:
+  DirectoryTable &getDirectoryTable() { return directories; }
+  // Both accessors return a reference to the member, not a copy.
+  FileTable &getFileTable() { return files; }
+
+public:
+  DirectoryTable directories;
+  FileTable files;
+};
+
+class LineDecoderSettings { // header fields that steer line-map decoding
+public:
+  uint8_t min_inst_len;      // minimum instruction length
+  uint8_t max_ops_per_inst;  // maximum operations per instruction; 1 if version < 4
+  uint8_t default_is_stmt;   // default value of is_stmt
+  int8_t  line_base;         // line base (signed)
+  uint8_t line_range;        // line range
+  uint8_t opcode_base;       // opcode base
+  // Fills the fields above from ptr, in declaration order, advancing ptr.
+  void read
+  (
+   uint16_t dwarf_version,
+   const unsigned char *&ptr
+  );
+};
+
+
+// Argument count per opcode; slot 0 is a placeholder holding 0.
+class OpcodeLengthTable : public std::vector<uint16_t> {
+public:
+  OpcodeLengthTable() { push_back(0); };
+  // Print "  Opcode <i> has <n> arg(s)" for every slot from 1 upward.
+  void dump() {
+    for (int op = 1; op < static_cast<int>(size()); ++op) {
+      const uint16_t nargs = this->data()[op];
+      DUMP_HEADER_OUTPUT("  Opcode " << op << " has " << nargs);
+      DUMP_HEADER_OUTPUT((nargs == 1 ? " arg\n" : " args\n"));
+    }
+  };
+};
+
+
+class LineMapInfo {  // header values and tables of one line-map unit, plus its parser
+public:
+  uint64_t unit_length;
+  uint64_t hdr_length;
+  uint16_t dwarf_version;
+  // decoder settings, opcode argument counts and directory/file tables
+  LineDecoderSettings lds;
+  OpcodeLengthTable op_lengths; 
+  FileSystem fs;
+  // Runs the header, opcode, directory and file stages, dump(), then the line map.
+  bool parse
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *start,
+   const unsigned char *end,
+   const unsigned char **sptr,
+   LineInfoHandler *lih
+  );
+
+  void dump();
+
+private:
+  // Fills unit_length, dwarf_version, hdr_length and lds; advances *sptr.
+  bool parseHeader
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *end,
+   const unsigned char **sptr
+  );
+
+  bool parseOpcodeLengthTable
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *end,
+   const unsigned char **sptr
+  );
+
+  bool parseDirectoryTable
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *start,
+   const unsigned char *end,
+   const unsigned char **sptr
+  );
+
+  bool parseFileTable
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *start,
+   const unsigned char *end,
+   const unsigned char **sptr
+  );
+
+  bool parseLineMap
+  (
+   GElf_Ehdr *ehdr,
+   Elf_Scn *scn,
+   GElf_Shdr *shdr,
+   const unsigned char *start,
+   const unsigned char *end,
+   const unsigned char **sptr,
+   LineInfoHandler *lih
+  );
+
+  void dumpHeader();
+  void dumpOpcodeTable();
+  void dumpDirectoryTable();
+  void dumpFileTable();
+}; 
+
+
+
+//******************************************************************************
+// private functions
+//******************************************************************************
+
+// form an unsigned integer by reading 'len' bytes in little endian order;
+// a 'len' outside 1..8 consumes nothing and yields 0
+static uint64_t
+uread(const unsigned char *&p, unsigned int len)
+{
+  uint64_t val = 0;
+
+  if (len < 1 || len > 8) {
+    return val;
+  }
+
+  // input byte k supplies result bits [8k, 8k+8)
+  for (unsigned int k = 0; k < len; ++k) {
+    const uint64_t byte = *p++;
+    val |= byte << (8 * k);
+  }
+
+  return val;
+}
+
+
+// read an unsigned LEB128 encoded integer
+// return the result and advance ptr past the data
+static uint64_t
+uread_leb128(const unsigned char *&ptr)
+{
+  uint64_t result = 0;
+  unsigned int pos = 0; // bits from the first byte belong in the lowest result bits
+  for (;;) {
+    const unsigned char byte = *ptr++;
+    // widen before shifting: an int-sized shift loses every bit at pos >= 32
+    if (pos < 64) result |= ((uint64_t) (byte & 0x7f)) << pos;
+    if ((byte & 0x80) == 0) break; // no more bytes remain
+    pos += 7; // compute next insertion point
+  }
+  return result;
+}
+
+
+// read a signed LEB128 encoded integer
+// return the result and advance ptr past the data
+static int64_t
+sread_leb128(const unsigned char *&ptr)
+{
+  uint64_t result = 0;
+  unsigned int pos = 0; // bits from the first byte belong in the lowest result bits
+  const unsigned int nbits = sizeof(result) << 3;
+  unsigned char byte;
+  for (;;) {
+    byte = *ptr++;
+    // shift in 64 bits: the int-typed shift dropped payload bits at pos >= 32
+    if (pos < nbits) result |= ((uint64_t) (byte & 0x7f)) << pos;
+    pos += 7; // compute next insertion point
+    if ((byte & 0x80) == 0) break; // no more bytes remain
+  }
+  if (byte & 0x40) { // check sign bit of last byte
+    // result must be negative
+    if (pos < nbits) { // sign needs extension
+      result |= (~(uint64_t) 0 << pos); // unsigned 64-bit mask; ~0 is a negative int
+    }
+  }
+  return (int64_t) result;
+}
+
+
+FileSystem::FileSystem()
+  : repr(new FileSystemRepr) // released again by the destructor
+{
+}
+
+
+FileSystem::~FileSystem()
+{
+  delete repr; // allocated by the constructor
+}
+
+// Name of file-table entry `i`; the index is not range-checked here.
+const char *FileSystem::getFileName(int i)
+{
+  FileTable &table = repr->getFileTable();
+  return table.getFileName(i);
+}
+
+
+// Directory of file-table entry `i`, or NULL if its directory index has no entry.
+const char *FileSystem::getDirName(int i)
+{
+  const uint16_t dir = repr->getFileTable().getDirIndex(i);
+  return repr->getDirectoryTable().entry(dir);
+}
+
+
+// Decode one file-table record at ptr: a NUL-terminated name followed by
+// three unsigned LEB128 values. ptr is left just past the record.
+FileTableEntry::FileTableEntry
+(
+ const unsigned char *&ptr
+)
+{
+  // the name is used in place; no copy is made
+  filename = (const char *) ptr;
+  for (;;) {
+    if (*ptr++ == 0) break; // step over the name and its terminator
+  }
+
+  // the three LEB128 fields are all stored, none is discarded
+  dir_index = uread_leb128(ptr); // directory index
+  time = uread_leb128(ptr);      // modification time
+  size = uread_leb128(ptr);      // file length
+}
+
+
+
+// Read the settings bytes at ptr, advancing ptr past every byte consumed.
+void
+LineDecoderSettings::read
+(
+ uint16_t dwarf_version,
+ const unsigned char *&ptr
+)
+{
+  min_inst_len = uread(ptr, 1);
+  // the field is read only from DWARF version 4 on; older versions get 1
+  if (dwarf_version < 4) {
+    max_ops_per_inst = 1;
+  } else {
+    max_ops_per_inst = uread(ptr, 1);
+  }
+
+  default_is_stmt = uread(ptr, 1);
+  // line_base is the one signed field, so its byte is read through a char pointer
+  const char *base_byte = (const char *) ptr;
+  ptr += 1;
+  line_base = *base_byte;
+
+  line_range = uread(ptr, 1);
+  opcode_base = uread(ptr, 1);
+}
+    
+
+// Initial row state: file and line are 1, is_stmt comes from lds, the rest is 0/false.
+void
+LineInfo::reset
+(
+  const LineDecoderSettings &lds
+)
+{
+  resetFlagsAndDiscriminator();
+  address = 0;
+  op_index = 0;
+  column = 0;
+  isa = 0;
+  file = 1;
+  line = 1;
+  is_stmt = lds.default_is_stmt;
+}
+
+// Clear the four per-row flags and the discriminator.
+void
+LineInfo::resetFlagsAndDiscriminator()
+{
+  discriminator = 0;
+
+  epilogue_begin = false;
+  prologue_end = false;
+  end_sequence = false;
+  basic_block = false;
+}
+      
+
+
+// Advance the address and op_index registers by operation_advance
+// operations:
+//
+//   address  += min_inst_len * ((op_index + advance) / max_ops_per_inst)
+//   op_index  = (op_index + advance) % max_ops_per_inst
+//
+// The arithmetic is done in 64 bits so that a large advance is not
+// truncated. A max_ops_per_inst of zero (malformed header) is treated as 1
+// instead of dividing by zero.
+void 
+LineInfo::advancePC
+(
+ uint64_t operation_advance,
+ const LineDecoderSettings &lds
+)
+{
+  const uint64_t ops_per_inst =
+    lds.max_ops_per_inst ? static_cast<uint64_t>(lds.max_ops_per_inst) : 1;
+
+  const uint64_t total_ops = op_index + operation_advance;
+
+  address += static_cast<uint64_t>(lds.min_inst_len) * (total_ops / ops_per_inst);
+  op_index = total_ops % ops_per_inst;
+}
+
+
+// Apply a special opcode. The opcode, once opcode_base is subtracted,
+// encodes both a line increment (remainder by line_range, offset by
+// line_base) and an operation advance (quotient by line_range).
+void 
+LineInfo::applySpecialOp
+(
+ uint64_t opcode,
+ const LineDecoderSettings &lds
+)
+{
+  const uint64_t adjusted = opcode - lds.opcode_base;
+
+  // line register first, then address/op_index
+  const uint64_t line_increment = lds.line_base + (adjusted % lds.line_range);
+  line += line_increment;
+
+  advancePC(adjusted / lds.line_range, lds);
+}
+
+
+// Parse a complete line table: header, opcode length table, directory
+// table, file table, and finally the line number program itself.
+//
+// start/end: bounds of the section data
+// sptr:      in/out read position; each stage advances it
+// lih:       handler that receives every row produced by the program
+//
+// Returns false as soon as any stage reports failure, true otherwise.
+// The header tables are dumped (see dump()) before the program is decoded.
+bool
+LineMapInfo::parse
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *start,
+ const unsigned char *end,
+ const unsigned char **sptr,
+ LineInfoHandler *lih
+)
+{
+  if (!parseHeader(ehdr, scn, shdr, end, sptr)) return false;
+  if (!parseOpcodeLengthTable(ehdr, scn, shdr, end, sptr)) return false;
+  if (!parseDirectoryTable(ehdr, scn, shdr, start, end, sptr)) return false;
+  if (!parseFileTable(ehdr, scn, shdr, start, end, sptr)) return false;
+
+  dump();
+
+  return parseLineMap(ehdr, scn, shdr, start, end, sptr, lih);
+}
+
+
+// Parse the fixed part of the line table header: unit length, DWARF
+// version, header length, and the decoder settings.
+//
+// A 32-bit unit length of 0xffffffff is an escape: the real length follows
+// as a 64-bit value, and the header length field is then 8 bytes wide
+// instead of 4.
+//
+// On success *sptr is advanced past everything read and true is returned.
+// NOTE(review): need() is presumably a helper that returns false from this
+// function when fewer than the requested bytes remain -- confirm. The
+// version, header length and decoder settings are read without such a
+// check.
+bool
+LineMapInfo::parseHeader
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *end,
+ const unsigned char **sptr
+)
+{
+  const unsigned char *cursor = *sptr;
+
+  // the initial length field takes 4 bytes
+  need(cursor, 4, end);
+
+  // width in bytes of the header length field; widened below for the
+  // 64-bit form
+  int offset_size = 4;
+
+  unit_length = uread(cursor, 4);
+
+  if (unit_length == 0xffffffff) {
+    // escape value: the actual unit length is the next 8 bytes
+    need(cursor, 8, end);
+
+    unit_length = uread(cursor, 8);
+    offset_size = 8;
+  }
+
+  dwarf_version = uread(cursor, 2);
+  hdr_length = uread(cursor, offset_size);
+
+  // the remaining fixed fields belong to the decoder settings
+  lds.read(dwarf_version, cursor);
+
+  // hand the new position back to the caller
+  *sptr = cursor;
+
+  return true;
+}
+
+
+// Read the standard opcode length table: one byte for each opcode from 1
+// up to opcode_base - 1, appended in order to op_lengths.
+//
+// *sptr is advanced past the table. Always returns true.
+// NOTE(review): 'end' is not consulted; the table is assumed to lie fully
+// inside the section.
+bool
+LineMapInfo::parseOpcodeLengthTable
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *end,
+ const unsigned char **sptr
+)
+{
+  const unsigned char *cursor = *sptr;
+
+  int entries_read = 0;
+  while (entries_read < lds.opcode_base - 1) {
+    const uint16_t operand_count = static_cast<uint16_t>(uread(cursor, 1));
+    op_lengths.push_back(operand_count);
+    ++entries_read;
+  }
+
+  // hand the new position back to the caller
+  *sptr = cursor;
+
+  return true;
+}
+
+
+// Read the include-directory table: a run of NUL-terminated strings that
+// ends with an empty string. Each string is recorded in place (no copy) in
+// the FileSystem's directory table, together with the table's offset from
+// 'start'.
+//
+// *sptr is advanced past the terminating empty string. Always returns true.
+// NOTE(review): 'end' is not consulted; the table is assumed to be
+// properly terminated inside the section.
+bool
+LineMapInfo::parseDirectoryTable
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *start,
+ const unsigned char *end,
+ const unsigned char **sptr
+)
+{
+  DirectoryTable &dir_table = fs.repr->getDirectoryTable();
+
+  // remember where the table begins, relative to the section start
+  dir_table.offset = reinterpret_cast<void *>(*sptr - start);
+
+  const char *cursor = reinterpret_cast<const char *>(*sptr);
+
+  // a leading NUL marks the empty string that ends the table
+  while (*cursor != 0) {
+    dir_table.push_back(cursor);
+
+    // move to the first character after this string's NUL
+    for (; *cursor != 0; ++cursor) {
+    }
+    ++cursor;
+  }
+
+  // step over the empty string and hand the position back to the caller
+  *sptr = reinterpret_cast<const unsigned char *>(cursor + 1);
+
+  return true;
+}
+
+// Read the file name table: a run of entries (each decoded by
+// FileTableEntry) that ends with a single NUL byte. Every entry is heap
+// allocated and appended to the FileSystem's file table, and the table's
+// offset from 'start' is recorded.
+//
+// *sptr is advanced past the terminating NUL. Always returns true.
+// NOTE(review): 'end' is not consulted; the table is assumed to be
+// properly terminated inside the section.
+bool
+LineMapInfo::parseFileTable
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *start,
+ const unsigned char *end,
+ const unsigned char **sptr
+)
+{
+  FileTable &file_table = fs.repr->getFileTable();
+
+  // remember where the table begins, relative to the section start
+  file_table.offset = reinterpret_cast<void *>(*sptr - start);
+
+  const unsigned char *cursor = *sptr;
+
+  // the FileTableEntry constructor advances cursor past the entry it reads
+  for (; *cursor != 0; ) {
+    file_table.push_back(new FileTableEntry(cursor));
+  }
+
+  // step over the terminating NUL and hand the position back to the caller
+  *sptr = cursor + 1;
+
+  return true;
+}
+
+
+// Decode the line number program in [*sptr, end) and hand every row it
+// produces to lih->processMatrixRow().
+//
+// Three opcode classes are handled:
+//   - special opcodes (>= opcode_base): advance address and line, emit a row
+//   - extended opcodes (leading 0 byte, then a LEB128 length and sub-opcode)
+//   - standard opcodes DW_LNS_copy .. DW_LNS_set_isa
+//
+// A readelf-style trace is written through DUMP_RAW_OUTPUT as opcodes are
+// decoded. *sptr is advanced to where decoding stopped. Always returns true.
+//
+// NOTE(review): standard opcodes above DW_LNS_set_isa but below opcode_base
+// are consumed without skipping their operands; the opcode length table
+// would be needed to skip them correctly.
+// NOTE(review): decoding runs to the end of the section using the single
+// header parsed earlier; a section holding several line tables would need
+// one header per table.
+bool
+LineMapInfo::parseLineMap
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ const unsigned char *start,
+ const unsigned char *end,
+ const unsigned char **sptr,
+ LineInfoHandler *lih
+)
+{
+  FileTable &files = fs.repr->getFileTable();
+
+  const unsigned char *ptr = *sptr;
+
+  LineInfo lineInfo;
+
+  lineInfo.reset(lds);
+
+  if (ptr == end) {
+    DUMP_RAW_OUTPUT(" No Line Number Statements.\n");
+    return true;
+  }
+
+  DUMP_RAW_OUTPUT(" Line Number Statements:\n");
+
+  while (ptr < end) {
+    size_t offset = ptr - start;
+
+    // read the opcode
+    unsigned int opcode = uread(ptr, 1);
+
+    DUMP_RAW_OUTPUT("  [" <<
+                    std::internal << std::hex << std::setw(10) <<
+                    std::setfill('0') << V(offset) << std::dec <<
+                    "]  "); 
+
+    if (opcode >= lds.opcode_base) {
+
+      // special opcode
+
+      uint64_t prev_address = lineInfo.address;
+      uint64_t prev_line = lineInfo.line;
+
+      lineInfo.applySpecialOp(opcode, lds);
+
+      unsigned int addr_offset = lineInfo.address - prev_address;
+      int line_offset = lineInfo.line - prev_line;
+
+      DUMP_RAW_OUTPUT(
+        "Special opcode "  << opcode - lds.opcode_base <<
+        ": advance Address by " << addr_offset << " to " <<
+        std::hex << (void *) lineInfo.address << std::dec);
+
+      if (lineInfo.op_index) {
+        DUMP_RAW_OUTPUT(", op_index = " << (uint16_t) lineInfo.op_index);
+      }
+
+      DUMP_RAW_OUTPUT(" and Line by " << line_offset << " to " <<
+                      lineInfo.line << "\n");
+
+      // append row to matrix
+      lih->processMatrixRow(&lineInfo, &fs);
+
+      lineInfo.resetFlagsAndDiscriminator();
+
+    } else if (opcode == 0) {
+
+      // extended opcode: LEB128 length, then sub-opcode and operands
+
+      // read length (counts the sub-opcode byte and its operands)
+      unsigned int len = uread_leb128(ptr);
+
+      const unsigned char *inst_begin = ptr;
+
+      // read sub-opcode
+      opcode = uread(ptr, 1);
+
+      DUMP_RAW_OUTPUT("Extended opcode "  << opcode << ": ");
+
+      switch (opcode) {
+
+      case DW_LNE_end_sequence:
+        lineInfo.endSequence();
+
+        DUMP_RAW_OUTPUT("End of Sequence\n\n");
+
+        // append row to matrix 
+        lih->processMatrixRow(&lineInfo, &fs);
+
+        lineInfo.reset(lds); // reset line to defaults
+
+        break;
+
+      case DW_LNE_set_address: {
+        // the operand occupies whatever is left of the instruction
+        uint64_t addr = uread(ptr, len - (ptr - inst_begin));
+        lineInfo.setAddress(addr);
+
+        DUMP_RAW_OUTPUT("set Address to "); 
+
+        if (addr == 0) {
+          DUMP_RAW_OUTPUT("0x0\n"); // the line below drops the 0x for 0 
+        } else {
+          DUMP_RAW_OUTPUT(std::hex << (void *) addr << std::dec << "\n");
+        }
+
+        break;
+      }
+
+      case DW_LNE_define_file: {
+        // a file defined by the program is appended to the file table; it
+        // must not be stored at the slot numbered by its directory index,
+        // which could overwrite an existing entry or index out of range
+        FileTableEntry *fte = new FileTableEntry(ptr);
+        files.push_back(fte);
+
+        DUMP_RAW_OUTPUT("define new file: dir=" << fte->dir_index <<
+                        ", name=" << fte->filename << "\n");
+
+        break;
+      }
+
+      case DW_LNE_set_discriminator: {
+        uint64_t dis = uread_leb128(ptr);
+
+        lineInfo.setDiscrim(dis);
+
+        DUMP_RAW_OUTPUT("set discriminator to " << dis << "\n");
+
+        break;
+      }
+
+      default:
+        // skip the operands of an unknown sub-opcode. A zero length would
+        // make 'len - 1' wrap around, so only jump when a length is given.
+        if (len > 0) {
+          ptr = inst_begin + len;
+        }
+
+        DUMP_RAW_OUTPUT("unknown opcode\n");
+
+        break;
+      }
+
+    } else if (opcode <= DW_LNS_set_isa) {
+
+      // standard opcode
+
+      switch (opcode) {
+
+      case DW_LNS_advance_line: {
+        int64_t line_delta = sread_leb128(ptr);
+        lineInfo.advanceLine(line_delta);
+
+        DUMP_RAW_OUTPUT("Advance Line by " <<
+                        line_delta << " to " << lineInfo.line << "\n");
+
+        break;
+      }
+
+      case DW_LNS_advance_pc: {
+        uint64_t op_advance = uread_leb128(ptr);
+        lineInfo.advancePC(op_advance, lds);
+
+        DUMP_RAW_OUTPUT("Advance PC by " <<
+                        op_advance << " to " << V(lineInfo.address) << "\n");
+
+        break;
+      }
+
+      case DW_LNS_const_add_pc: {
+        // advance as special opcode 255 would, without touching the line
+        int adjusted = 255 - lds.opcode_base;
+        int div = adjusted / lds.line_range;
+        lineInfo.advancePC(div, lds);
+
+        DUMP_RAW_OUTPUT("Advance address by constant " <<
+                        adjusted << " to " << V(lineInfo.address));
+
+        if (lineInfo.op_index) {
+          DUMP_RAW_OUTPUT(", op_index to " << lineInfo.op_index);
+        }
+
+        DUMP_RAW_OUTPUT("\n");
+
+        break;
+      }
+
+      case DW_LNS_copy:
+
+        DUMP_RAW_OUTPUT("Copy\n");
+
+        // append row to matrix
+        lih->processMatrixRow(&lineInfo, &fs);
+
+        lineInfo.resetFlagsAndDiscriminator();
+
+        break;
+
+      case DW_LNS_fixed_advance_pc: {
+        unsigned int fixed_advance = uread(ptr, 2);
+        lineInfo.fixedAdvancePC(fixed_advance);
+
+        // terminate the trace line like every other opcode does
+        DUMP_RAW_OUTPUT("advance address by fixed value " << fixed_advance <<
+                        " to " << V(lineInfo.address) << "\n");
+
+        break;
+      }
+
+      case DW_LNS_negate_stmt:
+        lineInfo.negateStmt();
+
+        DUMP_RAW_OUTPUT("set 'is_stmt' to " << lineInfo.is_stmt << "\n");
+
+        break;
+
+      case DW_LNS_set_basic_block:
+        lineInfo.basicBlock();
+
+        DUMP_RAW_OUTPUT("set basic block flag\n");
+
+        break;
+
+      case DW_LNS_set_column: {
+        unsigned int col = uread_leb128(ptr);
+        lineInfo.setColumn(col);
+
+        DUMP_RAW_OUTPUT("Set Column to " << col << "\n");
+
+        break;
+      }
+
+      case DW_LNS_set_epilogue_begin:
+        lineInfo.beginEpilogue();
+
+        DUMP_RAW_OUTPUT("Set epilogue begin flag\n");
+
+        break;
+
+      case DW_LNS_set_file: {
+        uint64_t file = uread_leb128(ptr);
+        lineInfo.setFile(file);
+
+        DUMP_RAW_OUTPUT("Set File Name to entry " << file <<
+                        " in the File Name Table\n");
+
+        break;
+      }
+
+      case DW_LNS_set_isa: {
+        unsigned int isa = uread_leb128(ptr);
+        lineInfo.setIsa(isa);
+
+        DUMP_RAW_OUTPUT("set isa to " << isa << "\n");
+        break;
+      }
+
+      case DW_LNS_set_prologue_end:
+        lineInfo.endPrologue();
+
+        DUMP_RAW_OUTPUT("Set prologue end flag\n");
+        break;
+      }
+    }
+  }
+
+  // return updated pointer into the section
+  *sptr = (const unsigned char *) ptr;
+
+  DUMP_RAW_OUTPUT("\n");
+
+  return true;
+}
+
+
+// Parse the contents of one line-number section and report its rows to lih.
+// Does nothing when the section is empty or its data cannot be obtained.
+static void
+parseLineSection
+(
+ GElf_Ehdr *ehdr,
+ Elf_Scn *scn,
+ GElf_Shdr *shdr,
+ LineInfoHandler *lih
+)
+{
+  // an empty section has nothing to decode
+  if (shdr->sh_size == 0) return;
+
+  Elf_Data *section_data = elf_getdata(scn, NULL);
+
+  // give up when libelf has no data, or no buffer, for this section
+  if (section_data == NULL || section_data->d_buf == NULL) return;
+
+  const unsigned char *begin =
+    static_cast<const unsigned char *>(section_data->d_buf);
+  const unsigned char *limit = begin + section_data->d_size;
+
+  // read position; advanced by the parser as it consumes the section
+  const unsigned char *cursor = begin;
+
+  LineMapInfo line_map;
+  line_map.parse(ehdr, scn, shdr, begin, limit, &cursor, lih);
+}
+
+
+// Write the parsed header fields through DUMP_HEADER_OUTPUT, one labelled
+// field per line. The single-byte decoder settings are widened to 16-bit
+// integers first so the stream prints them as numbers, not characters.
+// The offset is always printed as 0x0.
+void
+LineMapInfo::dumpHeader
+(
+)
+{
+  DUMP_HEADER_OUTPUT
+    ("Raw dump of debug contents of section .debug_line:\n\n" <<
+     "  Offset:                      0x0\n" <<
+     "  Length:                      " <<          unit_length      << "\n" <<
+     "  DWARF Version:               " <<          dwarf_version    << "\n" <<
+     "  Prologue Length:             " <<          hdr_length       << "\n" <<
+     "  Minimum Instruction Length:  " <<(uint16_t)lds.min_inst_len << "\n" <<
+     "  Initial value of 'is_stmt':  " <<(uint16_t)lds.default_is_stmt<<"\n"<<
+     "  Line Base:                   " <<(int16_t) lds.line_base    << "\n" <<
+     "  Line Range:                  " <<(uint16_t)lds.line_range   << "\n" <<
+     "  Opcode Base:                 " <<(uint16_t)lds.opcode_base  << "\n\n"
+    );
+}
+
+
+// Write the opcode length table: a heading, the table's own dump, and a
+// blank line.
+void
+LineMapInfo::dumpOpcodeTable()
+{
+  // heading
+  DUMP_HEADER_OUTPUT(" Opcodes:\n");
+
+  // table contents
+  op_lengths.dump();
+
+  // separator
+  DUMP_HEADER_OUTPUT("\n");
+}
+
+
+// Write the directory table: a heading carrying the table's recorded
+// offset in hex, the table's own dump, and a blank line.
+void
+LineMapInfo::dumpDirectoryTable()
+{
+  DirectoryTable &dir_table = fs.repr->getDirectoryTable();
+
+  // heading
+  DUMP_HEADER_OUTPUT(" The Directory Table (offset " <<
+                     std::hex << dir_table.offset << std::dec << "):\n");
+
+  // table contents
+  dir_table.dump();
+
+  // separator
+  DUMP_HEADER_OUTPUT("\n");
+}
+
+
+// Write the file name table: a heading carrying the table's recorded
+// offset in hex plus the column titles, the table's own dump, and a blank
+// line.
+void
+LineMapInfo::dumpFileTable()
+{
+  FileTable &file_table = fs.repr->getFileTable();
+
+  // heading and column titles
+  DUMP_HEADER_OUTPUT(" The File Name Table (offset " <<
+                     std::hex << file_table.offset << std::dec << "):\n"
+                     "  Entry\tDir\tTime\tSize\tName\n");
+
+  // table contents
+  file_table.dump();
+
+  // separator
+  DUMP_HEADER_OUTPUT("\n");
+}
+
+
+// Dump every header component in the order it appears in the section:
+// fixed header, opcode lengths, directories, files.
+void
+LineMapInfo::dump()
+{
+  dumpHeader();           // fixed-size header fields
+  dumpOpcodeTable();      // standard opcode lengths
+  dumpDirectoryTable();   // include directories
+  dumpFileTable();        // file names
+}
+
+
+// Scan the sections of a cubin for the line map (the SHT_PROGBITS section
+// named DEBUG_LINE_SECTION_NAME) and, if one is found, parse it and report
+// its rows to lih. Only the first matching section is parsed.
+//
+// cubin_ptr: start of the cubin in memory; not needed here because the
+//            section contents are obtained through libelf
+// elf:       ELF descriptor of the cubin
+// sections:  the cubin's sections
+// lih:       handler that receives the decoded line information
+static void
+readLineMap
+(
+ char *cubin_ptr,
+ Elf *elf,
+ Elf_SectionVector *sections,
+ LineInfoHandler *lih
+)
+{
+  (void) cubin_ptr; // kept for interface compatibility
+
+  GElf_Ehdr ehdr_v;
+  GElf_Ehdr *ehdr = gelf_getehdr(elf, &ehdr_v);
+  if (ehdr == NULL) return;
+
+  //-------------------------------------------------------------
+  // scan through the sections to locate a line map, if any
+  //-------------------------------------------------------------
+  for (Elf_Scn *scn : *sections) {
+    GElf_Shdr shdr;
+    if (!gelf_getshdr(scn, &shdr)) continue;
+    if (shdr.sh_type != SHT_PROGBITS) continue;
+
+    // elf_strptr yields NULL when the name cannot be resolved; such a
+    // section cannot be the line map and must not be handed to strcmp
+    const char *section_name =
+      elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name);
+    if (section_name == NULL) continue;
+
+    if (strcmp(section_name, DEBUG_LINE_SECTION_NAME) == 0) {
+      // found the line map, so we are done with the linear scan of sections
+      parseLineSection(ehdr, scn, &shdr, lih);
+      break;
+    }
+  }
+}
+
+
+
+//******************************************************************************
+// interface functions
+//******************************************************************************
+
+// Read the line map of a cubin and report its rows to lih.
+//
+// cubin_ptr: start of the cubin in memory
+// cubin_elf: ELF descriptor of the cubin
+// lih:       handler that receives the decoded line information
+//
+// Returns true when the cubin's section list could be obtained and was
+// scanned for a line map, false otherwise. Because the scan itself reports
+// no status, true does not guarantee that a line map was present.
+bool
+readCubinLineMap
+(
+ char *cubin_ptr,
+ Elf *cubin_elf,
+ LineInfoHandler *lih
+)
+{
+  Elf_SectionVector *sections = elfGetSectionVector(cubin_elf);
+  if (!sections) {
+    return false;
+  }
+
+  readLineMap(cubin_ptr, cubin_elf, sections, lih);
+  delete sections;
+
+  // the result used to be a flag that was never set, so every call
+  // reported failure
+  return true;
+}
diff --git a/dyninst/cuda/ReadCubinLineMap.hpp b/dyninst/cuda/ReadCubinLineMap.hpp
new file mode 100644
index 0000000..97826a0
--- /dev/null
+++ b/dyninst/cuda/ReadCubinLineMap.hpp
@@ -0,0 +1,76 @@
+// * BeginRiceCopyright *****************************************************
+//
+// $HeadURL$
+// $Id$
+//
+// --------------------------------------------------------------------------
+// Part of HPCToolkit (hpctoolkit.org)
+//
+// Information about sources of support for research and development of
+// HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
+// --------------------------------------------------------------------------
+//
+// Copyright ((c)) 2002-2018, Rice University
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// * Neither the name of Rice University (RICE) nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// This software is provided by RICE and contributors "as is" and any
+// express or implied warranties, including, but not limited to, the
+// implied warranties of merchantability and fitness for a particular
+// purpose are disclaimed. In no event shall RICE or contributors be
+// liable for any direct, indirect, incidental, special, exemplary, or
+// consequential damages (including, but not limited to, procurement of
+// substitute goods or services; loss of use, data, or profits; or
+// business interruption) however caused and on any theory of liability,
+// whether in contract, strict liability, or tort (including negligence
+// or otherwise) arising in any way out of the use of this software, even
+// if advised of the possibility of such damage.
+//
+// ******************************************************* EndRiceCopyright *
+
+
+//***************************************************************************
+//
+// File: ReadCubinLineMap.hpp
+//
+// Purpose:
+//   Interface definition for reading a line map from a cubin. 
+//
+//***************************************************************************
+
+#ifndef __ReadCubinLineMap_hpp__
+#define __ReadCubinLineMap_hpp__
+
+//******************************************************************************
+// local includes
+//******************************************************************************
+
+#include "Line.hpp"
+
+//******************************************************************************
+// interface functions
+//******************************************************************************
+
+// Read the line map contained in a cubin and report the decoded line
+// information to the given handler.
+//
+// cubin_ptr: pointer to the start of the cubin in memory
+// cubin_elf: ELF descriptor for the same cubin
+// lih:       handler that receives the decoded line information
+bool
+readCubinLineMap
+(
+ char *cubin_ptr,
+ Elf *cubin_elf,
+ LineInfoHandler *lih
+);
+
+#endif
diff --git a/dyninst/cuda/cuda-parse.cpp b/dyninst/cuda/cuda-parse.cpp
index 95e6e86..9a5cf9a 100644
--- a/dyninst/cuda/cuda-parse.cpp
+++ b/dyninst/cuda/cuda-parse.cpp
@@ -87,6 +87,8 @@
 #include <vector>
 #include <mutex>
 
+#include <libdw.h>
+
 #include <CFG.h>
 #include <CodeObject.h>
 #include <CodeSource.h>
@@ -103,8 +105,11 @@
 #include "CudaFunction.hpp"
 #include "CudaBlock.hpp"
 #include "CudaCodeSource.hpp"
-#include "GraphReader.hpp"
 #include "CFGParser.hpp"
+#include "GraphReader.hpp"
+#include "Line.hpp"
+#include "ReadCubinLineMap.hpp"
+#include "LineMapping.hpp"
 
 #define MAX_VMA  0xfffffffffffffff0
 
@@ -501,6 +506,60 @@ printTime(const char *label, struct timeval *tv_prev, struct timeval *tv_now,
 
 //----------------------------------------------------------------------
 
+// Write the in-memory (relocated) cubin image to a file.
+//
+// relocated_cubin: path of the file to create or truncate
+// elf_addr:        start of the image
+// elf_len:         size of the image in bytes
+//
+// Returns true only if the file was opened, every byte was written, and
+// the file was closed without error. The descriptor is closed on every
+// path.
+bool
+dumpRelocatedCubin(const std::string &relocated_cubin, char *elf_addr, size_t elf_len) {
+    int fd = open(relocated_cubin.c_str(), O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU);
+    if (fd < 0) {
+        return false;
+    }
+
+    bool ok = true;
+    size_t written = 0;
+
+    // write() may transfer fewer bytes than requested, so keep going until
+    // the whole image is out or an error occurs
+    while (written < elf_len) {
+        ssize_t n = write(fd, elf_addr + written, elf_len - written);
+        if (n <= 0) {
+            ok = false;
+            break;
+        }
+        written += static_cast<size_t>(n);
+    }
+
+    if (close(fd) != 0) {
+        ok = false;
+    }
+    return ok;
+}
+
+//----------------------------------------------------------------------
+
+// Run nvdisasm on a cubin and store the control flow graph it prints
+// (dot format) in a file.
+//
+// filename:      cubin handed to nvdisasm
+// relocated_dot: file that receives nvdisasm's output
+//
+// Returns true only if the command could be started and exited with
+// status 0; otherwise a message is printed and false is returned.
+bool
+dumpRelocatedDot(const std::string &filename, const std::string &relocated_dot) {
+    // The command is run by a shell, so wrap each path in single quotes
+    // (escaping embedded single quotes) to keep spaces and shell
+    // metacharacters in file names from being interpreted.
+    auto shell_quote = [](const std::string &path) {
+        std::string quoted = "'";
+        for (char c : path) {
+            if (c == '\'') {
+                quoted += "'\\''";
+            } else {
+                quoted += c;
+            }
+        }
+        quoted += "'";
+        return quoted;
+    };
+
+    std::string cmd = "nvdisasm -cfg -poff " + shell_quote(filename) +
+        " > " + shell_quote(relocated_dot);
+    FILE *output = popen(cmd.c_str(), "r");
+    if (!output) {
+      cout << "Dump " << relocated_dot << " to disk failed" << endl; 
+      return false;
+    }
+
+    // popen succeeds whenever the shell starts; whether nvdisasm itself ran
+    // and succeeded is only known from the status returned by pclose
+    if (pclose(output) != 0) {
+      cout << "Dump " << relocated_dot << " to disk failed" << endl; 
+      return false;
+    }
+    return true;
+}
+
+
+// Read the dot file that belongs to 'filename' (the name with ".dot"
+// appended) into a graph, then let the CFG parser turn that graph into
+// functions, which are delivered through 'functions'.
+void
+parseDotCFG(const std::string &filename, std::vector<CudaParse::Function *> &functions) {
+    // step 1: dot file -> graph
+    std::string dot_path = filename + ".dot";
+    CudaParse::GraphReader reader(dot_path);
+    CudaParse::Graph cfg_graph;
+    reader.read(cfg_graph);
+
+    // step 2: graph -> functions
+    CudaParse::CFGParser parser;
+    parser.parse(cfg_graph, functions);
+}
+
+
+// Shift instruction offsets by symbol offsets: for every symbol in
+// the_symtab, each function whose name equals the symbol's mangled name
+// has the symbol's offset added to the offset of all its instructions.
+void
+relocateInstructions(std::vector<CudaParse::Function *> &functions) {
+  std::vector<Symbol *> all_symbols;
+  the_symtab->getAllSymbols(all_symbols);
+
+  for (auto *sym : all_symbols) {
+    for (auto *func : functions) {
+      // only functions named like this symbol are affected
+      if (!(func->name == sym->getMangledName())) {
+        continue;
+      }
+
+      for (auto *blk : func->blocks) {
+        for (auto *instruction : blk->insts) {
+          instruction->offset += sym->getOffset();
+        }
+      }
+    }
+  }
+}
+
+
 int
 main(int argc, char **argv)
 {
@@ -554,15 +613,8 @@ main(int argc, char **argv)
 	}
 	bool cuda_file = (the_symtab->getArchitecture() == Dyninst::Arch_cuda);
 
-	the_symtab->parseTypesNow();
-	the_symtab->parseFunctionRanges();
-
-	vector <Module *> modVec;
-	the_symtab->getAllModules(modVec);
-
-	for (auto mit = modVec.begin(); mit != modVec.end(); ++mit) {
-	    (*mit)->parseLineInformation();
-	}
+  the_symtab->parseTypesNow();
+  the_symtab->parseFunctionRanges();
 
 	gettimeofday(&tv_symtab, NULL);
 	getrusage(RUSAGE_SELF, &ru_symtab);
@@ -572,45 +624,37 @@ main(int argc, char **argv)
   CFGFactory * cfg_fact = NULL;
 
 	if (cuda_file) {
+      std::string relocated_dot = filename + ".dot";
       std::string relocated_cubin = filename + ".relocated";
-      int fd = open(relocated_cubin.c_str(), O_CREAT|O_WRONLY|O_TRUNC, S_IRWXU);
-      if (write(fd, elf_addr, elf_len) != elf_len) {
-          cout << "Write " << relocated_cubin << " to disk failed" << endl; 
+
+      if (!dumpRelocatedCubin(relocated_cubin, elf_addr, elf_len)) {
+          cout << "Write " + relocated_cubin + " to disk failed" << endl; 
           continue;
       }
-      close(fd);
-      std::string relocated_dot = filename + ".dot";
-      std::string cmd = "nvdisasm -cfg -poff " + filename + " > " + relocated_dot;
-      FILE *output = popen(cmd.c_str(), "r");
-      if (!output) {
-          cout << "Dump " << relocated_dot << " to disk failed" << endl; 
+
+      if (!dumpRelocatedDot(filename, relocated_dot)) {
+          cout << "Write " + relocated_dot + " to disk failed" << endl; 
           continue;
       }
-      pclose(output);
 
-      // parse dot cfg
-      CudaParse::GraphReader graph_reader(relocated_dot);
-      CudaParse::Graph graph;
-      graph_reader.read(graph);
-      CudaParse::CFGParser cfg_parser;
+      // Parse dot cfg
       std::vector<CudaParse::Function *> functions;
-      cfg_parser.parse(graph, functions);
+      parseDotCFG(filename, functions);
 
       // relocate instructions
-      std::vector<Symbol *> symbols;
-      the_symtab->getAllSymbols(symbols);
-      for (auto *symbol : symbols) {
-        for (auto *function : functions) {
-          if (function->name == symbol->getMangledName()) {
-            for (auto *block : function->blocks) {
-              for (auto *inst : block->insts) {
-                inst->offset += symbol->getOffset();
-              }
-            }
-          }
+      relocateInstructions(functions);
+
+      LineMapping line_mapping;
+
+      if (line_mapping.read_lines(relocated_cubin)) {
+        if (!line_mapping.insert_lines(the_symtab)) {
+          cout << "Insert " + relocated_cubin + " line_mapping failed" << endl; 
+          continue;
         }
       }
 
+      // Record line-mapping
+
       cfg_fact = new CudaCFGFactory(functions);
       code_src = new CudaCodeSource(functions); 
       code_obj = new CodeObject(code_src, cfg_fact);
@@ -620,6 +664,11 @@ main(int argc, char **argv)
       }
 	}
 	else {
+      vector <Module *> modVec;
+      the_symtab->getAllModules(modVec);
+      for (auto mit = modVec.begin(); mit != modVec.end(); ++mit) {
+        (*mit)->parseLineInformation();
+      }
 	    code_src = new SymtabCodeSource(the_symtab);
 	    code_obj = new CodeObject(code_src);
 	    code_obj->parse();