diff --git a/OverlapHelement.C b/OverlapHelement.C index 18246dc..8362c37 100644 --- a/OverlapHelement.C +++ b/OverlapHelement.C @@ -39,7 +39,7 @@ int main(int argc, char* argv []) { //readMPSFromDiskAndInitializeStaticVariables(); //initializeGlobalMPS(mpsstate); - if (rank =0) + if (rank == 0) printf("Reading file %s\n", argv[2]); test(argv[2]); //exit(0); diff --git a/StackOperators.C b/StackOperators.C index 0344332..8dc5056 100644 --- a/StackOperators.C +++ b/StackOperators.C @@ -493,14 +493,14 @@ void SpinAdapted::StackDesCre::build(StackMatrix& m, int row, int col, const Sta { const boost::shared_ptr<StackSparseMatrix> op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(i), i); boost::shared_ptr<StackSparseMatrix> op2 = rightBlock->get_op_rep(CRE, getSpinQuantum(j), j); - SpinAdapted::operatorfunctions::TensorProductElement(leftBlock, *op1, *op2, &b, &(b.get_stateInfo()), *this, m, row, col, 1.0); + double parity = getCommuteParity(op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[0]); + SpinAdapted::operatorfunctions::TensorProductElement(rightBlock, *op2, *op1, &b, &(b.get_stateInfo()), *this, m, row, col, parity); } else if (rightBlock->get_op_array(DES).has(i)) { const boost::shared_ptr<StackSparseMatrix> op1 = rightBlock->get_op_rep(DES, -getSpinQuantum(i), i); boost::shared_ptr<StackSparseMatrix> op2 = leftBlock->get_op_rep(CRE, getSpinQuantum(j), j); - double parity = getCommuteParity(-op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[0]); - SpinAdapted::operatorfunctions::TensorProductElement(rightBlock, *op1, *op2, &b, &(b.get_stateInfo()), *this, m, row, col, 1.0*parity); + SpinAdapted::operatorfunctions::TensorProductElement(leftBlock, *op2, *op1, &b, &(b.get_stateInfo()), *this, m, row, col, 1.0); } else abort(); @@ -549,19 +549,19 @@ void SpinAdapted::StackDesCre::build(const StackSpinBlock& b) SpinAdapted::operatorfunctions::TensorProduct(rightBlock, *op, *Overlap, &b, &(b.get_stateInfo()), *this, 1.0); dmrginp.makeopsT -> stop(); return; - } + } if (leftBlock->get_op_array(DES).has(i)) { const boost::shared_ptr<StackSparseMatrix> op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(i), i); const boost::shared_ptr<StackSparseMatrix> op2 = rightBlock->get_op_rep(CRE, getSpinQuantum(j), j); - SpinAdapted::operatorfunctions::TensorProduct(rightBlock, *op2, *op1, &b, &(b.get_stateInfo()), *this, 1.0); + double parity = getCommuteParity(op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[0]); + SpinAdapted::operatorfunctions::TensorProduct(rightBlock, *op2, *op1, &b, &(b.get_stateInfo()), *this, parity); } else if (rightBlock->get_op_array(DES).has(i)) { const boost::shared_ptr<StackSparseMatrix> op1 = rightBlock->get_op_rep(DES, -getSpinQuantum(i), i); const boost::shared_ptr<StackSparseMatrix> op2 = leftBlock->get_op_rep(CRE, getSpinQuantum(j), j); - double parity = getCommuteParity(op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[0]); - SpinAdapted::operatorfunctions::TensorProduct(rightBlock, *op1, *op2, &b, &(b.get_stateInfo()), *this, 1.0*parity); + SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *op2, *op1, &b, &(b.get_stateInfo()), *this, 1.0); } else abort(); @@ -934,9 +934,9 @@ void SpinAdapted::StackCreDesComp::buildfromCreDes(StackSpinBlock& b) TensorOp CD1(i, j, 1, -1, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep()); std::vector<boost::shared_ptr<StackSparseMatrix> > allops1; - int numCD = 0, numDC = 0; + int numCD = 0, numDC = 0, numCC = 0; std::vector<double> scaleCD, scaleCC; - if (dmrginp.hamiltonian() == BCS) scaleCC.resize(b.get_op_array(CRE_CRE).get_size()*2, 0.0); + if (dmrginp.hamiltonian() == BCS) 
scaleCC.resize(b.get_op_array(CRE_CRE).get_size()*4, 0.0); //for each CD there is spin0, spin 1 and normal and transpose if (dmrginp.spinAdapted()) scaleCD.resize(b.get_op_array(CRE_DES).get_size()*8, 0.0); else scaleCD.resize(b.get_op_array(CRE_DES).get_size()*4, 0.0); @@ -944,51 +944,52 @@ void SpinAdapted::StackCreDesComp::buildfromCreDes(StackSpinBlock& b) for (int ji=0; jiget_deltaQuantum(0) == deltaQuantum[0] || b.get_op_array(CRE_DES).get_local_element(ii)[ji]->get_deltaQuantum(0) == -deltaQuantum[0]) { - allops1.push_back(b.get_op_array(CRE_DES).get_local_element(ii)[ji]); - const int k = allops1.back()->get_orbs()[0]; - const int l = allops1.back()->get_orbs()[1]; - - //TensorOp CK(k,1), DL(l,-1); - //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); - TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); - if (!CD2.empty) - scaleCD[2*(allops1.size()-1)] = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); - - //CK=TensorOp(l,1); DL=TensorOp(k,-1); - //CD2 = CK.product(DL, spin, sym.getirrep()); - CD2 = TensorOp(l, k, 1, -1, spin, sym.getirrep()); - if (!b.has(DES) && l!=k && !CD2.empty) { - double scaleV = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); - if (dmrginp.spinAdapted()) { - scaleV *= getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[0]); - } - scaleCD[2*(allops1.size()-1) + 1] = scaleV; - } + allops1.push_back(b.get_op_array(CRE_DES).get_local_element(ii)[ji]); + const int k = allops1.back()->get_orbs()[0]; + const int l = allops1.back()->get_orbs()[1]; + + //TensorOp CK(k,1), DL(l,-1); + //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); + if (!CD2.empty) + scaleCD[2*(allops1.size()-1)] = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); + + //CK=TensorOp(l,1); DL=TensorOp(k,-1); + //CD2 = CK.product(DL, spin, sym.getirrep()); + CD2 = TensorOp(l, k, 1, -1, spin, sym.getirrep()); + if (!b.has(DES) && l!=k && !CD2.empty) { + double scaleV = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); + if (dmrginp.spinAdapted()) { + scaleV *= getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[0]); + } + scaleCD[2*(allops1.size()-1) + 1] = scaleV; + } } numCD = allops1.size(); if(b.has(DES)) { for (int ii=0; iiget_deltaQuantum(0) == -deltaQuantum[0]) { - allops1.push_back(b.get_op_array(DES_CRE).get_local_element(ii)[ji]); - - const int l = allops1.back()->get_orbs()[0]; - const int k = allops1.back()->get_orbs()[1]; - - if (k!=l) { - //TensorOp CK(k,1), DL(l,-1); - //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); - TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); - if (!CD2.empty) { - double scaleV = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); + if (b.get_op_array(DES_CRE).get_local_element(ii)[ji]->get_deltaQuantum(0) == deltaQuantum[0]) { + allops1.push_back(b.get_op_array(DES_CRE).get_local_element(ii)[ji]); + + const int l = allops1.back()->get_orbs()[0]; + const int k = allops1.back()->get_orbs()[1]; + + if (k!=l) { + //TensorOp CK(k,1), DL(l,-1); + //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); + if (!CD2.empty) { + double scaleV = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); + // FIXME does this need to be used for nonspinadapted as well? 
if (dmrginp.spinAdapted()) { - scaleV *= getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[0]); + scaleV *= getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[0]); } - scaleCD[2*(allops1.size()-1)] = scaleV; - } - } - } + scaleCD[2*(allops1.size()-1)] = scaleV; + } + } + } } @@ -1013,7 +1014,7 @@ void SpinAdapted::StackCreDesComp::buildfromCreDes(StackSpinBlock& b) //CK = TensorOp(k,1); //TensorOp CC2_commute = CL.product(CK, spin, sym.getirrep(), k==l); TensorOp CC2_commute(l, k, 1, 1, spin, sym.getirrep(), k == l); - int parity = getCommuteParity(getSpinQuantum(l),getSpinQuantum(k),get_deltaQuantum()[0]); + int parity = getCommuteParity(getSpinQuantum(l),getSpinQuantum(k),get_deltaQuantum()[1]); scaleCC[2*(allops1.size()-numDC-1)] = calcCompfactor(CD1, CC2, CD, v_cccd[b.get_integralIndex()]); if (k != l) scaleCC[2*(allops1.size()-numDC-1)] += parity * calcCompfactor(CD1, CC2_commute, CD, v_cccd[b.get_integralIndex()]); @@ -1029,16 +1030,32 @@ void SpinAdapted::StackCreDesComp::buildfromCreDes(StackSpinBlock& b) //DL = TensorOp(l,-1); //TensorOp DD2_commute = DK.product(DL, spin, sym.getirrep(), k == l); TensorOp DD2_commute(k, l, -1, -1, spin, sym.getirrep(), k == l); - int parity = getCommuteParity(getSpinQuantum(l),getSpinQuantum(k),get_deltaQuantum()[0]); - scaleCC[2*(allops1.size()-numCD-1)+1] = calcCompfactor(CD1, DD2, CD, v_cccd[b.get_integralIndex()]); + int parity = getCommuteParity(-getSpinQuantum(l),-getSpinQuantum(k),get_deltaQuantum()[2]); + scaleCC[2*(allops1.size()-numDC-1)+1] = calcCompfactor(CD1, DD2, CD, v_cccd[b.get_integralIndex()]); if (k != l) - scaleCC[2*(allops1.size()-numCD-1)+1] += parity * calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); + scaleCC[2*(allops1.size()-numDC-1)+1] += parity * calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); } } - + numCC = allops1.size(); if (b.has(DES)) { - pout << "buildfromCreDes with DES in BCS not implemented" << endl; - abort(); + for (int ii = 0; ii < b.get_op_array(DES_DES).get_size(); ++ii) + for (int ji = 0; ji < b.get_op_array(DES_DES).get_local_element(ii).size(); ++ji) + if (b.get_op_array(DES_DES).get_local_element(ii)[ji]->get_deltaQuantum(0).get_s() == deltaQuantum[0].get_s() || + b.get_op_array(DES_DES).get_local_element(ii)[ji]->get_deltaQuantum(0).get_s() == -deltaQuantum[0].get_s()) { + allops1.push_back(b.get_op_array(DES_DES).get_local_element(ii)[ji]); + + const int k = allops1.back()->get_orbs()[0]; + const int l = allops1.back()->get_orbs()[1]; + TensorOp DD2(k, l, -1, -1, spin, sym.getirrep(), k == l); + + if (!DD2.empty) { + TensorOp DD2_commute(l, k, -1, -1, spin, sym.getirrep(), k == l); + int parity = getCommuteParity(-getSpinQuantum(l),-getSpinQuantum(k),get_deltaQuantum()[2]); + scaleCC[2*(allops1.size()-numDC-1)] = calcCompfactor(CD1, DD2, CD, v_cccd[b.get_integralIndex()]); + if (k != l) + scaleCC[2*(allops1.size()-numDC-1)] += parity * calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); + } + } } } @@ -1065,33 +1082,37 @@ void SpinAdapted::StackCreDesComp::buildfromCreDes(StackSpinBlock& b) //this is a cd operator if (opindex allowed(lQ, rQ) && allowed(lQ, rQ) && fabs(scaleCD[2*opindex]) >TINY) - MatrixScaleAdd(scaleCD[2*opindex], allops1[opindex]->operator_element(lQ, rQ), this->operator_element(lQ, rQ)); + MatrixScaleAdd(scaleCD[2*opindex], allops1[opindex]->operator_element(lQ, rQ), this->operator_element(lQ, rQ)); if (!b.has(DES) && l!=k && fabs(scaleCD[2*opindex+1]) > TINY) { - if 
(allowed(lQ, rQ) && allops1[opindex]->allowed(rQ, lQ)) { - double scaling = getStandAlonescaling(-(allops1[opindex]->get_deltaQuantum(0)), b.get_braStateInfo().quanta[lQ], b.get_ketStateInfo().quanta[rQ]); - int nrows = operator_element(lQ, rQ).Nrows(); - int ncols = operator_element(lQ, rQ).Ncols(); - for (int row=0; rowoperator_element(rQ, lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); - } + if (allowed(lQ, rQ) && allops1[opindex]->allowed(rQ, lQ)) { + double scaling = getStandAlonescaling(-(allops1[opindex]->get_deltaQuantum(0)), b.get_braStateInfo().quanta[lQ], b.get_ketStateInfo().quanta[rQ]); + int nrows = operator_element(lQ, rQ).Nrows(); + int ncols = operator_element(lQ, rQ).Ncols(); + for (int row=0; rowoperator_element(rQ, lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); + } } } else if (opindex < numDC) { if (k!=l && allops1[opindex]->allowed(lQ, rQ) && allowed(lQ, rQ) && fabs(scaleCD[2*opindex]) > TINY) MatrixScaleAdd(scaleCD[2*opindex], allops1[opindex]->operator_element(lQ, rQ), this->operator_element(lQ, rQ)); - } else { // CC + } else if (opindex < numCC) { // CC if (allops1[opindex]->allowed(lQ, rQ) && allowed(lQ, rQ) && fabs(scaleCC[2*(opindex-numDC)]) > TINY) MatrixScaleAdd(scaleCC[2*(opindex-numDC)], allops1[opindex]->operator_element(lQ, rQ), this->operator_element(lQ, rQ)); - if (!b.has(DES) && fabs(scaleCC[2*(opindex-numDC)+1]) > TINY) { - if (allowed(lQ, rQ) && allops1[opindex]->allowed(rQ, lQ)) { - double scaling = getStandAlonescaling(-(allops1[opindex]->get_deltaQuantum(0)), b.get_braStateInfo().quanta[lQ], b.get_ketStateInfo().quanta[rQ]); + if (!b.has(DES)) { + if (fabs(scaleCC[2*(opindex-numDC)+1]) > TINY && allowed(lQ, rQ) && allops1[opindex]->allowed(rQ, lQ)) { + double scaling = getStandAlonescaling(-(allops1[opindex]->get_deltaQuantum(0)), b.get_braStateInfo().quanta[lQ], b.get_ketStateInfo().quanta[rQ]); int nrows = operator_element(lQ, rQ).Nrows(); int ncols = operator_element(lQ, rQ).Ncols(); for (int row=0; rowoperator_element(rQ, lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); } } + } else { + if (fabs(scaleCC[2*(opindex-numDC)]) > TINY && allowed(lQ, rQ) && allops1[opindex]->allowed(lQ, rQ)) { + MatrixScaleAdd(scaleCC[2*(opindex-numDC)], allops1[opindex]->operator_element(lQ, rQ), this->operator_element(lQ, rQ)); + } } } @@ -1238,7 +1259,7 @@ void SpinAdapted::StackCreDesComp::build(StackMatrix& m, int row, int col, const TensorOp DD2_commute(k, l, -1, -1, spin, sym.getirrep(), k==l); double scaleV2 = calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); if (fabs(scaleV2)+fabs(scaleV) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); boost::shared_ptr op2 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); double parity = getCommuteParity(op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[2]); @@ -1523,7 +1544,7 @@ void SpinAdapted::StackCreDesComp::build(const StackSpinBlock& b) TensorOp DD2_commute(k, l, -1, -1, spin, sym.getirrep(), k==l); double scaleV2 = calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); if (fabs(scaleV2)+fabs(scaleV) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); boost::shared_ptr op2 = leftBlock->get_op_rep(DES, 
-getSpinQuantum(k), k); double parity = getCommuteParity(op1->get_deltaQuantum()[0], op2->get_deltaQuantum()[0], get_deltaQuantum()[2]); @@ -1861,7 +1882,6 @@ void SpinAdapted::StackDesCreComp::buildfromDesCre(StackSpinBlock& b) const int j = get_orbs()[1]; memset(data, 0, totalMemory * sizeof(double)); - //StackDesCreComp* op_array; //initiateMultiThread(this, op_array, numthrds); @@ -1870,10 +1890,10 @@ void SpinAdapted::StackDesCreComp::buildfromDesCre(StackSpinBlock& b) //TensorOp CD1 = C.product(D, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep()); // the operator to be complimentaried TensorOp CD1(j, i, 1, -1, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep()); - std::vector > allops1, allops2; + std::vector > allops1, allops2, allops3, allops4; for (int ii=0; iiget_deltaQuantum(0) == -deltaQuantum[0]) + if (b.get_op_array(CRE_DES).get_local_element(ii)[ji]->get_deltaQuantum(0) == deltaQuantum[0]) allops1.push_back(b.get_op_array(CRE_DES).get_local_element(ii)[ji]); for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[0]) allops2.push_back(b.get_op_array(DES_CRE).get_local_element(ii)[ji]); + if (dmrginp.hamiltonian() == BCS) { + for (int ii = 0; ii < b.get_op_array(CRE_CRE).get_size(); ++ii) + for (int ji = 0; ji < b.get_op_array(CRE_CRE).get_local_element(ii).size(); ++ji) + if (b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_deltaQuantum(0) == deltaQuantum[1]) + allops3.push_back(b.get_op_array(CRE_CRE).get_local_element(ii)[ji]); + + for (int ii = 0; ii < b.get_op_array(DES_DES).get_size(); ++ii) + for (int ji = 0; ji < b.get_op_array(DES_DES).get_local_element(ii).size(); ++ji) + if (b.get_op_array(DES_DES).get_local_element(ii)[ji]->get_deltaQuantum(0) == deltaQuantum[2]) + allops4.push_back(b.get_op_array(DES_DES).get_local_element(ii)[ji]); + } + //#pragma omp parallel for schedule(dynamic) for (int ii = 0; iiget_orbs()[0]; @@ -1895,7 +1927,7 @@ void SpinAdapted::StackDesCreComp::buildfromDesCre(StackSpinBlock& b) } } - + //#pragma omp parallel for schedule(dynamic) for (int ii = 0; iiget_orbs()[0]; @@ -1907,12 +1939,50 @@ void SpinAdapted::StackDesCreComp::buildfromDesCre(StackSpinBlock& b) TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); if (!CD2.empty) { double scaleV = calcCompfactor(CD1, CD2, CD,*(b.get_twoInt()), b.get_integralIndex()); - //scaleV *= getCommuteParity(getSpinQuantum(i), getSpinQuantum(j), get_deltaQuantum()[0]); - scaleV *= getCommuteParity(getSpinQuantum(l), -getSpinQuantum(k), get_deltaQuantum()[0]); + if (dmrginp.spinAdapted()) { + scaleV *= getCommuteParity(getSpinQuantum(l), -getSpinQuantum(k), get_deltaQuantum()[0]); + } ScaleAdd(scaleV, *allops2[ii], *this); } } + if (dmrginp.hamiltonian() == BCS) { + for (int ii = 0; ii < allops3.size(); ++ii) { + const int k = allops3[ii]->get_orbs()[0]; + const int l = allops3[ii]->get_orbs()[1]; + + TensorOp CC2(k, l, 1, 1, spin, sym.getirrep(), k == l); + if (!CC2.empty) { + double scaleV = calcCompfactor(CD1, CC2, CD, v_cccd[b.get_integralIndex()]); + if (k != l) { + TensorOp CC2_commute(l, k, 1, 1, spin, sym.getirrep(), k==l); + double scaleV2 = calcCompfactor(CD1, CC2_commute, CD, v_cccd[b.get_integralIndex()]); + double parity = getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[1]); + scaleV += parity * scaleV2; + } + + ScaleAdd(scaleV, *allops3[ii], *this); + } + } + + for (int ii = 0; ii < allops4.size(); ++ii) { + const int k = allops4[ii]->get_orbs()[0]; + const int l = allops4[ii]->get_orbs()[1]; + + TensorOp DD2(k, l, -1, -1, spin, sym.getirrep(), k == 
l); + if (!DD2.empty) { + double scaleV = calcCompfactor(CD1, DD2, CD, v_cccd[b.get_integralIndex()]); + if (k != l) { + TensorOp DD2_commute(l, k, -1, -1, spin, sym.getirrep(), k == l); + double scaleV2 = calcCompfactor(CD1, DD2_commute, CD, v_cccd[b.get_integralIndex()]); + double parity = getCommuteParity(-getSpinQuantum(l),-getSpinQuantum(k),get_deltaQuantum()[2]); + scaleV += parity*scaleV2; + } + ScaleAdd(scaleV, *allops4[ii], *this); + } + } + } + //accumulateMultiThread(this, op_array, numthrds); } @@ -1965,7 +2035,7 @@ SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *op, *Overlap, &b, &(b. SpinQuantum hq(0, SpinSpace(0), IrrepSpace(0)); const boost::shared_ptr Overlap = leftBlock->get_op_rep(OVERLAP, hq); SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *Overlap, *op, &b, &(b.get_stateInfo()), *this, 1.0); - } + } // build DCcomp explicitely for (int kx = 0; kx < leftBlock->get_sites().size(); ++kx) for (int lx = 0; lx < rightBlock->get_sites().size(); ++lx) { @@ -2095,7 +2165,7 @@ double SpinAdapted::StackDesCreComp::redMatrixElement(Csf c1, vector& ladde } else { //TensorOp CK(k, 1), DL(l, -1); //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); - TensorOp CD2(k, l, 1, -1, spin, sym.getirrep(), k==l); + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); if (!CD2.empty) { double MatElements = calcMatrixElements(c1, CD2, ladder[i], backupSlater1, backupSlater2, index) ; double factor = calcCompfactor(CD1, CD2, CD, *(b->get_twoInt()), b->get_integralIndex()); @@ -2133,16 +2203,15 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) TensorOp CC1(i, j, 1, 1, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep(), i==j); std::vector > allops; - int numCC = 0; std::vector scaleDD, scaleCD, scaleCC; // TODO resize scaleCC and scaleCD if (dmrginp.hamiltonian() == BCS) { - scaleCC.resize(b.get_op_array(CRE_CRE).get_size(), 0.); - scaleCD.resize(b.get_op_array(CRE_DES).get_size()*2, 0.); + scaleCC.resize(b.get_op_array(CRE_CRE).get_size()*2, 0.); + scaleCD.resize(b.get_op_array(CRE_DES).get_size()*4, 0.); } if (dmrginp.spinAdapted()) scaleDD.resize(b.get_op_array(CRE_CRE).get_size()*2, 0.); else scaleDD.resize(b.get_op_array(CRE_CRE).get_size(), 0.); - + int numDD = 0, numCC = 0; if(!b.has(DES)) { for (int ii=0; iiget_deltaQuantum(0) == -deltaQuantum[0]) { const int k = b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_orbs()[0]; - const int l = b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_orbs()[1]; - + const int l = b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_orbs()[1]; + //TensorOp DK(k,-1), DL(l,-1); //TensorOp DD2 = DK.product(DL, spin, sym.getirrep(), k==l); TensorOp DD2(k, l, -1, -1, spin, sym.getirrep(), k==l); @@ -2180,7 +2249,7 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) //CK = TensorOp(k,1); CL = TensorOp(l,1); //CC2 = CL.product(CK, spin, sym.getirrep(), k == l); CC2 = TensorOp(l, k, 1, 1, spin, sym.getirrep(), k==l); - double parity = getCommuteParity(getSpinQuantum(k), getSpinQuantum(l), get_deltaQuantum()[0]); + double parity = getCommuteParity(getSpinQuantum(k), getSpinQuantum(l), get_deltaQuantum()[2]); if (k != l) scaleCC[allops.size()] += parity * calcCompfactor(CC1, CC2, DD, v_cccc[b.get_integralIndex()]); } @@ -2189,10 +2258,6 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) if (nonzero) allops.push_back(b.get_op_array(CRE_CRE).get_local_element(ii)[ji]); } } else { - if (dmrginp.hamiltonian() == BCS) { - pout << "buildfromDesDes with DES in BCS not 
implemented" << endl; - abort(); - } for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[0]) { @@ -2212,6 +2277,25 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) scaleDD[allops.size()-1] += parity * calcCompfactor(CC1, DD2, DD, *(b.get_twoInt()), b.get_integralIndex()); } } + numDD = allops.size(); + + if (dmrginp.hamiltonian() == BCS) + for (int ii=0; iiget_deltaQuantum(0).get_s() == deltaQuantum[0].get_s()) { // then we need CC + allops.push_back(b.get_op_array(CRE_CRE).get_local_element(ii)[ji]); + const int k = b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_orbs()[0]; + const int l = b.get_op_array(CRE_CRE).get_local_element(ii)[ji]->get_orbs()[1]; + TensorOp CC2(k, l, 1, 1, spin, sym.getirrep(), k==l); + if (!CC2.empty) { + scaleCC[allops.size()-1] = calcCompfactor(CC1, CC2, DD, v_cccc[b.get_integralIndex()]); + CC2 = TensorOp(l, k, 1, 1, spin, sym.getirrep(), k==l); + double parity = getCommuteParity(getSpinQuantum(k), getSpinQuantum(l), get_deltaQuantum()[1]); + if (k != l) + scaleCC[allops.size()-1] += parity * calcCompfactor(CC1, CC2, DD, v_cccc[b.get_integralIndex()]); + } + } + } } numCC = allops.size(); @@ -2224,18 +2308,31 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) const int k = allops.back()->get_orbs()[0]; const int l = allops.back()->get_orbs()[1]; - //TensorOp CK(k,1), DL(l,-1); - //TensorOp CD2 = CK.product(DL, spin, sym.getirrep()); TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); if (!CD2.empty) - scaleCD[2*(allops.size()-1)] = calcCompfactor(CC1, CD2, DD, v_cccd[b.get_integralIndex()]); + scaleCD[2*(allops.size()-numCC-1)] = calcCompfactor(CC1, CD2, DD, v_cccd[b.get_integralIndex()]); + + if (!b.has(DES)) { + CD2 = TensorOp(l, k, 1, -1, spin, sym.getirrep()); + if (k!=l && !CD2.empty) + scaleCD[2*(allops.size()-numCC-1)+1] = calcCompfactor(CC1, CD2, DD, v_cccd[b.get_integralIndex()]); + } - //TensorOp CL(l,1), DK(k,-1); - //CD2 = CL.product(DK, spin, sym.getirrep()); - CD2 = TensorOp(l, k, 1, -1, spin, sym.getirrep()); - if (k!=l && !CD2.empty) - scaleCD[2*(allops.size()-1)+1] = calcCompfactor(CC1, CD2, DD, v_cccd[b.get_integralIndex()]); } + if (b.has(DES)) { + for (int ii = 0; ii < b.get_op_array(DES_CRE).get_size(); ++ii) + for (int ji = 0; ji < b.get_op_array(DES_CRE).get_local_element(ii).size(); ++ji) + if (b.get_op_array(DES_CRE).get_local_element(ii)[ji]->get_deltaQuantum(0).get_s() == deltaQuantum[0].get_s() || + b.get_op_array(CRE_DES).get_local_element(ii)[ji]->get_deltaQuantum(0).get_s() == -deltaQuantum[0].get_s()) { + allops.push_back(b.get_op_array(DES_CRE).get_local_element(ii)[ji]); + const int l = allops.back()->get_orbs()[0]; + const int k = allops.back()->get_orbs()[1]; + + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); + if (k!=l && !CD2.empty) // can't be spin adapted + scaleCD[2*(allops.size()-numCC-1)] = calcCompfactor(CC1, CD2, DD, v_cccd[b.get_integralIndex()]); + } + } } const int quantaSz = b.get_braStateInfo().quanta.size () * b.get_ketStateInfo().quanta.size(); @@ -2265,28 +2362,34 @@ void SpinAdapted::StackDesDesComp::buildfromDesDes(StackSpinBlock& b) for (int row=0; row < nrows; ++row) DAXPY(ncols, scaling*scaleDD[opindex], &(allops[opindex]->operator_element(rQ,lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); } - } else { // operator DD - if (allowed(lQ, rQ) && allops[opindex]->allowed(lQ,rQ) && fabs(scaleDD[opindex])> TINY) { - MatrixScaleAdd(scaleDD[opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); + if 
(dmrginp.hamiltonian() == BCS && allowed(lQ, rQ) + && allops[opindex]->allowed(lQ,rQ) && fabs(scaleCC[opindex]) > TINY) { + MatrixScaleAdd(scaleCC[opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); + } + } else { + // operator DD + if (opindex < numDD) { + if (allowed(lQ, rQ) && allops[opindex]->allowed(lQ,rQ) && fabs(scaleDD[opindex])> TINY) { + MatrixScaleAdd(scaleDD[opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); + } + } else { + if (allowed(lQ, rQ) && allops[opindex]->allowed(lQ,rQ) && fabs(scaleCC[opindex]) > TINY) { + MatrixScaleAdd(scaleCC[opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); + } } - } - - if (dmrginp.hamiltonian() == BCS && allowed(lQ, rQ) - && allops[opindex]->allowed(lQ,rQ) && fabs(scaleCC[opindex]) > TINY) { - MatrixScaleAdd(scaleCC[opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); } } else { // CkDl and ClDk const int k = allops[opindex]->get_orbs()[0]; const int l = allops[opindex]->get_orbs()[1]; - if (allowed(lQ,rQ) && allops[opindex]->allowed(lQ,rQ) && fabs(scaleCD[2*opindex]) > TINY) - MatrixScaleAdd(scaleCD[2*opindex], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); + if (allowed(lQ,rQ) && allops[opindex]->allowed(lQ,rQ) && fabs(scaleCD[2*(opindex-numCC)]) > TINY) + MatrixScaleAdd(scaleCD[2*(opindex-numCC)], allops[opindex]->operator_element(lQ,rQ), this->operator_element(lQ, rQ)); - if (!b.has(DES) && k != l && allowed(lQ,rQ) && allops[opindex]->allowed(rQ,lQ) && fabs(scaleCD[2*opindex+1]) > TINY) { + if (!b.has(DES) && k != l && allowed(lQ,rQ) && allops[opindex]->allowed(rQ,lQ) && fabs(scaleCD[2*(opindex-numCC)+1]) > TINY) { double scaling = getStandAlonescaling(-(allops[opindex]->get_deltaQuantum(0)), b.get_braStateInfo().quanta[lQ], b.get_ketStateInfo().quanta[rQ]); int nrows = operator_element(lQ, rQ).Nrows(); int ncols = operator_element(lQ, rQ).Ncols(); for (int row=0; row < nrows; ++row) - DAXPY(ncols, scaling*scaleCD[2*opindex+1], &(allops[opindex]->operator_element(rQ,lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); + DAXPY(ncols, scaling*scaleCD[2*(opindex-numCC)+1], &(allops[opindex]->operator_element(rQ,lQ)(1, row+1)), nrows, &(this->operator_element(lQ, rQ)(row+1, 1)), 1); } } } @@ -2397,7 +2500,7 @@ void SpinAdapted::StackDesDesComp::build(StackMatrix& m, int row, int col, const double scaleV2 = calcCompfactor(CC1, DD2, DD, *(b.get_twoInt()), b.get_integralIndex()); if ((fabs(scaleV2)+fabs(scaleV)) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); @@ -2535,7 +2638,7 @@ void SpinAdapted::StackDesDesComp::build(StackMatrix& m, int row, int col, const double scaleV2 = calcCompfactor(CC1, DD2, DD, *(b.get_twoInt()), b.get_integralIndex()); if ((fabs(scaleV2)+fabs(scaleV)) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); @@ -2609,7 +2712,7 @@ void SpinAdapted::StackDesDesComp::build(StackMatrix& m, int row, int col, const ccomp -> deallocate(); } - if (leftBlock->has(DES)) { + if (rightBlock->has(DES)) { boost::shared_ptr op2 = 
rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); SpinAdapted::operatorfunctions::TensorProductElement(leftBlock, *dcomp, *op2, &b, &(b.get_stateInfo()), *this, m, row, col, 1.); } else { @@ -2818,7 +2921,7 @@ void SpinAdapted::StackDesDesComp::build(const StackSpinBlock& b) double scaleV2 = calcCompfactor(CC1, DD2, DD, *(b.get_twoInt()), b.get_integralIndex()); if ((fabs(scaleV2)+fabs(scaleV)) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); @@ -2938,7 +3041,7 @@ void SpinAdapted::StackDesDesComp::build(const StackSpinBlock& b) double scaleV2 = calcCompfactor(CC1, DD2, DD, *(b.get_twoInt()), b.get_integralIndex()); if ((fabs(scaleV2)+fabs(scaleV)) > dmrginp.twoindex_screen_tol()) { - if (leftBlock->has(DES)) { + if (leftBlock->has(DES) && rightBlock->has(DES)) { boost::shared_ptr op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); @@ -3012,7 +3115,7 @@ void SpinAdapted::StackDesDesComp::build(const StackSpinBlock& b) ccomp -> deallocate(); } - if (leftBlock->has(DES)) { + if (rightBlock->has(DES)) { boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *dcomp, *op2, &b, &(b.get_stateInfo()), *this, 1.); } else { @@ -3212,6 +3315,7 @@ double SpinAdapted::StackDesDesComp::redMatrixElement(Csf c1, vector& ladde void SpinAdapted::StackCreCreComp::buildfromCreCre(StackSpinBlock& b) { + int spin = deltaQuantum[0].get_s().getirrep(); IrrepSpace sym = deltaQuantum[0].get_symm(); @@ -3226,19 +3330,36 @@ void SpinAdapted::StackCreCreComp::buildfromCreCre(StackSpinBlock& b) //TensorOp DD1 = D.product(D2, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep(), i==j); TensorOp DD1(i, j, -1, -1, (-deltaQuantum[0].get_s()).getirrep(), (-sym).getirrep(), i==j); - std::vector > allops; + std::vector > allops1, allops2, allops3, allops4; for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[0]) - allops.push_back(b.get_op_array(CRE_CRE).get_local_element(ii)[ji]); + allops1.push_back(b.get_op_array(CRE_CRE).get_local_element(ii)[ji]); + + if (dmrginp.hamiltonian() == BCS) { + for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[2]) + allops2.push_back(b.get_op_array(DES_DES).get_local_element(ii)[ji]); + + for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[1]) + allops3.push_back(b.get_op_array(CRE_DES).get_local_element(ii)[ji]); + + for (int ii=0; iiget_deltaQuantum(0) == deltaQuantum[1]) + allops4.push_back(b.get_op_array(DES_CRE).get_local_element(ii)[ji]); + } //#pragma omp parallel for schedule(dynamic) - for (int ii = 0; iiget_orbs()[0]; - const int l = allops[ii]->get_orbs()[1]; - - //TensorOp CK(k, 1), CL(l, 1); + for (int ii = 0; iiget_orbs()[0]; + const int l = allops1[ii]->get_orbs()[1]; + + //TensorOp CK(k, 1), CL(l, 1); //TensorOp CC2 = CK.product(CL, spin, sym.getirrep(), k==l); TensorOp CC2(k, l, 1, 1, spin, sym.getirrep(), k==l); if (!CC2.empty) { @@ -3253,11 +3374,55 @@ void SpinAdapted::StackCreCreComp::buildfromCreCre(StackSpinBlock& b) if (k != l) scaleV += parity*scaleV2; - ScaleAdd(scaleV, *allops[ii], *this); - + ScaleAdd(scaleV, *allops1[ii], *this); + } } + if (dmrginp.hamiltonian() == BCS) { + for (int ii = 0; ii < allops2.size(); ++ii) { + const int k = allops2[ii]->get_orbs()[0]; + const int l = 
allops2[ii]->get_orbs()[1]; + + TensorOp DD2(k, l, -1, -1, spin, sym.getirrep(), k == l); + if (!DD2.empty) { + double scaleV = calcCompfactor(DD2, DD1, DD, v_cccc[b.get_integralIndex()]); + TensorOp DD2_commute(l, k, -1, -1, spin, sym.getirrep(), k == l); + double scaleV2 = calcCompfactor(DD2_commute, DD1, DD, v_cccc[b.get_integralIndex()]); + double parity = getCommuteParity(-getSpinQuantum(k), -getSpinQuantum(l), get_deltaQuantum()[2]); + + ScaleAdd(scaleV+parity*scaleV2, *allops2[ii], *this); + } + } + + for (int ii = 0; ii < allops3.size(); ++ii) { + const int k = allops3[ii]->get_orbs()[0]; + const int l = allops3[ii]->get_orbs()[1]; + + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); + if (!CD2.empty) { + double scaleV = calcCompfactor(DD1, CD2, DD, v_cccd[b.get_integralIndex()]); + ScaleAdd(scaleV, *allops3[ii], *this); + } + } + + for (int ii = 0; ii < allops4.size(); ++ii) { + const int l = allops4[ii]->get_orbs()[0]; + const int k = allops4[ii]->get_orbs()[1]; + + if (k == l) continue; + + TensorOp CD2(k, l, 1, -1, spin, sym.getirrep()); + if (!CD2.empty) { + double scaleV = calcCompfactor(DD1, CD2, DD, v_cccd[b.get_integralIndex()]); + // FIXME does this need to be used for nonspinadapted as well? + if (dmrginp.spinAdapted()) { + scaleV *= getCommuteParity(getSpinQuantum(l), getSpinQuantum(k), get_deltaQuantum()[1]); + } + ScaleAdd(scaleV, *allops4[ii], *this); + } + } + } //accumulateMultiThread(this, op_array, numthrds); } @@ -3346,12 +3511,12 @@ void SpinAdapted::StackCreCreComp::build(const StackSpinBlock& b) //TensorOp DD2 = DK.product(DL, spin, sym.getirrep(), k==l); TensorOp DD2(k, l, -1, -1, spin, sym.getirrep(), k==l); if (!DD2.empty) { - double scaleV = calcCompfactor(DD1, DD2, DD, v_cccc[b.get_integralIndex()]); + double scaleV = calcCompfactor(DD2, DD1, DD, v_cccc[b.get_integralIndex()]); //DK = TensorOp(k,-1); //DL = TensorOp(l,-1); //TensorOp DD2_commute = DL.product(DK, spin, sym.getirrep(), k==l); TensorOp DD2_commute(l, k, -1, -1, spin, sym.getirrep(), k==l); - double scaleV2 = calcCompfactor(DD1, DD2_commute, DD, v_cccc[b.get_integralIndex()]); + double scaleV2 = calcCompfactor(DD2_commute, DD1, DD, v_cccc[b.get_integralIndex()]); if (leftBlock->get_op_array(CRE).has(k) && rightBlock->get_op_array(CRE).has(l) && (fabs(scaleV2)+fabs(scaleV)) > dmrginp.twoindex_screen_tol()) { boost::shared_ptr op1 = leftBlock->get_op_rep(DES, -getSpinQuantum(k), k); boost::shared_ptr op2 = rightBlock->get_op_rep(DES, -getSpinQuantum(l), l); @@ -3521,7 +3686,7 @@ void SpinAdapted::StackCreDesDesComp::build(const StackSpinBlock& b) { FUNCTOR f = boost::bind(&stackopxop::dxcdcomp, otherBlock, _1, &b, k, this, 1.0); for_all_singlethread(loopBlock->get_op_array(DES), f); - + f = boost::bind(&stackopxop::cxddcomp, otherBlock, _1, &b, k, this, 2.0); for_all_singlethread(loopBlock->get_op_array(CRE), f); @@ -3736,6 +3901,7 @@ void SpinAdapted::StackCreCreDesComp::build(StackMatrix& m, int row, int col, co FUNCTOR f = boost::bind(&stackopxop::cxcdcompElement, otherBlock, _1, &b, k, this, m, row, col, 1.0); for_all_singlethread(loopBlock->get_op_array(CRE), f); + f = boost::bind(&stackopxop::dxcccompElement, otherBlock, _1, &b, k, this, m, row, col, 2.0); // factor of 2.0 because CCcomp_{ij} = -CCcomp_{ji} for_all_singlethread(loopBlock->get_op_array(CRE), f); @@ -3785,7 +3951,6 @@ void SpinAdapted::StackCreCreDesComp::build(const StackSpinBlock& b) SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *op, *Overlap, &b, &(b.get_stateInfo()), *this, 1.0); } } - if 
(rightBlock->get_sites().size() == 0) { //this is a special case where the right block is just a dummy block to make the effective wavefunction have spin 0 return; @@ -4303,7 +4468,7 @@ void SpinAdapted::StackHam::build(const StackSpinBlock& b) const boost::shared_ptr<StackSparseMatrix> Overlap = leftBlock->get_op_rep(OVERLAP, hq); SpinAdapted::operatorfunctions::TensorProduct(leftBlock, *Overlap, *op, &b, &(b.get_stateInfo()), *this, 1.0); - // CCD_A*D_B + CCD_B*D_A + c.c. + // CCD_A*D_B + CCD_B*D_A + c.c. FUNCTOR f = boost::bind(&stackopxop::cxcddcomp, leftBlock, _1, &b, op_array); for_all_singlethread(rightBlock->get_op_array(CRE), f); diff --git a/Stack_op_components.C b/Stack_op_components.C index 093070c..cd2a068 100644 --- a/Stack_op_components.C +++ b/Stack_op_components.C @@ -528,11 +528,18 @@ namespace SpinAdapted { vec.resize(spinvec.size()); vector<int> orbs(2); orbs[0] = i; orbs[1] = j; for (int j=0; j<spinvec.size(); j++) { - vec[j]=boost::shared_ptr<StackCreDesComp>(new StackCreDesComp); - vec[j]->set_orbs() = orbs; - vec[j]->set_initialised() = true; - vec[j]->set_fermion() = false; - vec[j]->set_deltaQuantum(1, spinvec[j]); + vec[j]=boost::shared_ptr<StackCreDesComp>(new StackCreDesComp); + vec[j]->set_orbs() = orbs; + vec[j]->set_initialised() = true; + vec[j]->set_fermion() = false; + if (dmrginp.hamiltonian() == BCS) { + vec[j]->resize_deltaQuantum(3); + vec[j]->set_deltaQuantum(0) = spinvec[j]; + vec[j]->set_deltaQuantum(1) = SpinQuantum(2, spinvec[j].get_s(), spinvec[j].get_symm()); + vec[j]->set_deltaQuantum(2) = SpinQuantum(-2, spinvec[j].get_s(), spinvec[j].get_symm()); + } else { + vec[j]->set_deltaQuantum(1, spinvec[j]); + } } } @@ -628,11 +635,18 @@ namespace SpinAdapted { vec.resize(spinvec.size()); vector<int> orbs(2); orbs[0] = i; orbs[1] = j; for (int j=0; j<spinvec.size(); j++) { - vec[j]=boost::shared_ptr<StackDesCreComp>(new StackDesCreComp); - vec[j]->set_orbs() = orbs; - vec[j]->set_initialised() = true; - vec[j]->set_fermion() = false; - vec[j]->set_deltaQuantum(1, spinvec[j]); + vec[j]=boost::shared_ptr<StackDesCreComp>(new StackDesCreComp); + vec[j]->set_orbs() = orbs; + vec[j]->set_initialised() = true; + vec[j]->set_fermion() = false; + if (dmrginp.hamiltonian() == BCS) { + vec[j]->resize_deltaQuantum(3); + vec[j]->set_deltaQuantum(0) = spinvec[j]; + vec[j]->set_deltaQuantum(1) = SpinQuantum(2, spinvec[j].get_s(), spinvec[j].get_symm()); + vec[j]->set_deltaQuantum(2) = SpinQuantum(-2, spinvec[j].get_s(), spinvec[j].get_symm()); + } else { + vec[j]->set_deltaQuantum(1, spinvec[j]); + } } } @@ -729,11 +743,19 @@ namespace SpinAdapted { vec.resize(spinvec.size()); vector<int> orbs(2); orbs[0] = i; orbs[1] = j; for (int j=0; j<spinvec.size(); j++) { - vec[j]=boost::shared_ptr<StackDesDesComp>(new StackDesDesComp); - vec[j]->set_orbs() = orbs; - vec[j]->set_initialised() = true; - vec[j]->set_fermion() = false; - vec[j]->set_deltaQuantum(1, -spinvec[j]); + vec[j]=boost::shared_ptr<StackDesDesComp>(new StackDesDesComp); + vec[j]->set_orbs() = orbs; + vec[j]->set_initialised() = true; + vec[j]->set_fermion() = false; + + if (dmrginp.hamiltonian() == BCS) { + vec[j]->resize_deltaQuantum(3); + vec[j]->set_deltaQuantum(0) = -spinvec[j]; + vec[j]->set_deltaQuantum(1) = -SpinQuantum(0, spinvec[j].get_s(), spinvec[j].get_symm()); + vec[j]->set_deltaQuantum(2) = -SpinQuantum(-2, spinvec[j].get_s(), spinvec[j].get_symm()); + } else { + vec[j]->set_deltaQuantum(1, -spinvec[j]); + } } }
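// Note on the BCS branches in these Stack_op_components.C hunks: with a
// Bogoliubov (pairing) Hamiltonian, a two-index complementary operator is
// no longer block-diagonal in particle number, because the v_cccd/v_cccc
// integrals couple a CreDes string to CreCre and DesDes strings, so the
// patch gives each compound three delta-quantum sectors. A minimal sketch
// of the bookkeeping for the CreDes case (setBCSDeltaQuanta is a
// hypothetical helper, not part of this patch):
//
//   void setBCSDeltaQuanta(StackSparseMatrix& op, const SpinQuantum& q) {
//     op.resize_deltaQuantum(3);
//     op.set_deltaQuantum(0) = q;                                        // particle-conserving sector
//     op.set_deltaQuantum(1) = SpinQuantum( 2, q.get_s(), q.get_symm()); // pair-creating sector
//     op.set_deltaQuantum(2) = SpinQuantum(-2, q.get_s(), q.get_symm()); // pair-annihilating sector
//   }
//
// The DesDes and CreCre compounds shift these sectors accordingly, as the
// surrounding hunks show.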
@@ -829,11 +851,18 @@ namespace SpinAdapted { vec.resize(spinvec.size()); vector<int> orbs(2); orbs[0] = i; orbs[1] = j; for (int j=0; j<spinvec.size(); j++) { - vec[j]=boost::shared_ptr<StackCreCreComp>(new StackCreCreComp); - vec[j]->set_orbs() = orbs; - vec[j]->set_initialised() = true; - vec[j]->set_fermion() = false; - vec[j]->set_deltaQuantum(1, spinvec[j]); + vec[j]=boost::shared_ptr<StackCreCreComp>(new StackCreCreComp); + vec[j]->set_orbs() = orbs; + vec[j]->set_initialised() = true; + vec[j]->set_fermion() = false; + if (dmrginp.hamiltonian() == BCS) { + vec[j]->resize_deltaQuantum(3); + vec[j]->set_deltaQuantum(0) = spinvec[j]; + vec[j]->set_deltaQuantum(1) = SpinQuantum(0, spinvec[j].get_s(), spinvec[j].get_symm()); + vec[j]->set_deltaQuantum(2) = SpinQuantum(-2, spinvec[j].get_s(), spinvec[j].get_symm()); + } else { + vec[j]->set_deltaQuantum(1, spinvec[j]); + } } } diff --git a/Stackspinblock.C b/Stackspinblock.C index 9b51833..7635b7b 100644 --- a/Stackspinblock.C +++ b/Stackspinblock.C @@ -77,6 +77,9 @@ void StackSpinBlock::printOperatorSummary() #ifndef SERIAL mpi::communicator world; + //pout << "Printing Operators of this Block" << endl; + //pout << *this << endl; + if (mpigetrank() != 0) { for (std::map<opTypes, boost::shared_ptr<StackOp_component_base> >::const_iterator it = ops.begin(); it != ops.end(); ++it) sendobject(it->second->get_size(), 0); @@ -90,7 +93,7 @@ } else { - p2out << "\t\t\t " << it->second->size()<<" : "<<it->second->get_op_string()<<" Virtual Operators "; + p2out << "\t\t\t " << it->second->size()<<" : "<<it->second->get_op_string()<<" Virtual Operators "; } vector<int> numops(calc.size(), 0); @@ -102,6 +105,14 @@ p2out << " " << numops[proc]<<" "; } p2out << endl; + //if (it->second->get_op_string() == "STACKHAM" || it->second->get_op_string() == "STACKCRECREDES_COMP" || it->second->get_op_string() == "STACKDESDES_COMP" || it->second->get_op_string() == "STACKCREDES_COMP" || it->second->get_op_string() == "STACKCRE" || it->second->get_op_string() == "STACKCRECRE" || it->second->get_op_string() == "STACKCREDES" || it->second->get_op_string() == "STACKCRECRE" || it->second->get_op_string() == "STACKOVERLAP") { + //if (it->second->get_op_string() == "STACKCREDES_COMP" || it->second->get_op_string() == "STACKDESCRE_COMP") { + // if (it->second->is_core()) { + // for (int i = 0; i < it->second->size(); ++i) { + // pout << *it->second->get_local_element(i)[0] << endl; + // } + // } + //} } } #else @@ -964,6 +975,85 @@ int procWithMinOps(std::vector >& allops) return minproc; }
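// prebuild(), added below, walks the loop block's CRE_DES and CRE_CRE arrays,
// finds the matching CRE_DESCOMP/DES_CRECOMP and DES_DESCOMP/CRE_CRECOMP
// operators on the other block that are not yet in memory, allocates them,
// and builds them once in an OpenMP loop, so repeated H|psi> applications
// inside Davidson do not rebuild them on every call. A caller-side sketch
// (`big` is a hypothetical superblock name; MAX_THRD as used in davidson.C):
//
//   std::vector<boost::shared_ptr<StackSparseMatrix>> comps = big.prebuild(MAX_THRD);
//   big.multiplyH(c, &v, MAX_THRD);   // reuses the prebuilt operators
//
// The caller owns the returned handles and must deallocate them in reverse
// order when done, to respect the stack allocator (see the multiply_h
// destructor in davidson.C further down).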
+std::vector<boost::shared_ptr<StackSparseMatrix>> StackSpinBlock::prebuild(int num_threads) const { + std::vector<boost::shared_ptr<StackSparseMatrix>> comps; + if (dmrginp.hamiltonian() == HUBBARD) return comps; + + StackSpinBlock* loopBlock=(leftBlock->is_loopblock()) ? leftBlock : rightBlock; + StackSpinBlock* otherBlock = loopBlock == leftBlock ? rightBlock : leftBlock; + + if (otherBlock->get_rightBlock() == 0) return comps; + + for (int i = 0; i < loopBlock->get_op_array(CRE_DES).get_size(); ++i) + for (int j = 0; j < loopBlock->get_op_array(CRE_DES).get_local_element(i).size(); ++j) { + boost::shared_ptr<StackSparseMatrix> op1 = loopBlock->get_op_array(CRE_DES).get_local_element(i)[j]; + int I = op1->get_orbs(0); + int J = op1->get_orbs(1); + if (otherBlock->get_op_array(CRE_DESCOMP).has_local_index(I, J)) { + boost::shared_ptr<StackSparseMatrix> op2; + if (!dmrginp.spinAdapted() || dmrginp.hamiltonian() == BCS) + op2 = otherBlock->get_op_array(CRE_DESCOMP).get_element(I, J).at(0); + else + op2 = otherBlock->get_op_rep(CRE_DESCOMP, -op1->get_deltaQuantum()[0], I, J); + + if (!op2->memoryUsed()) comps.push_back(op2); + + if (otherBlock->has(DES_CRECOMP)) { + if (!dmrginp.spinAdapted() || dmrginp.hamiltonian() == BCS) + op2 = otherBlock->get_op_array(DES_CRECOMP).get_element(I, J).at(0); + else + op2 = otherBlock->get_op_rep(DES_CRECOMP, op1->get_deltaQuantum()[0], I, J); + + if (!op2->memoryUsed()) comps.push_back(op2); + } + } + } + + for (int i = 0; i < loopBlock->get_op_array(CRE_CRE).get_size(); ++i) + for (int j = 0; j < loopBlock->get_op_array(CRE_CRE).get_local_element(i).size(); ++j) { + boost::shared_ptr<StackSparseMatrix> op1 = loopBlock->get_op_array(CRE_CRE).get_local_element(i)[j]; + int I = op1->get_orbs(0); + int J = op1->get_orbs(1); + if (otherBlock->get_op_array(DES_DESCOMP).has_local_index(I, J)) { + boost::shared_ptr<StackSparseMatrix> op2; + if (!dmrginp.spinAdapted() || dmrginp.hamiltonian() == BCS) + op2 = otherBlock->get_op_array(DES_DESCOMP).get_element(I, J).at(0); + else + op2 = otherBlock->get_op_rep(DES_DESCOMP, -op1->get_deltaQuantum()[0], I, J); + + if (!op2->memoryUsed()) comps.push_back(op2); + + if (otherBlock->has(CRE_CRECOMP)) { + if (!dmrginp.spinAdapted() || dmrginp.hamiltonian() == BCS) + op2 = otherBlock->get_op_array(CRE_CRECOMP).get_element(I, J).at(0); + else + op2 = otherBlock->get_op_rep(CRE_CRECOMP, op1->get_deltaQuantum()[0], I, J); + + if (!op2->memoryUsed()) comps.push_back(op2); + } + } + } + + // Now allocate comps + double additionalMem = 0.; + for (int i = 0; i < comps.size(); ++i) { + comps.at(i)->allocate(otherBlock->get_braStateInfo(), otherBlock->get_ketStateInfo()); + additionalMem += comps.at(i) -> memoryUsed(); + } + p3out << "additional memory used for pre-build complementary operators " + << additionalMem*8/1.e9 << " GB" << endl; + + SplitStackmem(); + + // omp parallel build +#pragma omp parallel for schedule(dynamic) + for (int i = 0; i < comps.size(); ++i) + comps.at(i)->build(*otherBlock); + MergeStackmem(); + + return comps; +} + void StackSpinBlock::multiplyH(StackWavefunction& c, StackWavefunction* v, int num_threads) const { @@ -1159,7 +1249,7 @@ void StackSpinBlock::multiplyH(StackWavefunction& c, StackWavefunction* v, int n } } - + for (int i = 0; iget_ketStateInfo().unCollectedStateInfo, otherBlock->get_ketStateInfo()); diff --git a/Stackspinblock.h b/Stackspinblock.h index 35ba1f4..ffab087 100644 --- a/Stackspinblock.h +++ b/Stackspinblock.h @@ -212,6 +212,7 @@ class StackSpinBlock void BuildSingleSlaterBlock(std::vector<int> sts); void set_loopblock(bool p_loopblock){loopblock = p_loopblock;} friend ostream& operator<< (ostream& os, const StackSpinBlock& b); + std::vector<boost::shared_ptr<StackSparseMatrix>> prebuild(int num_threads) const; void multiplyH(StackWavefunction& c, StackWavefunction* v, int num_threads) const; void multiplyH_2index(StackWavefunction& c, StackWavefunction* v, int num_threads) const; void multiplyOverlap(StackWavefunction& c, StackWavefunction* 
v, int num_threads) const; diff --git a/blas_calls.h b/blas_calls.h index 9809f70..c03f30b 100644 --- a/blas_calls.h +++ b/blas_calls.h @@ -33,47 +33,56 @@ Sandeep Sharma and Garnet K.-L. Chan #ifdef AIX extern "C" { +#if !defined(MKL_DIRECT_CALL) && !defined(MKL_DIRECT_CALL_SEQ) void daxpy(int *ntot, double *coeff, double *copy_from, int *inc1, double *copy_to, int *inc2); - void dcopy(int *ntot,double *copy_from,int *inc1, double *copy_to,int *inc2); - double ddot(int *ntot, double *x, int *incx, double *y, int *incy); void dgemm(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); - void dgemv(char *trans, int *m, int *n, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); - void dscal(int *size,double *coeff,double *matrix,int *inc); - + double ddot(int *ntot, double *x, int *incx, double *y, int *incy); void saxpy(int *ntot, float *coeff, float *copy_from, int *inc1, float *copy_to, int *inc2); - void scopy(int *ntot, float *copy_from, int *inc1, float *copy_to, int *inc2); - float sdot(int *ntot, float *x, int *incx, float *y, int *incy); void sgemm(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc); + float sdot(int *ntot, float *x, int *incx, float *y, int *incy); +#endif + + void dcopy(int *ntot,double *copy_from,int *inc1, double *copy_to,int *inc2); + + void dgemv(char *trans, int *m, int *n, double *alpha, + double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); + void dscal(int *size,double *coeff,double *matrix,int *inc); + + void scopy(int *ntot, float *copy_from, int *inc1, float *copy_to, int *inc2); void sscal(int *size, float *coeff, float *matrix,int *inc); } #else //SGI, Linux extern "C" { +#if !defined(MKL_DIRECT_CALL) && !defined(MKL_DIRECT_CALL_SEQ) void daxpy_(FORTINT *ntot, double *coeff, double *copy_from, FORTINT *inc1, double *copy_to, FORTINT *inc2); - void dcopy_(FORTINT *ntot,double *copy_from,FORTINT *inc1, double *copy_to,FORTINT *inc2); double ddot_(FORTINT *ntot, double *x, FORTINT *incx, double *y, FORTINT *incy); void dgemm_(char *transa, char *transb, FORTINT *m, FORTINT *n, FORTINT *k, double *alpha, double *A, FORTINT *lda, double *B, FORTINT *ldb, double *beta, double *C, FORTINT *ldc); - void dgemv_(char *trans, FORTINT *m, FORTINT *n, double *alpha, - double *a, FORTINT *lda, double *x, FORTINT *incx, double *beta, double *y, FORTINT *incy); - void dscal_(FORTINT *size, double *coeff, double *matrix,FORTINT *inc); - void saxpy_(FORTINT *ntot, float *coeff, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); - void scopy_(FORTINT *ntot, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); float sdot_(FORTINT *ntot, float *x, FORTINT *incx, float *y, FORTINT *incy); void sgemm_(char *transa, char *transb, FORTINT *m, FORTINT *n, FORTINT *k, float *alpha, float *A, FORTINT *lda, float *B, FORTINT *ldb, float *beta, float *C, FORTINT *ldc); +#endif + + void dcopy_(FORTINT *ntot,double *copy_from,FORTINT *inc1, double *copy_to,FORTINT *inc2); + + void dgemv_(char *trans, FORTINT *m, FORTINT *n, double *alpha, + double *a, FORTINT *lda, double *x, FORTINT *incx, double *beta, double *y, FORTINT *incy); + void dscal_(FORTINT *size, double *coeff, double *matrix,FORTINT *inc); + + void scopy_(FORTINT *ntot, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); + void 
sscal_(FORTINT *size, float *coeff, float *matrix,FORTINT *inc); int idamax_(FORTINT &n, double* d, FORTINT &indx); //int idamax_(int &n, double* d, int &indx); @@ -86,26 +95,29 @@ extern "C" #ifdef AIX extern "C" { - void dgesvd(char* JOBU, char* JOBVT, int* M, int* N, double* A, int* LDA, double* S, double* U, int* LDU, double* VT, int* LDVT, double* WORK, int* LWORK, - int* INFO); +#if !defined(MKL_DIRECT_CALL) && !defined(MKL_DIRECT_CALL_SEQ) void daxpy(int *ntot, double *coeff, double *copy_from, int *inc1, double *copy_to, int *inc2); - void dcopy(int *ntot,double *copy_from,int *inc1, double *copy_to,int *inc2); double ddot(int *ntot, double *x, int *incx, double *y, int *incy); void dgemm(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); - void dgemv(char *trans, int *m, int *n, double *alpha, - double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); - void dscal(int *size,double *coeff,double *matrix,int *inc); - void saxpy(int *ntot, float *coeff, float *copy_from, int *inc1, float *copy_to, int *inc2); - void scopy(int *ntot, float *copy_from, int *inc1, float *copy_to, int *inc2); float sdot(int *ntot, float *x, int *incx, float *y, int *incy); void sgemm(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc); +#endif + + void dgesvd(char* JOBU, char* JOBVT, int* M, int* N, double* A, int* LDA, double* S, double* U, int* LDU, double* VT, int* LDVT, double* WORK, int* LWORK, + int* INFO); + + void dcopy(int *ntot,double *copy_from,int *inc1, double *copy_to,int *inc2); + void dgemv(char *trans, int *m, int *n, double *alpha, + double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); + void dscal(int *size,double *coeff,double *matrix,int *inc); + void scopy(int *ntot, float *copy_from, int *inc1, float *copy_to, int *inc2); void sscal(int *size, float *coeff, float *matrix,int *inc); void dstev(char* JOBZ,FORTINT* N,double* A,double* E, double* W, FORTINT* Wlen, double*WORK, FORTINT* INFO); void dsyev(char* JOBZ,char* UPLO,int* N,double* A,int* LDA,double* W,double*WORK,int*LWORK,int* INFO); @@ -114,26 +126,30 @@ extern "C" #else //SGI, Linux extern "C" { - void dgesvd_(char* JOBU, char* JOBVT, FORTINT* M, FORTINT* N, double* A, FORTINT* LDA, double* S, double* U, FORTINT* LDU, double* VT, FORTINT* LDVT, double* WORK, FORTINT* LWORK, - FORTINT* INFO); +#if !defined(MKL_DIRECT_CALL) && !defined(MKL_DIRECT_CALL_SEQ) void daxpy_(FORTINT *ntot, double *coeff, double *copy_from, FORTINT *inc1, double *copy_to, FORTINT *inc2); - void dcopy_(FORTINT *ntot,double *copy_from,FORTINT *inc1, double *copy_to,FORTINT *inc2); double ddot_(FORTINT *ntot, double *x, FORTINT *incx, double *y, FORTINT *incy); void dgemm_(char *transa, char *transb, FORTINT *m, FORTINT *n, FORTINT *k, double *alpha, double *A, FORTINT *lda, double *B, FORTINT *ldb, double *beta, double *C, FORTINT *ldc); - void dgemv_(char *trans, FORTINT *m, FORTINT *n, double *alpha, - double *a, FORTINT *lda, double *x, FORTINT *incx, double *beta, double *y, FORTINT *incy); - void dscal_(FORTINT *size, double *coeff, double *matrix,FORTINT *inc); - void saxpy_(FORTINT *ntot, float *coeff, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); - void scopy_(FORTINT *ntot, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); float sdot_(FORTINT *ntot, float *x, FORTINT *incx, float *y, 
FORTINT *incy); void sgemm_(char *transa, char *transb, FORTINT *m, FORTINT *n, FORTINT *k, float *alpha, float *A, FORTINT *lda, float *B, FORTINT *ldb, float *beta, float *C, FORTINT *ldc); +#endif + + void dgesvd_(char* JOBU, char* JOBVT, FORTINT* M, FORTINT* N, double* A, FORTINT* LDA, double* S, double* U, FORTINT* LDU, double* VT, FORTINT* LDVT, double* WORK, FORTINT* LWORK, + FORTINT* INFO); + + void dcopy_(FORTINT *ntot,double *copy_from,FORTINT *inc1, double *copy_to,FORTINT *inc2); + void dgemv_(char *trans, FORTINT *m, FORTINT *n, double *alpha, + double *a, FORTINT *lda, double *x, FORTINT *incx, double *beta, double *y, FORTINT *incy); + void dscal_(FORTINT *size, double *coeff, double *matrix,FORTINT *inc); + + void scopy_(FORTINT *ntot, float *copy_from, FORTINT *inc1, float *copy_to, FORTINT *inc2); void sscal_(FORTINT *size, float *coeff, float *matrix,FORTINT *inc); void dstev_(char* JOBZ,FORTINT* N,double* A,double* E, double* W, FORTINT* Wlen, double*WORK, FORTINT* INFO); void dsyev_(char* JOBZ,char* UPLO,FORTINT* N,double* A,FORTINT* LDA,double* W,double*WORK,FORTINT*LWORK,FORTINT* INFO); @@ -153,6 +169,10 @@ extern "C" ** int ntot: length of data ** int inc1,inc2: increments for copy_from, copy_to */ +#if defined(MKL_DIRECT_CALL) || defined(MKL_DIRECT_CALL_SEQ) +#undef DAXPY +#undef SAXPY +#endif inline void DAXPY(FORTINT ntot, double coeff, double *copy_from, FORTINT inc1, double *copy_to, FORTINT inc2) { @@ -171,7 +191,6 @@ inline void SAXPY(FORTINT ntot, float coeff, float *copy_from, FORTINT inc1, saxpy_(&ntot,&coeff,copy_from,&inc1,copy_to,&inc2); #endif } - /* ** ** void DCOPY(int ntot, double *x, int incx, double *y, int *incy); @@ -208,6 +227,10 @@ inline void SCOPY(FORTINT ntot,float *copy_from,FORTINT inc1,float *copy_to,FORT ** int ntot: length of x,y ** int incx,incy: increments for x,y */ +#if defined(MKL_DIRECT_CALL) || defined(MKL_DIRECT_CALL_SEQ) +#undef DDOT +#undef SDOT +#endif inline double DDOT(FORTINT ntot, double *x, FORTINT incx, double *y, FORTINT incy) { #ifdef AIX @@ -224,7 +247,6 @@ inline float SDOT(FORTINT ntot, float *x, FORTINT incx, float *y, FORTINT incy) return sdot_(&ntot,x,&incx,y,&incy); #endif } - /* ** ** void DGEMM(char transa, char transb, int m, int n, int k, double alpha, @@ -283,6 +305,9 @@ inline float SDOT(FORTINT ntot, float *x, FORTINT incx, float *y, FORTINT incy) ** On exit, ldc is unchanged. ** */ +#if defined(MKL_DIRECT_CALL) || defined(MKL_DIRECT_CALL_SEQ) +#undef DGEMM +#endif inline void DGEMM(char transa, char transb, FORTINT m, FORTINT n, FORTINT k, double alpha, double *A, FORTINT lda, double *B, FORTINT ldb, double beta, double *C, FORTINT ldc) @@ -331,7 +356,9 @@ inline void GESV(FORTINT n, FORTINT nrhs, double* a, FORTINT lda, FORTINT* ipiv, #endif } - +#if defined(MKL_DIRECT_CALL) || defined(MKL_DIRECT_CALL_SEQ) +#undef SGEMM +#endif inline void SGEMM(char transa, char transb, FORTINT m, FORTINT n, FORTINT k, float alpha, float *A, FORTINT lda, float *B, FORTINT ldb, float beta, float *C, FORTINT ldc)
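// The davidson.C changes that follow tie the prebuilt two-index
// complementary operators to the lifetime of the Davidson functor:
// constructed once when multiply_h is created, freed in reverse (LIFO)
// order when it is destroyed, matching the stack memory discipline of
// SplitStackmem/MergeStackmem. A condensed sketch of the pattern (`big`
// and `onedot` are hypothetical caller names):
//
//   {
//     multiply_h hmult(big, onedot);   // ctor: twoidxcomps = big.prebuild(MAX_THRD)
//     // ... Davidson iterations call hmult(c, v) repeatedly ...
//   }                                  // dtor: deallocates twoidxcomps back-to-front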
diff --git a/davidson.C b/davidson.C index f7af55c..abb0853 100644 --- a/davidson.C +++ b/davidson.C @@ -12,33 +12,40 @@ Sandeep Sharma and Garnet K.-L. Chan #endif #include "Stackwavefunction.h" #include "Stackspinblock.h" -#include "StackBaseOperator.h" #include "MatrixBLAS.h" - -SpinAdapted::multiply_h::multiply_h(const StackSpinBlock& b, const bool &onedot_) : block(b){} - +SpinAdapted::multiply_h::multiply_h(const StackSpinBlock& b, const bool &onedot_) : block(b){ + if (dmrginp.prebuild() && dmrginp.doimplicitTranspose()) + twoidxcomps = b.prebuild(MAX_THRD); +} void SpinAdapted::multiply_h::operator()(StackWavefunction& c, StackWavefunction& v) { block.multiplyH( c, &v, MAX_THRD); - //block.multiplyH_2index( c, &v, MAX_THRD); - //pout << "c" << endl; - //pout << c << endl; - //pout << "v" << endl; - //pout << v << endl; } -SpinAdapted::multiply_h_2Index::multiply_h_2Index(const StackSpinBlock& b, const bool &onedot_) : block(b){} +SpinAdapted::multiply_h::~multiply_h() { + if (dmrginp.prebuild() && dmrginp.doimplicitTranspose()) { + for (int i = twoidxcomps.size()-1; i >= 0; --i) + twoidxcomps.at(i)->deallocate(); + } +} + +SpinAdapted::multiply_h_2Index::multiply_h_2Index(const StackSpinBlock& b, const bool &onedot_) : block(b){ + if (dmrginp.prebuild() && dmrginp.doimplicitTranspose()) + twoidxcomps = b.prebuild(MAX_THRD); +} void SpinAdapted::multiply_h_2Index::operator()(StackWavefunction& c, StackWavefunction& v) { block.multiplyH_2index( c, &v, MAX_THRD); - //block.multiplyH3index( c, &v, MAX_THRD); } - - - +SpinAdapted::multiply_h_2Index::~multiply_h_2Index() { + if (dmrginp.prebuild() && dmrginp.doimplicitTranspose()) { + for (int i = twoidxcomps.size()-1; i >= 0; --i) + twoidxcomps.at(i)->deallocate(); + } +} diff --git a/davidson.h b/davidson.h index d34b403..adc5e95 100644 --- a/davidson.h +++ b/davidson.h @@ -8,6 +8,8 @@ Sandeep Sharma and Garnet K.-L. Chan #ifndef SPIN_DAVIDSON_HEADER #define SPIN_DAVIDSON_HEADER +#include <vector> +#include "StackBaseOperator.h" namespace SpinAdapted{ class StackWavefunction; @@ -17,26 +19,31 @@ struct Davidson_functor { virtual void operator()(StackWavefunction& c, StackWavefunction& v) = 0; virtual const StackSpinBlock& get_block() = 0; + virtual ~Davidson_functor() {} }; class multiply_h : public Davidson_functor { private: const StackSpinBlock& block; + std::vector<boost::shared_ptr<StackSparseMatrix>> twoidxcomps; public: multiply_h(const StackSpinBlock& b, const bool &onedot_); void operator()(StackWavefunction& c, StackWavefunction& v); const StackSpinBlock& get_block() {return block;} + ~multiply_h(); }; class multiply_h_2Index : public Davidson_functor { private: const StackSpinBlock& block; + std::vector<boost::shared_ptr<StackSparseMatrix>> twoidxcomps; public: multiply_h_2Index(const StackSpinBlock& b, const bool &onedot_); void operator()(StackWavefunction& c, StackWavefunction& v); const StackSpinBlock& get_block() {return block;} + ~multiply_h_2Index(); }; } diff --git a/dmrg.C b/dmrg.C index 6e4334e..204539f 100644 --- a/dmrg.C +++ b/dmrg.C @@ -200,7 +200,7 @@ int calldmrg(char* input, char* output) cpu_set_t *oldmask, newmask; int oldset, newset; - //if only a subset of processors are being used, then unset processor affinity + // if only a subset of processors are being used, then unset processor affinity if (m_calc_procs.size() < world.size()) { sched_getaffinity(0, oldset, oldmask); CPU_ZERO( &newmask); @@ -293,20 +293,17 @@ int calldmrg(char* input, char* output) dmrginp.set_algorithm_method() = ONEDOT; //initialize state info and canonicalize wavefunction is always done using onedot algorithm if (mpigetrank()==0) { - Sweep::InitializeStateInfo(sweepParams, direction, baseState); - Sweep::InitializeStateInfo(sweepParams, !direction, baseState); - 
diff --git a/dmrg.C b/dmrg.C
index 6e4334e..204539f 100644
--- a/dmrg.C
+++ b/dmrg.C
@@ -200,7 +200,7 @@ int calldmrg(char* input, char* output)
   cpu_set_t *oldmask, newmask;
   int oldset, newset;
 
-  //if only a subset of processors are being used, then unset processor affinity
+  // if only a subset of processors are being used, then unset processor affinity
   if (m_calc_procs.size() < world.size()) {
     sched_getaffinity(0, oldset, oldmask);
     CPU_ZERO( &newmask);
@@ -293,20 +293,17 @@ int calldmrg(char* input, char* output)
     dmrginp.set_algorithm_method() = ONEDOT; //initialize state info and canonicalize wavefunction is always done using onedot algorithm
     if (mpigetrank()==0) {
-     Sweep::InitializeStateInfo(sweepParams, direction, baseState);
-     Sweep::InitializeStateInfo(sweepParams, !direction, baseState);
-     Sweep::CanonicalizeWavefunction(sweepParams, direction, baseState);
-     Sweep::CanonicalizeWavefunction(sweepParams, !direction, baseState);
-     Sweep::CanonicalizeWavefunction(sweepParams, direction, baseState);
+      Sweep::InitializeStateInfo(sweepParams, direction, baseState);
+      Sweep::InitializeStateInfo(sweepParams, !direction, baseState);
+      Sweep::CanonicalizeWavefunction(sweepParams, direction, baseState);
+      Sweep::CanonicalizeWavefunction(sweepParams, !direction, baseState);
+      Sweep::CanonicalizeWavefunction(sweepParams, direction, baseState);
     }
     dmrginp.set_algorithm_method() = atype;
   }
-  //this genblock is required to generate all the nontranspose operators
   dmrginp.setimplicitTranspose() = false;
   SweepGenblock::do_one(sweepParams, false, false, false, restartsize, baseState, baseState);
-
-
   compress(sweep_tol, targetState, baseState);
 }
 else if (dmrginp.calc_type() == RESPONSEBW)
@@ -490,6 +487,9 @@ int calldmrg(char* input, char* output)
   else if (dmrginp.calc_type() == TINYCALC) {
     Sweep::tiny(sweep_tol);
   }
+  else if (dmrginp.calc_type() == RESTART_ONEPDM) {
+    Npdm::npdm(NPDM_ONEPDM);
+  }
   else if (dmrginp.calc_type() == RESTART_THREEPDM) {
     Npdm::npdm(NPDM_THREEPDM);
   }
@@ -572,8 +572,7 @@ int calldmrg(char* input, char* output)
 #ifndef SERIAL
   }
-  //world.barrier();
-  sleepBarrier(world, 0, 10);
+  sleepBarrier(world, 0, 1);
   MPI_Comm_free(&Calc);
   sched_setaffinity(0, sizeof(oldmask), oldmask);
 #endif
@@ -1136,7 +1135,11 @@ void responsepartialSweep(double sweep_tol, int targetState, vector& projec
   }
   w2.CollectQuantaAlongColumns(*state.leftStateInfo, const_cast<StateInfo&>(*newSystem.get_stateInfo().unCollectedStateInfo));
   StateInfo bigstate;
-  TensorProduct(*state.leftStateInfo, const_cast<StateInfo&>(newSystem.get_stateInfo()), bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  if (dmrginp.hamiltonian() == BCS) {
+    TensorProduct(*state.leftStateInfo, const_cast<StateInfo&>(newSystem.get_stateInfo()), bigstate, SPIN_NUMBER_CONSTRAINT);
+  } else {
+    TensorProduct(*state.leftStateInfo, const_cast<StateInfo&>(newSystem.get_stateInfo()), bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  }
   w2.SaveWavefunctionInfo(bigstate, sites, baseStates[0]);
   w2.deallocate();
   w.deallocate();
@@ -1374,7 +1377,6 @@ void compress(double sweep_tol, int targetState, int baseState)
   sweepParams.current_root() = -1;
   //this is the warmup sweep, the baseState is used as the initial guess for the targetState
   last_fe = SweepCompress::do_one(sweepParams, true, true, false, 0, targetState, baseState);
-  direction = false;
 
   while ( true)
   {
diff --git a/enumerator.h b/enumerator.h
index 86e9b9f..5a866f3 100644
--- a/enumerator.h
+++ b/enumerator.h
@@ -34,7 +34,7 @@ namespace SpinAdapted{
  enum keywords{ORBS, LASTM, STARTM, MAXM, REORDER, HF_OCC, SCHEDULE, SYM, NELECS, SPIN, IRREP, MAXJ, PREFIX, NROOTS, DOCD, DEFLATION_MAX_SIZE, MAXITER, BASENERGY, SCREEN_TOL, ODOT, SWEEP_TOL, OUTPUTLEVEL, NONSPINADAPTED, BOGOLIUBOV, TWODOT_NOISE, WARMUP,
-               NPDM_INTERMEDIATE, NPDM_NO_INTERMEDIATE, NPDM_MULTINODE, NPDM_NO_MULTINODE, NUMKEYWORDS};
+               NPDM_INTERMEDIATE, NPDM_NO_INTERMEDIATE, NPDM_MULTINODE, NPDM_NO_MULTINODE, PREBUILD, NUMKEYWORDS};
 
 enum {
   NO_PARTICLE_SPIN_NUMBER_CONSTRAINT,
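
A pattern that recurs throughout this patch: wherever a StateInfo tensor product is formed, PARTICLE_SPIN_NUMBER_CONSTRAINT is replaced by SPIN_NUMBER_CONSTRAINT when the Hamiltonian is BCS. A Bogoliubov pairing Hamiltonian does not conserve particle number, so the product quanta can only be restricted by spin. A sketch of the choice, factored into a helper (the helper is hypothetical; the patch inlines the branch at each call site, and the enum values mirror enumerator.h):

    enum Constraint { NO_PARTICLE_SPIN_NUMBER_CONSTRAINT,
                      PARTICLE_SPIN_NUMBER_CONSTRAINT,
                      SPIN_NUMBER_CONSTRAINT };
    enum HamType { QUANTUM_CHEMISTRY, BCS };

    // BCS breaks particle-number conservation: constrain by spin only.
    inline Constraint tensorConstraint(HamType ham) {
      return ham == BCS ? SPIN_NUMBER_CONSTRAINT
                        : PARTICLE_SPIN_NUMBER_CONSTRAINT;
    }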
diff --git a/fciqmchelper.C b/fciqmchelper.C
index f31b6ad..606c670 100644
--- a/fciqmchelper.C
+++ b/fciqmchelper.C
@@ -4,6 +4,7 @@
 #include "Stackspinblock.h"
 #include "initblocks.h"
 #ifndef SERIAL
+#include "mpi.h"
 #include
 #endif
@@ -172,14 +173,14 @@ namespace SpinAdapted{
   //this is a helper function to make a MPS from a occupation number representation of determinant
   //this representation is slightly different than the usual occupation, here each integer
   //element is a spatial orbital which can have a value 0, -1, 1, or 2.
-  MPS::MPS(ulong *occnum, int length)
+  MPS::MPS(unsigned long *occnum, int length)
   {
     assert(length*64 >= dmrginp.last_site());
     //convert the int array into a vector
     std::vector<int> occ(dmrginp.last_site(), 0);
-    ulong temp = 1;
+    unsigned long temp = 1;
     int index = 0;
     for (int i=0; i
@@ ... @@ namespace SpinAdapted{
+    if (dmrginp.hamiltonian() == BCS) {
+      TensorProduct(currentState, const_cast<StateInfo&> (MPS::siteBlocks[MPS::sweepIters+1].get_stateInfo()), bigState, SPIN_NUMBER_CONSTRAINT);
+    } else {
       TensorProduct(currentState, const_cast<StateInfo&> (MPS::siteBlocks[MPS::sweepIters+1].get_stateInfo()), bigState, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+    }
 
     if (!dmrginp.spinAdapted())
       rotSites[1] -= 2;
@@ -393,25 +398,26 @@ namespace SpinAdapted{
     InitBlocks::InitStartingBlock(system, forward, leftState, statebindex, forward_starting_size, backward_starting_size, restartSize, restart, warmUp, integralIndex);
     SpinQuantum hq(0, SpinSpace(0), IrrepSpace(0));
-    p2out << system<
     std::vector<Matrix> Rotationa, Rotationb;
     int sys_add = true; bool direct = true;
     std::vector<int> rotSites(2,0);
     int sweepIters = dmrginp.spinAdapted() ? dmrginp.last_site() -2 : dmrginp.last_site()/2-2;
+    int normToComp = sweepIters/2;
+    for (int i=0; i
+      if (i>=normToComp && !system.has(CRE_DESCOMP))
+        system.addAllCompOps();
       system.addAdditionalOps();
       StackSpinBlock dotsite(i+1, i+1, integralIndex, false);
       if (mpigetrank() == 0) {
-       rotSites[1] = i+1;
-       LoadRotationMatrix(rotSites, Rotationa, statea);
-       LoadRotationMatrix(rotSites, Rotationb, stateb);
-       //Rotationa = statea.getSiteTensors(i+1);
-       //Rotationb = stateb.getSiteTensors(i+1);
+        rotSites[1] = dmrginp.spinAdapted() ? i+1 : i*2+3;
+        LoadRotationMatrix(rotSites, Rotationa, statea);
+        LoadRotationMatrix(rotSites, Rotationb, stateb);
+        //Rotationa = statea.getSiteTensors(i+1);
+        //Rotationb = stateb.getSiteTensors(i+1);
       }
 #ifndef SERIAL
@@ -419,7 +425,11 @@ namespace SpinAdapted{
       mpi::broadcast(calc, Rotationa, 0);
       mpi::broadcast(calc, Rotationb, 0);
 #endif
-      InitBlocks::InitNewSystemBlock(system, dotsite, newSystem, 0, statebindex, sys_add, direct, integralIndex, DISTRIBUTED_STORAGE, false, true);
+      if (i < normToComp) {
+        InitBlocks::InitNewSystemBlock(system, dotsite, newSystem, 0, statebindex, sys_add, direct, integralIndex, DISTRIBUTED_STORAGE, true, false);
+      } else {
+        InitBlocks::InitNewSystemBlock(system, dotsite, newSystem, 0, statebindex, sys_add, direct, integralIndex, DISTRIBUTED_STORAGE, false, true);
+      }
       newSystem.transform_operators(const_cast<std::vector<Matrix>&>(Rotationa), const_cast<std::vector<Matrix>&>(Rotationb));
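
The MPS(unsigned long*, int) constructor above decodes a determinant packed into 64-bit words (the assert requires length*64 >= dmrginp.last_site()); the body of its loop was lost in extraction. A sketch of the indexing that packing implies, assuming one bit per spin orbital, with the bit order inside each word chosen arbitrarily (LSB-first here):

    #include <cassert>
    #include <vector>

    // Assumed semantics only: bit i of the determinant lives in word i/64
    // at position i%64; the real constructor may scan in a different order.
    std::vector<int> unpackOccupation(const unsigned long* occnum,
                                      int nwords, int norbs) {
      assert(nwords * 64 >= norbs);
      std::vector<int> occ(norbs, 0);
      for (int i = 0; i < norbs; ++i) {
        unsigned long word = occnum[i / 64];
        occ[i] = (word >> (i % 64)) & 1ul;  // 1 if spin orbital i is occupied
      }
      return occ;
    }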
@@ -433,32 +443,47 @@ namespace SpinAdapted{
     }
 
     StackSpinBlock newSystem, big;
-    StackSpinBlock dotsite1(sweepIters, sweepIters, integralIndex, false);
-    StackSpinBlock dotsite2(sweepIters+1, sweepIters+1, integralIndex, false);
-
+    StackSpinBlock dotsite1(sweepIters, sweepIters, integralIndex, sameStates);
+    StackSpinBlock dotsite2(sweepIters+1, sweepIters+1, integralIndex, sameStates);
     system.addAdditionalOps();
     InitBlocks::InitNewSystemBlock(system, dotsite1, newSystem, 0, statebindex, sys_add, direct, integralIndex, DISTRIBUTED_STORAGE, false, true);
 
     newSystem.set_loopblock(false); system.set_loopblock(false);
-    InitBlocks::InitBigBlock(newSystem, dotsite2, big);
+    InitBlocks::InitBigBlock(newSystem, dotsite2, big);
 
     StackWavefunction stateaw, statebw;
     StateInfo s;
-    rotSites[1] += 1;
+    rotSites[1] += dmrginp.spinAdapted() ? 1 : 2;
     stateaw.LoadWavefunctionInfo(s, rotSites, statea, true);
     statebw.LoadWavefunctionInfo(s, rotSites, stateb, true);
+#ifndef SERIAL
+    mpi::broadcast(calc, stateaw, 0);
+    mpi::broadcast(calc, statebw, 0);
+    if (mpigetrank() != 0) {
+      double* dataa = Stackmem[omprank].allocate(stateaw.memoryUsed());
+      stateaw.set_data(dataa);
+      stateaw.allocateOperatorMatrix();
+      double* datab = Stackmem[omprank].allocate(statebw.memoryUsed());
+      statebw.set_data(datab);
+      statebw.allocateOperatorMatrix();
+    }
+    calc.barrier();
+    MPI_Bcast(stateaw.get_data(), stateaw.memoryUsed(), MPI_DOUBLE, 0, Calc);
+    MPI_Bcast(statebw.get_data(), statebw.memoryUsed(), MPI_DOUBLE, 0, Calc);
+#endif
+
     StackWavefunction temp; temp.initialise(stateaw);
     temp.Clear();
-
-
+    calc.barrier();
     big.multiplyH_2index(statebw, &temp, 1);
-
+    calc.barrier();
     if (mpigetrank() == 0)
       h = DotProduct(stateaw, temp);
+    calc.barrier();
 
     temp.Clear();
     big.multiplyOverlap(statebw, &temp, 1);
@@ -466,7 +491,6 @@ namespace SpinAdapted{
       o = DotProduct(stateaw, temp);
 
 #ifndef SERIAL
-    mpi::communicator world;
     mpi::broadcast(calc, h, 0);
     mpi::broadcast(calc, o, 0);
 #endif
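
The #ifndef SERIAL block above broadcasts each wavefunction in two phases: boost::mpi ships the serializable skeleton, every non-root rank then allocates a matching raw buffer, and one flat MPI_Bcast moves the numeric payload instead of re-serializing every double. A stripped-down sketch of the same idea (Wavefunction is a stand-in type, not the StackWavefunction API):

    #include <mpi.h>
    #include <vector>

    struct Wavefunction { std::vector<double> data; };

    void broadcastWavefunction(Wavefunction& w, MPI_Comm comm) {
      int rank; MPI_Comm_rank(comm, &rank);
      unsigned long n = w.data.size();                 // phase 1: shape/metadata
      MPI_Bcast(&n, 1, MPI_UNSIGNED_LONG, 0, comm);
      if (rank != 0) w.data.resize(n);                 // allocate before receiving
      MPI_Bcast(w.data.data(), (int)n, MPI_DOUBLE, 0, comm);  // phase 2: payload
    }

The barriers around multiplyH_2index in the hunk serve the same purpose as the calc.barrier() before the raw MPI_Bcast: the payload transfer assumes all ranks have finished allocating.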
diff --git a/fciqmchelper.h b/fciqmchelper.h
index 969566b..fba7041 100644
--- a/fciqmchelper.h
+++ b/fciqmchelper.h
@@ -39,7 +39,7 @@ class MPS{
   MPS() {};
   MPS(int stateindex);
   MPS(std::vector<int>& occ);
-  MPS(ulong* occnum, int length);
+  MPS(unsigned long* occnum, int length);
   //void buildMPSrep();
   std::vector<Matrix>& getSiteTensors(int i) {return SiteTensors[i];}
   const std::vector<Matrix>& getSiteTensors(int i) const {return SiteTensors[i];}
diff --git a/include/multiarray.h b/include/multiarray.h
index e069c26..061b685 100644
--- a/include/multiarray.h
+++ b/include/multiarray.h
@@ -12,6 +12,7 @@ Sandeep Sharma and Garnet K.-L. Chan
 #include
 #include
 #include
+#include
 #include
 #include
diff --git a/input.C b/input.C
index 4ed5e7b..355cb6b 100644
--- a/input.C
+++ b/input.C
@@ -130,6 +130,7 @@ void SpinAdapted::Input::initialize_defaults()
   m_guess_permutations = 10;
 
   m_direct = true;
+  m_prebuild = false;
   m_nroots = 1;
   m_weights.resize(m_nroots);
   m_weights[0] = 1.;
@@ -184,6 +185,7 @@ void SpinAdapted::Input::initialize_defaults()
   m_maxM = 0;
   m_lastM = 500;
   m_startM = 250;
+  m_bra_M = 0;
 
   m_calc_ri_4pdm=false;
   m_store_ripdm_readable=false;
@@ -303,8 +305,17 @@ SpinAdapted::Input::Input(const string& config_name) {
       }
       m_Bogoliubov = true;
       m_ham_type = BCS;
-    }
-    else if (boost::iequals(keyword, "warmup")) {
+    } else if (boost::iequals(keyword, "prebuild")) {
+      if (usedkey[PREBUILD] == 0)
+        usedkey_error(keyword, msg);
+      usedkey[PREBUILD] = 0;
+      if (tok.size() != 1) {
+        pout << "keyword prebuild is a stand alone keyword" << endl;
+        pout << msg << endl;
+        abort();
+      }
+      m_prebuild = true;
+    } else if (boost::iequals(keyword, "warmup")) {
       if (usedkey[WARMUP] == 0)
        usedkey_error(keyword, msg);
       usedkey[WARMUP] = 0;
@@ -967,9 +978,18 @@ SpinAdapted::Input::Input(const string& config_name) {
     {
       m_npdm_multinode = false;
     }
-
-
-
+    else if (boost::iequals(keyword, "specificpdm"))
+    {
+      if(tok.size() == 2) {
+        m_specificpdm.push_back(atoi(tok[1].c_str()));
+      } else if (tok.size() == 3) {
+        m_specificpdm.push_back(atoi(tok[1].c_str()));
+        m_specificpdm.push_back(atoi(tok[2].c_str()));
+      } else {
+        pout << "keyword "<
 1 ) {
-      pout << "Currently the response code does not work with non-particle number conserving hamiltonians!!";
-      abort();
-    }
-    //if (sym != "c1")
     // must be initialized even if c1 sym.
     Symmetry::InitialiseTable(sym);
@@ -1311,7 +1337,7 @@ SpinAdapted::Input::Input(const string& config_name) {
       v_cccc[integral].rhf = true;
       v_cccd[integral].rhf = true;
     }
-
+    // Kij-based ordering by GA opt.
 #ifndef SERIAL
     mpi::broadcast(world,m_reorderType,0);
@@ -1376,7 +1402,7 @@ SpinAdapted::Input::Input(const string& config_name) {
         }
       }
       else
-       readorbitalsfile(orbitalfile[integral], v_1[integral], v_2[integral], coreEnergy[integral], integral);
+        readorbitalsfile(orbitalfile[integral], v_1[integral], v_2[integral], coreEnergy[integral], integral);
     }
   }
@@ -1445,30 +1471,35 @@ SpinAdapted::Input::Input(const string& config_name) {
   mpi::broadcast(world, PROPBITLEN, 0);
 #endif
   //if (mpigetrank() == 0) {
+  // int s = 0;
   //cout << "v_1" << endl;
   //for (int i = 0; i < m_norbs; ++i)
   //  for (int j = 0; j < m_norbs; ++j)
-  //    cout << fixed << setprecision(12) << v_1[0](i,j) << " " << i << " " << j << endl;
+  //    if (fabs(v_1[s](i,j)) > 1e-12)
+  //      cout << fixed << setprecision(12) << v_1[s](i,j) << " " << i << " " << j << endl;
 
   //cout << "v_2" << endl;
   //for (int i = 0; i < m_norbs; ++i)
   //  for (int j = 0; j < m_norbs; ++j)
   //    for (int k = 0; k < m_norbs; ++k)
   //      for (int l = 0; l < m_norbs; ++l)
-  //        cout << fixed << setprecision(12) << v_2[0](i,j,k,l)
+  //        if (fabs(v_2[s](i,j,k,l)) > 1e-12)
+  //          cout << fixed << setprecision(12) << v_2[s](i,j,k,l)
   //             << " " << i << " " << j << " " << k << " " << l << endl;
 
   //cout << "v_cc" << endl;
   //for (int i = 0; i < m_norbs; ++i)
   //  for (int j = 0; j < m_norbs; ++j)
-  //    cout << fixed << setprecision(12) << v_cc[0](i,j) << " " << i << " " << j << endl;
+  //    if (fabs(v_cc[s](i,j)) > 1e-12)
+  //      cout << fixed << setprecision(12) << v_cc[s](i,j) << " " << i << " " << j << endl;
 
   //cout << "v_cccc" << endl;
   //for (int i = 0; i < m_norbs; ++i)
   //  for (int j = 0; j < m_norbs; ++j)
   //    for (int k = 0; k < m_norbs; ++k)
   //      for (int l = 0; l < m_norbs; ++l)
-  //        cout << fixed << setprecision(12) << v_cccc[0](i,j,k,l)
+  //        if (fabs(v_cccc[s](i,j,k,l)) > 1e-12)
+  //          cout << fixed << setprecision(12) << v_cccc[s](i,j,k,l)
   //             << " " << i << " " << j << " " << k << " " << l << endl;
 
   //cout << "v_cccd" << endl;
@@ -1476,11 +1507,11 @@ SpinAdapted::Input::Input(const string& config_name) {
   //  for (int j = 0; j < m_norbs; ++j)
   //    for (int k = 0; k < m_norbs; ++k)
   //      for (int l = 0; l < m_norbs; ++l)
-  //        cout << fixed << setprecision(12) << v_cccd[0](i,j,k,l)
+  //        if (fabs(v_cccd[s](i,j,k,l))>1e-12)
+  //          cout << fixed << setprecision(12) << v_cccd[s](i,j,k,l)
   //             << " " << i << " " << j << " " << k << " " << l << endl;
   //}
   //world.barrier();
-  //exit(0);
 }
 
 void SpinAdapted::Input::readreorderfile(ifstream& dumpFile, std::vector<int>& oldtonew) {
@@ -1515,7 +1546,6 @@ void SpinAdapted::Input::readreorderfile(ifstream& dumpFile, std::vector<int>& o
   pout << "Numbers or orbitals in reorder file should be equal to "<
 O_reorder(3, 1)
   bool RHF = true;
   int AOrbOffset = 0, BOrbOffset = 0;
-
+  if (rank == 0) {
+    reorder.resize(m_norbs/2);
+    for (int i=0; i
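
Given the parsing above, specificpdm takes either one integer (compute only the PDM of that state) or two integers (compute only the transition PDM between two states); any other token count aborts. A hypothetical input-file fragment, assuming Block's usual one-keyword-per-line format:

    specificpdm 2

or, for a transition density matrix,

    specificpdm 0 1

The values land in m_specificpdm and are consumed by the npdm driver further down in this patch.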
diff --git a/input.h b/input.h
--- a/input.h
+++ b/input.h
@@ ... @@ class Input {
+  std::vector<int> m_specificpdm;
   bool m_npdm_multinode;
   bool m_set_Sz;
   int m_maxiter;
@@ -135,6 +137,7 @@ class Input {
   std::string m_save_prefix;
   std::string m_load_prefix;
   bool m_direct;
+  bool m_prebuild;
   std::vector<double> m_orbenergies;
   int m_maxj;
@@ -185,16 +188,16 @@ class Input {
     ar & m_sweep_iter_schedule & m_sweep_state_schedule & m_sweep_qstate_schedule & m_sweep_tol_schedule & m_sweep_noise_schedule &m_sweep_additional_noise_schedule & m_reorder;
     ar & m_molecule_quantum & m_total_symmetry_number & m_total_spin & m_orbenergies & m_add_noninteracting_orbs;
     ar & m_bra_symmetry_number & m_permSymm & m_activeorbs & m_excitation & m_openorbs & m_closedorbs;
-    ar & m_save_prefix & m_load_prefix & m_direct & m_max_lanczos_dimension & m_performResponseSolution;
+    ar & m_save_prefix & m_load_prefix & m_direct & m_prebuild & m_max_lanczos_dimension & m_performResponseSolution;
     ar & m_deflation_min_size & m_deflation_max_size & m_outputlevel & m_reorderfile;
     ar & m_algorithm_type & m_twodot_to_onedot_iter & m_orbformat & m_calc_procs;
     ar & m_nquanta & m_sys_add & m_env_add & m_do_fci & m_no_transform ;
-    ar & m_do_pdm & m_do_npdm_ops & m_do_npdm_in_core & m_npdm_generate & m_new_npdm_code & m_transition_diff_spatial_irrep & m_occupied_orbitals;
+    ar & m_do_pdm & m_do_npdm_ops & m_do_npdm_in_core & m_npdm_generate & m_new_npdm_code & m_specificpdm & m_transition_diff_spatial_irrep & m_occupied_orbitals;
     ar & m_store_spinpdm &m_spatpdm_disk_dump & m_pdm_unsorted & m_npdm_intermediate & m_npdm_multinode;
     ar & m_maxj & m_ninej & m_maxiter & m_do_deriv & m_oneindex_screen_tol & m_twoindex_screen_tol & m_quantaToKeep & m_noise_type;
     ar & m_sweep_tol & m_restart & m_backward & m_fullrestart & m_restart_warm & m_reset_iterations & m_calc_type & m_ham_type & m_warmup;
     ar & m_do_diis & m_diis_error & m_start_diis_iter & m_diis_keep_states & m_diis_error_tol & m_num_spatial_orbs;
-    ar & m_spatial_to_spin & m_spin_to_spatial & m_maxM & m_schedule_type_backward & m_schedule_type_default &m_integral_disk_storage_thresh;
+    ar & m_spatial_to_spin & m_spin_to_spatial & m_maxM & m_bra_M & m_schedule_type_backward & m_schedule_type_default &m_integral_disk_storage_thresh;
     ar & n_twodot_noise & m_twodot_noise & m_twodot_gamma & m_guessState & m_useSharedMemory ;
     ar & m_calc_ri_4pdm & m_store_ripdm_readable & m_nevpt2 & m_conventional_nevpt2 & m_kept_nevpt2_states & NevPrint;
   }
@@ -443,6 +446,7 @@ class Input {
   const bool &no_transform() const { return m_no_transform; }
   const int &deflation_min_size() const { return m_deflation_min_size; }
   const bool &direct() const { return m_direct; }
+  const bool &prebuild() const {return m_prebuild; }
   const int &deflation_max_size() const { return m_deflation_max_size; }
   const IrrepSpace &total_symmetry_number() const { return m_total_symmetry_number; }
   const IrrepSpace &bra_symmetry_number() const { return m_bra_symmetry_number; }
@@ -524,6 +528,8 @@ class Input {
   bool &spatpdm_disk_dump() {return m_spatpdm_disk_dump;}
   const bool &pdm_unsorted() const {return m_pdm_unsorted;}
   bool &pdm_unsorted(){return m_pdm_unsorted;}
+  const std::vector<int> &specificpdm() const {return m_specificpdm;}
+  std::vector<int> &specificpdm() {return m_specificpdm;}
   const bool &store_nonredundant_pdm() const { return m_store_nonredundant_pdm;}
   bool &store_nonredundant_pdm() { return m_store_nonredundant_pdm;}
   int slater_size() const {return m_norbs;}
@@ -533,6 +539,7 @@ class Input {
   const bool &npdm_intermediate() const { return m_npdm_intermediate; }
   bool &npdm_multinode() { return m_npdm_multinode; }
   const bool &npdm_multinode() const { return m_npdm_multinode; }
+  int bra_M() const {return m_bra_M;}
 };
 }
 #endif
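
The new members are spliced into Input's existing boost::serialization `ar &` chain, which means archives written before this change will not load with the new code: the stream order must match field for field between save and load. A minimal self-contained model of the pattern (Settings is a toy stand-in for Input):

    #include <sstream>
    #include <boost/archive/text_iarchive.hpp>
    #include <boost/archive/text_oarchive.hpp>

    struct Settings {
      bool prebuild = false;
      int braM = 0;
      template <class Archive>
      void serialize(Archive& ar, const unsigned /*version*/) {
        ar & prebuild & braM;   // order must match between save and load
      }
    };

    int main() {
      std::stringstream ss;
      Settings out; out.prebuild = true; out.braM = 500;
      { boost::archive::text_oarchive oa(ss); oa << out; }
      Settings in;
      { boost::archive::text_iarchive ia(ss); ia >> in; }
      return (in.prebuild && in.braM == 500) ? 0 : 1;
    }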
diff --git a/modules/ResponseTheory/sweepCompress.C b/modules/ResponseTheory/sweepCompress.C
index 20cf4ec..5358953 100644
--- a/modules/ResponseTheory/sweepCompress.C
+++ b/modules/ResponseTheory/sweepCompress.C
@@ -182,7 +182,12 @@ void SpinAdapted::SweepCompress::BlockDecimateAndCompress (SweepParams &sweepPar
   kettracedMatrix.makedensitymatrix(solution, newbig, dmrginp.weights(sweepiter), 0.0, 0.0, true);
   double braerror, keterror;
   if (!mpigetrank()) {
-    keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, newbig.get_rightBlock()->get_ketStateInfo().totalStates, 0);
+    if (dmrginp.hamiltonian() == BCS) {
+      int largeNumber = 1000000;
+      keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, largeNumber, 0);
+    } else {
+      keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, newbig.get_rightBlock()->get_ketStateInfo().totalStates, 0);
+    }
     braerror = makeRotateMatrix(bratracedMatrix, brarotateMatrix, sweepParams.get_keep_states(), sweepParams.get_keep_qstates());
   }
   kettracedMatrix.deallocate();
@@ -484,8 +489,14 @@ void SpinAdapted::SweepCompress::Startup (SweepParams &sweepParams, StackSpinBlo
   double keterror, braerror;
   if (!mpigetrank()) {
-    keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, newbig.get_rightBlock()->get_ketStateInfo().totalStates, 0);
-    braerror = makeRotateMatrix(bratracedMatrix, brarotateMatrix, newbig.get_rightBlock()->get_braStateInfo().totalStates, 0);
+    if (dmrginp.hamiltonian() == BCS) {
+      int largeNumber = 1000000;
+      keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, largeNumber, 0);
+      braerror = makeRotateMatrix(bratracedMatrix, brarotateMatrix, largeNumber, 0);
+    } else {
+      keterror = makeRotateMatrix(kettracedMatrix, ketrotateMatrix, newbig.get_rightBlock()->get_ketStateInfo().totalStates, 0);
+      braerror = makeRotateMatrix(bratracedMatrix, brarotateMatrix, newbig.get_rightBlock()->get_braStateInfo().totalStates, 0);
+    }
   }
 #ifndef SERIAL
   mpi::communicator world;
@@ -636,9 +647,6 @@ void SpinAdapted::SweepCompress::WavefunctionCanonicalize (SweepParams &sweepPar
   //***********
   davidson_f(solution[0], outputState[0]);
-  pout << solution[0]<
   std::vector<Matrix> ketrotateMatrix, brarotateMatrix;
   StackDensityMatrix bratracedMatrix(newSystem.get_braStateInfo()), kettracedMatrix(newSystem.get_ketStateInfo());
   bratracedMatrix.allocate(newSystem.get_braStateInfo());
diff --git a/modules/ResponseTheory/sweepResponse.C b/modules/ResponseTheory/sweepResponse.C
index 1ef9870..6265d84 100644
--- a/modules/ResponseTheory/sweepResponse.C
+++ b/modules/ResponseTheory/sweepResponse.C
@@ -563,29 +563,30 @@ double SpinAdapted::SweepResponse::do_one(SweepParams &sweepParams, const bool &
   system.set_twoInt(activeSpaceIntegral);
-  for (int l=0; l
diff --git a/modules/npdm/npdm.C b/modules/npdm/npdm.C
--- a/modules/npdm/npdm.C
+++ b/modules/npdm/npdm.C
@@ ... @@ void npdm(NpdmOrder npdm_order, bool transitionpdm)
+      npdm_driver->clear();
+      if (dmrginp.specificpdm().size() == 1)
+        npdm_do_one_sweep(*npdm_driver, sweepParams, false, direction, false, 0, dmrginp.specificpdm()[0], dmrginp.specificpdm()[0]);
+      else if (dmrginp.specificpdm().size() == 2)
+        npdm_do_one_sweep(*npdm_driver, sweepParams, false, direction, false, 0, dmrginp.specificpdm()[0], dmrginp.specificpdm()[1]);
+      else
+        abort();
+      cputime = timerX.elapsedcputime();
+      p3out << "\t\t\t NPDM sweep time " << timerX.elapsedwalltime() << " " << cputime << endl;
+
+      if (dmrginp.hamiltonian() == BCS && npdm_order == NPDM_ONEPDM) {
+        Timer timerX1;
+        boost::shared_ptr<Npdm_driver_base> npdm_driver1 = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
+        npdm_driver1->clear();
+
+        if (dmrginp.specificpdm().size() == 1)
+          npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, dmrginp.specificpdm()[0], dmrginp.specificpdm()[0]);
+        else if (dmrginp.specificpdm().size() == 2)
+          npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, dmrginp.specificpdm()[0], dmrginp.specificpdm()[1]);
+        else
+          abort();
+        cputime = timerX1.elapsedcputime();
+        p3out << "\t\t\t NPDM sweep time " << timerX1.elapsedwalltime() << " " << cputime << endl;
+      }
+      return;
+    }
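
In both sweepCompress.C hunks above, the BCS branch replaces the ket-side cap newbig.get_rightBlock()->get_ketStateInfo().totalStates with largeNumber = 1000000, i.e. an effectively unbounded cap so no ket quanta are discarded during decimation; presumably totalStates is no longer a safe bound once particle-number sectors mix. The choice reduces to (assumed rationale, mirroring the patch's branch):

    inline int ketKeepStates(bool isBCS, int ketTotalStates) {
      const int largeNumber = 1000000;   // effectively "keep everything"
      return isBCS ? largeNumber : ketTotalStates;
    }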
 
    if(dmrginp.transition_diff_irrep()){ // It is used when bra and ket has different spatial irrep
@@ -474,9 +524,9 @@ void npdm(NpdmOrder npdm_order, bool transitionpdm)
      if (dmrginp.hamiltonian() == BCS && npdm_order == NPDM_ONEPDM) {
        Timer timerX1;
-        npdm_driver = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
-        npdm_driver->clear();
-        npdm_do_one_sweep(*npdm_driver, sweepParams, false, !direction, false, 0, state,stateB);
+        boost::shared_ptr<Npdm_driver_base> npdm_driver1 = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
+        npdm_driver1->clear();
+        npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, state,stateB);
        cputime = timerX1.elapsedcputime();
        p3out << "\t\t\t NPDM sweep time " << timerX1.elapsedwalltime() << " " << cputime << endl;
      }
@@ -495,9 +545,9 @@ void npdm(NpdmOrder npdm_order, bool transitionpdm)
      p3out << "\t\t\t NPDM sweep time " << timerX.elapsedwalltime() << " " << cputime << endl;
      if (dmrginp.hamiltonian() == BCS && npdm_order == NPDM_ONEPDM) {
        Timer timerX1;
-        npdm_driver = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
-        npdm_driver->clear();
-        npdm_do_one_sweep(*npdm_driver, sweepParams, false, !direction, false, 0, state,state);
+        boost::shared_ptr<Npdm_driver_base> npdm_driver1 = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
+        npdm_driver1->clear();
+        npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, state,state);
        cputime = timerX1.elapsedcputime();
        p3out << "\t\t\t NPDM sweep time " << timerX1.elapsedwalltime() << " " << cputime << endl;
      }
@@ -556,6 +606,15 @@ void npdm(NpdmOrder npdm_order, bool transitionpdm)
      npdm_do_one_sweep(*npdm_driver, sweepParams, false, direction, false, 0, state,stateB);
      cputime = timerX.elapsedcputime();
      p3out << "\t\t\t NPDM sweep time " << timerX.elapsedwalltime() << " " << cputime << endl;
+
+      if (dmrginp.hamiltonian() == BCS && npdm_order == NPDM_ONEPDM) {
+        Timer timerX1;
+        boost::shared_ptr<Npdm_driver_base> npdm_driver1 = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
+        npdm_driver1->clear();
+        npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, state, stateB);
+        cputime = timerX1.elapsedcputime();
+        p3out << "\t\t\t NPDM sweep time " << timerX1.elapsedwalltime() << " " << cputime << endl;
+      }
     }
   }
 }
@@ -579,6 +638,12 @@ void npdm(NpdmOrder npdm_order, bool transitionpdm)
      // Do NPDM sweep
      npdm_driver->clear();
      npdm_do_one_sweep(*npdm_driver, sweepParams, false, direction, false, 0, state,state);
+
+      if (dmrginp.hamiltonian() == BCS && npdm_order == NPDM_ONEPDM) {
+        boost::shared_ptr<Npdm_driver_base> npdm_driver1 = boost::shared_ptr<Npdm_driver_base>( new Pairpdm_driver( dmrginp.last_site() ) );
+        npdm_driver1->clear();
+        npdm_do_one_sweep(*npdm_driver1, sweepParams, false, direction, false, 0, state, state);
+      }
     }
   }
 }
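
The saveBlock.C hunks that follow replace the ad-hoc MPI tag optype+i*10+1000*J+100000*I with the mixed-radix encoding ((100*optype+i)*nsites+J)*nsites+I. Under the implied bounds the new formula is collision-free, whereas the old one could alias distinct tuples. A small sketch (makeTag is a hypothetical helper; the patch inlines the expression):

    #include <cassert>

    // Mixed-radix encoding: with 0 <= I,J < nsites and 0 <= i < 100 per
    // optype, distinct (optype, i, J, I) tuples map to distinct tags.
    inline int makeTag(int optype, int i, int J, int I, int nsites) {
      assert(I < nsites && J < nsites && i < 100);
      return ((100 * optype + i) * nsites + J) * nsites + I;
    }

For contrast, the old scheme could collide whenever optype reached 10: (optype=10, i=0) and (optype=0, i=1) both produced tag 10 for I=J=0, so messages from different operator arrays could be matched to the wrong receive.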
diff --git a/saveBlock.C b/saveBlock.C
index c3a3b2c..6396c03 100644
--- a/saveBlock.C
+++ b/saveBlock.C
@@ -337,12 +337,13 @@ void StackSpinBlock::sendcompOps(StackOp_component_base& opcomp, int I, int J, i
 #ifndef SERIAL
   boost::mpi::communicator world;
   std::vector<boost::shared_ptr<StackSparseMatrix> > oparray = opcomp.get_element(I,J);
+  int nsites = dmrginp.last_site();
   for(int i=0; i
 get_data(), oparray[i]->memoryUsed(), MPI_DOUBLE, trimap_2d(I, J, dmrginp.last_site()));
-    MPI_Send(oparray[i]->get_data(), oparray[i]->memoryUsed(), MPI_DOUBLE, processorindex(compsite), optype+i*10+1000*J+100000*I, Calc);
+    MPI_Send(oparray[i]->get_data(), oparray[i]->memoryUsed(), MPI_DOUBLE, processorindex(compsite), ((100*optype+i)*nsites+J)*nsites+I, Calc);
   }
 #endif
 }
@@ -352,8 +353,9 @@ void StackSpinBlock::recvcompOps(StackOp_component_base& opcomp, int I, int J, i
 #ifndef SERIAL
   boost::mpi::communicator world;
   std::vector<boost::shared_ptr<StackSparseMatrix> > oparray = opcomp.get_element(I,J);
+  int nsites = dmrginp.last_site();
   for(int i=0; i
 memoryUsed());
     if (additionalMemory == 0)
@@ -364,7 +366,7 @@ void StackSpinBlock::recvcompOps(StackOp_component_base& opcomp, int I, int J, i
     oparray[i]->allocateOperatorMatrix();
     //now broadcast the data
-    MPI_Recv(oparray[i]->get_data(), oparray[i]->memoryUsed(), MPI_DOUBLE, processorindex(trimap_2d(I, J, dmrginp.last_site())), optype+i*10+1000*J+100000*I, Calc,MPI_STATUS_IGNORE);
+    MPI_Recv(oparray[i]->get_data(), oparray[i]->memoryUsed(), MPI_DOUBLE, processorindex(trimap_2d(I, J, dmrginp.last_site())),((100*optype+i)*nsites+J)*nsites+I, Calc,MPI_STATUS_IGNORE);
   }
 #endif
 }
diff --git a/solver.C b/solver.C
index 0906415..2e47142 100644
--- a/solver.C
+++ b/solver.C
@@ -126,26 +126,25 @@ void SpinAdapted::Solver::solve_wavefunction(vector<StackWavefunction>& solution
   guessWaveTypes guesstype = guesswavetype;
   if (guesswavetype == TRANSPOSE && big.get_leftBlock()->get_rightBlock() == 0)
-   guesstype = BASIC;
+    guesstype = BASIC;
   GuessWave::guess_wavefunctions(solution, e, big, guesstype, onedot, dot_with_sys, nroots, additional_noise, currentRoot);
   dmrginp.guesswf->stop();
 
   for (int istate=0; istate
 dmrginp.blockdavid->start();
   Linear::block_davidson(solution, e, tol, warmUp, *davidson_f, useprecond, currentRoot, lowerStates);
   dmrginp.blockdavid->stop();
-
+  delete davidson_f;
   if (mpigetrank() == 0) {
-   for (int i=solution.size()-1; i>=nroots; i--)
-     solution[i].deallocate();
+    for (int i=solution.size()-1; i>=nroots; i--)
+      solution[i].deallocate();
   }
-  delete davidson_f;
 }
 else if (dmrginp.solve_method() == CONJUGATE_GRADIENT)
 {
@@ -168,18 +167,14 @@ void SpinAdapted::Solver::solve_wavefunction(vector<StackWavefunction>& solution
   solution[0].Clear();
 
   double functional = Linear::MinResMethod(solution[0], tol, *davidson_f, lowerStates);
+  delete davidson_f;
   //double functional = Linear::ConjugateGradient(solution[0], tol, davidson_f, lowerStates);
 
   if (mpigetrank() == 0)
-   e(1) = functional;
-  delete davidson_f;
+    e(1) = functional;
 }
 else {
   pout << "Lanczos is no longer supported"<
diff --git a/guess_wavefunction.C b/guess_wavefunction.C
--- a/guess_wavefunction.C
+++ b/guess_wavefunction.C
@@ ... @@
 rightStateInfo, intermediate1, NO_PARTICLE_SPIN_NUMBER_CONSTRAINT);
   intermediate1.CollectQuanta();
-  TensorProduct(intermediate1, *stateInfo.leftStateInfo->leftStateInfo, intermediate2, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  if (dmrginp.hamiltonian() == BCS) {
+    TensorProduct(intermediate1, *stateInfo.leftStateInfo->leftStateInfo, intermediate2, SPIN_NUMBER_CONSTRAINT);
+  } else {
+    TensorProduct(intermediate1, *stateInfo.leftStateInfo->leftStateInfo, intermediate2, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  }
   onedot_transpose_wavefunction(intermediate2, stateInfo, oldWave, trial);
   oldStateInfo.Free();
   oldWave.deallocate();
@@ -198,7 +202,7 @@ void GuessWave::onedot_transpose_wavefunction(const StateInfo& guessstateinfo, c
     const StackWavefunction& guesswf, StackWavefunction& transposewf)
 {
   ObjectMatrix3D< vector<Matrix> > threewave;
-  // first convert into three index wavefunction [s.][e] -> [s].[e]
+  // first convert into three index wavefunction [s.][e] -> [s].[e]
   onedot_twoindex_to_threeindex_wavefunction(guessstateinfo, guesswf, threewave);
 
   ObjectMatrix3D< vector<Matrix> > threewavetranspose(threewave.NDim2(), threewave.NDim1(), threewave.NDim0());
@@ -543,7 +547,7 @@ void GuessWave::onedot_twoindex_to_threeindex_shufflesysdot(const StateInfo& sta
 void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const StateInfo& stateInfo, const std::vector<int> &leftsites, const std::vector<int> &rightsites, const int state, const bool &onedot, const bool& transpose_guess_wave)
 {
   p2out << "\t\t\t Transforming previous wavefunction " << endl;
-  
+
   ObjectMatrix3D< vector<Matrix> > oldTrialWavefunction;
   ObjectMatrix3D< vector<Matrix> > newTrialWavefunction;
   StateInfo oldStateInfo;
@@ -580,7 +584,7 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const StackSpinBlock &big, const int state, const bool &onedot, const bool& transpose_guess_wave)
 {
   p2out << "\t\t\t Transforming previous wavefunction " << endl;
-  
+
   ObjectMatrix3D< vector<Matrix> > oldTrialWavefunction;
   ObjectMatrix3D< vector<Matrix> > newTrialWavefunction;
   StateInfo oldStateInfo;
@@ -597,7 +601,6 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const
     oldWave.LoadWavefunctionInfo (oldStateInfo, big.get_leftBlock()->get_sites(), state, true);
     LoadRotationMatrix (big.get_leftBlock()->get_sites(), leftRotationMatrix, state);
   }
-
   for (int q = 0; q < leftRotationMatrix.size (); ++q)
   {
     if (leftRotationMatrix [q].Nrows () > 0)
@@ -615,7 +618,6 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const
   tempoldWave.Clear();
 
   TransformLeftBlock(oldWave, big.get_stateInfo(), leftRotationMatrix, tempoldWave);
-
   StateInfo tempoldStateInfo;
   if (dmrginp.hamiltonian() == BCS)
     TensorProduct (*(big.get_stateInfo().leftStateInfo->leftStateInfo), *oldStateInfo.rightStateInfo, tempoldStateInfo,
@@ -640,7 +642,6 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const
   tempnewStateInfo.CollectQuanta();
 
   onedot_shufflesysdot(tempoldStateInfo, tempnewStateInfo, tempoldWave, tempnewWave);
-
   LoadRotationMatrix (big.get_rightBlock()->get_sites(), rightRotationMatrix, state);
 
   TransformRightBlock(tempnewWave, oldStateInfo, rightRotationMatrix, trial);
@@ -664,6 +665,7 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const
     onedot_transform_wavefunction(oldStateInfo, big.get_stateInfo(), oldWave, leftRotationMatrix, rightRotationMatrix, trial, transpose_guess_wave);
   }
+
   oldStateInfo.Free();
   oldWave.deallocate();
   double norm = DotProduct(trial, trial);
@@ -720,7 +722,11 @@ void GuessWave::transform_previous_twodot_to_onedot_wavefunction(StackWavefuncti
   trial.set_fermion() = false;
   trial.set_onedot(true);
 
-  TensorProduct( *big.get_stateInfo().leftStateInfo->leftStateInfo, *oldStateInfo.rightStateInfo, tempoldStateInfo, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  if (dmrginp.hamiltonian() == BCS) {
+    TensorProduct( *big.get_stateInfo().leftStateInfo->leftStateInfo, *oldStateInfo.rightStateInfo, tempoldStateInfo, SPIN_NUMBER_CONSTRAINT);
+  } else {
+    TensorProduct( *big.get_stateInfo().leftStateInfo->leftStateInfo, *oldStateInfo.rightStateInfo, tempoldStateInfo, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+  }
   onedot_shufflesysdot( tempoldStateInfo, big.get_stateInfo(), tmpwavefunction, trial);
 
   tmpwavefunction.deallocate();
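
The reindented block in the next hunk implements the right-block rotation spelled out in its comment. In index form (notation mine, not the code's): with rotation matrix R_{c'c} taking old right-block states c to renormalized states c', the wavefunction coefficients transform with the plain transpose,

    t_{a c'} = \sum_{c} d_{a c} \, (R^{\mathsf T})_{c c'} = \sum_{c} d_{a c} \, R_{c' c},

which is what MatrixMultiply(oM, 'n', rM, 'n', tM, parity) computes after rM = rM.t(). The comment insists on the transpose rather than the Hermitian conjugate; for the real matrices used here the two coincide numerically, so the distinction only records the intended transformation law.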
@@ -735,7 +741,6 @@ void GuessWave::transform_previous_wavefunction(StackWavefunction& trial, const
 // ket determines use braSateinfo or ketStateinfo to guess_wavefunctions.
 {
   p2out << "\t\t\t Transforming previous wavefunction " << endl;
-
   ObjectMatrix3D< vector<Matrix> > oldTrialWavefunction;
   ObjectMatrix3D< vector<Matrix> > newTrialWavefunction;
   StateInfo oldStateInfo;
@@ -877,28 +882,27 @@ void GuessWave::onedot_transform_wavefunction(const StateInfo& oldstateinfo, con
     aSz = newstateinfo.leftStateInfo->quanta.size ();
     cSz = newstateinfo.rightStateInfo->quanta.size();
   }
-
   ObjectMatrix<Matrix> tmp(oldASz, cSz); //cSz
   for (int a = 0; a < oldASz; ++a)
     for (int c = 0; c < oldCSz; ++c) // oldCSz <= cSz
     {
       if (oldwavefunction.allowed(a,c))
      {
-       const StackMatrix& oM = oldwavefunction.operator_element(a, c);
-
-       int transC = oldstateinfo.rightStateInfo->newQuantaMap [c];
-
-       Matrix& tM = tmp (a, transC);
-       Matrix rM = rightRotationMatrix[transC];
-       tM.ReSize (oM.Nrows (), rM.Nrows ());
-
-       double parity = 1.;
-
-       SpinAdapted::Clear (tM);
-       // we have the rotation matrices C_r'r, C_l'l, and we are transforming the wavefunction
-       // coefficient d_lr -> consequently, we need to multiply by the transpose, but NOT hermitian
-       // conjugate, of rM
-       rM = rM.t();
-       MatrixMultiply (oM, 'n', rM, 'n', tM, parity); // phi -> c psi : phi d -> psi c c^t d : consequently compute c^t d
+        const StackMatrix& oM = oldwavefunction.operator_element(a, c);
+
+        int transC = oldstateinfo.rightStateInfo->newQuantaMap [c];
+
+        Matrix& tM = tmp (a, transC);
+        Matrix rM = rightRotationMatrix[transC];
+        tM.ReSize (oM.Nrows (), rM.Nrows ());
+
+        double parity = 1.;
+
+        SpinAdapted::Clear (tM);
+        // we have the rotation matrices C_r'r, C_l'l, and we are transforming the wavefunction
+        // coefficient d_lr -> consequently, we need to multiply by the transpose, but NOT hermitian
+        // conjugate, of rM
+        rM = rM.t();
+        MatrixMultiply (oM, 'n', rM, 'n', tM, parity); // phi -> c psi : phi d -> psi c c^t d : consequently compute c^t d
      }
     }
diff --git a/stackopxop.C b/stackopxop.C
index 7fff2c3..b46bd8f 100644
--- a/stackopxop.C
+++ b/stackopxop.C
@@ -38,7 +38,7 @@ void SpinAdapted::stackopxop::cdxcdcomp(const StackSpinBlock* otherblock, std::v
   if (otherblock->has(DES_CRECOMP)) {
     op1 = loopblock->get_op_array(DES_CRE).get_element(i,j).at(opind);
     double parity = 1.0;
-    if (dmrginp.spinAdapted() && dmrginp.hamiltonian() != BCS)
+    if (dmrginp.spinAdapted())
      parity = getCommuteParity(-getSpinQuantum(i), getSpinQuantum(j), op1->get_deltaQuantum()[0]);
     op2 = otherblock->get_op_array(DES_CRECOMP).get_element(i,j).at(opind);
     SpinAdapted::operatorfunctions::TensorProduct(otherblock, *op2, *op1, b, &(b->get_stateInfo()), o[ilock], factor*parity, numthreads);
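
In the stackopxop.C hunk above, the commutation parity is now applied to BCS Hamiltonians as well: dropping the `!= BCS` guard means exchanging the DES_CRE and DES_CRECOMP factors always picks up the fermionic sign. The underlying rule, in a stripped-down scalar form (commuteParity is a hypothetical standalone version; the real getCommuteParity works on SpinQuantum objects):

    // Exchanging two operator blocks that carry n1 and n2 fermions multiplies
    // the matrix element by (-1)^(n1*n2): a minus sign only when both are odd.
    inline double commuteParity(int n1, int n2) {
      return (n1 % 2 != 0 && n2 % 2 != 0) ? -1.0 : 1.0;
    }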
diff --git a/sweep_mps.C b/sweep_mps.C
index 9ccb4dc..81b0fdd 100644
--- a/sweep_mps.C
+++ b/sweep_mps.C
@@ -45,7 +45,7 @@ void SpinAdapted::Sweep::CanonicalizeWavefunction(SweepParams &sweepParams, cons
   sweepParams.set_block_iter() = 0;
 
   std::vector<int> sites;
-  int new_site, wave_site;
+  int new_site;
   if (forward) {
     pout << "\t\t\t Starting sweep "<< sweepParams.set_sweep_iter()<<" in forwards direction"<
 std::vector<int> complementarySites, spindotsites(1, new_site), oldsites = sites, oldcomplement;
@@ -114,34 +112,39 @@ void SpinAdapted::Sweep::CanonicalizeWavefunction(SweepParams &sweepParams, cons
     else
      StateInfo::restore(!forward, complementarySites, envstate, currentstate);
 
-    TensorProduct(newState1, envstate, bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+    if (dmrginp.hamiltonian() == BCS) {
+      TensorProduct(newState1, envstate, bigstate, SPIN_NUMBER_CONSTRAINT);
+    } else {
+      TensorProduct(newState1, envstate, bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+    }
 
     w.initialise(dmrginp.effective_molecule_quantum_vec(), newState1, envstate, true);
     w.Clear();
+
     if (sweepParams.get_block_iter() == 0)
      GuessWave::transpose_previous_wavefunction(w, bigstate, complementarySites, spindotsites, currentstate, true, true);
     else
      GuessWave::transform_previous_wavefunction(w, bigstate, oldsites, oldcomplement, currentstate, true, true);
-
     w.SaveWavefunctionInfo(bigstate, sites, currentstate);
-
+
     //make the newstate
     std::vector<Matrix> rotation1;
-
-
+
     StackDensityMatrix tracedMatrix(*bigstate.leftStateInfo);
     tracedMatrix.allocate(*bigstate.leftStateInfo);
-    operatorfunctions::MultiplyWithOwnTranspose (w, tracedMatrix, 1.0);
+    operatorfunctions::MultiplyWithOwnTranspose (w, tracedMatrix, 1.0);
     int largeNumber = 1000000;
     if (!mpigetrank())
      double error = makeRotateMatrix(tracedMatrix, rotation1, largeNumber, sweepParams.get_keep_qstates());
     SaveRotationMatrix (sites, rotation1, currentstate);
+    //SaveRotationMatrix (sites, rotation1, -1);
 
     StateInfo renormState1;
     SpinAdapted::StateInfo::transform_state(rotation1, newState1, renormState1);
     StateInfo::store(forward, sites, renormState1, currentstate);
+    //StateInfo::store(forward, sites, renormState1, -1);
     stateInfo1 = renormState1;
 
     ++sweepParams.set_block_iter();
@@ -236,7 +239,11 @@ void SpinAdapted::Sweep::CanonicalizeWavefunctionPartialSweep(SweepParams &sweep
     else
      StateInfo::restore(!forward, complementarySites, envstate, currentstate);
 
-    TensorProduct(newState1, envstate, bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+    if (dmrginp.hamiltonian() == BCS) {
+      TensorProduct(newState1, envstate, bigstate, SPIN_NUMBER_CONSTRAINT);
+    } else {
+      TensorProduct(newState1, envstate, bigstate, PARTICLE_SPIN_NUMBER_CONSTRAINT);
+    }
 
     w.initialise(dmrginp.effective_molecule_quantum_vec(), newState1, envstate, true);
     w.Clear();
diff --git a/wrapper.C b/wrapper.C
index 4dc14d2..c6b96b6 100644
--- a/wrapper.C
+++ b/wrapper.C
@@ -245,16 +245,16 @@ void test(char* infile)
   std::vector< std::vector<double> > Overlap(nstates, std::vector<double>(nstates, 0.0));
-  for (int i=1; i