へっぽこ博士のなんちゃって研究室

PythonやR、分子軌道計算等に関する記事を書きます

Quantum ESPRESSO : cif2pw

- 事始め


しばらくxlt2pw*1のお世話になっていたのですが、変換をコマンドラインだけで完結したい、カットオフエネルギーを自動でセットしたい等、自分用に改良していったら別物になってしまったので、晒しておきます。
一応、Quantum ESPRESSOのPW/toolsフォルダにcif2qe.shというスクリプトがあるのですが、awkプログラム言語で書かれているうえ、長らくメンテナンスされていないようなので、これを編集する気にはなれませんでした。

- コード

#!/usr/bin/python3

# cif2pw.py : modified xtl2pw.py
# version 1.0 programmed by Doktor-Heppoko

import sys
import os
import subprocess as sp
import numpy as np
from pymatgen.core import periodic_table as mg

# environment check
# check python version
if sys.version_info < (3, 6):
    print("Python 3.6 or lattar required")
    sys.exit(1)
    
# check NUMPY version
if np.version.version < "1.10":
    print("Numpy 1.10 or lattar required")
    sys.exit(1)

# check cif2cell installed
try:
    proc = sp.run(["which","cif2cell"],stdout=sp.PIPE)
    if proc.returncode != 0:
        print("cif2cell not installed")
        sys.exit(1)
except Exception as emsg:
    print(emsg)
    sys.exit(1)

# argment check
if len(sys.argv) < 3:
    print("Usage      : cif2pw.py [ CIF file ] [ method ] [ constraint ]")
    print("For example: > python3 cif2pw.py hoge.cif relax 1")
    print(" method    : scf, relax, vc-relax")
    print(" constraint: 0 fixed, 1 free[default]")
    sys.exit(1)

# initialize variable literal
# argments
cif = sys.argv[1]
method = sys.argv[2]
try:
    flag = sys.argv[3]
except:
    flag = 1
flag_str = "free" if flag == 1 else "fixed"
print(f"Reading File                 : {cif}") 
print(f"Calculation Method           : {method}") 
print(f"Constraint Flag              : {flag_str}") 

# file related
fbase = os.path.splitext(cif)[-2]
ciftmp = "cif2cell.tmp"
output = f"{fbase}.{method}.in"

# calcuration condition related
# CONTROL card
nstep = 100              # iterations for optimization
etot_conv_thr = "1.0D-4" # convergence threshold on total energy
forc_conv_thr = "1.0D-3" # convergence threshold on forces
pseudo_dir = '/home/ubuntu/Downloads/pslibrary.1.0.0/pbe/PSEUDOPOTENTIALS/'

# SYSTEM card
nat = 0                # number of atoms (auto set)
ntyp = 0               # kinds of atomic species (auto set)
nbnd = 0               # total valence electrons (auto set)
ecutwfc_def = 25       # kinetic energy cutoff for wavefunctions
ecutrho_def = 100      # kinetic energy cutoff for charge density and potential
occupations = "fixed"  # for insulators such as general organic compounds
input_dft = "vdw-df2"  # See Modules/funct.f90
vdw_corr = "Grimme-D3" # non-local functions should be specified by input_dft
use_nbnd = False       # auto-calculation flag : use = True (valence * 1.2)
use_vdw_corr = True    # use van der Waals correction flag : use = True

# ELECTRONS card
electron_maxstep = 100    # iterations for SCF
conv_thr = "1.0D-6"       # convergence threshold for SCF
mixing_mode = "local-TF"  # for insulators such as general organic compounds
mixing_beta = 0.4         # ratio of new wave functions to be mixed
diagonalization = "david" # diagonalization algorithm

# IONS card
ion_dynamics = "bfgs"       # mathematical optimization algorithm for ions
ion_temperature = "initial" # ion temperature control method
tempw = "297.16"            # 25 degrees Celsius

# CELL card
cell_dynamics = "bfgs"   # mathematical optimization algorithm for cell
press = "1.0D-3"         # in kbar : 1.0D-3kbar = 0.1MPa = ca. 1 atm
press_conv_thr = "0.5D0" # convergence threshold on pressure
cell_dofree = "all"      # moved cell parameters

# ATOMIC_SPECIES card
kinds = np.empty(0)      # element symbol
wt = np.empty(0)         # atomic mass from pymatgen
ps = np.empty(0)         # pseudo potensial file generated from element symbol
pstype_HA = ".pbe-kjpaw_psl.1.0.0.UPF"     # for H atom
pstype_NM = ".pbe-n-kjpaw_psl.1.0.0.UPF"   # for non-metal elements
pstype_ME = ".pbe-spn-kjpaw_psl.1.0.0.UPF" # for metal elements

atom_count = np.empty(0) # amount of element
ps_type = np.empty(0)    # pseudo potential type from file
ps_func = np.empty(0)    # dft functional type from file
valences = np.empty(0)   # valence electrons from file
wfc = np.empty(0)        # kinetic energy cutoff for wave function from file
rho = np.empty(0)        # kinetic energy cutoff for charge density and potential from file

# ATOMIC_POSITIONS card
atom = np.empty(0) # atomic label from cif2cell template
px = np.empty(0)   # x-coordinate from cif2cell template
py = np.empty(0)   # y-coordinate from cif2cell template
pz = np.empty(0)   # z-coordinate from cif2cell template

# K_POINTS card
nk = np.ones(3, dtype='int') # number of supplied special k-points (auto set)
k_point = 700                # maximum points by Monkhorst-Pack grids

# CELL_PARAMETERS card 
V = np.zeros((3,3)) # lattice vector [ 3 x 3 ] from cif2cell
cell = np.ones(3)   # lattice length [ a b c ]

# main processing
# creat pwscf .in template file 
try:
    proc = sp.run(["cif2cell","-q","-p","pwscf","-f",cif,"-o",ciftmp,"--pwscf-cartesian"],stdout=sp.PIPE)
    if proc.returncode != 0:
        sys.exit(1)
except Exception as emsg:
    print(emsg)
    sys.exit(1)

# read template file
try:
    with open(ciftmp,'r') as f:
        ftxt = f.readlines()
except OSError as emsg:
    print(emsg)
    sys.exit(1)
    
# delete template file
try:
    proc = sp.run(["rm",ciftmp],stdout=sp.PIPE)
except Exception as emeg:
    print(emsg)
    sys.exit(1)

# get cell parameters
for nline, sline in enumerate(ftxt):
    if "CELL_PARAMETERS" in sline:
        break
if nline == (len(ftxt)-1):
    print("Not Found CELL_PARAMETERS card")
    sys.exit(1)
for i in range(3):
    astr = ftxt[nline+i+1].strip().split()
    for j in range(3):
        V[i][j] = float(astr[j])

for i in range(3):
    cell[i] = np.sqrt(V[i] @ V[i])    
#print(f"Lattice Length               : {cell}")

# get atom positions
for nline, sline in enumerate(ftxt):
    if "ATOMIC_POSITIONS" in sline:
        break
if nline == (len(ftxt)-1):
    print("Not Found ATOMIC_POSITIONS")
    sys.exit(1)
for sline in ftxt[nline+1:]:
    if len(sline) == 0:
        break
    astr = sline.strip().split()
    atom = np.append(atom, astr[0])
    px = np.append(px, float(astr[1]))
    py = np.append(py, float(astr[2]))
    pz = np.append(pz, float(astr[3]))

# check atom kinds
nat = len(atom)
kinds, atom_count = np.unique(atom, return_counts=True)
formula = ""
for symbol, count in zip(kinds, atom_count):
    formula += f"{symbol}{count} "
print(f"Chemical Formula             : {formula}")

# set pseudo file
for symbol in kinds:
    wt = np.append(wt, mg.Element(symbol).atomic_mass)
    if symbol == 'H':
        ps = np.append(ps, symbol.rjust(2)+pstype_HA)
    else:
        if mg.Element(symbol).is_metal == True:
            ps = np.append(ps, symbol.rjust(2)+pstype_ME)
        else:
            ps = np.append(ps, symbol.rjust(2)+pstype_NM)

# get pseudo file information
# pseudo potensial file infomation class
class get_psinfo:
    
    def __init__(self, psfile):

        self.psinfo = []
        self.ps_type = ""
        self.ps_func = ""
        self.valence = 0.0
        self.wfc_cutoff = 0.0
        self.rho_cutoff = 0.0
    
        try:
            with open(psfile,'r') as f:
                self.psinfo = f.readlines()
            self.ps_type = self.get_pstype()
            self.ps_func = self.get_psfunc()
            self.valence = self.get_valence()
            self.wfc_cutoff = self.get_wfc()
            self.rho_cutoff = self.get_rho()
        except OSError as emsg:
            print(emsg)
            return
        
    def get_pstype(self):
        flag = False
        for sline in self.psinfo:
            astr = sline.strip().split()
            for s in astr:
                if "pseudo_type=" in s.lower():
                    val = s.split("=")[-1]
                    self.ps_type = val.replace('"','')
                    flag = True
                    break
            if len(self.ps_type) != 0:
                #print(self.ps_type)
                break
        return self.ps_type

    def get_psfunc(self):
        for sline in self.psinfo:
            if "functional:" in sline.lower():
                self.ps_func = sline.split(":")[-1].strip()
                break
        return self.ps_func

    def get_valence(self):
        flag = False
        for sline in self.psinfo:
            astr = sline.strip().split()
            for s in astr:
                if "z_valence=" in s.lower():
                    val = s.split("=")[-1]
                    val = val.replace('"','')
                    self.valence = float(val)
                    flag = True
                    break
            if flag == True:
                break
        return self.valence

    def get_wfc(self):
        flag = False
        for sline in self.psinfo:
            astr = sline.strip().split()
            for s in astr:
                if "wfc_cutoff=" in s.lower():
                    val = s.split("=")[-1]
                    val = val.replace('"','')
                    self.wfc_cutoff = float(val)
                    flag = True
                    break
            if flag == True:
                break
        return self.wfc_cutoff

    def get_rho(self):
        flag = False
        for sline in self.psinfo:
            astr = sline.strip().split()
            for s in astr:
                if "rho_cutoff=" in s.lower():
                    val = s.split("=")[-1]
                    val = val.replace('"','')
                    self.rho_cutoff = float(val)
                    flag = True
                    break
            if flag == True:
                break
        return self.rho_cutoff

print(f"Pseudo Potential Informations:")
for psfile in ps:
    if os.path.exists(pseudo_dir+psfile.strip()) == False:
        print(f"Not Found [{pseudo_dir+psfile.strip()}]")
        sys.exit(1)
    ps_info = get_psinfo(pseudo_dir+psfile.strip())
    ps_type = np.append(ps_type, ps_info.ps_type)
    if ps_info.ps_type == "USPP":
        rho_factor = 10
    else:
        rho_factor = 4
    ps_func = np.append(ps_func, ps_info.ps_func)
    valences = np.append(valences, ps_info.valence)
    wfc = np.append(wfc, ps_info.wfc_cutoff)
    rho = np.append(rho, ps_info.rho_cutoff)
    print(f"{psfile[0:2]:2s} [{ps_info.ps_type:4s}, {ps_info.ps_func:18s}]: valence={ps_info.valence:5.1f}, wfc={ps_info.wfc_cutoff:5.1f}, rho={ps_info.rho_cutoff:6.1f}")

# keyword setting
# set CONTROL card
CONTROL_Card = [
    "&CONTROL\n",
    f"    calculation = '{method}'\n",
    "    verbosity = 'low'\n",
    "    restart_mode = 'from_scratch'\n",
    f"    nstep = {nstep}\n",
    "    outdir = './work/'\n",
    "    wfcdir = './work/'\n",
    f"    prefix = '{fbase}'\n",
    f"    etot_conv_thr = {etot_conv_thr}\n",
    f"    forc_conv_thr = {forc_conv_thr}\n",
    "    disk_io = 'low'\n",
    f"    pseudo_dir = '{pseudo_dir}'\n",
    "/\n\n"
]

# set SYSTEM card
nbnd = np.sum(valences * atom_count)
if use_nbnd == True:
    nbnd = nbnd * 1.2
nbnd_str = " Occupied Only " if use_nbnd == False else "Unoccupied +20%"
print(f"nbnd ({nbnd_str:15s})       : {round(nbnd)}")

min_wfc = max(wfc)
ecutwfc = max(min_wfc, ecutwfc_def)
min_rho = max(rho)
calcd_rho = ecutwfc * rho_factor
ecutrho = max(min_rho, ecutrho_def, calcd_rho)
print(f"Cut Off Energy               : wfc = {ecutwfc:.1f}Ry, rho = {ecutrho:.1f}Ry")

SYSTEM_Card = [
    "&SYSTEM\n",
    "    ibrav = 0\n",
    f"    nat = {nat}\n",
    f"    ntyp = {len(kinds)}\n",
    f"!    nbnd = {round(nbnd)}\n",
    "    tot_charge = 0.0\n",
    f"    ecutwfc = {ecutwfc:.1f}\n",
    f"    ecutrho = {ecutrho:.1f}\n",
    "    nosym = .FALSE.\n",
    "    nspin = 1\n",
    f"!    input_dft = '{input_dft}'\n",
    f"    vdw_corr = '{vdw_corr}'\n",
    "/\n\n"
]

# set ELECTRONS card
ELECTRONS_Card = [
    "&ELECTRONS\n",
    f"    electron_maxstep = {electron_maxstep}\n",
    f"    conv_thr = {conv_thr}\n",
    f"    mixing_mode = '{mixing_mode}'\n",
    f"    mixing_beta = {mixing_beta}\n",
    f"    diagonalization = '{diagonalization}'\n",
    "/\n\n"
]

# set IONS card for relax, md, vc-relax or vc-md
IONS_Card = [
    "&IONS\n",
    f"    ion_dynamics = '{ion_dynamics}'\n",
    f"    ion_temperature = '{ion_temperature}'\n",
    f"    tempw = {tempw}\n",
    "/\n\n"
]

# set CELL card for vc-relax or vc-md
CELL_Card = [
    "&CELL\n",
    f"    cell_dynamics = '{cell_dynamics}'\n",
    f"    press = {press}\n",
    f"    press_conv_thr = {press_conv_thr}\n",
    f"    cell_dofree = '{cell_dofree}'\n",
    "/\n\n"
]

# set ATOMIC_SPECIES card
ATOMIC_SPECIES_Card = [
    "ATOMIC_SPECIES\n"
]
for i in range(len(kinds)):
    ATOMIC_SPECIES_Card += [f"   {kinds[i].rjust(2)} {wt[i]:9.6f} {ps[i]}\n"]
ATOMIC_SPECIES_Card += ["\n"]

# set ATOMIC_POSITIONS card
ATOMIC_POSITIONS_Card = [
    "ATOMIC_POSITIONS {angstrom}\n"
]
for i in range(nat):
    symbol = atom[i].rjust(2)
    PX = format(px[i], "10.6f")
    PY = format(py[i], "10.6f")
    PZ = format(pz[i], "10.6f")
    FLAG = f"{flag}  {flag}  {flag}"
    ATOMIC_POSITIONS_Card += [f"   {symbol}  {PX}  {PY}  {PZ}  {FLAG}\n"]
ATOMIC_POSITIONS_Card += ["\n"]

# set K_POINTS card
cell_max = np.max(cell[0:3])
for i in range(3):
    nk[i] = round(cell[i]/cell_max)
try:
    k_point = k_point / nat
except ZeroDivisionError:
    k_point = 100
factor = np.cbrt(k_point / (nk[0]*nk[1]*nk[2]))
for i in range(3):
    nk[i] = round(nk[i]*factor)
    if nk[i] ==0:
        nk[i] = 1
print(f"K Point = {round(k_point): 4d}               : calcd = {nk[0]*nk[1]*nk[2]} [{nk[0]} x {nk[1]} x {nk[2]}]")

K_POINTS_Card = [
    "K_POINTS {automatic}\n",
    f"  {nk[0]} {nk[1]} {nk[2]} 0 0 0\n",
    "\n"
]

# set CELL_PARAMETERS card
CELL_PARAMETERS_Card = [
    "CELL_PARAMETERS {angstrom}\n",
    f"  {V[0][0]:>9.6f} {V[0][1]:>9.6f} {V[0][2]:>9.6f}\n",
    f"  {V[1][0]:>9.6f} {V[1][1]:>9.6f} {V[1][2]:>9.6f}\n",
    f"  {V[2][0]:>9.6f} {V[2][1]:>9.6f} {V[2][2]:>9.6f}\n",
    "\n"
]

# writing file
print(f"Writing File                 : {output}") 
try:
    with open(output,'w') as f:
        for ftxt in CONTROL_Card:
            f.write(ftxt)
        for ftxt in SYSTEM_Card:
            if "nbnd" in ftxt:
                if use_nbnd == True:
                    ftxt = ftxt.replace('!','')
            if "input_dft" in ftxt:
                if use_vdw_corr == False:
                    ftxt = ftxt.replace('!','')
            if "vdw_corr" in ftxt:
                if use_vdw_corr == False:
                    ftxt = "!"+ftxt
            f.write(ftxt)
        for ftxt in ELECTRONS_Card:
            f.write(ftxt)
        if (method=='relax') or (method=='md') or (method=='vc-relax') or (method=='vc-md'):
            for ftxt in IONS_Card:
                f.write(ftxt)
        if (method=='vc-relax') or (method=='vc-md'):
            for ftxt in CELL_Card:
                f.write(ftxt)
        for ftxt in ATOMIC_SPECIES_Card:
            f.write(ftxt)
        for ftxt in ATOMIC_POSITIONS_Card:
            f.write(ftxt)
        for ftxt in K_POINTS_Card:
            f.write(ftxt)
        for ftxt in CELL_PARAMETERS_Card:
            f.write(ftxt)
except OSError as emsg:
    print(emsg)
    sys.exit(1)

- コードの簡易説明


必要環境
python 3.6以上
・numpy 1.10以上
・pymatgen
・cif2cell

原子座標の読み込み
子プロセスでcif2cell*2を呼び出し、中間ファイルを作成して処理しています。本当は自分で実装したかったのですが、座標変換部分(対称操作)の勉強中なので、ひとまずあるものを利用させていただくということで・・・

余談ですが、pythonCIFファイルを扱えるライブラリとしてはpymatgenのio.cifモジュール*3と、IUCrがサポートしているPyCifRWのCifFile*4が有名です。
前者は機能が豊富なのですが、ソースコードを見る限り独自の実装となっているようで、旧タイプのキーワードを使用しているCIFファイルに対応していないことが致命的です。一見読み込めたように見えても、インスタンスにアクセスするとエラーを吐いてしまうことがあります。
一方後者は、論文誌に投稿されたものだけあって、古い形式のCIFファイルも問題なく読み込めます。ただ、使用方法の情報が少ないのが欠点です。機能が少ない分は、自分でラッパーを用意するだけの手間で済むので、個人的にはこちらの使用をお勧めします。
そのうち、この記事も書きたいです。

疑ポテンシャルファイル情報読み込みクラス
価電子数とカットオフエネルギーの最低値を自動的に取得したかったので、クラスの実装練習がてらに用意してみました。

    #インスタンス作成
    psinfo = get_psinfo("ファイルパス")
    #アトリビュートへのアクセス
    valence = psinfo.valence     #価電子数
    wfc_min = psinfo.wfc_cutoff  #wfc
    rho_min = psinfo.rho_cutoff  #rho

wfc、rhoのカットオフエネルギーはそれぞれ、25、100Ryを初期値として設定しています。疑ポテンシャルファイルに設定されている最低値がこれよりも大きければ、こちらを採用します。ウルトラソフト型やPAW型はwfcが50Ry程度で設定されていることが多いので、デフォルトを50、200Ryにするべきかもしれません。

テンプレート群
書き込みエラーへの対応のため、一旦配列に格納することにしました。各カード毎に分けたので、多分、編集しやすくなっています。

*1:https://www.misasa.okayama-u.ac.jp/~masami/pukiwiki/index.php?Quantum-Espresso%E3%81%A8Vesta%E3%81%AE%E9%80%A3%E6%90%BA

*2:ネットで検索すると、原著優先ということでだいたいpython2用に誘導されるのですが、python3用がこちらからダウンロードできます。
https://github.com/kmu/cif2cell

*3:https://pymatgen.org/pymatgen.io.cif.html

*4:https://www.iucr.org/resources/cif/software/pycifrw