且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

使用Python去除C/C++源程序中的所有注释和空行

更新时间:2022-02-06 23:48:35

说明

使用Python的字符串处理和正则表达式处理实现了一个删除C/C++源程序中所有注释和空行的小脚本。

使用字符串处理

   1: # delete all the comments and empty line of a C/C++ source file
   2: import os, sys,string
   3:  
   4: #-------------------------------------------------------------
   5: def usage():
   6:     print u'''
   7:     help: del_comment.py <filename | dirname>
   8:     '''
   9: #--------------------------------------------------------------
  10: def deal_file(src):
  11:     # file exist or not
  12:     if not os.path.exists(src):
  13:         print 'Error: file - %s doesn\'t exist.'% src
  14:         return False
  15:     if os.path.islink(src):
  16:         print 'Error: file - %s is a link.'
  17:         return False
  18:     filetype = (os.path.splitext(src))[1]
  19:     if not filetype in ['.c','.h','.cpp','.hh','.cc']:
  20:         return False
  21:     try:
  22:         if not os.access(src, os.W_OK):
  23:             os.chmod(src, 0664)
  24:     except:
  25:         print 'Error: you can not chang %s\'s mode.'% src
  26:  
  27:     inputf = open(src, 'r')
  28:     outputfilename = (os.path.splitext(src))[0] + '_no_comment'+filetype
  29:     outputf = open(outputfilename, 'w')    
  30:  
  31:     try:
  32:         #-----find /*.....*/
  33:         rFlag=0
  34:         line=inputf.readline()
  35:         while(line):
  36:             fm=string.find(line,'/*')
  37:             if fm!=-1:  # find a /*
  38:                 if fm>1:# not empty line
  39:                     outputf.write(line[:fm]+'\n')
  40:                 rFlag=1
  41:                 # find */
  42:                 fm=string.find(line,'*/')
  43:                 if fm!=-1:
  44:                     rFlag=0
  45:                 else:
  46:                     line=inputf.readline()
  47:                     while line:
  48:                         fm=string.find(line,'*/')
  49:                         if fm!=-1:
  50:                             rFlag=0
  51:                             break
  52:                         line=inputf.readline()
  53:                     if not line:
  54:                         print 'Match /*...*/ error'
  55:             else: # deal with //
  56:                 fm=string.find(line,'//')
  57:                 if fm==-1:
  58:                     if len(line)>1: # not empty line
  59:                         outputf.write(line)
  60:                 elif fm!=-1 and (not rFlag):
  61:                     if fm>1: # not empty line
  62:                         outputf.write(line[:fm]+'\n')
  63:             #read nextline
  64:             line=inputf.readline()
  65:     except:
  66:         print 'Error: unexcept error.'
  67:         inputf.close()
  68:         outputf.close()
  69:     return True
  70:  
  71: #--------------------------------------------------------------
  72: def deal_dir(src):
  73:     #  dir exist or not
  74:     if not os.path.exists(src):
  75:         print 'Error: dir - %s is not exist.'%s (src)
  76:         return False
  77:     filelists = os.listdir(src)
  78:     for eachfile in filelists:
  79:         eachfile = src + '/' +eachfile
  80:         if os.path.isdir(eachfile):
  81:             deal_dir(eachfile)
  82:         elif os.path.isfile(eachfile):
  83:             deal_file(eachfile)
  84:     return True
  85:  
  86: #--------------------------------------------------------------
  87: def main():
  88:     if len(sys.argv) < 2:
  89:         usage()
  90:         sys.exit(1)
  91:     src = sys.argv[1]
  92:     # get absolute dir/file path
  93:     if os.path.isdir(src):
  94:         dire = os.path.abspath(src)
  95:         dirFlag = True
  96:     elif os.path.isfile(src):
  97:         fl = os.path.abspath(src)
  98:         dirFlag = False
  99:     else:
 100:         print 'File input error'
 101:  
 102:     # deal
 103:     if dirFlag:
 104:         deal_dir(dire)
 105:     else:
 106:         deal_file(fl)
 107:     print 'Successful handle file.'
 108:  
 109: #--------------------------------------------------------------
 110: if __name__ == '__main__':
 111:     main()

使用正则表达式

   1: # delete all the comments and empty line of a C/C++ source file
   2: import os, sys,string,re,glob
   3:  
   4: # /*..*/  //...
   5: Rule1 = "(\/\*(\s|.)*?\*\/)|(\/\/.*)"
   6: c1=re.compile(Rule1)
   7:  
   8: #-------------------------------------------------------------
   9: def usage():
  10:     print u'''
  11:     help: del_comment.py <filename | dirname>
  12:     '''
  13: #--------------------------------------------------------------
  14: def deal_file(src):
  15:     # file exist or not
  16:     if not os.path.exists(src):
  17:         print 'Error: file - %s doesn\'t exist.'% src
  18:         return False
  19:     if os.path.islink(src):
  20:         print 'Error: file - %s is a link.'
  21:         return False
  22:     filetype = (os.path.splitext(src))[1]
  23:     if not filetype in ['.c','.h','.cpp','.hh','.cc']:
  24:         return False
  25:     try:
  26:         if not os.access(src, os.W_OK):
  27:             os.chmod(src, 0664)
  28:     except:
  29:         print 'Error: you can not chang %s\'s mode.'% src
  30:  
  31:     inputf = open(src, 'r')
  32:     outputfilename = (os.path.splitext(src))[0] + '_no_comment'+filetype
  33:     outputf = open(outputfilename, 'w')
  34:  
  35:     lines=inputf.read()
  36:     inputf.close()
  37:     lines=re.sub(Rule1,"",lines)
  38:     outputf.write(lines)    
  39:     outputf.close()
  40:     return True
  41:  
  42: #--------------------------------------------------------------
  43: def deal_dir(src):
  44:     #  dir exist or not
  45:     if not os.path.exists(src):
  46:         print 'Error: dir - %s is not exist.'%s (src)
  47:         return False
  48:     filelists = os.listdir(src)
  49:     for eachfile in filelists:
  50:         eachfile = src + '/' +eachfile
  51:         if os.path.isdir(eachfile):
  52:             deal_dir(eachfile)
  53:         elif os.path.isfile(eachfile):
  54:             deal_file(eachfile)
  55:     return True
  56:  
  57: #--------------------------------------------------------------
  58: def main():
  59:     if len(sys.argv) < 2:
  60:         usage()
  61:         sys.exit(1)
  62:     src = sys.argv[1]
  63:     # get absolute dir/file path
  64:     if os.path.isdir(src):
  65:         dire = os.path.abspath(src)
  66:         dirFlag = True
  67:     elif os.path.isfile(src):
  68:         fl = os.path.abspath(src)
  69:         dirFlag = False
  70:     else:
  71:         print 'File input error'
  72:  
  73:     # deal
  74:     if dirFlag:
  75:         deal_dir(dire)
  76:     else:
  77:         deal_file(fl)
  78:     print 'Successful handle file.'
  79:  
  80: #--------------------------------------------------------------
  81: if __name__ == '__main__':
  82:     main()

使用示例

 

待处理文件:

   1: #ifndef _RS232_H_
   2: #define _RS232_H_
   3:  
   4: /* the maximum number of ports we are willing to open */
   5: #define MAX_PORTS 4
   6:  
   7: /*this array hold information about each port we have opened */
   8: struct PortInfo{
   9:     int busy;
  10:     char name[32];
  11:     int handle;
  12: };
  13:  
  14: int OpenCom(int portNo,const char deviceName[],long baudRate);
  15: int CloseCom(int portNo);
  16: int ComRd(int portNo,char buf[],int maxCnt,int Timeout);
  17: int ComWrt(int portNo,const char * buf,int maxCnt);
  18:  
  19: //long GetBaudRate(long baudRate);
  20: //int OpenComConfig(int port,
  21: //                  const char deviceName[],
  22: //                  long baudRate,
  23: //                  int parity,
  24: //                  int dataBits,
  25: //                  int stopBits,
  26: //                  int iqSize,
  27: //                  int oqSize);
  28:  
  29: #endif

处理结果:

   1: #ifndef _RS232_H_
   2: #define _RS232_H_
   3: #define MAX_PORTS 4
   4: struct PortInfo{
   5:     int busy;
   6:     char name[32];
   7:     int handle;
   8: };
   9: int OpenCom(int portNo,const char deviceName[],long baudRate);
  10: int CloseCom(int portNo);
  11: int ComRd(int portNo,char buf[],int maxCnt,int Timeout);
  12: int ComWrt(int portNo,const char * buf,int maxCnt);
  13: #endif

使用Python正则表达式处理时有一个问题:仅靠 Rule1 这一条替换规则只能删掉注释本身,无法删除文件中的空行(注释被删除后还会留下新的空行)。如果想删除空行,需要在替换之后再按行过滤,只保留去掉首尾空白后不为空的行,例如 [l for l in lines.split('\n') if l.strip()]。


本文转自feisky博客园博客,原文链接:http://www.cnblogs.com/feisky/archive/2010/12/09/1901349.html,如需转载请自行联系原作者