【问题标题】:Lines of code you have written [closed]您编写的代码行[关闭]
【发布时间】:2013-01-07 18:23:40
【问题描述】:

出于好奇,有什么方法可以获取您(在特定项目中)编写的代码行数?

我尝试使用 p4 describe #CLN | wc -l 通过 perforce 来统计,但除了很多边缘情况(包括注释、新增空行等)之外,它还会跳过新添加的文件。如果我们只想统计物理代码行数,这些边缘情况可以忽略,但新添加的文件仍然会导致问题。

【问题讨论】:

    标签: perforce lines-of-code


    【解决方案1】:

    我继续编写了一个 Python 脚本,打印出用户添加/更改的代码行数以及每次更改的平均行数。

    在 Windows 上使用 Python 2.7.2 进行测试。您可以从命令行运行 - 它假定您的路径中有 p4。

    用法:codestats.py -u [用户名]

    它也适用于 git:codestats.py -u [authorname] -g。

    它会进行一些黑名单来删除批量添加(例如,您刚刚添加了一个库),并且还会对某些类型的文件(例如 .HTML 文件等)施加黑名单。否则,它工作得很好。

    希望这会有所帮助!

    ########################################################################
    # Script that computes the lines of code stats for a perforce/git user.
    ########################################################################
    
    import argparse
    import logging
    import subprocess
    import sys
    import re
    
    # Command-line options understood by the script. Each entry is a tuple of
    # (dest name, short switch, long switch, help text, number of values the
    # switch consumes); a count of 0 marks a boolean flag.
    VALID_ARGUMENTS = [
        ("user", "-u", "--user", "Run lines of code computation for the specified user.", 1),
        ("change", "-c", "--change", "Just display lines of code in the passed in change (useful for debugging).", 1),
        ("git", "-g", "--git", "Use git rather than perforce (which is the default versioning system queried).", 0)
    ]
    
    class PrintHelpOnErrorArgumentParser(argparse.ArgumentParser):
      """Argument parser that prints the full help text when parsing fails."""

      def error(self, message):
        """Log the parse failure, print the help text and exit with status 2."""
        failure_text = "error: {0}\n\n".format(message)
        logging.error(failure_text)
        self.print_help()
        # Same effect as sys.exit(2): terminate with the usage-error status.
        raise SystemExit(2)
    
    def is_code_file(depot_path):
      """Report whether a depot file revision should be counted as source code.

      Runs ``p4 fstat`` on ``depot_path`` and looks at the reported head type:
      only a head type containing 'text' qualifies. Text files whose depot path
      ends in one of the excluded extensions (followed by '#<revision>') are
      rejected as well.

      Returns True when the file counts as code, False otherwise.
      """
      fstat_process = subprocess.Popen(['p4', 'fstat', depot_path], stdout=subprocess.PIPE)
      fstat_lines = fstat_process.communicate()[0].split('\n')

      head_type_pattern = re.compile('^... headType (\S+)\s*$')
      is_text = False
      for fstat_line in fstat_lines:
        found = head_type_pattern.match(fstat_line)
        if found:
          # If several headType lines appear, the last one decides.
          is_text = 'text' in found.group(1)

      if not is_text:
        return False

      # Text-based formats that are not treated as hand-written code.
      excluded_extensions = ['html', 'css', 'twb', 'twbx', 'tbm', 'xml']
      return not any(
          re.match('^\/\/depot.*\.{}#\d+$'.format(extension), depot_path)
          for extension in excluded_extensions)
    
    def parse_args():
      """Build the command-line parser from VALID_ARGUMENTS and parse sys.argv.

      Every option is registered under both its short and its long switch and
      carries its help text. Options that take values are stored as lists
      (argparse behaviour for an integer ``nargs``); flags are stored as
      booleans.

      Returns the argparse namespace, with one attribute per ``dest`` name
      declared in VALID_ARGUMENTS.
      """
      parser = PrintHelpOnErrorArgumentParser()

      for arg_name, short_switch, long_switch, help_text, num_args in VALID_ARGUMENTS:
        if num_args != 0:
          # Previously only the short switch was registered here and the help
          # text was dropped, so "--user" / "--change" were rejected.
          parser.add_argument(
              short_switch,
              long_switch,
              nargs=num_args,
              type=str,
              help=help_text,
              dest=arg_name)
        else:
          parser.add_argument(
              short_switch,
              long_switch,
              action="store_true",
              help=help_text,
              dest=arg_name)

      return parser.parse_args()
    
    # Patterns applied to the lines of "p4 describe" output. The four file_*
    # patterns recognise per-file rows of the form
    # "... <depot path>#<revision> <action>"; note that the leading "..." is
    # not escaped, so it matches any three characters.
    file_edited_regex = re.compile('^... .*?#\d+ edit\s*$')
    file_deleted_regex = re.compile('^... .*?#\d+ delete\s*$')
    file_integrated_regex = re.compile('^... .*?#\d+ integrate\s*$')
    # Group 1 captures "<depot path>#<revision>" of the added file.
    file_added_regex = re.compile('^... (.*?#\d+) add\s*$')
    # Heading that introduces the list of per-file rows.
    affected_files_regex = re.compile('^Affected files ...')
    outliers = [] # Changes that seem as if they weren't hand coded and merit inspection
    
    def num_lines_in_file(depot_path):
      """Return the number of lines in the given depot file revision.

      The content is fetched with ``p4 print -q``; ``-q`` suppresses the
      one-line header that ``p4 print`` otherwise puts in front of the file,
      which used to be counted as a line of code. ``splitlines()`` is used so
      that the newline terminating the last line does not add a phantom empty
      line (``split('\\n')`` reported 1 for an empty file).
      """
      contents = subprocess.Popen(['p4', 'print', '-q', depot_path], stdout=subprocess.PIPE).communicate()[0]
      return len(contents.splitlines())
    
    def parse_change(changelist):
      """Tally the files and lines touched by one changelist.

      Runs ``p4 describe -ds <changelist>`` and walks the output line by line:

      * in the "Affected files ..." section, added and edited files are
        counted; for each added file that is_code_file() accepts, its full
        line count is added to the "lines added" total (files above 3000 lines
        are assumed to be copies and contribute 0 lines);
      * in the "Differences ..." section, the per-file diff summaries
        ("add/deleted/changed N chunks M lines") are accumulated, skipping
        files that is_code_file() rejects.

      Returns a six-element list:
      [files added, files edited, lines added, lines deleted,
       changed lines (new side), changed lines (old side)].
      """
      change_description = subprocess.Popen(['p4', 'describe', '-ds', changelist], stdout=subprocess.PIPE).communicate()[0].split('\n')

      differences_regex = re.compile(r'^Differences \.\.\..*$')
      line_added_regex = re.compile(r'^add \d+ chunks (\d+) lines.*$')
      line_removed_regex = re.compile(r'^deleted \d+ chunks (\d+) lines.*$')
      line_changed_regex = re.compile(r'^changed \d+ chunks (\d+) / (\d+) lines.*$')
      # p4 describe prints per-file diff headers as
      # "==== //depot/path#rev (type) ====". The previous pattern ended in
      # "\s*\S+$", which cannot match the two space-separated trailing tokens,
      # so no header was ever recognised and non-code files were never skipped.
      file_diff_regex = re.compile(r'^==== (//depot.*#\d+)\s.*$')

      parsing_differences = False
      parsing_affected_files = False
      skip_file = False

      num_lines_added = 0
      num_lines_deleted = 0
      num_lines_changed_added = 0
      num_lines_changed_deleted = 0
      num_files_added = 0
      num_files_edited = 0

      for line in change_description:
        if differences_regex.match(line):
          parsing_differences = True
        elif affected_files_regex.match(line):
          parsing_affected_files = True
        elif parsing_differences:
          file_diff_match = file_diff_regex.match(line)
          if file_diff_match:
            # A new per-file header: decide once whether its summary counts.
            skip_file = not is_code_file(file_diff_match.group(1))
          elif not skip_file:
            added_match = line_added_regex.match(line)
            removed_match = line_removed_regex.match(line)
            changed_match = line_changed_regex.match(line)

            if added_match:
              num_lines_added += int(added_match.group(1))
            elif removed_match:
              num_lines_deleted += int(removed_match.group(1))
            elif changed_match:
              # "changed N chunks <old> / <new> lines"
              num_lines_changed_added += int(changed_match.group(2))
              num_lines_changed_deleted += int(changed_match.group(1))
        elif parsing_affected_files:
          file_added_match = file_added_regex.match(line)
          if file_added_match:
            depot_path = file_added_match.group(1)

            if is_code_file(depot_path):
              lines_in_file = num_lines_in_file(depot_path)

              if lines_in_file > 3000:
                # Anomaly - probably a copy of existing code - discard this
                lines_in_file = 0

              num_lines_added += lines_in_file

            # Every added file is counted, whether or not it is a code file.
            num_files_added += 1
          elif file_edited_regex.match(line):
            num_files_edited += 1

      return [num_files_added, num_files_edited, num_lines_added, num_lines_deleted, num_lines_changed_added, num_lines_changed_deleted]
    
    def contains_integrates(changelist):
      """Tell whether a changelist integrates at least one file.

      Scans the output of ``p4 describe -s <changelist>`` and returns True as
      soon as a row after the "Affected files ..." heading matches
      file_integrated_regex; returns False when no such row exists.
      """
      describe_output = subprocess.Popen(['p4', 'describe', '-s', changelist], stdout=subprocess.PIPE).communicate()[0]

      in_affected_files = False
      for described_line in describe_output.split('\n'):
        if affected_files_regex.match(described_line):
          in_affected_files = True
        elif in_affected_files and file_integrated_regex.match(described_line):
          return True

      return False
    
    #################################################
    # Note: Keep this function in sync with 
    # generate_line.
    #################################################
    def generate_output_specifier(output_headers):
      """Build the str.format template for one table row.

      Each header contributes a '| {:<width>}' column whose width is the length
      of the header text; a closing ' |' is appended when there is at least one
      column. An empty header list yields an empty string.
      """
      columns = ['| {:' + str(len(header)) + '}' for header in output_headers]
      specifier = ''.join(columns)

      if not specifier:
        return specifier

      return specifier + ' |'
    
    #################################################
    # Note: Keep this function in sync with 
    # generate_output_specifier.
    #################################################
    def generate_line(output_headers):
      """Build the horizontal rule that frames the table.

      Every header contributes two dashes (standing in for the '| ' prefix of
      its column) plus one dash per character of the header; two more dashes
      close the rule (for the final ' |'). An empty header list yields an empty
      string.
      """
      segments = ['--' + '-' * len(header) for header in output_headers]

      if not segments:
        return ''

      return ''.join(segments) + '--'
    
    # Returns true if a change is a bulk addition or touches //depot/third-party
    def is_black_listed_change(user, changelist):
      """Decide whether a changelist should be left out of the statistics.

      Scans the output of ``p4 describe -s <changelist>`` and returns True when
      either
      * every file row seen was an add (no edit or delete rows) and more than
        70 files were added, or
      * a row mentions '... //depot/third-party'.

      Scanning stops at the first row mentioning '... //depot/private' or
      '... //depot/third-party', so the add/edit/delete counts only cover the
      rows up to and including that one. ``user`` is accepted but not used.

      NOTE(review): a private-path row only stops the scan; it does not by
      itself black-list the change. Confirm whether that is intended.
      """
      describe_lines = subprocess.Popen(['p4', 'describe', '-s', changelist], stdout=subprocess.PIPE).communicate()[0].split('\n')

      only_adds = True
      add_count = 0
      touches_third_party = False

      for described_line in describe_lines:
        if file_edited_regex.match(described_line) or file_deleted_regex.match(described_line):
          only_adds = False
        elif file_added_regex.match(described_line):
          add_count += 1

        if '... //depot/private' in described_line:
          break

        if '... //depot/third-party' in described_line:
          touches_third_party = True
          break

      is_bulk_add = only_adds and add_count > 70

      #print "{}: {}".format(changelist, large_add_change or is_private_change)
      return is_bulk_add or touches_third_party
    
    # Matches a "Change <number> ... <user>@<client> ..." header row; group 1 is
    # the changelist number, group 2 the text immediately before the '@'.
    change_header_regex = re.compile('^Change (\d+)\s*.*?\s*(\S+)@.*$')

    def get_user_and_change_header_for_change(changelist):
      """Look up the submitter and the header row of a changelist.

      Runs ``p4 describe -s <changelist>`` and returns ``[user, header_line]``
      for the first output row matching change_header_regex, or
      ``[None, None]`` when no row matches.
      """
      describe_process = subprocess.Popen(['p4', 'describe', '-s', changelist], stdout=subprocess.PIPE)
      describe_lines = describe_process.communicate()[0].split('\n')

      for described_line in describe_lines:
        header_match = change_header_regex.match(described_line)
        if header_match:
          return [header_match.group(2), described_line]

      return [None, None]
    
    def _unwrap_option(value):
      """Return the single value of an argparse option stored as a one-element list."""
      if isinstance(value, (list, tuple)):
        return value[0] if value else None
      return value

    def _print_git_stats(author):
      """Print commit count, net lines and average lines per commit for a git author."""
      git_log_command = ['git', 'log', '--author={}'.format(author), '--pretty=tformat:', '--numstat']
      git_log_output = subprocess.Popen(git_log_command, stdout=subprocess.PIPE).communicate()[0].split('\n')

      # Rows are "<added> <deleted> <path>"; rows that do not start with two
      # numbers are ignored. ".+" (rather than "\S+") keeps paths that contain
      # spaces from being dropped.
      git_log_line_regex = re.compile(r'^(\d+)\s+(\d+)\s+.+$')
      adds = 0
      subs = 0
      for git_log_line in git_log_output:
        line_match = git_log_line_regex.match(git_log_line)
        if line_match:
          adds += int(line_match.group(1))
          subs += int(line_match.group(2))
      total = adds - subs

      git_shortlog_command = ['git', 'shortlog', '--author={}'.format(author), '-s']
      git_shortlog_output = subprocess.Popen(git_shortlog_command, stdout=subprocess.PIPE).communicate()[0].split('\n')

      git_shortlog_line_regex = re.compile(r'^\s*(\d+)\s+.*$')
      num_commits = 0
      for git_shortlog_line in git_shortlog_output:
        line_match = git_shortlog_line_regex.match(git_shortlog_line)
        if line_match:
          num_commits += int(line_match.group(1))

      # An author without commits used to trigger a ZeroDivisionError here.
      average = total * 1.0 / num_commits if num_commits else 0.0
      print("Git Stats for {}: Commits: {}. Lines of code: {}. Average Lines Per Change: {}.".format(author, num_commits, total, average))

    def _main():
      """Script entry point: print lines-of-code statistics for a user or a change.

      With -g the statistics come from git and the function exits right after
      printing them. Otherwise the changelists to inspect are either the single
      change given with -c or all submitted changes of the user given with -u;
      a running totals row is redrawn in place for each processed change and
      suspiciously large changes are listed separately as outliers.
      """
      log = logging.getLogger()
      log.setLevel(logging.DEBUG)

      args = parse_args()

      # Options declared with nargs=1 arrive as one-element lists. They must be
      # unwrapped before being placed in a subprocess argument list: passing
      # the list itself only worked by accident on Windows.
      user = _unwrap_option(args.user)
      change = _unwrap_option(args.change)

      if args.git:
        if not user:
          logging.error("error: -g requires an author name (-u)")
          sys.exit(2)
        _print_git_stats(user)
        sys.exit(0)

      if change:
        user, change_header = get_user_and_change_header_for_change(change)
        if change_header is None:
          logging.error("error: could not read the header of change {0}".format(change))
          sys.exit(2)
        change_log = [change_header]
      elif user:
        change_log = subprocess.Popen(['p4', 'changes', '-u', user, '-s', 'submitted'], stdout=subprocess.PIPE).communicate()[0].split('\n')
      else:
        logging.error("error: specify a user (-u) or a change (-c)")
        sys.exit(2)

      user_stats = {
          'num_changes': 0,
          'lines_added': 0,
          'lines_deleted': 0,
          'lines_changed_added': 0,
          'lines_changed_removed': 0,
          'total_lines': 0,
          'files_edited': 0,
          'files_added': 0,
      }

      output_headers = [
          'Current Change', 'Num Changes', 'Files Added', 'Files Edited',
          'Lines Added', 'Lines Deleted', 'Lines Changed (Added/Removed)',
          'Total Lines', 'Avg. Lines/Change',
      ]

      line = generate_line(output_headers)
      output_specifier = generate_output_specifier(output_headers)

      print(line)
      print(output_specifier.format(*output_headers))
      print(line)

      for change_line in change_log:
        change_match = change_header_regex.search(change_line)
        if not change_match:
          continue

        user_stats['num_changes'] += 1
        changelist = change_match.group(1)

        if not is_black_listed_change(user, changelist) and not contains_integrates(changelist):
          (files_added_in_change, files_edited_in_change, lines_added_in_change,
           lines_deleted_in_change, lines_changed_added_in_change,
           lines_changed_removed_in_change) = parse_change(changelist)

          if lines_added_in_change > 5000:
            # Too large to be hand-written: report it instead of counting it.
            outliers.append([changelist, lines_added_in_change])
          else:
            user_stats['lines_added'] += lines_added_in_change
            user_stats['lines_deleted'] += lines_deleted_in_change
            user_stats['lines_changed_added'] += lines_changed_added_in_change
            user_stats['lines_changed_removed'] += lines_changed_removed_in_change
            user_stats['total_lines'] += lines_changed_added_in_change
            user_stats['total_lines'] -= lines_changed_removed_in_change
            user_stats['total_lines'] += lines_added_in_change
            user_stats['files_edited'] += files_edited_in_change
            user_stats['files_added'] += files_added_in_change

        current_output = [
            changelist,
            user_stats['num_changes'],
            user_stats['files_added'],
            user_stats['files_edited'],
            user_stats['lines_added'],
            user_stats['lines_deleted'],
            '{}/{}'.format(user_stats['lines_changed_added'], user_stats['lines_changed_removed']),
            user_stats['total_lines'],
            user_stats['total_lines']*1.0/user_stats['num_changes'],
        ]

        # The trailing carriage return moves the cursor back to the start of
        # the line so the next row overwrites this one (in-place progress).
        sys.stdout.write(output_specifier.format(*current_output) + '\r')
        sys.stdout.flush()

      sys.stdout.write('\n')
      print(line)

      if len(outliers) > 0:
        print("Outliers (changes that merit inspection - and have not been included in the stats):")
        outlier_headers = ['Changelist', 'Lines of Code']
        outlier_specifier = generate_output_specifier(outlier_headers)
        outlier_line = generate_line(outlier_headers)

        print(outlier_line)
        print(outlier_specifier.format(*outlier_headers))
        print(outlier_line)

        for outlier in outliers:
          print(outlier_specifier.format(*outlier))

        print(outlier_line)

    if __name__ == "__main__":
      _main()
    

    【讨论】:

      【解决方案2】:

      其他答案似乎错过了源代码控制历史方面的事情。

      来自http://forums.perforce.com/index.php?/topic/359-how-many-lines-of-code-have-i-written/

      分多个步骤计算答案:

      1) 新增文件:

      p4 filelog ... | grep ' add on .* by <username>'
      p4 print -q foo#1 | wc -l
      

      2) 更改的文件:

      p4 describe <changelist> | grep "^>" | wc -l
      

      将所有计数组合在一起(脚本...),您将得到一个总数。

      您可能还想用 grep 删除空白行或没有字母数字字符的行?

      此外,如果您定期执行此操作,那么在 P4Python 中编写代码并逐步执行会更有效 - 保留历史记录并仅查看新提交。

      【讨论】:

      • 谢谢,但首先看不到显示所有文件(仅显示从当前工作区提交的文件)。结果是否推送到 foo#1?我需要为每个文件运行吗?
      • 您必须编写其余的脚本以从第一个命令中获取文件名,然后在每个命令上运行第二个命令。
      • 实际看输出,反正可能需要更多处理。
      • p4 filelog -s -L ... | grep ' add on .* by DouglasLeeder\|^//' | grep -B1 "add on" | grep "^//" 提供了我添加的所有路径,在我运行命令的目录中。
      【解决方案3】:

      是的,有很多方法可以计算代码行数。

      tl;dr 安装 Eclipse Metrics Plugin。这里有如何安装的说明。如果您想在没有 Eclipse 的情况下执行此操作,下面有一个简短的脚本。

      Shell 脚本

      我将向您介绍非常通用的方法。它适用于 Linux,但它可以移植到其他系统。将这两行保存到lines.sh 文件:

      #!/bin/sh
      # Run wc on every *.java file found by find, print the first and fourth
      # field of each wc row separated by a tab (line count and path in the
      # sample output), and finish with the summed first field and row count.
      find -name "*.java" | awk '{ system("wc "$0)  }' | awk '{ print $1 "\t" $4; lines += $1; files++ } END { print "Total: " lines " lines in " files " files."}'
      

      这是一个使用 find、wc 和很棒的 awk 的 shell 脚本。添加执行权限:

      chmod +x lines.sh

      现在我们可以执行我们的 shell 脚本了。

      假设您将lines.sh 保存在/home/you/workspace/projectX 中。
      脚本计算.java 文件中的行数,这些文件位于/home/you/workspace/projectX 的子目录中。

      让我们用./lines.sh 运行它。您可以为任何其他类型的文件更改*.java

      样本输出:

      adam@adam ~/workspace/Checkers $ ./lines.sh 
      23  ./src/Checkers.java
      14  ./src/event/StartGameEvent.java
      38  ./src/event/YourColorEvent.java
      52  ./src/event/BoardClickEvent.java
      61  ./src/event/GameQueue.java
      14  ./src/event/PlayerEscapeEvent.java
      14  ./src/event/WaitEvent.java
      16  ./src/event/GameEvent.java
      38  ./src/event/EndGameEvent.java
      38  ./src/event/FakeBoardEvent.java
      127 ./src/controller/ServerThread.java
      14  ./src/controller/ServerConfig.java
      46  ./src/controller/Server.java
      170 ./src/controller/Controller.java
      141 ./src/controller/ServerNetwork.java
      246 ./src/view/ClientNetwork.java
      36  ./src/view/Messages.java
      53  ./src/view/ButtonField.java
      47  ./src/view/ViewConfig.java
      32  ./src/view/MainWindow.java
      455 ./src/view/View.java
      36  ./src/view/ImageLoader.java
      88  ./src/model/KingJump.java
      130 ./src/model/Cords.java
      70  ./src/model/King.java
      77  ./src/model/FakeBoard.java
      90  ./src/model/CheckerMove.java
      53  ./src/model/PlayerColor.java
      73  ./src/model/Checker.java
      201 ./src/model/AbstractPiece.java
      75  ./src/model/CheckerJump.java
      154 ./src/model/Model.java
      105 ./src/model/KingMove.java
      99  ./src/model/FieldType.java
      269 ./src/model/Board.java
      56  ./src/model/AbstractJump.java
      80  ./src/model/AbstractMove.java
      82  ./src/model/BoardState.java
      Total: 3413 lines in 38 files.
      

      【讨论】:

        【解决方案4】:

        找一个计算行数的应用程序,计算行数有很多微妙之处 - 注释、空行、每行多个运算符等。

        Visual Studio 具有“计算代码指标”功能。由于您没有提到具体的语言,我无法更具体地说明应该使用哪种工具,只能说“find”和“grep”可能不是解决问题的办法。

        还要考虑这样一个事实,即代码行数并不能衡量实际进度。路线图上已完成的功能衡量进度,代码行数越低越好。如果一个自豪的开发人员声称他的 60,000 行代码非常了不起,但后来发现有一种方法可以在 1000 行中完成同样的事情,这并不是第一次。

        【讨论】:

          【解决方案5】:

          看看SLOCCount。它只计算实际的代码行数并执行一些额外的计算。

          在 OSX 上,您可以通过 Homebrew 使用 brew install sloccount 轻松安装它。

          我的一个项目的示例输出:

          $ sloccount .
          Have a non-directory at the top, so creating directory top_dir
          Adding /Users/padde/Desktop/project/./Gemfile to top_dir
          Adding /Users/padde/Desktop/project/./Gemfile.lock to top_dir
          Adding /Users/padde/Desktop/project/./Procfile to top_dir
          Adding /Users/padde/Desktop/project/./README to top_dir
          Adding /Users/padde/Desktop/project/./application.rb to top_dir
          Creating filelist for config
          Adding /Users/padde/Desktop/project/./config.ru to top_dir
          Creating filelist for controllers
          Creating filelist for db
          Creating filelist for helpers
          Creating filelist for models
          Creating filelist for public
          Creating filelist for tmp
          Creating filelist for views
          Categorizing files.
          Finding a working MD5 command....
          Found a working MD5 command.
          Computing results.
          
          
          SLOC    Directory   SLOC-by-Language (Sorted)
          256     controllers     ruby=256
          66      models          ruby=66
          10      config          ruby=10
          9       top_dir         ruby=9
          5       helpers         ruby=5
          0       db              (none)
          0       public          (none)
          0       tmp             (none)
          0       views           (none)
          
          
          Totals grouped by language (dominant language first):
          ruby:           346 (100.00%)
          
          
          
          
          Total Physical Source Lines of Code (SLOC)                = 346
          Development Effort Estimate, Person-Years (Person-Months) = 0.07 (0.79)
           (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
          Schedule Estimate, Years (Months)                         = 0.19 (2.28)
           (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
          Estimated Average Number of Developers (Effort/Schedule)  = 0.34
          Total Estimated Cost to Develop                           = $ 8,865
           (average salary = $56,286/year, overhead = 2.40).
          SLOCCount, Copyright (C) 2001-2004 David A. Wheeler
          SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.
          SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to
          redistribute it under certain conditions as specified by the GNU GPL license;
          see the documentation for details.
          Please credit this data as "generated using David A. Wheeler's 'SLOCCount'."
          

          【讨论】:

            【解决方案6】:

            有一种更简单的方法来完成这一切,顺便说一下,它比使用 grep 更快:

            首先获取特定用户的所有更改列表,这是一个命令行命令,您可以使用 os.system() 在 python 脚本中使用它:

            p4 changes -u <username> > 'some_text_file.txt'
            

            现在你需要提取所有的更改列表编号,我们将使用正则表达式,这里使用 python 完成:

            import re

            # Every row of "p4 changes" output starts with "Change <number>";
            # anchoring on that prefix works for changelist numbers of any length
            # and ignores numbers that merely appear in a description.
            pattern = re.compile(r'^Change (\d+)\b')

            # Read the saved "p4 changes" output; the with-block closes the file.
            with open('some_text_file.txt', 'r') as f:
                lists = f.readlines()

            # One (possibly empty) list of matched changelist numbers per row.
            labels = [pattern.findall(i) for i in lists]

            # Keep the number of every row that has one. Rows without a match
            # (e.g. blank lines) are skipped instead of raising IndexError.
            changelists = [str(h[0]) for h in labels if h]
            

            现在您在 changelists 中拥有所有更改列表编号。我们将遍历该列表,为每个更改列表查找添加的行数和删除的行数,两者之差就是净增加的总行数。以下几行代码正是这样做的:

            import os

            # Start from empty result files: the commands below append (">>"), so
            # rows left over from an earlier run would otherwise be counted again.
            open('added.txt', 'w').close()
            open('deleted.txt', 'w').close()

            # Collect the "add ..." and "deleted ..." diff-summary rows of every
            # changelist (findstr is the Windows counterpart of grep).
            for i in changelists:
                os.system('p4 describe -ds '+i+' | findstr "^add" >> added.txt')
                os.system('p4 describe -ds '+i+' | findstr "^del" >> deleted.txt')


            def count_lines(summary_path):
                """Sum the line counts of the diff-summary rows stored in a file."""
                total = 0
                with open(summary_path) as summary_file:
                    for row in summary_file:
                        numbers = [int(s) for s in row.split() if s.isdigit()]
                        # Rows look like "add 1 chunks 5 lines": the second
                        # number is the line count. Malformed rows are skipped.
                        if len(numbers) > 1:
                            total += numbers[1]
                return total


            count_add = count_lines('added.txt')
            # The deleted total must come from deleted.txt; the original loop
            # iterated over the changelist labels by mistake.
            count_del = count_lines('deleted.txt')

            count_added = count_add - count_del

            print(count_added)
            

            count_added 将包含用户添加的行数。

            【讨论】:

              猜你喜欢
              • 1970-01-01
              • 1970-01-01
              • 1970-01-01
              • 2011-05-01
              • 1970-01-01
              • 2010-11-29
              • 2023-01-01
              • 2010-09-14
              • 1970-01-01
              相关资源
              最近更新 更多