#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#***********************************************************************
# This file is part of OpenMolcas.                                     *
#                                                                      *
# OpenMolcas is free software; you can redistribute it and/or modify   *
# it under the terms of the GNU Lesser General Public License, v. 2.1. *
# OpenMolcas is distributed in the hope that it will be useful, but it *
# is provided "as is" and without any express or implied warranties.   *
# For more details see the full text of the license in the file        *
# LICENSE or in <http://www.gnu.org/licenses/>.                        *
#                                                                      *
# Copyright (C) 2017, Ignacio Fdez. Galván                             *
#***********************************************************************

import sys
import subprocess
from glob import glob
from os.path import join, relpath, basename, dirname, getmtime
from os import environ, walk
import re
from fnmatch import translate
import json

# colorama is optional: when it cannot be imported, define stand-ins with
# the same names so the rest of the script can use them unconditionally
try:
  from colorama import init, Fore, Style
except ImportError:
  class Dummy(object):
    pass

  # Accepts (and ignores) any arguments
  def init(*args, **kwargs):
    return None

  # All the color codes used in this script become empty strings
  Fore, Style = Dummy(), Dummy()
  Fore.RED = Fore.BLUE = Fore.GREEN = ''
  Style.RESET_ALL = ''

# Help text, printed when the script is called without a command-line argument
# Format fields: {0} = Style.RESET_ALL, {1} = Fore.BLUE, {2} = Fore.GREEN
helptext = '''
{1}################################################################################
#                     Searching the OpenMolcas source code                     #
################################################################################{0}

Use with a command-line argument to search for a procedure (subroutine/function)
or module in the Molcas source code. Wildcards are supported, but the argument
may need to be put inside quotes to prevent its expansion by the shell.

Examples:

  {2}abend{0}       find the definition of the Abend subroutine

  {2}pget?{0}       find all procedures whose name is "pget" plus one character

  {2}"sb*"{0}       find all procedures with a name starting with "sb"
              (without the quotes, the shell will probably expand it to "sbin")
'''.format(Style.RESET_ALL, Fore.BLUE, Fore.GREEN)

# Main regex to identify Fortran procedures
# Matching is case-insensitive. Reading the pattern from left to right:
# - the first character must not be "!", "*" or "c" (negative lookahead)
# - optional whitespace and at most one run of non-blank characters may
#   precede the keyword, and that run must not end in "end" (lookbehind),
#   so that e.g. "end subroutine" lines are not taken as definitions
# - one of the keywords function/subroutine/interface/entry/module,
#   followed by whitespace and the name
# Groups: 1 = everything before the name, 2 = the keyword, 3 = the name
# Some shortcomings:
# - Does not handle continuation lines
# - Does not skip procedures inside an interface block
# - NOTE(review): the "c" exclusion presumably targets fixed-form comment
#   lines, but it also rejects any line whose first character is "c"
#   (e.g. one starting with "character" or "complex") -- confirm intent
find_regex = re.compile(r'((?![!*c])\s*\S*(?<!end)\s*(function|subroutine|interface|entry|module)\s+)(\w+)', flags=re.IGNORECASE)

# Colors are meant for the terminal only: when the output goes somewhere
# else (e.g. redirected to a file) request stripping of the color codes
if not sys.stdout.isatty():
  init(strip=True)
else:
  init()

# Select, from the global "procs" list, the entries whose name (first item)
# is matched by "query" (a compiled regex); the original order is kept
def filter_procs(query):
  selected = []
  for entry in procs:
    if query.match(entry[0]):
      selected.append(entry)
  return selected

# Color the part of a line that matches the query
# - If the line matches the Fortran regex, the procedure name is group 3
#   and only that span is colored
# - Otherwise color every non-empty piece of text that fits the query
# - If nothing fits, the line is returned unchanged
#
# line:       text of the line to be displayed
# query_text: the query as given by the user (shell-style wildcards)
# Returns the line with the color codes inserted around the matched text
def color_match(line, query_text):
  def paint(text):
    return Fore.RED + text + Style.RESET_ALL

  match = find_regex.match(line)
  if match:
    # Splice by position instead of using sub(): sub() would also rewrite
    # any later match in the same line (with this first name), and would
    # parse the name as a replacement template
    return line[:match.start(3)] + paint(match.group(3)) + line[match.end(3):]

  # translate() anchors the pattern at the end of the string; remove that
  # anchor (the last "\Z", or "\z" as emitted by newer Python versions)
  # so that the query can be found anywhere in the line
  pattern = translate(query_text)
  anchor = max(pattern.rfind('\\Z'), pattern.rfind('\\z'))
  if anchor >= 0:
    pattern = pattern[:anchor] + pattern[anchor+2:]
  q = re.compile(pattern, flags=re.IGNORECASE)
  # A function replacement keeps each match's own text (and case) and is
  # not subject to backslash processing; empty matches add nothing
  return q.sub(lambda m: paint(m.group()) if m.group() else '', line)

# The query is the first command-line argument; it is looked at before
# anything else, so no time is spent reading the source when it is missing
# (in that case the help text is shown and the script ends)
if len(sys.argv) < 2:
  print(helptext)
  sys.exit(0)
query_text = sys.argv[1]

# Base directory: $OPENMOLCAS_SOURCE if defined, otherwise $MOLCAS,
# otherwise the current directory
MOLCAS = environ.get('OPENMOLCAS_SOURCE', environ.get('MOLCAS', '.'))

# Find out the latest modification date in the source
# in order to read/write the database file
# "all_files" collects the path of every file found below src_dir
# whose modification time could be read
all_files = []
src_dir = join(MOLCAS, 'src')
try:
  mtime = getmtime(src_dir)
except OSError:
  # Without a source directory there is nothing to search:
  # stop with a message instead of a traceback
  sys.exit('Cannot access the source directory "{0}", set OPENMOLCAS_SOURCE or MOLCAS'.format(src_dir))
for root, dirnames, filenames in walk(src_dir):
  for filename in filenames:
    f = join(root, filename)
    try:
      mtime = max(mtime, getmtime(f))
    except OSError:
      # e.g. a dangling symbolic link: it could not be scanned either,
      # so leave it out of the list
      continue
    all_files.append(f)
  # Checking directory timestamps would be useful to detect deleted files
  # Unfortunately it also "detects" removed *.swp files (created by vim)
  #for dirnm in dirnames:
  #  d = join(root, dirnm)
  #  mtime = max(mtime, getmtime(d))

# Get a list of all procedures in the source
# "procs": list of lists: [ name, file, line, text ]
# The list is read from the database file when that file is at least as
# recent as the source; otherwise it is rebuilt and, if possible, saved
db_file = join(MOLCAS, 'data', 'find.db')
procs = None
try:
  if (mtime <= getmtime(db_file)):
    with open(db_file, 'r') as f:
      procs = json.loads(f.readline())
except (OSError, ValueError):
  # Missing, unreadable or corrupt database: it will be rebuilt
  procs = None

if (procs is None):
  # First try to build the database with ctags, check that it's the right variant
  try:
    output = subprocess.check_output(['ctags', '--version'], stderr=subprocess.STDOUT)
    use_ctags = output.startswith(b'Exuberant')
  except (OSError, subprocess.CalledProcessError):
    # ctags is not installed or could not be run
    use_ctags = False

  procs = []
  if (use_ctags):
    # If ctags, just parse the human-readable output
    # Fields used: 0 = name, 2 = line number, 3 = file, 4... = source text
    ctags_cmd = ['ctags', '-x', '--c-kinds=f', '--fortran-kinds=fsiem', '-h .h.fh', '--langmap=c:.c.h,fortran:.f.f90.fh.F.F90', '--languages=c,fortran', '-R', src_dir]
    # The context manager closes the pipe and waits for ctags to finish
    with subprocess.Popen(ctags_cmd, stdout=subprocess.PIPE) as ctags:
      for line in ctags.stdout:
        # Non-ASCII bytes in the output must not abort the whole scan
        l = line.decode('utf-8', errors='replace').split()
        if (len(l) < 4):
          continue
        procs.append([l[0], relpath(l[3], src_dir), int(l[2]), ' '+' '.join(l[4:])])
  else:
    # If no ctags, process all files with the Fortran regex
    print('Scanning source files ... (install ctags for a faster, more accurate scan)')
    for path in all_files:
      try:
        with open(path, 'r', errors='replace') as src:
          for l, line in enumerate(src, 1):
            match = find_regex.match(line)
            if match:
              procs.append([match.group(3), relpath(path, src_dir), l, line.rstrip()])
      except OSError:
        # Unreadable file: skip it rather than abort the scan
        continue

  # Attempt to save the database for later use
  # This is best effort (the data directory may be missing or read-only),
  # a failure only means the scan is repeated next time
  try:
    with open(db_file, 'w') as f:
      json.dump(procs, f)
      f.write('\n')
  except OSError:
    pass

# If the command-line argument is -conf or -Conf, search for conflicts
# (procedures with the same name defined in different files)
# This is not perfect because subroutines inside interfaces appear as duplicated
# -conf searches for conflicts between *_util and anything else
# -Conf searches for conflicts between anything
if (query_text.startswith(('-conf', '-Conf'))):
  check_all = query_text.startswith('-Conf')
  # "unique" maps each (normalized) name to the first entry found with it
  unique = {}
  conflicts = []
  for proc in procs:
    dirnm = basename(dirname(proc[1]))
    # Skip these directories, they are supposed to contain duplicates
    if (dirnm.startswith('delayed')):
      continue
    # For fortran files, normalize the name to lowercase
    # The extension is compared in lowercase too, so that files with
    # uppercase extensions (.F, .F90) are treated as Fortran as well
    if (proc[1].lower().endswith(('.f','.f90','.fh'))):
      name = proc[0].lower()
    else:
      name = proc[0]
    if (name not in unique):
      unique[name] = proc
      continue
    # Report a duplicate if the name was already found in a different file
    # If -conf, at least one of the files must be in a *_util directory
    first = unique[name]
    if (check_all or dirnm.endswith('_util') or basename(dirname(first[1])).endswith('_util')):
      if (proc[1] != first[1]):
        conflicts.append((proc, first))

  # Print a list of possible conflicts
  # (file names are shown relative to the current directory)
  print('')
  if (len(conflicts) > 0):
    maxlen = max(len(c[0][0]) for c in conflicts)
    fmt = '{{0:<{0}}} : {{1}} , {{2}}'.format(maxlen)
    print(Fore.BLUE + 'Possible conflicts' + Style.RESET_ALL + '\n')
    print(fmt.format("name","file 1","file 2"))
    print('='*(maxlen+80))
    for a,b in conflicts:
      print(fmt.format(a[0], relpath(join(src_dir, a[1])), relpath(join(src_dir, b[1]))))
  sys.exit(0)

# The query uses shell-style wildcards: turn it into a case-insensitive regex
query = re.compile(translate(query_text), flags=re.IGNORECASE)

# Show the matching procedures as a table (file, line, definition),
# with the file names relative to the current directory
print('')
result = filter_procs(query)
if (not result):
  print(Fore.BLUE + 'No procedures matching ' + Fore.GREEN + query_text + Style.RESET_ALL)
else:
  paths = [relpath(join(src_dir, r[1])) for r in result]
  # The first column is as wide as the longest file name
  maxlen = max(len(p) for p in paths)
  fmt = '{{0:<{0}}} : {{1:5}} : {{2}}'.format(maxlen)
  print(Fore.BLUE + 'Procedures matching ' + Fore.GREEN + query_text + Style.RESET_ALL + '\n')
  print(fmt.format("file","line","definition"))
  print('='*(maxlen+80))
  for path, proc in zip(paths, result):
    print(fmt.format(path, proc[2], color_match(proc[3], query_text)))
print('')
