#!/usr/bin/env python
# Copyright 2016, 2017, Cumulus Networks, Inc.  All rights reserved.

from __future__ import print_function
from StringIO import StringIO
import re
import tokenize
import argparse


# Some code taken from:
# https://github.com/liftoff/pyminifier/blob/master/pyminifier/minification.py
# which is licensed under GPL 3.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Matches a shebang line such as '#!/usr/bin/env python'.  Written as a raw
# string so the pattern no longer relies on the invalid string escape '\!'.
shebang = re.compile(r'^#!.*$')

# Matches text that starts with '#', optional whitespace and the word
# "copyright" (any letter case).  remove_comments() keeps comments whose
# physical line matches this pattern.
COPYRIGHT = re.compile(r'^#\s*copyright', re.IGNORECASE)


def _tokenize(source):
    """
    Tokenize the string *source* and return its tokens as a list of lists.

    Every item produced by ``tokenize.generate_tokens`` is converted to a
    list, so callers are able to modify individual tokens in place.
    """
    readline = StringIO(source).readline
    token_rows = []
    for token in tokenize.generate_tokens(readline):
        token_rows.append(list(token))
    return token_rows


def _untokenize(tokens):
    """
    Converts the output of tokenize.generate_tokens back into a human-readable
    string (that doesn't contain oddly-placed whitespace everywhere).

    *tokens* is an iterable of token sequences; only the token string
    (item 1), the start position (item 2) and the end position (item 3) of
    each token are used.  Returns the reassembled source as a single string.
    .. note::
        Unlike :meth:`tokenize.untokenize`, this function requires the 3rd and
        4th items in each token tuple (though we can use lists *or* tuples).
    """
    # Collect the fragments and join them once at the end instead of growing
    # a string with repeated concatenation.
    pieces = []
    last_lineno = -1
    last_col = 0
    for tok in tokens:
        token_string = tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        # The following two conditionals preserve indentation:
        # a token starting on a later line resets the column counter ...
        if start_line > last_lineno:
            last_col = 0
        # ... and any gap before the token is filled with spaces (a bare
        # newline token is never padded).
        if start_col > last_col and token_string != '\n':
            pieces.append(" " * (start_col - last_col))
        pieces.append(token_string)
        last_col = end_col
        last_lineno = end_line
    return "".join(pieces)


def remove_comments(tokens):
    """
    Removes comments from *tokens* which is expected to be a list equivalent of
    tokenize.generate_tokens() (so we can update in-place).  The same list
    object is returned.
    .. note::
        * Only the comment text is blanked; the newline token that follows a
          whole-line comment is kept, so such lines become blank lines.
        * Comments whose physical line matches ``COPYRIGHT`` are kept.
        * A shebang on the first line is preserved: it is re-tokenized and
          prepended to the token list.
    """
    preserved_shebang = ""
    # A shebang is only meaningful on the very first line, so look at the
    # leading tokens, accept only one that starts on line 1 and stop at the
    # first hit.  (A '#!' comment on a later line is an ordinary comment and
    # must not replace, or be promoted to, the shebang.)
    for tok in tokens[0:4]:
        line = tok[4]
        if tok[2][0] == 1 and shebang.match(line):
            preserved_shebang = line
            break
    # Now remove comments:
    for tok in tokens:
        if tok[0] == tokenize.COMMENT and not COPYRIGHT.match(tok[4]):
            tok[1] = ''  # Making it an empty string removes it
    # Prepend our preserved items back into the token list:
    if preserved_shebang:  # Have to re-tokenize them
        preserved = _tokenize(preserved_shebang)
        preserved.pop()  # Get rid of ENDMARKER
        # Slice assignment keeps the order and still updates *tokens* in-place.
        tokens[0:0] = preserved
    return tokens


def remove_docstrings(tokens):
    """
    Removes docstrings from *tokens* which is expected to be a list equivalent
    of `tokenize.generate_tokens()` (so we can update in-place).  The same
    list object is returned.

    A string token is treated as a docstring when it is a complete statement
    and either

    * it is the first token after an INDENT (function/class/block docstring),
      or
    * it is the first token of the stream or directly follows an NL token
      (whole-module docstring).

    .. note::
        * If the docstring is the only statement of its block it is replaced
          by ``pass``, so the block does not end up empty.
        * A string after an INDENT that merely starts a larger expression
          (e.g. ``"a".join(x)``) is left untouched.
    """
    total = len(tokens)
    block_end = (tokenize.DEDENT, tokenize.ENDMARKER)
    skippable = (tokenize.NL, tokenize.COMMENT)
    prev_tok_type = None
    for index, tok in enumerate(tokens):
        token_type = tok[0]
        if token_type == tokenize.STRING:
            if prev_tok_type == tokenize.INDENT:
                # The string must be the whole statement: optionally a
                # trailing comment, then the NEWLINE ending the logical line.
                stmt_end = index + 1
                if stmt_end < total and tokens[stmt_end][0] == tokenize.COMMENT:
                    stmt_end += 1
                if stmt_end < total and tokens[stmt_end][0] == tokenize.NEWLINE:
                    # Find the next significant token to learn whether the
                    # block contains anything besides the docstring.
                    following = stmt_end + 1
                    while following < total and tokens[following][0] in skippable:
                        following += 1
                    if following >= total or tokens[following][0] in block_end:
                        # Docstring-only body: removing it would leave an
                        # empty block, so put a 'pass' statement in its place.
                        tok[0] = tokenize.NAME
                        tok[1] = 'pass'
                    else:
                        tok[1] = ''  # Remove it
                        # Remove the leftover indentation and newline:
                        tokens[index - 1][1] = ''
                        if index >= 2:
                            tokens[index - 2][1] = ''
            elif prev_tok_type is None or prev_tok_type == tokenize.NL:
                # This captures whole-module docstrings (including one that
                # is the very first token of the file):
                if index + 1 < total and tokens[index + 1][0] == tokenize.NEWLINE:
                    tok[1] = ''
                    # Remove the trailing newline:
                    tokens[index + 1][1] = ''
        prev_tok_type = token_type
    return tokens


def remove_blank_lines(source):
    """
    Returns *source* with every blank line (empty or whitespace-only) dropped.
    Example:
    .. code-block:: python
        test = "foo"

        test2 = "bar"
    Will become:
    .. code-block:: python
        test = "foo"
        test2 = "bar"
    """
    kept_lines = []
    for line in StringIO(source):
        # A line that is empty once stripped carries nothing but whitespace.
        if line.strip():
            kept_lines.append(line)
    return "".join(kept_lines)


if __name__ == '__main__':
    # Command-line entry point: read IN-FILE, strip its comments, docstrings
    # and blank lines, then write the result to OUT-FILE.
    cli = argparse.ArgumentParser(description='Decomment python file')
    cli.add_argument('src', metavar="IN-FILE", help='Source file')
    cli.add_argument('dst', metavar="OUT-FILE", help='Destination file')
    options = cli.parse_args()

    with open(options.src) as in_file:
        token_list = _tokenize(in_file.read())

    # Comments are blanked first, docstrings second; the token stream is then
    # turned back into text and the lines left empty are dropped.
    token_list = remove_comments(token_list)
    token_list = remove_docstrings(token_list)
    stripped = remove_blank_lines(_untokenize(token_list))

    with open(options.dst, 'w') as out_file:
        print(stripped, file=out_file)
