#!/usr/bin/python
#
# Decoder of CSS font
#     (as described here: http://www.cssplay.co.uk/menu/cssfont.html)
#
# It's a proof of concept, thus incomplete.
#


#
# Copyright (c) 2006  Michal Kwiatkowski <ruby@joker.linuxstuff.pl>
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# * Neither the name of the author nor the names of his contributors
#   may be used to endorse or promote products derived from this software
#   without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 

import getopt
import doctest
import re
import sys
import urllib


# Glyph table used by decode_letter().
#
# Each key is a tuple of five text rows (top to bottom) in which '#' marks a
# foreground pixel and ' ' a background pixel; the value is the character
# that glyph represents.  decode_letter() looks a glyph up with a plain
# dictionary membership test, so the rows must match character for
# character -- including trailing spaces and row length.
#
# Only a subset of letters, digits and punctuation is covered.
#
# NOTE(review): several rows are shorter than five characters.  Presumably
# they mirror the exact strings decode_css() yields for the reference page;
# confirm against real output before "normalising" any row width.
LETTERS = {
    (' ### ',
     '#   #',
     '#####',
     '#   #',
     '#   #') : 'a',
    ('#### ',
     '#   #',
     '#### ',
     '#   #',
     '#### ') : 'b',
    (' ### ',
     '#   #',
     '#    ',
     '#   #',
     ' ### ') : 'c',
    ('#### ',
     '#   #',
     '#   #',
     '#   #',
     '#### ') : 'd',
    ('#####',
     '#    ',
     '#### ',
     '#    ',
     '#####') : 'e',
    (' ####',
     '#    ',
     '#   #',
     '#   #',
     ' ####') : 'g',
    ('#   #',
     '#   #',
     '#####',
     '#   #',
     '#   #') : 'h',
    (' ### ',
     '  # ',
     '  # ',
     '  # ',
     ' ### ') : 'i',
    (' ### ',
     '  # ',
     '  # ',
     '  # ',
     '## ') : 'j',
    ('#   #',
     '#  #',
     '###',
     '#  #',
     '#   #') : 'k',
    ('#    ',
     '#    ',
     '#    ',
     '#    ',
     '#####') : 'l',
    ('#   #',
     '## ##',
     '# # #',
     '#   #',
     '#   #') : 'm',
    ('#   #',
     '##  #',
     '# # #',
     '#  ##',
     '#   #') : 'n',
    (' ### ',
     '#   #',
     '#   #',
     '#   #',
     ' ### ') : 'o',
    ('#### ',
     '#   #',
     '#### ',
     '#    ',
     '#    ') : 'p',
    ('#### ',
     '#   #',
     '#### ',
     '#   #',
     '#   #') : 'r',
    (' ####',
     '#    ',
     ' ### ',
     '    #',
     '#### ') : 's',
    ('#   #',
     '#   #',
     ' ### ',
     '  # ',
     '  # ') : 'y',

    (' ####',
     '#  ##',
     '# # #',
     '##  #',
     '#### ') : '0',
    (' ##',
     '  # ',
     '  # ',
     '  # ',
     ' ### ') : '1',
    ('#### ',
     '    #',
     '#### ',
     '    #',
     '#### ') : '3',
    ('#####',
     '   #',
     '  # ',
     '  # ',
     '  # ') : '7',

    (' ### ',
     '#   #',
     '# ###',
     '# # #',
     ' ### ') : '@',
    ('     ',
     '     ',
     '     ',
     ' ##',
     ' ##') : '.',
}


def collapse_tuple(tup):
    """Drop trailing empty strings, then unwrap a one-element tuple.

    Trailing '' items (as produced by unmatched regex groups) are removed
    first.  If exactly one item remains it is returned on its own;
    otherwise the trimmed tuple is returned.  A tuple that is empty, or
    that contains nothing but empty strings, collapses to the empty tuple
    instead of raising IndexError.

    >>> collapse_tuple((1,))
    1
    >>> collapse_tuple((1,2))
    (1, 2)
    >>> collapse_tuple((1, '', ''))
    1
    >>> collapse_tuple(('', ''))
    ()
    >>> collapse_tuple(())
    ()
    """
    # Find the end of the meaningful prefix; the bounds check is what
    # keeps an all-empty input from indexing into an empty tuple.
    end = len(tup)
    while end > 0 and tup[end - 1] == '':
        end -= 1

    trimmed = tup[:end]

    if len(trimmed) == 1:
        return trimmed[0]

    return trimmed


def get_class_names(L):
    """Return the class names for a list of CSS selectors.

    Selectors starting with "* html" (IE hack rules) are dropped; every
    remaining selector has its first character (the leading dot) removed.

    >>> get_class_names(['.klas', '* html .klas2'])
    ['klas']
    """
    return [selector[1:]
            for selector in L
            if not selector.startswith('* html')]


def iter_rules(css_rules):
    """Iterate over the rule sets found in a CSS source string.

    Yields one tuple per "selectors { declarations }" group:
        ([class, class, ...], {key: value, ...})
    where each class is a CSS class name (see get_class_names) and the
    dictionary holds the declarations attached to those classes.

    A declaration is split on its first colon only, so values that
    themselves contain a colon (URLs, for instance) are kept intact.
    Declarations without any colon carry no key/value pair and are
    skipped.

    >>> rules = '.klas {width: 5px; height: 1px;}'
    >>> list(iter_rules(rules))
    [(['klas'], {'width': '5px', 'height': '1px'})]

    >>> rules = '.klas {background: url(http://example.com/x.png)}'
    >>> list(iter_rules(rules))
    [(['klas'], {'background': 'url(http://example.com/x.png)'})]
    """
    regex = r'([^{]+?){([^}]*)}'

    def clean(items):
        # Strip surrounding whitespace and drop items that end up empty
        # (e.g. the piece after a trailing ';' or ',').
        stripped = [item.strip() for item in items]
        return [item for item in stripped if item != '']

    for selectors, body in re.findall(regex, css_rules):
        classes = get_class_names(clean(selectors.split(',')))

        rules_dict = {}
        for declaration in clean(body.split(';')):
            # maxsplit=1: only the first colon separates key from value.
            parts = declaration.split(':', 1)
            if len(parts) != 2:
                # No colon at all -- nothing to record for this entry.
                continue
            key, value = parts
            rules_dict[key.strip()] = value.strip()

        yield (classes, rules_dict)


def compute_rules(css_rules):
    """Merge every rule set in @css_rules into one dictionary per class.

    Rule sets are applied in source order, so a later declaration for the
    same class and key overrides an earlier one.

    >>> rules = '.klas, .klas2 {width:5px;} .klas {width: 10px;}'
    >>> compute_rules(rules)
    {'klas2': {'width': '5px'}, 'klas': {'width': '10px'}}
    """
    merged = {}

    for names, declarations in iter_rules(css_rules):
        for name in names:
            # setdefault creates the per-class dictionary on first sight.
            merged.setdefault(name, {}).update(declarations)

    return merged


def pixel2int(value):
    """Convert a CSS pixel length such as '5px' to an integer.

    Every 'px' occurrence is removed before conversion.  Anything that
    still is not a valid integer literal yields 0.
    """
    digits = re.sub(r'px', '', value)

    try:
        number = int(digits)
    except ValueError:
        number = 0

    return number


class Line(str):
    """A row of decoded pixels: a string that also carries a left padding.

    decode_css() wraps every decoded row in a Line and sets padding_left
    to the width of the row's left border.  decode_blocks() reads the
    attribute when it overlays a nested row on top of its parent: the
    nested row is shifted right by that many columns.
    """
    # will affect child elements position
    # (class-level default; instances without a left border keep 0)
    padding_left = 0


def decode_css(css_rules, foreground_colors=('#000', 'black')):
    """Return dictionary of blocks associated with given CSS classes.

    Every class found in @css_rules is rendered to a Line made of '#'
    (foreground) and ' ' (background) characters, laid out left to right
    as: left border, content box ('width' columns coloured by
    'background'), right border, right margin.

    @foreground_colors is the collection of colour values drawn as '#';
    any other colour is drawn as ' '.

    Borders are understood only in the three-token form
    "<width> <style> <color>"; a style of 'none', or any other form
    (e.g. a bare "none"), contributes no columns.

    The returned Line's padding_left equals the rendered width of the
    left border, whichever other properties are present.

    >>> rules = '.line {width:5px; background:#000;}'
    >>> decode_css(rules)
    {'line': '#####'}

    >>> rules = '.line {width: 4px}'
    >>> decode_css(rules)
    {'line': '    '}

    >>> rules = '.line {width: 4px; background: black; margin-right: 1px}'
    >>> decode_css(rules)
    {'line': '#### '}

    >>> rules = '.line {width: 4px} .line {border-right: 1px solid #000}'
    >>> decode_css(rules)
    {'line': '    #'}

    >>> rules = '.line {width: 2px} .line {border-right: 2px solid #000; border-left: 1px solid transparent}'
    >>> decode_css(rules)
    {'line': '   ##'}

    >>> rules = '.line {width: 2px; border-left: 1px solid #000;}'
    >>> result = decode_css(rules)
    >>> result
    {'line': '#  '}
    >>> isinstance(result['line'], Line)
    True
    >>> result['line'].padding_left == 1
    True

    >>> rules = '.line {width: 2px; border-left: 1px solid #000; margin-right: 1px}'
    >>> result = decode_css(rules)
    >>> result
    {'line': '#   '}
    >>> result['line'].padding_left
    1

    >>> rules = '.line {width: 2px; border-left: none}'
    >>> decode_css(rules)
    {'line': '  '}
    """
    def make_color(color):
        if color in foreground_colors:
            return '#'
        return ' '

    def render_border(value):
        # Render a border declaration to its pixel columns, or '' when
        # the border is absent, disabled, or written in a form that is
        # not understood.
        parts = value.split()
        if len(parts) != 3:
            return ''
        length, style, color = parts
        if style == 'none':
            return ''
        return make_color(color) * pixel2int(length)

    blocks = {}

    for clas, rules in compute_rules(css_rules).items():
        background = make_color(rules.get('background', 'transparent'))
        content = background * pixel2int(rules.get('width', '0'))

        left = render_border(rules.get('border-left', ''))
        right = render_border(rules.get('border-right', ''))
        margin = ' ' * pixel2int(rules.get('margin-right', '0'))

        # Build the Line exactly once and set padding_left last: string
        # concatenation returns a plain str, so wrapping the row again
        # after the attribute is set would silently reset it.
        line = Line(left + content + right + margin)
        line.padding_left = len(left)
        blocks[clas] = line

    return blocks


def decode_html(html_blocks, letter_delimiter='outer'):
    """Turn HTML source into a list of letters, each a list of row classes.

    A letter is a <div> whose class is @letter_delimiter; each <div>
    directly inside it is one row.  A row is reported as its class name,
    or as a (row class, nested class) tuple when it contains nested <div>
    elements.

    >>> html = '<div class="outer"><div class="dl"></div><div class="io"></div></div>'
    >>> decode_html(html)
    [['dl', 'io']]

    >>> html = '<div class="outer"><div class="mu"><div class="ua"></div></div></div>'
    >>> decode_html(html)
    [[('mu', 'ua')]]

    >>> html = '<div class="outer"></div>'
    >>> decode_html(html)
    [[]]
    """
    letter_pattern = re.compile(
        r'<div class="%s"[^>]*>((?:<div class="[^"]+">(?:<div class="[^"]+"></div>)*</div>)*)</div>' % letter_delimiter)
    row_pattern = re.compile(
        r'<div class="([^"]+)">(?:<div class="([^"]+)"></div>)*</div>')

    return [
        # collapse_tuple reduces ('row', '') to plain 'row'.
        [collapse_tuple(groups) for groups in row_pattern.findall(letter_body)]
        for letter_body in letter_pattern.findall(html_blocks)
    ]


def decode_blocks(blocks, rules):
    """Translate a sequence of class names into their rendered rows.

    Each item of @blocks is either a class name, looked up directly in
    @rules, or a tuple of class names whose rows are overlaid left to
    right: non-space characters of a later row replace the characters
    beneath them, and the result keeps the length of the first row.

    >>> blocks = ['klas1', 'klas2', 'klas3']
    >>> rules = {'klas1': ' # ', 'klas2': '  #', 'klas3': '###'}
    >>> decode_blocks(blocks, rules)
    [' # ', '  #', '###']

    >>> blocks = ['klas1', ('klas2a', 'klas2b'), 'klas3']
    >>> rules = {'klas1': ' # ', 'klas2a': '   ', 'klas2b': '  #', 'klas3': '###'}
    >>> decode_blocks(blocks, rules)
    [' # ', '  #', '###']

    >>> blocks = [('klas1a', 'klas1b'), 'klas2']
    >>> klas1a = Line('   ')
    >>> klas1a.padding_left = 1
    >>> rules = {'klas1a': klas1a, 'klas1b': '#', 'klas2': '#'}
    >>> decode_blocks(blocks, rules)
    [' # ', '#']
    """
    def overlay(base, top):
        # A base row with a left padding pushes the overlaid row right.
        if hasattr(base, 'padding_left'):
            top = ' ' * base.padding_left + top

        chars = []
        for index, under in enumerate(base):
            if index < len(top) and top[index] != ' ':
                chars.append(top[index])
            else:
                chars.append(under)

        return ''.join(chars)

    def resolve(block):
        if not isinstance(block, tuple):
            return rules[block]

        merged = rules[block[0]]
        for name in block[1:]:
            merged = overlay(merged, rules[name])
        return merged

    return [resolve(block) for block in blocks]


def decode_letter(block_list):
    """Return the character whose glyph in LETTERS equals @block_list.

    The rows are compared as a tuple; '?' is returned when no glyph
    matches.
    """
    return LETTERS.get(tuple(block_list), '?')


def decode_message(message, rules):
    """Decode a sequence of letters (lists of row classes) into text.

    An empty list stands for a space; every other letter is rendered with
    decode_blocks() and recognised with decode_letter().
    """
    pieces = []

    for glyph in message:
        if glyph == []:
            pieces.append(' ')
            continue
        pieces.append(decode_letter(decode_blocks(glyph, rules)))

    return ''.join(pieces)


def _read_stream(stream):
    contents = stream.read()
    stream.close()
    return contents

def read_file(filename):
    """Return the whole contents of the local file @filename."""
    # open() is the documented way to open a file; calling the file type
    # directly is discouraged.
    return _read_stream(open(filename))

def read_url(url):
    """Fetch @url and return the whole response body."""
    response = urllib.urlopen(url)
    return _read_stream(response)


def find_css_url(url, html_content):
    """Return the absolute URL of the first stylesheet linked from a page.

    @url is the address @html_content was fetched from; the stylesheet's
    href is resolved against it, so relative ("font.css"), root-relative
    ("/css/font.css") and absolute ("http://...") hrefs all work.

    Returns None when no matching <link> element is found.

    >>> html = '<link rel="stylesheet" href="font.css" type="text/css">'
    >>> find_css_url('http://example.com/menu/page.html', html)
    'http://example.com/menu/font.css'
    >>> html = '<link rel="stylesheet" href="/css/font.css" type="text/css">'
    >>> find_css_url('http://example.com/menu/page.html', html)
    'http://example.com/css/font.css'
    """
    # Imported here so the block brings its own dependency; the module
    # lives under a different name depending on the Python version.
    try:
        from urlparse import urljoin
    except ImportError:
        from urllib.parse import urljoin

    regex = r'<link rel="[^"]+" href="([^"]+)" type="text/css">'
    match = re.search(regex, html_content)

    if match is None:
        return None

    return urljoin(url, match.group(1))


def main(argv=None):
    """Command-line entry point.

    Options:
        -c FILE, --css=FILE    read CSS rules from a local file
        -h FILE, --html=FILE   read HTML source from a local file
        -d URL,  --decode=URL  fetch a page and its stylesheet, then
                               print the decoded message
        -t,      --test        run the module's doctests and exit

    With only CSS the decoded rows of every class are printed; with only
    HTML the class structure is printed; with both (and no --decode) the
    raw rows of every letter are printed.

    @argv defaults to sys.argv[1:].  Exits with status 1 on a usage
    error or when --decode cannot locate a stylesheet.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        options, args = getopt.getopt(argv,
            'c:d:h:t',
            ['css=', 'decode=', 'html=', 'test'])
    except getopt.GetoptError:
        # Fetched through sys.exc_info() so the handler does not rely on
        # version-specific "except ... as/," syntax.
        ex = sys.exc_info()[1]
        sys.stderr.write("Error: " + ex.msg + "\n")
        sys.exit(1)

    css_file = html_file = None
    decode = False

    for opt, val in options:
        if opt in ('-c', '--css'):
            css_file = read_file(val)
        elif opt in ('-d', '--decode'):
            decode = True
            html_file = read_url(val)
            css_url = find_css_url(val, html_file)
            if css_url is None:
                # Without this check None would be passed on as a URL and
                # the script would die with an unrelated traceback.
                sys.stderr.write("Error: no stylesheet link found in " + val + "\n")
                sys.exit(1)
            css_file = read_url(css_url)
        elif opt in ('-h', '--html'):
            html_file = read_file(val)
        elif opt in ('-t', '--test'):
            doctest.testmod()
            sys.exit(0)

    if not css_file and not html_file:
        sys.stderr.write("Error: use -h and/or -c option." + "\n")
        sys.exit(1)
    elif css_file and not html_file:
        for key, value in decode_css(css_file).items():
            print("%s -> \"%s\"" % (key, value))
    elif html_file and not css_file:
        print(decode_html(html_file))
    else:
        blocks = decode_html(html_file)
        rules = decode_css(css_file)

        # Decode given URL.
        if decode:
            print(decode_message(blocks, rules))
        else:
            # Raw output.
            for letter in blocks:
                for line in decode_blocks(letter, rules):
                    print(".%s." % line)


if __name__ == '__main__':
    main()

