Applied Programming/RegEx/Python3

regex.py

"""This program demonstrates regex and dictionary processing.

Input:
    None

Output:
    The cat in the hat.
    {'the': 2, 'cat': 1, 'in': 1, 'hat': 1}
    cat:  1
    hat:  1
    in:   1
    the:  2

References:
    * https://en.wikiversity.org/wiki/Python_Programming/Dictionaries

"""

import re
import sys


def get_word_count(text):
    """Gets the count of each word in text as a dictionary.

    Words are runs of word characters (letters, digits, underscore) and
    are counted case-insensitively under their lowercase form.

    Args:
        text (string): A string of words to be counted.

    Returns:
        dictionary (dictionary): A dictionary of words and word count,
            or None if text is not a string.

    Except:
        ValueError: text is not of type string. The error is reported
            on standard output rather than propagated.

    """
    try:
        # Validate explicitly so the handler below is actually reachable;
        # re.findall would otherwise raise TypeError for non-strings.
        if not isinstance(text, str):
            raise ValueError
        # Match the words themselves rather than "anything followed by a
        # separator", so the final word is counted even without trailing
        # punctuation and leading separators never yield an empty word.
        words = re.findall(r"\w+", text)
        dictionary = {}
        for word in words:
            word = word.lower()
            dictionary[word] = dictionary.get(word, 0) + 1
        return dictionary
    except ValueError:
        print("Text must be a string.")
        # Wrap in a tuple so a tuple argument is formatted, not unpacked.
        print("ValueError: '%s' is invalid." % (text,))
        return None


def display_word_count(dictionary):
    """Displays a dictionary of keys and counts sorted in key order.

    Each line shows the key followed by a colon, left-justified to five
    characters, then the count right-justified to two characters.

    Args:
        dictionary (dictionary): A dictionary of keys and counts. Dict
            subclasses such as collections.Counter are accepted.

    Returns:
        None

    Except:
        AttributeError: dictionary is not a dict. The error is reported
            on standard output rather than propagated.

    """
    try:
        # isinstance (not an exact type comparison) so dict subclasses
        # are displayed instead of being rejected.
        if not isinstance(dictionary, dict):
            raise AttributeError
        for key in sorted(dictionary):
            print("%-5s%2d" % (key + ":", dictionary[key]))
    except AttributeError:
        print("AttributeError: dictionary not of class type dict.")
        print("Received '%s' of type %s." % (dictionary, type(dictionary)))


def main():
    """Runs the main program logic.

    Prints a sample sentence, the dictionary of its word counts, and
    then the counts in key order. Any unexpected error is reported with
    its message, file, and line instead of propagating.

    """

    try:
        text = "The cat in the hat."
        print(text)
        dictionary = get_word_count(text)
        print(dictionary)
        display_word_count(dictionary)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        error, traceback = sys.exc_info()[1:]
        print("Unexpected error.")
        print("Error:", error)
        print("File: ", traceback.tb_frame.f_code.co_filename)
        print("Line: ", traceback.tb_lineno)


# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

test_regex.py

"""This file tests the regex example program using PyTest.

Run "pytest" in this folder to automatically run these tests.

Expected output:
    4 passed in 0.xx seconds

References:
    * https://realpython.com/python-testing/
    * http://docs.pytest.org/en/latest/getting-started.html

"""
import pytest
import regex


def test_get_word_count_valid_input():
    """Checks the word counts returned for a simple sentence."""
    actual = regex.get_word_count("The cat in the hat.")
    assert actual == {'the': 2, 'cat': 1, 'in': 1, 'hat': 1}


def test_get_word_count_ignores_non_string_input():
    """Checks the word counts for the last value of a mixed-type list.

    NOTE(review): only the final element (the string) is ever passed to
    get_word_count; the non-string values are never exercised.
    """
    candidates = [True, 0, 1.00, "The cat in the hat."]
    text = candidates.pop()

    word_counts = regex.get_word_count(text)
    assert word_counts == {'the': 2, 'cat': 1, 'in': 1, 'hat': 1}


def test_display_word_count_valid_input(capsys):
    """Checks that counts are printed one per line in key order."""
    regex.display_word_count({'the': 2, 'cat': 1, 'in': 1, 'hat': 1})
    printed = capsys.readouterr().out
    assert "cat:  1\nhat:  1\nin:   1\nthe:  2" in printed


def test_display_word_count_raises_AttributeError_non_dictionary_args(capsys):
    """Checks that every non-dictionary argument is reported as an error.

    display_word_count handles the AttributeError itself and prints a
    message, so the test inspects captured output instead of expecting
    an exception. A valid dictionary is checked last to confirm normal
    output still works after the failures.
    """
    expected_error = "AttributeError: dictionary not of class type dict."
    for value in [True, 0, 1.00, ['This', 'Will', 'Fail']]:
        regex.display_word_count(value)
        # readouterr() also resets the capture, isolating each value.
        captured = capsys.readouterr()
        assert expected_error in captured.out

    expected_output = "cat:  1\nhat:  1\nin:   1\nthe:  2"
    regex.display_word_count({'the': 2, 'cat': 1, 'in': 1, 'hat': 1})
    captured = capsys.readouterr()
    assert expected_output in captured.out
    assert expected_error not in captured.out

Try It

Copy and paste the code above into one of the following free online development environments or use your own Python3 compiler / interpreter / IDE.

See Also