#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Imported inflection library.

    inflection
    ~~~~~~~~~~~~

    A port of Ruby on Rails' inflector to Python.

    :copyright: (c) 2012-2020 by Janne Vanhala

    :license: MIT, see LICENSE for more details.
"""
import re
import unicodedata

__version__ = "0.5.1"

# Ordered (pattern, replacement) rules used by :func:`pluralize`.
# The first pattern that matches wins, so more specific rules come first and
# the catch-all ``$`` rule comes last.
PLURALS = [
    (r"(?i)(quiz)$", r"\1zes"),
    (r"(?i)^(oxen)$", r"\1"),
    (r"(?i)^(ox)$", r"\1en"),
    (r"(?i)(m|l)ice$", r"\1ice"),
    (r"(?i)(m|l)ouse$", r"\1ice"),
    (r"(?i)(passer)s?by$", r"\1sby"),
    (r"(?i)(matr|vert|ind)(?:ix|ex)$", r"\1ices"),
    (r"(?i)(x|ch|ss|sh)$", r"\1es"),
    (r"(?i)([^aeiouy]|qu)y$", r"\1ies"),
    (r"(?i)(hive)$", r"\1s"),
    (r"(?i)([lr])f$", r"\1ves"),
    (r"(?i)([^f])fe$", r"\1ves"),
    (r"(?i)sis$", "ses"),
    (r"(?i)([ti])a$", r"\1a"),
    (r"(?i)([ti])um$", r"\1a"),
    (r"(?i)(buffal|potat|tomat)o$", r"\1oes"),
    (r"(?i)(bu)s$", r"\1ses"),
    (r"(?i)(alias|status)$", r"\1es"),
    (r"(?i)(octop|vir)i$", r"\1i"),
    (r"(?i)(octop|vir)us$", r"\1i"),
    (r"(?i)^(ax|test)is$", r"\1es"),
    (r"(?i)s$", "s"),
    (r"$", "s"),
]

# Ordered (pattern, replacement) rules used by :func:`singularize`.
# As with PLURALS, the first matching pattern is the one applied.
SINGULARS = [
    (r"(?i)(database)s$", r"\1"),
    (r"(?i)(quiz)zes$", r"\1"),
    (r"(?i)(matr)ices$", r"\1ix"),
    (r"(?i)(vert|ind)ices$", r"\1ex"),
    (r"(?i)(passer)sby$", r"\1by"),
    (r"(?i)^(ox)en", r"\1"),
    (r"(?i)(alias|status)(es)?$", r"\1"),
    (r"(?i)(octop|vir)(us|i)$", r"\1us"),
    (r"(?i)^(a)x[ie]s$", r"\1xis"),
    (r"(?i)(cris|test)(is|es)$", r"\1is"),
    (r"(?i)(shoe)s$", r"\1"),
    (r"(?i)(o)es$", r"\1"),
    (r"(?i)(bus)(es)?$", r"\1"),
    (r"(?i)(m|l)ice$", r"\1ouse"),
    (r"(?i)(x|ch|ss|sh)es$", r"\1"),
    (r"(?i)(m)ovies$", r"\1ovie"),
    (r"(?i)(s)eries$", r"\1eries"),
    (r"(?i)([^aeiouy]|qu)ies$", r"\1y"),
    (r"(?i)([lr])ves$", r"\1f"),
    (r"(?i)(tive)s$", r"\1"),
    (r"(?i)(hive)s$", r"\1"),
    (r"(?i)([^f])ves$", r"\1fe"),
    (r"(?i)(t)he(sis|ses)$", r"\1hesis"),
    (r"(?i)(s)ynop(sis|ses)$", r"\1ynopsis"),
    (r"(?i)(p)rogno(sis|ses)$", r"\1rognosis"),
    (r"(?i)(p)arenthe(sis|ses)$", r"\1arenthesis"),
    (r"(?i)(d)iagno(sis|ses)$", r"\1iagnosis"),
    (r"(?i)(b)a(sis|ses)$", r"\1asis"),
    (r"(?i)(a)naly(sis|ses)$", r"\1nalysis"),
    (r"(?i)([ti])a$", r"\1um"),
    (r"(?i)(n)ews$", r"\1ews"),
    (r"(?i)(ss)$", r"\1"),
    (r"(?i)s$", ""),
]

# Words whose singular and plural forms are identical.
UNCOUNTABLES = {
    "equipment",
    "fish",
    "information",
    "jeans",
    "money",
    "rice",
    "series",
    "sheep",
    "species",
}


def _irregular(singular, plural):
    """Register rules for an irregular singular/plural word pair.

    A convenience function that inserts the appropriate rules at the front
    of ``PLURALS`` and ``SINGULARS`` so they take precedence over the
    generic rules (and over irregular pairs registered earlier).

    Args:
        singular: (str) An irregular word in singular form
        plural: (str) An irregular word in plural form

    Returns:
        None. ``PLURALS`` and ``SINGULARS`` are modified in place.
    """
    def caseinsensitive(string):
        # Build a character class per character, e.g. "ow" -> "[oO][wW]".
        return "".join("[" + char + char.upper() + "]" for char in string)

    if singular[0].upper() == plural[0].upper():
        # Same first letter: capture it so the replacement keeps the
        # caller's capitalisation of that letter.
        PLURALS.insert(0, (
            r"(?i)(%s)%s$" % (singular[0], singular[1:]),
            r"\1" + plural[1:]
        ))
        PLURALS.insert(0, (
            r"(?i)(%s)%s$" % (plural[0], plural[1:]),
            r"\1" + plural[1:]
        ))
        SINGULARS.insert(0, (
            r"(?i)(%s)%s$" % (plural[0], plural[1:]),
            r"\1" + singular[1:]
        ))
    else:
        # Different first letters: the first letter cannot be carried over,
        # so register separate upper-case and lower-case variants.
        PLURALS.insert(0, (
            r"%s%s$" % (singular[0].upper(),
                        caseinsensitive(singular[1:])),
            plural[0].upper() + plural[1:]
        ))
        PLURALS.insert(0, (
            r"%s%s$" % (singular[0].lower(),
                        caseinsensitive(singular[1:])),
            plural[0].lower() + plural[1:]
        ))
        PLURALS.insert(0, (
            r"%s%s$" % (plural[0].upper(), caseinsensitive(plural[1:])),
            plural[0].upper() + plural[1:]
        ))
        PLURALS.insert(0, (
            r"%s%s$" % (plural[0].lower(), caseinsensitive(plural[1:])),
            plural[0].lower() + plural[1:]
        ))
        SINGULARS.insert(0, (
            r"%s%s$" % (plural[0].upper(), caseinsensitive(plural[1:])),
            singular[0].upper() + singular[1:]
        ))
        SINGULARS.insert(0, (
            r"%s%s$" % (plural[0].lower(), caseinsensitive(plural[1:])),
            singular[0].lower() + singular[1:]
        ))


def camelize(string, uppercase_first_letter=True):
    """Convert strings to CamelCase.

    Examples::

        >>> camelize("device_type")
        "DeviceType"
        >>> camelize("device_type", False)
        "deviceType"

    :func:`camelize` can be thought of as an inverse of :func:`underscore`,
    although there are some cases where that does not hold::

        >>> camelize(underscore("IOError"))
        "IoError"

    :param uppercase_first_letter: if set to `True` :func:`camelize` converts
        strings to UpperCamelCase. If set to `False` :func:`camelize` produces
        lowerCamelCase. Defaults to `True`.

    Args:
        string: (str) A word to camelize.
        uppercase_first_letter: (bool) Indicator to capitalize the first
            letter.

    Returns:
        A string that has been converted to camelcase. An empty input
        yields an empty string.
    """
    if uppercase_first_letter:
        return re.sub(r"(?:^|_)(.)", lambda m: m.group(1).upper(), string)
    # Slice instead of indexing so that an empty string does not raise
    # IndexError; for non-empty input string[:1] equals string[0].
    return string[:1].lower() + camelize(string)[1:]


def dasherize(word):
    """Replace underscores with dashes in the string.

    Example::

        >>> dasherize("puni_puni")
        "puni-puni"

    Args:
        word: (str) A word that contains underscores.

    Returns:
        A string with underscores replaced with dashes.
    """
    return word.replace("_", "-")


def humanize(word):
    """Changes text into conversational english.

    Capitalize the first word and turn underscores into spaces and strip a
    trailing ``"_id"``, if any. Like :func:`titleize`, this is meant for
    creating pretty output.

    Examples::

        >>> humanize("employee_salary")
        "Employee salary"
        >>> humanize("author_id")
        "Author"

    Args:
        word: (str) A word to convert to conversational English.

    Returns:
        A string that has been converted to conversational english.
    """
    word = re.sub(r"_id$", "", word)
    word = word.replace("_", " ")
    # Lower-case every run of ASCII letters/digits ...
    word = re.sub(r"(?i)([a-z\d]*)", lambda m: m.group(1).lower(), word)
    # ... then upper-case the leading word character, if there is one.
    word = re.sub(r"^\w", lambda m: m.group(0).upper(), word)
    return word


def ordinal(number):
    """Finds a suffix based on ordinal sequence.

    Return the suffix that should be added to a number to denote the position
    in an ordered sequence such as 1st, 2nd, 3rd, 4th.

    Examples::

        >>> ordinal(1)
        "st"
        >>> ordinal(2)
        "nd"
        >>> ordinal(1002)
        "nd"
        >>> ordinal(1003)
        "rd"
        >>> ordinal(-11)
        "th"
        >>> ordinal(-1021)
        "st"

    Args:
        number: (int) A number to denote position in an ordered sequence.

    Returns:
        A string with the number's corresponding ordinal suffix.
    """
    number = abs(int(number))
    # 11, 12 and 13 (and 111, 212, ...) take "th" despite ending in 1/2/3.
    if number % 100 in (11, 12, 13):
        return "th"
    else:
        return {
            1: "st",
            2: "nd",
            3: "rd",
        }.get(number % 10, "th")


def ordinalize(number):
    """Adds the ordinal notation to the end of a number.

    Turn a number into an ordinal string used to denote the position in an
    ordered sequence such as 1st, 2nd, 3rd, 4th.

    Examples::

        >>> ordinalize(1)
        "1st"
        >>> ordinalize(2)
        "2nd"
        >>> ordinalize(1002)
        "1002nd"
        >>> ordinalize(1003)
        "1003rd"
        >>> ordinalize(-11)
        "-11th"
        >>> ordinalize(-1021)
        "-1021st"

    Args:
        number: (int) A number to add ordinal notation.

    Returns:
        A string of a number with its ordinal notation.
    """
    return "%s%s" % (number, ordinal(number))


def parameterize(string, separator="-"):
    """Substitutes special characters with provided separator.

    Replace special characters in a string so that it may be used as part of a
    "pretty" URL.

    Example::

        >>> parameterize(u"Donald E. Knuth")
        "donald-e-knuth"

    Args:
        string: (str) A string to substitute into a url.
        separator: (str) A delimiter to separate each word by.

    Returns:
        A string with special characters swapped with the provided separator.
    """
    def insert_separator(_match):
        # A callable replacement inserts the separator literally; passing it
        # as a template string would make ``re.sub`` interpret backslashes.
        return separator

    string = transliterate(string)
    # Turn unwanted chars into the separator
    string = re.sub(r"(?i)[^a-z0-9\-_]+", insert_separator, string)
    if separator:
        re_sep = re.escape(separator)
        # No more than one of the separator in a row. The group makes the
        # quantifier apply to the whole separator, not just its last
        # character, so multi-character separators are collapsed as well.
        string = re.sub(r"(?:%s){2,}" % re_sep, insert_separator, string)
        # Remove leading/trailing separator.
        string = re.sub(r"(?i)^%(sep)s|%(sep)s$" % {"sep": re_sep}, "", string)

    return string.lower()


def pluralize(word):
    """Return the plural form of a word.

    Examples::

        >>> pluralize("post")
        "posts"
        >>> pluralize("octopus")
        "octopi"
        >>> pluralize("sheep")
        "sheep"
        >>> pluralize("CamelOctopus")
        "CamelOctopi"

    Args:
        word: (str) A word to make plural.

    Returns:
        A string of a word in its plural form. Empty input and words listed
        in ``UNCOUNTABLES`` are returned unchanged.
    """
    if not word or word.lower() in UNCOUNTABLES:
        return word
    else:
        # Apply only the first rule that matches.
        for rule, replacement in PLURALS:
            if re.search(rule, word):
                return re.sub(rule, replacement, word)
        return word


def singularize(word):
    """Return the singular form of a word, the reverse of :func:`pluralize`.

    Examples::

        >>> singularize("posts")
        "post"
        >>> singularize("octopi")
        "octopus"
        >>> singularize("sheep")
        "sheep"
        >>> singularize("word")
        "word"
        >>> singularize("CamelOctopi")
        "CamelOctopus"

    Args:
        word: (str) A word to make singular.

    Returns:
        A string of a word in its singular form.
    """
    # An uncountable at the end of the word (on a word boundary) means the
    # word is returned untouched.
    for inflection in UNCOUNTABLES:
        if re.search(r"(?i)\b(%s)\Z" % inflection, word):
            return word

    # Apply only the first rule that matches.
    for rule, replacement in SINGULARS:
        if re.search(rule, word):
            return re.sub(rule, replacement, word)
    return word


def tableize(word):
    """Splits a word up by underscores and makes lowercase (tableized).

    Create the name of a table like Rails does for models to table names. This
    method uses the :func:`pluralize` method on the last word in the string.

    Examples::

        >>> tableize("RawScaledScorer")
        "raw_scaled_scorers"
        >>> tableize("egg_and_ham")
        "egg_and_hams"
        >>> tableize("fancyCategory")
        "fancy_categories"

    Args:
        word: (str) A word to put into table format.

    Returns:
        A string of a word in table format.
    """
    return pluralize(underscore(word))


def titleize(string):
    """Capitalizes each word in a sentence.

    Capitalize all the words and replace some characters in the string to
    create a nicer looking title. :func:`titleize` is meant for creating pretty
    output.

    Examples::

        >>> titleize("man from the boondocks")
        "Man From The Boondocks"
        >>> titleize("x-men: the last stand")
        "X Men: The Last Stand"
        >>> titleize("TheManWithoutAPast")
        "The Man Without A Past"
        >>> titleize("raiders_of_the_lost_ark")
        "Raiders Of The Lost Ark"

    Args:
        string: (str) A title to capitalize.

    Returns:
        A string with a capital letter at the start of each word.
    """
    return re.sub(
        r"\b('?[a-z])",
        lambda match: match.group(1).capitalize(),
        humanize(underscore(string))
    )


def transliterate(string):
    """Turn a string into ASCII notation.

    Replace non-ASCII characters with an ASCII approximation. If no
    approximation exists, the non-ASCII character is ignored. The string must
    be ``unicode``.

    Examples::

        >>> transliterate(u"älämölö")
        u"alamolo"
        >>> transliterate(u"Ærøskøbing")
        u"rskbing"

    Args:
        string: (str) A string to convert to ASCII

    Returns:
        A string in ASCII format.
    """
    # NFKD splits accented characters into base character + combining mark;
    # encoding with "ignore" then drops everything that is not ASCII.
    normalized = unicodedata.normalize("NFKD", string)
    return normalized.encode("ascii", "ignore").decode("ascii")


def underscore(word):
    """Make an underscored, lowercase form from the expression in the string.

    Example::

        >>> underscore("DeviceType")
        "device_type"

    As a rule of thumb you can think of :func:`underscore` as the inverse of
    :func:`camelize`, though there are cases where that does not hold::

        >>> camelize(underscore("IOError"))
        "IoError"

    Args:
        word: (str) A word to make underscored.

    Returns:
        A string with underscores.
    """
    # Split an acronym from a following capitalised word: "IOError" -> "IO_Error".
    word = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", word)
    # Split a lower-case letter or digit from a following capital.
    word = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", word)
    word = word.replace("-", "_")
    return word.lower()


# Irregular pairs. Each call inserts at the front of the rule lists, so pairs
# registered later are checked before pairs registered earlier.
_irregular("person", "people")
_irregular("man", "men")
_irregular("human", "humans")
_irregular("child", "children")
_irregular("sex", "sexes")
_irregular("move", "moves")
_irregular("cow", "kine")
_irregular("zombie", "zombies")