Paul Paul - 3 months ago 17
Python Question

Transform list with regex

I have a list whose elements look like the ones below. The strings may change, but the formats stay similar:

["Radio0","Tether0","Serial0/0","Eth0/0","Eth0/1","Eth1/0","Eth1/1","vlanX","modem0","modem1","modem2","modem3","modem6"]


I would like to transform it into the list below. As you can see, duplicates of the same kind of string (such as Eth) are removed, leaving just one occurrence in the new list, and the numbers are replaced with X and Y to make the entries more generic:

["RadioX","TetherX","SerialX/Y","EthX/Y","vlanX","modemX"]


I am messing around with different regexes and my method is quite messy, so I would be interested in any elegant solutions you can think of.

Here is some code for it that could be improved on. Also, set does not preserve order, so that should be improved too:

import re

a = ["Radio0","Tether0","Serial0/0","Eth0/0","Eth0/1","Eth0/2","Eth1/0","vlanX","modem0","modem1","modem2","modem3","modem6"]
c = []
for i in a:
    # Split on every single digit; b[0] is the text before the first digit.
    b = re.split("[0-9]", i)
    if "/" in i:
        # Names of the form "<prefix><n>/<m>" become "<prefix>X/Y".
        c.append(b[0] + "X/Y")
    elif len(b) > 1:
        # At least one digit was found: replace the numeric part with "X".
        c.append(b[0] + "X")
    else:
        # No digit at all: keep the name itself. Appending the list `b`
        # here would make set(c) fail, because lists are not hashable.
        c.append(i)
# A set removes the duplicates but does not preserve the original order.
print(set(c))

set(['modemX', 'TetherX', 'RadioX', 'vlanX', 'SerialX/Y', 'EthX/Y'])


Possible improvement on set for preserving order:

# Order-preserving de-duplication of `c`: keep the first occurrence of each
# item. A plain loop is used instead of a comprehension because the work is
# done purely through the side effect of append().
unique = []
for item in c:
    if item not in unique:
        unique.append(item)
print(unique)

['RadioX', 'TetherX', 'SerialX/Y', 'EthX/Y', 'vlanX', 'modemX']

BPL BPL
Answer
import re


def particular_case(string):
    """Return *string* with its numeric parts replaced by placeholders.

    A "<digits>/<digits>" pair is rewritten to "X/Y" first; every remaining
    run of digits is then rewritten to "X". Strings without digits are
    returned unchanged.

    >>> particular_case("Eth0/1")
    'EthX/Y'
    >>> particular_case("modem6")
    'modemX'
    """
    # Operate on the argument itself rather than on a global loop variable,
    # so the function works no matter where it is called from.
    pairs_replaced = re.sub(r"\d+/\d+", "X/Y", string)
    return re.sub(r"\d+", "X", pairs_replaced)


def generic_case(string, letters=("X", "Y", "Z")):
    """Return *string* with each run of digits replaced by a placeholder.

    The first run of digits is replaced by ``letters[0]``, the second by
    ``letters[1]`` and so on, cycling back to the start of *letters* when
    there are more digit runs than letters. Text between and after the
    digit runs is kept as is; a string without digits is returned unchanged.

    Args:
        string: The text to generalize.
        letters: Indexable sequence of placeholder strings.

    Returns:
        The generalized string.

    >>> generic_case("Eth0/1")
    'EthX/Y'
    >>> generic_case("port1a")
    'portXa'
    """
    pieces = []
    last_index = 0

    for index, match in enumerate(re.finditer(r"\d+", string)):
        # Literal text between the previous digit run and this one.
        pieces.append(string[last_index:match.start()])
        pieces.append(letters[index % len(letters)])
        last_index = match.end()

    # Keep whatever follows the last digit run (the whole string when there
    # were no digits at all).
    pieces.append(string[last_index:])
    return "".join(pieces)

if __name__ == "__main__":
    # Demo: generalize a list of interface names with both strategies and
    # print the de-duplicated results in first-seen order.
    words = ["Radio0", "Tether0", "Serial0/0", "Eth0/0", "Eth0/1", "Eth1/0",
             "Eth1/1", "vlanX", "modem0", "modem1", "modem2", "modem3", "modem6"]

    result = []   # unique outputs of particular_case
    result2 = []  # unique outputs of generic_case

    for w in words:
        new_value = particular_case(w)

        if new_value not in result:
            result.append(new_value)

        new_value = generic_case(w)

        if new_value not in result2:
            result2.append(new_value)

    # print(...) with a single argument gives the same output on Python 2
    # and Python 3, unlike the statement form.
    print(result)
    print(result2)
Comments