162 lines
8.5 KiB
Python
162 lines
8.5 KiB
Python
# Small sample dataset (5x5 grid, three sequences, two creatures).
# All three names are reassigned with the full dataset further down before
# anything reads them, so these values only serve as a readable reference.
# NOTE: the name `input` shadows the builtin of the same name; it is kept
# because the rest of the script reads the grid through it.
input = """CCCGC
ACCCA
TATAC
GAGGC
TGTAA"""

# Sequences to look for in the grid.
sequences = ("AT", "TAC", "CG")

# One creature per line: its name, then one expected count per sequence,
# in the same order as `sequences`.
creatures = """Limaçon de Folfanga 5 3 2
Porc Taudanzain vert 6 2 5"""
|
|
|
|
input = """CGTCGGCGCACCCGAAATCGGAGGTTCGCTAGCGAGGTTCTCACAGTCAGAACTTTTCTTCTTATGGGTAGTATGATCAG
|
|
AAGGCAACTAGGTCTATTCTCGTATGCTCCCATTCATAAATTGGATTATAATACAAACTACGCGAGCATGGGATGACTAT
|
|
GAGATCGAGTCTGTGAAAGTTAAGGGCGGTTAAGACTACAACGGTTATAGGTGCAATATCGTCAAGGCGAAGCCTCGTTA
|
|
TTTGTTCTCCGATCGTCTTGTGGTCTACTAGCAATGTAAACCCCGATCACGCAACGGGTCCTACGCCCCTACGCTGGACG
|
|
ATGATTAAATTCACCGAATGTTTAACCACGCTGTTGAAGGCACATCGTATGAACTGTGGACAAGGGCTAGTTGCCCGGGG
|
|
GGTACATTCGTAGGCCATATGCCGACAAAAATGCTCATAACAAACACAGCGCGCTGGGTAGAGATCTGGAGTACATTAAT
|
|
GTGAGTACTTTGATGGGAGCGACTCACGACTATACGTGTCGCGTCGTGATCGGAAACTACTCAGCGGTCTATCTATTTGC
|
|
GTAGTAAAATCAAGGGGCAAAAACTCTAATAGTTGAACAAGGTGCGGATCCACATATTCTAGCTCATCCCATGTATCGTA
|
|
TAAAGCAAACACACCCGGACTGCTTCGATCAGTAATACCGACAACCGCATGCTGTCTAACAAACCCGATAGTAACGGAAT
|
|
GTCGCTCCCAAGATGTATAGAACTAAGCTGCACCACAGGTCTGATACATTTTTTGCCGGCGATAAGAAATTACCGGCAGT
|
|
TGGCGAATTTAGTGTCCGGGGTAGCATAAAGTGGGAATGTCGCTAGATTGAATGTACGCAACTAATTCGGGAGGTAGATG
|
|
CATATGTGGGAGGCGGGTCAGCAGCTGCAATTGGTAAGTCCCGTTTTACAGGACTGTACGAACACGTCCTACCATGATCG
|
|
TCGTATCTACTGCGGAGAGCGTTGATTTTGGGGCGTACGAGACGAATCCCCACCAAAGTAGGAGATGTCGAGCTCGCATG
|
|
GAGAGGATCGTGATTACCGTAGCATTGGCTGTTAGGAGCGTTGAAAGGTTGTTTATTGCATTTAAGCGGTAAGCCATCGA
|
|
GCGAGTGGATACGTCGGAGGCGTATGCCGCACGCCTATAGAGGGGCAAGGGTGACCATTCAAAGTACTCTAGTTGTTATG
|
|
TTCTATCGATATCATCGACTGCATCTGTGTACGGGGACGCGGCACTACCTAGGGGTTTCGAGGACTTGTGCCGCTATCCG
|
|
TGCGGGCCGAATCATTGCGATATTGCTGCATTGTATTATGGCACGCACTCGTTAACCATGACCCACGCGATTAGTGTGGG
|
|
GTTGATGCGACGAAAAGAGCTATAGACTAGTTCGAGAAGCGCAAAGATTATGCCGTTCACGCAGCCAAGAAACGTTCCGG
|
|
AGTTTCCTGACGTCGTCACTAGGAAGCCCTATTGGGACACCGGCTCTGTTCCTCGCACCTCATAGGAGTCCGCGCAACTT
|
|
TCCGCACTAGCACCCTTAATTCGTCACCGTTCCGCTACCTGTTCGAATCTGGAGACCTATCTGGTGGAACGCGGTAGGTG
|
|
GAGTTTTATTCCGGCGTTCCCGAGTCGCTCCGTGGATTAATTGTCTGCCTGTATTATGTCTATAGCCTGGCCCTGAAAGC
|
|
GGCTACCTTAGGCTCCTGCTATAAATCACTGTGATTTTAAACCCCTCGGGCAATTTTACTGTACAGCTAACGGGATCCTT
|
|
TCCGTAATCTAGCCGTAGTACTCAAGTGAAGTGCTAAGGTATTTGAGTTTCTGGTCGAAGGCGCTTTGGCACTACAATTG
|
|
CTCTAACGAACCGGACTGTCATGAGCTTCCCCGGACCCTTGACGATCATTCTTGGGGCTGGGTGGTTAGACAGCCTTATC
|
|
GGGCTTGACCCTCTGACCATTAGTAGATATACTCTGACCTTGAGGATATACGTTGTCCCGCACCTCAAAGGGGGATATTG
|
|
GAAATCCCATCCCTTCCAATAAAACGAGCGCACGCCCGTATATAGTACCAACCTTACCAGTCACATATGATGTTTATGGG
|
|
ATTAACGCAGGTGACCGCAGGCAAAATAAGGAAATGCACGATTGGTTTGAAGCCGCGATAAGTCGCTGACAGTATAGTGG
|
|
TCGTTACTTACACTTCTCGACTCGTAGAAAGGCCAAGGCCTCTCTACGAAGTAAAGCAGACTCGAACATGGAAACTCACG
|
|
AGGATCCAGGCGAAGCTGATGTTGACTACAAACGCTGCCGAATGGCTAGTTATGTCTGTGAGCCTACTACTGCTGGAGTG
|
|
GACGCCTAATTTGAGTTGGCGTTAAGGCCGTATGGGCAGATAGGGGGGAGAGGCCTACGACGGGTAAGTGATGAAGGTCG
|
|
CATCGGTGCGCTTGGACATAGCCCAAGCCGCGAGGATTAGCGTGGTTACTAACGCGATACAGTTACTTTAGTGAAGTTTG
|
|
TAGGATTGTGAGCTTTCCTCAAGGGGCCGATCCGTAAATTAAGCGGAATATGGAGATTCACTCGTATGTGGAATGATGCG
|
|
CCAGGTCGACGTATGTTGATTTAATGTACTATCGGGATAACTGGACGCATACCGCCGACTCAGTGCGGTTGTGCCTTGGG
|
|
CGTCGCCCCCCTGCTTCTGTAGGGTCATGACGCTCCGACATCTGCTGGGCTCTCTAACCGAGACGGGCTCTATGCAAGGT
|
|
TTCCGTAAAGGGGGAGTGAAGGTATATACCGAAGCTAATTGAAACCAGGAGGACGACGGTAAAATCCCAGCCAGGACTCT
|
|
GGACGCTATTGGGTCTAGGCAAGCATACCGAGAACGCCCGATAGGTTACTCACGTTGTGACGATACGATTGCGGTGGAAC
|
|
GTACCAATCTTAAAGGCGTACTGTGGTTAAAGCCTGTGACGGTATCATGGCACAATTTGTCAAGCTGCGCAACACGTCTG
|
|
AGGGACAGCCGCCTTCCTGAACCACGTCGTGCCTACGCTGTACGCGCTGGGGTACGGTCACTAGGCTCGGCACGGTAAAT
|
|
GTCGAGCGCAATACGCGGAACGGCAGGATCAACCATTACCGTGTAGCTGATACTCTGTGTGTTATCTCTACATTCTGATG
|
|
TTTCATTTTAGAGCTTAGGTGCATCCATCTTTGAACCCAGCTAACCGGCGGTGTCCGTTCACCCTAATGGCGCTGAACCT
|
|
CCGGTCCGGGTTTATCAAGGAAAAATCCGAACTCGTATGCATCGCGCGTCGTCACAAAGTTCTTTGAGATGCTTCAAGGT
|
|
TTGATCCTTGGGCGATTTACTGGCTTTACACTATCGACGGAATTATCCCAACCGGACTTATGTATACATTTCTTACACAG
|
|
CTCGTTTCGGCCAGACCCCGTTTCGTATAGCTAATGTTACGATATTTTCCTATGGGGACGATGACGCAATTAAGGAAAAA
|
|
GCATGATGCTAGATAGCACCCCCACGTAGGAAACTGTAGGATACAGGTCTGTAGCCGCACAAAGAAACCATCTTACAACT
|
|
ACAACAGGGGCAGCCTGTCAAGATGGTCAGCACGTTATTACTTTGGCCCGTGCCGCGAGCAATATAAAGTATTAGGTGCG
|
|
ACATAGTAAGAGCTTTCAGTTGCAGATAGTTGTCGGGGAGCCCGGTGAGTGTAACTCGTAGTGATGGCTCAGTTCGACCC
|
|
ACATTGGAAACTCGGTTAGTACAAATCACTTGGATTTCGCCTGAGGACCGTACAATGCCTGCCACCTAAACAGTCATGGA
|
|
GCTCAGGATCACCTATAGGGTGACATGTTGTCAACGCGCCAATGACAGGAGGGGATCTCTGAACTTCGCGCCAATCAGCT
|
|
TATAAACTACTATCTTCCGATAGCACCATCCAGAGACTTCCGCGTATGAGAGAACAACTGCCATTGTCCCTAAGGGACTG
|
|
GCCGCATAAGAAGGACCCCATTGTAACTCAACGTAAGTGTTCGACGGCTTGCATTTCTTGATTAGATCGAGCACGTCCCG
|
|
TCGGGACTGTATTTGTATTGCTCCTAGTAGAACACGCATGGCAGCACTATGTTCTTTCACCCCATCGTTCGGTGATCGGA
|
|
TTATACCCAACGATATCTATTTGACATTACATTCGTAACCTATAACTTCCTTTGCAGTGCTTTGCCTTTGCGAGTACGAA
|
|
ACTCGGCACATTTGGCAGCCTACAGACAAACCGACCTGGAGTTTCTCGACAGGCGCTAAACTTGTTTGGTGGCAGTGCGC
|
|
AGCGCTCCGTACCACCACGCGTCGGATTGATCCGGGCCAATGTCATGGCCGGGGCTGATACTTCGTTAGCGTGTAAGAAC
|
|
AGGCGCTCCTGAAGTACCTACGATACGTTCACGCATGCAAACGAGCCCTTAGAGCCTGCCCGACGTCCGTTAATATGGCG
|
|
AGCAGAAGGCATAAATGCTGTTAATCAAGCTTCCTGTACTCCCGCTTATTTACTACTGTGCCAAGTGGAGTTAATGTTGA
|
|
GACAAGGACTTGCTTGGTGACCCAGCGGGCTACAACATCCTGAACTTGCCCATGGGTCCGGAGTGTTTCTAGGGGACAAG
|
|
GGTTAACAGTATTACCGTGATATTTTCTAACCACGTCAGCCCAATTTAATTCGAAACATGTCGAGTGTCCACTTATGGGC
|
|
TACCAAGCTCTAGCGAGTGTAATCTCCGCCTAATGCTCCCCCTAGGAACGTCAGAACGGGACGCGCGTTGCGTACAGAGA
|
|
GTTCTGAAATCGGCACTATAAGTGATTAACTGGGTACCGGTCGTTCAGCATCAGAAGCGTCCATTAACCGTGAGCGCGGA
|
|
AGATACTGCAGGCTTCATCTCCTCGAGTAATTTTGATGTATGGCATGAATATATACCGTGCATGTGGTTGCTGGCATGAC
|
|
CCTGAAAGTCAGAGACTAGTTCCCAGGCCTCGTCAGCTCCTCCCGAGCGCTCACGTCACGTTGATTAATGTCTGCTTGGG
|
|
TGGTCTGCCCGTAGCCAGAACTTGGTCGGTATTCCGCACGCACTAAGGCGTGTCATAGAAGCGTCCCTGCTCACCGCGGC
|
|
GCAAGGTGAGCGTACATCTTGCCAGGACACAGGTACGGAGAATCCTATCGGGAAGGAGGCCATAACAGTGAGTACAGGTT
|
|
CTTAACCCAAACATTCCCTTGGCGGACTACAGAGTAAGACCGGCAACAGCTAAACGAGATTGCCTAGACTTAACATACTC
|
|
AGTGATAGTACTCTCCAGGATTTGCCCCGTTAGCAAGTTCGACAAGCTGCGAGCGGATCTCATAGATATCACGACGGGCT
|
|
TCTCAATCGTCGTCTAACCAAAGCTACCTATTCGTTCCCCGGGCGACTAGGTTTCTGGGGCCCGGTATGGTAAATTTGTA
|
|
TTCGTTATGGCTTTGGGGAGTAAGCACCTAAAAACTGACCTCAGGCCTGTAAATATTCGTAACCCAACACCGCTGGTCTG
|
|
TGAGGCGAAAGGTCGCCGGCGCTGCTTGTCGAAGTGGTAACTTGGCTTCTTGCATGACCCATGGATATTTCATAAAGGGA
|
|
AGGCTCGACACAATCGCACATCTGACAGAGTAGACTGATTGGTCCGTCGCGTTGTGAGAATTGGTCCAGTAAGCCCAAGT
|
|
CTTATACGTCCGTAGTTGATGCGGTGGAACCCATTGAGAGACGAGAATCCCTGGTGATCAAGGCAAGGTGTACCACATCA
|
|
AGTAATCCCGTAAAGGTCCTTGCCCTACAGTGAATTTGCCAGCCAGTCTCGGGATTACCATATAACCTCCGTATCCGATC
|
|
TTATGGAAACTGGGAGAAGGTAGACCGTGGTAACTACCTAGAAGTCGCAGTCACGGAGTACCCCGTGACATCCATATGGG
|
|
ACTTAGGCCGCACTATAACGGCCAGCCATTCAAAGGAGTATTTAGTCACCCGCATCTGATCGGACGTAGTGTCTTCAGCG
|
|
CTACGCCTTGTTCGCAAACATTGCTAAGGTGTCCTACTCTCCCCGGATAACCAAAGGAGAGGCCAGATAAGCTTCATCTG
|
|
CCGCAAGAACAAGCGCGAACTCAGTCCCGCTGGTTATGGGGTCACTTGCTCTTCACTCCCCCTCCAGGGGGGAACTCGTA
|
|
TTGGCCGATCGGGATTGGAGTAGAGGTGATTCAAGGTAAACCGACATTTCGTGCGACTCTATGGAGTATACAGAAATTAC
|
|
TGGACTCGGAGAGGCCCTAGCGACCCGAACCGATATAGCAAGGATTAAGGCTTTCAGCAGTTACGTTTCCCATTCGATAC
|
|
GCGACTCGATGGCCGAGTAAAGACACAGAGAATTCCGGGTAAACTTAATCTGCATGTTGATCCAATCTACTCTCCGACGT
|
|
CCTCGAGAAGTACTGGAAGGCTCCCTGAGATAAACACACGCAACTTGTCCTCACTAAATGTGCAACAACTGTGAAAAAAG"""
|
|
sequences = ("AGTCA","TGCA","AAGCT","AGTC","AGTGG")
|
|
creatures = """Andorien 31 100 36 97 20
|
|
Bétazoïde 36 102 32 91 21
|
|
Kazon 38 106 35 90 29
|
|
Cardassien 36 107 32 97 24
|
|
Tellarite 30 102 33 98 27
|
|
Ocampa 31 108 31 91 25
|
|
Trill 32 103 32 91 25
|
|
Rémien 35 100 36 97 26
|
|
Ferengi 31 108 38 97 22
|
|
Denobulan 37 109 36 92 25
|
|
Anticanien 33 109 36 98 28
|
|
Ligonien 39 100 30 91 22
|
|
Xindis 36 108 38 93 21
|
|
Talaxien 36 108 39 90 22
|
|
Suliban 33 101 33 99 21
|
|
Vorta 36 104 35 97 29
|
|
Klingon 38 100 32 90 27
|
|
Dreman 30 109 34 92 27
|
|
Benzite 33 104 37 90 27
|
|
Breen 34 108 37 93 20"""
|
|
|
|
# Puzzle solver: count how often each target sequence occurs in the letter
# grid (rows and columns, each read in both directions), then report the
# creature whose expected counts are closest to the observed ones.

# Result reported when there is no creature at all to compare against.
FALLBACK_CREATURE = "Rognogno"
FALLBACK_DISTANCE = 999


def build_search_lines(grid_text):
    """Return every row and column of the grid, forwards and backwards.

    Blank lines are ignored. The result lists each row followed by its
    reverse, then each column followed by its reverse.

    Raises:
        ValueError: if the rows do not all have the same length, since the
            columns of a ragged grid are not well defined.
    """
    rows = [row.strip() for row in grid_text.splitlines() if row.strip()]
    if not rows:
        return []

    width = len(rows[0])
    if any(len(row) != width for row in rows):
        raise ValueError("grid rows must all have the same length")

    # zip(*rows) transposes the grid: one tuple of letters per column.
    columns = ["".join(column) for column in zip(*rows)]

    search_lines = []
    for line in rows + columns:
        search_lines.append(line)
        search_lines.append(line[::-1])
    return search_lines


def count_occurrences(text, pattern):
    """Count occurrences of `pattern` in `text`, overlapping ones included.

    An empty pattern counts as zero occurrences (searching for it would
    otherwise match at every position forever).
    """
    if not pattern:
        return 0

    count = 0
    position = text.find(pattern)
    while position != -1:
        count += 1
        # Restart one character further so overlapping matches are found.
        position = text.find(pattern, position + 1)
    return count


def parse_creatures(creatures_text, sequence_names):
    """Parse creature records into ``{name: {sequence: expected_count}}``.

    Each non-blank line holds a creature name followed by one integer per
    sequence, in the order of `sequence_names`. Fields may be separated by
    tabs, or by plain whitespace; in the latter case the name itself may
    contain spaces, so the counts are taken from the right-hand end.

    Raises:
        ValueError: if a record has fewer counts than there are sequences,
            or if a count is not an integer.
    """
    sequence_names = list(sequence_names)
    expected_fields = len(sequence_names)

    profiles = {}
    for record in creatures_text.splitlines():
        if not record.strip():
            continue

        if "\t" in record:
            name, *raw_counts = record.split("\t")
        else:
            name, *raw_counts = record.strip().rsplit(None, expected_fields)

        if len(raw_counts) < expected_fields:
            raise ValueError(
                "creature record {!r} needs {} counts".format(
                    record, expected_fields
                )
            )

        # int() tolerates surrounding whitespace; extra trailing fields are
        # ignored because zip stops at the shorter input.
        profiles[name] = {
            seq: int(raw) for seq, raw in zip(sequence_names, raw_counts)
        }
    return profiles


def profile_distance(profile, observed_counts):
    """Return the sum of absolute differences between expected and observed."""
    return sum(
        abs(expected - observed_counts[seq])
        for seq, expected in profile.items()
    )


def closest_creature(distances):
    """Return ``(name, distance)`` for the smallest distance.

    On ties the first creature in iteration order wins. With no creatures
    at all, the fallback name and distance are returned.
    """
    if not distances:
        return FALLBACK_CREATURE, FALLBACK_DISTANCE
    best_name = min(distances, key=distances.get)
    return best_name, distances[best_name]


def main():
    """Run the solver on the module-level dataset and print the results."""
    # `input` is the module-level grid string defined above (it shadows the
    # builtin), `sequences` the patterns and `creatures` the raw records.
    search_lines = build_search_lines(input)

    # dict.fromkeys drops duplicate sequences while keeping their order.
    sequence_names = list(dict.fromkeys(sequences))

    profiles = parse_creatures(creatures, sequence_names)
    print(profiles)
    print("*******")

    observed_counts = {
        seq: sum(count_occurrences(line, seq) for line in search_lines)
        for seq in sequence_names
    }
    print(observed_counts)
    print("*******")

    distances = {
        name: profile_distance(profile, observed_counts)
        for name, profile in profiles.items()
    }
    for name, distance in distances.items():
        print(name, distance)

    print("*******")
    best_name, best_distance = closest_creature(distances)
    print("{}:{}".format(best_distance, best_name))


if __name__ == "__main__":
    main()