# File: pyDefis/La créature inconnue .py
# Last commit: Francois JUMELLE, 20526d93c8 "Initial release", 2021-05-03 22:32:40 +02:00
# (repository viewer metadata: 162 lines, 8.5 KiB, Python)

# Small sample data set: a 5x5 grid of letters, three sequences to look for,
# and two candidate creatures (presumably the worked example from the puzzle
# statement -- TODO confirm).
# NOTE: `input`, `sequences` and `creatures` are all reassigned right after
# this block, so these sample values are not the ones the script processes.
# NOTE: the name `input` shadows the built-in function of the same name.
input = """CCCGC
ACCCA
TATAC
GAGGC
TGTAA"""
sequences = ("AT","TAC","CG")
creatures = """Limaçon de Folfanga 5 3 2
Porc Taudanzain vert 6 2 5"""
input = """CGTCGGCGCACCCGAAATCGGAGGTTCGCTAGCGAGGTTCTCACAGTCAGAACTTTTCTTCTTATGGGTAGTATGATCAG
AAGGCAACTAGGTCTATTCTCGTATGCTCCCATTCATAAATTGGATTATAATACAAACTACGCGAGCATGGGATGACTAT
GAGATCGAGTCTGTGAAAGTTAAGGGCGGTTAAGACTACAACGGTTATAGGTGCAATATCGTCAAGGCGAAGCCTCGTTA
TTTGTTCTCCGATCGTCTTGTGGTCTACTAGCAATGTAAACCCCGATCACGCAACGGGTCCTACGCCCCTACGCTGGACG
ATGATTAAATTCACCGAATGTTTAACCACGCTGTTGAAGGCACATCGTATGAACTGTGGACAAGGGCTAGTTGCCCGGGG
GGTACATTCGTAGGCCATATGCCGACAAAAATGCTCATAACAAACACAGCGCGCTGGGTAGAGATCTGGAGTACATTAAT
GTGAGTACTTTGATGGGAGCGACTCACGACTATACGTGTCGCGTCGTGATCGGAAACTACTCAGCGGTCTATCTATTTGC
GTAGTAAAATCAAGGGGCAAAAACTCTAATAGTTGAACAAGGTGCGGATCCACATATTCTAGCTCATCCCATGTATCGTA
TAAAGCAAACACACCCGGACTGCTTCGATCAGTAATACCGACAACCGCATGCTGTCTAACAAACCCGATAGTAACGGAAT
GTCGCTCCCAAGATGTATAGAACTAAGCTGCACCACAGGTCTGATACATTTTTTGCCGGCGATAAGAAATTACCGGCAGT
TGGCGAATTTAGTGTCCGGGGTAGCATAAAGTGGGAATGTCGCTAGATTGAATGTACGCAACTAATTCGGGAGGTAGATG
CATATGTGGGAGGCGGGTCAGCAGCTGCAATTGGTAAGTCCCGTTTTACAGGACTGTACGAACACGTCCTACCATGATCG
TCGTATCTACTGCGGAGAGCGTTGATTTTGGGGCGTACGAGACGAATCCCCACCAAAGTAGGAGATGTCGAGCTCGCATG
GAGAGGATCGTGATTACCGTAGCATTGGCTGTTAGGAGCGTTGAAAGGTTGTTTATTGCATTTAAGCGGTAAGCCATCGA
GCGAGTGGATACGTCGGAGGCGTATGCCGCACGCCTATAGAGGGGCAAGGGTGACCATTCAAAGTACTCTAGTTGTTATG
TTCTATCGATATCATCGACTGCATCTGTGTACGGGGACGCGGCACTACCTAGGGGTTTCGAGGACTTGTGCCGCTATCCG
TGCGGGCCGAATCATTGCGATATTGCTGCATTGTATTATGGCACGCACTCGTTAACCATGACCCACGCGATTAGTGTGGG
GTTGATGCGACGAAAAGAGCTATAGACTAGTTCGAGAAGCGCAAAGATTATGCCGTTCACGCAGCCAAGAAACGTTCCGG
AGTTTCCTGACGTCGTCACTAGGAAGCCCTATTGGGACACCGGCTCTGTTCCTCGCACCTCATAGGAGTCCGCGCAACTT
TCCGCACTAGCACCCTTAATTCGTCACCGTTCCGCTACCTGTTCGAATCTGGAGACCTATCTGGTGGAACGCGGTAGGTG
GAGTTTTATTCCGGCGTTCCCGAGTCGCTCCGTGGATTAATTGTCTGCCTGTATTATGTCTATAGCCTGGCCCTGAAAGC
GGCTACCTTAGGCTCCTGCTATAAATCACTGTGATTTTAAACCCCTCGGGCAATTTTACTGTACAGCTAACGGGATCCTT
TCCGTAATCTAGCCGTAGTACTCAAGTGAAGTGCTAAGGTATTTGAGTTTCTGGTCGAAGGCGCTTTGGCACTACAATTG
CTCTAACGAACCGGACTGTCATGAGCTTCCCCGGACCCTTGACGATCATTCTTGGGGCTGGGTGGTTAGACAGCCTTATC
GGGCTTGACCCTCTGACCATTAGTAGATATACTCTGACCTTGAGGATATACGTTGTCCCGCACCTCAAAGGGGGATATTG
GAAATCCCATCCCTTCCAATAAAACGAGCGCACGCCCGTATATAGTACCAACCTTACCAGTCACATATGATGTTTATGGG
ATTAACGCAGGTGACCGCAGGCAAAATAAGGAAATGCACGATTGGTTTGAAGCCGCGATAAGTCGCTGACAGTATAGTGG
TCGTTACTTACACTTCTCGACTCGTAGAAAGGCCAAGGCCTCTCTACGAAGTAAAGCAGACTCGAACATGGAAACTCACG
AGGATCCAGGCGAAGCTGATGTTGACTACAAACGCTGCCGAATGGCTAGTTATGTCTGTGAGCCTACTACTGCTGGAGTG
GACGCCTAATTTGAGTTGGCGTTAAGGCCGTATGGGCAGATAGGGGGGAGAGGCCTACGACGGGTAAGTGATGAAGGTCG
CATCGGTGCGCTTGGACATAGCCCAAGCCGCGAGGATTAGCGTGGTTACTAACGCGATACAGTTACTTTAGTGAAGTTTG
TAGGATTGTGAGCTTTCCTCAAGGGGCCGATCCGTAAATTAAGCGGAATATGGAGATTCACTCGTATGTGGAATGATGCG
CCAGGTCGACGTATGTTGATTTAATGTACTATCGGGATAACTGGACGCATACCGCCGACTCAGTGCGGTTGTGCCTTGGG
CGTCGCCCCCCTGCTTCTGTAGGGTCATGACGCTCCGACATCTGCTGGGCTCTCTAACCGAGACGGGCTCTATGCAAGGT
TTCCGTAAAGGGGGAGTGAAGGTATATACCGAAGCTAATTGAAACCAGGAGGACGACGGTAAAATCCCAGCCAGGACTCT
GGACGCTATTGGGTCTAGGCAAGCATACCGAGAACGCCCGATAGGTTACTCACGTTGTGACGATACGATTGCGGTGGAAC
GTACCAATCTTAAAGGCGTACTGTGGTTAAAGCCTGTGACGGTATCATGGCACAATTTGTCAAGCTGCGCAACACGTCTG
AGGGACAGCCGCCTTCCTGAACCACGTCGTGCCTACGCTGTACGCGCTGGGGTACGGTCACTAGGCTCGGCACGGTAAAT
GTCGAGCGCAATACGCGGAACGGCAGGATCAACCATTACCGTGTAGCTGATACTCTGTGTGTTATCTCTACATTCTGATG
TTTCATTTTAGAGCTTAGGTGCATCCATCTTTGAACCCAGCTAACCGGCGGTGTCCGTTCACCCTAATGGCGCTGAACCT
CCGGTCCGGGTTTATCAAGGAAAAATCCGAACTCGTATGCATCGCGCGTCGTCACAAAGTTCTTTGAGATGCTTCAAGGT
TTGATCCTTGGGCGATTTACTGGCTTTACACTATCGACGGAATTATCCCAACCGGACTTATGTATACATTTCTTACACAG
CTCGTTTCGGCCAGACCCCGTTTCGTATAGCTAATGTTACGATATTTTCCTATGGGGACGATGACGCAATTAAGGAAAAA
GCATGATGCTAGATAGCACCCCCACGTAGGAAACTGTAGGATACAGGTCTGTAGCCGCACAAAGAAACCATCTTACAACT
ACAACAGGGGCAGCCTGTCAAGATGGTCAGCACGTTATTACTTTGGCCCGTGCCGCGAGCAATATAAAGTATTAGGTGCG
ACATAGTAAGAGCTTTCAGTTGCAGATAGTTGTCGGGGAGCCCGGTGAGTGTAACTCGTAGTGATGGCTCAGTTCGACCC
ACATTGGAAACTCGGTTAGTACAAATCACTTGGATTTCGCCTGAGGACCGTACAATGCCTGCCACCTAAACAGTCATGGA
GCTCAGGATCACCTATAGGGTGACATGTTGTCAACGCGCCAATGACAGGAGGGGATCTCTGAACTTCGCGCCAATCAGCT
TATAAACTACTATCTTCCGATAGCACCATCCAGAGACTTCCGCGTATGAGAGAACAACTGCCATTGTCCCTAAGGGACTG
GCCGCATAAGAAGGACCCCATTGTAACTCAACGTAAGTGTTCGACGGCTTGCATTTCTTGATTAGATCGAGCACGTCCCG
TCGGGACTGTATTTGTATTGCTCCTAGTAGAACACGCATGGCAGCACTATGTTCTTTCACCCCATCGTTCGGTGATCGGA
TTATACCCAACGATATCTATTTGACATTACATTCGTAACCTATAACTTCCTTTGCAGTGCTTTGCCTTTGCGAGTACGAA
ACTCGGCACATTTGGCAGCCTACAGACAAACCGACCTGGAGTTTCTCGACAGGCGCTAAACTTGTTTGGTGGCAGTGCGC
AGCGCTCCGTACCACCACGCGTCGGATTGATCCGGGCCAATGTCATGGCCGGGGCTGATACTTCGTTAGCGTGTAAGAAC
AGGCGCTCCTGAAGTACCTACGATACGTTCACGCATGCAAACGAGCCCTTAGAGCCTGCCCGACGTCCGTTAATATGGCG
AGCAGAAGGCATAAATGCTGTTAATCAAGCTTCCTGTACTCCCGCTTATTTACTACTGTGCCAAGTGGAGTTAATGTTGA
GACAAGGACTTGCTTGGTGACCCAGCGGGCTACAACATCCTGAACTTGCCCATGGGTCCGGAGTGTTTCTAGGGGACAAG
GGTTAACAGTATTACCGTGATATTTTCTAACCACGTCAGCCCAATTTAATTCGAAACATGTCGAGTGTCCACTTATGGGC
TACCAAGCTCTAGCGAGTGTAATCTCCGCCTAATGCTCCCCCTAGGAACGTCAGAACGGGACGCGCGTTGCGTACAGAGA
GTTCTGAAATCGGCACTATAAGTGATTAACTGGGTACCGGTCGTTCAGCATCAGAAGCGTCCATTAACCGTGAGCGCGGA
AGATACTGCAGGCTTCATCTCCTCGAGTAATTTTGATGTATGGCATGAATATATACCGTGCATGTGGTTGCTGGCATGAC
CCTGAAAGTCAGAGACTAGTTCCCAGGCCTCGTCAGCTCCTCCCGAGCGCTCACGTCACGTTGATTAATGTCTGCTTGGG
TGGTCTGCCCGTAGCCAGAACTTGGTCGGTATTCCGCACGCACTAAGGCGTGTCATAGAAGCGTCCCTGCTCACCGCGGC
GCAAGGTGAGCGTACATCTTGCCAGGACACAGGTACGGAGAATCCTATCGGGAAGGAGGCCATAACAGTGAGTACAGGTT
CTTAACCCAAACATTCCCTTGGCGGACTACAGAGTAAGACCGGCAACAGCTAAACGAGATTGCCTAGACTTAACATACTC
AGTGATAGTACTCTCCAGGATTTGCCCCGTTAGCAAGTTCGACAAGCTGCGAGCGGATCTCATAGATATCACGACGGGCT
TCTCAATCGTCGTCTAACCAAAGCTACCTATTCGTTCCCCGGGCGACTAGGTTTCTGGGGCCCGGTATGGTAAATTTGTA
TTCGTTATGGCTTTGGGGAGTAAGCACCTAAAAACTGACCTCAGGCCTGTAAATATTCGTAACCCAACACCGCTGGTCTG
TGAGGCGAAAGGTCGCCGGCGCTGCTTGTCGAAGTGGTAACTTGGCTTCTTGCATGACCCATGGATATTTCATAAAGGGA
AGGCTCGACACAATCGCACATCTGACAGAGTAGACTGATTGGTCCGTCGCGTTGTGAGAATTGGTCCAGTAAGCCCAAGT
CTTATACGTCCGTAGTTGATGCGGTGGAACCCATTGAGAGACGAGAATCCCTGGTGATCAAGGCAAGGTGTACCACATCA
AGTAATCCCGTAAAGGTCCTTGCCCTACAGTGAATTTGCCAGCCAGTCTCGGGATTACCATATAACCTCCGTATCCGATC
TTATGGAAACTGGGAGAAGGTAGACCGTGGTAACTACCTAGAAGTCGCAGTCACGGAGTACCCCGTGACATCCATATGGG
ACTTAGGCCGCACTATAACGGCCAGCCATTCAAAGGAGTATTTAGTCACCCGCATCTGATCGGACGTAGTGTCTTCAGCG
CTACGCCTTGTTCGCAAACATTGCTAAGGTGTCCTACTCTCCCCGGATAACCAAAGGAGAGGCCAGATAAGCTTCATCTG
CCGCAAGAACAAGCGCGAACTCAGTCCCGCTGGTTATGGGGTCACTTGCTCTTCACTCCCCCTCCAGGGGGGAACTCGTA
TTGGCCGATCGGGATTGGAGTAGAGGTGATTCAAGGTAAACCGACATTTCGTGCGACTCTATGGAGTATACAGAAATTAC
TGGACTCGGAGAGGCCCTAGCGACCCGAACCGATATAGCAAGGATTAAGGCTTTCAGCAGTTACGTTTCCCATTCGATAC
GCGACTCGATGGCCGAGTAAAGACACAGAGAATTCCGGGTAAACTTAATCTGCATGTTGATCCAATCTACTCTCCGACGT
CCTCGAGAAGTACTGGAAGGCTCCCTGAGATAAACACACGCAACTTGTCCTCACTAAATGTGCAACAACTGTGAAAAAAG"""
# Sequences to search for in the grid. Their order matters: it fixes the
# meaning of the numeric columns in `creatures` below.
sequences = ("AGTCA","TGCA","AAGCT","AGTC","AGTGG")
# Candidate creatures, one per line: the creature name followed by one
# integer per entry of `sequences`, in the same order.
creatures = """Andorien 31 100 36 97 20
Bétazoïde 36 102 32 91 21
Kazon 38 106 35 90 29
Cardassien 36 107 32 97 24
Tellarite 30 102 33 98 27
Ocampa 31 108 31 91 25
Trill 32 103 32 91 25
Rémien 35 100 36 97 26
Ferengi 31 108 38 97 22
Denobulan 37 109 36 92 25
Anticanien 33 109 36 98 28
Ligonien 39 100 30 91 22
Xindis 36 108 38 93 21
Talaxien 36 108 39 90 22
Suliban 33 101 33 99 21
Vorta 36 104 35 97 29
Klingon 38 100 32 90 27
Dreman 30 109 34 92 27
Benzite 33 104 37 90 27
Breen 34 108 37 93 20"""
# Reformat the input: collect every line the grid can be read along -- each
# row and each column, both forwards and backwards -- so that a sequence only
# ever has to be searched left-to-right in the strings stored in `table`.
lines = input.splitlines()
table = []
for line in lines:
    table.append(line)
    table.append(line[::-1])
# zip(*lines) yields the grid column by column. It yields nothing for an
# empty grid (the previous code relied on a loop variable that was never
# bound in that case) and, for a ragged grid, stops at the shortest row
# instead of indexing past the end of a short row.
for column in zip(*lines):
    s = "".join(column)
    table.append(s)
    table.append(s[::-1])
# Rebind `sequences` as a tally: every sequence becomes a key whose
# occurrence count starts at zero (original order is kept).
sequences = dict.fromkeys(sequences, 0)
# Parse the creature table into {creature name: {sequence: expected count}}.
# Each line holds a name followed by one integer per entry of `sequences`,
# in the same order.
creatures_dict = {}
for creature_line in creatures.splitlines():
    if not creature_line.strip():
        continue  # tolerate blank lines in the table
    # Names may themselves contain spaces, so split from the right: the last
    # len(sequences) whitespace-separated fields are the counts and whatever
    # precedes them is the name. Splitting on any whitespace accepts both
    # tab-separated and space-separated columns.
    creature_name, *counts = creature_line.rsplit(None, len(sequences))
    if len(counts) != len(sequences):
        raise ValueError(
            "expected a name and {} counts, got: {!r}".format(
                len(sequences), creature_line
            )
        )
    creatures_dict[creature_name] = {
        seq: int(count) for seq, count in zip(sequences, counts)
    }
print(creatures_dict)
print("*******")
# Count every occurrence of each sequence in every string of `table`.
# Occurrences may overlap: after a hit, the search resumes one character
# after the start of that hit rather than after its end.
for seq in sequences:
    if not seq:
        # An empty pattern matches at every position; skip it rather than
        # count meaningless hits (the slice-based search never terminated
        # on it).
        continue
    for item in table:
        # str.find with a start index searches in place, avoiding a copy of
        # the remaining tail of `item` after every hit.
        pos = item.find(seq)
        while pos != -1:
            sequences[seq] += 1
            pos = item.find(seq, pos + 1)
print(sequences)
print("*******")
# Find the creature whose expected counts are closest to the counts measured
# in the grid. The distance is the sum, over all sequences, of the absolute
# difference between expected and measured count.
# The two values below are only the fallback reported when the creature
# table is empty.
creature_ressemble = "Rognogno"
distance_ressemble = 999
distances = {}
for creature, expected_counts in creatures_dict.items():
    distance = sum(
        abs(expected - sequences[seq])
        for seq, expected in expected_counts.items()
    )
    print(creature, distance)
    distances[creature] = distance
if distances:
    # Take the true minimum instead of comparing against the 999 sentinel,
    # which would report the placeholder name whenever every distance is
    # >= 999. min() returns the first minimal key, so ties still go to the
    # creature listed first.
    creature_ressemble = min(distances, key=distances.get)
    distance_ressemble = distances[creature_ressemble]
print("*******")
print("{}:{}".format(distance_ressemble, creature_ressemble))