# -*- coding:Utf-8 -*-
# Claude TOUZET 4 - 2010
# SOFM code (from the book)
"""Train a small self-organizing map (SOM) on fixed-length words.

The script reads ``sourceFileName``, keeps the words that are exactly
``nbL`` characters long and contain none of the characters rejected by
``verif``, strips accents with ``convertir``, one-hot encodes each word
(26 slots per letter position) and trains an ``nb1D`` x ``nb1D`` map on
them.  The kept words, the sample count, the per-iteration headers and the
decoded map are written to ``destinationFile``; the final weights are
written to ``WSOM_final.txt``.
"""
#from numpy import *
from random import *
from math import *
import codecs

BLOCKSIZE = 1048576  # size passed to each read() call on the source file
sourceFileName = "texte.txt"
destinationFile = "10lettres.txt"

ALPHABET_SIZE = 26  # 26 letters in the alphabet

nbL = 10  # number of letters per word
nb_entree = nbL * ALPHABET_SIZE  # number of inputs per neuron
nb1D = 7  # side of the square map
nbN = nb1D * nb1D  # number of neurons (7 x 7)
a = 0.5  # alpha: initial learning rate of the winning neuron
b = 0.2  # beta: initial learning rate of the winner's neighbours
nb_iterations = 10  # number of learning iterations
learning = 1  # 1 = run the learning phase

# Characters that disqualify a word (note: only the digits 0 and 1 are listed).
_FORBIDDEN = frozenset(".',-()01/§")

# Accented letter -> plain letter, one code point for one code point.
_ACCENTS = str.maketrans("èéêàâîùçû", "eeeaaiucu")


def verif(s):
    """Return True when *s* contains none of the rejected characters."""
    return not any(ch in _FORBIDDEN for ch in s)


def convertir(s):
    """Return *s* with the handled accented letters replaced by plain ones."""
    return s.translate(_ACCENTS)


def iter_words(chunks):
    """Yield the whitespace-separated words of an iterable of text chunks.

    A word cut in two by a chunk boundary is re-assembled instead of being
    reported as two separate words.
    """
    pending = ""
    for chunk in chunks:
        if not chunk:
            continue
        text = pending + chunk
        words = text.split()
        # If the text does not end on whitespace, its last word may continue
        # in the next chunk: hold it back.
        if words and not text[-1].isspace():
            pending = words.pop()
        else:
            pending = ""
        for word in words:
            yield word
    if pending:
        yield pending


def select_words(words, length):
    """Return the lower-cased, accent-free words of exactly *length* chars.

    Words containing a character rejected by ``verif`` are dropped.
    """
    kept = []
    for word in words:
        lowered = word.lower()
        if len(lowered) == length and verif(lowered):
            kept.append(convertir(lowered))
    return kept


def encode_word(word, nb_letters):
    """Return the one-hot encoding of *word* as a flat list of 0/1.

    The vector has ``26 * nb_letters`` entries; position ``pos`` of the word
    owns the slice ``[pos * 26, (pos + 1) * 26)``.  Any character outside
    ``a``..``z`` is stored in the last slot (index 25) of its slice.
    """
    vector = [0] * (ALPHABET_SIZE * nb_letters)
    base = ord('a')
    for pos, ch in enumerate(word[:nb_letters]):
        k = ord(ch) - base
        # The upper bound is 25, not 26: index 26 would spill into the next
        # letter's slice (or past the end of the vector).
        if k < 0 or k >= ALPHABET_SIZE:
            k = ALPHABET_SIZE - 1
        vector[pos * ALPHABET_SIZE + k] = 1
    return vector


def init_weights(nb_neurons, nb_inputs):
    """Return ``nb_neurons`` weight rows drawn uniformly in [0.45, 0.55)."""
    return [[0.45 + random() / 10 for _ in range(nb_inputs)]
            for _ in range(nb_neurons)]


def best_matching_unit(weights, vector):
    """Return the index of the row closest to *vector* (L1 distance).

    Ties are resolved in favour of the lowest index.
    """
    distances = [sum(abs(x - w) for x, w in zip(vector, row))
                 for row in weights]
    return min(range(len(distances)), key=distances.__getitem__)


def pull_towards(row, vector, rate):
    """Move *row* in place towards *vector* by the fraction *rate*."""
    for k, x in enumerate(vector):
        row[k] += rate * (x - row[k])


def train_step(weights, vector, mu, beta, side):
    """Present one input vector to the map and return the winner's index.

    The winner is updated with rate *mu*; the neurons one row before and one
    row after it (``winner - side`` and ``winner + side``), when they exist,
    are updated with rate *beta*.
    """
    # NOTE(review): the winner search and the winner update were lost in the
    # source text (garbled between `SOM[i]<` and `>= 0`) and are
    # reconstructed here.  The lost span may also have updated the left/right
    # neighbours; that is NOT reconstructed -- confirm against the original.
    winner = best_matching_unit(weights, vector)
    pull_towards(weights[winner], vector, mu)
    for neighbour in (winner - side, winner + side):
        if 0 <= neighbour < len(weights):
            pull_towards(weights[neighbour], vector, beta)
    return winner


def decode_neuron(row, nb_letters):
    """Return the word obtained by taking the strongest letter per position."""
    letters = []
    for pos in range(nb_letters):
        block = row[pos * ALPHABET_SIZE:(pos + 1) * ALPHABET_SIZE]
        letters.append(chr(ord('a') + block.index(max(block))))
    return "".join(letters)


def map_lines(weights, side, nb_letters):
    """Return one text line per map row; each label is preceded by a space."""
    lines = []
    for r in range(side):
        labels = (decode_neuron(weights[r * side + c], nb_letters)
                  for c in range(side))
        lines.append("".join(' ' + label for label in labels))
    return lines


def main():
    """Extract the words, train the map and write the result files."""
    with codecs.open(destinationFile, "w", "utf-8") as destFile:
        with codecs.open(sourceFileName, "r", "utf-8") as sourceFile:
            # read() returns an empty string at end of file.
            chunks = iter(lambda: sourceFile.read(BLOCKSIZE), "")
            BA1 = select_words(iter_words(chunks), nbL)

        for word in BA1:
            destFile.write(word)
            destFile.write(" ")

        nb_samples = len(BA1)
        print(nb_samples)
        destFile.write(str(nb_samples))
        destFile.write('\n')

        BA = [encode_word(word, nbL) for word in BA1]
        WSOM = init_weights(nbN, nb_entree)

        if learning != 1:
            return

        mu1 = a
        beta1 = b
        for n in range(nb_iterations):
            print('iteration = ', n)
            destFile.write('iteration = ' + str(n) + '\n')
            # Both rates decay linearly from their initial value towards 0.
            mu = mu1 - mu1 * (n / nb_iterations)
            beta = beta1 - beta1 * (n / nb_iterations)
            for _ in range(nb_samples):
                # Uniform draw over all samples, sample 0 included.
                train_step(WSOM, BA[randrange(nb_samples)], mu, beta, nb1D)

            # NOTE(review): the original indentation was lost; the map is
            # assumed to be displayed after every iteration -- confirm.
            for line in map_lines(WSOM, nb1D, nbL):
                print(line)
                destFile.write(line + '\n')

    with open('WSOM_final.txt', 'w') as genes:
        for i, row in enumerate(WSOM):
            genes.write('\n')
            genes.write(str(i))
            genes.write(" : ")
            genes.write("".join(str(w) + " " for w in row))


if __name__ == "__main__":
    main()