import pyodbc
import nltk
import array   # imported in the original listing but not used below
from nltk.tokenize import PunktWordTokenizer
from nltk.corpus import stopwords
from replacers import RegexpReplacer

# Connect to the local MySQL server through the ODBC driver.
cnxn = pyodbc.connect('DRIVER={MySQL ODBC 5.1 Driver};SERVER=127.0.0.1;PORT=3306;'
                      'DATABASE=information_schema;USER=root;PASSWORD=1234;OPTION=3;')
cursor = cnxn.cursor()
cursor.execute("use collegedatabase;")
cursor.execute("select * from sampledata;")
cnxn.commit()

s = []       # injury types
j = []       # injury descriptions
x = []       # unused in this excerpt
words = []   # unused in this excerpt
w = []       # tokenized, stop-word-filtered descriptions
sfq = []     # filtered descriptions joined back into strings
pos = []     # POS-tagged output

# Read the injury type and description columns from the result set.
for entry in cursor:
    s.append(entry.injury_type)
    j.append(entry.injury_desc)

tokenizer = PunktWordTokenizer()
english_stops = set(stopwords.words('english'))

# Tokenize each description and filter out English stop words.
for i in range(0, 26):
    w.append([word for word in tokenizer.tokenize(j[i]) if word not in english_stops])

# Join each tokenized description back into a single string.
for a in range(0, 26):
    sfq.append(" ".join(w[a]))

# POS tagging.
replacer = RegexpReplacer()
for i in range(0, 26):
    replacer.repl...   # the listing is cut off here in the original post
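The listing above breaks off mid-call at replacer.repl. A minimal sketch of how that POS-tagging loop might continue, assuming the RegexpReplacer.replace() method from the NLTK Cookbook's replacers module (which expands contractions such as "can't" to "cannot") and NLTK's built-in nltk.pos_tag tagger; this is not the original author's code:

# Hedged sketch of the truncated step: clean each joined string, re-tokenize it,
# and tag it. nltk.pos_tag needs a tagger model installed via nltk.download().
for i in range(0, 26):
    cleaned = replacer.replace(sfq[i])      # e.g. "can't" -> "cannot"
    tokens = tokenizer.tokenize(cleaned)    # re-tokenize the cleaned string
    pos.append(nltk.pos_tag(tokens))        # list of (word, POS-tag) tuples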