Advertisement
Advertisement
| 04.10.2008 at 07:47PM PDT, ID: 23314014 |
|
[x]
Attachment Details
|
||
|
[x]
The Solution Rating System
|
||
|
With so many solutions, how can you tell which solutions are most likely to help you and which ones are not? To provide you with a tool to use, we rate our solutions based on various elements that most accurately determine if a solution is a quality solution. To explain what factors affect the solution rating, here are the elements we take into consideration when formulating our solution rating.
Your Input Matters: If you have any suggestions for our rating system, please ask a question in the Suggestions Zone of Community Support. Thank you! |
||
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: |
from sqlite3 import dbapi2 as sqlite
# Output file; write_to_csv() opens it in append mode and adds one line per call.
CSV_FILE = "C:/his/testdataFromGen8.csv"
# SQLite database file that start() passes to sqlite.connect().
DATABASE_FILE = "C:/his/test.db"
# Module-level lookup table filled by make_index_dict() and read by
# get_amount(): key is word + "&" + url, value is the row's count as a string.
index_dict ={}
def write_to_csv(content_list, path=None):
    """Append one row to the CSV output file.

    Args:
        content_list: Sequence of cell values making up the row.
        path: Optional file to append to.  Defaults to the module-level
            ``CSV_FILE`` so existing one-argument callers are unaffected.

    Cells that contain a comma, a double quote or a line break are quoted
    according to the usual CSV rules (URLs may legitimately contain
    commas); ordinary rows come out as a plain comma-joined line.
    """
    # Function-scope import keeps this block self-contained.
    import csv

    if path is None:
        path = CSV_FILE
    # ``with`` guarantees the handle is closed even if the write raises.
    with open(path, "a") as csv_file:
        # An explicit "\n" terminator keeps the line ending this function
        # has always written instead of the csv module's default "\r\n".
        csv.writer(csv_file, lineterminator="\n").writerow(content_list)
def get_csv_output(matrix, words, urls):
    """Write the word-by-url count table to the CSV file.

    The first row written is the header: "N/A" followed by every word.
    Each later row starts with a url and then holds, for every word in
    order, the value get_amount() returns for that word and url.
    A progress message is printed after each url and once at the end.
    """
    header = ["N/A"] + list(words)
    write_to_csv(header)
    for url in urls:
        counts = [get_amount(matrix, word, url) for word in words]
        print("finish make url : " + url)
        write_to_csv([url] + counts)
    print("have done...")
def get_amount(matrix, word, url, index=None):
    """Return the stored count for ``word`` on ``url`` as a string.

    Args:
        matrix: Not consulted; kept in the signature so existing callers
            keep working.  (The old linear scan over it sat after two
            unconditional returns and could never run.)
        word: Word half of the lookup key.
        url: Url half of the lookup key.
        index: Optional mapping to search instead of the module-level
            ``index_dict``.

    Returns:
        The value stored under ``word + "&" + url``, or the string "0"
        when that key is absent.
    """
    if index is None:
        index = index_dict
    # dict.get replaces has_key(), which no longer exists in Python 3.
    return index.get(word + "&" + url, "0")
def make_index_dict(rows, index=None):
    """Fill the lookup table that get_amount() reads.

    Args:
        rows: Iterable of sequences where item 0 is the count, item 1 the
            word and item 2 the url.
        index: Optional mapping to fill instead of the module-level
            ``index_dict``.

    Every row is stored under the key ``word + "&" + url`` with the count
    converted to a string; a later row with the same key overwrites an
    earlier one.  Prints a progress message when finished.
    """
    if index is None:
        index = index_dict
    for row in rows:
        index[row[1] + "&" + row[2]] = str(row[0])
    print("finish init index_dict...")
# Number of wordlocation rows for every (url, word) pair, joined with the
# word text and the url text.
_WORD_COUNT_SQL = '''SELECT count( wordlocation.wordid ) AS amount ,
wordlist.word ,
urllist.url
FROM wordlocation , wordlist , urllist
WHERE wordlist.rowid = wordlocation.wordid
AND urllist.rowid = wordlocation.urlid
GROUP BY urlid , wordid'''

# Words left out of the report (they look like stemmed markup/script
# tokens -- TODO confirm).  A frozenset makes each membership test a hash
# lookup instead of a scan over the whole collection.
_MORE_IGNORE_WORDS = frozenset([
    'googl', 'blog', 'search', 't', 'link', 'activ', 'background', 'k', 'h',
    'font', 'i', 'z', 'displai', 'none', 'div', 'n', 'margin', 'top', 'bold',
    'q', 'b', 'ch', 'cursor', 'pointer', 'e', 'p', 'pr', 'ul', 'li', 'list',
    'style', 'j', 'line', 'height', 'br', 'sbb', 'td', 'lrr', 'sop', 'gbar',
    'float', 'gbh', 'border', 'solid', 'posit', 'absolut', 'gbi', 'fff',
    'index', 'guser', 'import', 'media', 'all', 'right', 'vertic', 'align',
    'block', 'text', 'decor', 'hover', 'bodi', 'famili', 'arial', 'san',
    'serif', 'm', 'tpb', 'ttb', 'white', 'space', 'nowrap', 'rsb', 'btb',
    'bt', 'ln', 'ccc', 'hd', 'eee', 's', 'f', 'fl', 'w', 'green', 'img', 'l',
    'g', 'tr', 'asb', 'as', 'window', 'function', 'd', 'c', 'var', 'on', 'if',
    'addeventlisten', 'fals', 'els', 'attachev', 'appli', 'thi', 'argument',
    'return', 'undefin', 'o', 'firstchild', 'tagnam', 'px', 'tg', 'navextra',
    'document', 'getelementbyid', 'getelementsbytagnam', 'span', 'event',
    'cancelbubbl', 'createel', 'arrai', 'everi', 'createpopup', 'ifram',
    'framebord', 'scroll', 'no', 'src', 'javascript', 'parentnod',
    'appendchild', 'id', 'for', 'insertbefor', 'classnam', 'click', 'close',
    'while', 'do', 'offsetleft', 'offsetpar', 'ss', 'statu', 'TRUE', 'cs',
    'ga', 'substr', 'r', 'target', 'srcelement', 'locat', 'href', 'clk',
    'url', 'ct', 'cd', 'cad', 'sg', 'rwurl', 'escap', 'replac', 'rdh', 'rdp',
    'length', 'host', 'hostnam', 'pathnam', 'field', 'split', 'break', 'new',
    'imag', 'blogsearch', 'sa', 'ei', 'rl', 'nln', 'continu', 'sig',
    'colonpo', 'indexof', 'posidarrai', 'pb', 'number', 'nan', 'onmousedown',
    're', 'web', 'map', 'shop', 'gmail', 'more', 'video', 'group', 'book',
    'scholar', 'financ', 'youtub', 'calendar', 'photo', 'reader', 'even',
    'raquo', 'sign', 'nbsp', 'advanc', 'prefer', 'result', 'about', 'second',
    'toggletoadvanc', 'noadvanc', 'togglefromadvanc', 'publish', 'last',
    'hour', 'dai', 'past', 'week', 'month', 'anytim', 'write', 'choos',
    'date', 'hide', 'form', 'action', 'http', 'com',
    'daterang', 'name', 'onsubmit', 'formatdateinput', 'input', 'as_q',
    'type', 'hidden', 'valu', 'num', 'hl', 'en', 'as_epq', 'as_eq', 'lr',
    'safe', 'ie', 'iso', 'as_mind', 'as_minm', 'as_mini', 'as_maxd',
    'as_maxm', 'as_maxi', 'as_drrb', 'ctz', 'start', 'autocomplet', 'off',
    'maxlength', 'onblur', 'checkandclos', 'onfocu', 'open', 'end', 'option',
    'btnd', 'submit', 'go', 'gettimezoneoffset', 'montharrai', 'jan', 'feb',
    'mar', 'apr', 'mai', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'dowarrai', 'su', 'tu', 'th', 'datedelim', 'ymdpermut', 'mdy',
    '_calendarpopup', 'setinitialcalendarset', 'class', 'ddcalp_dai',
    'curr_dai', 'subscrib', 'alert', 'atom', 'rss', 'sort', 'by', 'relev',
    'relat', 'care', 'center', 'inform', 'you', 'can', 'us',
])


def _unique_in_order(values):
    """Return the distinct items of ``values`` in first-seen order."""
    seen = set()
    ordered = []
    for value in values:
        if value not in seen:
            seen.add(value)
            ordered.append(value)
    return ordered


def start():
    """Export per-url word counts from the SQLite database to the CSV file.

    Steps:
        1. Run the count query against ``DATABASE_FILE`` and fetch all rows
           as (amount, word, url).
        2. Drop rows whose word is in the ignore set.
        3. Collect the distinct urls and words in first-seen order.
        4. Build the lookup table and write the table through
           get_csv_output().

    When the query returns nothing, a message is printed and the function
    returns without touching the CSV file.
    """
    conn = sqlite.connect(DATABASE_FILE)
    try:
        cu = conn.cursor()
        cu.execute(_WORD_COUNT_SQL)
        rows = cu.fetchall()
    finally:
        # Release the database handle whether or not the query succeeded.
        conn.close()

    if not rows:
        print("no data found...")
        # Nothing to export; returning here avoids appending a header-only
        # row to the CSV file.
        return
    print("finish load data...")

    rows = [row for row in rows if row[1] not in _MORE_IGNORE_WORDS]
    urls = _unique_in_order(row[2] for row in rows)
    words = _unique_in_order(row[1] for row in rows)

    make_index_dict(rows)
    get_csv_output(rows, words, urls)
# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    start()
|