|
|
@ -62,6 +62,8 @@ def clean(result, path):
|
|
|
|
result = result.replace('<br>','')
|
|
|
|
result = result.replace('<br>','')
|
|
|
|
result = re.sub(r'<a href=.*?>', '', result)
|
|
|
|
result = re.sub(r'<a href=.*?>', '', result)
|
|
|
|
result = result.replace('</a>','')
|
|
|
|
result = result.replace('</a>','')
|
|
|
|
|
|
|
|
result = re.sub(r'<h\d>','',result)
|
|
|
|
|
|
|
|
result = re.sub(r'</h\d>','',result)
|
|
|
|
return result
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
def snip_article(article, path):
|
|
|
|
def snip_article(article, path):
|
|
|
@ -69,6 +71,7 @@ def snip_article(article, path):
|
|
|
|
limit = 300
|
|
|
|
limit = 300
|
|
|
|
result = article[0:min(len(article),limit)]
|
|
|
|
result = article[0:min(len(article),limit)]
|
|
|
|
result = result.rsplit(' ',1)[0]
|
|
|
|
result = result.rsplit(' ',1)[0]
|
|
|
|
|
|
|
|
# cut off at certain line count, too?
|
|
|
|
return result + " ... "
|
|
|
|
return result + " ... "
|
|
|
|
|
|
|
|
|
|
|
|
def sort_files(files):
|
|
|
|
def sort_files(files):
|
|
|
|