#coding=utf-8
def get_file_path(root_path, file_list=None, dir_list=None):
    """Recursively collect every file and directory path below ``root_path``.

    Entries are visited in the order ``os.listdir`` reports them. A directory
    is recorded and then fully descended into before its later siblings are
    visited.

    Args:
        root_path: Directory to scan.
        file_list: List that receives the file paths (extended in place).
            A new list is created when omitted.
        dir_list: List that receives the directory paths (extended in
            place). A new list is created when omitted.

    Returns:
        The tuple ``(file_list, dir_list)``; these are the same list objects
        that were passed in, when they were supplied.
    """
    # Imported locally, as in the original: no module-level ``import os`` is
    # visible in this file.
    import os

    if file_list is None:
        file_list = []
    if dir_list is None:
        dir_list = []

    # Names of all files and directories directly inside root_path.
    for entry in os.listdir(root_path):
        entry_path = os.path.join(root_path, entry)
        if os.path.isdir(entry_path):
            dir_list.append(entry_path)
            # Recurse so nested files and directories are collected too.
            get_file_path(entry_path, file_list, dir_list)
        else:
            file_list.append(entry_path)
    return file_list, dir_list
def alter(file, old_str, new_str):
    """Replace every occurrence of ``old_str`` with ``new_str`` inside a file.

    The file is read completely, the substitution is applied in memory, and
    the result is written back to the same path.

    Args:
        file: Path of the text file to rewrite in place.
        old_str: Substring to search for.
        new_str: Text that replaces each occurrence of ``old_str``.

    Returns:
        None.
    """
    # Close the read handle before reopening for writing; the original kept
    # it open (and never closed it) while truncating the same file.
    with open(file, 'r') as source:
        original = source.read()

    updated = original.replace(old_str, new_str)
    if updated == original:
        # Nothing matched: leave the file untouched instead of truncating
        # and rewriting identical content.
        return

    with open(file, 'w') as target:
        target.write(updated)
def file_extension(file):
    """Return the extension of ``file``, including the leading dot.

    Args:
        file: File name or path.

    Returns:
        The extension as reported by ``os.path.splitext`` (for example
        ``".html"``), or an empty string when the name has no extension.
    """
    # Local import: ``os`` is otherwise only imported inside get_file_path,
    # so the original raised NameError when this function was called.
    import os

    return os.path.splitext(file)[1]
def get_domain(file_list):
    """Extract the ``scheme://host`` part of every http(s) URL in the files.

    Each file is read line by line. For every line that contains at least
    one URL, a single string is appended to the result: the line's matches
    joined with commas.

    Args:
        file_list: Iterable of paths of the text files to scan.

    Returns:
        A list with one comma-joined string per line that had a match.
    """
    import re

    # Raw string: the original non-raw literal relied on invalid escape
    # sequences (\w, \d). Compiled once instead of once per line.
    url_pattern = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')

    domain = []
    for html in file_list:
        with open(html) as handle:
            for line in handle:
                urls = url_pattern.findall(line)
                if urls:
                    domain.append(','.join(urls))
    return domain
def count_domain(domain):
    """Count how often each token occurs across the given strings.

    Each string is split on commas and whitespace. Every token is stripped
    of leading/trailing ``. , ? ! " '`` characters and lower-cased before it
    is counted.

    Args:
        domain: Iterable of strings, e.g. the comma-joined URL strings
            produced by ``get_domain``.

    Returns:
        A ``collections.Counter`` mapping each normalised token to its
        number of occurrences.
    """
    from collections import Counter

    counts = Counter()
    for sentence in domain:
        # get_domain joins a line's URLs with commas, so split on commas as
        # well as whitespace. Otherwise "a,b" is counted as a single token.
        tokens = sentence.replace(',', ' ').split()
        normalised = (token.strip('.,?!"\'').lower() for token in tokens)
        # Skip tokens that consisted only of punctuation.
        counts.update(token for token in normalised if token)
    return counts
def bat_replace(file_list, old_str, new_str):
    """Replace several strings with ``new_str`` in every listed file.

    Every file in ``file_list`` is processed, whatever its extension. For
    each entry of ``old_str`` in turn, ``alter`` is called on every file.

    Args:
        file_list: Paths of the files to rewrite in place.
        old_str: Strings to be replaced. A single string is also accepted
            and treated as a one-element list.
        new_str: Replacement text used for every entry of ``old_str``.

    Returns:
        None.
    """
    # Iterating a bare string would replace each of its characters
    # separately, so wrap it in a list first.
    if isinstance(old_str, str):
        old_str = [old_str]

    for target in old_str:
        for path in file_list:
            alter(path, target, new_str)
if __name__ == "__main__":
    # Replace strings in every file under a folder.
    #   root_path: folder to process
    #   old_str:   strings to be replaced
    #   new_str:   text they are replaced with
    root_path = "/mnt/d/hexo/tmp2/www.crust.cn/public"  # root directory (alternative: /mnt/d/hexo)
    file_list = []  # receives every file path
    dir_list = []  # receives every directory path
    file_list, dir_list = get_file_path(root_path, file_list, dir_list)

    # The URL statistics are gathered before the files are rewritten.
    domain = get_domain(file_list)
    counts = count_domain(domain)

    old_str = ['https://crust.cn','http://http','https://creativecommons.org','https://hexo.io','http://purl.org','https://i.creativecommons.org']
    new_str = 'http://crust.cn'
    bat_replace(file_list, old_str, new_str)

    # print(...) with one argument prints the same on Python 2 and also
    # parses on Python 3, unlike the original print statements.
    print(counts)
    print(counts['http://crust.cn'])
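# --- Blog page residue below (not part of the script) ---
# This post: Replace a string in every file under a folder
# Excerpt:
#   #coding=utf-8
#   def get_file_path(root_path,file_list,dir_list):
#   import os
#   # get the names of all files and directories in this directory
#   dir_or_files
# 2019-11-12
# Next post: keywords
#   https://ads.google.com/intl/en_uk/home/tools/keyword-planner/
#   https://ahrefs.com/blog/zh/google-keyword-planner/
# 2019-11-12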