发布时间:2022-08-09 文章分类:编程知识 投稿人:李佳 字号: 默认 | | 超大 打印
Python开发Http代理服务器

这是之前开发酒店广告投放系统时编写的 HTTP 代理服务程序,其功能是把广告插入到 HTML 的 DOM 结构中。通常插入的是 DIV,当然也可以是 script;这些内容都可以从数据库中动态获取。
稍作修改后即可把它当作普通的 HTTP 代理服务器使用:在浏览器中把代理地址设置为运行该程序的机器的 IP 即可;只要运行代理程序的机器能上网,客户机就能上网(其中处理经过 gzip 压缩的页面稍微有些麻烦)。

# -*- coding: utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# socketref@hotmail.com
# www.sw2us.com

# To Python this line is a no-op expression statement (adjacent string
# literals are concatenated and the result is discarded).
# NOTE(review): presumably a sh/Python polyglot launcher so that a shell
# re-executes this file under "python -O" -- confirm how the script is started.
"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2usHTTPProxy.

"""

# Appended to "TinyHTTPProxy/" to form ProxyHandler.server_version.
__version__ = "0.2.1"
15
import BaseHTTPServer
import gzip
import httplib
import mimetools
import os
import re
import select
import socket
import SocketServer
import StringIO
import sys
import time
import traceback
import urlparse
import zlib
19
20
class ConfigProperty:
    """A single ``key=value`` entry parsed from one configuration line."""

    def __init__(self, owner):
        # ``owner`` is accepted for call-site compatibility; it is not stored.
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse one configuration line of the form ``key=value``.

        Everything from the first ``#`` onward is treated as a comment and
        discarded.  The line is split on the FIRST ``=`` only, so a value may
        itself contain ``=`` (for example a URL with a query string).

        Returns True and updates ``key``/``value`` on success.  Returns False
        and leaves both attributes untouched when the line contains no ``=``.
        """
        comment_at = text.find('#')
        if comment_at != -1:
            text = text[:comment_at]
        key, sep, value = text.partition('=')
        if not sep:
            return False
        self.key = key.strip()
        self.value = value.strip()
        return True

    def toString(self):
        """Return the entry as ``key=value``, or '' if it cannot be formatted."""
        try:
            return "%s=%s" % (self.key, self.value)
        except (TypeError, ValueError):
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError):
            return 0.0
66
67
# @def SimpleConfig
# Simple configuration file; basic line format: key=value
class SimpleConfig:
    """In-memory list of ``key=value`` properties loaded from a text file."""

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load properties from the configuration file ``file``.

        Any previously loaded properties are discarded first.  Lines that
        ``ConfigProperty.create`` rejects are skipped.

        ``strip`` is only recorded in ``self._strip``; nothing in this class
        reads it back.

        Returns True on success, False when the file cannot be read.
        """
        self._strip = strip
        self._props = []
        try:
            # ``with`` guarantees the handle is closed even if reading fails.
            with open(file, 'r') as fh:
                lines = fh.readlines()
        except (IOError, OSError):
            return False
        for text in lines:
            prop = ConfigProperty(self)
            if prop.create(text):
                self._props.append(prop)
        return True

    def toString(self):
        """Return all properties, one ``key=value`` per newline-terminated line."""
        return ''.join(p.toString() + "\n" for p in self._props)

    def saveAs(self, file):
        """Write all properties to ``file``.

        Returns True on success; prints a message and returns False when the
        file cannot be written.
        """
        try:
            with open(file, 'w') as fh:
                fh.write(self.toString())
        except (IOError, OSError):
            print("writeFileFailed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals ``name``, else None."""
        for p in self._props:
            if p.key == name:
                return p
        return None

    def getPropertyValue(self, key, default=''):
        """Return the raw string value for ``key``, or ``default`` if absent."""
        prop = self.getProperty(key)
        if not prop:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value for ``name`` as an int.

        ``default`` is returned when the property is missing, empty, or not a
        valid integer.
        """
        raw = self.getPropertyValue(name)
        if not raw:
            return default
        try:
            return int(raw)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value for ``name`` as a float.

        ``default`` is returned when the property is missing, empty, or not a
        valid number.
        """
        raw = self.getPropertyValue(name)
        if not raw:
            return default
        try:
            # Convert the property text itself, not the default.
            return float(raw)
        except (TypeError, ValueError):
            return default
151
152
153#===========================================#
154
155
156#===========================================#
157
def getMacList():
    """Return the ``(ip, mac)`` pairs found in the output of ``arp -a``.

    Only lines that begin with a dotted IPv4 address followed by whitespace
    and a dash-separated six-group hexadecimal MAC address are recognised;
    every other line is ignored.  Each entry is the 2-tuple of strings
    captured by the pattern.
    """
    entry = re.compile(
        r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+'
        r'([0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}'
        r'\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}).*')
    maclist = []
    # The command needs the space: "arp-a" is not an executable name.
    pipe = os.popen('arp -a', 'r')
    try:
        for line in pipe:
            found = entry.match(line.strip())
            if found:
                maclist.append(found.groups())
    finally:
        # Always release the pipe, even if reading fails.
        pipe.close()
    return maclist
173
174
175
176##########################################
# Module-wide configuration, loaded from 'proxy.conf' when the module is
# imported.  The return value of open() is not checked: if the file cannot be
# read the property list stays empty and every lookup yields its default.
confile = SimpleConfig()
confile.open('proxy.conf')
# Placeholder; not referenced anywhere else in the visible code.
dbconn = None
180
181##########################################
182#初始化系统配置
def initConfiguration():
    """Initialise the system configuration.

    Currently a placeholder: it performs no work and always returns True.
    """
    return True
187
188##########################################
189
class ProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler that relays HTTP requests and CONNECT tunnels.

    If the class attribute ``allowed_clients`` is set, requests from any other
    client IP are answered with 403.
    """

    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    rbufsize = 0  # make self.rfile unbuffered

    def handle(self):
        """Entry point for a new client connection.

        Per the original author's note this runs in its own thread.  Clients
        outside ``allowed_clients`` (when that attribute exists) get a 403;
        everyone else goes through the stock request handling.
        """
        print('clientincomingPython开发Http代理服务器  socketref,呆在autonavi.com  C++博客')
        (ip, port) = self.client_address
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            # Read and parse the request so a well-formed 403 can be returned.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect ``soc`` to ``netloc`` (``host`` or ``host:port``, default 80).

        Returns 1 on success.  On failure an error response is sent to the
        client (400 for an unparsable port, 404 for a connection error) and 0
        is returned.
        """
        host, sep, port_text = netloc.partition(':')
        if sep:
            try:
                port = int(port_text)
            except ValueError:
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        else:
            port = 80
        try:
            soc.connect((host, port))
        except socket.error as arg:
            # Prefer the textual part of an (errno, message) pair.
            if len(arg.args) > 1:
                msg = arg.args[1]
            else:
                msg = str(arg)
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Open a tunnel to ``self.path`` and relay bytes in both directions."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                # A status line is "<version> <code> <reason>"; the separating
                # spaces are mandatory.
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent:%s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Forward the request to the origin server and relay the response.

        Also bound to HEAD, POST, PUT and DELETE.  Only ``http`` URLs without
        a fragment are accepted; anything else is answered with 400.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "badurl%s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                # Request line is "<method> <target> <version>"; the origin
                # server receives the path form of the URL.
                request_head = ["%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version)]
                self.headers['Connection'] = 'close'
                if 'Proxy-Connection' in self.headers:
                    del self.headers['Proxy-Connection']
                for key_val in self.headers.items():
                    request_head.append("%s:%s\r\n" % key_val)
                request_head.append("\r\n")
                # sendall() retries until every byte is written; send() may
                # stop after a partial write.
                soc.sendall("".join(request_head))
                # Request line and headers are sent; relay everything else.
                self._read_write(soc)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Return ``body`` with ``insert`` placed right after the first ``<tag ...>``.

        ``body`` is returned unchanged when no opening ``<tag`` is found or
        when it has no closing ``>``.
        """
        tag_at = body.find('<%s' % tag)
        if tag_at == -1:
            return body
        close_at = body.find('>', tag_at)
        if close_at == -1:
            return body
        print('*-' * 20)
        return body[:close_at + 1] + insert + body[close_at + 1:]

    @staticmethod
    def _dechunk(body):
        """Return the payload of a ``Transfer-Encoding: chunked`` body.

        Raises ValueError when a chunk-size line is missing or malformed.
        """
        pieces = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError('truncated chunk header')
            # Ignore optional chunk extensions after ';'.
            size = int(body[pos:eol].split(';', 1)[0], 16)
            if size == 0:
                break
            start = eol + 2
            pieces.append(body[start:start + size])
            pos = start + size + 2  # skip the CRLF that ends the chunk data
        return ''.join(pieces)

    def _gunzip_body(self, headers, body):
        """Return ``body`` decompressed; drop the encoding headers on success.

        Headers are modified only after decompression succeeded, so a failure
        leaves ``headers`` describing the untouched ``body``.
        """
        transfer = (headers.get('transfer-encoding') or '').strip().lower()
        chunked = transfer == 'chunked'
        payload = self._dechunk(body) if chunked else body
        plain = gzip.GzipFile(fileobj=StringIO.StringIO(payload)).read()
        del headers['content-encoding']
        if chunked:
            del headers['transfer-encoding']
        return plain

    # Original author's notes: pages such as Google's come back gzip-compressed
    # (content-encoding: gzip is very common), so the text cannot be filtered
    # until it is decompressed, and Content-Length must be recomputed after
    # anything is inserted.  gzip data arrives either directly after the
    # headers or wrapped in chunks; both are expanded here and the client
    # receives the plain data.
    def sendBackResponse(self, command, headers, body):
        """Send a (possibly rewritten) response back to the client.

        ``command`` is the status line without its trailing CRLF, ``headers``
        a mapping of header names to values, ``body`` the raw response body.

        A gzip body is decompressed first (chunked framing inside it is
        removed).  If the handler provides ``publish_advertisement`` it is
        applied to the body.  Content-Length is then recomputed.  When
        decompression fails the response is forwarded unmodified.
        """
        rewritable = True
        encoding = (headers.get('content-encoding') or '').strip().lower()
        if encoding == 'gzip':
            try:
                body = self._gunzip_body(headers, body)
            except Exception:
                traceback.print_exc()
                print('decompressfailed!')
                rewritable = False

        if rewritable:
            # Insert the configured advertisement, if a publisher is available.
            publisher = getattr(self, 'publish_advertisement', None)
            if publisher is not None:
                try:
                    body = publisher(body)
                except Exception:
                    traceback.print_exc()
            # Replace any existing length header (whatever its case) so the
            # response never carries two conflicting values.
            # NOTE(review): a non-gzip chunked body keeps its Transfer-Encoding
            # header here, as in the original code.
            for name in [k for k in headers.keys()
                         if k.lower() == 'content-length']:
                del headers[name]
            headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        for k, v in headers.items():
            head.append("%s:%s\r\n" % (k, v))
        head.append("\r\n")
        self.connection.sendall("".join(head))
        self.connection.sendall(body)

    def _read_write(self, soc, max_idling=20):
        """Relay data between the client connection and ``soc``.

        Returns when either side closes its end, when select() reports an
        exceptional condition, or after ``max_idling`` consecutive 3-second
        waits with no data in either direction (20 * 3 = 60 s by default).
        """
        # self.connection: the client (inside) side; soc: the outbound side.
        iw = [self.connection, soc]
        count = 0
        while 1:
            count += 1
            (ins, _, exs) = select.select(iw, [], iw, 3)
            if exs:
                print('erroroccr!')
                break
            for i in ins:
                if i is soc:
                    out = self.connection
                else:
                    out = soc
                data = i.recv(8192)
                if not data:
                    # Peer closed the connection: stop relaying.
                    return
                out.sendall(data)
                count = 0
            if count == max_idling:
                print('idlingexitPython开发Http代理服务器  socketref,呆在autonavi.com  C++博客')
                break

    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET
459
class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn; adds no behaviour of its own."""
462
463
464
465
def serving(HandlerClass,
            ServerClass, protocol="HTTP/1.0"):
    """Create the proxy server and serve requests until interrupted.

    The process exits immediately unless the first command-line argument is
    'www.sw2us.com'.  The listening port is the second command-line argument
    if present, otherwise the 'httpport' property of ``confile`` (default
    8000).  After the start-up banner is printed, stdout and stderr are
    redirected to an in-memory buffer.
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        port = int(sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print("www.sw2us.com@2010v.1.0.0")
    print("ServingHTTPon %s port %s %s" % (
        sa[0], sa[1],
        "Python开发Http代理服务器  socketref,呆在autonavi.com  C++博客"))

    # Use the module-level ``buff`` when the script entry point created one;
    # otherwise fall back to a private buffer so that callers importing this
    # module do not hit a NameError.
    sink = globals().get('buff')
    if sink is None:
        sink = StringIO.StringIO()
    sys.stdout = sink
    sys.stderr = sink

    httpd.serve_forever()
492
493
494
if __name__ == '__main__':
    from sys import argv

    # Record our process id in 'proxy.pid'.
    with open('proxy.pid', 'w') as f:
        f.write(str(os.getpid()))

    try:
        # Resolve the comma-separated 'allowed_clients' property to IP
        # addresses.  A host that fails to resolve aborts start-up rather than
        # being dropped, because an empty list means "no restriction".
        allowed = []
        ss = confile.getPropertyValue('allowed_clients').strip()
        for h in ss.split(','):
            h = h.strip()
            if h:
                allowed.append(socket.gethostbyname(h))
        if len(allowed):
            ProxyHandler.allowed_clients = allowed
        # serving() redirects stdout/stderr into this buffer.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except (KeyboardInterrupt, SystemExit):
        # Normal ways to stop: Ctrl-C, or sys.exit() inside serving().
        pass
    except Exception:
        # Report the failure on the real stderr; sys.stderr may already point
        # at the in-memory buffer.
        traceback.print_exc(file=sys.__stderr__)