Changeset 156 for table_parser
- Timestamp: 21/11/10 01:21:34 (18 months ago)
- File: 1 edited
table_parser/table_parser.py (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
table_parser/table_parser.py
r155 r156 32 32 def append(self,item): 33 33 if self.data != None: 34 print "Overwriting %s"%self.data34 print "Overwriting %s"%self.data 35 35 self.data = item 36 36 … … 42 42 def __init__(self, parser=None): 43 43 """ 44 The parser is a method which will be passed the doc at the end45 of the parsing. Useful if TableParser is within an inner loop and46 you want to automatically process the document. If it is omitted then47 it will do nothing48 """44 The parser is a method which will be passed the doc at the end 45 of the parsing. Useful if TableParser is within an inner loop and 46 you want to automatically process the document. If it is omitted then 47 it will do nothing 48 """ 49 49 self._tag = None 50 self._buf = None51 self._attrs = None52 self.doc = None # Where the document will be stored53 self._stack = None54 self._parser = parser55 self.reset()50 self._buf = None 51 self._attrs = None 52 self.doc = None # Where the document will be stored 53 self._stack = None 54 self._parser = parser 55 self.reset() 56 56 return 57 57 58 58 def reset(self): 59 59 HTMLParser.reset(self) 60 self.doc = []61 self._stack = [self.doc]62 self._buf = ''60 self.doc = [] 61 self._stack = [self.doc] 62 self._buf = '' 63 63 64 64 def close(self): 65 65 HTMLParser.close(self) 66 if self._parser != None:67 self._parser(self.doc)66 if self._parser != None: 67 self._parser(self.doc) 68 68 69 69 def handle_starttag(self, tag, attrs): 70 70 self._tag = tag 71 self._attrs = attrs72 if lower(tag) == 'table':73 self._buf = ''71 self._attrs = attrs 72 if lower(tag) == 'table': 73 self._buf = '' 74 74 self._stack.append(Table()) 75 elif lower(tag) == 'tr':76 self._buf = ''75 elif lower(tag) == 'tr': 76 self._buf = '' 77 77 self._stack.append(Row()) 78 elif lower(tag) == 'td':79 self._buf = ''78 elif lower(tag) == 'td': 79 self._buf = '' 80 80 self._stack.append(Cell()) 81 81 … … 83 83 84 84 def handle_endtag(self, tag): 85 if lower(tag) == 'table':86 t = None87 while not isinstance(t, 
Table):85 if lower(tag) == 'table': 86 t = None 87 while not isinstance(t, Table): 88 88 t = self._stack.pop() 89 r = top(self._stack)89 r = top(self._stack) 90 90 r.append(t) 91 91 92 elif lower(tag) == 'tr':93 t = None94 while not isinstance(t, Row):92 elif lower(tag) == 'tr': 93 t = None 94 while not isinstance(t, Row): 95 95 t = self._stack.pop() 96 r = top(self._stack)96 r = top(self._stack) 97 97 r.append(t) 98 98 99 elif lower(tag) == 'td':100 c = None101 while not isinstance(c, Cell):99 elif lower(tag) == 'td': 100 c = None 101 while not isinstance(c, Cell): 102 102 c = self._stack.pop() 103 t = top(self._stack)104 if isinstance(t, Row):105 # We can not currently have text and other table elements in the same cell.106 # Table elements get precedence107 if c.data == None:103 t = top(self._stack) 104 if isinstance(t, Row): 105 # We can not currently have text and other table elements in the same cell. 106 # Table elements get precedence 107 if c.data == None: 108 108 t.append(self._buf) 109 else:110 t.append(c.data)111 else:112 print "Cell not in a row, rather in a %s"%t109 else: 110 t.append(c.data) 111 else: 112 print "Cell not in a row, rather in a %s"%t 113 113 self._tag = None 114 114 #print "Encountered the end of a %s tag" % tag
Note: See TracChangeset for help on using the changeset viewer.
