XpressEngine Core  1.11.2
 All Classes Namespaces Files Functions Variables Pages
extract.class.php
Go to the documentation of this file.
1 <?php
2 /* Copyright (C) NAVER <http://www.navercorp.com> */
/**
 * Splits a large XML document into one small cache file per item.
 *
 * The source (a local file or an http:// URL) is read in small chunks. Every
 * fragment delimited by $itemStartTag / $itemEndTag is written to
 * "<cache_path>/<n>.xml", and the path of each generated file is appended to
 * the index file "<cache_path>/index".
 */
class extract
{
	/** @var string md5 hash of the source file name; names the cache directory */
	var $key = '';
	/** @var string Directory that receives the generated item files */
	var $cache_path = './files/cache/importer';
	/** @var string Path of the index file listing the generated item files */
	var $cache_index_file = './files/cache/importer';
	/** @var string|null Source file name or http:// URL */
	var $filename = null;
	/** @var string Tag that opens the section to extract (empty: start at the beginning) */
	var $startTag = '';
	/** @var string Tag that closes the section to extract (empty: read until EOF) */
	var $endTag = '';
	/** @var string Tag that opens a single item */
	var $itemStartTag = '';
	/** @var string Tag that closes a single item */
	var $itemEndTag = '';

	/** @var resource|null Handle of the source being read */
	var $fd = null;
	/** @var resource|null Write handle of the index file */
	var $index_fd = null;

	/** @var bool TRUE once the start tag was found (or no start tag is required) */
	var $isStarted = false;
	/** @var bool TRUE when nothing is left to extract */
	var $isFinished = true;

	/** @var string Data read from the source that has not been consumed yet */
	var $buff = '';

	/** @var int Number of items extracted so far */
	var $index = 0;

	/**
	 * Configure the extractor and open the source.
	 *
	 * @param string $filename Source file name or http:// URL
	 * @param string $startTag Tag that opens the section to extract
	 * @param string $endTag Tag that closes the section to extract
	 * @param string $itemTag Tag that opens a single item
	 * @param string $itemEndTag Tag that closes a single item
	 * @return BaseObject Result of openFile()
	 */
	function set($filename, $startTag, $endTag, $itemTag, $itemEndTag)
	{
		$this->filename = $filename;

		$this->startTag = $startTag;
		if($endTag) $this->endTag = $endTag;
		$this->itemStartTag = $itemTag;
		$this->itemEndTag = $itemEndTag;

		$this->key = md5($filename);

		$this->cache_path = './files/cache/importer/'.$this->key;
		$this->cache_index_file = $this->cache_path.'/index';

		if(!is_dir($this->cache_path)) FileHandler::makeDir($this->cache_path);

		return $this->openFile();
	}

	/**
	 * Open the index file and the source, then skip forward to the start tag.
	 *
	 * @return BaseObject Error object (-1, 'msg_no_xml_file') when the source
	 *                    cannot be opened, an empty BaseObject otherwise
	 */
	function openFile()
	{
		// Release handles left over from a previous run of the same object.
		if(is_resource($this->fd) || is_resource($this->index_fd))
		{
			$this->closeFile();
		}
		$this->buff = '';
		$this->isStarted = false;
		$this->isFinished = true;

		FileHandler::removeFile($this->cache_index_file);
		$this->index_fd = fopen($this->cache_index_file, "a");

		$source = (string)$this->filename;

		// If local file
		if(strncasecmp('http://', $source, 7) !== 0)
		{
			if(!file_exists($source)) return $this->_openFailed();

			$this->fd = fopen($source, "r");
			// An unchecked FALSE handle would make every feof() loop below spin.
			if(!$this->fd) return $this->_openFailed();
		}
		// If remote file
		else
		{
			$url_info = parse_url($source);
			if(!is_array($url_info) || empty($url_info['host'])) return $this->_openFailed();

			$host = $url_info['host'];
			$port = empty($url_info['port']) ? 80 : $url_info['port'];
			$path = empty($url_info['path']) ? '/' : $url_info['path'];
			$query = isset($url_info['query']) ? $url_info['query'] : '';
			$scheme = isset($url_info['scheme']) ? $url_info['scheme'] : 'http';

			$this->fd = @fsockopen($host, $port);
			if(!$this->fd) return $this->_openFailed();

			// If the file name contains Korean, do urlencode (iconv required)
			if(preg_match('/[\xEA-\xED][\x80-\xFF]{2}/', $path) && function_exists('iconv'))
			{
				$path_list = explode('/', $path);
				$last = count($path_list) - 1;
				$path_list[$last] = urlencode(iconv("UTF-8", "EUC-KR", $path_list[$last]));
				$path = implode('/', $path_list);
			}

			// Only append "?" when there really is a query string.
			$target = ($query !== '') ? $path.'?'.$query : $path;
			$header = sprintf("GET %s HTTP/1.0\r\nHost: %s\r\nReferer: %s://%s\r\nConnection: Close\r\n\r\n", $target, $host, $scheme, $host);
			@fwrite($this->fd, $header);

			// Consume the response header (everything up to the first blank line).
			$response_header = '';
			while(!feof($this->fd))
			{
				$line = fgets($this->fd, 1024);
				$response_header .= $line;
				if(!trim($line)) break;
			}
			if(preg_match('/404 Not Found/i', $response_header)) return $this->_openFailed();
		}

		if($this->startTag)
		{
			$tag_len = strlen($this->startTag);
			$window = '';
			while(!feof($this->fd))
			{
				$window .= fgets($this->fd, 1024);
				$pos = strpos($window, $this->startTag);
				if($pos !== false)
				{
					// Keep whatever follows the start tag in the chunk just read.
					$this->buff = (string)substr($window, $pos + $tag_len);
					$this->isStarted = true;
					$this->isFinished = false;
					break;
				}
				// Retain just enough bytes to recognise a tag split across two reads.
				$window = ($tag_len > 1) ? (string)substr($window, 1 - $tag_len) : '';
			}
		}
		else
		{
			$this->isStarted = true;
			$this->isFinished = false;
		}

		return new BaseObject();
	}

	/**
	 * Close the source and index handles and mark the extraction as finished.
	 * Safe to call more than once.
	 *
	 * @return void
	 */
	function closeFile()
	{
		$this->isFinished = true;
		if(is_resource($this->fd)) fclose($this->fd);
		if(is_resource($this->index_fd)) fclose($this->index_fd);
		$this->fd = null;
		$this->index_fd = null;
	}

	/**
	 * Tell whether there is nothing left to extract.
	 *
	 * @return bool TRUE when the extraction was closed, the source handle is
	 *              not usable, or the source reached EOF and the buffer holds
	 *              no further item start tag
	 */
	function isFinished()
	{
		if($this->isFinished || !is_resource($this->fd)) return true;
		if(!feof($this->fd)) return false;

		// At EOF the buffer may still hold complete items that were read last.
		return $this->_tagPos($this->itemStartTag) === false;
	}

	/**
	 * Extract every remaining item into the cache directory.
	 *
	 * @return void
	 */
	function saveItems()
	{
		// NOTE(review): cache_path already ends with the key, so this targets
		// "<cache_path><key>", a path set() never creates. Kept unchanged on
		// purpose: removing cache_path itself here would delete the index file
		// that openFile() has just opened.
		FileHandler::removeDir($this->cache_path.$this->key);
		$this->index = 0;
		while(!$this->isFinished())
		{
			$this->getItem();
		}
		// The loop can end on EOF without getItem() having closed the handles.
		$this->closeFile();
	}

	/**
	 * Extract all items, then concatenate them into a single file wrapped in
	 * <items>...</items>. The individual item files are removed afterwards.
	 *
	 * @param string $filename Name of the merged file, relative to the cache directory
	 * @return void
	 */
	function mergeItems($filename)
	{
		$this->saveItems();

		$filename = sprintf('%s/%s', $this->cache_path, $filename);

		$index_fd = fopen($this->cache_index_file, "r");
		$fd = fopen($filename, 'w');
		if(!$index_fd || !$fd)
		{
			if($index_fd) fclose($index_fd);
			if($fd) fclose($fd);
			return;
		}

		fwrite($fd, '<items>');
		while(!feof($index_fd))
		{
			$target_file = trim(fgets($index_fd, 1024));
			if($target_file === '' || !file_exists($target_file)) continue;

			fwrite($fd, FileHandler::readFile($target_file));
			FileHandler::removeFile($target_file);
		}
		fwrite($fd, '</items>');

		fclose($fd);
		fclose($index_fd);
	}

	/**
	 * Extract the next item from the source into "<cache_path>/<index>.xml"
	 * and record that path in the index file.
	 *
	 * Closes the extraction when the section end tag or the end of the source
	 * is reached before another item start tag.
	 *
	 * @return void
	 */
	function getItem()
	{
		if($this->isFinished()) return;

		// Phase 1: read until the buffer contains the next item start tag.
		$scan_keep = max(strlen((string)$this->itemStartTag), strlen((string)$this->endTag)) - 1;
		while(true)
		{
			$start_pos = $this->_tagPos($this->itemStartTag);
			if($start_pos !== false) break;

			if($this->_tagPos($this->endTag) !== false || feof($this->fd))
			{
				$this->closeFile();
				return;
			}

			// Neither tag is in the buffer: only its tail can still matter.
			if(strlen($this->buff) > $scan_keep)
			{
				$this->buff = ($scan_keep > 0) ? (string)substr($this->buff, -$scan_keep) : '';
			}
			$this->buff .= fgets($this->fd, 1024);
		}

		$this->buff = substr($this->buff, $start_pos);
		// Put a line break right after the item's opening tag.
		$this->buff = preg_replace("/>/", ">\r\n", $this->buff, 1);

		$filename = sprintf('%s/%s.xml', $this->cache_path, $this->index);
		$item_fd = fopen($filename, 'w');
		if(!$item_fd)
		{
			// The cache is not writable; nothing further can be extracted.
			$this->closeFile();
			return;
		}
		$this->index++;
		fwrite($this->index_fd, $filename."\r\n");

		// Phase 2: stream the item to its file until the item end tag shows up.
		$tail_len = max(0, strlen((string)$this->itemEndTag) - 1);
		while(true)
		{
			$end_pos = $this->_tagPos($this->itemEndTag);
			if($end_pos !== false)
			{
				$end_pos += strlen($this->itemEndTag);
				fwrite($item_fd, $this->_addTagCRTail(substr($this->buff, 0, $end_pos)));
				$this->buff = (string)substr($this->buff, $end_pos);
				break;
			}

			if(feof($this->fd))
			{
				// Truncated item: keep what is there instead of dropping it.
				fwrite($item_fd, $this->_addTagCRTail($this->buff));
				$this->buff = '';
				break;
			}

			// Flush everything except a tail long enough to hold an end tag
			// that is split across two reads.
			$this->buff = $this->_addTagCRTail($this->buff);
			$flush_len = strlen($this->buff) - $tail_len;
			if($flush_len > 0)
			{
				fwrite($item_fd, substr($this->buff, 0, $flush_len));
				$this->buff = (string)substr($this->buff, $flush_len);
			}
			$this->buff .= fgets($this->fd, 1024);
		}
		fclose($item_fd);
	}

	/**
	 * @return int Number of items extracted so far
	 */
	function getTotalCount()
	{
		return $this->index;
	}

	/**
	 * @return string md5 key derived from the source file name
	 */
	function getKey()
	{
		return $this->key;
	}

	/**
	 * Insert a CRLF between a closing tag and a tag that follows it directly.
	 *
	 * @param string $str XML fragment
	 * @return string Fragment with "</x><" rewritten to "</x>\r\n<"
	 */
	function _addTagCRTail($str) {
		$str = preg_replace('/<\/([^>]*)></i', "</$1>\r\n<", $str);
		return $str;
	}

	/**
	 * Locate a tag in the unconsumed buffer.
	 *
	 * @param string $tag Tag to look for
	 * @return int|false Byte offset of the tag, FALSE when the tag is empty or absent
	 */
	function _tagPos($tag)
	{
		$tag = (string)$tag;
		if($tag === '') return false;

		return strpos((string)$this->buff, $tag);
	}

	/**
	 * Release any open handle and build the "source not available" error.
	 *
	 * @return BaseObject Error object (-1, 'msg_no_xml_file')
	 */
	function _openFailed()
	{
		$this->closeFile();
		return new BaseObject(-1, 'msg_no_xml_file');
	}
}
318 /* End of file extract.class.php */
319 /* Location: ./modules/importer/extract.class.php */
removeFile($filename)
set($filename, $startTag, $endTag, $itemTag, $itemEndTag)
mergeItems($filename)
_addTagCRTail($str)
makeDir($path_string)
readFile($filename)