TYPO3  7.6
Parser.php
Go to the documentation of this file.
1 <?php
2 
20 
27 
31 class Parser implements ParserInterface
32 {
36  protected $granularity;
37 
41  protected $opcodes;
42 
46  protected $from_text;
47 
51  protected $from_offset = 0;
52 
56  protected $last_edit;
57 
61  protected $stackpointer = 0;
62 
66  protected $edits = array();
67 
72  {
73  $this->granularity = $granularity;
74 
75  // Set default opcodes generator
76  $this->opcodes = new Opcodes;
77  }
78 
82  public function getGranularity()
83  {
84  return $this->granularity;
85  }
86 
91  {
92  $this->granularity = $granularity;
93  }
94 
98  public function getOpcodes()
99  {
100  return $this->opcodes;
101  }
102 
107  {
108  $this->opcodes = $opcodes;
109  }
110 
114  public function parse($from_text, $to_text)
115  {
116  // Ensure the granularity contains some delimiters
117  if (count($this->granularity) === 0) {
118  throw new GranularityCountException('Granularity contains no delimiters');
119  }
120 
121  // Reset internal parser properties
122  $this->from_text = $from_text;
123  $this->from_offset = 0;
124  $this->last_edit = null;
125  $this->stackpointer = 0;
126  $this->edits = array();
127 
128  // Parse the two string
129  $this->process($from_text, $to_text);
130 
131  // Return processed diff
132  $this->opcodes->setOpcodes($this->edits);
133  return $this->opcodes;
134  }
135 
143  protected function process($from_text, $to_text)
144  {
145  // Lets get parsing
146  $delimiters = $this->granularity[$this->stackpointer++];
147  $has_next_stage = $this->stackpointer < count($this->granularity);
148 
149  // Actually perform diff
150  $diff = $this->diff($from_text, $to_text, $delimiters);
151  $diff = (is_array($diff)) ? $diff : array();
152 
153  foreach ($diff as $fragment) {
154 
155  // increase granularity
156  if ($fragment instanceof Replace && $has_next_stage) {
157  $this->process(
158  substr($this->from_text, $this->from_offset, $fragment->getFromLen()),
159  $fragment->getText()
160  );
161  }
162  // fuse copy ops whenever possible
163  elseif ($fragment instanceof Copy && $this->last_edit instanceof Copy) {
164  $this->edits[count($this->edits)-1]->increase($fragment->getFromLen());
165  $this->from_offset += $fragment->getFromLen();
166  }
167  else {
168  /* $fragment instanceof Copy */
169  /* $fragment instanceof Delete */
170  /* $fragment instanceof Insert */
171  $this->edits[] = $this->last_edit = $fragment;
172  $this->from_offset += $fragment->getFromLen();
173  }
174  }
175 
176  $this->stackpointer--;
177  }
178 
187  protected function diff($from_text, $to_text, $delimiters)
188  {
189  // Empty delimiter means character-level diffing.
190  // In such case, use code path optimized for character-level diffing.
191  if (empty($delimiters)) {
192  return $this->charDiff($from_text, $to_text);
193  }
194 
195  $result = array();
196 
197  // fragment-level diffing
198  $from_text_len = strlen($from_text);
199  $to_text_len = strlen($to_text);
200  $from_fragments = $this->extractFragments($from_text, $delimiters);
201  $to_fragments = $this->extractFragments($to_text, $delimiters);
202 
203  $jobs = array(array(0, $from_text_len, 0, $to_text_len));
204  $cached_array_keys = array();
205 
206 
207  while ($job = array_pop($jobs)) {
208 
209  // get the segments which must be diff'ed
210  list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
211 
212  // catch easy cases first
213  $from_segment_length = $from_segment_end - $from_segment_start;
214  $to_segment_length = $to_segment_end - $to_segment_start;
215 
216  if (!$from_segment_length || !$to_segment_length ) {
217 
218  if ( $from_segment_length ) {
219  $result[$from_segment_start * 4] = new Delete($from_segment_length);
220  } else if ( $to_segment_length ) {
221  $result[$from_segment_start * 4 + 1] = new Insert(substr($to_text, $to_segment_start, $to_segment_length));
222  }
223 
224  continue;
225  }
226 
227  // find longest copy operation for the current segments
228  $best_copy_length = 0;
229 
230  $from_base_fragment_index = $from_segment_start;
231  $cached_array_keys_for_current_segment = array();
232 
233  while ( $from_base_fragment_index < $from_segment_end ) {
234 
235  $from_base_fragment = $from_fragments[$from_base_fragment_index];
236  $from_base_fragment_length = strlen($from_base_fragment);
237 
238  // performance boost: cache array keys
239  if (!isset($cached_array_keys_for_current_segment[$from_base_fragment])) {
240 
241  if ( !isset($cached_array_keys[$from_base_fragment]) ) {
242  $to_all_fragment_indices = $cached_array_keys[$from_base_fragment] = array_keys($to_fragments, $from_base_fragment, true);
243  }
244  else {
245  $to_all_fragment_indices = $cached_array_keys[$from_base_fragment];
246  }
247 
248  // get only indices which falls within current segment
249  if ($to_segment_start > 0 || $to_segment_end < $to_text_len) {
250 
251  $to_fragment_indices = array();
252 
253  foreach ($to_all_fragment_indices as $to_fragment_index) {
254 
255  if ($to_fragment_index < $to_segment_start) {
256  continue;
257  }
258 
259  if ($to_fragment_index >= $to_segment_end) {
260  break;
261  }
262 
263  $to_fragment_indices[] = $to_fragment_index;
264  }
265 
266  $cached_array_keys_for_current_segment[$from_base_fragment] = $to_fragment_indices;
267 
268  } else {
269  $to_fragment_indices = $to_all_fragment_indices;
270  }
271 
272  } else {
273  $to_fragment_indices = $cached_array_keys_for_current_segment[$from_base_fragment];
274  }
275 
276  // iterate through collected indices
277  foreach ($to_fragment_indices as $to_base_fragment_index) {
278 
279  $fragment_index_offset = $from_base_fragment_length;
280 
281  // iterate until no more match
282  for (;;) {
283 
284  $fragment_from_index = $from_base_fragment_index + $fragment_index_offset;
285 
286  if ($fragment_from_index >= $from_segment_end) {
287  break;
288  }
289 
290  $fragment_to_index = $to_base_fragment_index + $fragment_index_offset;
291 
292  if ($fragment_to_index >= $to_segment_end) {
293  break;
294  }
295 
296  if ($from_fragments[$fragment_from_index] !== $to_fragments[$fragment_to_index]) {
297  break;
298  }
299 
300  $fragment_length = strlen($from_fragments[$fragment_from_index]);
301  $fragment_index_offset += $fragment_length;
302  }
303 
304  if ($fragment_index_offset > $best_copy_length) {
305  $best_copy_length = $fragment_index_offset;
306  $best_from_start = $from_base_fragment_index;
307  $best_to_start = $to_base_fragment_index;
308  }
309  }
310 
311  $from_base_fragment_index += strlen($from_base_fragment);
312 
313  // If match is larger than half segment size, no point trying to find better
314  // TODO: Really?
315  if ($best_copy_length >= $from_segment_length / 2) {
316  break;
317  }
318 
319  // no point to keep looking if what is left is less than
320  // current best match
321  if ( $from_base_fragment_index + $best_copy_length >= $from_segment_end ) {
322  break;
323  }
324  }
325 
326  if ($best_copy_length) {
327  $jobs[] = array($from_segment_start, $best_from_start, $to_segment_start, $best_to_start);
328  $result[$best_from_start * 4 + 2] = new Copy($best_copy_length);
329  $jobs[] = array($best_from_start + $best_copy_length, $from_segment_end, $best_to_start + $best_copy_length, $to_segment_end);
330  } else {
331  $result[$from_segment_start * 4 ] = new Replace($from_segment_length, substr($to_text, $to_segment_start, $to_segment_length));
332  }
333  }
334 
335  ksort($result, SORT_NUMERIC);
336  return array_values($result);
337  }
338 
346  protected function charDiff($from_text, $to_text)
347  {
348  $result = array();
349  $jobs = array(array(0, strlen($from_text), 0, strlen($to_text)));
350 
351  while ($job = array_pop($jobs)) {
352 
353  // get the segments which must be diff'ed
354  list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
355 
356  $from_segment_len = $from_segment_end - $from_segment_start;
357  $to_segment_len = $to_segment_end - $to_segment_start;
358 
359  // catch easy cases first
360  if (!$from_segment_len || !$to_segment_len) {
361 
362  if ($from_segment_len) {
363  $result[$from_segment_start * 4 + 0] = new Delete($from_segment_len);
364  } else if ( $to_segment_len ) {
365  $result[$from_segment_start * 4 + 1] = new Insert(substr($to_text, $to_segment_start, $to_segment_len));
366  }
367 
368  continue;
369  }
370 
371  if ($from_segment_len >= $to_segment_len) {
372 
373  $copy_len = $to_segment_len;
374 
375  while ($copy_len) {
376 
377  $to_copy_start = $to_segment_start;
378  $to_copy_start_max = $to_segment_end - $copy_len;
379 
380  while ($to_copy_start <= $to_copy_start_max) {
381 
382  $from_copy_start = strpos(substr($from_text, $from_segment_start, $from_segment_len), substr($to_text, $to_copy_start, $copy_len));
383 
384  if ($from_copy_start !== false) {
385  $from_copy_start += $from_segment_start;
386  break 2;
387  }
388 
389  $to_copy_start++;
390  }
391 
392  $copy_len--;
393  }
394  } else {
395 
396  $copy_len = $from_segment_len;
397 
398  while ($copy_len) {
399 
400  $from_copy_start = $from_segment_start;
401  $from_copy_start_max = $from_segment_end - $copy_len;
402 
403  while ($from_copy_start <= $from_copy_start_max) {
404 
405  $to_copy_start = strpos(substr($to_text, $to_segment_start, $to_segment_len), substr($from_text, $from_copy_start, $copy_len));
406 
407  if ($to_copy_start !== false) {
408  $to_copy_start += $to_segment_start;
409  break 2;
410  }
411 
412  $from_copy_start++;
413  }
414 
415  $copy_len--;
416  }
417  }
418 
419  // match found
420  if ( $copy_len ) {
421  $jobs[] = array($from_segment_start, $from_copy_start, $to_segment_start, $to_copy_start);
422  $result[$from_copy_start * 4 + 2] = new Copy($copy_len);
423  $jobs[] = array($from_copy_start + $copy_len, $from_segment_end, $to_copy_start + $copy_len, $to_segment_end);
424  }
425  // no match, so delete all, insert all
426  else {
427  $result[$from_segment_start * 4] = new Replace($from_segment_len, substr($to_text, $to_segment_start, $to_segment_len));
428  }
429  }
430 
431  ksort($result, SORT_NUMERIC);
432  return array_values($result);
433  }
434 
446  protected function extractFragments($text, $delimiters)
447  {
448  // special case: split into characters
449  if (empty($delimiters)) {
450  $chars = str_split($text, 1);
451  $chars[strlen($text)] = '';
452 
453  return $chars;
454  }
455 
456  $fragments = array();
457  $start = 0;
458  $end = 0;
459 
460  for (;;) {
461 
462  $end += strcspn($text, $delimiters, $end);
463  $end += strspn($text, $delimiters, $end);
464 
465  if ($end === $start) {
466  break;
467  }
468 
469  $fragments[$start] = substr($text, $start, $end - $start);
470  $start = $end;
471  }
472 
473  $fragments[$start] = '';
474 
475  return $fragments;
476  }
477 }