Skip to content

Commit 8cdeaef

Browse files
author
Martin Smeeckaert
committed
truncate html
1 parent 1f92438 commit 8cdeaef

File tree

5 files changed

+138
-8
lines changed

5 files changed

+138
-8
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ echo String::truncate($text, strlen($text)); // "Lorem ipsum dolorem si amet, lo
4646

4747
```
4848

49+
**Truncate HTML**
50+
51+
Truncate an HTML string to the word closest to a certain length
52+
53+
```php
54+
use Coduo\PHPHumanizer\String;
55+
56+
$text = '<p><b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create <a href="/wiki/Web_page" title="Web page">web pages</a>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span>[</span>1<span>]</span></a></sup> <a href="/wiki/Web_browser" title="Web browser">Web browsers</a> can read HTML files and render them into visible or audible web pages. HTML describes the structure of a <a href="/wiki/Website" title="Website">website</a> <a href="/wiki/Semantic" title="Semantic" class="mw-redirect">semantically</a> along with cues for presentation, making it a markup language, rather than a <a href="/wiki/Programming_language" title="Programming language">programming language</a>.</p>';
57+
58+
echo String::truncateHtml($text, 3); // "<b>HyperText</b>"
59+
echo String::truncateHtml($text, 12, ''); // "HyperText Markup"
60+
echo String::truncateHtml($text, 50, '', '...'); // "HyperText Markup Language, commonly referred to as..."
61+
echo String::truncateHtml($text, 75, '<b><i><u><em><strong><a><span>', '...'); // '<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup...</a>'
62+
63+
```
64+
4965
## Number
5066

5167
**Ordinalize**

spec/Coduo/PHPHumanizer/StringSpec.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,30 @@ function it_truncate_string_to_word_closest_to_a_certain_number_of_characters()
3333
$this->truncate($text, 0, '...')->shouldReturn("Lorem...");
3434
$this->truncate($text, -2)->shouldReturn($text);
3535
}
36+
37+
38+
function it_truncate_string_to_word_closest_to_a_certain_number_of_characters_with_html_tags()
{
    // Fixture: a paragraph mixing allowed inline tags (<b>, <a>, <span>) with
    // tags outside the default allow-list (<p>, <sup>).
    $html = '<p><b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create <a href="/wiki/Web_page" title="Web page">web pages</a>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span>[</span>1<span>]</span></a></sup> <a href="/wiki/Web_browser" title="Web browser">Web browsers</a> can read HTML files and render them into visible or audible web pages. HTML describes the structure of a <a href="/wiki/Website" title="Website">website</a> <a href="/wiki/Semantic" title="Semantic" class="mw-redirect">semantically</a> along with cues for presentation, making it a markup language, rather than a <a href="/wiki/Programming_language" title="Programming language">programming language</a>.</p>';

    // Default allow-list: allowed tags are kept and re-closed after the cut.
    $this->truncateHtml($html, 3)
        ->shouldReturn('<b>HyperText</b>');
    $this->truncateHtml($html, 12)
        ->shouldReturn('<b>HyperText Markup</b>');
    $this->truncateHtml($html, 50)
        ->shouldReturn('<b>HyperText Markup Language</b>, commonly referred to as');
    $this->truncateHtml($html, 75)
        ->shouldReturn('<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup</a>');
    $this->truncateHtml($html, 100)
        ->shouldReturn('<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create');

    // Empty allow-list: every tag is stripped before truncating.
    $this->truncateHtml($html, 3, '')
        ->shouldReturn('HyperText');
    $this->truncateHtml($html, 12, '')
        ->shouldReturn('HyperText Markup');
    $this->truncateHtml($html, 50, '')
        ->shouldReturn('HyperText Markup Language, commonly referred to as');
    $this->truncateHtml($html, 75, '')
        ->shouldReturn('HyperText Markup Language, commonly referred to as HTML, is the standard markup');
    $this->truncateHtml($html, 100, '')
        ->shouldReturn('HyperText Markup Language, commonly referred to as HTML, is the standard markup language used to create');

    // Continuation suffix: placed after the cut text and before any re-closed tags.
    $this->truncateHtml($html, 50, '', '...')
        ->shouldReturn('HyperText Markup Language, commonly referred to as...');
    $this->truncateHtml($html, 75, '<b><i><u><em><strong><a><span>', '...')
        ->shouldReturn('<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup...</a>');
}
3662
}

src/Coduo/PHPHumanizer/String.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use Coduo\PHPHumanizer\String\Humanize;
66
use Coduo\PHPHumanizer\String\Truncate;
7+
use Coduo\PHPHumanizer\String\TruncateHtml;
78

89
class String
910
{
@@ -16,4 +17,9 @@ public static function truncate($text, $charactersCount, $append = '')
1617
{
1718
return (string) new Truncate($text, $charactersCount, $append);
1819
}
20+
21+
/**
 * Truncate an HTML string to the word closest to a given character count.
 *
 * Thin static facade: builds a TruncateHtml object from the arguments and
 * returns its string representation.
 *
 * @param string $text            HTML to truncate
 * @param int    $charactersCount requested number of characters
 * @param string $allowedTags     tags to keep, in strip_tags() notation
 * @param string $append          text appended when the input was cut
 *
 * @return string
 */
public static function truncateHtml($text, $charactersCount, $allowedTags = '<b><i><u><em><strong><a><span>', $append = '')
{
    $truncator = new TruncateHtml($text, $charactersCount, $allowedTags, $append);

    return (string) $truncator;
}
1925
}

src/Coduo/PHPHumanizer/String/Truncate.php

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ class Truncate
77
/**
88
* @var string
99
*/
10-
private $text;
10+
protected $text;
1111

1212
/**
1313
* @var int
1414
*/
15-
private $charactersCount;
15+
protected $charactersCount;
1616

1717
/**
1818
* @var string
1919
*/
20-
private $append;
20+
protected $append;
2121

2222
/**
2323
* @param string $text
@@ -31,17 +31,22 @@ public function __construct($text, $charactersCount, $append = '')
3131
$this->append = $append;
3232
}
3333

34+
/**
 * Find the position of the first space at or after $charCount.
 *
 * @param string $text      text being truncated
 * @param int    $charCount character offset from which the search starts
 *
 * @return int position of that space, or $charCount itself when there is
 *             no space from that offset on
 */
protected function breakpoint($text, $charCount)
{
    // mb_strpos() rejects an offset outside the string (warning on PHP 5/7,
    // ValueError on PHP 8). There is nothing to search in that case, so
    // return the requested count exactly as the "no space found" branch does.
    if (abs($charCount) > mb_strlen($text)) {
        return $charCount;
    }

    $breakpoint = mb_strpos($text, ' ', $charCount);

    return false === $breakpoint ? $charCount : $breakpoint;
}
42+
3443
/**
 * Render the truncated text.
 *
 * The text is returned untouched when the requested count is negative or
 * the text already fits. Otherwise it is cut at the first space at or after
 * the requested count, right-trimmed, and suffixed with $append.
 *
 * @return string
 */
public function __toString()
{
    // Measure in characters, not bytes: the cut below uses mb_* functions, and
    // a byte-wise strlen() would treat multibyte text that already fits as
    // too long, appending the suffix to an uncut string.
    if ($this->charactersCount < 0 || mb_strlen($this->text) <= $this->charactersCount) {
        return $this->text;
    }

    $length = $this->breakpoint($this->text, $this->charactersCount);

    return rtrim(mb_substr($this->text, 0, $length)) . $this->append;
}
4752
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
class TruncateHtml extends Truncate
{
    /**
     * HTML elements that never take a closing tag.
     *
     * @var string[]
     */
    private static $voidElements = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    /**
     * Tags that survive stripping, in strip_tags() notation (e.g. "<a><b>").
     *
     * @var string
     */
    protected $allowedTags;

    /**
     * @param string $text
     * @param int    $charactersCount
     * @param string $allowedTags
     * @param string $append
     */
    public function __construct($text, $charactersCount, $allowedTags = '<b><i><u><em><strong><a><span>', $append = '')
    {
        $this->text = $text;
        $this->charactersCount = $charactersCount;
        $this->append = $append;
        $this->allowedTags = $allowedTags;
    }

    /**
     * Truncates an HTML string to the word closest to the configured number
     * of visible characters, then re-closes every tag left open by the cut.
     *
     * Tags do not count towards the limit and an HTML entity counts as one
     * character. A negative limit returns the string unchanged, mirroring
     * Truncate::__toString().
     *
     * Adapted from FuelPHP Str::truncate (https://github.com/fuelphp/common/blob/master/src/Str.php)
     *
     * @param string $string HTML already reduced to the allowed tags
     *
     * @return string the truncated string
     */
    protected function truncateHtml($string)
    {
        $limit = $this->charactersCount;
        if ($limit < 0) {
            return $string;
        }

        $limit = $this->widenLimitForEntities($string, $limit);
        $tags = $this->locateTags($string);

        // Count the markup characters that precede the limit-th visible one.
        $offset = 0;
        foreach ($tags as $tag) {
            if ($tag['start'] - $offset >= $limit) {
                break;
            }
            $offset += $tag['length'];
        }

        // Clamp before searching so breakpoint() never receives an offset
        // past the end of the string.
        $total = mb_strlen($string);
        $cut = min($total, $limit + $offset);
        $cut = min($total, $this->breakpoint($string, $cut));

        // Build the open-tag stack from the tags actually included in the
        // output. The word-boundary search may have moved the cut past tags
        // that lie beyond the raw limit.
        $open = array();
        foreach ($tags as $tag) {
            if ($tag['start'] >= $cut) {
                break;
            }
            if ($tag['start'] + $tag['length'] > $cut) {
                // The space that was found is inside this tag (an attribute
                // separator). Cut in front of the tag instead of emitting
                // half of it.
                $cut = $tag['start'];
                break;
            }
            $open = $this->trackTag($open, $tag['markup']);
        }

        $truncated = mb_substr($string, 0, $cut);
        if ($total > $cut) {
            $truncated .= $this->append;
        }
        // Close innermost tags first.
        foreach (array_reverse($open) as $name) {
            $truncated .= '</' . $name . '>';
        }

        return $truncated;
    }

    /**
     * Grows the limit so that each HTML entity located before it counts as a
     * single visible character.
     *
     * @param string $string
     * @param int    $limit
     *
     * @return int
     */
    private function widenLimitForEntities($string, $limit)
    {
        $plain = strip_tags($string);
        preg_match_all('/&(?:[a-z][a-z0-9]*|#[0-9]+|#x[0-9a-f]+);/i', $plain, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        foreach ($matches as $match) {
            // preg offsets are bytes; convert to characters before comparing.
            if (mb_strlen(substr($plain, 0, $match[0][1])) >= $limit) {
                break;
            }
            $limit += mb_strlen($match[0][0]) - 1;
        }

        return $limit;
    }

    /**
     * Lists every tag of the string with its position and length, both
     * measured in characters.
     *
     * @param string $string
     *
     * @return array[] entries with the keys "start", "length" and "markup"
     */
    private function locateTags($string)
    {
        preg_match_all('/<[^>]+>([^<]*)/', $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

        $tags = array();
        $bytePosition = 0;
        $charPosition = 0;
        foreach ($matches as $match) {
            // Advance the character position incrementally instead of
            // re-measuring the string from its start for every tag.
            $charPosition += mb_strlen(substr($string, $bytePosition, $match[0][1] - $bytePosition));
            $bytePosition = $match[0][1];

            // Group 1 is the text following the tag, so the tag itself spans
            // from the match start up to the start of that group.
            $markup = substr($string, $bytePosition, $match[1][1] - $bytePosition);
            $tags[] = array(
                'start' => $charPosition,
                'length' => mb_strlen($markup),
                'markup' => $markup,
            );
        }

        return $tags;
    }

    /**
     * Updates the stack of currently open tag names with one more tag.
     *
     * @param string[] $open   names of the tags open so far, outermost first
     * @param string   $markup the complete tag, e.g. '<a href="#">' or '</a>'
     *
     * @return string[] the updated stack
     */
    private function trackTag(array $open, $markup)
    {
        $name = mb_substr(strtok($markup, " \t\n\r\0\x0B>"), 1);
        if (!is_string($name) || '' === $name) {
            return $open;
        }

        if ('/' === $name[0]) {
            // Closing tag: pop only when it matches the innermost open tag.
            // Tag names are compared case-insensitively.
            if (!empty($open) && 0 === strcasecmp(end($open), mb_substr($name, 1))) {
                array_pop($open);
            }

            return $open;
        }

        // Void and self-closing tags never need a closing counterpart.
        $selfClosing = '/>' === substr($markup, -2);
        if ($selfClosing || in_array(strtolower(rtrim($name, '/')), self::$voidElements, true)) {
            return $open;
        }

        $open[] = $name;

        return $open;
    }

    /**
     * Strips the disallowed tags from the text and truncates the result.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->truncateHtml(strip_tags($this->text, $this->allowedTags));
    }
}

0 commit comments

Comments
 (0)