Skip to content

Commit 0465bd0

Browse files
committed
Merge branch 'master' of git://github.com/smeeckaert/php-humanizer into smeeckaert-master
Conflicts: spec/Coduo/PHPHumanizer/StringSpec.php src/Coduo/PHPHumanizer/String.php
2 parents 4a6ed1f + d5df1e4 commit 0465bd0

File tree

10 files changed

+296
-4
lines changed

10 files changed

+296
-4
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ echo String::truncate($text, strlen($text)); // "Lorem ipsum dolorem si amet, lo
4646

4747
```
4848

49+
**Truncate HTML**
50+
51+
Truncate an HTML string to the word closest to a certain length
52+
53+
```php
54+
use Coduo\PHPHumanizer\String;
55+
56+
$text = '<p><b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create <a href="/wiki/Web_page" title="Web page">web pages</a>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span>[</span>1<span>]</span></a></sup> <a href="/wiki/Web_browser" title="Web browser">Web browsers</a> can read HTML files and render them into visible or audible web pages. HTML describes the structure of a <a href="/wiki/Website" title="Website">website</a> <a href="/wiki/Semantic" title="Semantic" class="mw-redirect">semantically</a> along with cues for presentation, making it a markup language, rather than a <a href="/wiki/Programming_language" title="Programming language">programming language</a>.</p>';
57+
58+
echo String::truncateHtml($text, 3); // "HyperText"
59+
echo String::truncateHtml($text, 12, ''); // "HyperText Markup"
60+
echo String::truncateHtml($text, 50, '', '...'); // "HyperText Markup Language, commonly referred to as..."
61+
echo String::truncateHtml($text, 75, '<b><i><u><em><strong><a><span>', '...'); // '<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup...</a>'
62+
63+
```
64+
4965
## Number
5066

5167
**Ordinalize**
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
namespace spec\Coduo\PHPHumanizer\String;
4+
5+
use Coduo\PHPHumanizer\String\WordBreakpoint;
6+
use PhpSpec\ObjectBehavior;
7+
use Prophecy\Argument;
8+
9+
/**
 * Behaviour specification for WordBreakpoint::calculatePosition().
 *
 * Every example feeds a sentence and a characters count to the breakpoint and
 * pins the position it returns.
 */
class WordBreakpointSpec extends ObjectBehavior
{
    /**
     * The sentence is longer than the requested count: the expected position
     * is the index of the first space found at or after the count. A negative
     * count is expected to yield the full length of the sentence.
     */
    public function it_calculate_breakpoint_position_when_sentence_is_longer_than_characters_count()
    {
        $this->calculatePosition('Lorem ipsum dolorem', 2)->shouldReturn(5);
        $this->calculatePosition('Lorem ipsum dolorem', 4)->shouldReturn(5);
        $this->calculatePosition('Lorem ipsum dolorem', 5)->shouldReturn(5);
        $this->calculatePosition('Lorem ipsum dolorem', 10)->shouldReturn(11);
        $this->calculatePosition('Lorem ipsum dolorem', -2)->shouldReturn(19);
        $this->calculatePosition('Lorem ipsum dolorem', 0)->shouldReturn(5);
    }

    /**
     * The requested count exceeds the sentence length: the full length is expected.
     */
    public function it_calculate_breakpoint_position_when_sentence_is_shorter_than_characters_count()
    {
        $this->calculatePosition('Lorem ipsum dolorem', 20)->shouldReturn(19);
    }

    /**
     * The count falls inside the last word, so no space follows it: the full
     * length is expected.
     */
    public function it_calculate_breakpoint_position_when_characters_count_ends_in_last_word()
    {
        $this->calculatePosition('Lorem ipsum', 7)->shouldReturn(11);
    }

    /**
     * The count points exactly at the last space: that very index is expected.
     */
    public function it_calculate_breakpoint_position_when_characters_count_ends_in_last_space()
    {
        $this->calculatePosition('Lorem ipsum', 5)->shouldReturn(5);
    }
}

src/Coduo/PHPHumanizer/String.php

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
namespace Coduo\PHPHumanizer;
44

55
use Coduo\PHPHumanizer\String\Humanize;
6-
use Coduo\PHPHumanizer\String\Truncate;
6+
use Coduo\PHPHumanizer\String\TextTruncate;
7+
use Coduo\PHPHumanizer\String\HtmlTruncate;
8+
use Coduo\PHPHumanizer\String\WordBreakpoint;
79

810
class String
911
{
@@ -12,6 +14,7 @@ class String
1214
* @param bool|true $capitalize
1315
* @param string $separator
1416
* @param array $forbiddenWords
17+
*
1518
* @return string
1619
*/
1720
public static function humanize($text, $capitalize = true, $separator = '_', array $forbiddenWords = array())
@@ -23,10 +26,28 @@ public static function humanize($text, $capitalize = true, $separator = '_', arr
2326
* @param $text
2427
* @param $charactersCount
2528
* @param string $append
29+
*
2630
* @return string
2731
*/
2832
public static function truncate($text, $charactersCount, $append = '')
2933
{
30-
return (string) new Truncate($text, $charactersCount, $append);
34+
$truncate = new TextTruncate(new WordBreakpoint(), $append);
35+
36+
return $truncate->truncate($text, $charactersCount);
37+
}
38+
39+
/**
40+
* @param $text
41+
* @param $charactersCount
42+
* @param string $allowedTags
43+
* @param string $append
44+
*
45+
* @return string
46+
*/
47+
public static function truncateHtml($text, $charactersCount, $allowedTags = '', $append = '')
48+
{
49+
$truncate = new HtmlTruncate(new WordBreakpoint(), $allowedTags, $append);
50+
51+
return $truncate->truncate($text, $charactersCount);
3152
}
3253
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
/**
 * Strategy that decides where a text may be cut for a requested characters count.
 */
interface Breakpoint
{
    /**
     * Return the length of the truncated $text depending on the $charactersCount
     *
     * @param string $text
     * @param int    $charactersCount
     *
     * @return int
     */
    public function calculatePosition($text, $charactersCount);
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
/**
 * Truncates HTML: tags outside the allowed list are stripped, the remaining
 * markup is cut at the position chosen by the Breakpoint, and every element
 * still open at the cut is closed again.
 */
class HtmlTruncate implements TruncateInterface
{
    /**
     * HTML void elements. They never take a closing tag, so they must not be
     * queued for closing when the text is cut.
     *
     * @var string[]
     */
    private static $voidElements = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    /**
     * Text appended when the input was actually shortened.
     *
     * @var string
     */
    private $append;

    /**
     * Tags to keep, in the format accepted by strip_tags() (e.g. '<b><a>').
     *
     * @var string
     */
    private $allowedTags;

    /**
     * @var Breakpoint
     */
    private $breakpoint;

    /**
     * @param Breakpoint $breakpoint
     * @param string     $allowedTags
     * @param string     $append
     */
    public function __construct(Breakpoint $breakpoint, $allowedTags = '', $append = '')
    {
        $this->breakpoint = $breakpoint;
        $this->append = $append;
        $this->allowedTags = $allowedTags;
    }

    /**
     * Strips every tag that is not allowed, then truncates what is left.
     *
     * @param string $text
     * @param int    $charactersCount
     *
     * @return string
     */
    public function truncate($text, $charactersCount)
    {
        $strippedText = strip_tags($text, $this->allowedTags);

        return $this->truncateHtml($strippedText, $charactersCount);
    }

    /**
     * Truncates a string containing HTML to the given length, closing the
     * tags that are still open at the cut.
     *
     * Adapted from FuelPHP Str::truncate (https://github.com/fuelphp/common/blob/master/src/Str.php)
     *
     * @param string $string
     * @param int    $charactersCount
     *
     * @return string the truncated string
     */
    private function truncateHtml($string, $charactersCount)
    {
        $limit = $charactersCount;
        $offset = 0;
        $tags = array();

        // Handle special characters: an entity located before the limit widens
        // the limit by its source length minus one. Named, decimal and
        // hexadecimal entities are all recognised.
        preg_match_all('/&(?:[a-z][a-z0-9]*|#[0-9]+|#x[0-9a-f]+);/i', strip_tags($string), $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        foreach ($matches as $match) {
            if ($match[0][1] >= $limit) {
                break;
            }
            $limit += (mb_strlen($match[0][0]) - 1);
        }

        // Handle all the html tags.
        // NOTE(review): PREG_OFFSET_CAPTURE reports byte offsets while $limit is
        // counted in characters; with multibyte text the two can diverge. Left
        // as in the original implementation.
        preg_match_all('/<[^>]+>([^<]*)/', $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        foreach ($matches as $match) {
            if ($match[0][1] - $offset >= $limit) {
                break;
            }

            // The tag itself ends where the captured trailing text starts.
            $markupLength = $match[1][1] - $match[0][1];
            $markup = substr($match[0][0], 0, $markupLength);
            $tag = (string) mb_substr(strtok($markup, " \t\n\r\0\x0B>"), 1);

            if ($tag !== '') {
                if ($tag[0] === '/') {
                    // A closing tag only cancels the innermost open element.
                    if (end($tags) === mb_substr($tag, 1)) {
                        array_pop($tags);
                    }
                } elseif (!$this->isSelfContained($tag, $markup)) {
                    $tags[] = $tag;
                }
            }

            // Markup does not count towards the visible length.
            $offset += $markupLength;
        }

        $position = min(mb_strlen($string), $this->breakpoint->calculatePosition($string, $limit + $offset));

        $newString = mb_substr($string, 0, $position);
        if (mb_strlen($string) > $position) {
            $newString .= $this->append;
        }
        if (count($tags) > 0) {
            // Close the elements left open, innermost first.
            $newString .= '</'.implode('></', array_reverse($tags)).'>';
        }

        return $newString;
    }

    /**
     * Tells whether an opening tag needs no closing counterpart, i.e. it is
     * written self-closing ("<x />") or names an HTML void element.
     *
     * @param string $name   tag name as extracted from the markup
     * @param string $markup the complete tag, from "<" to ">"
     *
     * @return bool
     */
    private function isSelfContained($name, $markup)
    {
        if (substr($markup, -2) === '/>') {
            return true;
        }

        return in_array(strtolower(rtrim($name, '/')), self::$voidElements, true);
    }
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
/**
 * Plain-text truncation: cuts the text at the position chosen by the
 * Breakpoint and appends a suffix when something was actually removed.
 */
class TextTruncate implements TruncateInterface
{
    /**
     * Suffix added to a shortened text.
     *
     * @var string
     */
    private $append;

    /**
     * Strategy used to pick the cut position.
     *
     * @var Breakpoint
     */
    private $breakpoint;

    /**
     * @param Breakpoint $breakpoint
     * @param string     $append
     */
    public function __construct(Breakpoint $breakpoint, $append = '')
    {
        $this->append = $append;
        $this->breakpoint = $breakpoint;
    }

    /**
     * Returns $text unchanged when the count is negative or the text already
     * fits; otherwise returns the right-trimmed cut, followed by the suffix
     * unless the cut equals the original text.
     *
     * @param string $text
     * @param int    $charactersCount
     *
     * @return string
     */
    public function truncate($text, $charactersCount)
    {
        if ($charactersCount < 0) {
            return $text;
        }

        if (mb_strlen($text) <= $charactersCount) {
            return $text;
        }

        $cutAt = $this->breakpoint->calculatePosition($text, $charactersCount);
        $shortened = rtrim(mb_substr($text, 0, $cutAt));

        if ($shortened === $text) {
            return $shortened;
        }

        return $shortened . $this->append;
    }
}

src/Coduo/PHPHumanizer/String/Truncate.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
namespace Coduo\PHPHumanizer\String;
44

5+
/**
6+
* @deprecated since 1.0 use Coduo\PHPHumanizer\String\TextTruncate or Coduo\PHPHumanizer\String\HtmlTruncate instead
7+
*/
58
class Truncate
69
{
710
/**
@@ -44,4 +47,4 @@ public function __toString()
4447

4548
return rtrim(mb_substr($this->text, 0, $length)).$this->append;
4649
}
47-
}
50+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
/**
 * Contract shared by the truncation strategies.
 */
interface TruncateInterface
{
    /**
     * Truncate $text according to the requested $charactersCount.
     *
     * @param string $text
     * @param int    $charactersCount
     *
     * @return string
     */
    public function truncate($text, $charactersCount);
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
namespace Coduo\PHPHumanizer\String;
4+
5+
/**
 * Breakpoint that cuts on word boundaries: the position is the first space
 * found at or after the requested characters count.
 */
class WordBreakpoint implements Breakpoint
{
    /**
     * Returns the index of the first space located at or after
     * $charactersCount, or the full length of $text when the count is
     * negative, exceeds the length, or no such space exists.
     *
     * @param string $text
     * @param int    $charactersCount
     *
     * @return int
     */
    public function calculatePosition($text, $charactersCount)
    {
        $length = mb_strlen($text);

        $outOfRange = $charactersCount < 0 || $charactersCount > $length;
        if ($outOfRange) {
            return $length;
        }

        $spaceIndex = mb_strpos($text, ' ', $charactersCount);

        return (false === $spaceIndex) ? $length : $spaceIndex;
    }
}

tests/Coduo/PHPHumanizer/Tests/StringTest.php

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ function test_truncate_string_to_word_closest_to_a_certain_number_of_characters(
3434
$this->assertEquals($expected, String::truncate($text, $charactersCount, $append));
3535
}
3636

37+
/**
 * Checks String::truncateHtml() against every row of the HTML provider.
 *
 * The method name does not start with "test", so the @test annotation is
 * what makes PHPUnit run it; @dataProvider supplies the arguments.
 *
 * @test
 * @dataProvider truncateHtmlStringProvider
 *
 * @param string $text
 * @param int    $charactersCount
 * @param string $allowedTags
 * @param string $expected
 * @param string $append
 */
public function it_truncate_string_to_word_closest_to_a_certain_number_of_characters_with_html_tags($text, $charactersCount, $allowedTags, $expected, $append = '')
{
    $this->assertEquals($expected, String::truncateHtml($text, $charactersCount, $allowedTags, $append));
}
41+
3742
/**
3843
*
3944
* @return array
@@ -71,11 +76,32 @@ public function truncateStringProvider()
7176
array($shortText, "Short...", 3, '...'),
7277
array($shortText, "Short...", 4, '...'),
7378
array($shortText, "Short...", 5, '...'),
74-
array($shortText, "Short...", 6, '...'),
79+
array($shortText, "Short text", 6, '...'),
7580
array($shortText, "Short text", 7, '...'),
7681
array($shortText, "Short text", 8, '...'),
7782
array($shortText, "Short text", 9, '...'),
7883
array($shortText, "Short text", 10, '...')
7984
);
8085
}
86+
87+
public function truncateHtmlStringProvider()
88+
{
89+
$text = '<p><b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create <a href="/wiki/Web_page" title="Web page">web pages</a>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span>[</span>1<span>]</span></a></sup> <a href="/wiki/Web_browser" title="Web browser">Web browsers</a> can read HTML files and render them into visible or audible web pages. HTML describes the structure of a <a href="/wiki/Website" title="Website">website</a> <a href="/wiki/Semantic" title="Semantic" class="mw-redirect">semantically</a> along with cues for presentation, making it a markup language, rather than a <a href="/wiki/Programming_language" title="Programming language">programming language</a>.</p>';
90+
91+
return array(
92+
array($text, 3, '<b><i><u><em><strong><a><span>', "<b>HyperText</b>"),
93+
array($text, 12, '<b><i><u><em><strong><a><span>', "<b>HyperText Markup</b>"),
94+
array($text, 30, '<b><i><u><em><strong><a><span>', "<b>HyperText Markup Language</b>, commonly"),
95+
array($text, 50, '<b><i><u><em><strong><a><span>', "<b>HyperText Markup Language</b>, commonly referred to as"),
96+
array($text, 75, '<b><i><u><em><strong><a><span>', '<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup</a>'),
97+
array($text, 100,'<b><i><u><em><strong><a><span>', '<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup language</a> used to create'),
98+
array($text, 3 , '', "HyperText"),
99+
array($text, 12 , '', "HyperText Markup"),
100+
array($text, 50 , '', "HyperText Markup Language, commonly referred to as"),
101+
array($text, 75 , '', "HyperText Markup Language, commonly referred to as HTML, is the standard markup"),
102+
array($text, 100, '', "HyperText Markup Language, commonly referred to as HTML, is the standard markup language used to create"),
103+
array($text, 50, '', "HyperText Markup Language, commonly referred to as...", '...'),
104+
array($text, 75, '<b><i><u><em><strong><a><span>', '<b>HyperText Markup Language</b>, commonly referred to as <b>HTML</b>, is the standard <a href="/wiki/Markup_language" title="Markup language">markup...</a>', '...')
105+
);
106+
}
81107
}

0 commit comments

Comments
 (0)