@@ -11,8 +11,9 @@ defmodule PdfExtractor do
1111
1212 # Client
1313
14- def start_link ( [ ] = _opts \\ [ ] ) do
15- GenServer . start_link ( __MODULE__ , [ ] , name: __MODULE__ )
14+ def start_link ( opts \\ [ ] ) do
15+ opts = Keyword . validate! ( opts , name: __MODULE__ )
16+ GenServer . start_link ( __MODULE__ , [ ] , name: opts [ :name ] )
1617 end
1718
1819 @ doc ~S"""
@@ -47,9 +48,18 @@ defmodule PdfExtractor do
4748 "✂\nReceipt Payment part Account / Payable to\nCH4431999123000889012\n✂\nMax Muster & Söhne\nAccount / Payable to\nCH4431999123000889012 Musterstrasse 123\nMax Muster & Söhne 8000 Seldwyla\nMusterstrasse 123\n8000 Seldwyla\nReference\n210000000003139471430009017\nReference\n210000000003139471430009017\nAdditional information\nBestellung vom 15.10.2020\nPayable by (name/address)\nSimon Muster\nPayable by (name/address)\nMusterstrasse 1\nCurrency Amount\nSimon Muster\n8000 Seldwyla\nCHF 1 949.75 Musterstrasse 1\n8000 Seldwyla\nCurrency Amount\nCHF 1 949.75\nAcceptance point"
4849 }}
4950
51+ Extract text from only some pages.
52+
53+ iex> PdfExtractor.extract_text("priv/fixtures/fatura.pdf", [0])
54+ {:ok,
55+ %{
56+ 0 =>
57+ "Text Example Bill FATURA\n# 2025010002\nData: Jun 21, 2025\nProjeto de lei para:\nSaldo devedor: 1 525,59 €\nElixir Company\nItem Quantidade Avaliar Quantia\nTrabalho 1 1 500,00 € 1 500,00 €\nMais trabalho 1 25,59 € 25,59 €\nSubtotal: 1 525,59 €\nImposto (0%): 0,00 €\nTotal: 1 525,59 €"
58+ }}
59+
5060 Extract only the titles in the book chapters.
5161
52- iex> PdfExtractor.extract_text("priv/fixtures/book.pdf", [2, 8, 10], %{
62+ iex> PdfExtractor.extract_text("priv/fixtures/book.pdf", %{
5363 ...> 2 => {0, 0, 612, 190},
5464 ...> 8 => {0, 0, 612, 190},
5565 ...> 10 => {0, 0, 612, 190}
@@ -63,7 +73,7 @@ defmodule PdfExtractor do
6373
6474 Extract multiple areas from a single page.
6575
66- iex> PdfExtractor.extract_text("priv/fixtures/book.pdf", 1, %{
76+ iex> PdfExtractor.extract_text("priv/fixtures/book.pdf", %{
6777 ...> 1 => [{0, 100, 612, 140}, {0, 400, 612, 440}]
6878 ...> })
6979 {:ok,
@@ -74,8 +84,8 @@ defmodule PdfExtractor do
7484 ]
7585 }}
7686 """
77- def extract_text ( file_path , page_numbers \\ [ ] , areas \\ % { } ) do
78- GenServer . call ( __MODULE__ , { :extract_text , [ file_path , page_numbers , areas ] } )
87+ def extract_text ( file_path , pages \\ [ ] ) do
88+ GenServer . call ( __MODULE__ , { :extract_text , [ file_path , pages ] } )
7989 end
8090
8191 @ doc ~S"""
@@ -99,11 +109,21 @@ defmodule PdfExtractor do
99109 "✂\nReceipt Payment part Account / Payable to\nCH4431999123000889012\n✂\nMax Muster & Söhne\nAccount / Payable to\nCH4431999123000889012 Musterstrasse 123\nMax Muster & Söhne 8000 Seldwyla\nMusterstrasse 123\n8000 Seldwyla\nReference\n210000000003139471430009017\nReference\n210000000003139471430009017\nAdditional information\nBestellung vom 15.10.2020\nPayable by (name/address)\nSimon Muster\nPayable by (name/address)\nMusterstrasse 1\nCurrency Amount\nSimon Muster\n8000 Seldwyla\nCHF 1 949.75 Musterstrasse 1\n8000 Seldwyla\nCurrency Amount\nCHF 1 949.75\nAcceptance point"
100110 }}
101111
112+ Extract text from only some pages.
113+
114+ iex> content = File.read!("priv/fixtures/fatura.pdf")
115+ ...> PdfExtractor.extract_text_from_binary(content, [0])
116+ {:ok,
117+ %{
118+ 0 =>
119+ "Text Example Bill FATURA\n# 2025010002\nData: Jun 21, 2025\nProjeto de lei para:\nSaldo devedor: 1 525,59 €\nElixir Company\nItem Quantidade Avaliar Quantia\nTrabalho 1 1 500,00 € 1 500,00 €\nMais trabalho 1 25,59 € 25,59 €\nSubtotal: 1 525,59 €\nImposto (0%): 0,00 €\nTotal: 1 525,59 €"
120+ }}
121+
102122 Extract only the titles in the book chapters.
103123
104124 iex> content = File.read!("priv/fixtures/book.pdf")
105125 ...>
106- ...> PdfExtractor.extract_text_from_binary(content, [2, 8, 10], %{
126+ ...> PdfExtractor.extract_text_from_binary(content, %{
107127 ...> 2 => {0, 0, 612, 190},
108128 ...> 8 => {0, 0, 612, 190},
109129 ...> 10 => {0, 0, 612, 190}
@@ -119,7 +139,7 @@ defmodule PdfExtractor do
119139
120140 iex> content = File.read!("priv/fixtures/book.pdf")
121141 ...>
122- ...> PdfExtractor.extract_text_from_binary(content, 1, %{
142+ ...> PdfExtractor.extract_text_from_binary(content, %{
123143 ...> 1 => [{0, 100, 612, 140}, {0, 400, 612, 440}]
124144 ...> })
125145 {:ok,
@@ -131,8 +151,8 @@ defmodule PdfExtractor do
131151 }}
132152
133153 """
134- def extract_text_from_binary ( binary , page_numbers \\ [ ] , areas \\ % { } ) do
135- GenServer . call ( __MODULE__ , { :extract_text_from_binary , [ binary , page_numbers , areas ] } )
154+ def extract_text_from_binary ( binary , pages \\ [ ] ) do
155+ GenServer . call ( __MODULE__ , { :extract_text_from_binary , [ binary , pages ] } )
136156 end
137157
138158 @ doc """
0 commit comments