@@ -50,6 +50,59 @@ def get_first_heading(file_path):
50
50
break
51
51
return heading_text
52
52
53
def get_description_content(file_path):
    """Extract the body of the '## What is Ensemble?' section of an index file.

    The file is scanned line by line. A leading YAML frontmatter block and
    HTML comments are ignored. Once the '## What is Ensemble?' heading is
    seen, every non-empty line is collected until the next line that starts
    with '##' (or the end of the file).

    Args:
        file_path: Path of the Markdown/MDX file to read (decoded as UTF-8).

    Returns:
        The collected lines, stripped and joined with single spaces, or
        None when the section is missing or has no non-empty lines.
    """
    description_lines = []
    in_frontmatter = False
    in_comment = False
    found_what_is = False

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f):
            line_stripped = line.strip()

            # Frontmatter can only open on the very first line. A later
            # '---' is a Markdown horizontal rule and must not hide the
            # content that follows it.
            if line_number == 0 and line_stripped.startswith("---"):
                in_frontmatter = True
                continue
            if in_frontmatter:
                if line_stripped.startswith("---"):
                    in_frontmatter = False
                continue

            # Skip HTML comments, including ones that span several lines.
            if in_comment:
                if "-->" in line_stripped:
                    in_comment = False
                continue
            if line_stripped.startswith("<!--"):
                in_comment = "-->" not in line_stripped
                continue

            # Look for the "What is Ensemble?" section heading.
            if line_stripped.startswith("## What is Ensemble?"):
                found_what_is = True
                continue

            if found_what_is:
                if line_stripped.startswith("##"):  # Next section starts
                    break
                if line_stripped:
                    description_lines.append(line_stripped)

    return " ".join(description_lines) if description_lines else None
86
+
87
def get_full_content(file_path):
    """Return the text of a file without YAML frontmatter or HTML comments.

    Only a frontmatter block that opens on the first line of the file is
    removed. Later lines starting with '---' (horizontal rules, or table
    separator rows such as '--- | ---') are ordinary content and are kept.

    Args:
        file_path: Path of the Markdown/MDX file to read (decoded as UTF-8).

    Returns:
        The remaining lines, right-stripped and joined with newlines, with
        every '<!-- ... -->' span removed (including multi-line ones) and
        leading/trailing whitespace trimmed.
    """
    content_lines = []
    in_frontmatter = False

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f):
            is_delimiter = line.strip().startswith("---")
            if line_number == 0 and is_delimiter:
                in_frontmatter = True
                continue
            if in_frontmatter:
                if is_delimiter:
                    in_frontmatter = False
                continue
            content_lines.append(line.rstrip())

    content = '\n'.join(content_lines)
    # Remove HTML comments before trimming so that a comment at the very
    # start or end of the page does not leave stray blank lines behind.
    content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
    return content.strip()
105
+
53
106
def resolve_entry_path (dir_path , name ):
54
107
"""
55
108
Given a directory and an entry name from _meta.json, try to resolve it to an actual file or directory.
@@ -71,16 +124,26 @@ def process_dir(dir_path, skip_index=False):
71
124
"""
72
125
meta_file = os .path .join (dir_path , "_meta.json" )
73
126
entries = []
127
+ meta_descriptions = {}
128
+
74
129
if os .path .isfile (meta_file ):
75
130
with open (meta_file , 'r' , encoding = 'utf-8' ) as f :
76
131
meta = json .load (f )
77
132
for key , val in meta .items ():
78
- entries .append ((key , val ))
133
+ if isinstance (val , dict ) and 'description' in val :
134
+ # Store description for this key
135
+ meta_descriptions [key ] = val ['description' ]
136
+ # Extract title if it exists
137
+ title = val .get ('title' , key )
138
+ entries .append ((key , title ))
139
+ else :
140
+ entries .append ((key , val ))
79
141
else :
80
142
for name in sorted (os .listdir (dir_path )):
81
143
if name .startswith ('_' ):
82
144
continue
83
145
entries .append ((name , None ))
146
+
84
147
nodes = []
85
148
for name , title in entries :
86
149
resolved = resolve_entry_path (dir_path , name )
@@ -92,6 +155,9 @@ def process_dir(dir_path, skip_index=False):
92
155
if base in ("training-videos.md" , "training-videos.mdx" ):
93
156
continue
94
157
158
+ # Get description from _meta.json if available
159
+ meta_description = meta_descriptions .get (name )
160
+
95
161
if os .path .isfile (resolved ):
96
162
if skip_index and base in ("index.md" , "index.mdx" ):
97
163
continue
@@ -101,10 +167,12 @@ def process_dir(dir_path, skip_index=False):
101
167
page_title = heading_text if heading_text else name
102
168
if heading_text is None :
103
169
heading_text = page_title
170
+
104
171
nodes .append ({
105
172
"title" : page_title ,
106
173
"path" : resolved ,
107
- "heading" : heading_text
174
+ "heading" : heading_text ,
175
+ "meta_description" : meta_description # Only from _meta.json
108
176
})
109
177
elif os .path .isdir (resolved ):
110
178
group_title = title if isinstance (title , str ) else name
@@ -117,8 +185,10 @@ def process_dir(dir_path, skip_index=False):
117
185
if index_node :
118
186
group_node ["index_path" ] = index_node ["path" ]
119
187
group_node ["heading" ] = index_node .get ("heading" , group_title )
188
+ group_node ["meta_description" ] = meta_description # Only from _meta.json
120
189
else :
121
190
group_node ["heading" ] = group_title
191
+ group_node ["meta_description" ] = meta_description # Only from _meta.json
122
192
nodes .append (group_node )
123
193
return nodes
124
194
@@ -145,6 +215,114 @@ def generate_toc(nodes, depth=0):
145
215
toc_lines .append (f"{ indent } - [{ title } ](#{ anchor } )" )
146
216
return toc_lines
147
217
218
def generate_llms_toc(nodes, base_url="https://docs.ensembleui.com"):
    """Generate table of contents in llms.txt format (Cursor style).

    Each page becomes one Markdown list item, '- [title](url)', followed by
    ': description' when the node carries a non-empty 'meta_description'.
    Group nodes (those with a 'children' key) contribute their index page,
    if any, and then their children. Groups nested inside groups are walked
    recursively, so pages at any depth are listed.

    Args:
        nodes: Nodes as built by process_dir. Page nodes provide 'title' and
            'path'; group nodes provide 'children' and optionally
            'index_path', 'heading' and 'title'.
        base_url: Site root that page URLs are appended to.

    Returns:
        A list of llms.txt list-item strings in document order.
    """

    def url_path_for(file_path):
        # Path relative to the pages directory, with forward slashes, the
        # Markdown extension removed only as a suffix, and index pages
        # mapped to their directory.
        url_path = os.path.relpath(file_path, "pages").replace("\\", "/")
        for extension in (".mdx", ".md"):
            if url_path.endswith(extension):
                url_path = url_path[:-len(extension)]
                break
        if url_path == "index":
            return ""
        if url_path.endswith("/index"):
            return url_path[:-len("/index")]
        return url_path

    def link_line(title, file_path, description):
        url_path = url_path_for(file_path)
        url = f"{base_url}/{url_path}" if url_path else base_url
        if description:
            return f"- [{title}]({url}): {description}"
        return f"- [{title}]({url})"

    lines = []
    for node in nodes:
        if "children" in node:
            # Section/group: list its index page first, then its contents.
            index_path = node.get("index_path")
            if index_path:
                if "heading" in node:
                    title = node["heading"]
                else:
                    title = to_sentence_case(node["title"])
                lines.append(
                    link_line(title, index_path, node.get("meta_description"))
                )
            lines.extend(generate_llms_toc(node["children"], base_url))
        elif "path" in node:
            # Standalone page.
            lines.append(
                link_line(node["title"], node["path"], node.get("meta_description"))
            )

    return lines
269
+
270
def collect_all_pages(nodes):
    """Flatten the node tree into a list of page records for full-content output.

    Group nodes (those with a 'children' key) contribute their index page
    first, when 'index_path' is set, followed by the pages of their
    children, collected recursively. Every other node is treated as a page.

    Returns:
        A list of dicts with the keys 'title', 'path' and 'url_path', in
        document order.
    """
    collected = []

    for entry in nodes:
        if "children" not in entry:
            # Standalone page.
            collected.append({
                "title": entry["title"],
                "path": entry["path"],
                "url_path": get_url_path(entry["path"]),
            })
            continue

        # Group: its index page (if any) comes before its children.
        if entry.get("index_path"):
            collected.append({
                "title": entry.get("heading", entry["title"]),
                "path": entry["index_path"],
                "url_path": get_url_path(entry["index_path"]),
            })
        collected += collect_all_pages(entry["children"])

    return collected
295
+
296
def get_url_path(file_path):
    """Convert a file path under the pages directory to a site URL path.

    The path is made relative to "pages", backslashes become forward
    slashes, and a trailing '.mdx' or '.md' extension is removed. The
    extension is stripped only as a suffix, so directory or file names that
    merely contain '.md' are left intact. Index pages map to their
    directory: 'index' becomes '' and 'guide/index' becomes 'guide'.

    Args:
        file_path: Path of a page file, absolute or relative to the current
            working directory.

    Returns:
        The URL path without a leading slash; '' for the root index page.
    """
    rel_path = os.path.relpath(file_path, "pages")
    url_path = rel_path.replace("\\", "/")

    for extension in (".mdx", ".md"):
        if url_path.endswith(extension):
            url_path = url_path[:-len(extension)]
            break

    if url_path == "index":
        return ""
    if url_path.endswith("/index"):
        return url_path[:-len("/index")]
    return url_path
305
+
306
def generate_full_docs(pages, base_url="https://docs.ensembleui.com"):
    """Build one text block per page for llms-full.txt (Cursor style).

    Args:
        pages: Page records with the keys 'title', 'path' and 'url_path'.
        base_url: Site root; a page with an empty 'url_path' links to it
            directly.

    Returns:
        A list of strings, one per page, each laid out as a '# title' line,
        a 'Source: url' line, and then the page content returned by
        get_full_content.
    """
    def render(entry):
        heading = entry["title"]
        source_file = entry["path"]
        slug = entry["url_path"]

        if slug:
            link = f"{base_url}/{slug}"
        else:
            link = base_url

        body = get_full_content(source_file)

        # Same layout Cursor uses: title, source URL, then the content.
        return f"# {heading}\nSource: {link}\n{body}"

    return [render(entry) for entry in pages]
325
+
148
326
def clean_content (lines ):
149
327
"""
150
328
Clean the content lines by:
@@ -243,14 +421,6 @@ def collect_content(nodes, level=1):
243
421
lines .append ("" )
244
422
return lines
245
423
246
- # Base directory settings
247
- repo_root = os .getcwd ()
248
- pages_dir = os .path .join (repo_root , "pages" )
249
-
250
- # Process the pages directory.
251
- structure = process_dir (pages_dir , skip_index = True )
252
-
253
- # Read the root index.mdx content to place it at the beginning.
254
424
def resolve_entry_path_custom (dir_path , name ):
255
425
"""Helper to resolve an index entry from the given dir."""
256
426
for candidate in [name , name + ".md" , name + ".mdx" ]:
@@ -259,13 +429,39 @@ def resolve_entry_path_custom(dir_path, name):
259
429
return path
260
430
return None
261
431
432
# Input (pages/) and output (public/) locations, both resolved against the
# current working directory.
repo_root = os.getcwd()
pages_dir = os.path.join(repo_root, "pages")
public_dir = os.path.join(repo_root, "public")

# The llms output files are written into public/, so make sure it exists.
os.makedirs(public_dir, exist_ok=True)

# Build the page tree; the root index is handled separately below.
structure = process_dir(pages_dir, skip_index=True)

# The root index page supplies the README preamble plus the llms.txt title
# and description. The defaults below apply when it is missing or when it
# has no heading / no description section.
index_path = resolve_entry_path_custom(pages_dir, "index")
index_lines = []
main_title = "Ensemble"
main_description = "Documentation for the Ensemble platform"

if index_path and os.path.isfile(index_path):
    with open(index_path, 'r', encoding='utf-8') as f:
        raw_index = f.read().splitlines()
    index_lines = clean_content(raw_index)

    # Title for llms.txt.
    title_from_index = get_first_heading(index_path)
    main_title = title_from_index or main_title

    # Description for llms.txt.
    description_content = get_description_content(index_path)
    main_description = description_content or main_description
268
463
464
+ # Generate README.md
269
465
# Assemble the final README content.
270
466
output_lines = []
271
467
if index_lines :
@@ -287,3 +483,40 @@ def resolve_entry_path_custom(dir_path, name):
287
483
out_file .write ("\n " .join (output_lines ))
288
484
289
485
print ("Merged documentation written to README.md" )
486
+
487
# llms.txt: title, description, and a linked table of contents, followed by
# an "Optional" section of external links (like Cursor does).
toc_content = generate_llms_toc(structure)
toc_lines = [
    f"# {main_title}",
    "",
    f"{main_description}",
    "",
    "## Docs",
    "",
    *toc_content,
    "",
    "## Optional",
    "",
    "- [Website](https://ensembleui.com/)",
    "- [Ensemble Studio](https://studio.ensembleui.com/)",
]

llms_txt_path = os.path.join(public_dir, "llms.txt")
with open(llms_txt_path, 'w', encoding='utf-8') as f:
    f.write("\n".join(toc_lines))

# llms-full.txt: the full content of every page, one block per page.
all_pages = collect_all_pages(structure)
full_content_blocks = generate_full_docs(all_pages)

llms_full_txt_path = os.path.join(public_dir, "llms-full.txt")
with open(llms_full_txt_path, 'w', encoding='utf-8') as f:
    f.write("\n".join(full_content_blocks))

for generated_path in (llms_txt_path, llms_full_txt_path):
    print(f"Generated {generated_path} successfully!")
print(f"Total pages in full docs: {len(all_pages)}")
0 commit comments