-
Notifications
You must be signed in to change notification settings - Fork 5
Closed
Milestone
Description
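Output from the memory profiler for the 'ENCODE' query.
Note that the `popular_authors` call (profiled line 96 below) accounts for an increment of about 2.5 GB (2497.7 MiB), raising total usage from 1218.3 MiB to 3715.9 MiB.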
Line # Mem usage Increment Line Contents
================================================
35 203.3 MiB 203.3 MiB @profile
36 def launch(self, *terms, task=None):
37 """:return full log"""
38
39 203.3 MiB 0.0 MiB try:
40 # Search articles relevant to the terms
41 203.3 MiB 0.0 MiB self.terms = terms
42 219.7 MiB 16.3 MiB self.ids = self.loader.search(*terms, current=1, task=task)
43 219.7 MiB 0.0 MiB self.n_papers = len(self.ids)
44
45 # Nothing found
46 219.7 MiB 0.0 MiB if self.n_papers == 0:
47 raise RuntimeError("Nothing found")
48
49 # Load data about publications, citations and co-citations
50 670.1 MiB 450.5 MiB self.pub_df = self.loader.load_publications(current=2, task=task)
51 670.1 MiB 0.0 MiB if len(self.pub_df) == 0:
52 raise RuntimeError("Nothing found in DB")
53
54 687.6 MiB 17.5 MiB cit_stats_df_from_query = self.loader.load_citation_stats(current=3, task=task)
55 691.8 MiB 4.2 MiB self.cit_stats_df = self.build_cit_df(cit_stats_df_from_query, self.n_papers, current=3.5, task=task)
56 691.8 MiB 0.0 MiB if len(self.cit_stats_df) == 0:
57 raise RuntimeError("Citations stats not found DB")
58
59 691.8 MiB 0.0 MiB self.df, self.min_year, self.max_year, self.citation_years = self.merge_citation_stats(self.pub_df,
60 714.3 MiB 22.5 MiB self.cit_stats_df)
61 714.3 MiB 0.0 MiB if len(self.df) == 0:
62 raise RuntimeError("Failed to merge publications and citations")
63
64 890.8 MiB 176.5 MiB self.cocit_df = self.loader.load_cocitations(current=4, task=task)
65 966.6 MiB 75.8 MiB cocit_grouped_df = self.build_cocit_grouped_df(self.cocit_df)
66 971.8 MiB 5.1 MiB self.CG = self.build_cocitation_graph(cocit_grouped_df, current=5, task=task)
67 971.8 MiB 0.0 MiB if len(self.CG.nodes()) == 0:
68 raise RuntimeError("Failed to build co-citations graph")
69
70 # Perform subtopic analysis and get subtopic descriptions
71 971.8 MiB 0.0 MiB self.df, self.components, self.comp_other, self.pm, self.pmcomp_sizes = self.subtopic_analysis(
72 1001.8 MiB 30.0 MiB self.df, self.CG, current=7, task=task
73 )
74 1097.1 MiB 95.3 MiB self.df_kwd = self.subtopic_descriptions(self.df)
75
76 # Find interesting papers
77 1097.1 MiB 0.0 MiB self.top_cited_papers, self.top_cited_df = self.find_top_cited_papers(self.df, current=8, task=task)
78
79 1097.1 MiB 0.0 MiB self.max_gain_papers, self.max_gain_df = self.find_max_gain_papers(self.df, self.citation_years,
80 1097.1 MiB 0.0 MiB current=9, task=task)
81
82 1097.1 MiB 0.0 MiB self.max_rel_gain_papers, self.max_rel_gain_df = self.find_max_relative_gain_papers(
83 1140.3 MiB 43.2 MiB self.df, self.citation_years, current=10, task=task
84 )
85
86 # Perform subtopic evolution analysis and get subtopic descriptions
87 1140.3 MiB 0.0 MiB self.evolution_df, self.evolution_year_range = self.subtopic_evolution_analysis(self.cocit_df, current=11,
88 1171.7 MiB 31.4 MiB task=task)
89 1171.7 MiB 0.0 MiB self.evolution_kwds = self.subtopic_evolution_descriptions(self.df, self.evolution_df,
90 1567.8 MiB 396.1 MiB self.evolution_year_range, self.terms)
91
92 # Find top journals
93 1218.3 MiB 0.0 MiB self.journal_stats = self.popular_journals(self.df, current=12, task=task)
94
95 # Find top authors
96 3715.9 MiB 2497.7 MiB self.author_stats = self.popular_authors(self.df, current=13, task=task)
97
98 3715.9 MiB 0.0 MiB return self.logger.stream.getvalue()
99 finally:
100 3715.9 MiB 0.0 MiB self.loader.close_connection()
101 3715.9 MiB 0.0 MiB self.logger.remove_handler()
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels