Module praisonai.ui.context
Functions
def main()
Classes
class ContextGatherer (directory='.', output_file='context.txt', relevant_extensions=None, max_file_size=1000000, max_tokens=900000)
-
Expand source code
class ContextGatherer:
    """Collect the text of relevant source files under a directory.

    Files are selected by extension and size, filtered through ignore
    patterns (``.praisonignore``, ``settings.yaml``, the
    ``PRAISONAI_IGNORE_FILES`` environment variable, ``.gitignore`` or
    built-in defaults) and optionally restricted or extended through
    ``.praisoncontext`` / ``.praisoninclude``. The result is a single string
    that can be truncated to a whitespace-token budget.
    """

    def __init__(self, directory='.', output_file='context.txt',
                 relevant_extensions=None, max_file_size=1_000_000,
                 max_tokens=900000):
        """Configure the gatherer and load ignore/include settings.

        Args:
            directory: Root directory to scan.
            output_file: Path used by ``save_context``.
            relevant_extensions: File suffixes to include; a built-in list of
                source-code extensions is used when omitted or empty.
            max_file_size: Files larger than this many bytes are skipped.
            max_tokens: Token budget for ``truncate_context``; overridden by
                the ``PRAISONAI_MAX_TOKENS`` environment variable when set.
        """
        self.directory = directory
        self.output_file = output_file
        self.relevant_extensions = relevant_extensions or [
            '.py', '.js', '.ts', '.java', '.rb', '.php', '.pl', '.pm', '.c',
            '.h', '.cpp', '.hpp', '.cs', '.vb', '.swift', '.kt', '.m', '.mm',
            '.go', '.rs', '.hs', '.r', '.lua', '.sh', '.bat', '.clj',
            '.scala', '.erl', '.ex', '.ml', '.fs', '.groovy', '.jsm', '.jsx',
            '.tsx', '.yaml'
        ]
        self.max_file_size = max_file_size
        self.max_tokens = int(os.getenv("PRAISONAI_MAX_TOKENS", max_tokens))
        self.ignore_patterns = self.get_ignore_patterns()
        # get_include_paths() returns a (paths, include_all) tuple; unpack it
        # so include_paths is always a list of path strings.
        self.include_paths, self.include_all = self.get_include_paths()
        self.included_files = []

    @staticmethod
    def _read_pattern_lines(filepath):
        """Return the stripped, non-blank, non-comment lines of ``filepath``."""
        with open(filepath, 'r', encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            # Test the stripped line so indented '#' comments are skipped too.
            return [
                line for line in stripped_lines
                if line and not line.startswith('#')
            ]

    def get_ignore_patterns(self):
        """Build the list of fnmatch patterns for paths to skip.

        Patterns from all of the following sources are combined, in order:

        1. ``.praisonignore``
        2. ``settings.yaml`` (under ``code.ignore_files``)
        3. ``PRAISONAI_IGNORE_FILES`` environment variable (comma separated)
        4. ``.gitignore``

        The built-in default patterns are used only when none of the sources
        above produced any pattern.

        Returns:
            list[str]: Patterns, each prefixed with ``*`` unless it already
            starts with ``.`` or ``*``.
        """
        ignore_patterns = []

        # 1. Load from .praisonignore
        praisonignore_path = os.path.join(self.directory, '.praisonignore')
        if os.path.exists(praisonignore_path):
            ignore_patterns.extend(self._read_pattern_lines(praisonignore_path))

        # 2. Load from settings.yaml
        settings_path = os.path.join(self.directory, 'settings.yaml')
        if os.path.exists(settings_path):
            with open(settings_path, 'r') as f:
                settings = yaml.safe_load(f)
            # safe_load returns None for an empty file, and 'code' may be
            # present but empty; only read the key when the shape is right.
            code_settings = (
                settings.get('code') if isinstance(settings, dict) else None
            )
            if isinstance(code_settings, dict):
                ignore_patterns.extend(code_settings.get('ignore_files') or [])

        # 3. Load from environment variable
        ignore_files_env = os.getenv("PRAISONAI_IGNORE_FILES")
        if ignore_files_env:
            # Tolerate "a, b" and trailing commas.
            ignore_patterns.extend(
                entry.strip()
                for entry in ignore_files_env.split(",")
                if entry.strip()
            )

        # 4. Load from .gitignore
        gitignore_path = os.path.join(self.directory, '.gitignore')
        if os.path.exists(gitignore_path):
            ignore_patterns.extend(self._read_pattern_lines(gitignore_path))

        # 5. Default patterns (only if no patterns loaded from above sources)
        if not ignore_patterns:
            ignore_patterns = [
                ".*", "*.pyc", "__pycache__", ".git", ".gitignore", ".vscode",
                ".idea", ".DS_Store", "*.lock", "*.pyc", ".env", "docs",
                "tests", "test", "tmp", "temp", "*.txt", "*.md", "*.json",
                "*.csv", "*.tsv", "public", "*.sql", "*.sqlite", "*.db",
                "*.db3", "*.sqlite3", "*.log", "*.zip", "*.gz", "*.tar",
                "*.rar", "*.7z", "*.pdf", "*.jpg", "*.jpeg", "*.png", "*.gif",
                "*.svg", "cookbooks", "assets", "__pycache__", "dist",
                "build", "node_modules", "venv"
            ]
            logger.debug("Using default ignore patterns: %s", ignore_patterns)

        # Add a leading '*' to bare names so they also match as a path suffix.
        modified_ignore_patterns = [
            pattern if pattern.startswith(('.', '*')) else '*' + pattern
            for pattern in ignore_patterns
        ]
        logger.debug("Final ignore patterns: %s", modified_ignore_patterns)
        return modified_ignore_patterns

    def get_include_paths(self):
        """Load include paths from ``.praisoncontext`` or ``.praisoninclude``.

        1. ``.praisoncontext``: when it exists and lists at least one path,
           ONLY the listed files/directories are gathered.
        2. ``.praisoninclude``: consulted only when ``.praisoncontext``
           yielded nothing; its paths are gathered in addition to every other
           relevant file of the directory.

        Returns:
            tuple[list[str], bool]: ``(include_paths, include_all)`` where
            ``include_all`` is True when the whole directory must be
            processed as well as the listed paths.
        """
        include_paths = []
        include_all = False

        context_file = os.path.join(self.directory, '.praisoncontext')
        if os.path.exists(context_file):
            include_paths.extend(self._read_pattern_lines(context_file))

        # Fall back to .praisoninclude when .praisoncontext gave no paths.
        if not include_paths:
            include_file = os.path.join(self.directory, '.praisoninclude')
            if os.path.exists(include_file):
                include_paths.extend(self._read_pattern_lines(include_file))
                include_all = True  # whole directory plus the listed paths

        return include_paths, include_all

    def should_ignore(self, file_path):
        """Return True when a file or directory must be skipped.

        A path is ignored when its path relative to ``self.directory`` starts
        with ``.`` (hidden top-level entries, the directory itself, or paths
        outside it), or when either the relative path or the base name
        matches one of ``self.ignore_patterns``.
        """
        relative_path = os.path.relpath(file_path, self.directory)
        if relative_path.startswith('.'):
            return True

        base_name = os.path.basename(file_path)
        return any(
            fnmatch.fnmatch(relative_path, pattern)
            or fnmatch.fnmatch(base_name, pattern)
            for pattern in self.ignore_patterns
        )

    def is_relevant_file(self, file_path):
        """Return True for an existing regular file of allowed size and suffix."""
        return (
            os.path.isfile(file_path)
            and os.path.getsize(file_path) <= self.max_file_size
            and file_path.endswith(tuple(self.relevant_extensions))
        )

    def gather_context(self):
        """Read every selected file and return the concatenated context.

        The include configuration is re-read on every call. Each file is
        added at most once, even when it is reachable both through the
        directory walk and through an explicit include path, and
        ``self.included_files`` is rebuilt from scratch.

        Returns:
            str: One section per file (path header, content, separator line),
            joined with newlines.
        """
        context = []
        total_files = 0
        processed_files = 0
        seen_files = set()
        self.included_files = []
        self.include_paths, include_all = self.get_include_paths()
        self.include_all = include_all

        def add_file_content(file_path):
            """Add one file to the context; return False if already handled."""
            file_key = os.path.normcase(os.path.abspath(file_path))
            if file_key in seen_files:
                return False
            seen_files.add(file_key)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: an unreadable file is reported and skipped.
                logger.error(f"Error reading {file_path}: {e}")
                return True

            context.append(
                f"File: {file_path}\n\n{content}\n\n{'=' * 50}\n"
            )
            try:
                self.included_files.append(
                    Path(file_path).relative_to(self.directory)
                )
            except ValueError as e:
                # The file lies outside self.directory: its content is kept,
                # but it cannot be placed in the context tree.
                logger.error(f"Error reading {file_path}: {e}")
            return True

        def process_path(path):
            """Process a single path, which may be a file or a directory."""
            nonlocal total_files, processed_files
            if os.path.isdir(path):
                for root, dirs, files in os.walk(path):
                    total_files += len(files)
                    # Prune in place so os.walk does not descend into
                    # ignored directories.
                    dirs[:] = [
                        d for d in dirs
                        if not self.should_ignore(os.path.join(root, d))
                    ]
                    for file in files:
                        file_path = os.path.join(root, file)
                        if self.should_ignore(file_path):
                            continue
                        if not self.is_relevant_file(file_path):
                            continue
                        if add_file_content(file_path):
                            processed_files += 1
                            print(
                                f"\rProcessed {processed_files}/{total_files} files",
                                end="",
                                flush=True,
                            )
            elif self.is_relevant_file(path):
                # Explicitly listed files bypass the ignore patterns.
                if add_file_content(path):
                    processed_files += 1
                    print(
                        f"\rProcessed {processed_files}/1 files",
                        end="",
                        flush=True,
                    )

        # The whole directory is scanned when .praisoninclude asks for it or
        # when no include configuration exists at all.
        if include_all or not self.include_paths:
            process_path(self.directory)

        # Explicitly listed paths (.praisoncontext or .praisoninclude).
        for include_path in self.include_paths:
            process_path(os.path.join(self.directory, include_path))

        print()  # New line after progress indicator
        return "\n".join(context)

    def count_tokens(self, text):
        """Count tokens using a simple whitespace-based tokenizer."""
        return len(text.split())

    def truncate_context(self, context):
        """Cut ``context`` after ``self.max_tokens`` whitespace-separated tokens.

        The original text is sliced rather than re-joined, so the newlines
        and indentation of the kept part are preserved. The text is returned
        unchanged when it is within the limit.
        """
        import itertools
        import re

        limit = max(self.max_tokens, 0)
        # Only the first limit + 1 tokens are needed to decide and to cut.
        token_matches = list(
            itertools.islice(re.finditer(r'\S+', context), limit + 1)
        )
        if len(token_matches) <= limit:
            return context

        logger.warning("Context truncated due to token limit.")
        if limit == 0:
            return ''
        return context[:token_matches[limit - 1].end()]

    def save_context(self, context):
        """Save the gathered context to ``self.output_file``."""
        with open(self.output_file, 'w', encoding='utf-8') as f:
            f.write(context)

    def get_context_tree(self):
        """Generate a formatted tree listing of the included files.

        Only files recorded in ``self.included_files`` are listed; each is
        shown with its path relative to ``self.directory``.
        """
        tree = []
        start_dir = Path(self.directory)
        included = set(self.included_files)
        # Directories without any included file contribute no lines, so only
        # ancestors of included files need to be visited.
        ancestor_dirs = {
            parent for rel_file in included for parent in rel_file.parents
        }

        def add_to_tree(path, prefix=''):
            contents = sorted(path.iterdir())
            last_index = len(contents) - 1
            for index, item in enumerate(contents):
                is_last = index == last_index
                pointer = '└── ' if is_last else '├── '
                rel_path = item.relative_to(start_dir)
                if rel_path in included:
                    tree.append(f"{prefix}{pointer}{rel_path}")
                if rel_path in ancestor_dirs and item.is_dir():
                    # Continuation prefixes are as wide as the pointers.
                    add_to_tree(item, prefix + ('    ' if is_last else '│   '))

        add_to_tree(start_dir)
        return '\n'.join(tree)

    def run(self):
        """Gather, truncate and report the context.

        Returns:
            tuple[str, int, str]: ``(context, token_count, context_tree)``.
        """
        context = self.gather_context()
        context = self.truncate_context(context)
        token_count = self.count_tokens(context)
        print("Context gathered successfully.")
        print(f"Total number of tokens (estimated): {token_count}")
        # NOTE: the context is not written to disk here; call save_context()
        # explicitly when a file is wanted.
        context_tree = self.get_context_tree()
        logger.debug("Context tree:\n%s", context_tree)
        return context, token_count, context_tree
Methods
def count_tokens(self, text)
-
Count tokens using a simple whitespace-based tokenizer.
def gather_context(self)
-
Gather context from relevant files, respecting ignore patterns and include options from .praisoninclude and .praisoncontext.
def get_context_tree(self)
-
Generate a formatted tree structure of included files and folders.
def get_ignore_patterns(self)
-
Loads ignore patterns by combining the following sources, in this order: 1. .praisonignore 2. settings.yaml (under code.ignore_files) 3. PRAISONAI_IGNORE_FILES environment variable 4. .gitignore 5. Default patterns (used only when none of the sources above provides any pattern)
def get_include_paths(self)
-
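Loads include paths from: 1. .praisoncontext (when it exists and lists at least one path, ONLY the files/directories listed are included) 2. .praisoninclude (used only if .praisoncontext provides no paths; the listed paths are included in addition to all other relevant files, excluding ignore patterns). Returns a tuple (include_paths, include_all).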
def is_relevant_file(self, file_path)
-
Determine if a file is relevant for the context.
def run(self)
-
Execute the context gathering, truncation, and reporting.
def save_context(self, context)
-
Save the gathered context to a file.
def should_ignore(self, file_path)
-
Check if a file or directory should be ignored based on patterns. Handles both file names and directory names for more comprehensive filtering.
def truncate_context(self, context)
-
Truncate context to stay within the token limit.