Coverage for lynceus/devops/github_devops_analyzer.py: 91%
218 statements
« prev ^ index » next coverage.py v7.10.0, created at 2025-07-29 08:46 +0000
« prev ^ index » next coverage.py v7.10.0, created at 2025-07-29 08:46 +0000
import functools
from collections import defaultdict
from datetime import date, datetime, timedelta, timezone
from pathlib import Path

import github
import requests
from github import Github, GithubObject, Permissions, UnknownObjectException
from github.PaginatedList import PaginatedList

from lynceus.core.exchange.lynceus_exchange import LynceusExchange
from lynceus.core.lynceus import LynceusSession
from lynceus.devops.devops_analyzer import DevOpsAnalyzer
from lynceus.utils import filter_kwargs
from lynceus.utils.lynceus_dict import LynceusDict
def github_exception_handler(func):
    """
    Decorator translating GitHub API errors into standard Python exceptions.

    Parameters
    ----------
    func : callable
        Function to wrap with exception handling

    Returns
    -------
    callable
        Wrapped function; it keeps the name, docstring and other metadata of ``func``

    Raises
    ------
    PermissionError
        For 401/403 HTTP status codes
    NameError
        For 404 HTTP status codes

    Notes
    -----
    Only errors raised while ``func`` itself is executing are translated. When ``func``
    is a generator function, calling it merely creates the generator, so errors raised
    later during iteration are not intercepted.
    """

    # functools.wraps keeps __name__/__doc__/__wrapped__ of the decorated function,
    # which would otherwise all be reported as 'func_wrapper'.
    @functools.wraps(func)
    def func_wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except github.GithubException as error:
            # Intercepts permission error.
            if error.status in (401, 403):
                raise PermissionError(
                    "You don't have enough permission to perform this operation on Github."
                ) from error

            # Intercepts 'not found' error.
            if error.status == 404:
                raise NameError("Unable to find requested Object.") from error

            # Raises any other error untouched.
            raise

    return func_wrapper
def get_list_from_paginated_and_count(plist: PaginatedList, count: int | None = None) -> list:
    """
    Materialize a GitHub paginated result as a list, optionally limited to ``count`` items.

    Parameters
    ----------
    plist : PaginatedList
        GitHub PaginatedList object (anything supporting slicing and iteration works)
    count : int, optional
        Maximum number of items to retrieve; ``None`` or ``0`` means "no limit"

    Returns
    -------
    list
        List of items from the paginated result, limited by count if specified
    """
    # A falsy count (None or 0) means the caller wants everything.
    if count:
        try:
            # Slicing lets the paginated list stop fetching once enough items are read.
            return list(plist[:count])
        except IndexError:
            # Ignores IndexError, to return the complete list, as if count was never specified.
            pass

    # Returns the complete list.
    return list(plist)
# Cf. https://pygithub.readthedocs.io/en/latest/introduction.html
# Cf. https://docs.github.com/en/rest
# Cf. https://pygithub.readthedocs.io/en/stable/changes.html#breaking-changes for the 2.x breaking changes
#     (mainly on datetimes, which are no longer naive).
# Important:
#  - the Gitlab group notion corresponds to the Organization notion on Github
#  - the Team notion of Github is NOT managed
class GithubDevOpsAnalyzer(DevOpsAnalyzer):
    """
    GitHub-specific implementation of the DevOps analyzer.

    This class provides concrete implementations for all DevOps operations
    specific to GitHub, including authentication, user/organization/repository management,
    and statistics gathering. It uses the PyGithub library to interact with GitHub's REST API.

    Important notes:
    - GitLab "groups" correspond to GitHub "organizations"
    - GitHub "teams" are not currently managed by this implementation
    - Some features may have different capabilities compared to GitLab
    - API errors are translated by the ``github_exception_handler`` decorator
      (401/403 -> PermissionError, 404 -> NameError)
    """
127 def __init__(
128 self,
129 lynceus_session: LynceusSession,
130 uri: str,
131 token: str,
132 lynceus_exchange: LynceusExchange,
133 ):
134 """
135 Initialize the GitHub DevOps analyzer.
137 Parameters
138 ----------
139 lynceus_session : LynceusSession
140 The Lynceus session instance
141 uri : str
142 The GitHub instance URI (for GitHub Enterprise) or None for github.com
143 token : str
144 Personal access token for GitHub authentication
145 lynceus_exchange : LynceusExchange
146 Exchange instance for data communication
147 """
148 super().__init__(lynceus_session, uri, "github", lynceus_exchange)
149 kwargs = {"auth": github.Auth.Token(token), "timeout": 60}
150 if uri:
151 kwargs["base_url"] = uri
153 self.__manager = Github(**kwargs)
155 # The following methods are only for uniformity and coherence.
156 def _extract_user_info(self, user) -> LynceusDict:
157 return LynceusDict(
158 {
159 "id": user.id,
160 "name": user.name,
161 "login": user.login,
162 "username": user.login,
163 "e-mail": user.email,
164 "avatar_url": user.avatar_url,
165 "bio": user.bio,
166 }
167 )
169 def _extract_group_info(self, group) -> LynceusDict:
170 return LynceusDict(
171 {
172 "id": group.id,
173 "name": group.name,
174 # N.B.: it does not seem to be any "path" for Github organization
175 "path": group.login,
176 }
177 )
179 def _extract_project_info(self, project) -> LynceusDict:
180 return LynceusDict(
181 {
182 "id": project.id,
183 "name": project.name,
184 "path": project.full_name,
185 "web_url": project.html_url,
186 }
187 )
189 def _extract_member_info(self, member) -> LynceusDict:
190 # In Github the corresponding notion is 'contributors', and there is no 'status' information.
191 return LynceusDict(
192 {
193 "id": member.id,
194 "name": member.name,
195 "login": member.login,
196 "username": member.login,
197 "parent_id": self.INFO_UNDEFINED,
198 "state": self.STATUS_ACTIVE,
199 }
200 )
202 def _extract_issue_event_info(self, issue_event, **kwargs) -> LynceusDict:
203 return LynceusDict(
204 {
205 "id": issue_event.id,
206 "issue_id": issue_event.issue.number,
207 "action": issue_event.event,
208 "target_type": "issue",
209 "created_at": issue_event.created_at,
210 "author": (
211 self.INFO_UNDEFINED
212 if not issue_event.actor
213 else issue_event.actor.name
214 ),
215 "title": issue_event.issue.title,
216 "issue_web_url": kwargs["project_web_url"] + f"/issues/{issue_event.issue.number}",
217 # N.B.: project information is unable, and must be added by caller via kwargs.
218 }
219 | kwargs
220 )
222 def _extract_commit_info(self, commit) -> LynceusDict:
223 return LynceusDict(
224 {
225 "id": commit.sha,
226 "short_id": commit.sha[:8],
227 "parent_ids": [parent_commit.sha for parent_commit in commit.parents],
228 "message": commit.raw_data["commit"].get("message"),
229 "created_at": commit.commit.author.date,
230 "author_name": commit.author.name,
231 "author_email": commit.raw_data["commit"]["author"]["email"],
232 "committer_name": commit.committer.name,
233 "committer_email": commit.raw_data["commit"]["committer"]["email"],
234 }
235 )
237 def _extract_branch_info(self, branch) -> LynceusDict:
238 return LynceusDict(
239 {
240 "name": branch.name,
241 "merged": len(branch.commit.parents) > 1,
242 "commit_id": branch.commit.sha,
243 "commit_short_id": branch.commit.sha[:8],
244 "created_at": branch.commit.commit.author.date,
245 }
246 )
248 def _extract_tag_info(self, tag) -> LynceusDict:
249 return LynceusDict(
250 {
251 "name": tag.name,
252 "commit_id": tag.commit.sha,
253 "commit_short_id": tag.commit.sha[:8],
254 "created_at": tag.commit.commit.author.date,
255 }
256 )
258 # The following methods are only performing read access on DevOps backend.
259 @github_exception_handler
260 def authenticate(self):
261 """
262 Authenticate with the GitHub instance using the configured credentials.
264 GitHub doesn't have a direct authentication endpoint, so this method
265 validates the credentials by attempting to retrieve the current user
266 information. If successful, the credentials are valid.
267 """
268 # There is no direct authentication with Github,
269 # simply retrieves the current user to test it.
270 self._do_get_current_user()
272 @github_exception_handler
273 def _do_get_current_user(self):
274 return self.__manager.get_user()
276 # pylint: disable=unused-argument
277 @github_exception_handler
278 def _do_get_user_without_cache(
279 self, *, username: str = None, email: str = None, **kwargs
280 ):
281 kwargs_filtered = filter_kwargs(args_filter=["user_id"], **kwargs)
282 if kwargs_filtered:
283 return self.__manager.get_user_by_id(**kwargs_filtered)
285 return self.__manager.get_user(login=username)
287 @github_exception_handler
288 def _do_get_groups(self, *, count: int | None = None, **kwargs):
289 return get_list_from_paginated_and_count(
290 self.__manager.get_organizations(**kwargs), count
291 )
293 @github_exception_handler
294 def _do_get_group_without_cache(self, *, full_path: str, **kwargs):
295 # https://pygithub.readthedocs.io/en/latest/github_objects/Repository.html?highlight=organization#github.Repository.Repository.organization
296 # https://pygithub.readthedocs.io/en/latest/github_objects/Team.html?highlight=organization#github.Team.Team.organization
297 # https://pygithub.readthedocs.io/en/latest/examples/MainClass.html?highlight=organization#get-organization-by-name
299 try:
300 # There are no optional kwargs available in this implementation.
301 return self.__manager.get_organization(full_path)
302 except UnknownObjectException as exception:
303 raise NameError(f'Group "{full_path}" has not been found.') from exception
305 @github_exception_handler
306 def _do_get_projects(self, *, count: int | None = None, **kwargs):
307 return get_list_from_paginated_and_count(
308 self._do_get_current_user().get_repos(**kwargs), count
309 )
311 @github_exception_handler
312 def _do_get_project_without_cache(self, *, full_path: str, **kwargs):
313 try:
314 # There are no optional kwargs available in this implementation.
315 return self.__manager.get_repo(full_path)
316 except UnknownObjectException as exception:
317 raise NameError(
318 f'Project/repository "{full_path}" has not been found.'
319 ) from exception
321 @github_exception_handler
322 def check_permissions_on_project(
323 self,
324 *,
325 full_path: str,
326 get_metadata: bool,
327 pull: bool,
328 push: bool = False,
329 maintain: bool = False,
330 admin: bool = False,
331 **kwargs,
332 ):
333 """
334 Check user permissions on a specific GitHub repository.
336 Verify that the authenticated user has the requested permissions
337 on the specified repository by checking the repository permissions object.
339 Parameters
340 ----------
341 full_path : str
342 Full path to the GitHub repository (e.g., 'owner/repo')
343 get_metadata : bool
344 Whether metadata access is required
345 pull : bool
346 Whether pull/read access is required
347 push : bool, optional
348 Whether push/write access is required (default: False)
349 maintain : bool, optional
350 Whether maintainer access is required (default: False)
351 admin : bool, optional
352 Whether admin access is required (default: False)
353 **kwargs
354 Additional repository lookup arguments
356 Returns
357 -------
358 bool
359 Permission check results with granted access levels
361 Raises
362 ------
363 PermissionError
364 If required permissions are not granted
365 """
366 try:
367 # Retrieves repository metadata (can lead to NameError if authenticated user has not enough permissions).
368 repository = self._do_get_project(full_path=full_path, **kwargs)
370 # From here, we consider get_metadata permission is OK.
372 # Checks others permissions.
373 permissions: Permissions = repository.permissions
374 if pull and not permissions.pull:
375 return False
377 if push and not permissions.push:
378 return False
380 if maintain and not permissions.maintain:
381 return False
383 if admin and not permissions.admin:
384 return False
386 # All permissions check are OK.
387 return True
388 except (PermissionError, NameError):
389 # Returns True if there were NO permission at all to check ...
390 return (
391 not get_metadata
392 and not pull
393 and not push
394 and not maintain
395 and not admin
396 )
398 @github_exception_handler
399 def _do_get_project_commits(
400 self, *, full_path: str, git_ref_name: str, count: int | None = None, **kwargs
401 ):
402 repository = self._do_get_project(full_path=full_path, **kwargs)
403 return get_list_from_paginated_and_count(
404 repository.get_commits(sha=git_ref_name or GithubObject.NotSet), count
405 )
407 @github_exception_handler
408 def _do_get_project_branches(
409 self, *, full_path: str, count: int | None = None, **kwargs
410 ):
411 repository = self._do_get_project(full_path=full_path, **kwargs)
412 return get_list_from_paginated_and_count(repository.get_branches(), count)
414 @github_exception_handler
415 def _do_get_project_tags(
416 self, *, full_path: str, count: int | None = None, **kwargs
417 ):
418 repository = self._do_get_project(full_path=full_path, **kwargs)
419 return get_list_from_paginated_and_count(repository.get_tags(), count)
421 @github_exception_handler
422 def _do_get_project_members(
423 self, *, full_path: str, count: int | None = None, **kwargs
424 ):
425 repository = self._do_get_project(full_path=full_path, **kwargs)
426 return get_list_from_paginated_and_count(
427 repository.get_contributors(
428 **filter_kwargs(args_filter=["anon"], **kwargs)
429 ),
430 count,
431 )
433 @github_exception_handler
434 def _do_get_group_members(
435 self, *, full_path: str, count: int | None = None, **kwargs
436 ):
437 # https://pygithub.readthedocs.io/en/latest/github_objects/Organization.html?highlight=member#github.Organization.Organization.get_members
438 organization = self._do_get_group(full_path=full_path, **kwargs)
439 return get_list_from_paginated_and_count(
440 organization.get_members(
441 **filter_kwargs(args_filter=["role", "filter_"], **kwargs)
442 ),
443 count,
444 )
446 @github_exception_handler
447 def _do_get_project_issue_events(
448 self,
449 *,
450 full_path: str,
451 action: str | None = None,
452 from_date: datetime | None = None,
453 to_date: datetime | None = None,
454 count: int | None = None,
455 **kwargs,
456 ):
457 # https://docs.github.com/en/developers/webhooks-and-events/events/github-event-types
458 # https://docs.github.com/en/rest/activity/events
460 repository = self._do_get_project(full_path=full_path, **kwargs)
461 # Important: unfortunately it is NOT possible to filter on server side ...
462 issue_events = get_list_from_paginated_and_count(
463 repository.get_issues_events(), count
464 )
466 # Filters issue event according to specified parameter(s).
467 for issue_event in issue_events:
468 if action and issue_event.event != action:
469 continue
471 if from_date and issue_event.created_at < from_date:
472 # Stops the iteration since this issue event is the first being too old.
473 break
475 if to_date and issue_event.created_at > to_date:
476 # Ignores this issue event.
477 continue
479 yield issue_event
481 @github_exception_handler
482 def _do_get_project_issues(
483 self, *, full_path: str, count: int | None = None, **kwargs
484 ):
485 repository = self._do_get_project(full_path=full_path, **kwargs)
487 if "state" not in kwargs:
488 kwargs["state"] = "all"
490 return get_list_from_paginated_and_count(repository.get_issues(**kwargs), count)
492 @github_exception_handler
493 def _do_get_project_merge_requests(
494 self, *, full_path: str, count: int | None = None, **kwargs
495 ):
496 repository = self._do_get_project(full_path=full_path, **kwargs)
498 if "state" not in kwargs:
499 kwargs["state"] = "all"
501 return get_list_from_paginated_and_count(repository.get_pulls(**kwargs), count)
503 @github_exception_handler
504 def _do_get_project_milestones(
505 self, *, full_path: str, count: int | None = None, **kwargs
506 ):
507 repository = self._do_get_project(full_path=full_path, **kwargs)
508 milestone_list = get_list_from_paginated_and_count(
509 repository.get_milestones(**kwargs), count
510 )
511 return [milestone.title for milestone in milestone_list]
513 @github_exception_handler
514 def _do_get_group_milestones(
515 self, *, full_path: str, count: int | None = None, **kwargs
516 ):
517 self._logger.warning(
518 "There is no milestone on Organization under Github (you need to check milestones directly from a project/repository)."
519 )
520 return []
522 @github_exception_handler
523 def _do_get_group_projects(
524 self, *, full_path: str, count: int | None = None, **kwargs
525 ):
526 organization = self._do_get_group(full_path=full_path, **kwargs)
527 return get_list_from_paginated_and_count(
528 organization.get_repos(
529 **filter_kwargs(args_filter=["type", "sort", "direction"], **kwargs)
530 ),
531 count,
532 )
534 @github_exception_handler
535 def get_user_stats_commit_activity(
536 self,
537 *,
538 group_full_path: str = None,
539 project_full_path: str = None,
540 since: datetime = None,
541 keep_empty_stats: bool = False,
542 count: int | None = None,
543 ):
544 """
545 Get commit activity statistics for the authenticated user in GitHub.
547 Parameters
548 ----------
549 group_full_path : str, optional
550 Organization path to limit statistics to (unused)
551 project_full_path : str, optional
552 Repository path to include in statistics (unused)
553 since : datetime, optional
554 Start date for statistics (defaults to 365 days ago)
555 keep_empty_stats : bool, optional
556 Whether to include days with zero commits (default: False)
557 count : int, optional
558 Maximum number of repositories to analyze
560 Returns
561 -------
562 dict
563 Mapping of dates to commit counts
564 """
565 # Cf. https://developer.github.com/v3/repos/statistics/#get-the-last-year-of-commit-activity-data
567 # Defines threshold date.
568 contributions_since: datetime = (
569 since if since else datetime.now(tz=timezone.utc) - timedelta(days=365)
570 )
571 stats_user_commit_activity: dict[date, int] = defaultdict(int)
572 for repo in self._do_get_projects(count=count):
573 for commit_activity in repo.get_stats_commit_activity() or []:
574 # TODO: define how to ensure it is the good author ?
575 # if not current_user.name == commit_activity.author:
576 # print('TODO: remove // AUTHOR is NOT the same: ', current_user.name, commit_activity.author)
577 # continue
579 # Ignores oldest statistics.
580 if commit_activity.week < contributions_since:
581 continue
583 # Ignores 0 stats but if wanted.
584 if not keep_empty_stats and not commit_activity.total:
585 continue
587 # For each stats.
588 for day, commit_count in enumerate(commit_activity.days):
589 # Ignores 0 stats but if wanted.
590 if not keep_empty_stats and not commit_count:
591 continue
593 day_date = commit_activity.week + timedelta(days=day)
594 stats_user_commit_activity[day_date.date()] += commit_count
596 return stats_user_commit_activity
598 @github_exception_handler
599 def get_user_contributions(
600 self,
601 *,
602 since: datetime = None,
603 keep_empty_stats: bool = False,
604 count: int | None = None,
605 ):
606 """
607 Get detailed user contribution statistics including additions, deletions, and commits.
609 Parameters
610 ----------
611 since : datetime, optional
612 Start date for contributions (defaults to 365 days ago)
613 keep_empty_stats : bool, optional
614 Whether to include periods with zero contributions (default: False)
615 count : int, optional
616 Maximum number of repositories to analyze
618 Returns
619 -------
620 dict
621 Mapping of dates to contribution statistics (additions, deletions, commits)
622 """
623 # Cf. https://developer.github.com/v3/repos/statistics/
625 # Defines threshold date.
626 contributions_since: datetime = (
627 since if since else datetime.now(tz=timezone.utc) - timedelta(days=365)
628 )
629 current_user = self._do_get_current_user()
630 stats_user_contributions: dict[date, dict[str, int]] = {}
631 for repo in self._do_get_projects(count=count):
632 for contribution in repo.get_stats_contributors() or []:
633 if not current_user.name == contribution.author:
634 continue
636 # For each stats.
637 for stats in contribution.weeks:
638 # Ignores oldest statistics.
639 if stats.w < contributions_since:
640 continue
642 # Ignores 0 stats but if wanted.
643 if (
644 not keep_empty_stats
645 and not stats.a
646 and not stats.d
647 and not stats.c
648 ):
649 continue
651 try:
652 stats_map = stats_user_contributions[stats.w.date()]
653 except KeyError:
654 stats_map = {"additions": 0, "deletions": 0, "commits": 0}
655 stats_user_contributions[stats.w.date()] = stats_map
657 stats_map["additions"] += stats.a
658 stats_map["deletions"] += stats.d
659 stats_map["commits"] += stats.c
661 return stats_user_contributions
663 @github_exception_handler
664 def get_user_stats_code_frequency(self, *, count: int | None = None):
665 """
666 Get code frequency statistics showing additions and deletions over time.
668 Parameters
669 ----------
670 count : int, optional
671 Maximum number of repositories to analyze
673 Returns
674 -------
675 dict
676 Mapping of time periods to code frequency data (additions, deletions)
677 """
678 stats_code_frequency = {}
679 for repo in self._do_get_projects(count=count):
680 for stats in repo.get_stats_code_frequency() or []:
681 try:
682 stats_map = stats_code_frequency[stats.week]
683 except KeyError:
684 stats_map = {"additions": 0, "deletions": 0}
685 stats_code_frequency[stats.week] = stats_map
687 stats_map["additions"] += stats.additions
688 stats_map["deletions"] += stats.deletions
689 return stats_code_frequency
691 @github_exception_handler
692 def _do_download_repository(
693 self,
694 *,
695 project_full_path: str,
696 dest_path: Path,
697 reference: str = None,
698 chunk_size: int = 1024,
699 **kwargs,
700 ):
701 # Cf. https://developer.github.com/v3/repos/contents/#get-archive-link
702 # Cf. https://github.com/PyGithub/PyGithub/blob/main/github/Repository.py#L1535
703 repository = self._do_get_project(full_path=project_full_path, **kwargs)
704 url: str = repository.get_archive_link(
705 archive_format="tarball", ref=reference or GithubObject.NotSet
706 )
707 with requests.get(url, stream=True, timeout=60) as reader:
708 # Ensures specified reference exists.
709 if reader.status_code == 404:
710 raise NameError(
711 f'Reference "{reference}" does not exists for project/repository "{project_full_path}".'
712 )
714 with open(dest_path, "wb") as dest_file:
715 for chunk in reader.iter_content(chunk_size=chunk_size):
716 dest_file.write(chunk)