From 776f3e2f5364bb70629ae80a15d847297cc789c7 Mon Sep 17 00:00:00 2001 From: systemreliability <51009183+systemreliability@users.noreply.github.com> Date: Sat, 2 May 2026 23:20:09 +0200 Subject: [PATCH 1/2] Update citation workflow schedule and permissions --- .github/workflows/update-citations.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-citations.yml b/.github/workflows/update-citations.yml index e6c59eef..1ba98cb9 100644 --- a/.github/workflows/update-citations.yml +++ b/.github/workflows/update-citations.yml @@ -2,9 +2,12 @@ name: Update Google Scholar Citations on: schedule: - - cron: "0 0 1 * *" + - cron: "0 0 * * 1" workflow_dispatch: # Allow manual triggering +permissions: + contents: write + jobs: update-citations: runs-on: ubuntu-latest From 2256cbc0f50fa49fbc3d3572267f34716196db2f Mon Sep 17 00:00:00 2001 From: systemreliability <51009183+systemreliability@users.noreply.github.com> Date: Sat, 2 May 2026 23:20:43 +0200 Subject: [PATCH 2/2] Update citation timestamp only after successful fetch --- bin/update_scholar_citations.py | 114 +++++++++++++++++--------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/bin/update_scholar_citations.py b/bin/update_scholar_citations.py index 661479c2..dd5817f7 100644 --- a/bin/update_scholar_citations.py +++ b/bin/update_scholar_citations.py @@ -25,22 +25,20 @@ def get_scholar_citations(): """ print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}") - # Initialize citation data structure citation_data = { - 'metadata': { - 'last_updated': datetime.now().strftime("%Y-%m-%d %H:%M:%S") - }, - 'papers': {} # Initialize as empty dict, not None + 'metadata': {}, + 'papers': {} } - # Try to load existing data first to avoid unnecessary requests + # Try to load existing data first to preserve the last known good values. if os.path.exists(OUTPUT_FILE): try: with open(OUTPUT_FILE, 'r') as f: existing_data = yaml.safe_load(f) if existing_data and isinstance(existing_data, dict): - # Keep existing metadata if available - if 'papers' in existing_data and existing_data['papers'] is not None: + if isinstance(existing_data.get('metadata'), dict): + citation_data['metadata'] = existing_data['metadata'] + if existing_data.get('papers') is not None: citation_data['papers'] = existing_data['papers'] except Exception as e: print(f"Warning: Could not read existing citation data: {e}") @@ -59,55 +57,60 @@ def get_scholar_citations(): print(f"Retrying in {wait_time:.1f} seconds...") time.sleep(wait_time) else: - print("All retries failed. Using existing data if available.") - return citation_data + raise RuntimeError("All Google Scholar fetch retries failed") from e if not author_data: - print("Could not fetch author data") - return citation_data - - # Process publications - if 'publications' in author_data: - for pub in author_data['publications']: - try: - # Get publication ID - pub_id = None - if 'pub_id' in pub and pub['pub_id']: - pub_id = pub['pub_id'] - elif 'author_pub_id' in pub and pub['author_pub_id']: - pub_id = pub['author_pub_id'] - - if not pub_id: - print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}") - continue - - # Get publication metadata - title = "Unknown Title" - year = "Unknown Year" - citations = 0 - - if 'bib' in pub: - if 'title' in pub['bib']: - title = pub['bib']['title'] - if 'pub_year' in pub['bib']: - year = str(pub['bib']['pub_year']) - - if 'num_citations' in pub: - citations = pub['num_citations'] - - print(f"Found: {title} ({year}) - Citations: {citations}") - - # Store citation data - citation_data['papers'][pub_id] = { - 'title': title, - 'year': year, - 'citations': citations - } - - except Exception as e: - print(f"Error processing publication: {str(e)}") - else: - print("No publications found in author data") + raise RuntimeError("Could not fetch author data") + + publications = author_data.get('publications') + if not publications: + raise RuntimeError("No publications found in author data") + + fetched_papers = 0 + for pub in publications: + try: + # Get publication ID + pub_id = None + if 'pub_id' in pub and pub['pub_id']: + pub_id = pub['pub_id'] + elif 'author_pub_id' in pub and pub['author_pub_id']: + pub_id = pub['author_pub_id'] + + if not pub_id: + print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}") + continue + + # Get publication metadata + title = "Unknown Title" + year = "Unknown Year" + citations = 0 + + if 'bib' in pub: + if 'title' in pub['bib']: + title = pub['bib']['title'] + if 'pub_year' in pub['bib']: + year = str(pub['bib']['pub_year']) + + if 'num_citations' in pub: + citations = pub['num_citations'] + + print(f"Found: {title} ({year}) - Citations: {citations}") + + # Store citation data + citation_data['papers'][pub_id] = { + 'title': title, + 'year': year, + 'citations': citations + } + fetched_papers += 1 + + except Exception as e: + print(f"Error processing publication: {str(e)}") + + if fetched_papers == 0: + raise RuntimeError("Google Scholar fetch completed but no publications could be processed") + + citation_data['metadata']['last_updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # Save to YAML file try: @@ -116,6 +119,7 @@ def get_scholar_citations(): print(f"Citation data saved to {OUTPUT_FILE}") except Exception as e: print(f"Error saving citation data: {str(e)}") + raise return citation_data