diff --git a/.github/workflows/update-citations.yml b/.github/workflows/update-citations.yml index 1ba98cb9..0a5aaae0 100644 --- a/.github/workflows/update-citations.yml +++ b/.github/workflows/update-citations.yml @@ -26,15 +26,25 @@ jobs: pip install -r requirements.txt - name: Run citation update script + id: update_citations + continue-on-error: true run: | python bin/update_scholar_citations.py - name: Configure Git + if: always() run: | git config --local user.email "actions@github.com" git config --local user.name "GitHub Actions" - name: Commit and push if changed + if: always() run: | git add _data/citations.yml git diff --staged --quiet || (git commit -m "Update Google Scholar citations" && git push) + + - name: Fail if citation update failed + if: steps.update_citations.outcome == 'failure' + run: | + echo "Google Scholar citation update failed; failure metadata was recorded in _data/citations.yml." + exit 1 diff --git a/bin/update_scholar_citations.py b/bin/update_scholar_citations.py index dd5817f7..b7b5ae98 100644 --- a/bin/update_scholar_citations.py +++ b/bin/update_scholar_citations.py @@ -19,18 +19,13 @@ # Create data directory if it doesn't exist os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True) -def get_scholar_citations(): - """ - Fetch citation data from Google Scholar for all papers by the specified author - """ - print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}") +def load_existing_citation_data(): citation_data = { 'metadata': {}, 'papers': {} } - # Try to load existing data first to preserve the last known good values. if os.path.exists(OUTPUT_FILE): try: with open(OUTPUT_FILE, 'r') as f: @@ -43,6 +38,32 @@ def get_scholar_citations(): except Exception as e: print(f"Warning: Could not read existing citation data: {e}") + return citation_data + + +def save_citation_data(citation_data): + with open(OUTPUT_FILE, 'w') as f: + yaml.dump(citation_data, f, default_flow_style=False, sort_keys=False) + print(f"Citation data saved to {OUTPUT_FILE}") + + +def record_fetch_failure(error): + citation_data = load_existing_citation_data() + citation_data['metadata']['fetch_status'] = 'failed' + citation_data['metadata']['fetch_failed_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + citation_data['metadata']['fetch_error'] = str(error) + save_citation_data(citation_data) + + +def get_scholar_citations(): + """ + Fetch citation data from Google Scholar for all papers by the specified author + """ + print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}") + + # Try to load existing data first to preserve the last known good values. + citation_data = load_existing_citation_data() + # Fetch author data with retries author_data = None for attempt in range(MAX_RETRIES): @@ -110,13 +131,14 @@ def get_scholar_citations(): if fetched_papers == 0: raise RuntimeError("Google Scholar fetch completed but no publications could be processed") + citation_data['metadata'].pop('fetch_failed_at', None) + citation_data['metadata'].pop('fetch_error', None) + citation_data['metadata']['fetch_status'] = 'success' citation_data['metadata']['last_updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # Save to YAML file try: - with open(OUTPUT_FILE, 'w') as f: - yaml.dump(citation_data, f, default_flow_style=False, sort_keys=False) - print(f"Citation data saved to {OUTPUT_FILE}") + save_citation_data(citation_data) except Exception as e: print(f"Error saving citation data: {str(e)}") raise @@ -124,4 +146,8 @@ def get_scholar_citations(): return citation_data if __name__ == "__main__": - get_scholar_citations() + try: + get_scholar_citations() + except Exception as e: + record_fetch_failure(e) + raise