Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/update-citations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ name: Update Google Scholar Citations

on:
schedule:
- cron: "0 0 1 * *"
- cron: "0 0 * * 1"
workflow_dispatch: # Allow manual triggering

permissions:
contents: write

jobs:
update-citations:
runs-on: ubuntu-latest
Expand Down
114 changes: 59 additions & 55 deletions bin/update_scholar_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,20 @@ def get_scholar_citations():
"""
print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")

# Initialize citation data structure
citation_data = {
'metadata': {
'last_updated': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
},
'papers': {} # Initialize as empty dict, not None
'metadata': {},
'papers': {}
}

# Try to load existing data first to avoid unnecessary requests
# Try to load existing data first to preserve the last known good values.
if os.path.exists(OUTPUT_FILE):
try:
with open(OUTPUT_FILE, 'r') as f:
existing_data = yaml.safe_load(f)
if existing_data and isinstance(existing_data, dict):
# Keep existing metadata if available
if 'papers' in existing_data and existing_data['papers'] is not None:
if isinstance(existing_data.get('metadata'), dict):
citation_data['metadata'] = existing_data['metadata']
if existing_data.get('papers') is not None:
citation_data['papers'] = existing_data['papers']
except Exception as e:
print(f"Warning: Could not read existing citation data: {e}")
Expand All @@ -59,55 +57,60 @@ def get_scholar_citations():
print(f"Retrying in {wait_time:.1f} seconds...")
time.sleep(wait_time)
else:
print("All retries failed. Using existing data if available.")
return citation_data
raise RuntimeError("All Google Scholar fetch retries failed") from e

if not author_data:
print("Could not fetch author data")
return citation_data

# Process publications
if 'publications' in author_data:
for pub in author_data['publications']:
try:
# Get publication ID
pub_id = None
if 'pub_id' in pub and pub['pub_id']:
pub_id = pub['pub_id']
elif 'author_pub_id' in pub and pub['author_pub_id']:
pub_id = pub['author_pub_id']

if not pub_id:
print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}")
continue

# Get publication metadata
title = "Unknown Title"
year = "Unknown Year"
citations = 0

if 'bib' in pub:
if 'title' in pub['bib']:
title = pub['bib']['title']
if 'pub_year' in pub['bib']:
year = str(pub['bib']['pub_year'])

if 'num_citations' in pub:
citations = pub['num_citations']

print(f"Found: {title} ({year}) - Citations: {citations}")

# Store citation data
citation_data['papers'][pub_id] = {
'title': title,
'year': year,
'citations': citations
}

except Exception as e:
print(f"Error processing publication: {str(e)}")
else:
print("No publications found in author data")
raise RuntimeError("Could not fetch author data")

publications = author_data.get('publications')
if not publications:
raise RuntimeError("No publications found in author data")

fetched_papers = 0
for pub in publications:
try:
# Get publication ID
pub_id = None
if 'pub_id' in pub and pub['pub_id']:
pub_id = pub['pub_id']
elif 'author_pub_id' in pub and pub['author_pub_id']:
pub_id = pub['author_pub_id']

if not pub_id:
print(f"Warning: No ID found for publication: {pub.get('bib', {}).get('title', 'Unknown')}")
continue

# Get publication metadata
title = "Unknown Title"
year = "Unknown Year"
citations = 0

if 'bib' in pub:
if 'title' in pub['bib']:
title = pub['bib']['title']
if 'pub_year' in pub['bib']:
year = str(pub['bib']['pub_year'])

if 'num_citations' in pub:
citations = pub['num_citations']

print(f"Found: {title} ({year}) - Citations: {citations}")

# Store citation data
citation_data['papers'][pub_id] = {
'title': title,
'year': year,
'citations': citations
}
fetched_papers += 1

except Exception as e:
print(f"Error processing publication: {str(e)}")

if fetched_papers == 0:
raise RuntimeError("Google Scholar fetch completed but no publications could be processed")

citation_data['metadata']['last_updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Save to YAML file
try:
Expand All @@ -116,6 +119,7 @@ def get_scholar_citations():
print(f"Citation data saved to {OUTPUT_FILE}")
except Exception as e:
print(f"Error saving citation data: {str(e)}")
raise

return citation_data

Expand Down
Loading