Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/update-citations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,25 @@ jobs:
pip install -r requirements.txt

- name: Run citation update script
id: update_citations
continue-on-error: true
run: |
python bin/update_scholar_citations.py

- name: Configure Git
if: always()
run: |
git config --local user.email "actions@github.com"
git config --local user.name "GitHub Actions"

- name: Commit and push if changed
if: always()
run: |
git add _data/citations.yml
git diff --staged --quiet || (git commit -m "Update Google Scholar citations" && git push)

- name: Fail if citation update failed
if: steps.update_citations.outcome == 'failure'
run: |
echo "Google Scholar citation update failed; failure metadata was recorded in _data/citations.yml."
exit 1
46 changes: 36 additions & 10 deletions bin/update_scholar_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,13 @@
# Create data directory if it doesn't exist
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

def get_scholar_citations():
"""
Fetch citation data from Google Scholar for all papers by the specified author
"""
print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")

def load_existing_citation_data():
citation_data = {
'metadata': {},
'papers': {}
}

# Try to load existing data first to preserve the last known good values.
if os.path.exists(OUTPUT_FILE):
try:
with open(OUTPUT_FILE, 'r') as f:
Expand All @@ -43,6 +38,32 @@ def get_scholar_citations():
except Exception as e:
print(f"Warning: Could not read existing citation data: {e}")

return citation_data


def save_citation_data(citation_data):
    """
    Write citation data to OUTPUT_FILE as block-style YAML.

    The data is serialized to a string before OUTPUT_FILE is opened.
    Opening the file in 'w' mode truncates it, so serializing first means
    a serialization error can no longer wipe the previously saved values.

    Args:
        citation_data: The data to serialize; key order is preserved
            (sort_keys=False).

    Raises:
        Any exception raised by yaml.dump for data it cannot serialize,
        or by open/write if OUTPUT_FILE cannot be written.
    """
    # Without a stream argument yaml.dump returns the document as a string.
    serialized = yaml.dump(citation_data, default_flow_style=False, sort_keys=False)
    with open(OUTPUT_FILE, 'w') as f:
        f.write(serialized)
    print(f"Citation data saved to {OUTPUT_FILE}")


def record_fetch_failure(error):
    """
    Record a failed fetch in the stored citation data.

    Reloads the existing citation data, stamps its metadata with a
    'failed' status, the current local time, and the text of the error,
    then saves the result.

    Args:
        error: The exception (or any object) whose str() form is stored
            as the failure reason.
    """
    data = load_existing_citation_data()
    failed_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    data['metadata'].update({
        'fetch_status': 'failed',
        'fetch_failed_at': failed_at,
        'fetch_error': str(error),
    })
    save_citation_data(data)


def get_scholar_citations():
"""
Fetch citation data from Google Scholar for all papers by the specified author
"""
print(f"Fetching citations for Google Scholar ID: {SCHOLAR_USER_ID}")

# Try to load existing data first to preserve the last known good values.
citation_data = load_existing_citation_data()

# Fetch author data with retries
author_data = None
for attempt in range(MAX_RETRIES):
Expand Down Expand Up @@ -110,18 +131,23 @@ def get_scholar_citations():
if fetched_papers == 0:
raise RuntimeError("Google Scholar fetch completed but no publications could be processed")

citation_data['metadata'].pop('fetch_failed_at', None)
citation_data['metadata'].pop('fetch_error', None)
citation_data['metadata']['fetch_status'] = 'success'
citation_data['metadata']['last_updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Save to YAML file
try:
with open(OUTPUT_FILE, 'w') as f:
yaml.dump(citation_data, f, default_flow_style=False, sort_keys=False)
print(f"Citation data saved to {OUTPUT_FILE}")
save_citation_data(citation_data)
except Exception as e:
print(f"Error saving citation data: {str(e)}")
raise

return citation_data

if __name__ == "__main__":
    try:
        get_scholar_citations()
    except Exception as e:
        # Recording the failure is best-effort: if it fails too (for
        # example the output file cannot be written), report that and
        # still re-raise the original fetch error below.
        try:
            record_fetch_failure(e)
        except Exception as record_error:
            print(f"Warning: Could not record fetch failure: {record_error}")
        # Re-raise the original error so the script exits non-zero.
        raise
Loading