Skip to content

Commit

Permalink
feat: prevent unnecessary sync (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
louisgrasset authored Oct 29, 2023
1 parent cad44a2 commit 4ea135f
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 40 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Tells whether a cache's `version` field differs from the required version.
 *
 * The cache shape is not validated: a missing or non-string `version` simply
 * compares as different from `requiredVersion`.
 *
 * @param outdatedCache - Any non-nullish value; its `version` property is read.
 * @param requiredVersion - The version string to compare against.
 * @returns `true` when `outdatedCache.version` is not strictly equal to
 *   `requiredVersion`, `false` otherwise.
 */
export const preventMigrationOnWrongVersion = (
  outdatedCache: NonNullable<unknown>,
  requiredVersion: string,
) => {
  const { version } = outdatedCache as { version?: unknown };
  return version !== requiredVersion;
};
25 changes: 25 additions & 0 deletions src/helpers/tweet/__tests__/tweet-formatter.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { Tweet } from "@the-convocation/twitter-scraper";

import { tweetFormatter } from "../tweet-formatter.js";

// Swap the real text formatter for a stub that prefixes its input's text with
// "formatted:", so the spec can observe that tweetFormatter delegates to it.
jest.mock("../format-tweet-text.js", () => ({
  formatTweetText: jest
    .fn()
    .mockImplementation(({ text }: Tweet) => `formatted:${text}`),
}));

// Checks that tweetFormatter routes the text through the (mocked) formatter
// and multiplies the timestamp by 1000.
describe("tweetFormatter", () => {
  it("should properly format the give tweet", () => {
    const rawTweet = { text: "text", timestamp: 966236400 } as Tweet;

    const formatted = tweetFormatter(rawTweet);

    expect(formatted).toStrictEqual({
      text: "formatted:text",
      timestamp: 966236400000,
    });
  });
});
1 change: 1 addition & 0 deletions src/helpers/tweet/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ export * from "./is-tweet-cached.js";
export * from "./keep-recent-tweets.js";
export * from "./keep-self-quotes.js";
export * from "./keep-self-replies.js";
export * from "./tweet-formatter.js";
11 changes: 11 additions & 0 deletions src/helpers/tweet/tweet-formatter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { Tweet } from "@the-convocation/twitter-scraper";

import { formatTweetText } from "./format-tweet-text.js";

/**
 * Builds a copy of a tweet with a rescaled timestamp and formatted text.
 *
 * All other fields are copied over unchanged via a shallow spread.
 *
 * @param tweet - The tweet to format.
 * @returns A new tweet whose `timestamp` is the original multiplied by 1000
 *   (0 when the original is nullish; presumably seconds to milliseconds,
 *   to be confirmed against the scraper) and whose `text` is the result of
 *   `formatTweetText(tweet)`.
 */
export const tweetFormatter = (tweet: Tweet): Tweet => ({
  ...tweet,
  timestamp: 1000 * (tweet.timestamp ?? 0),
  text: formatTweetText(tweet),
});
101 changes: 61 additions & 40 deletions src/services/tweets-getter.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { API_RATE_LIMIT, TWITTER_HANDLE } from "../constants.js";
import { getCache } from "../helpers/cache/index.js";
import { oraPrefixer } from "../helpers/logs/ora-prefixer.js";
import { getEligibleTweet } from "../helpers/tweet/get-eligible-tweet.js";
import { formatTweetText, isTweetCached } from "../helpers/tweet/index.js";
import { isTweetCached, tweetFormatter } from "../helpers/tweet/index.js";

const pullContentStats = (tweets: Tweet[], title: string) => {
const stats = {
Expand Down Expand Up @@ -34,53 +34,74 @@ export const tweetsGetterService = async (
}).start();
log.text = "filtering";

let preventPostsSynchronization = false;
const LATEST_TWEETS_COUNT = 5;

/**
* Synchronization optimization: prevent excessive API calls & potential rate-limiting
*
* Pull the ${LATEST_TWEETS_COUNT} latest tweets, then keep only the eligible ones.
* This optimization prevents the post sync if the latest eligible tweet is cached.
*/
const latestTweets = twitterClient.getTweets(
TWITTER_HANDLE,
LATEST_TWEETS_COUNT,
);

for await (const latestTweet of latestTweets) {
if (!preventPostsSynchronization) {
// Only consider eligible tweets.
const tweet = await getEligibleTweet(tweetFormatter(latestTweet));

if (tweet) {
// If the latest eligible tweet is cached, mark sync as unneeded.
if (isTweetCached(tweet, cache)) {
preventPostsSynchronization = true;
}
// The first eligible tweet decides the outcome, so stop scanning here.
// If it was not cached, the synchronization step below still runs.
break;
}
}
}

// Get tweets from API
const tweets: Tweet[] = [];
const tweetsIds = twitterClient.getTweets(TWITTER_HANDLE, 200);

let hasRateLimitReached = false;
let latestTweetAlreadySynced = false;
let tweetsCount = 0;
for await (const tweet of tweetsIds) {
const rateLimitTimeout = setTimeout(
() => (hasRateLimitReached = true),
1000 * API_RATE_LIMIT,
);

if (
latestTweetAlreadySynced ||
hasRateLimitReached ||
isTweetCached(tweet, cache)
) {
continue;
}

// Skip posts sync if the latest one has already synced
if (tweetsCount === 0 && isTweetCached(tweet, cache)) {
latestTweetAlreadySynced = true;
}
if (preventPostsSynchronization) {
log.succeed("task finished (unneeded sync)");
} else {
const tweetsIds = twitterClient.getTweets(TWITTER_HANDLE, 200);

let hasRateLimitReached = false;
for await (const tweet of tweetsIds) {
const rateLimitTimeout = setTimeout(
() => (hasRateLimitReached = true),
1000 * API_RATE_LIMIT,
);

if (hasRateLimitReached || isTweetCached(tweet, cache)) {
continue;
}

const t: Tweet = {
...tweet,
timestamp: (tweet.timestamp ?? 0) * 1000,
text: formatTweetText(tweet),
};
const t: Tweet = tweetFormatter(tweet);

const eligibleTweet = await getEligibleTweet(t);
if (eligibleTweet) {
tweets.unshift(eligibleTweet);
const eligibleTweet = await getEligibleTweet(t);
if (eligibleTweet) {
tweets.unshift(eligibleTweet);
}
clearTimeout(rateLimitTimeout);
}

if (hasRateLimitReached) {
log.warn(
`rate limit reached, more than ${API_RATE_LIMIT}s to fetch a single tweet`,
);
}
clearTimeout(rateLimitTimeout);
tweetsCount++;
}

if (hasRateLimitReached) {
log.warn(
`rate limit reached, more than ${API_RATE_LIMIT}s to fetch a single tweet`,
);
log.succeed(pullContentStats(tweets, "tweets"));
log.succeed("task finished");
}
log.succeed(pullContentStats(tweets, "tweets"));
log.succeed("task finished");

return tweets;
};

0 comments on commit 4ea135f

Please sign in to comment.